# Install dependencies (run once in your environment):
#   pip install git+https://github.com/huggingface/transformers.git
#   pip install accelerate
#   pip install torch

import torch
from transformers import pipeline


def main() -> None:
    """Generate and print a pirate-style chat reply with Zephyr-7B.

    Loads ``HuggingFaceH4/zephyr-7b-beta`` through the text-generation
    pipeline, formats a system+user conversation with the tokenizer's chat
    template, samples one completion, and prints it to stdout.

    Side effects: downloads model weights on first run and may allocate
    GPU memory (``device_map="auto"`` via accelerate).
    """
    # Initialize the text-generation pipeline with the Zephyr model.
    # bfloat16 halves memory versus float32; device_map="auto" lets
    # accelerate place the weights on available devices.
    pipe = pipeline(
        "text-generation",
        model="HuggingFaceH4/zephyr-7b-beta",
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    # Chat messages with roles and content.
    messages = [
        {
            "role": "system",
            "content": "You are a friendly chatbot who always responds in the style of a pirate",
        },
        {
            "role": "user",
            "content": "How many helicopters can a human eat in one sitting?",
        },
    ]

    # Use the tokenizer's chat template so the prompt matches the format the
    # model was trained on; add_generation_prompt appends the marker that
    # cues the model to produce the assistant turn.
    prompt = pipe.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Generate a response; sampling parameters trade determinism for creativity.
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )

    # NOTE: generated_text contains the prompt followed by the model's reply.
    print(outputs[0]["generated_text"])


# Guard the entry point so importing this module does not trigger the
# model download and generation as a side effect.
if __name__ == "__main__":
    main()