# Install dependencies (run once in your environment):
#   pip install git+https://github.com/huggingface/transformers.git
#   pip install accelerate
#   pip install torch

import torch
from transformers import pipeline


def main() -> None:
    """Generate and print a pirate-style chat reply with Zephyr-7B.

    Loads ``HuggingFaceH4/zephyr-7b-beta`` through the text-generation
    pipeline, formats a system+user conversation with the tokenizer's chat
    template, samples one completion, and prints it to stdout.

    Side effects: downloads model weights on first run and may allocate
    GPU memory (``device_map="auto"`` via accelerate).
    """
    # Initialize the text-generation pipeline with the Zephyr model.
    # bfloat16 halves memory versus float32; device_map="auto" lets
    # accelerate place the weights on available devices.
    pipe = pipeline(
        "text-generation",
        model="HuggingFaceH4/zephyr-7b-beta",
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    # Chat messages with roles and content.
    messages = [
        {
            "role": "system",
            "content": "You are a friendly chatbot who always responds in the style of a pirate",
        },
        {
            "role": "user",
            "content": "How many helicopters can a human eat in one sitting?",
        },
    ]

    # Use the tokenizer's chat template so the prompt matches the format the
    # model was trained on; add_generation_prompt appends the marker that
    # cues the model to produce the assistant turn.
    prompt = pipe.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Generate a response; sampling parameters trade determinism for creativity.
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )

    # NOTE: generated_text contains the prompt followed by the model's reply.
    print(outputs[0]["generated_text"])


# Guard the entry point so importing this module does not trigger the
# model download and generation as a side effect.
if __name__ == "__main__":
    main()