run

#1
by rakmik - opened

!pip install optimum

!pip install auto-gptq
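For context, optimum and auto-gptq are the packages transformers uses to load GPTQ-quantized checkpoints, which lebe1/opt-125m-2bit presumably is (I'm inferring that from the install requirements). A minimal sketch of loading the model directly, assuming the repo ships a GPTQ quantization config:

from transformers import AutoModelForCausalLM

# Sketch only: loads the quantized weights directly; assumes the repo carries a
# GPTQ quantization config and that a CUDA device plus accelerate are available.
model = AutoModelForCausalLM.from_pretrained("lebe1/opt-125m-2bit", device_map="auto")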

from transformers import pipeline, AutoTokenizer

messages = [
    {"role": "user", "content": "Who are you?"},
]

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
pipe = pipeline("text-generation", model="lebe1/opt-125m-2bit", tokenizer=tokenizer, trust_remote_code=True)

# Define a chat template

chat_template = """{% for message in messages %}{{ message.role }}: {{ message.content }}{% endfor %}"""

# Set the tokenizer's chat template

tokenizer.chat_template = chat_template

# Apply the chat template

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
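# With the single user message above, this renders to the plain string
# "user: Who are you?". Note that add_generation_prompt=True is a no-op here,
# since the custom template never references it.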

# Generate text using the formatted prompt

generated_text = pipe(prompt, max_new_tokens=50)[0]['generated_text']

print(generated_text)
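If the completion comes out repetitive, the same pipeline call accepts the usual generate() sampling arguments; the values below are just illustrative:

generated_text = pipe(prompt, max_new_tokens=50, do_sample=True, temperature=0.7, top_p=0.9)[0]['generated_text']
print(generated_text)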
