Spaces:
Sleeping
Sleeping
File size: 1,065 Bytes
5429f80 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import gradio as gr
from llama_cpp import Llama
# Load the local Gemma-2B (Ukrainian) GGUF model once at import time.
# chat_format="gemma" selects llama.cpp's built-in Gemma chat template,
# which is why assistant turns below use the "model" role.
_MODEL_SETTINGS = {
    "model_path": "gemma-2b-uk.gguf",
    "chat_format": "gemma",
}
llm = Llama(**_MODEL_SETTINGS)
def convert_history(message, history, max_history=1):
    """Convert a gradio chat history into llama.cpp chat messages.

    Parameters
    ----------
    message : str
        The new user message to append as the final turn.
    history : list
        Prior (user, assistant) exchange pairs as provided by
        ``gr.ChatInterface``.
    max_history : int, optional
        Number of most-recent exchanges to keep as context. Defaults to 1,
        preserving the original behaviour of sending only the last turn
        (keeps the prompt small for a 2B model). Pass 0 to drop all
        history, or a larger value to include more context.

    Returns
    -------
    list[dict]
        Messages in llama.cpp chat-completion format. Assistant turns use
        the "model" role, as expected by the Gemma chat template.
    """
    chat_history = []
    # history[-0:] would return the WHOLE list, so guard the zero case.
    recent = history[-max_history:] if max_history else []
    for user_msg, model_msg in recent:
        chat_history.append({
            "role": "user",
            "content": user_msg
        })
        chat_history.append({
            "role": "model",
            "content": model_msg
        })
    chat_history.append({
        "role": "user",
        "content": message
    })
    return chat_history
def ask(message, history):
    """Stream a model reply for ``gr.ChatInterface``.

    Converts the gradio history into llama.cpp chat messages, then yields
    the accumulated response text after each streamed chunk so the UI
    updates incrementally. temperature=0 makes output deterministic.
    """
    messages = convert_history(message, history)
    stream = llm.create_chat_completion(
        messages=messages,
        temperature=0,
        stream=True,
        repeat_penalty=1.05,
    )
    partial = ""
    for event in stream:
        delta = event["choices"][0]["delta"]
        # Role-only / terminal chunks carry no "content" key — skip them.
        if "content" in delta:
            partial += delta["content"]
            yield partial
# Wire the streaming handler into gradio's standard chat UI.
demo = gr.ChatInterface(ask)

if __name__ == "__main__":
    # queue() is required so gradio can stream generator responses.
    # NOTE: removed a stray " |" that trailed this line in the source —
    # it was a copy/extraction artifact and a syntax error.
    demo.queue().launch()