# app.py — Gradio chat demo for the Shome/croguana-RC2-gguf Croatian LLM
# (provenance: "Update app.py", commit a588155, verified)
import gradio as gr
from llama_cpp import Llama
import time
import os
import huggingface_hub
# Fetch the GGUF weights from the Hugging Face Hub (cached locally after
# the first download).
print("Preuzimanje modela s Hugging Face-a...")
model_path = huggingface_hub.hf_hub_download(
    repo_id="Shome/croguana-RC2-gguf",
    filename="unsloth.Q5_K_M.gguf",  # adjust if the repo ships a different quant
)

# Spin up the llama.cpp runtime (CPU inference).
print("Učitavanje modela s llama.cpp...")
model = Llama(
    model_path=model_path,
    n_threads=4,  # number of CPU threads to use
    n_ctx=4096,   # context window sized for multi-turn chat history
)
def format_chat_history(chat_history, new_message):
    """Render the whole conversation into the model's prompt template.

    Each turn uses the headers the model was trained on — ``### Korisnik:``
    for the user and ``### AI asistent:`` for the assistant. The prompt
    ends with an open assistant header so the model continues from there.
    """
    segments = []
    for user_turn, assistant_turn in chat_history:
        segments.append(f"### Korisnik:\n{user_turn}\n")
        # A turn awaiting its reply has no assistant text yet — skip it.
        if assistant_turn:
            segments.append(f"### AI asistent:\n{assistant_turn}\n")
    segments.append(f"### Korisnik:\n{new_message}\n### AI asistent:\n")
    return "".join(segments)
def generate_response(message, chat_history, max_tokens, temperature, top_p, rep_penalty):
    """Generate a model reply and return the updated chat history.

    Args:
        message: The user's new message (textbox contents).
        chat_history: List of (user, assistant) tuples shown in the Chatbot.
        max_tokens / temperature / top_p / rep_penalty: sampling settings
            taken straight from the UI sliders.

    Returns:
        A ("", new_history) pair — the empty string clears the textbox and
        new_history refreshes the Chatbot component.
    """
    # Guard against empty AND whitespace-only input; the original check
    # (`if not message`) let "   " through to the model.
    if not message or not message.strip():
        return "", chat_history
    start_time = time.time()
    # Format the entire conversation history with the new message
    formatted_prompt = format_chat_history(chat_history, message)
    # Generate response with llama.cpp
    response = model(
        formatted_prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        repeat_penalty=rep_penalty,
        stop=["</s>", "### Korisnik:"],  # cut off at end-of-text or the next user turn
    )
    # Completion API shape: first choice holds the generated continuation.
    ai_response = response["choices"][0]["text"].strip()
    inference_time = time.time() - start_time
    print(f"Generiranje završeno za {inference_time:.2f} sekundi (temp={temperature}, top_p={top_p})")
    # Return a fresh list rather than mutating the input in place, so the
    # Gradio state object is replaced atomically instead of being edited
    # while it may still be referenced elsewhere.
    return "", chat_history + [(message, ai_response)]
# Create Gradio interface with custom chat UI
with gr.Blocks(title="Croguana Chat") as demo:
    gr.Markdown("# Croguana-RC2 Hrvatski Jezični Model")
    gr.Markdown("Ovaj demo omogućuje chat s hrvatskim jezičnim modelom koristeći llama.cpp.")
    with gr.Row():
        # Left column (wider): conversation display plus the input controls.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=500, label="Razgovor")
            with gr.Row():
                msg = gr.Textbox(
                    placeholder="Napišite poruku na hrvatskom jeziku...",
                    label="Vaša poruka",
                    show_label=False,
                    container=False
                )
                submit_btn = gr.Button("Pošalji", variant="primary")
            with gr.Row():
                clear_btn = gr.Button("Očisti razgovor")
        # Right column (narrower): sampling settings and model info.
        with gr.Column(scale=1):
            gr.Markdown("### Postavke generiranja")
            # Slider defaults mirror generate_response's expected ranges.
            max_tokens = gr.Slider(
                minimum=64, maximum=1024, value=512, step=64,
                label="Maksimalan broj tokena"
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=2.0, value=0.7, step=0.1,
                label="Temperatura"
            )
            top_p = gr.Slider(
                minimum=0.5, maximum=1.0, value=0.95, step=0.05,
                label="Top-p"
            )
            rep_penalty = gr.Slider(
                minimum=1.0, maximum=2.0, value=1.15, step=0.05,
                label="Kazna ponavljanja"
            )
            gr.Markdown("### Informacije o modelu")
            gr.Markdown("- **Model**: Shome/croguana-RC2-gguf")
            gr.Markdown("- **Backend**: llama.cpp za CPU")
            gr.Markdown("- **Jezik**: Hrvatski")
    # Set up event handlers
    # Button click and pressing Enter in the textbox both run
    # generate_response; its ("", history) return clears the textbox and
    # refreshes the chat display.
    submit_btn.click(
        generate_response,
        inputs=[msg, chatbot, max_tokens, temperature, top_p, rep_penalty],
        outputs=[msg, chatbot]
    )
    msg.submit(
        generate_response,
        inputs=[msg, chatbot, max_tokens, temperature, top_p, rep_penalty],
        outputs=[msg, chatbot]
    )
    # Clearing simply replaces the Chatbot value with an empty history.
    clear_btn.click(lambda: [], None, chatbot)
    # Example conversations
    # Clicking an example only fills the textbox (inputs=msg); the user
    # still has to submit it.
    gr.Examples(
        examples=[
            ["Pozdrav! Možeš li mi reći nešto o Hrvatskoj?"],
            ["Koja su najpoznatija hrvatska jela?"],
            ["Napiši kratku priču o moru."],
            ["Objasni mi neki znanstveni koncept na jednostavan način."]
        ],
        inputs=msg
    )
# Launch the app
if __name__ == "__main__":
    demo.launch()