import gradio as gr
from llama_cpp import Llama
import time
import os
import huggingface_hub

# Download the model from Hugging Face
print("Preuzimanje modela s Hugging Face-a...")  # "Downloading the model from Hugging Face..."
model_path = huggingface_hub.hf_hub_download(
    repo_id="Shome/croguana-RC2-gguf",
    filename="unsloth.Q5_K_M.gguf"  # Adjust this filename if needed
)

# Initialize the model with llama.cpp
print("Učitavanje modela s llama.cpp...")  # "Loading the model with llama.cpp..."
model = Llama(
    model_path=model_path,
    n_ctx=4096,   # Larger context window to hold the chat history
    n_threads=4   # Number of CPU threads to use
)
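
# Note: `os` is imported above but never used. A minimal, optional tweak (an assumption,
# not part of the original file) would be to size the thread count from the host instead
# of hard-coding 4, e.g. by passing this to the Llama(...) call:
#
#     n_threads=os.cpu_count() or 4,  # os.cpu_count() may return None, so fall back to 4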

def format_chat_history(chat_history, new_message):
    """Format the entire chat history according to the required prompt template."""
    formatted_prompt = ""

    # Add all previous messages
    for user_msg, ai_msg in chat_history:
        formatted_prompt += f"### Korisnik:\n{user_msg}\n"
        if ai_msg:  # Skip if None
            formatted_prompt += f"### AI asistent:\n{ai_msg}\n"

    # Add the new message
    formatted_prompt += f"### Korisnik:\n{new_message}\n### AI asistent:\n"
    return formatted_prompt
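
# For illustration only (not in the original file): with one previous exchange in the history
# and a new, hypothetical user message, format_chat_history produces a prompt such as:
#
#     ### Korisnik:
#     Pozdrav!
#     ### AI asistent:
#     Pozdrav! Kako vam mogu pomoći?
#     ### Korisnik:
#     Koji je glavni grad Hrvatske?
#     ### AI asistent:
#
# The trailing "### AI asistent:" header is what the model is asked to complete.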

def generate_response(message, chat_history, max_tokens, temperature, top_p, rep_penalty):
    """Generate a response and update the chat history."""
    if not message:
        return "", chat_history

    start_time = time.time()

    # Format the entire conversation history together with the new message
    formatted_prompt = format_chat_history(chat_history, message)

    # Generate a response with llama.cpp
    response = model(
        formatted_prompt,
        max_tokens=int(max_tokens),  # ensure an integer token budget (sliders may pass floats)
        temperature=temperature,
        top_p=top_p,
        repeat_penalty=rep_penalty,
        stop=["</s>", "### Korisnik:"]  # Stop tokens
    )

    # Get the generated text
    ai_response = response["choices"][0]["text"].strip()

    # Calculate and log the inference time
    inference_time = time.time() - start_time
    print(f"Generiranje završeno za {inference_time:.2f} sekundi (temp={temperature}, top_p={top_p})")  # "Generation finished in ... seconds"

    # Update the chat history
    chat_history.append((message, ai_response))
    return "", chat_history

# Create the Gradio interface with a custom chat UI
with gr.Blocks(title="Croguana Chat") as demo:
    gr.Markdown("# Croguana-RC2 Hrvatski Jezični Model")
    gr.Markdown("Ovaj demo omogućuje chat s hrvatskim jezičnim modelom koristeći llama.cpp.")

    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=500, label="Razgovor")
            with gr.Row():
                msg = gr.Textbox(
                    placeholder="Napišite poruku na hrvatskom jeziku...",
                    label="Vaša poruka",
                    show_label=False,
                    container=False
                )
                submit_btn = gr.Button("Pošalji", variant="primary")
            with gr.Row():
                clear_btn = gr.Button("Očisti razgovor")

        with gr.Column(scale=1):
            gr.Markdown("### Postavke generiranja")
            max_tokens = gr.Slider(
                minimum=64, maximum=1024, value=512, step=64,
                label="Maksimalan broj tokena"
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=2.0, value=0.7, step=0.1,
                label="Temperatura"
            )
            top_p = gr.Slider(
                minimum=0.5, maximum=1.0, value=0.95, step=0.05,
                label="Top-p"
            )
            rep_penalty = gr.Slider(
                minimum=1.0, maximum=2.0, value=1.15, step=0.05,
                label="Kazna ponavljanja"
            )

            gr.Markdown("### Informacije o modelu")
            gr.Markdown("- **Model**: Shome/croguana-RC2-gguf")
            gr.Markdown("- **Backend**: llama.cpp za CPU")
            gr.Markdown("- **Jezik**: Hrvatski")

    # Set up the event handlers
    submit_btn.click(
        generate_response,
        inputs=[msg, chatbot, max_tokens, temperature, top_p, rep_penalty],
        outputs=[msg, chatbot]
    )
    msg.submit(
        generate_response,
        inputs=[msg, chatbot, max_tokens, temperature, top_p, rep_penalty],
        outputs=[msg, chatbot]
    )
    clear_btn.click(lambda: [], None, chatbot)

    # Example conversations
    gr.Examples(
        examples=[
            ["Pozdrav! Možeš li mi reći nešto o Hrvatskoj?"],
            ["Koja su najpoznatija hrvatska jela?"],
            ["Napiši kratku priču o moru."],
            ["Objasni mi neki znanstveni koncept na jednostavan način."]
        ],
        inputs=msg
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()
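
# To run this outside of Spaces (an assumption; the requirements file is not shown here):
#   pip install gradio llama-cpp-python huggingface_hub
#   python app.py   # or whatever this file is named
# On Hugging Face Spaces with the Gradio SDK, the app is launched from app.py automatically.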