# app.py — Gradio chat demo for the Shome/croguana-RC2-gguf Croatian LLM
# (provenance: "Update app.py", commit a588155, verified)
import gradio as gr
from llama_cpp import Llama
import time
import os
import huggingface_hub
# Fetch the GGUF weights from the Hugging Face Hub (cached locally after
# the first download).
print("Preuzimanje modela s Hugging Face-a...")
model_path = huggingface_hub.hf_hub_download(
    repo_id="Shome/croguana-RC2-gguf",
    filename="unsloth.Q5_K_M.gguf",  # adjust if the repo ships a different quant
)

# Spin up the llama.cpp runtime (CPU inference).
print("Učitavanje modela s llama.cpp...")
model = Llama(
    model_path=model_path,
    n_threads=4,  # number of CPU threads to use
    n_ctx=4096,   # context window sized for multi-turn chat history
)
def format_chat_history(chat_history, new_message):
    """Render the whole conversation into the model's prompt template.

    Each turn uses the headers the model was trained on — ``### Korisnik:``
    for the user and ``### AI asistent:`` for the assistant. The prompt
    ends with an open assistant header so the model continues from there.
    """
    segments = []
    for user_turn, assistant_turn in chat_history:
        segments.append(f"### Korisnik:\n{user_turn}\n")
        # A turn awaiting its reply has no assistant text yet — skip it.
        if assistant_turn:
            segments.append(f"### AI asistent:\n{assistant_turn}\n")
    segments.append(f"### Korisnik:\n{new_message}\n### AI asistent:\n")
    return "".join(segments)
def generate_response(message, chat_history, max_tokens, temperature, top_p, rep_penalty):
    """Generate a model reply and return the updated chat history.

    Args:
        message: The user's new message (textbox contents).
        chat_history: List of (user, assistant) tuples shown in the Chatbot.
        max_tokens / temperature / top_p / rep_penalty: sampling settings
            taken straight from the UI sliders.

    Returns:
        A ("", new_history) pair — the empty string clears the textbox and
        new_history refreshes the Chatbot component.
    """
    # Guard against empty AND whitespace-only input; the original check
    # (`if not message`) let "   " through to the model.
    if not message or not message.strip():
        return "", chat_history
    start_time = time.time()
    # Format the entire conversation history with the new message
    formatted_prompt = format_chat_history(chat_history, message)
    # Generate response with llama.cpp
    response = model(
        formatted_prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        repeat_penalty=rep_penalty,
        stop=["</s>", "### Korisnik:"],  # cut off at end-of-text or the next user turn
    )
    # Completion API shape: first choice holds the generated continuation.
    ai_response = response["choices"][0]["text"].strip()
    inference_time = time.time() - start_time
    print(f"Generiranje završeno za {inference_time:.2f} sekundi (temp={temperature}, top_p={top_p})")
    # Return a fresh list rather than mutating the input in place, so the
    # Gradio state object is replaced atomically instead of being edited
    # while it may still be referenced elsewhere.
    return "", chat_history + [(message, ai_response)]
# Create Gradio interface with custom chat UI
with gr.Blocks(title="Croguana Chat") as demo:
    gr.Markdown("# Croguana-RC2 Hrvatski Jezični Model")
    gr.Markdown("Ovaj demo omogućuje chat s hrvatskim jezičnim modelom koristeći llama.cpp.")
    with gr.Row():
        # Left column (wider): conversation display plus the input controls.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=500, label="Razgovor")
            with gr.Row():
                msg = gr.Textbox(
                    placeholder="Napišite poruku na hrvatskom jeziku...",
                    label="Vaša poruka",
                    show_label=False,
                    container=False
                )
                submit_btn = gr.Button("Pošalji", variant="primary")
            with gr.Row():
                clear_btn = gr.Button("Očisti razgovor")
        # Right column (narrower): sampling settings and model info.
        with gr.Column(scale=1):
            gr.Markdown("### Postavke generiranja")
            # Slider defaults mirror generate_response's expected ranges.
            max_tokens = gr.Slider(
                minimum=64, maximum=1024, value=512, step=64,
                label="Maksimalan broj tokena"
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=2.0, value=0.7, step=0.1,
                label="Temperatura"
            )
            top_p = gr.Slider(
                minimum=0.5, maximum=1.0, value=0.95, step=0.05,
                label="Top-p"
            )
            rep_penalty = gr.Slider(
                minimum=1.0, maximum=2.0, value=1.15, step=0.05,
                label="Kazna ponavljanja"
            )
            gr.Markdown("### Informacije o modelu")
            gr.Markdown("- **Model**: Shome/croguana-RC2-gguf")
            gr.Markdown("- **Backend**: llama.cpp za CPU")
            gr.Markdown("- **Jezik**: Hrvatski")
    # Set up event handlers
    # Button click and pressing Enter in the textbox both run
    # generate_response; its ("", history) return clears the textbox and
    # refreshes the chat display.
    submit_btn.click(
        generate_response,
        inputs=[msg, chatbot, max_tokens, temperature, top_p, rep_penalty],
        outputs=[msg, chatbot]
    )
    msg.submit(
        generate_response,
        inputs=[msg, chatbot, max_tokens, temperature, top_p, rep_penalty],
        outputs=[msg, chatbot]
    )
    # Clearing simply replaces the Chatbot value with an empty history.
    clear_btn.click(lambda: [], None, chatbot)
    # Example conversations
    # Clicking an example only fills the textbox (inputs=msg); the user
    # still has to submit it.
    gr.Examples(
        examples=[
            ["Pozdrav! Možeš li mi reći nešto o Hrvatskoj?"],
            ["Koja su najpoznatija hrvatska jela?"],
            ["Napiši kratku priču o moru."],
            ["Objasni mi neki znanstveni koncept na jednostavan način."]
        ],
        inputs=msg
    )
# Launch the app
if __name__ == "__main__":
    demo.launch()