from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr

# Download the base model
base_model_repo = "johnpaulbin/articulate-11-expspanish-base-merged-Q8_0-GGUF"
base_model_file = "articulate-11-expspanish-base-merged-q8_0.gguf"
base_model_path = hf_hub_download(repo_id=base_model_repo, filename=base_model_file)

# Download the LoRA adapter
adapter_repo = "johnpaulbin/articulate-V1-Q8_0-GGUF"
adapter_file = "articulate-V1-q8_0.gguf"
adapter_path = hf_hub_download(repo_id=adapter_repo, filename=adapter_file)

# Initialize the Llama model with the base model and LoRA adapter
llm = Llama(
    model_path=base_model_path,
    lora_path=adapter_path,
    n_ctx=256,          # Reduced context length (adjust based on your needs)
    n_threads=2,        # Number of CPU threads (adjust based on your system)
    n_gpu_layers=0,     # CPU-only; set > 0 to offload layers to the GPU (adjust based on VRAM)
    use_mmap=False,     # Disable memory mapping and load the full model into RAM
    use_mlock=True,     # Lock the model in memory to prevent it from being swapped out
    logits_all=False,   # Only compute logits for the last token
    embedding=False     # Disable embedding-only mode
)

# Define the translation function
def translate(direction, text):
    # Determine source and target languages based on direction
    if direction == "English to Spanish":
        source_lang = "ENGLISH"
        target_lang = "SPANISH"
    elif direction == "Spanish to English":
        source_lang = "SPANISH"
        target_lang = "ENGLISH"
    elif direction == "Korean to English":
        source_lang = "KOREAN"
        target_lang = "ENGLISH"
    elif direction == "English to Korean":
        source_lang = "ENGLISH"
        target_lang = "KOREAN"
    else:
        return "Invalid direction"

    # Construct the prompt for raw completion
    prompt = f"[{source_lang}]{text}[{target_lang}]"

    # Generate the completion with deterministic settings (greedy decoding)
    response = llm.create_completion(
        prompt,
        max_tokens=200,   # Limit output length
        temperature=0,    # Greedy decoding
        top_k=1           # Select the most probable token
    )

    # Extract and return the generated text
    return response['choices'][0]['text'].strip()

# Define the Gradio interface
direction_options = ["English to Spanish", "Spanish to English", "Korean to English", "English to Korean"]
iface = gr.Interface(
    fn=translate,
    inputs=[
        gr.Dropdown(choices=direction_options, label="Translation Direction"),
        gr.Textbox(lines=5, label="Input Text")
    ],
    outputs=gr.Textbox(lines=5, label="Translation"),
    title="Translation App",
    description="Translate text between English, Spanish, and Korean using the Articulate V1 model."
)

# Launch the app
iface.launch(debug=True)
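
# Optional sanity check (a minimal sketch, not part of the app itself): uncomment to
# call translate() directly before launching the Gradio UI. Assumes the downloads above
# succeeded and the model fits in RAM; the example sentence is purely illustrative.
# if __name__ == "__main__":
#     print(translate("English to Spanish", "Good morning, how are you?"))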