# Translation app (Hugging Face Space): Articulate V1 GGUF model served via llama-cpp with a Gradio UI.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr

# Fetch the 8-bit quantized (Q8_0) GGUF base model from the Hugging Face Hub.
# hf_hub_download caches locally and returns the on-disk path.
base_model_repo = "johnpaulbin/articulate-11-expspanish-base-merged-Q8_0-GGUF"
base_model_file = "articulate-11-expspanish-base-merged-q8_0.gguf"
base_model_path = hf_hub_download(repo_id=base_model_repo, filename=base_model_file)

# Fetch the LoRA adapter that specializes the base model (applied at load time below).
adapter_repo = "johnpaulbin/articulate-V1-Q8_0-GGUF"
adapter_file = "articulate-V1-q8_0.gguf"
adapter_path = hf_hub_download(repo_id=adapter_repo, filename=adapter_file)
# Initialize the Llama model with base model and adapter | |
# Load the base model with the LoRA adapter applied.
# NOTE(review): the original inline comments contradicted the values
# (e.g. "use all cores" on n_threads=2, "enable GPU" on n_gpu_layers=0);
# corrected below to describe what is actually configured.
llm = Llama(
    model_path=base_model_path,
    lora_path=adapter_path,
    n_ctx=256,         # small context window; raise if prompts/outputs are longer
    n_threads=2,       # CPU threads used for inference
    n_gpu_layers=0,    # 0 = CPU-only; increase to offload layers to a GPU
    use_mmap=False,    # read the whole model into memory instead of memory-mapping
    use_mlock=True,    # lock model pages in RAM so they are not swapped out
    logits_all=False,  # compute logits only where needed (last token), not per-token
    embedding=False    # completion mode; no embedding output required
)
# Define the translation function | |
# Maps each UI direction label to its (source, target) prompt tags.
_DIRECTIONS = {
    "English to Spanish": ("ENGLISH", "SPANISH"),
    "Spanish to English": ("SPANISH", "ENGLISH"),
    "Korean to English": ("KOREAN", "ENGLISH"),
    "English to Korean": ("ENGLISH", "KOREAN"),
}


def translate(direction: str, text: str) -> str:
    """Translate `text` in the requested direction using the loaded model.

    Args:
        direction: One of the labels in `_DIRECTIONS`
            (e.g. "English to Spanish").
        text: The text to translate.

    Returns:
        The model's translation, stripped of surrounding whitespace, or the
        string "Invalid direction" if `direction` is not recognized.
    """
    pair = _DIRECTIONS.get(direction)
    if pair is None:
        return "Invalid direction"
    source_lang, target_lang = pair

    # The model is trained on raw completions of the form
    # "[SOURCE]text[TARGET]" — build that prompt directly.
    prompt = f"[{source_lang}]{text}[{target_lang}]"

    # Greedy decoding (temperature=0, top_k=1) for deterministic output.
    response = llm.create_completion(
        prompt,
        max_tokens=200,  # cap output length
        temperature=0,
        top_k=1,
    )
    return response['choices'][0]['text'].strip()
# Define the Gradio interface | |
# Supported translation directions; must match the keys handled by translate().
direction_options = [
    "English to Spanish",
    "Spanish to English",
    "Korean to English",
    "English to Korean",
]

# Build the Gradio UI: a direction dropdown and an input textbox feed
# translate(), whose result is shown in the output textbox.
# NOTE: the description previously said "between English and Spanish" only,
# which contradicted the Korean options above — corrected to list all three.
iface = gr.Interface(
    fn=translate,
    inputs=[
        gr.Dropdown(choices=direction_options, label="Translation Direction"),
        gr.Textbox(lines=5, label="Input Text"),
    ],
    outputs=gr.Textbox(lines=5, label="Translation"),
    title="Translation App",
    description="Translate text between English, Spanish, and Korean using the Articulate V1 model.",
)

# Launch the app; debug=True surfaces tracebacks in the console/UI.
iface.launch(debug=True)