import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces
import torch
import os

model_name = "meta-llama/Meta-Llama-3-70B-Instruct"
lora_name = "Thermostatic/Llama-3-NeuralTranslate-Instructions-70b-v0.1-lora"

# Llama 3 is a decoder-only model, so it must be loaded as a causal LM rather
# than a seq2seq model. The weights are gated, hence the access token, and
# bf16 keeps the 70B checkpoint's memory footprint manageable.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    token=os.environ["HUGGINGFACE_TOKEN"],
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Attach the LoRA adapter through the transformers/PEFT integration; it
# becomes the active adapter as soon as it is loaded.
model.load_adapter(lora_name)
model.to('cuda')
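
# Equivalent explicit loading with PEFT, if finer adapter control is needed
# (a sketch, assuming the `peft` package is installed):
#
#   from peft import PeftModel
#   model = PeftModel.from_pretrained(model, lora_name)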


@spaces.GPU
def translate(input_text):
    # Tokenize the prompt and move it to the same device as the model.
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
    response = model.generate(input_ids, max_new_tokens=1024)
    # A causal LM echoes the prompt, so decode only the newly generated tokens.
    response_text = tokenizer.decode(response[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return f"Translated text: {response_text}"
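
# Note: Meta-Llama-3-70B-Instruct is chat-tuned, so translations may improve
# if the prompt is wrapped in the model's chat template before generation
# (a sketch using the standard transformers API):
#
#   messages = [{"role": "user", "content": input_text}]
#   input_ids = tokenizer.apply_chat_template(
#       messages, add_generation_prompt=True, return_tensors="pt"
#   ).to(model.device)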


with gr.Blocks() as demo:
    with gr.Row():
        input_text = gr.Textbox(label="Enter a message to translate:")
        submit = gr.Button("Translate")
    output = gr.Textbox(label="Translated text:")

    # Pass the component objects themselves to `click`, not their variable
    # names as strings.
    submit.click(fn=translate, inputs=input_text, outputs=output)

demo.launch()
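
# The `spaces` package and `@spaces.GPU` target Hugging Face ZeroGPU Spaces;
# outside a Space the decorator is designed to be a no-op, so the same script
# should also run on a regular GPU machine (an assumption worth verifying
# against the current `spaces` package behavior).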