import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr

# Load the Huginn-0125 recurrent-depth model; trust_remote_code is required
# because the checkpoint ships its own modeling code.
model_name = "tomg-group-umd/huginn-0125"
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.bfloat16, trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Place the model on GPU when available and switch to inference mode once at startup.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()


def generate_response(prompt, num_steps):
    """Generate a completion, spending num_steps recurrent iterations of test-time compute."""
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        # num_steps is the model-specific knob that scales computation per token;
        # cast to int since the Gradio slider may deliver a float.
        output = model.generate(input_ids, num_steps=int(num_steps), max_length=256)

    return tokenizer.decode(output[0], skip_special_tokens=True)


# Minimal Gradio UI: a prompt box plus a slider that controls the computation scale.
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(lines=5, label="Input Prompt"),
        gr.Slider(minimum=4, maximum=64, step=1, value=16, label="Computation Scale (num_steps)"),
    ],
    outputs="text",
    title="Huginn-0125 Text Generation",
    description="Generate text using the Huginn-0125 model with adjustable computation scale.",
)

if __name__ == "__main__":
    iface.launch()