import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr

# Load the Huginn-0125 recurrent-depth model; trust_remote_code is required
# because the checkpoint ships its own modeling code.
model_name = "tomg-group-umd/huginn-0125"
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.bfloat16, trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Place the model on GPU when available and switch to inference mode once at startup.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()


def generate_response(prompt, num_steps):
    """Generate a completion, spending num_steps recurrent iterations of test-time compute."""
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        # num_steps is the model-specific knob that scales computation per token;
        # cast to int since the Gradio slider may deliver a float.
        output = model.generate(input_ids, num_steps=int(num_steps), max_length=256)

    return tokenizer.decode(output[0], skip_special_tokens=True)


# Minimal Gradio UI: a prompt box plus a slider that controls the computation scale.
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(lines=5, label="Input Prompt"),
        gr.Slider(minimum=4, maximum=64, step=1, value=16, label="Computation Scale (num_steps)"),
    ],
    outputs="text",
    title="Huginn-0125 Text Generation",
    description="Generate text using the Huginn-0125 model with adjustable computation scale.",
)

if __name__ == "__main__":
    iface.launch()