Spaces:

taarhissian
/

projectcostestimator

Sleeping

projectcostestimator / app.py

Update app.py

4fba3ae verified 10 days ago

1.37 kB

	import torch
	from transformers import AutoProcessor, AutoModelForVision2Seq
	import gradio as gr
	from PIL import Image

	# Kosmos-2 checkpoint used for both the processor and the model
	MODEL_NAME = "microsoft/kosmos-2-patch14-224"

	# Choose the compute device up front: CUDA when torch reports one, else CPU
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Load the processor, then load the model and move it onto the chosen device
	processor = AutoProcessor.from_pretrained(MODEL_NAME)
	model = AutoModelForVision2Seq.from_pretrained(MODEL_NAME).to(device)

	def analyze_image(image, prompt):
	    """Process an image with a text prompt using Kosmos-2.

	    Args:
	        image: Image as a NumPy array (it is converted with
	            ``Image.fromarray``), or None when no image was supplied.
	        prompt: Text prompt handed to the processor together with the image.

	    Returns:
	        The decoded text of the first generated sequence. If the image is
	        missing or any processing step raises, a string starting with
	        "Error: " is returned instead of raising.
	    """
	    # A submit without an upload arrives as None; report that plainly rather
	    # than surfacing an obscure array-conversion failure from PIL.
	    if image is None:
	        return "Error: Please upload an image."

	    try:
	        pil_image = Image.fromarray(image)  # Convert to PIL Image
	        inputs = processor(images=pil_image, text=prompt, return_tensors="pt").to(device)

	        # Generate output, allowing up to 100 new tokens
	        output = model.generate(**inputs, max_new_tokens=100)
	        return processor.batch_decode(output, skip_special_tokens=True)[0]
	    except Exception as e:
	        # UI boundary: hand the failure back as text so it appears in the
	        # output textbox instead of crashing the request.
	        return f"Error: {e}"

	# Gradio Interface: build the input/output components first, then wire them
	# to analyze_image
	image_input = gr.Image(type="numpy")
	prompt_input = gr.Textbox(label="Prompt")
	response_output = gr.Textbox(label="Generated Response")

	iface = gr.Interface(
	    fn=analyze_image,
	    inputs=[image_input, prompt_input],
	    outputs=response_output,
	    title="Kosmos-2 Image Reasoning",
	    description="Upload an image and provide a text prompt. Kosmos-2 will generate insights based on the image and text input.",
	)

	# Start serving the app
	iface.launch()