"""Streamlit demo: generate text from an optional prompt and/or an image.

The page shows a text box and an image uploader. Whenever at least one of
them is filled in, the inputs are preprocessed, passed to a generative
multimodal model, and the decoded generation is written to the page.
"""

import base64

import streamlit as st
import torch
from transformers import AutoModel, AutoProcessor, AutoTokenizer  # AutoModel kept: pre-existing import
from transformers.image_utils import load_image

try:
    from transformers import AutoModelForImageTextToText as _GenerationModel
except ImportError:
    # Fall back for transformers releases that predate AutoModelForImageTextToText.
    from transformers import AutoModelForVision2Seq as _GenerationModel

# Upper bound on generated tokens so a single request cannot run unbounded.
MAX_NEW_TOKENS = 64


@st.cache_resource
def _load_model(name):
    """Load the model, its processor, and its tokenizer for checkpoint *name*.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded objects avoids reloading the weights on each rerun.

    Returns:
        A ``(model, processor, tokenizer)`` tuple.
    """
    loaded_model = _GenerationModel.from_pretrained(name)
    loaded_processor = AutoProcessor.from_pretrained(name)
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    return loaded_model, loaded_processor, loaded_tokenizer


def _decode_upload(uploaded_file):
    """Decode a Streamlit upload into an RGB image the processor accepts.

    ``load_image`` takes a path, URL, or base64 string rather than raw bytes,
    so the uploaded bytes are base64-encoded before being handed over.
    """
    encoded = base64.b64encode(uploaded_file.getvalue()).decode("ascii")
    return load_image(encoded)


model_name = "your-model-name-here"
model, processor, tokenizer = _load_model(model_name)

st.title("Multimodal Model Demo")

text_input = st.text_input("Enter text prompt (optional)")
image_input = st.file_uploader("Upload image (optional)", type=["jpg", "jpeg", "png"])

# The uploader returns None when nothing is uploaded; test that explicitly
# instead of relying on the truthiness of the uploaded-file object.
if text_input or image_input is not None:
    # Only pass the modalities the user actually provided.
    processor_kwargs = {"return_tensors": "pt"}
    if text_input:
        processor_kwargs["text"] = text_input
    if image_input is not None:
        processor_kwargs["images"] = _decode_upload(image_input)

    inputs = processor(**processor_kwargs)

    # Inference only: skip autograd bookkeeping.
    with torch.inference_mode():
        generated_ids = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)

    # generate() returns token ids, which is what the tokenizer can decode
    # (raw logits are floating-point scores, not ids).
    output_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    st.write("Output:")
    st.write(output_text)