import streamlit as st
from transformers import pipeline

st.set_page_config(page_title="Chat with Qwen2.5-Omni-7B", layout="centered")
st.title("Chat with Qwen2.5-Omni-7B")

# Model name. Note: Qwen2.5-Omni-7B is a multimodal model; recent transformers
# releases serve it through dedicated Omni classes rather than the plain
# text-generation pipeline. For text-only chat, a model such as
# "Qwen/Qwen2.5-7B-Instruct" is a drop-in alternative here.
model_name = "Qwen/Qwen2.5-Omni-7B"

# Prompt inputs
system_prompt = st.text_area("System Prompt", "You are a helpful assistant.", height=100)
user_input = st.text_input("Your Message", "")

# Sampling controls
temperature = st.slider("Temperature", 0.0, 1.0, 0.7)
max_tokens = st.slider("Max Tokens", 16, 1024, 256)

# Optional Hugging Face token (needed only for gated or private models)
hf_token = st.text_input("Hugging Face Token (optional)", type="password")

# Load the model pipeline once per token value. The token is passed as an
# argument rather than read from the enclosing scope, so st.cache_resource
# keys the cache on it and reloads when it changes. `use_auth_token` is
# deprecated; `token` is the current keyword.
@st.cache_resource
def load_pipeline(token):
    return pipeline(
        "text-generation",
        model=model_name,
        tokenizer=model_name,
        token=token,
        device_map="auto",
    )

if user_input:
    pipe = load_pipeline(hf_token or None)
    prompt = f"{system_prompt}\nUser: {user_input}\nAssistant:"
    # Temperature only applies when sampling; at T=0 fall back to greedy
    # decoding instead of passing an invalid temperature to generate().
    gen_kwargs = {"max_new_tokens": max_tokens, "do_sample": temperature > 0}
    if temperature > 0:
        gen_kwargs["temperature"] = temperature
    with st.spinner("Generating..."):
        output = pipe(prompt, **gen_kwargs)[0]["generated_text"]
    st.markdown("**Response:**")
    # The pipeline echoes the prompt; slice it off instead of using
    # str.replace, which could also delete matching text inside the reply.
    st.write(output[len(prompt):].strip())
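
# --- Optional: chat-template prompting (sketch) ---
# The hand-rolled "User: ... Assistant:" prompt above does not match the chat
# format Qwen instruct models were trained on. Below is a minimal sketch of
# template-aware prompt construction, assuming the tokenizer ships a chat
# template (Qwen tokenizers do). `build_chat_prompt` is an illustrative
# helper, not part of the original app; it is defined but never called.
def build_chat_prompt(system_prompt: str, user_input: str) -> str:
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]
    # add_generation_prompt appends the assistant header so the model
    # continues as the assistant instead of extending the user turn.
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )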