import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the model and tokenizer once and cache them across Streamlit reruns
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf",
        device_map="auto",   # requires the `accelerate` package
        torch_dtype="auto",
    )
    return model, tokenizer

# Initialize the page
st.title("LLaMA-2 Chatbot")
st.sidebar.title("Configuration")

# Generation parameters
max_tokens = st.sidebar.slider("Max New Tokens", 50, 1000, 256)
temperature = st.sidebar.slider("Temperature", 0.1, 1.5, 0.7)

model, tokenizer = load_model()

# Chat state
if "messages" not in st.session_state:
    st.session_state.messages = []

user_input = st.text_input("Your message:", key="input")

if st.button("Send"):
    if user_input:
        st.session_state.messages.append({"role": "user", "content": user_input})
        # Note: this encodes only the latest message, so the bot has no memory
        # of earlier turns; see the multi-turn sketch after this script.
        inputs = tokenizer(
            user_input,
            return_tensors="pt",
            truncation=True,
        ).to(model.device)
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=max_tokens,  # max_length would count the prompt too
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,  # Llama-2 defines no pad token
        )
        # Decode only the newly generated tokens, not the echoed prompt
        response = tokenizer.decode(
            outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True
        )
        st.session_state.messages.append({"role": "assistant", "content": response})

# Display the conversation
for message in st.session_state.messages:
    role = "User" if message["role"] == "user" else "Assistant"
    st.write(f"**{role}:** {message['content']}")
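
# --- Multi-turn prompting sketch (an addition, not part of the original app) ---
# The Send handler above tokenizes only the latest user message, so the model
# never sees earlier turns, and it skips the [INST] chat format Llama-2-chat
# was trained on. A minimal sketch of one way to fix that, assuming a
# transformers version (>= 4.34) where tokenizer.apply_chat_template is
# available and the hub repo ships a chat template; `build_inputs` is a
# hypothetical helper name:
def build_inputs(tokenizer, messages, device):
    # Render the full history into the model's expected chat format and
    # return input ids ready to pass to generate().
    input_ids = tokenizer.apply_chat_template(
        messages,                    # the st.session_state.messages list
        add_generation_prompt=True,  # append the assistant-turn prefix
        return_tensors="pt",
    )
    return input_ids.to(device)

# Usage inside the Send handler, replacing the single-message tokenizer call:
#   input_ids = build_inputs(tokenizer, st.session_state.messages, model.device)
#   outputs = model.generate(input_ids, max_new_tokens=max_tokens, ...)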