Update src/streamlit_app.py
src/streamlit_app.py  (+115 -41)
CHANGED
@@ -1,46 +1,120 @@

(41 lines removed from the previous version; the recoverable fragments show the original st.set_page_config(...) call and generation settings max_new_tokens, do_sample, temperature=0.7, top_p=0.9.)
New file contents (120 lines):

import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# --- Page Configuration ---
st.set_page_config(
    page_title="Medical Question Answering with OpenBioLLM",
    page_icon="⚕️",
    layout="wide",
    initial_sidebar_state="expanded",
)

# --- Model Loading ---
# Choose your OpenBioLLM model. The 8B parameter model is more manageable for typical Hugging Face Spaces resources.
# For larger models like 70B, you might need upgraded hardware on Spaces.
MODEL_NAME = "aaditya/Llama3-OpenBioLLM-8B"

@st.cache_resource  # Caches the model and tokenizer for better performance
def load_model_and_tokenizer():
    """Loads the pre-trained model and tokenizer."""
    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Load the model with torch_dtype=torch.float16 for potentially faster inference and lower memory,
        # and device_map='auto' to leverage available hardware (CPU/GPU) efficiently.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,  # Using float16 to reduce memory footprint
            device_map="auto",  # Automatically uses GPU if available, otherwise CPU
        )
        # For models that might not explicitly support the "question-answering" pipeline directly,
        # we use "text-generation".
        qa_pipeline = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=512,  # Adjust as needed for answer length
            do_sample=True,
            temperature=0.7,  # Controls randomness. Lower for more factual, higher for more creative.
            top_p=0.9,  # Nucleus sampling
        )
        return qa_pipeline
    except Exception as e:
        st.error(f"Error loading model: {e}")
        st.error("This could be due to model availability, network issues, or resource limitations on the Hugging Face Space.")
        st.error(f"Attempted to load: {MODEL_NAME}")
        st.info("If you are running this on a free Hugging Face Space, larger models like the 70B version might exceed resource limits. The 8B version is generally more suitable.")
        return None

qa_pipeline = load_model_and_tokenizer()

# --- Application Interface ---
st.title("⚕️ Medical Question Answering with OpenBioLLM")
st.markdown("Ask a medical-related question and get an answer from the OpenBioLLM model.")
st.markdown(f"**Model used:** `{MODEL_NAME}`")

st.sidebar.header("⚠️ Disclaimer")
st.sidebar.warning(
    "This application is for informational and educational purposes only. "
    "The answers are generated by an AI model (OpenBioLLM) and may contain inaccuracies or biases. "
    "**It is NOT a substitute for professional medical advice, diagnosis, or treatment.** "
    "Always consult with a qualified healthcare professional for any medical concerns."
)
st.sidebar.info(
    "The model's performance has not been rigorously evaluated in real-world healthcare environments. "
    "Do not rely on its outputs for medical decision-making."
)

# --- User Input ---
question = st.text_area("Enter your medical question here:", height=100, key="question_input")

if st.button("Get Answer", key="get_answer_button"):
    if qa_pipeline and question:
        with st.spinner("Generating answer... Please wait."):
            try:
                # Construct a prompt for the Llama3-based OpenBioLLM model.
                # Llama 3 uses a specific chat template structure.
                # We adapt this for a direct question.
                messages = [
                    {"role": "system", "content": "You are a helpful medical information assistant. Please answer the user's question based on your knowledge. Provide informative and clear answers."},
                    {"role": "user", "content": question}
                ]
                # The pipeline with a text-generation model expects a string prompt.
                # We'll format the messages into a string that Llama3 expects.
                # A simpler approach for direct QA might be a direct instruction:
                prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful medical information assistant. Please answer the user's question based on your knowledge. Provide informative and clear answers.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"

                response = qa_pipeline(prompt)

                # The output from the text-generation pipeline is usually a list of dictionaries.
                if response and isinstance(response, list) and len(response) > 0 and "generated_text" in response[0]:
                    generated_answer = response[0]["generated_text"]
                    # The model will repeat the prompt, so we need to extract only the assistant's response.
                    assistant_response_start = generated_answer.rfind("<|start_header_id|>assistant<|end_header_id|>")
                    if assistant_response_start != -1:
                        answer_text = generated_answer[assistant_response_start + len("<|start_header_id|>assistant<|end_header_id|>"):].strip()
                        # Further clean up any trailing special tokens if necessary
                        if "<|eot_id|>" in answer_text:
                            answer_text = answer_text.split("<|eot_id|>")[0].strip()

                        st.subheader("📝 Model's Answer:")
                        st.info(answer_text)
                    else:
                        st.warning("Could not properly parse the assistant's response from the model output.")
                        st.text_area("Raw model output:", generated_answer, height=200)

                else:
                    st.error("The model did not return a valid response.")
                    st.write("Raw response:", response)

            except Exception as e:
                st.error(f"An error occurred during answer generation: {e}")
                st.info("This might be due to the complexity of the question, model limitations, or resource constraints.")

    elif not qa_pipeline:
        st.error("Model could not be loaded. Please check the logs for more details.")
    elif not question:
        st.warning("Please enter a question.")

st.markdown("---")
st.markdown("Created with [Streamlit](https://streamlit.io/) and [Hugging Face Transformers](https://huggingface.co/transformers).")
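Note on the prompt construction above: the Llama 3 chat markup is written out by hand, so the messages list is built but never actually used. A minimal alternative sketch, assuming the model repository ships a chat template and a recent transformers release (which exposes apply_chat_template on the tokenizer), lets the tokenizer render the same messages and asks the pipeline for only the newly generated text, so no manual prompt stripping is needed:

# Sketch only: render the Llama 3 prompt from `messages` via the model's chat template
# (assumes the tokenizer ships one) instead of hand-writing the special tokens.
prompt = qa_pipeline.tokenizer.apply_chat_template(
    messages,
    tokenize=False,              # return a plain string for the text-generation pipeline
    add_generation_prompt=True,  # append the assistant header so the model starts answering
)
# return_full_text=False makes the pipeline return only the generated continuation,
# so the rfind()/split() cleanup above would not be necessary.
response = qa_pipeline(prompt, return_full_text=False)
answer_text = response[0]["generated_text"].strip()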