# Streamlit demo: analyze a medical symptom with ClinicalBERT embeddings
# and generate a free-text opinion with BioGPT.
import streamlit as st
from transformers import AutoModel, AutoTokenizer, pipeline
# ---- Model loading -------------------------------------------------------
# ClinicalBERT provides sentence embeddings; BioGPT generates free text.
# Streamlit re-executes this script on every widget interaction, so the
# heavy model loads are cached with st.cache_resource to avoid re-loading
# (and potentially re-downloading) the weights on each rerun.

# The model name for embeddings
embedding_model_name = "emilyalsentzer/Bio_ClinicalBERT"
# The model name for medical question answering or opinion generation
qa_model_name = "microsoft/BioGPT"  # Replace with a medical QA model from Hugging Face


@st.cache_resource
def _load_embedding_components(model_name):
    """Load and cache the (tokenizer, model) pair used for embeddings."""
    return AutoTokenizer.from_pretrained(model_name), AutoModel.from_pretrained(model_name)


@st.cache_resource
def _load_generation_pipeline(model_name):
    """Load and cache the text-generation pipeline."""
    # Use "question-answering" if more appropriate for the chosen model.
    return pipeline("text-generation", model=model_name)


# Module-level names preserved so the rest of the script is unchanged.
embedding_tokenizer, embedding_model = _load_embedding_components(embedding_model_name)
qa_pipeline = _load_generation_pipeline(qa_model_name)
# ---- Streamlit app UI ----------------------------------------------------
# NOTE(review): the pasted original had its indentation stripped (the bodies
# of the `if` statements were at column 0, a SyntaxError); restored here.
st.title("Medical Symptom Analysis with ClinicalBERT & BioGPT")
st.write("Type in a medical symptom to get insights and opinions.")

# User input
text = st.text_input("Enter Medical Symptom")

if st.button("Analyze"):
    if text.strip():
        # Step 1: embed the symptom text with ClinicalBERT.
        inputs = embedding_tokenizer(
            text, return_tensors="pt", truncation=True, padding=True, max_length=512
        )
        outputs = embedding_model(**inputs)
        # [CLS] token vector (batch 0, token 0) used as a summary embedding.
        cls_embedding = outputs.last_hidden_state[:, 0, :].detach().numpy()
        st.write(f"CLS Embedding (first 5 values): {cls_embedding[0][:5]}")

        # Step 2: generate a free-text opinion with BioGPT.
        st.write("Generating medical opinion...")
        response = qa_pipeline(text, max_length=200, num_return_sequences=1)
        st.write("Medical Opinion:")
        st.write(response[0]["generated_text"])
    else:
        st.write("Please enter some text.")