import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from wordcloud import WordCloud
import os
import shutil

# Configuration
MODEL_CONFIG = {
    "local_path": "./models/tripadvisor_sentiment_model",
    "required_files": [
        "config.json",
        "pytorch_model.bin",  # or model.safetensors
        "tokenizer.json",
        "vocab.txt",
        "special_tokens_map.json",
        "tokenizer_config.json"
    ]
}


def setup_model_directory():
    """Create model directory structure if it doesn't exist"""
    os.makedirs(MODEL_CONFIG["local_path"], exist_ok=True)
    # Create a README with instructions
    readme_path = os.path.join(MODEL_CONFIG["local_path"], "README.md")
    if not os.path.exists(readme_path):
        with open(readme_path, "w") as f:
            f.write("# TripAdvisor Sentiment Model\n\n")
            f.write("Place the following files in this directory:\n")
            for file in MODEL_CONFIG["required_files"]:
                f.write(f"- {file}\n")


def verify_model_files():
    """Return a list of required model files that are missing"""
    missing_files = []
    for file in MODEL_CONFIG["required_files"]:
        if not os.path.exists(os.path.join(MODEL_CONFIG["local_path"], file)):
            # Accept model.safetensors as an alternative to pytorch_model.bin
            if file == "pytorch_model.bin" and os.path.exists(
                os.path.join(MODEL_CONFIG["local_path"], "model.safetensors")
            ):
                continue
            missing_files.append(file)
    return missing_files


@st.cache_resource
def load_models():
    """Load the sentiment analysis model with comprehensive error handling"""
    setup_model_directory()
    missing_files = verify_model_files()
    if missing_files:
        st.warning(f"Missing model files: {', '.join(missing_files)}")
        show_model_instructions(missing_files)
        return None
    try:
        st.info("Loading local sentiment model...")
        # Load components separately for better error handling
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_CONFIG["local_path"])
        tokenizer = AutoTokenizer.from_pretrained(MODEL_CONFIG["local_path"])
        return pipeline(
            "text-classification",
            model=model,
            tokenizer=tokenizer
        )
    except Exception as e:
        st.error(f"Model loading failed: {e}")
        show_model_instructions()
        return None


def show_model_instructions(missing_files=None):
    """Display detailed model setup instructions"""
    with st.expander("Model Setup Instructions", expanded=True):
        # Markdown strings are kept unindented so Streamlit doesn't render them as code blocks
        st.markdown("""
### Option 1: Set Up Local Model
1. **Create the model folder**:
```bash
mkdir -p models/tripadvisor_sentiment_model
```
2. **Add these required files** to the folder:
""")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**Essential Files**")
            for file in ["config.json", "pytorch_model.bin", "model.safetensors"]:
                st.write(f"- {file}")
        with col2:
            st.markdown("**Tokenizer Files**")
            for file in ["tokenizer.json", "vocab.txt", "special_tokens_map.json", "tokenizer_config.json"]:
                st.write(f"- {file}")
        if missing_files:
            st.warning(f"Currently missing: {', '.join(missing_files)}")
        st.markdown("""
### Option 2: Use a Test Model
For testing, you can use this sample model:
```python
MODEL_CONFIG["hf_model_name"] = "distilbert-base-uncased-finetuned-sst-2-english"
```
""")
# [Rest of your existing code - analyze_batch, generate_wordcloud, load_file, and UI components]
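
# The helpers named above are elided from this file. The definitions below are minimal,
# illustrative sketches with assumed signatures (not the original implementations), added
# so the UI code further down has something callable; swap in the real versions.

def load_file(uploaded_file):
    """Read an uploaded CSV or Parquet file into a DataFrame (sketch)."""
    try:
        if uploaded_file.name.lower().endswith(".csv"):
            return pd.read_csv(uploaded_file)
        return pd.read_parquet(uploaded_file)
    except Exception as e:
        st.error(f"Could not read file: {e}")
        return None


def analyze_batch(texts, classifier):
    """Run the sentiment pipeline over a list of review texts (sketch)."""
    # truncation=True guards against reviews longer than the model's maximum input length
    return classifier(list(texts), truncation=True)


def generate_wordcloud(texts):
    """Build a word-cloud figure from the review texts (sketch)."""
    wc = WordCloud(width=800, height=400, background_color="white").generate(" ".join(texts))
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wc, interpolation="bilinear")
    ax.axis("off")
    return fig
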
# Initialize the app
st.title("TripAdvisor Review Analysis System")
# Check model availability before file upload
model = load_models()
if model is None:
    st.error("Please set up the sentiment model first")
    st.stop()  # Prevent file upload until model is ready
uploaded_file = st.file_uploader("Upload review data (CSV or Parquet)", type=["csv", "parquet"])
if uploaded_file:
    df = load_file(uploaded_file)
    if df is not None:
        # [Rest of your file processing and analysis code]
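        # The block below is an illustrative sketch of that elided processing code
        # (the "Review" column name and chart choices are assumptions): score the
        # reviews, show the label distribution, and render a word cloud.
        if "Review" in df.columns:
            texts = df["Review"].astype(str).tolist()
            results = analyze_batch(texts, model)
            df["sentiment"] = [r["label"] for r in results]
            st.bar_chart(df["sentiment"].value_counts())
            st.pyplot(generate_wordcloud(texts))
        else:
            st.warning("No 'Review' column found in the uploaded data")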