|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import matplotlib.pyplot as plt |
|
|
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer |
|
|
from wordcloud import WordCloud |
|
|
import os |
|
|
import shutil |
|
|
|
|
|
|
|
|
# Configuration for the locally stored fine-tuned sentiment model.
MODEL_CONFIG = {
    # Directory where the TripAdvisor sentiment checkpoint is expected.
    "local_path": "./models/tripadvisor_sentiment_model",
    # Artifact files checked by verify_model_files(); a model.safetensors
    # checkpoint is accepted in place of pytorch_model.bin (see that function).
    "required_files": [
        "config.json",
        "pytorch_model.bin",
        "tokenizer.json",
        "vocab.txt",
        "special_tokens_map.json",
        "tokenizer_config.json"
    ]
}
|
|
|
|
|
def setup_model_directory():
    """Create the local model directory and seed it with a README.

    Ensures ``MODEL_CONFIG["local_path"]`` exists, then writes a README.md
    listing the required model files — only if one is not already present,
    so user edits are never clobbered.
    """
    os.makedirs(MODEL_CONFIG["local_path"], exist_ok=True)

    readme_path = os.path.join(MODEL_CONFIG["local_path"], "README.md")
    if not os.path.exists(readme_path):
        lines = [
            "# TripAdvisor Sentiment Model\n\n",
            "Place the following files in this directory:\n",
        ]
        lines.extend(f"- {file}\n" for file in MODEL_CONFIG["required_files"])
        # Explicit encoding keeps the README byte-stable across
        # platforms/locales instead of depending on the system default.
        with open(readme_path, "w", encoding="utf-8") as f:
            f.writelines(lines)
|
|
|
|
|
def verify_model_files():
    """Return the required model files missing from the model directory.

    A ``model.safetensors`` checkpoint is accepted as a substitute for
    ``pytorch_model.bin``, so the latter is not reported as missing when
    a safetensors file is present.

    Returns:
        list[str]: names of missing files; empty when the model is complete.
    """
    base = MODEL_CONFIG["local_path"]
    # Loop-invariant: stat the safetensors substitute once, not per file.
    has_safetensors = os.path.exists(os.path.join(base, "model.safetensors"))

    missing_files = []
    for file in MODEL_CONFIG["required_files"]:
        if os.path.exists(os.path.join(base, file)):
            continue
        if file == "pytorch_model.bin" and has_safetensors:
            continue
        missing_files.append(file)
    return missing_files
|
|
|
|
|
@st.cache_resource
def load_models():
    """Build the sentiment-analysis pipeline from the local checkpoint.

    Ensures the model directory exists and all required artifact files are
    present, then returns a transformers text-classification pipeline.
    Returns ``None`` (after surfacing setup instructions in the UI) when
    files are missing or loading fails.
    """
    setup_model_directory()
    missing_files = verify_model_files()

    # Bail out early with guidance when the checkpoint is incomplete.
    if missing_files:
        st.warning(f"Missing model files: {', '.join(missing_files)}")
        show_model_instructions(missing_files)
        return None

    try:
        st.success("Loading local sentiment model...")
        local_path = MODEL_CONFIG["local_path"]
        classifier = pipeline(
            "text-classification",
            model=AutoModelForSequenceClassification.from_pretrained(local_path),
            tokenizer=AutoTokenizer.from_pretrained(local_path),
        )
        return classifier
    except Exception as e:
        # Loading can fail for many reasons (corrupt weights, version
        # mismatch); report the error and fall back to setup instructions.
        st.error(f"Model loading failed: {str(e)}")
        show_model_instructions()
        return None
|
|
|
|
|
def show_model_instructions(missing_files=None):
    """Render an expandable panel walking the user through model setup.

    Args:
        missing_files: optional list of file names to highlight as
            currently absent from the model directory.
    """
    with st.expander("Model Setup Instructions", expanded=True):
        st.markdown("""
        ### Option 1: Set Up Local Model

        1. **Create the model folder**:
        ```bash
        mkdir -p models/tripadvisor_sentiment_model
        ```

        2. **Add these required files** to the folder:
        """)

        file_groups = (
            ("**Essential Files**",
             ["config.json", "pytorch_model.bin", "model.safetensors"]),
            ("**Tokenizer Files**",
             ["tokenizer.json", "vocab.txt", "special_tokens_map.json", "tokenizer_config.json"]),
        )
        # Render one file group per column, side by side.
        for column, (heading, names) in zip(st.columns(2), file_groups):
            with column:
                st.markdown(heading)
                for name in names:
                    st.write(f"- {name}")

        if missing_files:
            st.warning(f"Currently missing: {', '.join(missing_files)}")

        st.markdown("""
        ### Option 2: Use Test Model
        For testing, you can use this sample model:
        ```python
        MODEL_CONFIG["hf_model_name"] = "distilbert-base-uncased-finetuned-sst-2-english"
        ```
        """)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---- Streamlit app entry point (runs top-to-bottom on every rerun) ----
st.title("TripAdvisor Review Analysis System")

# Load the cached sentiment pipeline before accepting any input.
model = load_models()
if model is None:
    # load_models has already rendered setup instructions; halt this run.
    st.error("Please set up the sentiment model first")
    st.stop()

# Accept review data for analysis in either CSV or Parquet form.
uploaded_file = st.file_uploader("Upload review data (CSV or Parquet)", type=["csv", "parquet"])
|
|
if uploaded_file: |
|
|
df = load_file(uploaded_file) |
|
|
|
|
|
if df is not None: |
|
|
|