""" Data Analysis Platform Copyright (c) 2025 JEAN YOUNG All rights reserved. This software is proprietary and confidential. Unauthorized copying, distribution, or use is prohibited. """ import streamlit as st import pandas as pd from data_handler import load_data from analyzer import DataAnalysisWorkflow, AIAssistant def main(): st.set_page_config( page_title="Data Analysis Platform", page_icon="📊", layout="wide" ) st.title("📊 Data Analysis Platform") st.markdown("**Optimized workflow with caching and pagination**") # Initialize session state if 'current_stage' not in st.session_state: st.session_state.current_stage = 1 if 'workflow' not in st.session_state: st.session_state.workflow = None if 'ai_assistant' not in st.session_state: st.session_state.ai_assistant = AIAssistant() # File upload uploaded_file = st.file_uploader("Upload Dataset", type=['csv', 'xlsx']) if uploaded_file is not None: try: # Load data df = load_data(uploaded_file) st.success(f"✅ Dataset loaded! Shape: {df.shape}") # Initialize workflow if st.session_state.workflow is None: st.session_state.workflow = DataAnalysisWorkflow(df) # Progress sidebar st.sidebar.header("Progress") progress = st.sidebar.progress(st.session_state.current_stage / 5) stages = ["Data Overview", "Exploration", "Quality Check", "Analysis", "Summary"] for i, stage in enumerate(stages, 1): if i == st.session_state.current_stage: st.sidebar.write(f"🔄 **{i}. {stage}**") elif i < st.session_state.current_stage: st.sidebar.write(f"✅ {i}. {stage}") else: st.sidebar.write(f"⏳ {i}. {stage}") # Navigation col1, col2 = st.sidebar.columns(2) with col1: if st.button("← Previous") and st.session_state.current_stage > 1: st.session_state.current_stage -= 1 st.rerun() with col2: if st.button("Next →") and st.session_state.current_stage < 5: st.session_state.current_stage += 1 st.rerun() # Recent insights st.sidebar.header("💡 Recent Insights") recent_insights = st.session_state.workflow.insights[-3:] for insight in recent_insights: st.sidebar.info(f"**Stage {insight['stage']}:** {insight['insight']}") # Main content with AI assistant main_col, ai_col = st.columns([3, 1]) with main_col: # Execute current stage if st.session_state.current_stage == 1: st.session_state.workflow.stage_1_overview() elif st.session_state.current_stage == 2: st.session_state.workflow.stage_2_exploration() elif st.session_state.current_stage == 3: st.session_state.workflow.stage_3_cleaning() elif st.session_state.current_stage == 4: st.session_state.workflow.stage_4_analysis() elif st.session_state.current_stage == 5: st.session_state.workflow.stage_5_summary() with ai_col: st.subheader("🤖 AI Assistant") # AI model selection available_models = st.session_state.ai_assistant.get_available_models() if available_models: selected_model = st.selectbox("AI Model:", available_models) if st.button("Get AI Insights"): if st.session_state.workflow.insights: with st.spinner("Analyzing with AI..."): ai_analysis = st.session_state.ai_assistant.analyze_insights( df, st.session_state.workflow.insights, selected_model ) st.write("**AI Analysis:**") st.write(ai_analysis) else: st.warning("Complete some analysis stages first.") else: st.warning("No AI models available.") st.info("Set GOOGLE_API_KEY or OPENAI_API_KEY environment variables.") # Quick insights st.subheader("📊 Quick Stats") if st.session_state.workflow.insights: st.metric("Total Insights", len(st.session_state.workflow.insights)) st.metric("Current Stage", f"{st.session_state.current_stage}/5") # Latest insight if st.session_state.workflow.insights: latest = st.session_state.workflow.insights[-1] st.info(f"**Latest:** {latest['insight']}") # Data quality indicator quality_score = 100 if st.session_state.workflow.stats['missing_values'] > 0: quality_score -= 30 if st.session_state.workflow.stats['duplicates'] > 0: quality_score -= 20 st.metric("Data Quality", f"{quality_score}%") except Exception as e: st.error(f"Error: {str(e)}") st.info("Please check your file format and try again.") if __name__ == "__main__": main()