|
""" |
|
Data Analysis Platform |
|
Copyright (c) 2025 JEAN YOUNG |
|
All rights reserved. |
|
|
|
This software is proprietary and confidential. |
|
Unauthorized copying, distribution, or use is prohibited. |
|
""" |
|
import streamlit as st |
|
import pandas as pd |
|
from data_handler import load_data |
|
from analyzer import DataAnalysisWorkflow, AIAssistant |
|
|
|
# Ordered analysis stages: (sidebar label, name of the workflow method that
# renders the stage). The stage count used for progress and navigation is
# derived from this table instead of being repeated as a literal.
_STAGES = (
    ("Data Overview", "stage_1_overview"),
    ("Exploration", "stage_2_exploration"),
    ("Quality Check", "stage_3_cleaning"),
    ("Analysis", "stage_4_analysis"),
    ("Summary", "stage_5_summary"),
)


def _init_session_state() -> None:
    """Create the session-state keys used by the app if they do not exist yet."""
    state = st.session_state
    if 'current_stage' not in state:
        state.current_stage = 1
    if 'workflow' not in state:
        state.workflow = None
    if 'dataset_signature' not in state:
        # Identifies the upload the stored workflow was built from.
        state.dataset_signature = None
    if 'ai_assistant' not in state:
        state.ai_assistant = AIAssistant()


def _sync_workflow(uploaded_file, df: "pd.DataFrame") -> None:
    """Make sure the stored workflow belongs to the currently uploaded file.

    The workflow is kept across reruns, but it is rebuilt (and the stage
    pointer reset to 1) when the upload's (name, size) signature differs from
    the one the workflow was created for. Without this, uploading a second
    file would keep showing the analysis of the first one.
    """
    state = st.session_state
    # getattr keeps this working even if the upload object lacks an attribute;
    # in that case the signature never changes and the workflow is built once.
    signature = (
        getattr(uploaded_file, "name", None),
        getattr(uploaded_file, "size", None),
    )
    if state.workflow is None or state.dataset_signature != signature:
        state.workflow = DataAnalysisWorkflow(df)
        state.current_stage = 1
        state.dataset_signature = signature


def _render_sidebar(workflow: "DataAnalysisWorkflow") -> None:
    """Draw the progress bar, stage list, navigation buttons and recent insights."""
    total = len(_STAGES)
    current = st.session_state.current_stage

    st.sidebar.header("Progress")
    st.sidebar.progress(current / total)

    for number, (label, _method) in enumerate(_STAGES, 1):
        if number == current:
            st.sidebar.write(f"🔄 **{number}. {label}**")
        elif number < current:
            st.sidebar.write(f"✅ {number}. {label}")
        else:
            st.sidebar.write(f"⏳ {number}. {label}")

    # The button call comes first in each condition so both buttons are always
    # rendered, even on the first and last stage.
    col1, col2 = st.sidebar.columns(2)
    with col1:
        if st.button("← Previous") and current > 1:
            st.session_state.current_stage -= 1
            st.rerun()
    with col2:
        if st.button("Next →") and current < total:
            st.session_state.current_stage += 1
            st.rerun()

    st.sidebar.header("💡 Recent Insights")
    for insight in workflow.insights[-3:]:
        st.sidebar.info(f"**Stage {insight['stage']}:** {insight['insight']}")


def _render_stage(workflow: "DataAnalysisWorkflow") -> None:
    """Call the workflow method for the current stage; do nothing if out of range."""
    stage = st.session_state.current_stage
    if 1 <= stage <= len(_STAGES):
        method_name = _STAGES[stage - 1][1]
        getattr(workflow, method_name)()


def _render_assistant_panel(df: "pd.DataFrame", workflow: "DataAnalysisWorkflow") -> None:
    """Draw the AI assistant controls followed by the quick-stats metrics."""
    st.subheader("🤖 AI Assistant")

    assistant = st.session_state.ai_assistant
    available_models = assistant.get_available_models()

    if available_models:
        selected_model = st.selectbox("AI Model:", available_models)

        if st.button("Get AI Insights"):
            if workflow.insights:
                with st.spinner("Analyzing with AI..."):
                    ai_analysis = assistant.analyze_insights(
                        df, workflow.insights, selected_model
                    )
                st.write("**AI Analysis:**")
                st.write(ai_analysis)
            else:
                st.warning("Complete some analysis stages first.")
    else:
        st.warning("No AI models available.")
        st.info("Set GOOGLE_API_KEY or OPENAI_API_KEY environment variables.")

    st.subheader("📈 Quick Stats")
    if workflow.insights:
        st.metric("Total Insights", len(workflow.insights))
    st.metric("Current Stage", f"{st.session_state.current_stage}/{len(_STAGES)}")

    if workflow.insights:
        latest = workflow.insights[-1]
        st.info(f"**Latest:** {latest['insight']}")

    # Score starts at 100 and loses a fixed penalty per kind of problem found.
    quality_score = 100
    if workflow.stats['missing_values'] > 0:
        quality_score -= 30
    if workflow.stats['duplicates'] > 0:
        quality_score -= 20

    st.metric("Data Quality", f"{quality_score}%")


def main() -> None:
    """Run the Streamlit page: upload a dataset and step through the analysis stages.

    Configures the page, initialises session state, and waits for a CSV/XLSX
    upload. Once a file is provided it is loaded with ``load_data``, a
    ``DataAnalysisWorkflow`` is created (or reused for the same upload), and
    the sidebar, the current stage and the AI assistant panel are rendered.

    Errors while loading the file or building the workflow are reported with
    a file-format hint. Errors raised later, while rendering, are reported
    without that hint, since they are not necessarily caused by the file.
    """
    st.set_page_config(
        page_title="Data Analysis Platform",
        page_icon="📊",
        layout="wide",
    )

    st.title("📊 Data Analysis Platform")
    st.markdown("**Optimized workflow with caching and pagination**")

    _init_session_state()

    uploaded_file = st.file_uploader("Upload Dataset", type=['csv', 'xlsx'])
    if uploaded_file is None:
        return

    # Step 1: everything that depends on the file contents being usable.
    try:
        df = load_data(uploaded_file)
        st.success(f"✅ Dataset loaded! Shape: {df.shape}")
        _sync_workflow(uploaded_file, df)
    except Exception as e:  # UI boundary: show the problem instead of a traceback
        st.error(f"Error: {str(e)}")
        st.info("Please check your file format and try again.")
        return

    # Step 2: rendering. st.rerun() signals via a non-Exception control-flow
    # exception, so this handler does not interfere with navigation.
    workflow = st.session_state.workflow
    try:
        _render_sidebar(workflow)

        main_col, ai_col = st.columns([3, 1])
        with main_col:
            _render_stage(workflow)
        with ai_col:
            _render_assistant_panel(df, workflow)
    except Exception as e:  # UI boundary: keep the page alive on stage/AI errors
        st.error(f"Error: {str(e)}")
|
|
|
if __name__ == "__main__": |
|
main() |