Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import statsmodels.api as sm | |
| import streamlit as st | |
| from datetime import datetime | |
| import os | |
| # Streamlit entry script: page title, student login, per-student log file setup, and dataset loading. | |
| # ---- App title ---- | |
| st.title("Mini Stata - Simplified Version") | |
| # ---- Student login ---- | |
| st.subheader("Student Login") | |
| student_name = st.text_input("Enter your name:") | |
| student_id = st.text_input("Enter your student ID:") | |
| # The rest of the app runs only when both login fields are non-empty. | |
| if student_name and student_id: | |
| # NOTE(review): student_id is typed by the user and is used verbatim in the file name; consider restricting it to safe characters. | |
| logfile = f"log_{student_id}.csv" | |
| # Ensure log file exists | |
| # A header-only CSV is written on first login so that later rows can be appended without a header. | |
| if not os.path.exists(logfile): | |
| pd.DataFrame(columns=["timestamp", "student_name", "student_id", "command", "result"]).to_csv(logfile, index=False) | |
| st.success(f"Logged in as {student_name} (ID: {student_id})") | |
| # ---- File upload ---- | |
| # df is the working dataset read by the command functions: the uploaded CSV, or a small built-in sample otherwise. | |
| uploaded_file = st.file_uploader("Upload CSV file", type=["csv"]) | |
| if uploaded_file is not None: | |
| df = pd.read_csv(uploaded_file) | |
| st.success("File uploaded successfully!") | |
| else: | |
| st.info("No file uploaded. Using default sample dataset.") | |
| df = pd.DataFrame({ | |
| "mpg": [21, 22, 18, 30], | |
| "weight": [2500, 2800, 3200, 2100], | |
| "cyl": [4, 4, 6, 4], | |
| "horsepower": [90, 95, 110, 80] | |
| }) | |
# ---- summarize ----
def summarize(var=None):
    """Return descriptive statistics for the working dataset ``df``.

    Args:
        var: Optional column name. When omitted, every column that
            ``DataFrame.describe`` covers by default is summarized.

    Returns:
        A DataFrame with one row per summarized variable, or a message
        string when ``var`` is not a column or an error occurs.
    """
    try:
        # Whole-dataset summary: transpose so each variable is a row.
        if var is None:
            return df.describe().T
        # Guard clause for an unknown variable name.
        if var not in df.columns:
            return f"Variable '{var}' not found."
        single = df[var].describe()
        return single.to_frame().T
    except Exception as e:
        # Errors are reported as text so the UI can display them directly.
        return f"Error in summarize: {e}"
# ---- browse ----
def browse(n=None):
    """Return the working dataset ``df``, optionally limited to its first rows.

    Args:
        n: Number of leading rows to return; ``None`` returns ``df`` itself.

    Returns:
        A DataFrame, or a message string if an error occurs.
    """
    try:
        return df if n is None else df.head(n)
    except Exception as e:
        # Errors are reported as text so the UI can display them directly.
        return f"Error in browse: {e}"
# ---- tab ----
def tab(var):
    """Return a one-way frequency table for column ``var`` of ``df``.

    Args:
        var: Name of the column to tabulate.

    Returns:
        A DataFrame indexed by the distinct values with a single
        ``Frequency`` column, or a message string when ``var`` is not a
        column or an error occurs.
    """
    try:
        if var in df.columns:
            counts = df[var].value_counts()
            return counts.to_frame("Frequency")
        return f"Variable '{var}' not found."
    except Exception as e:
        # Errors are reported as text so the UI can display them directly.
        return f"Error in tab: {e}"
# ---- reg (simplified like Stata) ----
def reg(dep_var, indep_vars):
    """Fit an OLS regression of ``dep_var`` on ``indep_vars`` using ``df``.

    An intercept is added through ``sm.add_constant``. Rows with a missing
    value in the dependent variable or any regressor are dropped before
    fitting, so a dataset with blank cells still yields estimates and the
    reported number of observations reflects the rows actually used.

    Args:
        dep_var: Name of the dependent-variable column.
        indep_vars: Iterable of regressor column names.

    Returns:
        On success, a ``(results_table, summary_stats)`` tuple: a DataFrame
        with the columns ``Variable``, ``Coef.``, ``Std. Err.``, ``t`` and
        ``P>|t|``, and a one-line string with the observation count and
        R-squared. Otherwise a message string describing the problem.
    """
    try:
        # Materialize once so any iterable of names (list, tuple, ...) works
        # for both validation and column selection.
        regressors = list(indep_vars)
        if dep_var not in df.columns:
            return f"Dependent variable '{dep_var}' not found."
        for v in regressors:
            if v not in df.columns:
                return f"Independent variable '{v}' not found."
        X = sm.add_constant(df[regressors])
        y = df[dep_var]
        # missing="drop": the statsmodels default performs no NaN checking,
        # which turns a single blank cell into NaN estimates or an error.
        model = sm.OLS(y, X, missing="drop").fit()
        # Create clean results table
        results_table = pd.DataFrame({
            'Variable': model.params.index,
            'Coef.': model.params.values.round(4),
            'Std. Err.': model.bse.values.round(4),
            't': model.tvalues.values.round(3),
            'P>|t|': model.pvalues.values.round(3),
        })
        # Display concise summary
        summary_stats = f"Number of obs = {int(model.nobs)} R-squared = {model.rsquared:.3f}"
        return results_table, summary_stats
    except Exception as e:
        # Errors (e.g. non-numeric columns) are reported as text for the UI.
        return f"Error in regression: {e}"
# ---- Command parser ----
def run_command(cmd):
    """Parse a one-line command and dispatch it to the matching function.

    Supported commands (the command word is case-insensitive):
        summarize [varname]
        browse [n]
        tab varname
        reg depvar indepvar1 [indepvar2 ...]

    Args:
        cmd: The raw command text entered by the user.

    Returns:
        Whatever the dispatched function returns (a DataFrame, the tuple
        produced by ``reg``, or a message string), or a usage/error message
        string when the command is empty, unknown, or has bad arguments.
    """
    parts = cmd.strip().split()
    if not parts:
        return "No command entered."
    command = parts[0].lower()
    args = parts[1:]

    if command == "summarize":
        return summarize(args[0]) if args else summarize()
    if command == "browse":
        if not args:
            return browse()
        # Forward the optional row limit instead of silently ignoring it.
        try:
            n = int(args[0])
        except ValueError:
            return "Usage: browse [n]"
        return browse(n)
    if command == "tab":
        if not args:
            return "Usage: tab varname"
        return tab(args[0])
    if command == "reg":
        # At least one regressor is required after the dependent variable.
        if len(args) < 2:
            return "Usage: reg depvar indepvar1 indepvar2 ..."
        return reg(args[0], args[1:])
    return f"Unknown command: '{command}'. Available commands: summarize, browse, tab, reg."
| # ---- Interface ---- | |
| # Static help text listing example commands. | |
| st.markdown(""" | |
| ### Available commands | |
| - `summarize` | |
| - `summarize mpg` | |
| - `browse` | |
| - `tab cyl` | |
| - `reg mpg weight horsepower` | |
| """) | |
| cmd = st.text_input("Enter command:") | |
| # The command is executed only on the rerun triggered by clicking "Run". | |
| if st.button("Run"): | |
| result = run_command(cmd) | |
| # Log: append one row per executed command; the result text is truncated to 500 characters. | |
| log_entry = { | |
| "timestamp": datetime.now().isoformat(), | |
| "student_name": student_name, | |
| "student_id": student_id, | |
| "command": cmd, | |
| "result": str(result)[:500] | |
| } | |
| # header=False because the header row was written when the log file was created. | |
| pd.DataFrame([log_entry]).to_csv(logfile, mode="a", header=False, index=False) | |
| # Display result | |
| # reg() returns a (table, summary string) tuple on success; other results are DataFrames or message strings. | |
| if isinstance(result, tuple): # regression output | |
| table, stats = result | |
| st.text(stats) | |
| st.table(table) | |
| elif isinstance(result, pd.DataFrame): | |
| st.dataframe(result) | |
| else: | |
| st.text(result) | |
| # NOTE(review): the download button is created inside another button's branch, so it is rendered only on the rerun where "Download My Log" was clicked; confirm this nested-button flow behaves as intended. | |
| if st.button("Download My Log"): | |
| with open(logfile, "r") as f: | |
| st.download_button("Click to download", f, file_name=logfile, mime="text/csv") | |
| # Shown while the name or the student ID is still empty. | |
| else: | |
| st.warning("Please enter name and student ID to start.") | |