"""Mini Stata: a small Streamlit app that runs a few Stata-like commands.

A student logs in with a name and an ID, optionally uploads a CSV file, and
types commands (``summarize``, ``browse``, ``tab``, ``reg``).  Every command
and a truncated copy of its result are appended to a per-student CSV log that
can be downloaded from the page.
"""

import os
from datetime import datetime

import pandas as pd
import statsmodels.api as sm
import streamlit as st

# Column order of the per-student log file; rows are appended in this order.
LOG_COLUMNS = ["timestamp", "student_name", "student_id", "command", "result"]

# Help text shown above the command box.
HELP_TEXT = """
### Available commands
- `summarize`
- `summarize mpg`
- `browse`
- `browse 5`
- `tab cyl`
- `reg mpg weight horsepower`
"""


def _log_path(raw_id):
    """Return the log filename for *raw_id*.

    The ID is typed by the user, so every character that is not a letter,
    digit, ``-`` or ``_`` is replaced with ``_``.  This keeps path separators
    and ``..`` out of the filename so the log always stays in the working
    directory.  Purely alphanumeric IDs map to the same name as before.
    """
    cleaned = "".join(
        ch if ch.isalnum() or ch in "-_" else "_" for ch in raw_id.strip()
    )
    return f"log_{cleaned}.csv"


# NOTE: the command functions below read the module-level ``df`` that is
# assigned further down, once the user is logged in.  They are only called
# after that assignment has run.


# ---- summarize ----
def summarize(var=None):
    """Return descriptive statistics for one variable or for all of them.

    Returns a DataFrame on success, or an error-message string when *var* is
    not a column of ``df`` or pandas raises.
    """
    try:
        if var is None:
            return df.describe().T
        if var in df.columns:
            return df[var].describe().to_frame().T
        return f"Variable '{var}' not found."
    except Exception as e:  # command boundary: report instead of crashing the UI
        return f"Error in summarize: {e}"


# ---- browse ----
def browse(n=None):
    """Return the whole dataset, or only its first *n* rows when *n* is given."""
    try:
        if n is None:
            return df
        return df.head(n)
    except Exception as e:  # command boundary: report instead of crashing the UI
        return f"Error in browse: {e}"


# ---- tab ----
def tab(var):
    """Return a one-column frequency table ("Frequency") for column *var*."""
    try:
        if var not in df.columns:
            return f"Variable '{var}' not found."
        return df[var].value_counts().to_frame("Frequency")
    except Exception as e:  # command boundary: report instead of crashing the UI
        return f"Error in tab: {e}"


# ---- reg (simplified like Stata) ----
def reg(dep_var, indep_vars):
    """Fit an OLS regression of *dep_var* on *indep_vars* plus a constant.

    Returns ``(results_table, summary_stats)`` on success, where
    ``results_table`` is a DataFrame of coefficients, standard errors,
    t statistics and p-values, and ``summary_stats`` is a one-line string.
    Returns an error-message string when a variable is missing or the fit
    fails.
    """
    try:
        if dep_var not in df.columns:
            return f"Dependent variable '{dep_var}' not found."
        # Coerce to a list: indexing a DataFrame with a tuple is treated as a
        # single key rather than a selection of columns.
        regressors = list(indep_vars)
        for v in regressors:
            if v not in df.columns:
                return f"Independent variable '{v}' not found."

        X = sm.add_constant(df[regressors])
        y = df[dep_var]
        # missing="drop" discards rows with missing values instead of failing
        # the whole regression on the first NaN.
        model = sm.OLS(y, X, missing="drop").fit()

        # Create clean results table
        results_table = pd.DataFrame({
            'Variable': model.params.index,
            'Coef.': model.params.values.round(4),
            'Std. Err.': model.bse.values.round(4),
            't': model.tvalues.values.round(3),
            'P>|t|': model.pvalues.values.round(3),
        })

        # Display concise summary
        summary_stats = f"Number of obs = {int(model.nobs)} R-squared = {model.rsquared:.3f}"
        return results_table, summary_stats
    except Exception as e:  # command boundary: report instead of crashing the UI
        return f"Error in regression: {e}"


# ---- Command parser ----
def run_command(cmd):
    """Parse one command line and dispatch it to the matching function.

    The first whitespace-separated token (case-insensitive) selects the
    command; the remaining tokens are its arguments.  Returns whatever the
    command returns, or a usage/error string.
    """
    parts = cmd.strip().split()
    if not parts:
        return "No command entered."

    command = parts[0].lower()
    args = parts[1:]

    if command == "summarize":
        return summarize(args[0]) if args else summarize()
    if command == "browse":
        if not args:
            return browse()
        # An optional row count was previously ignored; validate and pass it on.
        try:
            n_rows = int(args[0])
        except ValueError:
            return "Usage: browse [n]"
        if n_rows < 0:
            return "Usage: browse [n]"
        return browse(n_rows)
    if command == "tab":
        if not args:
            return "Usage: tab varname"
        return tab(args[0])
    if command == "reg":
        if len(args) < 2:
            return "Usage: reg depvar indepvar1 indepvar2 ..."
        return reg(args[0], args[1:])
    return f"Unknown command: '{command}'. Available commands: summarize, browse, tab, reg."


# ---- App title ----
st.title("Mini Stata - Simplified Version")

# ---- Student login ----
st.subheader("Student Login")
student_name = st.text_input("Enter your name:")
student_id = st.text_input("Enter your student ID:")

if student_name and student_id:
    logfile = _log_path(student_id)

    # Ensure log file exists (header row only)
    if not os.path.exists(logfile):
        pd.DataFrame(columns=LOG_COLUMNS).to_csv(logfile, index=False)

    st.success(f"Logged in as {student_name} (ID: {student_id})")

    # ---- File upload ----
    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file)
        st.success("File uploaded successfully!")
    else:
        st.info("No file uploaded. Using default sample dataset.")
        df = pd.DataFrame({
            "mpg": [21, 22, 18, 30],
            "weight": [2500, 2800, 3200, 2100],
            "cyl": [4, 4, 6, 4],
            "horsepower": [90, 95, 110, 80],
        })

    # ---- Interface ----
    st.markdown(HELP_TEXT)

    cmd = st.text_input("Enter command:")
    if st.button("Run"):
        result = run_command(cmd)

        # Log the command; the result is truncated to keep rows small.
        log_entry = {
            "timestamp": datetime.now().isoformat(),
            "student_name": student_name,
            "student_id": student_id,
            "command": cmd,
            "result": str(result)[:500],
        }
        pd.DataFrame([log_entry]).to_csv(
            logfile, mode="a", header=False, index=False
        )

        # Display result
        if isinstance(result, tuple):  # regression output
            table, stats = result
            st.text(stats)
            st.table(table)
        elif isinstance(result, pd.DataFrame):
            st.dataframe(result)
        else:
            st.text(result)

    if st.button("Download My Log"):
        # Read raw bytes so the download does not depend on the platform's
        # default text encoding (the log is written by pandas as UTF-8).
        with open(logfile, "rb") as f:
            log_bytes = f.read()
        st.download_button(
            "Click to download", log_bytes, file_name=logfile, mime="text/csv"
        )
else:
    st.warning("Please enter name and student ID to start.")