stata_modo / app.py
arielgom's picture
Upload 2 files
f9720a4 verified
import pandas as pd
import statsmodels.api as sm
import streamlit as st
from datetime import datetime
import os
# ---- App title ----
# NOTE(review): this copy of the script has lost its leading indentation, so
# block nesting cannot be read off the text. Confirm against the original
# file which statements sit under which `if` before restructuring anything.
st.title("Mini Stata - Simplified Version")
# ---- Student login ----
# Both fields are free-text inputs. The check below presumably gates the rest
# of the script (its matching `else`, which warns the user, is the last
# statement of the file) -- TODO confirm nesting.
st.subheader("Student Login")
student_name = st.text_input("Enter your name:")
student_id = st.text_input("Enter your student ID:")
if student_name and student_id:
# One CSV log per student ID, as a relative path.
# NOTE(review): student_id is typed by the user and is interpolated into the
# file name unsanitized; a value containing path separators would no longer
# name a plain file next to the script. Consider restricting it to a safe
# character set before building the path.
logfile = f"log_{student_id}.csv"
# Ensure log file exists
# Writing an empty frame emits only the header row; the later append of log
# rows is done with header=False and relies on this header being present.
if not os.path.exists(logfile):
pd.DataFrame(columns=["timestamp", "student_name", "student_id", "command", "result"]).to_csv(logfile, index=False)
st.success(f"Logged in as {student_name} (ID: {student_id})")
# ---- File upload ----
# `df` is the dataset read by the command functions defined below: the
# uploaded CSV when one is provided, otherwise a four-row built-in sample.
# NOTE(review): read_csv is not guarded here, so a file pandas cannot parse
# would raise at this point -- confirm whether that is acceptable.
uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
if uploaded_file is not None:
df = pd.read_csv(uploaded_file)
st.success("File uploaded successfully!")
else:
st.info("No file uploaded. Using default sample dataset.")
df = pd.DataFrame({
"mpg": [21, 22, 18, 30],
"weight": [2500, 2800, 3200, 2100],
"cyl": [4, 4, 6, 4],
"horsepower": [90, 95, 110, 80]
})
# ---- summarize ----
def summarize(var=None):
    """Return descriptive statistics for the dataset or for one variable.

    With no argument the result is ``df.describe()`` transposed, so each
    described column becomes a row. When ``var`` names a column of ``df``,
    the result is a one-row frame holding that column's statistics.

    An unknown variable name, or any exception raised while computing the
    statistics, is reported as a message string instead of being raised.
    """
    try:
        # Guard clause: reject a requested name that is not a column.
        if var is not None and var not in df.columns:
            return f"Variable '{var}' not found."

        if var is None:
            stats = df.describe()
        else:
            stats = df[var].describe().to_frame()
        return stats.T
    except Exception as err:
        return f"Error in summarize: {err}"
# ---- browse ----
def browse(n=None):
    """Return the dataset, optionally limited to its first ``n`` rows.

    ``n=None`` returns ``df`` itself; otherwise the result is ``df.head(n)``.
    Any exception is converted into a message string.
    """
    try:
        return df if n is None else df.head(n)
    except Exception as err:
        return f"Error in browse: {err}"
# ---- tab ----
def tab(var):
    """Return a one-column frequency table for the variable ``var``.

    The table is ``df[var].value_counts()`` converted to a frame whose single
    column is named ``"Frequency"``. An unknown variable name, or any
    exception raised along the way, yields a message string instead.
    """
    try:
        if var in df.columns:
            counts = df[var].value_counts()
            return counts.to_frame("Frequency")
        return f"Variable '{var}' not found."
    except Exception as err:
        return f"Error in tab: {err}"
# ---- reg (simplified like Stata) ----
def reg(dep_var, indep_vars):
    """Fit an OLS regression of ``dep_var`` on ``indep_vars`` plus a constant.

    Args:
        dep_var: Name of the dependent-variable column in ``df``.
        indep_vars: Sequence of regressor column names in ``df``.

    Returns:
        On success, a ``(results_table, summary_stats)`` tuple:
        ``results_table`` is a DataFrame with one row per estimated
        parameter (coefficient, standard error, t statistic and p-value,
        all rounded), and ``summary_stats`` is a one-line string with the
        number of observations used and the R-squared.
        On failure -- an unknown variable name or any exception raised while
        fitting -- a message string.
    """
    try:
        if dep_var not in df.columns:
            return f"Dependent variable '{dep_var}' not found."
        for v in indep_vars:
            if v not in df.columns:
                return f"Independent variable '{v}' not found."

        # A regressor listed twice would select the same column twice and
        # make the design matrix perfectly collinear; keep the first mention
        # of each name, in the order given.
        regressors = list(dict.fromkeys(indep_vars))

        X = sm.add_constant(df[regressors])
        y = df[dep_var]

        # missing="drop" excludes rows with NaN in y or X before fitting
        # (listwise deletion). The default performs no NaN checking, so a
        # single missing cell would otherwise break the whole regression.
        model = sm.OLS(y, X, missing="drop").fit()

        # Create clean results table
        results_table = pd.DataFrame({
            'Variable': model.params.index,
            'Coef.': model.params.values.round(4),
            'Std. Err.': model.bse.values.round(4),
            't': model.tvalues.values.round(3),
            'P>|t|': model.pvalues.values.round(3)
        })

        # Display concise summary; nobs counts only the rows actually used.
        summary_stats = f"Number of obs = {int(model.nobs)} R-squared = {model.rsquared:.3f}"
        return results_table, summary_stats
    except Exception as e:
        return f"Error in regression: {e}"
# ---- Command parser ----
def run_command(cmd):
    """Parse one command line and dispatch it to the matching handler.

    The line is split on whitespace; the first token (case-insensitive)
    selects the command and the remaining tokens are its arguments.

    Supported forms:
        summarize            statistics for every described column
        summarize VAR        statistics for one variable
        browse               the whole dataset
        browse N             the first N rows (N a non-negative integer)
        tab VAR              frequency table for one variable
        reg DEP IND1 [IND2 ...]   OLS regression

    Returns:
        Whatever the selected handler returns (a DataFrame, a
        ``(table, stats)`` tuple for ``reg``, or a message string), or a
        usage / error message string when the line cannot be dispatched.
    """
    parts = cmd.strip().split()
    if not parts:
        return "No command entered."

    command = parts[0].lower()
    args = parts[1:]

    if command == "summarize":
        return summarize(args[0]) if args else summarize()

    if command == "browse":
        if not args:
            return browse()
        # Previously any argument was silently ignored and the full table
        # was shown; honour the row limit that browse() already supports.
        try:
            n_rows = int(args[0])
        except ValueError:
            return "Usage: browse [n]"
        # A negative count would make DataFrame.head drop rows from the end
        # instead of limiting the listing, which is not what "browse N" means.
        if n_rows < 0:
            return "Usage: browse [n]"
        return browse(n_rows)

    if command == "tab":
        if not args:
            return "Usage: tab varname"
        return tab(args[0])

    if command == "reg":
        if len(args) < 2:
            return "Usage: reg depvar indepvar1 indepvar2 ..."
        return reg(args[0], args[1:])

    return f"Unknown command: '{command}'. Available commands: summarize, browse, tab, reg."
# ---- Interface ----
# NOTE(review): leading indentation is missing in this copy, so the nesting
# of the statements below is inferred, not visible -- confirm against the
# original file. Static help text listing example commands comes first.
st.markdown("""
### Available commands
- `summarize`
- `summarize mpg`
- `browse`
- `tab cyl`
- `reg mpg weight horsepower`
""")
cmd = st.text_input("Enter command:")
# The typed command is executed when the "Run" button is clicked.
if st.button("Run"):
result = run_command(cmd)
# Log
# One row per executed command. The result is stringified and capped at 500
# characters. The timestamp comes from datetime.now() with no timezone
# argument, so it carries no UTC offset.
log_entry = {
"timestamp": datetime.now().isoformat(),
"student_name": student_name,
"student_id": student_id,
"command": cmd,
"result": str(result)[:500]
}
# Appended with header=False: the header row is written when the log file is
# first created earlier in the script.
pd.DataFrame([log_entry]).to_csv(logfile, mode="a", header=False, index=False)
# Display result
# run_command's result is rendered by type: a tuple is unpacked as
# (table, stats), a DataFrame is shown as a dataframe, anything else as text.
if isinstance(result, tuple): # regression output
table, stats = result
st.text(stats)
st.table(table)
elif isinstance(result, pd.DataFrame):
st.dataframe(result)
else:
st.text(result)
# The log file is opened and offered for download after "Download My Log" is
# clicked.
# NOTE(review): presumably this is a sibling of the "Run" button and the
# download button is created inside the `with`, i.e. a second click is needed
# to download -- confirm nesting and whether that two-step flow is intended.
if st.button("Download My Log"):
with open(logfile, "r") as f:
st.download_button("Click to download", f, file_name=logfile, mime="text/csv")
else:
# Presumably pairs with the name/ID check near the top of the script, given
# the message text -- TODO confirm.
st.warning("Please enter name and student ID to start.")