# CSV / app.py
# XythicK's picture
# Update app.py
# 433c2fa verified
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import io
# --- 1. Function to process the uploaded file and generate stats/dataframe ---
def analyze_csv(file):
    """Read an uploaded CSV and build the values for the four output components.

    Args:
        file: Value delivered by the ``gr.File`` input. May be ``None`` (no
            file), an object exposing the temporary file path as ``.name``,
            or a plain path string.

    Returns:
        A 4-tuple ``(preview, message, dropdown, state)``:

        * ``preview`` -- the parsed DataFrame, or ``None`` when there is
          nothing to show.
        * ``message`` -- the transposed ``describe()`` table (rounded to two
          decimals) as Markdown, or a prompt / error message.
        * ``dropdown`` -- a ``gr.Dropdown`` update listing the numeric columns
          (an empty list when no data is loaded).
        * ``state`` -- the DataFrame to keep for later plotting, or ``None``.

    Any exception raised while reading or summarising the file is reported
    through ``message`` instead of propagating.
    """

    def _column_dropdown(choices):
        # value=None is passed explicitly so that a selection left over from a
        # previously loaded file cannot linger and point at a missing column.
        return gr.Dropdown(choices=choices, value=None, label="Select Column to Plot")

    if file is None:
        # Nothing uploaded (or the file was removed): reset every output.
        return None, "Please upload a CSV file.", _column_dropdown([]), None

    try:
        # Accept both file-like wrappers (path in ``.name``) and bare paths.
        csv_path = getattr(file, "name", file)
        df = pd.read_csv(csv_path)
        # One row per column, statistics as columns, two decimals.
        stats = df.describe().round(2).T
        stats_markdown = stats.to_markdown()
        # Only numeric columns are offered for plotting.
        numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
    except Exception as e:
        # Best effort: surface the problem in the UI and clear everything else.
        return None, f"Error processing file: {e}", _column_dropdown([]), None

    # The DataFrame is returned twice: once for the preview table and once
    # for the state slot read later by the plotting callback.
    return df, stats_markdown, _column_dropdown(numeric_cols), df
# --- 2. Function to generate a plot for a selected column ---
def generate_plot(df_state, column_name):
    """Build a histogram (with KDE overlay) for one column of the stored data.

    Args:
        df_state: The DataFrame kept from the last successful upload, or
            ``None`` when no data has been loaded.
        column_name: Name of the column to plot; ``None`` or ``""`` means
            nothing is selected.

    Returns:
        The matplotlib ``Figure`` for the requested column, or ``None`` when
        there is nothing to plot (no data, no selection, unknown column) or
        plotting failed. A figure object is returned rather than encoded PNG
        bytes because the result is wired to a ``gr.Plot`` output.
    """
    if df_state is None or column_name is None or column_name == "":
        return None
    if column_name not in df_state.columns:
        # The selection can be stale (e.g. it refers to a previously loaded
        # file); bail out before allocating a figure.
        return None

    fig = None
    try:
        # Explicit figure/axes instead of the implicit pyplot "current figure".
        fig, ax = plt.subplots(figsize=(8, 5))
        sns.histplot(df_state[column_name], kde=True, ax=ax)
        ax.set_title(f'Distribution of {column_name}')
        ax.set_xlabel(column_name)
        ax.set_ylabel('Frequency')
        return fig
    except Exception as e:
        # Best effort: log the problem and leave the plot area empty.
        print(f"Plotting Error: {e}")
        return None
    finally:
        # Unregister the figure from pyplot on every path so figures do not
        # accumulate across calls; the returned Figure object stays usable.
        if fig is not None:
            plt.close(fig)
# --- 3. Gradio Interface Definition ---
# Layout and event wiring for the app. `demo` is the Blocks object that the
# launch code at the bottom of the file starts.
with gr.Blocks(title="CSV Data Analyzer") as demo:
    gr.Markdown("## 📈 CSV Data Analyzer")
    gr.Markdown("Upload your CSV file and see instant statistics and visualizations.")

    # State component holding the DataFrame between callbacks: it is written
    # by `analyze_csv` and read by `generate_plot`.
    df_state = gr.State(None)

    # Input component
    csv_file = gr.File(label="Upload CSV File (.csv)", file_types=[".csv"])

    with gr.Row():
        # Outputs. `height` is intentionally not passed to the Dataframe: it
        # was removed earlier as an unsupported argument.
        df_output = gr.Dataframe(label="Uploaded Data Preview", interactive=False)
        stats_output = gr.Markdown(label="Descriptive Statistics")

    gr.HTML("<hr>")

    # Plotting section
    with gr.Row():
        # Choices are filled in by `analyze_csv` once a file has been read.
        column_dropdown = gr.Dropdown(label="Select Column to Plot", interactive=True)
        plot_button = gr.Button("Generate Plot")

    plot_output = gr.Plot(label="Column Distribution Plot")

    # --- 4. Event Handling (Interactions) ---
    # `analyze_csv` returns one value per component listed here, in this order.
    analysis_outputs = [df_output, stats_output, column_dropdown, df_state]

    # Triggered when a file is uploaded.
    csv_file.upload(
        analyze_csv,
        inputs=[csv_file],
        outputs=analysis_outputs,
    )
    # Triggered when the uploaded file is removed. The handler is expected to
    # receive None here and take its reset branch, so the preview, statistics,
    # dropdown and stored DataFrame do not keep showing the old file.
    csv_file.clear(
        analyze_csv,
        inputs=[csv_file],
        outputs=analysis_outputs,
    )

    # Triggered when the plot button is clicked.
    plot_button.click(
        generate_plot,
        inputs=[df_state, column_dropdown],
        outputs=[plot_output],
    )
# Launch the app for the Hugging Face Space environment
if __name__ == "__main__":
    # Start the server only when this file is run as a script, using the
    # fixed host and port configured for the hosting environment.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)