Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,7 @@ from transformers import pipeline
|
|
| 6 |
import tempfile
|
| 7 |
import os
|
| 8 |
|
|
|
|
| 9 |
def load_and_clean_data():
|
| 10 |
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
|
| 11 |
df = pd.read_csv(url)
|
|
@@ -14,6 +15,7 @@ def load_and_clean_data():
|
|
| 14 |
df.drop(columns=['Cabin', 'Ticket', 'Name'], inplace=True)
|
| 15 |
return df
|
| 16 |
|
|
|
|
| 17 |
def generate_plot_images(df):
|
| 18 |
temp_files = []
|
| 19 |
for plot_func in [plot_survival_count, plot_survival_by_gender]:
|
|
@@ -31,6 +33,7 @@ def plot_survival_count(df, ax):
|
|
| 31 |
def plot_survival_by_gender(df, ax):
|
| 32 |
sns.countplot(x='Sex', hue='Survived', data=df, ax=ax)
|
| 33 |
|
|
|
|
| 34 |
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
|
| 35 |
|
| 36 |
def summarize_eda(df):
|
|
@@ -45,15 +48,24 @@ def summarize_eda(df):
|
|
| 45 |
except Exception as e:
|
| 46 |
return f"Summarization failed: {str(e)}"
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
def run_titanic_eda():
|
| 49 |
try:
|
| 50 |
df = load_and_clean_data()
|
| 51 |
plots = generate_plot_images(df)
|
| 52 |
summary = summarize_eda(df)
|
| 53 |
-
return df
|
| 54 |
except Exception as e:
|
| 55 |
return "Data load failed", None, None, f"Error: {str(e)}"
|
| 56 |
|
|
|
|
| 57 |
demo = gr.Interface(
|
| 58 |
fn=run_titanic_eda,
|
| 59 |
inputs=[],
|
|
|
|
| 6 |
import tempfile
|
| 7 |
import os
|
| 8 |
|
| 9 |
+
# Load and clean Titanic dataset
|
| 10 |
def load_and_clean_data():
|
| 11 |
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
|
| 12 |
df = pd.read_csv(url)
|
|
|
|
| 15 |
df.drop(columns=['Cabin', 'Ticket', 'Name'], inplace=True)
|
| 16 |
return df
|
| 17 |
|
| 18 |
+
# Generate EDA plots
|
| 19 |
def generate_plot_images(df):
|
| 20 |
temp_files = []
|
| 21 |
for plot_func in [plot_survival_count, plot_survival_by_gender]:
|
|
|
|
| 33 |
def plot_survival_by_gender(df, ax):
|
| 34 |
sns.countplot(x='Sex', hue='Survived', data=df, ax=ax)
|
| 35 |
|
| 36 |
+
# Summarizer pipeline
|
| 37 |
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
|
| 38 |
|
| 39 |
def summarize_eda(df):
|
|
|
|
| 48 |
except Exception as e:
|
| 49 |
return f"Summarization failed: {str(e)}"
|
| 50 |
|
| 51 |
+
# Markdown fallback logic
|
| 52 |
+
def safe_markdown(df):
|
| 53 |
+
try:
|
| 54 |
+
return df.head().to_markdown()
|
| 55 |
+
except Exception:
|
| 56 |
+
return df.head().to_string()
|
| 57 |
+
|
| 58 |
+
# Main app logic
|
| 59 |
def run_titanic_eda():
|
| 60 |
try:
|
| 61 |
df = load_and_clean_data()
|
| 62 |
plots = generate_plot_images(df)
|
| 63 |
summary = summarize_eda(df)
|
| 64 |
+
return safe_markdown(df), plots[0], plots[1], summary
|
| 65 |
except Exception as e:
|
| 66 |
return "Data load failed", None, None, f"Error: {str(e)}"
|
| 67 |
|
| 68 |
+
# Gradio interface
|
| 69 |
demo = gr.Interface(
|
| 70 |
fn=run_titanic_eda,
|
| 71 |
inputs=[],
|