engralimalik committed on
Commit
9189033
·
verified ·
1 Parent(s): 161fe1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -102
app.py CHANGED
@@ -1,109 +1,77 @@
1
  import pandas as pd
2
- import matplotlib.pyplot as plt
3
  import plotly.express as px
4
  import streamlit as st
5
  from transformers import pipeline
6
 
7
- # Upload CSV file containing transaction data
8
- uploaded_file = st.file_uploader("Upload Expense CSV", type="csv")
9
-
10
- if uploaded_file is not None:
11
- # Load the file into a DataFrame
12
  df = pd.read_csv(uploaded_file)
13
 
14
- # Debug: Display the column names to check if 'Description' exists
15
- st.write("Columns in the uploaded file:", df.columns)
16
-
17
- # Check if the 'Description' column exists
18
- if 'Description' not in df.columns:
19
- st.error("Error: The CSV file does not contain a 'Description' column.")
20
- else:
21
- # Initialize Hugging Face's zero-shot text classification model
22
- model_name = 'distilbert-base-uncased'
23
- classifier = pipeline('zero-shot-classification', model=model_name)
24
-
25
- # List of possible expense categories
26
- categories = ["Groceries", "Rent", "Utilities", "Entertainment", "Dining", "Transportation", "Salary"]
27
-
28
- # Function to classify transaction descriptions into categories
29
- def categorize_expense(description):
30
- result = classifier(description, candidate_labels=categories)
31
- return result['labels'][0] # Choose the most probable category
32
-
33
- # Apply the categorization function to the 'Description' column in the dataset
34
- df['Category'] = df['Description'].apply(categorize_expense)
35
-
36
- # Show the categorized data
37
- st.write("Categorized Data:", df.head())
38
-
39
- # Visualization 1: Pie Chart of Spending by Category
40
- category_expenses = df.groupby('Category')['Amount'].sum()
41
-
42
- # Plot pie chart for expense distribution by category
43
- fig1, ax1 = plt.subplots(figsize=(8, 8))
44
- category_expenses.plot(kind='pie', autopct='%1.1f%%', startangle=90, colors=plt.cm.Paired.colors, ax=ax1)
45
- ax1.set_title('Expense Distribution by Category')
46
- ax1.set_ylabel('') # Hide the y-axis label
47
- st.pyplot(fig1)
48
-
49
- # Visualization 2: Monthly Spending Trends (Line Chart)
50
- # Convert 'Date' to datetime and remove time part
51
- df['Date'] = pd.to_datetime(df['Date']).dt.date # Keep only the date, no time
52
-
53
- # Extract month-year for grouping and convert the Period to string to avoid JSON serialization issues
54
- df['Month'] = df['Date'].apply(lambda x: x.strftime('%Y-%m')) # Extract Year-Month as string
55
-
56
- # Group by month and calculate the total amount spent per month
57
- monthly_expenses = df.groupby('Month')['Amount'].sum()
58
-
59
- # Plot monthly spending trends as a line chart
60
- fig2 = px.line(
61
- monthly_expenses,
62
- x=monthly_expenses.index,
63
- y=monthly_expenses.values,
64
- title="Monthly Expenses",
65
- labels={"x": "Month", "y": "Amount ($)"}
66
- )
67
- st.plotly_chart(fig2)
68
-
69
- # Default Budget Values
70
- default_budgets = {
71
- "Groceries": 300,
72
- "Rent": 1000,
73
- "Utilities": 150,
74
- "Entertainment": 100,
75
- "Dining": 150,
76
- "Transportation": 120,
77
- }
78
-
79
- # Sliders for adjusting the monthly budget
80
- st.write("Adjust your monthly budget for each category:")
81
-
82
- budgets = {}
83
- for category in default_budgets:
84
- budgets[category] = st.slider(f"Budget for {category} ($)",
85
- min_value=0,
86
- max_value=2000,
87
- value=default_budgets[category],
88
- step=50)
89
-
90
- # Track if any category exceeds its budget
91
- df['Budget_Exceeded'] = df.apply(lambda row: row['Amount'] > budgets.get(row['Category'], 0), axis=1)
92
-
93
- # Show which categories exceeded their budgets
94
- exceeded_budget = df[df['Budget_Exceeded'] == True]
95
- st.write("Categories that exceeded the budget:", exceeded_budget[['Date', 'Category', 'Amount']])
96
-
97
- # Visualization 3: Monthly Spending vs Budget (Bar Chart)
98
- # Create a figure explicitly for the bar chart
99
- fig3, ax3 = plt.subplots(figsize=(10, 6)) # Create figure and axes
100
- monthly_expenses_df = pd.DataFrame({
101
- 'Actual': monthly_expenses,
102
- 'Budget': [sum(budgets.values())] * len(monthly_expenses) # Same budget for simplicity
103
- })
104
- monthly_expenses_df.plot(kind='bar', ax=ax3) # Pass the axes to the plot
105
- ax3.set_title('Monthly Spending vs Budget')
106
- ax3.set_ylabel('Amount ($)')
107
-
108
- # Display the plot with Streamlit
109
- st.pyplot(fig3)
 
"""Streamlit app: categorize uploaded expenses and compare them to a budget.

The uploaded CSV must provide ``Description``, ``Amount`` and ``Date``
columns. Each description is assigned to one of the budget categories with a
zero-shot text classifier; the app then shows the spending split per
category, the monthly trend, and monthly spending against the total budget
chosen in the sidebar.
"""

import pandas as pd
import plotly.express as px
import streamlit as st
from transformers import pipeline

# Columns the script reads from the uploaded CSV.
REQUIRED_COLUMNS = ("Description", "Amount", "Date")

# category -> (slider maximum, default monthly budget), both in dollars.
BUDGET_SLIDERS = {
    "Groceries": (1000, 300),
    "Rent": (5000, 1000),
    "Utilities": (500, 150),
    "Entertainment": (1000, 100),
    "Dining": (1000, 150),
    "Transportation": (500, 120),
}
CATEGORIES = list(BUDGET_SLIDERS)

# Zero-shot classification scores labels through an entailment head, so it
# needs an NLI-fine-tuned checkpoint. The plain `distilbert-base-uncased`
# weights have no such head and yield arbitrary labels.
ZERO_SHOT_MODEL = "typeform/distilbert-base-uncased-mnli"

# Category given to rows whose description is missing or blank. It has no
# slider, so its budget is treated as 0 like any other unknown category.
UNCATEGORIZED = "Uncategorized"


@st.cache_resource
def load_classifier():
    """Build the zero-shot pipeline once and reuse it across script reruns."""
    return pipeline("zero-shot-classification", model=ZERO_SHOT_MODEL)


def categorize_expense(description):
    """Return the most probable budget category for one description."""
    result = load_classifier()(description, candidate_labels=CATEGORIES)
    return result["labels"][0]  # Labels come back sorted by score.


@st.cache_data
def categorize_descriptions(descriptions):
    """Map each description in the given tuple to its predicted category.

    Cached so that moving a budget slider (which reruns the whole script)
    does not re-run the model over descriptions it has already seen.
    """
    return {text: categorize_expense(text) for text in descriptions}


def main():
    """Render the upload widget, the budget sidebar and the three charts."""
    # File upload
    uploaded_file = st.file_uploader(
        "Upload your expense CSV file", type=["csv"]
    )
    if uploaded_file is None:
        return

    df = pd.read_csv(uploaded_file)

    # Fail with a readable message instead of a KeyError further down.
    missing = [name for name in REQUIRED_COLUMNS if name not in df.columns]
    if missing:
        st.error(
            "Error: The CSV file is missing required column(s): "
            + ", ".join(missing)
        )
        st.stop()

    # Display Dataframe
    st.write(df.head())

    # Classify each distinct, non-blank description once.
    descriptions = df["Description"].fillna("").astype(str).str.strip()
    distinct = tuple(sorted(set(descriptions) - {""}))
    category_by_description = categorize_descriptions(distinct)
    df["Category"] = (
        descriptions.map(category_by_description).fillna(UNCATEGORIZED)
    )

    # Display categorized data
    st.write("Categorized Data", df)

    # Sidebar for setting the monthly budget using sliders
    st.sidebar.header("Set Your Monthly Budget")
    budgets = {
        category: st.sidebar.slider(f"{category} Budget", 0, upper, default)
        for category, (upper, default) in BUDGET_SLIDERS.items()
    }

    # Flag every transaction whose amount is above its category's budget.
    # Categories without a slider count as a budget of 0.
    limits = df["Category"].map(budgets).fillna(0)
    df["Budget_Exceeded"] = df["Amount"] > limits

    # Show categories that exceeded their budget
    exceeded_budget = df[df["Budget_Exceeded"]]
    st.write(
        "Categories that exceeded the budget:",
        exceeded_budget[["Date", "Category", "Amount"]],
    )

    # Visualizations

    # 1. Pie Chart for expense distribution by category
    category_expenses = df.groupby("Category", as_index=False)["Amount"].sum()
    fig1 = px.pie(
        category_expenses,
        values="Amount",
        names="Category",
        title="Expense Distribution by Category",
    )
    st.plotly_chart(fig1)

    # 2. Monthly Spending Trends (Line Chart)
    df["Date"] = pd.to_datetime(df["Date"])
    # Periods are not JSON-serializable, so Plotly gets plain strings.
    df["Month"] = df["Date"].dt.to_period("M").astype(str)
    monthly_expenses = (
        df.groupby("Month", as_index=False)["Amount"]
        .sum()
        .rename(columns={"Amount": "Actual"})
    )

    fig2 = px.line(
        monthly_expenses,
        x="Month",
        y="Actual",
        title="Monthly Expenses",
        labels={"Actual": "Amount ($)"},
    )
    st.plotly_chart(fig2)

    # 3. Monthly Spending vs Budget (Bar Chart)
    # The same total budget is used for every month, for simplicity.
    monthly_expenses["Budget"] = sum(budgets.values())
    fig3 = px.bar(
        monthly_expenses,
        x="Month",
        y=["Actual", "Budget"],
        barmode="group",
        title="Monthly Spending vs Budget",
        labels={"value": "Amount ($)", "variable": ""},
    )
    st.plotly_chart(fig3)


if __name__ == "__main__":
    main()