Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import matplotlib.dates as mdates | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| import re | |
| from datetime import datetime, timedelta | |
| import warnings | |
| import time | |
| import dask.dataframe as dd | |
| def date_from_week(year, week): | |
| # Assuming the fiscal year starts in August and the week starts from August 1st | |
| base_date = pd.to_datetime((year - 1).astype(str) + '-08-01') | |
| dates = base_date + pd.to_timedelta((week - 1) * 7, unit='days') | |
| return dates | |
| def load_data(active_card): | |
| # st.write(f"{active_card}") | |
| # Define columns common to multiple cards if there are any | |
| common_cols = ['FyWeek', 'Itemtype', 'Chaincode', 'State', 'SalesVolume', 'UnitPrice', 'Sales'] | |
| # Columns specific to cards | |
| card_specific_cols = { | |
| 'card1': ['FyWeek', 'State', 'Itemtype', 'Chaincode', 'SalesVolume'], | |
| 'card2': ['FyWeek', 'Fy', 'State','Store','Address','Zipcode','City','Itemtype', 'Chaincode', 'Containercode', 'SalesVolume', 'UnitPrice', 'Sales'], | |
| } | |
| # Choose columns based on the active card | |
| required_columns = card_specific_cols.get(active_card, common_cols) | |
| # Define the data types for efficient memory usage | |
| dtype_spec = { | |
| 'FyWeek': 'string', | |
| 'Fy': 'category', # Add data type for 'Fy' if it's used | |
| 'Itemtype': 'category', | |
| 'Chaincode': 'category', | |
| 'State': 'category', | |
| "Store": "category", | |
| 'Containercode': 'category', | |
| "Address": "string", | |
| "Zipcode": "float", | |
| "City": "category", | |
| 'SalesVolume': 'float', | |
| 'UnitPrice': 'float', | |
| 'Sales': 'float' | |
| } | |
| # Read only the necessary columns | |
| # st.write(required_columns) | |
| ddf = dd.read_csv("fy21-24.csv", usecols=required_columns, dtype=dtype_spec) | |
| df = ddf.compute() | |
| if active_card in ['card2']: | |
| df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State', 'Containercode', 'Itemtype'], observed=True).agg({ | |
| 'SalesVolume': 'sum', | |
| 'UnitPrice': 'mean', | |
| 'Sales': 'sum' | |
| }).reset_index() | |
| df[['FY', 'Week']] = df['FyWeek'].str.split(' Week ', expand=True) | |
| df['Week'] = df['Week'].astype(int) # Convert 'Week' to int | |
| df['Year'] = df['FY'].str[2:].astype(int) # Extract year part and convert to int | |
| df['Dt'] = date_from_week(df['Year'], df['Week']) | |
| # st.write(df.columns) | |
| return df | |
| # Display logo | |
| st.image("bonnie.png", width=150) # Adjust width as needed | |
| # Display title | |
| st.title("Bonnie Plants Pricing & Sales Analytics Dashboard") | |
| # Close the div for logo and title | |
| st.markdown('</div>', unsafe_allow_html=True) | |
| # Initialize session state for storing which card was clicked and item type | |
| if 'active_card' not in st.session_state: | |
| st.session_state['active_card'] = None | |
| if 'selected_item_type' not in st.session_state: | |
| st.session_state['selected_item_type'] = 'CORE' # Set default to 'CORE' | |
| if 'selected_feature' not in st.session_state: | |
| st.session_state['selected_feature'] = 'Chaincode' # Default to 'Chain Code' | |
| # Card selection buttons | |
| col1, col2 = st.columns(2) | |
| # Define buttons for plot categories, update session state when clicked | |
| with col1: | |
| if st.button("Sales Volume Trend for Item Category"): | |
| st.session_state['active_card'] = 'card1' | |
| with col2: | |
| if st.button("Sales Volume & Unit Price Correlation for Item Category and Container Code"): | |
| st.session_state['active_card'] = 'card2' | |
| start_time=time.time() | |
| # st.write(st.session_state['active_card']) | |
| df = load_data(st.session_state['active_card']) | |
| time_taken = time.time() - start_time | |
| st.write(f"Data loaded in {time_taken:.2f} seconds") | |
| # Initialize session state for storing the selected state and feature | |
| if 'selected_state' not in st.session_state: | |
| st.session_state['selected_state'] = df['State'].unique()[0] # Default to the first state | |
| ############################################ CARD #1 #################################################### | |
| if st.session_state['active_card'] == 'card1': | |
| # st.write("Processing card1...") | |
| # Dropdown for selecting the state | |
| selected_state = st.selectbox('Select State', df['State'].unique()) | |
| # Dropdown for selecting the feature for grouping | |
| selected_feature = st.selectbox('Select Feature for Grouping', ['Chaincode', 'Itemtype',]) | |
| # Filter the dataframe based on selected state | |
| filtered_df = df[df['State'] == selected_state] | |
| # Time the grouping operation | |
| start_time = time.time() | |
| group_data = filtered_df.groupby(['FyWeek', selected_feature],observed=True)['SalesVolume'].sum().reset_index() | |
| time_taken = time.time() - start_time | |
| # Plotting | |
| fig = px.bar(group_data, x='FyWeek', y='SalesVolume', color=selected_feature, | |
| title=f'Sales Volume over Fiscal Week in {selected_state} by {selected_feature}', | |
| labels={'SalesVolume': 'Sales Volume', 'Fiscal Week Short': 'Fiscal Week'}) | |
| st.plotly_chart(fig) | |
| ########################################################################################################## | |
| ########################################### CARD #2 #################################################### | |
| if st.session_state['active_card'] == 'card2': | |
| # Dropdown to select item type (using session_state) | |
| st.session_state['selected_item_type'] = st.selectbox( | |
| 'Select Item Type', df['Itemtype'].unique(), | |
| index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type'])) | |
| # Dropdown to select the grouping category (container code, chain code, or state) | |
| group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State']) | |
| # Multi-select checkbox to select multiple years | |
| selected_years = st.multiselect('Select Year(s)', [2021, 2022, 2023, 2024], default=[2021]) | |
| st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option} in {', '.join(map(str, selected_years))}") | |
| # Convert 'Dt' column to datetime | |
| df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce') | |
| df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3', '4', '5', '6']), 'Promo', 'NoPromo') | |
| df["Promo"] = df["Promo"].astype("category") | |
| # Filter the dataframe based on the selected item type and selected years | |
| filtered_df = df[(df['Itemtype'] == st.session_state['selected_item_type']) & (df['Dt'].dt.year.isin(selected_years))] | |
| # Find the top 3 values based on total SalesVolume in the selected grouping category | |
| top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index | |
| # Filter the data for only the top 3 values | |
| top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)] | |
| # Aggregate data | |
| agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({ | |
| 'SalesVolume': 'sum', | |
| 'UnitPrice': 'mean' | |
| }).reset_index() | |
| # Create a new column 'week-year' for X-axis labels | |
| agg_df['week-year'] = agg_df['Dt'].dt.strftime('%U-%Y') | |
| # Loop through the top 3 values and create separate plots using Plotly | |
| for value in top_3_values: | |
| value_data = agg_df[agg_df[group_by_option] == value] | |
| # Assuming you have 'value_data' from your previous code | |
| mean_sales_volume = value_data['SalesVolume'].mean() | |
| mean_unit_price = value_data['UnitPrice'].mean() | |
| # Create a Plotly figure | |
| fig = go.Figure() | |
| # Add SalesVolume trace | |
| fig.add_trace(go.Scatter( | |
| x=value_data['week-year'], | |
| y=value_data['SalesVolume'], | |
| mode='lines+markers', | |
| name='SalesVolume', | |
| line=dict(color='blue'), | |
| hovertemplate='SalesVolume: %{y}<br>Week-Year: %{x}' | |
| )) | |
| # Add UnitPrice trace on a secondary Y-axis | |
| fig.add_trace(go.Scatter( | |
| x=value_data['week-year'], | |
| y=value_data['UnitPrice'], | |
| mode='lines+markers', | |
| name='UnitPrice', | |
| line=dict(color='green'), | |
| yaxis='y2', | |
| hovertemplate='UnitPrice: %{y}<br>Week-Year: %{x}' | |
| )) | |
| # Add mean line for SalesVolume | |
| fig.add_shape(type="line", | |
| x0=value_data['week-year'].min(), x1=value_data['week-year'].max(), | |
| y0=mean_sales_volume, y1=mean_sales_volume, | |
| line=dict(color="blue", width=2, dash="dash"), | |
| xref='x', yref='y') | |
| # Add mean line for UnitPrice (on secondary Y-axis) | |
| fig.add_shape(type="line", | |
| x0=value_data['week-year'].min(), x1=value_data['week-year'].max(), | |
| y0=mean_unit_price, y1=mean_unit_price, | |
| line=dict(color="green", width=2, dash="dash"), | |
| xref='x', yref='y2') | |
| # Update layout for dual axes | |
| fig.update_layout( | |
| template='plotly_white', | |
| title=f"SalesVolume and UnitPrice - {value} ({group_by_option})", | |
| xaxis_title='Week-Year', | |
| yaxis_title='Sales Volume', | |
| yaxis2=dict(title='UnitPrice', overlaying='y', side='right'), | |
| legend=dict(x=0.9, y=1.15), | |
| hovermode="x unified", # Show both values in a tooltip | |
| height=500, | |
| autosize=True, | |
| margin=dict(l=50, r=50, t=50, b=50) | |
| ) | |
| # Rotate X-axis labels | |
| fig.update_xaxes(tickangle=90) | |
| # Display the Plotly figure in Streamlit | |
| st.plotly_chart(fig, use_container_width=True) | |
| ########################################################################################################## | |