"""PV-output forecasting dashboard built with Darts and Gradio."""

import io
import logging
import os

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
from darts.models import NBEATSModel, TFTModel
from sklearn.preprocessing import LabelEncoder

logger = logging.getLogger(__name__)


# ----------------------------
# SAFE DATASET LOADER
# ----------------------------
def _read_csv_with_fallback(source):
    """Read a CSV from *source*, trying UTF-8 first and Latin-1 second."""
    try:
        return pd.read_csv(source, encoding="utf-8")
    except UnicodeDecodeError:
        # Latin-1 maps every byte to a code point, so this retry cannot fail
        # on decoding (it can still fail on I/O or parsing).
        return pd.read_csv(source, encoding="latin1")


def load_dataset(path="dataset.csv", url=None):
    """Load the dataset from a local CSV, falling back to a URL if given.

    The local ``path`` is read first (UTF-8, then Latin-1). If that fails
    because the file is missing/unreadable or cannot be parsed, and ``url``
    is provided, the same two-encoding read is attempted on ``url``.

    Args:
        path: Local CSV path to try first.
        url: Optional remote CSV location used when ``path`` cannot be read.

    Returns:
        The loaded ``pandas.DataFrame``.

    Raises:
        OSError: ``path`` is missing/unreadable and no ``url`` was given, or
            the ``url`` itself could not be fetched.
        pandas.errors.ParserError, pandas.errors.EmptyDataError: The CSV
            could not be parsed and no usable fallback was available.
    """
    try:
        return _read_csv_with_fallback(path)
    except (OSError, pd.errors.ParserError, pd.errors.EmptyDataError):
        # FileNotFoundError is an OSError: this is the "file missing" case the
        # URL fallback exists for.
        if not url:
            raise
        return _read_csv_with_fallback(url)


# Path or fallback URL
url = "https://raw.githubusercontent.com/yourusername/yourrepo/main/dataset.csv"
df = load_dataset("dataset.csv", url=url)

# ----------------------------
# Preprocessing
# ----------------------------
df["datetime"] = pd.to_datetime(df["datetime"])
df = df.sort_values("datetime")

# Encode weather icons
encoder = LabelEncoder()
if "icon" in df.columns:
    df["icon_encoded"] = encoder.fit_transform(df["icon"])

# Create timeseries
series = TimeSeries.from_dataframe(df, "datetime", "pv_output_kWh")
scaler = Scaler()
series_scaled = scaler.fit_transform(series)

# Pre-trained or fallback model
try:
    model = TFTModel.load_from_checkpoint("tft_pretrained", work_dir="./")
except Exception:
    # Deliberately broad: any failure to load the checkpoint should degrade
    # to training a fallback model rather than abort startup. The reason is
    # logged so the fallback is not silent.
    logger.warning(
        "Could not load the 'tft_pretrained' checkpoint; "
        "training an N-BEATS fallback model instead.",
        exc_info=True,
    )
    model = NBEATSModel(input_chunk_length=30, output_chunk_length=7, n_epochs=10)
    model.fit(series_scaled)


# ----------------------------
# EDA FUNCTIONS
# ----------------------------
def eda_summary():
    """Return ``df.describe()`` rendered as a plain-text table."""
    return df.describe().to_string()


def eda_histogram(column):
    """Plot the distribution of one dataset column.

    Args:
        column: Name of the column in ``df`` to plot.

    Returns:
        The Matplotlib figure holding a 20-bin histogram with a KDE overlay.
    """
    # Explicit figure/axes instead of the implicit pyplot "current figure".
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.histplot(df[column], kde=True, bins=20, ax=ax)
    ax.set_title(f"Distribution of {column}")
    fig.tight_layout()
    # Deregister from pyplot so repeated calls do not accumulate open
    # figures; the Figure object itself remains renderable by the caller.
    plt.close(fig)
    return fig

def eda_correlation():
    """Plot a heatmap of pairwise correlations between numeric columns.

    Returns:
        The Matplotlib figure holding the annotated heatmap.
    """
    # Restrict to numeric/bool columns: on pandas >= 2.0 ``DataFrame.corr()``
    # raises when the frame contains non-numeric columns.
    numeric_df = df.select_dtypes(include=["number", "bool"])
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(numeric_df.corr(), annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
    ax.set_title("Correlation Heatmap")
    fig.tight_layout()
    # Deregister from pyplot so repeated calls do not accumulate open figures.
    plt.close(fig)
    return fig


def eda_timeseries():
    """Plot ``pv_output_kWh`` against ``datetime``.

    Returns:
        The Matplotlib figure holding the line plot.
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(df["datetime"], df["pv_output_kWh"], label="PV Output (kWh)")
    ax.set_title("Time-Series Trend of PV Output")
    ax.set_xlabel("Date")
    ax.set_ylabel("PV Output (kWh)")
    ax.legend()
    fig.tight_layout()
    plt.close(fig)
    return fig


# ----------------------------
# FORECAST FUNCTION
# ----------------------------
# Forecast horizon label -> number of prediction steps.
# NOTE(review): the step counts presume one step per hour — confirm against
# the dataset's actual sampling frequency.
_HORIZON_STEPS = {"24 Hours": 24, "3 Days": 72, "7 Days": 168, "14 Days": 336}

# Weather condition label -> multiplicative factor applied to the forecast.
_WEATHER_FACTORS = {
    "Clear": 1.0,
    "Partly Cloudy": 0.85,
    "Cloudy": 0.65,
    "Fog": 0.55,
    "Smoke/Dust": 0.6,
    "Winter": 0.7,
    "Rain": 0.5,
}


def forecast_pv(horizon, weather_condition):
    """Forecast PV output and scale it by a weather-dependent factor.

    Args:
        horizon: One of the keys of the horizon table ("24 Hours", "3 Days",
            "7 Days", "14 Days").
        weather_condition: Weather label; labels not present in the factor
            table leave the forecast unscaled (factor 1.0).

    Returns:
        A ``(figure, peak_info)`` tuple: the Matplotlib figure showing recent
        history, the base forecast and the adjusted forecast, and a text line
        describing the adjusted forecast's maximum and its timestamp.

    Raises:
        KeyError: ``horizon`` is not a known horizon label.
    """
    steps = _HORIZON_STEPS[horizon]
    forecast = scaler.inverse_transform(model.predict(steps))

    # Weather impact adjustment
    adj_factor = _WEATHER_FACTORS.get(weather_condition, 1.0)
    forecast_adj = forecast * adj_factor

    # Plot. ``plt.subplots`` makes the new axes current, which is where the
    # TimeSeries ``plot`` calls below draw.
    fig, ax = plt.subplots(figsize=(10, 4))
    series[-7*24:].plot(label="History")  # last 168 points of history
    forecast.plot(label="Forecast (Base)")
    forecast_adj.plot(label=f"Forecast (Adjusted: {weather_condition})")
    ax.legend()
    ax.set_title(f"PV Forecast for {horizon}")
    fig.tight_layout()
    # Deregister from pyplot so repeated calls do not accumulate open figures.
    plt.close(fig)

    # Peak info: locate the maximum once and reuse the index for both the
    # timestamp and the value.
    adjusted_values = forecast_adj.values().ravel()
    peak_idx = int(np.argmax(adjusted_values))
    peak_time = forecast_adj.time_index[peak_idx]
    peak_val = adjusted_values[peak_idx]
    peak_info = f"🔺 Peak PV Output: {round(peak_val,2)} kWh at {peak_time}"

    return fig, peak_info


# ----------------------------
# GRADIO DASHBOARD
# ----------------------------
eda_tab = gr.TabbedInterface(
    [
        gr.Interface(fn=eda_summary, inputs=[], outputs="text", title="Summary Stats"),
        gr.Interface(
            fn=eda_histogram,
            # Pass a plain list: a pandas Index cannot be truth-tested, which
            # breaks the component's handling of ``choices``. A default value
            # keeps an untouched form from submitting ``None``.
            inputs=gr.Dropdown(
                list(df.columns), value="pv_output_kWh", label="Select Column"
            ),
            outputs="plot",
            title="Histogram",
        ),
        gr.Interface(fn=eda_correlation, inputs=[], outputs="plot", title="Correlation Heatmap"),
        gr.Interface(fn=eda_timeseries, inputs=[], outputs="plot", title="Time Series Trend"),
    ],
    tab_names=["Summary", "Histogram", "Correlation", "Time Series"],
)

forecast_tab = gr.Interface(
    fn=forecast_pv,
    inputs=[
        # Choices come from the same tables forecast_pv reads, so the UI and
        # the lookup cannot drift apart; defaults avoid a ``None`` submission.
        gr.Radio(list(_HORIZON_STEPS), value="24 Hours", label="Select Forecast Horizon"),
        gr.Dropdown(list(_WEATHER_FACTORS), value="Clear", label="Weather Condition"),
    ],
    outputs=[
        gr.Plot(label="Forecast Plot"),
        gr.Textbox(label="Peak Info"),
    ],
    title="PV Forecasting",
)

app = gr.TabbedInterface([eda_tab, forecast_tab], tab_names=["EDA Dashboard", "Forecasting"])

if __name__ == "__main__":
    app.launch()