# NOTE: scraped page header, not code -- "Spaces: Runtime error".
import numpy as np | |
import pandas as pd | |
import tensorflow as tf | |
from numpy import mean | |
from numpy import std | |
from sklearn.metrics import mean_squared_error | |
from sklearn.preprocessing import MinMaxScaler | |
from statsmodels.tsa.statespace.sarimax import SARIMAX | |
from prophet import Prophet | |
import gradio as gr | |
import matplotlib.pyplot as plt | |
import datetime as dt | |
from io import BytesIO | |
from PIL import Image | |
# Read the CSV once at import time; the prediction function filters this
# frame and the UI dropdowns take their choices from it.
dataset = pd.read_csv(filepath_or_buffer='temp_data.csv')
def df_to_sup(dataset):
    """Build a supervised-learning frame from rolling means and lags of ``Valor``.

    The input frame is copied, so the caller's data is never modified.
    Simple moving averages (windows 2, 3, 6, 12) and lagged values
    (1, 2, 3, 4, 6, 12 steps) of ``Valor`` are appended as new columns,
    rows containing any NaN are dropped, and ``Data_Completa`` is converted
    to proleptic Gregorian ordinals.

    Args:
        dataset: DataFrame that has at least the columns ``Valor`` and
            ``Data_Completa`` (the latter parseable by ``pd.to_datetime``).

    Returns:
        A new DataFrame holding the trailing columns of the enriched frame
        (the engineered features plus the one column that precedes them)
        together with a ``Valor`` column.
    """
    df = dataset.copy()  # work on a copy so the caller's frame stays intact

    window_sizes = [2, 3, 6, 12]
    for window_size in window_sizes:
        df[f'SMA({window_size})'] = df['Valor'].rolling(window=window_size).mean()

    lags = [1, 2, 3, 4, 6, 12]
    for lag in lags:
        df[f'lag({lag})'] = df['Valor'].shift(lag)

    # Rolling windows and shifts leave NaN in the leading rows; drop them.
    df.dropna(inplace=True)
    df['Data_Completa'] = pd.to_datetime(df['Data_Completa']).map(dt.datetime.toordinal)

    # Engineered columns plus the single column that precedes them
    # (4 + 6 + 1 = 11, the value that used to be hard-coded).
    n_trailing = len(window_sizes) + len(lags) + 1

    # Take an explicit copy: assigning a column onto a bare ``iloc`` slice is
    # the ambiguous view-vs-copy pattern that pandas warns about.
    df_1 = df.iloc[:, -n_trailing:].copy()
    df_1['Valor'] = df['Valor']
    return df_1
def _render_comparison_plot(to_plot):
    """Plot every column of *to_plot* against its index and return a PIL image."""
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        for column in to_plot.columns:
            ax.plot(to_plot.index, to_plot[column], label=column)
        ax.set_xlabel('Índice')
        ax.set_ylabel('Valores')
        ax.legend()
        ax.set_title('Comparação de Valores')
        ax.grid(True)
        img_buffer = BytesIO()
        fig.savefig(img_buffer, format="png")
    finally:
        # Release the figure; otherwise one figure per call stays registered
        # with pyplot for the lifetime of the process.
        plt.close(fig)
    img_buffer.seek(0)
    return Image.open(img_buffer)


# Function to perform predictions and generate the final plot
def predict_and_plot(instituicao, conta, train_sizes=(0.65, 0.7, 0.75, 0.8, 0.85)):
    """Fit SARIMAX, Prophet and an LSTM and report the model with the lowest MSE.

    The module-level ``dataset`` is filtered by ``Instituição`` and ``Conta``.
    For every proportion in *train_sizes* the series is split chronologically,
    the three models are fitted on the head and scored by mean squared error
    on the tail, all in the original units of ``Valor``.

    Args:
        instituicao: Value matched against the ``Instituição`` column.
        conta: Value matched against the ``Conta`` column.
        train_sizes: Iterable of train-split proportions. Every proportion is
            evaluated, but only the outcome of the LAST one is returned.

    Returns:
        A tuple ``(best_model, plot_image, table)``: the name of the winning
        model (``'SARIMAX'``, ``'Prophet'`` or ``'LSTM'``), a PIL image that
        plots its predictions against the actual values, and a DataFrame with
        the predictions.

    Raises:
        ValueError: If no row matches the filter or *train_sizes* is empty.
    """
    mask = (dataset['Instituição'] == instituicao) & (dataset['Conta'] == conta)
    dataset_filter = dataset[mask]
    if dataset_filter.empty:
        raise ValueError(
            f"No rows found for Instituição={instituicao!r} and Conta={conta!r}."
        )

    # -------------------
    # Univariate: Prophet-style column names. ``rename`` without ``inplace``
    # returns a new frame instead of mutating a slice of ``dataset``.
    data = dataset_filter[['Data_Completa', 'Valor']].rename(
        columns={'Valor': 'y', 'Data_Completa': 'ds'}
    )

    # -------------------
    # Supervised: engineered features for the LSTM.
    df_1 = df_to_sup(dataset_filter)
    X = df_1.drop('Valor', axis=1)
    y = df_1.loc[:, ['Valor']]

    outcome = None  # (best_model, plot_image, table) of the latest split
    for train_size_proportion in train_sizes:
        # -------------------
        # Univariate split
        train_size = int(train_size_proportion * len(data))
        train_data, test_data = data[:train_size], data[train_size:]
        train_target = train_data['y']
        test_target = test_data['y'].reset_index(drop=True)

        # ------------------
        # Supervised split; scalers are fitted on the training part only.
        train_size_S = int(train_size_proportion * len(df_1))
        X_train_raw, X_test_raw = X[:train_size_S], X[train_size_S:]
        y_train_raw, y_test_raw = y[:train_size_S], y[train_size_S:]

        scaler_x = MinMaxScaler(feature_range=(0, 1)).fit(X_train_raw)
        scaler_y = MinMaxScaler(feature_range=(0, 1)).fit(y_train_raw)

        train_x_norm = scaler_x.transform(X_train_raw)
        test_x_norm = scaler_x.transform(X_test_raw)
        y_train = scaler_y.transform(y_train_raw)

        # The LSTM expects (samples, timesteps, features); use one timestep.
        X_train = train_x_norm.reshape(train_x_norm.shape[0], 1, train_x_norm.shape[1])
        X_test = test_x_norm.reshape(test_x_norm.shape[0], 1, test_x_norm.shape[1])

        # Targets in original units, used to score the LSTM.
        y_test_actual = y_test_raw.to_numpy()

        # ------------------
        # Fit the three candidate models.
        sarimax_model = SARIMAX(train_target, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
        sarimax_results = sarimax_model.fit()

        prophet_model = Prophet(seasonality_mode='multiplicative')
        prophet_model.fit(train_data)

        lstm_model = tf.keras.models.Sequential()
        lstm_model.add(tf.keras.layers.LSTM(
            units=1000,
            return_sequences=True,
            input_shape=[X_train.shape[1], X_train.shape[2]],
        ))
        lstm_model.add(tf.keras.layers.Dropout(0.05))
        lstm_model.add(tf.keras.layers.LSTM(units=1000))
        lstm_model.add(tf.keras.layers.Dropout(0.05))
        lstm_model.add(tf.keras.layers.Dense(units=1))
        lstm_model.compile(loss='mse', optimizer='adam')
        lstm_model.fit(X_train, y_train, epochs=150, validation_split=0.2,
                       batch_size=4, shuffle=False)

        # ------------------
        # Predict the held-out tail with each model.
        sarimax_predictions = sarimax_results.predict(
            start=len(train_target),
            end=len(train_target) + len(test_target) - 1,
            dynamic=False,
        )
        prophet_predictions = prophet_model.predict(test_data)
        # Bring the network output back to the original units of ``Valor``.
        lstm_predictions = scaler_y.inverse_transform(lstm_model.predict(X_test))

        # ------------------
        # Score every model in the original units. The LSTM used to be scored
        # with ``evaluate`` against targets that were already de-normalised,
        # so its error was on a different scale from the other two.
        sarimax_error = mean_squared_error(test_target, sarimax_predictions)
        prophet_error = mean_squared_error(test_target, prophet_predictions['yhat'])
        lstm_error = mean_squared_error(y_test_actual, lstm_predictions)

        # ------------------
        errors = {'SARIMAX': sarimax_error, 'Prophet': prophet_error, 'LSTM': lstm_error}
        best_model = min(errors, key=errors.get)

        if best_model == 'SARIMAX':
            best_predictions = sarimax_predictions.reset_index(drop=True)
            actual = test_target
        elif best_model == 'Prophet':
            best_predictions = prophet_predictions['yhat'].reset_index(drop=True)
            actual = test_target
        else:
            # ``lstm_predictions`` is a NumPy array; wrap it in a Series so it
            # can be concatenated and tabulated like the other predictions.
            # It is compared against the supervised test targets, which cover
            # the rows the LSTM actually predicted.
            best_predictions = pd.Series(lstm_predictions.ravel(), name='LSTM')
            actual = pd.Series(y_test_actual.ravel(), name='y')

        to_plot = pd.concat([best_predictions, actual], axis=1)
        plot_image = _render_comparison_plot(to_plot)
        table = pd.DataFrame(best_predictions)
        outcome = (best_model, plot_image, table)

    if outcome is None:
        raise ValueError("train_sizes must contain at least one proportion.")

    # NOTE(review): only the last train size is reported, as before; confirm
    # whether the best split across all sizes should be returned instead.
    return outcome
# Create a Gradio interface.
# Components are taken from the top-level ``gr`` namespace; the
# ``gr.inputs`` / ``gr.outputs`` namespaces are deprecated and are not
# available in newer Gradio releases.
iface = gr.Interface(
    fn=predict_and_plot,
    inputs=[
        gr.Dropdown(label="Instituição", choices=dataset['Instituição'].unique().tolist()),
        gr.Dropdown(label="Conta", choices=dataset['Conta'].unique().tolist()),
    ],
    outputs=[
        gr.Textbox(label="Melhor Modelo"),
        gr.Image(type="pil", label="Gráfico"),
        gr.Dataframe(label="Previsões", type='pandas'),
    ],
    live=False,
    title="Timeseries Pipeline",
    description="Seleciona 'Instituição' e 'Conta' para obter o modelo com as melhores previsões, gráfico, e tabela com as previsões.",
)
# Launch the Gradio interface
iface.launch()