vinimoreira committed on
Commit
64785ea
·
verified ·
1 Parent(s): b9a2b1e

the big regression

Browse files
api/app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import torch
5
+ import joblib
6
+ import json
7
+ import os
8
+ from transformers import AutoTokenizer, AutoModel
9
+ from torch import nn
10
+
11
class RegressionTransformer(nn.Module):
    """Pretrained transformer encoder followed by a small MLP regression head.

    The encoder turns a tokenized sequence into one vector per example, and
    the head maps that vector to a single scalar.
    """

    def __init__(self, model_name='neuralmind/bert-base-portuguese-cased'):
        """Load the pretrained backbone and build the regression head.

        Args:
            model_name: Identifier or local path forwarded to
                ``AutoModel.from_pretrained``.
        """
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        # Attribute names and layer order are part of the checkpoint format:
        # ``load_state_dict`` matches parameters by path (``regressor.1.weight``
        # and so on), so do not rename or reorder these layers.
        self.regressor = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(self.bert.config.hidden_size, 128),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(128, 1),
        )

    def forward(self, input_ids, attention_mask):
        """Run the encoder and regress one value per example.

        Args:
            input_ids: Token id tensor passed to the backbone as ``input_ids``.
            attention_mask: Mask tensor passed to the backbone as
                ``attention_mask``.

        Returns:
            The output of the regression head applied to the pooled encoding.
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Not every backbone exposes a pooling head; when it is missing (or
        # disabled and therefore None) use the first token's hidden state so
        # the head still receives one vector per example.
        pooled_output = getattr(outputs, 'pooler_output', None)
        if pooled_output is None:
            pooled_output = outputs.last_hidden_state[:, 0]
        return self.regressor(pooled_output)
26
+
27
# Inference artifacts are loaded once, at import time, and reused by every
# prediction request.
print("Carregando artefatos do modelo...")
device = torch.device("cpu")

# Locate the artifact directory. './modelo' stays the first choice so existing
# deployments behave exactly as before; the other candidates cover the
# 'model/' directory name and launching the app from a different working
# directory.
_APP_DIR = os.path.dirname(os.path.abspath(__file__))
_ARTIFACT_DIR_CANDIDATES = (
    './modelo',
    './model',
    os.path.join(_APP_DIR, '..', 'modelo'),
    os.path.join(_APP_DIR, '..', 'model'),
)
ARTIFACT_DIR = next(
    (pasta for pasta in _ARTIFACT_DIR_CANDIDATES if os.path.isdir(pasta)),
    './modelo',  # nothing found: keep the original path so the error names it
)

MODEL_PATH = os.path.join(ARTIFACT_DIR, 'best_model_state.pth')
model = RegressionTransformer()
model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
model.to(device)
model.eval()  # inference only: disables the dropout layers in the head
print("Modelo carregado com sucesso.")

TOKENIZER_NAME = 'neuralmind/bert-base-portuguese-cased'
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
print("Tokenizador carregado com sucesso.")

FEATURES_PATH = os.path.join(ARTIFACT_DIR, 'model_features.json')
with open(FEATURES_PATH, 'r', encoding='utf-8') as f:
    # json.load needs the open file handle; calling it without one raises
    # TypeError before the app can start.
    features_config = json.load(f)

model_features = features_config.get('text_features')
if model_features is None:
    # NOTE(review): some feature files only carry 'numeric' and 'categorical'
    # lists. Falling back to their concatenation keeps the app usable, but the
    # column order here is an assumption -- confirm it matches the order used
    # to build the text inputs during training.
    print("Aviso: 'text_features' ausente em model_features.json; usando 'numeric' + 'categorical'.")
    model_features = [
        nome
        for grupo in ('numeric', 'categorical')
        for nome in features_config.get(grupo, [])
    ]
print("Features do modelo carregadas.")
45
+
46
def predict_price(make, model_name, year, odometer, trim, body, transmission, color, interior):
    """Estimate a vehicle price and return it as a formatted currency string.

    The inputs are gathered into a one-row DataFrame, extended with derived
    columns (``age``, ``sale_month``, ``sale_dayofweek``), serialized as
    ``name[value] | name[value] | ...`` over the columns listed in the
    module-level ``model_features``, tokenized, and passed through the
    module-level ``model``.

    Args:
        make, model_name, year, odometer, trim, body, transmission, color,
        interior: Vehicle attributes, in the order the UI supplies them.
            ``model_name`` is stored under the ``'model'`` column.

    Returns:
        A string such as ``"R$ 12.345,67"``: ``expm1`` of the model output,
        with ``.`` as thousands separator and ``,`` as decimal separator.
    """
    print(f"Recebida nova predição: {make}, {model_name}, {year}, {odometer}km")

    input_data_dict = {
        'make': make, 'model': model_name, 'year': year, 'odometer': odometer,
        'trim': trim, 'body': body, 'transmission': transmission,
        'color': color, 'interior': interior,
    }
    input_df = pd.DataFrame([input_data_dict])

    ano_referencia = 2024
    input_df['age'] = ano_referencia - input_df['year']
    # Read the clock once so month and weekday always describe the same instant.
    agora = pd.to_datetime('now')
    input_df['sale_month'] = agora.month
    input_df['sale_dayofweek'] = agora.dayofweek

    def criar_representacao_textual(row):
        # Features that have no matching column in the row are skipped.
        partes = [f"{coluna}[{str(row[coluna])}]" for coluna in model_features if coluna in row]
        return " | ".join(partes)

    text_input = input_df.apply(criar_representacao_textual, axis=1).iloc[0]

    encoded_text = tokenizer.encode_plus(
        text_input, max_length=128, add_special_tokens=True,
        return_token_type_ids=False, padding='max_length',
        truncation=True, return_attention_mask=True, return_tensors='pt',
    )

    input_ids = encoded_text['input_ids'].to(device)
    attention_mask = encoded_text['attention_mask'].to(device)

    with torch.no_grad():
        prediction_log = model(input_ids, attention_mask)

    # The network output is treated as log1p(price); expm1 undoes that.
    predicted_price = np.expm1(prediction_log.cpu().numpy()[0][0])

    # Format as 1,234.56 first, then swap the separators via a placeholder
    # to obtain 1.234,56.
    return f"R$ {predicted_price:,.2f}".replace(",", "X").replace(".", ",").replace("X", ".")
82
+
83
# Gradio front end: a two-column form, a submit button and a label that shows
# the estimate returned by predict_price.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown("# 🚗 FipeFinder AI: Previsão de Preços de Carros Usados")
    gr.Markdown("Preencha as características do veículo para receber uma estimativa de preço de mercado baseada em nosso modelo de IA.")

    with gr.Row():
        # Left column: vehicle identity and usage.
        with gr.Column():
            make_input = gr.Dropdown(
                label="Marca",
                choices=["Ford", "Chevrolet", "Honda", "Toyota", "Nissan", "Hyundai", "Kia"],
            )
            model_input = gr.Textbox(label="Modelo", placeholder="Ex: Ka, Onix, Civic...")
            year_input = gr.Slider(label="Ano do Modelo", minimum=2000, maximum=2015, step=1, value=2012)
            odo_input = gr.Number(label="Quilometragem (km)", value=80000)
            trim_input = gr.Textbox(label="Versão", placeholder="Ex: SE 1.0, LTZ, EXL...")

        # Right column: body style, transmission and colours.
        with gr.Column():
            body_input = gr.Dropdown(
                label="Carroceria",
                choices=["Sedan", "SUV", "Hatchback", "Pickup", "Minivan", "Coupe"],
            )
            trans_input = gr.Radio(label="Transmissão", choices=["automatic", "manual"], value="automatic")
            color_input = gr.Textbox(label="Cor", placeholder="Ex: preto, branco, prata...")
            interior_input = gr.Textbox(label="Cor Interior", placeholder="Ex: preto, cinza, bege...")

    predict_btn = gr.Button("Estimar Preço", variant="primary")

    with gr.Row():
        output_price = gr.Label(label="Preço Estimado")

    # One list serves both the click handler and the examples table; its order
    # follows predict_price's positional parameters.
    campos_entrada = [
        make_input, model_input, year_input, odo_input, trim_input,
        body_input, trans_input, color_input, interior_input,
    ]

    predict_btn.click(fn=predict_price, inputs=campos_entrada, outputs=output_price)

    exemplos = [
        ["Ford", "Focus", 2013, 75000, "SE 2.0", "Hatchback", "automatic", "prata", "preto"],
        ["Chevrolet", "Onix", 2014, 90000, "LT 1.0", "Hatchback", "manual", "branco", "cinza"],
    ]
    gr.Examples(examples=exemplos, inputs=campos_entrada)

if __name__ == "__main__":
    iface.launch()
model/best_model_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48c0c48e29f53c023e16f98357f670bdb947cacf5195a70f3d64de6bf7bed6db
3
+ size 436171458
model/model_features.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"numeric": ["age", "odometer", "sale_month", "sale_dayofweek", "sale_dayofyear"], "categorical": ["make", "model", "trim", "body", "transmission", "color", "interior"]}
model/preprocessor.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36e18e8602fe9477c0fde1100d402befa84dd74508e61f0241684b25b1f4c5dd
3
+ size 769
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ torchvision
4
+ torchaudio
5
+ transformers
6
+ sentence-transformers
7
+ scikit-learn==1.3.2
8
+ joblib
9
+ pandas
10
+ numpy
11
+ python-dotenv