sm4rtdev committed on
Commit
4cd7e71
·
verified ·
1 Parent(s): b4272e4

Update MLBaseModelDriver.py

Browse files
Files changed (1) hide show
  1. MLBaseModelDriver.py +159 -159
MLBaseModelDriver.py CHANGED
@@ -1,159 +1,159 @@
1
- import torch
2
- import sys
3
- import pandas as pd
4
- from typing import TypedDict, Optional, Tuple
5
- import datetime
6
- import math
7
- import importlib.util
8
- from huggingface_hub import hf_hub_download
9
- import pickle
10
-
11
-
12
- """
13
- Data container class representing the data shape of the synapse coming into `run_inference`
14
- """
15
-
16
-
17
- class ProcessedSynapse(TypedDict):
18
- id: Optional[str]
19
- nextplace_id: Optional[str]
20
- property_id: Optional[str]
21
- listing_id: Optional[str]
22
- address: Optional[str]
23
- city: Optional[str]
24
- state: Optional[str]
25
- zip_code: Optional[str]
26
- price: Optional[float]
27
- beds: Optional[int]
28
- baths: Optional[float]
29
- sqft: Optional[int]
30
- lot_size: Optional[int]
31
- year_built: Optional[int]
32
- days_on_market: Optional[int]
33
- latitude: Optional[float]
34
- longitude: Optional[float]
35
- property_type: Optional[str]
36
- last_sale_date: Optional[str]
37
- hoa_dues: Optional[float]
38
- query_date: Optional[str]
39
-
40
-
41
- """
42
- This class must do two things
43
- 1) The constructor must load the model
44
- 2) This class must implement a method called `run_inference` that takes the input data and returns a tuple
45
- of float, str representing the predicted sale price and the predicted sale date.
46
- """
47
-
48
-
49
- class MLBaseModelDriver:
50
-
51
- def __init__(self):
52
- self.model, self.label_encoder, self.scaler = self.load_model()
53
-
54
- def load_model(self) -> Tuple[any, any, any]:
55
- """
56
- load the model and model parameters
57
- :return: model, label encoder, and scaler
58
- """
59
- print(f"Loading model...")
60
- model_file, scaler_file, label_encoders_file, model_class_file = self._download_model_files()
61
- model_class = self._import_model_class(model_class_file)
62
-
63
- model = model_class(input_dim=4)
64
- state_dict = torch.load(model_file, weights_only=False)
65
- model.load_state_dict(state_dict)
66
- model.eval()
67
-
68
- # Load additional artifacts
69
- with open(scaler_file, 'rb') as f:
70
- scaler = pickle.load(f)
71
-
72
- with open(label_encoders_file, 'rb') as f:
73
- label_encoders = pickle.load(f)
74
-
75
- print(f"Model Loaded.")
76
- return model, label_encoders, scaler
77
-
78
- def _download_model_files(self) -> Tuple[str, str, str, str]:
79
- """
80
- download files from hugging face
81
- :return: downloaded files
82
- """
83
- model_path = "Nickel5HF/NextPlace"
84
-
85
- # Download the model files from the Hugging Face Hub
86
- model_file = hf_hub_download(repo_id=model_path, filename="model_files/real_estate_model.pth")
87
- scaler_file = hf_hub_download(repo_id=model_path, filename="model_files/scaler.pkl")
88
- label_encoders_file = hf_hub_download(repo_id=model_path, filename="model_files/label_encoder.pkl")
89
- model_class_file = hf_hub_download(repo_id=model_path, filename="MLBaseModel.py")
90
-
91
- # Load the model and artifacts
92
- return model_file, scaler_file, label_encoders_file, model_class_file
93
-
94
- def _import_model_class(self, model_class_file):
95
- """
96
- import the model class and instantiate it
97
- :param model_class_file: file path to the model class
98
- :return: None
99
- """
100
- # Reference docs here: https://docs.python.org/3/library/importlib.html#importlib.util.spec_from_loader
101
- module_name = "MLBaseModel"
102
- spec = importlib.util.spec_from_file_location(module_name, model_class_file)
103
- model_module = importlib.util.module_from_spec(spec)
104
- sys.modules[module_name] = model_module
105
- spec.loader.exec_module(model_module)
106
-
107
- if hasattr(model_module, "MLBaseModel"):
108
- return model_module.MLBaseModel
109
- else:
110
- raise AttributeError(f"The module does not contain a class named 'MLBaseModel'")
111
-
112
- def run_inference(self, input_data: ProcessedSynapse) -> Tuple[float, str]:
113
- """
114
- run inference using the MLBaseModel
115
- :param input_data: synapse from the validator
116
- :return: the predicted sale price and date
117
- """
118
- input_tensor = self._preprocess_input(input_data)
119
-
120
- with torch.no_grad():
121
- prediction = self.model(input_tensor)
122
- predicted_sale_price, predicted_days_on_market = prediction[0].numpy()
123
- predicted_days_on_market = math.floor(predicted_days_on_market)
124
- predicted_sale_date = self._sale_date_predictor(input_data['days_on_market'], predicted_days_on_market)
125
-
126
- return float(predicted_sale_price), predicted_sale_date.strftime("%Y-%m-%d")
127
-
128
- def _sale_date_predictor(self, days_on_market: int, predicted_days_on_market: int) -> datetime.date:
129
- """
130
- convert predicted days on market to a sale date
131
- :param days_on_market: number of days this home has been on the market
132
- :param predicted_days_on_market: the predicted number of days for this home on the market
133
- :return: the predicted sale date
134
- """
135
- if days_on_market < predicted_days_on_market:
136
- days_until_sale = predicted_days_on_market - days_on_market
137
- sale_date = datetime.date.today() + datetime.timedelta(days=days_until_sale)
138
- return sale_date
139
- else:
140
- return datetime.date.today() + datetime.timedelta(days=1)
141
-
142
- def _preprocess_input(self, data: ProcessedSynapse) -> torch.tensor:
143
- """
144
- preprocess the input for inference
145
- :param data: synapse from the validator
146
- :return: tensor representing the synapse
147
- """
148
- df = pd.DataFrame([data])
149
- default_beds = 3
150
- default_sqft = 1500.0
151
- default_property_type = '6'
152
- df['beds'] = df['beds'].fillna(default_beds)
153
- df['sqft'] = pd.to_numeric(df['sqft'], errors='coerce').fillna(default_sqft)
154
- df['property_type'] = df['property_type'].fillna(default_property_type)
155
- df['property_type'] = df['property_type'].astype(int)
156
- df[['sqft', 'price']] = self.scaler.transform(df[['sqft', 'price']])
157
- X = df[['beds', 'sqft', 'property_type', 'price']]
158
- input_tensor = torch.tensor(X.values, dtype=torch.float32)
159
- return input_tensor
 
1
+ import torch
2
+ import sys
3
+ import pandas as pd
4
+ from typing import TypedDict, Optional, Tuple
5
+ import datetime
6
+ import math
7
+ import importlib.util
8
+ from huggingface_hub import hf_hub_download
9
+ import pickle
10
+
11
+
12
+ """
13
+ Data container class representing the data shape of the synapse coming into `run_inference`
14
+ """
15
+
16
+
17
class ProcessedSynapse(TypedDict):
    """
    Shape of the synapse dictionary passed to `MLBaseModelDriver.run_inference`.

    Every field is declared Optional, so consumers must expect None for any of them.
    """
    # Identifier fields; none of them are read by the driver code in this file.
    id: Optional[str]
    nextplace_id: Optional[str]
    property_id: Optional[str]
    listing_id: Optional[str]

    # Location of the property as free-form strings; not read by the driver.
    address: Optional[str]
    city: Optional[str]
    state: Optional[str]
    zip_code: Optional[str]

    # `price`, `beds` and `sqft` are model features read by `_preprocess_input`.
    # `baths`, `lot_size` and `year_built` are not read by the driver.
    price: Optional[float]
    beds: Optional[int]
    baths: Optional[float]
    sqft: Optional[int]
    lot_size: Optional[int]
    year_built: Optional[int]

    # Read by `run_inference` to turn the predicted days on market into a sale date.
    days_on_market: Optional[int]

    # Coordinates; not read by the driver.
    latitude: Optional[float]
    longitude: Optional[float]

    # Model feature. Declared as a string here, but `_preprocess_input` casts it to int,
    # so it is presumably a numeric category code -- confirm against the validator.
    property_type: Optional[str]

    # Remaining listing metadata; not read by the driver. Date formats are not
    # established anywhere in this file.
    last_sale_date: Optional[str]
    hoa_dues: Optional[float]
    query_date: Optional[str]
39
+
40
+
41
+ """
42
+ This class must do two things
43
+ 1) The constructor must load the model
44
+ 2) This class must implement a method called `run_inference` that takes the input data and returns a tuple
45
+ of float, str representing the predicted sale price and the predicted sale date.
46
+ """
47
+
48
+
49
class MLBaseModelDriver:
    """
    Driver for the NextPlace base model.

    The constructor downloads the model artifacts from the Hugging Face Hub and loads them.
    `run_inference` converts one synapse into a tuple of (predicted sale price, predicted sale date).
    """

    # Hugging Face repository and the artifact paths inside it
    _REPO_ID = "Nickel5HF/NextPlace"
    _MODEL_FILE = "model_files/real_estate_model.pth"
    _SCALER_FILE = "model_files/scaler.pkl"
    _LABEL_ENCODER_FILE = "model_files/label_encoder.pkl"
    _MODEL_CLASS_FILE = "MLBaseModel.py"

    # Fallback values used when a feature is missing or cannot be parsed as a number
    _DEFAULT_BEDS = 3
    _DEFAULT_SQFT = 1500.0
    _DEFAULT_PROPERTY_TYPE = 6

    # Feature columns in the order they are fed to the model, and the subset the scaler transforms
    _FEATURE_COLUMNS = ['beds', 'sqft', 'property_type', 'price']
    _SCALED_COLUMNS = ['sqft', 'price']

    def __init__(self):
        # NOTE: `label_encoder` is loaded and stored, but no method of this class reads it.
        self.model, self.label_encoder, self.scaler = self.load_model()

    def load_model(self) -> Tuple[torch.nn.Module, object, object]:
        """
        download the model artifacts, build the model and load its weights
        :return: model (in eval mode), label encoder, and scaler
        """
        print("Loading model...")
        model_file, scaler_file, label_encoders_file, model_class_file = self._download_model_files()
        model_class = self._import_model_class(model_class_file)

        model = model_class(input_dim=len(self._FEATURE_COLUMNS))
        # map_location="cpu": inference builds its input on the CPU and reads the output back
        # on the CPU, so a checkpoint saved from a GPU must still load on a CPU-only host.
        # NOTE(security): weights_only=False lets torch unpickle arbitrary objects. The file comes
        # from a remote repository; switch to weights_only=True once the checkpoint is confirmed
        # to contain only tensors.
        state_dict = torch.load(model_file, map_location="cpu", weights_only=False)
        model.load_state_dict(state_dict)
        model.eval()

        # Load additional artifacts
        # NOTE(security): pickle.load executes code embedded in the file; these artifacts are
        # only as trustworthy as the repository they were downloaded from.
        with open(scaler_file, 'rb') as f:
            scaler = pickle.load(f)

        with open(label_encoders_file, 'rb') as f:
            label_encoders = pickle.load(f)

        print("Model Loaded.")
        return model, label_encoders, scaler

    def _download_model_files(self) -> Tuple[str, str, str, str]:
        """
        download the model artifacts from hugging face
        :return: local paths of the model weights, scaler, label encoder, and model class file
        """
        filenames = (
            self._MODEL_FILE,
            self._SCALER_FILE,
            self._LABEL_ENCODER_FILE,
            self._MODEL_CLASS_FILE,
        )
        model_file, scaler_file, label_encoders_file, model_class_file = (
            hf_hub_download(repo_id=self._REPO_ID, filename=filename) for filename in filenames
        )
        return model_file, scaler_file, label_encoders_file, model_class_file

    def _import_model_class(self, model_class_file):
        """
        import the downloaded module and return the model class it defines (not an instance)
        :param model_class_file: file path to the model class
        :return: the `MLBaseModel` class object
        :raises ImportError: if no import spec/loader can be created for the file
        :raises AttributeError: if the module does not define `MLBaseModel`
        """
        # Follows the "Importing a source file directly" recipe from the importlib docs:
        # https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
        # NOTE(security): this executes Python code downloaded from the repository.
        module_name = "MLBaseModel"
        spec = importlib.util.spec_from_file_location(module_name, model_class_file)
        if spec is None or spec.loader is None:
            raise ImportError(f"Could not create an import spec for '{model_class_file}'")

        model_module = importlib.util.module_from_spec(spec)
        sys.modules[module_name] = model_module
        try:
            spec.loader.exec_module(model_module)
        except BaseException:
            # Do not leave a half-initialised module registered if its code fails to run
            sys.modules.pop(module_name, None)
            raise

        if not hasattr(model_module, "MLBaseModel"):
            raise AttributeError("The module does not contain a class named 'MLBaseModel'")
        return model_module.MLBaseModel

    def run_inference(self, input_data: "ProcessedSynapse") -> Tuple[float, str]:
        """
        run inference using the MLBaseModel
        :param input_data: synapse from the validator
        :return: the predicted sale price and the predicted sale date formatted as YYYY-MM-DD
        :raises ValueError: if the synapse has no usable numeric price
        """
        input_tensor = self._preprocess_input(input_data)

        with torch.no_grad():
            prediction = self.model(input_tensor)

        # The batch holds a single row: (sale price, days on market)
        predicted_sale_price, predicted_days_on_market = prediction[0].tolist()
        predicted_days_on_market = math.floor(predicted_days_on_market)

        # `days_on_market` is Optional in the synapse, so it may be None or absent
        predicted_sale_date = self._sale_date_predictor(
            input_data.get('days_on_market'), predicted_days_on_market
        )

        return float(predicted_sale_price), predicted_sale_date.strftime("%Y-%m-%d")

    def _sale_date_predictor(self, days_on_market: Optional[int], predicted_days_on_market: int) -> datetime.date:
        """
        convert predicted days on market to a sale date
        :param days_on_market: number of days this home has been on the market; None is treated as 0
        :param predicted_days_on_market: the predicted number of days for this home on the market
        :return: the predicted sale date, never earlier than tomorrow
        """
        elapsed_days = 0 if days_on_market is None else days_on_market
        # A home already listed longer than predicted is assumed to sell tomorrow
        days_until_sale = max(predicted_days_on_market - elapsed_days, 1)
        return datetime.date.today() + datetime.timedelta(days=days_until_sale)

    def _preprocess_input(self, data: "ProcessedSynapse") -> torch.Tensor:
        """
        preprocess the input for inference
        :param data: synapse from the validator
        :return: float32 tensor with one row holding beds, sqft, property_type and price
        :raises ValueError: if `price` is missing or not numeric
        """
        df = pd.DataFrame([{column: data.get(column) for column in self._FEATURE_COLUMNS}])

        # Coerce each feature to a number; missing or unparseable values fall back to a default.
        # pd.to_numeric avoids the object-dtype `fillna` downcast, whose behaviour differs
        # between pandas versions.
        df['beds'] = pd.to_numeric(df['beds'], errors='coerce').fillna(self._DEFAULT_BEDS)
        df['sqft'] = pd.to_numeric(df['sqft'], errors='coerce').fillna(self._DEFAULT_SQFT)
        df['property_type'] = (
            pd.to_numeric(df['property_type'], errors='coerce')
            .fillna(self._DEFAULT_PROPERTY_TYPE)
            .astype(int)
        )

        # There is no sensible default for price; fail early instead of letting NaN
        # flow through the scaler and the model.
        df['price'] = pd.to_numeric(df['price'], errors='coerce')
        if df['price'].isna().any():
            raise ValueError("A numeric 'price' is required to run inference")

        df[self._SCALED_COLUMNS] = self.scaler.transform(df[self._SCALED_COLUMNS])
        features = df[self._FEATURE_COLUMNS].to_numpy(dtype='float64')
        return torch.tensor(features, dtype=torch.float32)