|
import datetime
import importlib.util
import math
import pickle
import sys
from typing import Any, Optional, Tuple, TypedDict

import pandas as pd
import torch
from huggingface_hub import hf_hub_download

pd.set_option('future.no_silent_downcasting', True)
|
|
|
|
|
""" |
|
Data container class representing the data shape of the synapse coming into `run_inference` |
|
""" |
|
|
|
|
|
class ProcessedSynapse(TypedDict):
    """
    Shape of the synapse dictionary passed into `run_inference`.

    Every field is annotated as Optional, so any value may be None.
    """

    # Identifiers
    id: Optional[str]
    nextplace_id: Optional[str]
    property_id: Optional[str]
    listing_id: Optional[str]

    # Location
    address: Optional[str]
    city: Optional[str]
    state: Optional[str]
    zip_code: Optional[str]

    # Listing attributes
    price: Optional[float]
    beds: Optional[int]
    baths: Optional[float]
    sqft: Optional[int]
    lot_size: Optional[int]
    year_built: Optional[int]
    days_on_market: Optional[int]
    latitude: Optional[float]
    longitude: Optional[float]
    property_type: Optional[str]
    last_sale_date: Optional[str]
    hoa_dues: Optional[float]
    query_date: Optional[str]
|
|
|
|
|
""" |
|
This class must do two things |
|
1) The constructor must load the model |
|
2) This class must implement a method called `run_inference` that takes the input data and returns a tuple |
|
of float, str representing the predicted sale price and the predicted sale date. |
|
""" |
|
|
|
|
|
class MLBaseModelDriver:
    """
    Driver for the NextPlace base model.

    1) The constructor loads the model, the label encoder(s) and the scaler.
    2) `run_inference` takes the input data and returns a tuple of (float, str)
       representing the predicted sale price and the predicted sale date.
    """

    # Hugging Face repository that hosts the model artifacts
    HF_REPO_ID = "Nickel5HF/NextPlace"

    # Feature columns, in the order they are fed to the model; the model is
    # constructed with input_dim equal to the number of entries here
    FEATURE_COLUMNS = ('beds', 'sqft', 'property_type', 'price')

    # Columns handed to the scaler, in the order the scaler is called with
    SCALED_COLUMNS = ('sqft', 'price')

    # Fallback values used when a feature is missing or cannot be parsed as a number
    DEFAULT_BEDS = 3
    DEFAULT_SQFT = 1500.0
    DEFAULT_PROPERTY_TYPE = 6
    DEFAULT_PRICE = 300000.0

    def __init__(self):
        """
        load the model, label encoder(s) and scaler and keep them on the instance
        """
        # NOTE(review): `label_encoder` is stored but no method of this class reads it
        self.model, self.label_encoder, self.scaler = self.load_model()

    def load_model(self) -> Tuple[Any, Any, Any]:
        """
        load the model and model parameters
        :return: model, label encoder, and scaler
        """
        print("Loading model...")
        model_file, scaler_file, label_encoders_file, model_class_file = self._download_model_files()
        model_class = self._import_model_class(model_class_file)

        model = model_class(input_dim=len(self.FEATURE_COLUMNS))
        # map_location="cpu": the input tensor is built on the CPU, so the weights must load
        # on the CPU as well, even when the checkpoint was saved from a GPU.
        # NOTE(security): weights_only=False allows the checkpoint to unpickle arbitrary
        # objects; only safe while the Hugging Face repository is trusted.
        state_dict = torch.load(model_file, map_location="cpu", weights_only=False)
        model.load_state_dict(state_dict)
        model.eval()

        # NOTE(security): pickle.load executes code embedded in the file; these artifacts
        # come from the same downloaded repository and must be trusted.
        with open(scaler_file, 'rb') as f:
            scaler = pickle.load(f)

        with open(label_encoders_file, 'rb') as f:
            label_encoders = pickle.load(f)

        print("Model Loaded.")
        return model, label_encoders, scaler

    def _download_model_files(self) -> Tuple[str, str, str, str]:
        """
        download files from hugging face
        :return: local paths of the model weights, scaler, label encoder, and model class file
        """
        model_path = self.HF_REPO_ID

        model_file = hf_hub_download(repo_id=model_path, filename="model_files/real_estate_model.pth")
        scaler_file = hf_hub_download(repo_id=model_path, filename="model_files/scaler.pkl")
        label_encoders_file = hf_hub_download(repo_id=model_path, filename="model_files/label_encoder.pkl")
        model_class_file = hf_hub_download(repo_id=model_path, filename="MLBaseModel.py")

        return model_file, scaler_file, label_encoders_file, model_class_file

    def _import_model_class(self, model_class_file):
        """
        import the module at `model_class_file` and return its `MLBaseModel` class
        :param model_class_file: file path to the model class
        :return: the `MLBaseModel` class object (not an instance)
        :raises ImportError: if no import spec/loader can be created for the file
        :raises AttributeError: if the module does not define `MLBaseModel`
        """
        module_name = "MLBaseModel"
        spec = importlib.util.spec_from_file_location(module_name, model_class_file)
        # spec_from_file_location returns None when it cannot find a loader for the path
        if spec is None or spec.loader is None:
            raise ImportError(f"Could not create an import spec for '{model_class_file}'")

        model_module = importlib.util.module_from_spec(spec)
        # register before executing so the module is importable by name while it runs
        sys.modules[module_name] = model_module
        try:
            spec.loader.exec_module(model_module)
        except BaseException:
            # do not leave a half-initialised module registered; the error is re-raised
            sys.modules.pop(module_name, None)
            raise

        if not hasattr(model_module, "MLBaseModel"):
            raise AttributeError("The module does not contain a class named 'MLBaseModel'")
        return model_module.MLBaseModel

    def run_inference(self, input_data: "ProcessedSynapse") -> Tuple[float, str]:
        """
        run inference using the MLBaseModel
        :param input_data: synapse from the validator
        :return: the predicted sale price and date (date formatted as YYYY-MM-DD)
        """
        input_tensor = self._preprocess_input(input_data)

        with torch.no_grad():
            prediction = self.model(input_tensor)
            # first (only) row of the batch must hold exactly two values: price, days on market
            predicted_sale_price, predicted_days_on_market = prediction[0].tolist()

        predicted_days_on_market = math.floor(predicted_days_on_market)
        # .get: a synapse without the key is treated like one whose value is None
        predicted_sale_date = self._sale_date_predictor(
            input_data.get('days_on_market'), predicted_days_on_market
        )

        return float(predicted_sale_price), predicted_sale_date.strftime("%Y-%m-%d")

    def _sale_date_predictor(self, days_on_market: Optional[int], predicted_days_on_market: int) -> datetime.date:
        """
        convert predicted days on market to a sale date
        :param days_on_market: number of days this home has been on the market (None counts as 0)
        :param predicted_days_on_market: the predicted number of days for this home on the market
        :return: the predicted sale date
        """
        if days_on_market is None:
            days_on_market = 0

        today = datetime.date.today()
        if days_on_market < predicted_days_on_market:
            days_until_sale = predicted_days_on_market - days_on_market
            return today + datetime.timedelta(days=days_until_sale)

        # the listing has already been on the market at least as long as predicted
        return today + datetime.timedelta(days=1)

    def _preprocess_input(self, data: "ProcessedSynapse") -> torch.Tensor:
        """
        preprocess the input for inference
        :param data: synapse from the validator
        :return: float32 tensor with one row and one column per entry of FEATURE_COLUMNS
        """
        feature_columns = list(self.FEATURE_COLUMNS)
        scaled_columns = list(self.SCALED_COLUMNS)

        # reindex guarantees every feature column exists, so a synapse that omits a key
        # falls back to the default below instead of raising KeyError
        df = pd.DataFrame([data]).reindex(columns=feature_columns)

        # non-numeric or missing values become NaN and are replaced by the defaults
        df['beds'] = pd.to_numeric(df['beds'], errors='coerce').fillna(self.DEFAULT_BEDS).astype(int)
        df['sqft'] = pd.to_numeric(df['sqft'], errors='coerce').fillna(self.DEFAULT_SQFT).astype(float)
        df['price'] = pd.to_numeric(df['price'], errors='coerce').fillna(self.DEFAULT_PRICE).astype(float)
        df['property_type'] = (
            pd.to_numeric(df['property_type'], errors='coerce')
            .fillna(self.DEFAULT_PROPERTY_TYPE)
            .astype(int)
        )

        # presumably the scaler was fitted on exactly these two columns in this order - verify
        df[scaled_columns] = self.scaler.transform(df[scaled_columns])

        features = df[feature_columns].to_numpy(dtype='float32')
        return torch.tensor(features, dtype=torch.float32)
|
|