File size: 6,614 Bytes
4cd7e71 63dd971 4cd7e71 573aeab 4cd7e71 573aeab 4cd7e71 1332239 4cd7e71 1332239 4cd7e71 1332239 4cd7e71 1332239 4cd7e71 bbd11b7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
import datetime
import importlib.util
import math
import pickle
import sys
from typing import Any, Optional, Tuple, TypedDict

import pandas as pd
import torch
from huggingface_hub import hf_hub_download

pd.set_option('future.no_silent_downcasting', True)
class ProcessedSynapse(TypedDict):
    """
    Data container class representing the data shape of the synapse coming
    into `run_inference`.

    Every key is declared ``Optional``, so any value may be ``None``.
    """

    # Identifiers
    id: Optional[str]
    nextplace_id: Optional[str]
    property_id: Optional[str]
    listing_id: Optional[str]

    # Location
    address: Optional[str]
    city: Optional[str]
    state: Optional[str]
    zip_code: Optional[str]

    # Listing details
    price: Optional[float]
    beds: Optional[int]
    baths: Optional[float]
    sqft: Optional[int]
    lot_size: Optional[int]
    year_built: Optional[int]
    days_on_market: Optional[int]
    latitude: Optional[float]
    longitude: Optional[float]
    property_type: Optional[str]
    last_sale_date: Optional[str]
    hoa_dues: Optional[float]
    query_date: Optional[str]
"""
This class must do two things
1) The constructor must load the model
2) This class must implement a method called `run_inference` that takes the input data and returns a tuple
of float, str representing the predicted sale price and the predicted sale date.
"""
class MLBaseModelDriver:
def __init__(self):
self.model, self.label_encoder, self.scaler = self.load_model()
def load_model(self) -> Tuple[any, any, any]:
"""
load the model and model parameters
:return: model, label encoder, and scaler
"""
print(f"Loading model...")
model_file, scaler_file, label_encoders_file, model_class_file = self._download_model_files()
model_class = self._import_model_class(model_class_file)
model = model_class(input_dim=4)
state_dict = torch.load(model_file, weights_only=False)
model.load_state_dict(state_dict)
model.eval()
# Load additional artifacts
with open(scaler_file, 'rb') as f:
scaler = pickle.load(f)
with open(label_encoders_file, 'rb') as f:
label_encoders = pickle.load(f)
print(f"Model Loaded.")
return model, label_encoders, scaler
def _download_model_files(self) -> Tuple[str, str, str, str]:
"""
download files from hugging face
:return: downloaded files
"""
model_path = "Nickel5HF/NextPlace"
# Download the model files from the Hugging Face Hub
model_file = hf_hub_download(repo_id=model_path, filename="model_files/real_estate_model.pth")
scaler_file = hf_hub_download(repo_id=model_path, filename="model_files/scaler.pkl")
label_encoders_file = hf_hub_download(repo_id=model_path, filename="model_files/label_encoder.pkl")
model_class_file = hf_hub_download(repo_id=model_path, filename="MLBaseModel.py")
# Load the model and artifacts
return model_file, scaler_file, label_encoders_file, model_class_file
def _import_model_class(self, model_class_file):
"""
import the model class and instantiate it
:param model_class_file: file path to the model class
:return: None
"""
# Reference docs here: https://docs.python.org/3/library/importlib.html#importlib.util.spec_from_loader
module_name = "MLBaseModel"
spec = importlib.util.spec_from_file_location(module_name, model_class_file)
model_module = importlib.util.module_from_spec(spec)
sys.modules[module_name] = model_module
spec.loader.exec_module(model_module)
if hasattr(model_module, "MLBaseModel"):
return model_module.MLBaseModel
else:
raise AttributeError(f"The module does not contain a class named 'MLBaseModel'")
def run_inference(self, input_data: ProcessedSynapse) -> Tuple[float, str]:
"""
run inference using the MLBaseModel
:param input_data: synapse from the validator
:return: the predicted sale price and date
"""
input_tensor = self._preprocess_input(input_data)
with torch.no_grad():
prediction = self.model(input_tensor)
predicted_sale_price, predicted_days_on_market = prediction[0].numpy()
predicted_days_on_market = math.floor(predicted_days_on_market)
predicted_sale_date = self._sale_date_predictor(input_data['days_on_market'], predicted_days_on_market)
return float(predicted_sale_price), predicted_sale_date.strftime("%Y-%m-%d")
def _sale_date_predictor(self, days_on_market: Optional[int], predicted_days_on_market: int) -> datetime.date:
"""
convert predicted days on market to a sale date
:param days_on_market: number of days this home has been on the market
:param predicted_days_on_market: the predicted number of days for this home on the market
:return: the predicted sale date
"""
if days_on_market is None:
days_on_market = 0
if days_on_market < predicted_days_on_market:
days_until_sale = predicted_days_on_market - days_on_market
sale_date = datetime.date.today() + datetime.timedelta(days=days_until_sale)
return sale_date
else:
return datetime.date.today() + datetime.timedelta(days=1)
def _preprocess_input(self, data: ProcessedSynapse) -> torch.tensor:
"""
preprocess the input for inference
:param data: synapse from the validator
:return: tensor representing the synapse
"""
df = pd.DataFrame([data])
# Set defaults
default_beds = 3
default_sqft = 1500.0
default_property_type = 6 # ensure it's int, not str
default_price = 300000.0 # optional: fallback if price is NaN
# Clean and fill fields
df['beds'] = pd.to_numeric(df['beds'], errors='coerce').fillna(default_beds).astype(int)
df['sqft'] = pd.to_numeric(df['sqft'], errors='coerce').fillna(default_sqft)
df['price'] = pd.to_numeric(df['price'], errors='coerce').fillna(default_price)
df['property_type'] = pd.to_numeric(df['property_type'], errors='coerce').fillna(default_property_type).astype(int)
# Scale numeric features
df[['sqft', 'price']] = self.scaler.transform(df[['sqft', 'price']])
# Final input matrix
X = df[['beds', 'sqft', 'property_type', 'price']].astype('float32')
input_tensor = torch.tensor(X.values, dtype=torch.float32)
return input_tensor
|