import torch import sys import pandas as pd pd.set_option('future.no_silent_downcasting', True) from typing import TypedDict, Optional, Tuple import datetime import math import importlib.util from huggingface_hub import hf_hub_download import pickle """ Data container class representing the data shape of the synapse coming into `run_inference` """ class ProcessedSynapse(TypedDict): id: Optional[str] nextplace_id: Optional[str] property_id: Optional[str] listing_id: Optional[str] address: Optional[str] city: Optional[str] state: Optional[str] zip_code: Optional[str] price: Optional[float] beds: Optional[int] baths: Optional[float] sqft: Optional[int] lot_size: Optional[int] year_built: Optional[int] days_on_market: Optional[int] latitude: Optional[float] longitude: Optional[float] property_type: Optional[str] last_sale_date: Optional[str] hoa_dues: Optional[float] query_date: Optional[str] """ This class must do two things 1) The constructor must load the model 2) This class must implement a method called `run_inference` that takes the input data and returns a tuple of float, str representing the predicted sale price and the predicted sale date. """ class MLBaseModelDriver: def __init__(self): self.model, self.label_encoder, self.scaler = self.load_model() def load_model(self) -> Tuple[any, any, any]: """ load the model and model parameters :return: model, label encoder, and scaler """ print(f"Loading model...") model_file, scaler_file, label_encoders_file, model_class_file = self._download_model_files() model_class = self._import_model_class(model_class_file) model = model_class(input_dim=4) state_dict = torch.load(model_file, weights_only=False) model.load_state_dict(state_dict) model.eval() # Load additional artifacts with open(scaler_file, 'rb') as f: scaler = pickle.load(f) with open(label_encoders_file, 'rb') as f: label_encoders = pickle.load(f) print(f"Model Loaded.") return model, label_encoders, scaler def _download_model_files(self) -> Tuple[str, str, str, str]: """ download files from hugging face :return: downloaded files """ model_path = "Nickel5HF/NextPlace" # Download the model files from the Hugging Face Hub model_file = hf_hub_download(repo_id=model_path, filename="model_files/real_estate_model.pth") scaler_file = hf_hub_download(repo_id=model_path, filename="model_files/scaler.pkl") label_encoders_file = hf_hub_download(repo_id=model_path, filename="model_files/label_encoder.pkl") model_class_file = hf_hub_download(repo_id=model_path, filename="MLBaseModel.py") # Load the model and artifacts return model_file, scaler_file, label_encoders_file, model_class_file def _import_model_class(self, model_class_file): """ import the model class and instantiate it :param model_class_file: file path to the model class :return: None """ # Reference docs here: https://docs.python.org/3/library/importlib.html#importlib.util.spec_from_loader module_name = "MLBaseModel" spec = importlib.util.spec_from_file_location(module_name, model_class_file) model_module = importlib.util.module_from_spec(spec) sys.modules[module_name] = model_module spec.loader.exec_module(model_module) if hasattr(model_module, "MLBaseModel"): return model_module.MLBaseModel else: raise AttributeError(f"The module does not contain a class named 'MLBaseModel'") def run_inference(self, input_data: ProcessedSynapse) -> Tuple[float, str]: """ run inference using the MLBaseModel :param input_data: synapse from the validator :return: the predicted sale price and date """ input_tensor = self._preprocess_input(input_data) with torch.no_grad(): prediction = self.model(input_tensor) predicted_sale_price, predicted_days_on_market = prediction[0].numpy() predicted_days_on_market = math.floor(predicted_days_on_market) predicted_sale_date = self._sale_date_predictor(input_data['days_on_market'], predicted_days_on_market) return float(predicted_sale_price), predicted_sale_date.strftime("%Y-%m-%d") def _sale_date_predictor(self, days_on_market: Optional[int], predicted_days_on_market: int) -> datetime.date: """ convert predicted days on market to a sale date :param days_on_market: number of days this home has been on the market :param predicted_days_on_market: the predicted number of days for this home on the market :return: the predicted sale date """ if days_on_market is None: days_on_market = 0 if days_on_market < predicted_days_on_market: days_until_sale = predicted_days_on_market - days_on_market sale_date = datetime.date.today() + datetime.timedelta(days=days_until_sale) return sale_date else: return datetime.date.today() + datetime.timedelta(days=1) def _preprocess_input(self, data: ProcessedSynapse) -> torch.tensor: """ preprocess the input for inference :param data: synapse from the validator :return: tensor representing the synapse """ df = pd.DataFrame([data]) # Set defaults default_beds = 3 default_sqft = 1500.0 default_property_type = 6 # ensure it's int, not str default_price = 300000.0 # optional: fallback if price is NaN # Clean and fill fields df['beds'] = pd.to_numeric(df['beds'], errors='coerce').fillna(default_beds).astype(int) df['sqft'] = pd.to_numeric(df['sqft'], errors='coerce').fillna(default_sqft) df['price'] = pd.to_numeric(df['price'], errors='coerce').fillna(default_price) df['property_type'] = pd.to_numeric(df['property_type'], errors='coerce').fillna(default_property_type).astype(int) # Scale numeric features df[['sqft', 'price']] = self.scaler.transform(df[['sqft', 'price']]) # Final input matrix X = df[['beds', 'sqft', 'property_type', 'price']].astype('float32') input_tensor = torch.tensor(X.values, dtype=torch.float32) return input_tensor