import torch
import sys
import pandas as pd
pd.set_option('future.no_silent_downcasting', True)
from typing import Any, TypedDict, Optional, Tuple
import datetime
import math
import importlib.util
from huggingface_hub import hf_hub_download
import pickle


"""
Data container class representing the data shape of the synapse coming into `run_inference`
"""


class ProcessedSynapse(TypedDict):
    id: Optional[str]
    nextplace_id: Optional[str]
    property_id: Optional[str]
    listing_id: Optional[str]
    address: Optional[str]
    city: Optional[str]
    state: Optional[str]
    zip_code: Optional[str]
    price: Optional[float]
    beds: Optional[int]
    baths: Optional[float]
    sqft: Optional[int]
    lot_size: Optional[int]
    year_built: Optional[int]
    days_on_market: Optional[int]
    latitude: Optional[float]
    longitude: Optional[float]
    property_type: Optional[str]
    last_sale_date: Optional[str]
    hoa_dues: Optional[float]
    query_date: Optional[str]
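
# Illustrative example of the shape above. The values are purely hypothetical
# placeholders (not real listing data); at inference time only the fields
# consumed by `run_inference` need to be meaningful:
#
#     {
#         "nextplace_id": "np-123",
#         "city": "Seattle",
#         "state": "WA",
#         "price": 425000.0,
#         "beds": 3,
#         "baths": 2.0,
#         "sqft": 1500,
#         "property_type": "6",
#         "days_on_market": 12,
#         "query_date": "2024-01-01",
#     }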


"""
This class must do two things
1) The constructor must load the model
2) This class must implement a method called `run_inference` that takes the input data and returns a tuple
    of float, str representing the predicted sale price and the predicted sale date.
"""


class MLBaseModelDriver:

    def __init__(self):
        self.model, self.label_encoder, self.scaler = self.load_model()

    def load_model(self) -> Tuple[Any, Any, Any]:
        """
        load the model and model parameters
        :return: model, label encoder, and scaler
        """
        print(f"Loading model...")
        model_file, scaler_file, label_encoders_file, model_class_file = self._download_model_files()
        model_class = self._import_model_class(model_class_file)

        model = model_class(input_dim=4)
        state_dict = torch.load(model_file, weights_only=False)
        model.load_state_dict(state_dict)
        model.eval()

        # Load additional artifacts
        with open(scaler_file, 'rb') as f:
            scaler = pickle.load(f)

        with open(label_encoders_file, 'rb') as f:
            label_encoders = pickle.load(f)

        print(f"Model Loaded.")
        return model, label_encoders, scaler

    def _download_model_files(self) -> Tuple[str, str, str, str]:
        """
        download files from hugging face
        :return: downloaded files
        """
        model_path = "Nickel5HF/NextPlace"

        # Download the model files from the Hugging Face Hub
        model_file = hf_hub_download(repo_id=model_path, filename="model_files/real_estate_model.pth")
        scaler_file = hf_hub_download(repo_id=model_path, filename="model_files/scaler.pkl")
        label_encoders_file = hf_hub_download(repo_id=model_path, filename="model_files/label_encoder.pkl")
        model_class_file = hf_hub_download(repo_id=model_path, filename="MLBaseModel.py")

        # Return the local file paths to the caller
        return model_file, scaler_file, label_encoders_file, model_class_file

    def _import_model_class(self, model_class_file):
        """
        import the model class and instantiate it
        :param model_class_file: file path to the model class
        :return: None
        """
        # Reference docs here: https://docs.python.org/3/library/importlib.html#importlib.util.spec_from_loader
        module_name = "MLBaseModel"
        spec = importlib.util.spec_from_file_location(module_name, model_class_file)
        model_module = importlib.util.module_from_spec(spec)
        sys.modules[module_name] = model_module
        spec.loader.exec_module(model_module)

        if hasattr(model_module, "MLBaseModel"):
            return model_module.MLBaseModel
        else:
            raise AttributeError("The module does not contain a class named 'MLBaseModel'")

    def run_inference(self, input_data: ProcessedSynapse) -> Tuple[float, str]:
        """
        run inference using the MLBaseModel
        :param input_data: synapse from the validator
        :return: the predicted sale price and date
        """
        input_tensor = self._preprocess_input(input_data)

        with torch.no_grad():
            prediction = self.model(input_tensor)
            predicted_sale_price, predicted_days_on_market = prediction[0].numpy()
            predicted_days_on_market = math.floor(predicted_days_on_market)
            predicted_sale_date = self._sale_date_predictor(input_data['days_on_market'], predicted_days_on_market)

        return float(predicted_sale_price), predicted_sale_date.strftime("%Y-%m-%d")

    def _sale_date_predictor(self, days_on_market: Optional[int], predicted_days_on_market: int) -> datetime.date:
        """
        convert predicted days on market to a sale date
        :param days_on_market: number of days this home has been on the market
        :param predicted_days_on_market: the predicted number of days for this home on the market
        :return: the predicted sale date
        """
        if days_on_market is None:
            days_on_market = 0
            
        if days_on_market < predicted_days_on_market:
            days_until_sale = predicted_days_on_market - days_on_market
            sale_date = datetime.date.today() + datetime.timedelta(days=days_until_sale)
            return sale_date
        else:
            return datetime.date.today() + datetime.timedelta(days=1)

    def _preprocess_input(self, data: ProcessedSynapse) -> torch.Tensor:
        """
        preprocess the input for inference
        :param data: synapse from the validator
        :return: tensor representing the synapse
        """
        df = pd.DataFrame([data])
    
        # Set defaults
        default_beds = 3
        default_sqft = 1500.0
        default_property_type = 6  # ensure it's int, not str
        default_price = 300000.0  # optional: fallback if price is NaN
    
        # Clean and fill fields
        df['beds'] = pd.to_numeric(df['beds'], errors='coerce').fillna(default_beds).astype(int)
        df['sqft'] = pd.to_numeric(df['sqft'], errors='coerce').fillna(default_sqft)
        df['price'] = pd.to_numeric(df['price'], errors='coerce').fillna(default_price)
        df['property_type'] = pd.to_numeric(df['property_type'], errors='coerce').fillna(default_property_type).astype(int)
    
        # Scale numeric features
        df[['sqft', 'price']] = self.scaler.transform(df[['sqft', 'price']])
    
        # Final input matrix
        X = df[['beds', 'sqft', 'property_type', 'price']].astype('float32')
        input_tensor = torch.tensor(X.values, dtype=torch.float32)
        return input_tensor
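

if __name__ == "__main__":
    # Minimal usage sketch, not part of the miner contract. The listing values
    # below are hypothetical placeholders rather than real market data; in
    # production the synapse arrives from the validator already processed.
    # Only the fields read by run_inference / _preprocess_input are populated.
    driver = MLBaseModelDriver()
    example_synapse = {
        "beds": 3,
        "baths": 2.0,
        "sqft": 1500,
        "price": 425000.0,
        "property_type": "6",
        "days_on_market": 12,
    }
    predicted_price, predicted_date = driver.run_inference(example_synapse)
    print(f"Predicted sale price: {predicted_price:.2f}")
    print(f"Predicted sale date: {predicted_date}")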