File size: 1,422 Bytes
d25dee5
 
 
 
 
 
 
 
 
 
 
2f6dde2
d25dee5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV 
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error
from sklearn.impute import KNNImputer
import pickle

def ml_model():
    url = 'https://huggingface.co/spaces/yxmauw/ames-houseprice-recommender/raw/main/streamlit_data.csv'
    df = pd.read_csv(url, header=0) # load data
    X = df.drop('SalePrice', axis=1)
    y = df['SalePrice']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    enet_ratio = [.5,.8,.9,.95]
    alpha_l = [1.,10.,100.,500.,1000.] 

    pipe_enet = Pipeline([
                ('ss', StandardScaler()),
                ('enet', ElasticNet())
                ])

    pipe_enet_params = {'enet__alpha': alpha_l,
                        'enet__l1_ratio': enet_ratio
                        }
    cv_ct = 5
    score = 'neg_mean_absolute_error'

    pipe_enet_gs = GridSearchCV(pipe_enet,
                                    pipe_enet_params,
                                    cv=cv_ct,
                                    scoring=score,
                                    verbose=1
                                    )

    pipe_enet_gs.fit(X_train,y_train)

    pickle.dump(pipe_enet_gs, open('final_model.sav','wb'))