File size: 1,088 Bytes
8c10e4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import xgboost as xgb
from data_cleaning import main
from sklearn.metrics import classification_report
import pandas as pd
import dill

def load_data(path="transformed_data.pkl"):
    """Load the pre-split train/test data from a dill-serialized file.

    The file is expected to hold a 4-item sequence that unpacks as
    ``(X_train, X_test, y_train, y_test)``.

    Args:
        path: Location of the serialized splits. Defaults to
            ``"transformed_data.pkl"`` in the current working directory.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test)``. Note that this order
        differs from the order in which the items are stored in the file.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the stored object does not unpack into four items.
    """
    # NOTE: dill (like pickle) can execute arbitrary code while loading;
    # only point this at files produced by a trusted pipeline.
    with open(path, "rb") as f:
        X_train, X_test, y_train, y_test = dill.load(f)

    # Regroup so each feature set is followed by its labels.
    return X_train, y_train, X_test, y_test


def build_model(X_train, y_train, X_test, y_test):
    """Fit an XGBoost binary classifier on the training split.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Accepted for interface compatibility; not used here.
        y_test: Accepted for interface compatibility; not used here.

    Returns:
        The fitted ``xgb.XGBClassifier`` instance.
    """
    # Fixed hyperparameters, passed straight to the estimator constructor.
    classifier = xgb.XGBClassifier(
        objective="binary:logistic",
        n_estimators=500,
        learning_rate=0.0010812936756470217,
        max_depth=6,
        subsample=0.36482338465400405,
        colsample_bytree=0.17190210997311706,
        min_child_weight=15,
    )
    classifier.fit(X_train, y_train, verbose=False)
    return classifier


def main():
    """Load the data, train the classifier and print a test-set report."""
    # load_data returns features and labels for train first, then test.
    train_features, train_labels, test_features, test_labels = load_data()

    classifier = build_model(
        train_features, train_labels, test_features, test_labels
    )

    # Score the held-out split and print the per-class metrics.
    predictions = classifier.predict(test_features)
    print(classification_report(test_labels, predictions))


# Run the train/evaluate pipeline only when executed as a script,
# not when this module is imported.
if __name__=="__main__":
    main()