Huydinh1205 committed on
Commit
46a9203
·
1 Parent(s): 67098ed

other file

Browse files
__pycache__/tree_decision.cpython-39.pyc ADDED
Binary file (3 kB). View file
 
tree.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:008b57f8c2a42cb84690bca33c476c93be99c4c53fc3354fdcbb1a6f7268d7f3
3
+ size 1092
tree_decision.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import numpy as np
3
+
4
def compute_gini(y):
    """Return the Gini impurity of the class labels in *y*.

    The impurity is ``1 - sum(p_k ** 2)`` where ``p_k`` is the fraction of
    samples carrying label ``k``.

    Args:
        y: One-dimensional sequence or array of non-negative class labels.
            Labels are cast with ``astype(int)`` before counting, so
            fractional values are truncated.

    Returns:
        The impurity as a float; ``0.0`` for an empty *y*, since a node
        without samples has nothing to be impure about.
    """
    labels = np.asarray(y)
    if labels.size == 0:
        # Avoid dividing by a zero sample count.
        return 0.0
    proportions = np.bincount(labels.astype(int)) / labels.size
    return 1 - np.sum(proportions ** 2)
7
+
8
def split_node(feature, y):
    """Find the threshold on one feature that minimises weighted Gini impurity.

    Candidate thresholds are the midpoints between consecutive *distinct*
    feature values. Samples with ``feature <= threshold`` form the left
    group and the rest form the right group. Candidates that leave either
    group empty are skipped, because they do not actually split the data.

    Args:
        feature: One-dimensional array-like with one feature value per sample.
        y: One-dimensional array-like of class labels aligned with *feature*.

    Returns:
        A ``(threshold, gini)`` tuple for the best split. When no candidate
        separates the samples (fewer than two distinct values), the result
        is ``(None, float("inf"))``.
    """
    feature = np.asarray(feature)
    y = np.asarray(y)
    m = len(y)
    best_gini = float("inf")
    best_average = None

    # np.unique returns sorted distinct values, so duplicates never yield a
    # threshold that coincides with a data value.
    distinct = np.unique(feature)
    for lower, upper in zip(distinct[:-1], distinct[1:]):
        average = (lower + upper) / 2
        y_left = y[feature <= average]
        y_right = y[feature > average]
        if len(y_left) == 0 or len(y_right) == 0:
            # Degenerate partition (e.g. midpoint rounded onto an endpoint).
            continue
        gini = (
            (len(y_left) / m) * compute_gini(y_left)
            + (len(y_right) / m) * compute_gini(y_right)
        )
        # Strict comparison keeps the first (smallest) threshold on ties.
        if gini < best_gini:
            best_gini = gini
            best_average = average
    return best_average, best_gini
24
+
25
class Node:
    """A node of a binary decision tree.

    An internal node stores the feature it tests (``feature``) and the
    threshold (``branch``); its two children are kept in ``node_children``
    in the order they were added (first = "at or below threshold",
    second = "above threshold"). A leaf stores its prediction in ``value``.
    """

    def __init__(self, feature=None, branch=None, value=None):
        """Create a node; it is a non-leaf until ``set_leaf(True)`` is called."""
        self.feature = feature
        self.branch = branch
        self.node_children = []
        self.is_leaf = False
        self.value = value

    def __str__(self):
        """Return a one-line summary of the node's fields."""
        return f"Feature: {self.feature}, Branch: {self.branch}, Value: {self.value}, Leaf: {self.is_leaf}"

    def add_child(self, node):
        """Append *node* to this node's list of children."""
        self.node_children.append(node)

    def set_leaf(self, value):
        """Set the leaf flag of this node to *value*."""
        self.is_leaf = value

    def search(self, x_dict):
        """Return the prediction for one sample by walking down the tree.

        Args:
            x_dict: Mapping from feature name to that feature's value for
                the sample being classified.

        Returns:
            The ``value`` of the leaf that the sample reaches.
        """
        if self.is_leaf:
            return self.value
        # Use "<=" so a sample exactly on the threshold follows the same
        # side as it did when the training rows were partitioned.
        if x_dict[self.feature] <= self.branch:
            return self.node_children[0].search(x_dict)
        return self.node_children[1].search(x_dict)
49
+
50
def _majority_leaf(y):
    """Build a leaf predicting the most frequent (integer-cast) label in *y*."""
    leaf = Node(value=np.bincount(y.astype(int)).argmax())
    leaf.set_leaf(True)
    return leaf


def construct_decision_tree(x, y, feature_names):
    """Recursively build a binary decision tree using Gini-based splits.

    Args:
        x: Two-dimensional array-like of samples (rows) by features (columns).
        y: One-dimensional array-like of class labels, one per row of *x*.
        feature_names: Sequence of names indexed like the columns of *x*;
            the chosen name is stored on each internal node.

    Returns:
        The root ``Node`` of the tree. A leaf is returned when all labels
        agree, when *feature_names* is empty, or when no feature can divide
        the samples into two non-empty groups (majority label in the last
        two cases).

    Raises:
        ValueError: If *y* contains no samples.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    if y.size == 0:
        raise ValueError("cannot build a decision tree from zero samples")

    if len(np.unique(y)) == 1:
        leaf = Node(value=y[0])
        leaf.set_leaf(True)
        return leaf
    if len(feature_names) == 0:
        return _majority_leaf(y)

    # Pick the lowest-impurity split among features that really separate
    # the rows. A split with an empty side would hand the same data to a
    # child and recurse without end.
    best = None
    for index in range(x.shape[1]):
        threshold, gini = split_node(x[:, index], y)
        if threshold is None:
            continue
        goes_left = x[:, index] <= threshold
        if goes_left.all() or not goes_left.any():
            continue
        # Strict comparison keeps the first feature on ties.
        if best is None or gini < best[0]:
            best = (gini, index, threshold, goes_left)

    if best is None:
        # Rows are indistinguishable on every feature but labels differ.
        return _majority_leaf(y)

    _, best_feature_index, split_value, goes_left = best
    goes_right = ~goes_left

    node = Node(feature=feature_names[best_feature_index], branch=split_value)
    node.add_child(construct_decision_tree(x[goes_left], y[goes_left], feature_names))
    node.add_child(construct_decision_tree(x[goes_right], y[goes_right], feature_names))

    return node