-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
82 lines (71 loc) · 2.37 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import math
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.tree import _tree
def get_scaler(name):
    """Build the feature scaler selected by *name*.

    Args:
        name: ``'minmax'`` for a ``MinMaxScaler`` with feature range
            ``(-1, 1)``, or ``'std'`` for a ``StandardScaler``.

    Returns:
        A new, unfitted scaler instance.

    Raises:
        ValueError: If *name* is neither ``'minmax'`` nor ``'std'``.
    """
    if name == 'minmax':
        return MinMaxScaler(feature_range=(-1, 1))
    if name == 'std':
        return StandardScaler()
    raise ValueError('Unknown scaler type.')
def parse_count(count, ref_count):
    """Translate a textual count specification into an integer count.

    Accepted forms of *count*:

    * ``'auto'`` -- returns the sentinel ``-1``.
    * a string of decimal digits, e.g. ``'250'`` -- returns that integer.
    * a multiplier ending in ``x``, e.g. ``'2x'`` or ``'0.5x'`` -- returns
      ``int(ref_count * multiplier)`` (truncated toward zero).

    Args:
        count: The specification string.
        ref_count: Reference count that a multiplier is applied to.

    Returns:
        The resolved count as an ``int`` (``-1`` for ``'auto'``).

    Raises:
        ValueError: If *count* matches none of the accepted forms.
    """
    if count == 'auto':
        return -1

    # isdecimal() is true only for digits that int() can parse, whereas
    # isnumeric() also accepts characters such as '½' or '²'.
    if count.isdecimal():
        return int(count)

    error = (
        f"Invalid count {count!r}: expected 'auto', an integer, "
        "or a multiplier such as '2x'."
    )
    # Require the multiplier suffix explicitly.  Blindly dropping the last
    # character would turn e.g. '1.5' into float('1.') and silently return
    # ref_count * 1 instead of reporting the malformed value.
    if count[-1:] not in ('x', 'X'):
        raise ValueError(error)
    try:
        multiplier = float(count[:-1])
    except ValueError:
        raise ValueError(error) from None
    return int(ref_count * multiplier)
def parse_max_depth(md, ref):
    """Resolve a max-depth setting to an integer.

    Args:
        md: ``'auto'`` to derive the depth from *ref*, otherwise any value
            accepted by ``int()``.
        ref: Reference quantity used for the ``'auto'`` heuristic.

    Returns:
        ``ceil(log2(ref)) - 1`` when *md* is ``'auto'``, else ``int(md)``.
    """
    if md == 'auto':
        # Heuristic that proved to be a good default.
        return math.ceil(math.log2(ref)) - 1
    return int(md)
def get_rules(tree, feature_names, class_names):
    """Render every root-to-leaf path of a decision tree as a text rule.

    Args:
        tree: Object exposing a ``tree_`` attribute with ``feature``,
            ``threshold``, ``children_left``, ``children_right``, ``value``
            and ``n_node_samples`` arrays (presumably a fitted scikit-learn
            decision tree estimator -- confirm against callers).
        feature_names: Sequence indexed by the feature ids stored in
            ``tree_.feature``.
        class_names: Sequence of class labels, or ``None`` to report the
            leaf value as a ``response`` instead of a class.

    Returns:
        A list of rule strings, one per leaf, ordered by descending leaf
        sample count.  Each has the form
        ``"if <cond> and <cond> then <outcome> | based on N samples"``.
    """
    structure = tree.tree_
    undefined = _tree.TREE_UNDEFINED

    # Resolve the split feature of every node up front; leaves carry the
    # TREE_UNDEFINED marker and get a placeholder name.
    node_features = [
        "undefined!" if idx == undefined else feature_names[idx]
        for idx in structure.feature
    ]

    # Depth-first walk with an explicit stack.  Each entry pairs a node id
    # with the tuple of conditions accumulated on the way down to it.
    leaves = []
    pending = [(0, ())]
    while pending:
        node, conditions = pending.pop()
        if structure.feature[node] == undefined:
            leaves.append(
                (conditions, structure.value[node], structure.n_node_samples[node])
            )
            continue
        label = node_features[node]
        cut = np.round(structure.threshold[node], 3)
        # Push the right child first so the left subtree is visited first.
        pending.append(
            (structure.children_right[node], conditions + (f"({label} > {cut})",))
        )
        pending.append(
            (structure.children_left[node], conditions + (f"({label} <= {cut})",))
        )

    # Largest leaves first: ascending argsort, then reversed.
    order = np.argsort([n_samples for _, _, n_samples in leaves])[::-1]

    rules = []
    for position in order:
        conditions, value, n_samples = leaves[position]
        antecedent = " and ".join(conditions)
        if class_names is None:
            outcome = "response: " + str(np.round(value[0][0], 3))
        else:
            scores = value[0]
            best = np.argmax(scores)
            share = np.round(100.0 * scores[best] / np.sum(scores), 2)
            outcome = f"class: {class_names[best]} (proba: {share}%)"
        rules.append(
            f"if {antecedent} then {outcome} | based on {n_samples:,} samples"
        )
    return rules