-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathid3.py
78 lines (58 loc) · 2.06 KB
/
id3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import math
from collections import Counter
class Node:
    """A single node of a decision tree.

    Attributes:
        attribute: Value passed as ``attribute`` (``None`` by default).
        label: Value passed as ``label`` (``None`` by default).
        children: Dict mapping an attribute value to the child ``Node``
            registered for it through ``addChildren``.
    """

    def __init__(self, attribute=None, label=None):
        """Create a node with no children."""
        # A fresh dict per instance, so nodes never share their branches.
        self.children = {}
        self.label = label
        self.attribute = attribute

    def setAttribute(self, attribute):
        """Replace the attribute stored on this node."""
        self.attribute = attribute

    def setLabel(self, label):
        """Replace the label stored on this node."""
        self.label = label

    def addChildren(self, attributeValue, node):
        """Register ``node`` as the child reached through ``attributeValue``.

        An existing child under the same value is overwritten.
        """
        self.children.update({attributeValue: node})
def entropy(data, target_attribute, filter_value_attribute=None):
    """Return the Shannon entropy (base 2) of a target column.

    Args:
        data: Column-oriented table: a mapping from attribute name to an
            iterable of values, one entry per row, with all columns
            row-aligned.
        target_attribute: Name of the column whose value distribution is
            measured.
        filter_value_attribute: Optional mapping ``{attribute: value}``.
            When given, only rows whose listed attributes all equal the
            given values are counted. ``None`` (the default) uses every row.

    Returns:
        The entropy in bits as a float; ``0.0`` when no row qualifies.

    Rows whose target value is ``None`` are treated as missing and skipped.
    """
    targets = data[target_attribute]
    if filter_value_attribute is not None:
        filter_columns = [data[name] for name in filter_value_attribute]
        wanted = tuple(filter_value_attribute.values())
        # zip walks the target and every filter column row by row; row[0] is
        # the target value and row[1:] the values of the filtered attributes.
        targets = [
            row[0]
            for row in zip(targets, *filter_columns)
            if row[1:] == wanted
        ]

    class_counts = Counter(value for value in targets if value is not None)
    total = float(sum(class_counts.values()))
    # Negating each term and starting the sum at 0.0 keeps a pure (single
    # class) column from yielding -0.0, and makes the empty case return 0.0.
    return sum(
        (-(count / total) * math.log(count / total, 2)
         for count in class_counts.values()),
        0.0,
    )


def information_gain(data, previous_entropy_result, previous_attribute, target_attribute):
    """Return the information gain of splitting on ``previous_attribute``.

    The gain is ``previous_entropy_result`` minus the weighted average of the
    target entropy inside each group of rows sharing one value of
    ``previous_attribute``.

    Args:
        data: Column-oriented table, as accepted by ``entropy``.
        previous_entropy_result: Entropy of ``target_attribute`` before the
            split (typically ``entropy(data, target_attribute)``).
        previous_attribute: Name of the column evaluated as the split.
        target_attribute: Name of the column being predicted.

    Returns:
        The information gain in bits. With no usable rows the weighted
        entropy is zero and ``previous_entropy_result`` is returned as is.

    Rows where either the split value or the target value is ``None`` are
    ignored, which keeps the weights consistent with ``entropy``.
    """
    value_counts = Counter(
        value
        for value, target in zip(data[previous_attribute], data[target_attribute])
        if value is not None and target is not None
    )
    total = float(sum(value_counts.values()))

    # Weighted entropy that remains after the split: each attribute value
    # contributes its own conditional entropy, weighted by its row share.
    remaining_entropy = sum(
        (count / total) * entropy(data, target_attribute, {previous_attribute: value})
        for value, count in value_counts.items()
    )
    return previous_entropy_result - remaining_entropy
# Demo: assemble a small decision tree by hand and read one leaf back.

# Decision nodes carry the attribute they are built with.
n1 = Node(attribute='Outlook')
n2 = Node(attribute='Humidity')
# Leaf nodes carry only a label.
n3 = Node(label='Yes')
n4 = Node(attribute='Wind')
n5 = Node(label='Yes')
n6 = Node(label='No')

# Branches below the 'Humidity' node.
n2.addChildren('High', n5)
n2.addChildren('Normal', n6)

# Branches below the root; the 'Wind' node is left without children.
n1.addChildren('Sunny', n2)
n1.addChildren('Overcast', n3)
n1.addChildren('Rain', n4)

# Follow Outlook=Sunny, then Humidity=High, and print the label found there.
print(n1.children['Sunny'].children['High'].label)