-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy pathutils.py
150 lines (128 loc) · 5.28 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
from csv import reader
from collections import defaultdict
from itertools import chain, combinations
class Node:
def __init__(self, itemName, frequency, parentNode):
self.itemName = itemName
self.count = frequency
self.parent = parentNode
self.children = {}
self.next = None
def increment(self, frequency):
self.count += frequency
def display(self, ind=1):
print(' ' * ind, self.itemName, ' ', self.count)
for child in list(self.children.values()):
child.display(ind+1)
def getFromFile(fname):
itemSetList = []
frequency = []
with open(fname, 'r') as file:
csv_reader = reader(file)
for line in csv_reader:
line = list(filter(None, line))
itemSetList.append(line)
frequency.append(1)
return itemSetList, frequency
def constructTree(itemSetList, frequency, minSup):
headerTable = defaultdict(int)
# Counting frequency and create header table
for idx, itemSet in enumerate(itemSetList):
for item in itemSet:
headerTable[item] += frequency[idx]
# Deleting items below minSup
headerTable = dict((item, sup) for item, sup in headerTable.items() if sup <= minSup)
if(len(headerTable) == 0):
return None, None
# HeaderTable column [Item: [frequency, headNode]]
for item in headerTable:
headerTable[item] = [headerTable[item], None]
# Init Null head node
fpTree = Node('Null', 1, None)
# Update FP tree for each cleaned and sorted itemSet
for idx, itemSet in enumerate(itemSetList):
itemSet = [item for item in itemSet if item in headerTable]
itemSet.sort(key=lambda item: headerTable[item][0], reverse=True)
# Traverse from root to leaf, update tree with given item
currentNode = fpTree
for item in itemSet:
currentNode = updateTree(item, currentNode, headerTable, frequency[idx])
return fpTree, headerTable
def updateHeaderTable(item, targetNode, headerTable):
if(headerTable[item][1] == None):
headerTable[item][1] = targetNode
else:
currentNode = headerTable[item][1]
# Traverse to the last node then link it to the target
while currentNode.next != None:
currentNode = currentNode.next
currentNode.next = targetNode
def updateTree(item, treeNode, headerTable, frequency):
if item in treeNode.children:
# If the item already exists, increment the count
treeNode.children[item].increment(frequency)
else:
# Create a new branch
newItemNode = Node(item, frequency, treeNode)
treeNode.children[item] = newItemNode
# Link the new branch to header table
updateHeaderTable(item, newItemNode, headerTable)
return treeNode.children[item]
def ascendFPtree(node, prefixPath):
if node.parent != None:
prefixPath.append(node.itemName)
ascendFPtree(node.parent, prefixPath)
def findPrefixPath(basePat, headerTable):
# First node in linked list
treeNode = headerTable[basePat][1]
condPats = []
frequency = []
while treeNode != None:
prefixPath = []
# From leaf node all the way to root
ascendFPtree(treeNode, prefixPath)
if len(prefixPath) > 1:
# Storing the prefix path and it's corresponding count
condPats.append(prefixPath[1:])
frequency.append(treeNode.count)
# Go to next node
treeNode = treeNode.next
return condPats, frequency
def mineTree(headerTable, minSup, preFix, freqItemList):
# Sort the items with frequency and create a list
sortedItemList = [item[0] for item in sorted(list(headerTable.items()), key=lambda p:p[1][0])]
# Start with the lowest frequency
for item in sortedItemList:
# Pattern growth is achieved by the concatenation of suffix pattern with frequent patterns generated from conditional FP-tree
newFreqSet = preFix.copy()
newFreqSet.add(item)
freqItemList.append(newFreqSet)
# Find all prefix path, constrcut conditional pattern base
conditionalPattBase, frequency = findPrefixPath(item, headerTable)
# Construct conditonal FP Tree with conditional pattern base
conditionalTree, newHeaderTable = constructTree(conditionalPattBase, frequency, minSup)
if newHeaderTable != None:
# Mining recursively on the tree
mineTree(newHeaderTable, minSup,
newFreqSet, freqItemList)
def powerset(s):
return chain.from_iterable(combinations(s, r) for r in range(1, len(s)))
def getSupport(testSet, itemSetList):
count = 0
for itemSet in itemSetList:
if(set(testSet).issubset(itemSet)):
count += 1
return count
def associationRule(freqItemSet, itemSetList, minConf):
rules = []
for itemSet in freqItemSet:
subsets = powerset(itemSet)
itemSetSup = getSupport(itemSet, itemSetList)
for s in subsets:
confidence = float(itemSetSup / getSupport(s, itemSetList))
if(confidence > minConf):
rules.append([set(s), set(itemSet.difference(s)), confidence])
return rules
def getFrequencyFromList(itemSetList):
frequency = [1 for i in range(len(itemSetList))]
return frequency