-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNormaliser.py
151 lines (131 loc) · 5.69 KB
/
Normaliser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
class Normaliser:
    """Decompose a relation schema into Boyce-Codd normal form (BCNF).

    Conventions used by every method, as established by the set operations
    the code performs:

    * an attribute set is a set of attribute-name strings;
    * a functional dependency (FD) is a two-element sequence ``[lhs, rhs]``
      whose elements are attribute sets;
    * a table is a two-element list ``[attributes, fds]``.
    """

    def __init__(self):
        pass

    def normalise(self, db, name):
        """Decompose relation ``name`` into BCNF and report the outcome.

        The attribute set and FD list are read from ``db`` via
        ``getAttributeSet`` and ``getFDSetList``. For each FD the closure of
        its left-hand side is computed; if the closure does not cover every
        attribute of the table, the table is split into one table holding
        the attributes of the violating FD and one holding everything except
        the attributes that FD determines.

        The resulting tables are printed, handed to ``encodeOutput`` and the
        FD strings returned by ``db.getFD`` and ``db.getOutputFDUnion`` are
        compared to tell the user whether the decomposition is dependency
        preserving.

        Args:
            db: project database wrapper providing the accessors named above.
            name: name of the relation to normalise.
        """
        tables = [[db.getAttributeSet(name), db.getFDSetList(name)]]
        print(tables)
        tables = self._decomposeToBCNF(tables[0][0], tables[0][1])
        print("Result\n")
        for table in tables:
            print(table, "\n")
        self.encodeOutput(db, tables, name)
        if self.equivalentSets(db.getFD(name), db.getOutputFDUnion(name)):
            print("Decomposition is dependency preserving.")
        else:
            print("Decomposition is not dependency preserving.")

    def _decomposeToBCNF(self, attrSet, fdList):
        """Return the list of BCNF tables obtained from one starting table.

        An explicit worklist is used instead of editing a list while
        iterating over it, so no table is skipped. Tables are returned in
        left-to-right order of the decomposition tree.
        """
        finished = []
        pending = [[attrSet, fdList]]
        while pending:
            table = pending.pop()
            violation = self._findViolation(table[0], table[1])
            if violation is None:
                finished.append(table)
                continue
            decompAttr = self.decomposeAttributes(table[0], violation)
            decompFD = self.decomposeFDs(table[0], violation, table[1])
            # Pushed in reverse so the table built from the violating FD is
            # examined before the remainder table.
            pending.append([decompAttr[1], decompFD[1]])
            pending.append([decompAttr[0], decompFD[0]])
        return finished

    def _findViolation(self, attrSet, fdList):
        """Return the first FD in ``fdList`` that violates BCNF, else None."""
        for fd in fdList:
            # A trivial FD (rhs within lhs) can never violate BCNF. Splitting
            # on one would reproduce the same table and loop forever.
            if fd[1].issubset(fd[0]):
                continue
            if not (self.getClosure(fd[0], fdList) >= attrSet):
                return fd
        return None

    def encodeOutput(self, db, tables, name):
        """Serialise the decomposed tables and pass them to ``db``.

        For every table a name of the form ``name_attr1_attr2...`` (attributes
        sorted), a comma-separated attribute string and an FD string of the
        form ``{A,B}=>{C}; `` per FD are built and given to
        ``db.outputNormalization``. If ``db.instanceExists(name)`` is true,
        ``db.addDecomposedTables`` is then called with all tables and names.

        Args:
            db: project database wrapper.
            tables: list of ``[attributes, fds]`` tables.
            name: name of the relation that was decomposed.
        """
        nameList = []
        for table in tables:
            sortedAttributes = sorted(table[0])
            print(sortedAttributes)
            tableName = "_".join([name] + sortedAttributes)
            print(tableName)
            nameList.append(tableName)
            tableAttributes = ",".join(sortedAttributes)
            print(tableAttributes)
            tableFDs = "".join(
                self._formatAttributes(fd[0]) + "=>"
                + self._formatAttributes(fd[1]) + "; "
                for fd in table[1]
            )
            print(tableFDs)
            db.outputNormalization(tableName, tableAttributes, tableFDs)
        if db.instanceExists(name):
            db.addDecomposedTables(name, tables, nameList)

    def _formatAttributes(self, attributes):
        """Return ``attributes`` as a sorted, brace-wrapped, comma-joined string."""
        return "{" + ",".join(sorted(attributes)) + "}"

    def decomposeAttributes(self, attrSet, fd):
        """Split ``attrSet`` on ``fd``.

        Returns:
            A two-element list: the attributes of ``fd`` (lhs and rhs), and
            ``attrSet`` without the attributes that appear only on the rhs.
        """
        return [fd[0].union(fd[1]), attrSet.difference(fd[1].difference(fd[0]))]

    def decomposeFDs(self, attrSet, fd, fdList):
        """Restrict ``fdList`` to each of the two tables produced by ``fd``.

        An FD is kept for a sub-table only when its whole lhs lies inside the
        sub-table and at least one rhs attribute does; the rhs is cut down to
        the attributes of that sub-table. FDs that are merely implied by
        ``fdList`` are not derived.

        Returns:
            A two-element list of FD lists, in the same order as the tables
            returned by ``decomposeAttributes``.
        """
        returnFDs = []
        for subAttrs in self.decomposeAttributes(attrSet, fd):
            partfdList = []
            for item in fdList:
                keptRhs = item[1].intersection(subAttrs)
                if item[0].issubset(subAttrs) and keptRhs:
                    partfdList.append([item[0], keptRhs])
            returnFDs.append(partfdList)
        return returnFDs

    def equivalentSets(self, set1, set2):
        """Return True when two collections of FD strings imply each other.

        Each string is expected to look like ``{A,B}=>{C};``. Every FD of one
        collection must follow from the other collection (its rhs lies in
        the closure of its lhs), and vice versa.

        Args:
            set1: iterable of FD strings.
            set2: iterable of FD strings.
        """
        lst1 = self._parseFDStrings(set1)
        lst2 = self._parseFDStrings(set2)
        print("lst1: ", lst1)
        print("lst2: ", lst2)
        for fd in lst1:
            if not fd[1].issubset(self.getClosure(fd[0], lst2)):
                print("failed fd from lst1: ", fd)
                print(self.getClosure(fd[0], lst2))
                return False
        for fd in lst2:
            if not fd[1].issubset(self.getClosure(fd[0], lst1)):
                print("failed fd: ", fd)
                return False
        return True

    def _parseFDStrings(self, fdStrings):
        """Turn FD strings into ``[lhs_set, rhs_set]`` pairs, skipping blanks."""
        parsed = []
        for text in fdStrings:
            cleaned = (
                text.replace("{", "").replace("}", "")
                .replace(";", "").replace(" ", "")
            )
            if not cleaned:
                # Blank entries carry no dependency.
                continue
            sides = cleaned.split("=>")
            parsed.append([set(sides[0].split(",")), set(sides[1].split(","))])
        return parsed

    def getClosure(self, attr, fds):
        """Return the attribute closure of ``attr`` under ``fds``.

        Args:
            attr: attribute set to start from.
            fds: iterable of ``[lhs, rhs]`` FDs.

        Returns:
            A new set; ``attr`` itself is never returned or modified.
        """
        closure = set(attr)
        changed = True
        # Keep applying FDs until a full pass adds nothing new.
        while changed:
            changed = False
            for fd in fds:
                if fd[0].issubset(closure) and not fd[1].issubset(closure):
                    closure.update(fd[1])
                    changed = True
        return closure

    def lhs(self, fd):
        """Return the left-hand side of ``fd``."""
        return fd[0]