Skip to content

Commit

Permalink
Fixed up py_mutations_hub.py
Browse files Browse the repository at this point in the history
  • Loading branch information
Dhvani Patel committed Aug 1, 2017
1 parent 2739a3f commit 3b30480
Show file tree
Hide file tree
Showing 6 changed files with 254 additions and 649 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
train_pre_data.txt
Binary file modified __pycache__/toCheck.pypy-41.pyc
Binary file not shown.
43 changes: 28 additions & 15 deletions keras_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from itertools import izip

import os

import cPickle
import matplotlib.pyplot as plt

# BATCH = 60
Expand All @@ -34,10 +34,11 @@
BATCH_SIZE = 66


def getInputTen():
one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _, _ = perform(0)
def getInputTen(allTrainData):
#one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _, _ = perform(0)
one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = allTrainData[0][0],allTrainData[0][1], allTrainData[0][2], allTrainData[0][3]
while(one_hot_good == 1):
one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _, _ = perform(0)
one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = allTrainData[0][0],allTrainData[0][1], allTrainData[0][2], allTrainData[0][3]
#print type(one_hot_good)
#print one_hot_good
windowInd = 0
Expand Down Expand Up @@ -150,10 +151,11 @@ def getInputTen():
#print "FILE IND"
print fileInd
windowInd = 0
one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _, _ = perform(fileInd)
#one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _, _ = perform(fileInd)
one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = allTrainData[fileInd][0],allTrainData[fileInd][1], allTrainData[fileInd][2], allTrainData[fileInd][3]
while(one_hot_good == 1):
fileInd+=1
one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _, _ = perform(fileInd)
one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = allTrainData[fileInd][0],allTrainData[fileInd][1], allTrainData[fileInd][2], allTrainData[fileInd][3]


for p in range(numGoodLeft):
Expand Down Expand Up @@ -208,10 +210,13 @@ def getInputTen():
one_hot_bad_sub.insert(p, old_one_hot_bad_sub[len(old_one_hot_bad_sub)-numBadSubLeft+p])
'''

def getOutputTen():
_, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out, _ = perform(0)
def getOutputTen(allTrainData):
#_, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out, _ = perform(0)
one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = allTrainData[0][4],allTrainData[0][5], allTrainData[0][6], allTrainData[0][7]

while(one_hot_good_out == 1):
_, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out, _ = perform(0)
#_, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out, _ = perform(0)
one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = allTrainData[0][4],allTrainData[0][5], allTrainData[0][6], allTrainData[0][7]
#print type(one_hot_good_out)
#print one_hot_good_out
windowInd = 0
Expand Down Expand Up @@ -382,10 +387,12 @@ def getOutputTen():

fileInd += 1
windowInd = 0
_, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out, _ = perform(fileInd)
#_, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out, _ = perform(fileInd)
one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = allTrainData[fileInd][4],allTrainData[fileInd][5], allTrainData[fileInd][6], allTrainData[fileInd][7]
while(one_hot_good_out == 1):
fileInd+=1
_, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out, _ = perform(fileInd)
#_, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out, _ = perform(fileInd)
one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = allTrainData[fileInd][4],allTrainData[fileInd][5], allTrainData[fileInd][6], allTrainData[fileInd][7]

#for p in range(numGoodOutLeft):
# one_hot_good_out.insert(p, old_one_hot_good_out[len(old_one_hot_good_out)-numGoodOutLeft+p])
Expand Down Expand Up @@ -945,8 +952,14 @@ def initData():
print "Start..."

#main_input = Input(shape=(10,87), dtype='int32', name='main_input')
allTrainData = cPickle.load( open( "train_pre_data.txt", "rb" ) )
print "GOT DATA"


sum = 0
for x in allTrainData:
sum += len(x[2])
print sum
print "SUM"
model = Sequential()
model.add(Dense(4, activation='relu', input_shape=(10, 88), batch_size=66))
model.add(Dropout(0.5))
Expand Down Expand Up @@ -986,10 +999,10 @@ def initData():
# SECOND THOUSAND TOKENS: 1353925

history = model.fit_generator(
izip(getInputTen(), getOutputTen()),
izip(getInputTen(allTrainData), getOutputTen(allTrainData)),
steps_per_epoch=16521,
validation_data=izip(getInputValTen(), getOutputValTen()),
validation_steps=20513,
#validation_data=izip(getInputValTen(), getOutputValTen()),
#validation_steps=20513,
epochs=5,
verbose=2
)
Expand Down
165 changes: 101 additions & 64 deletions py_mutations_hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from mutate_token_sub import subTokMutS
import sys
import cPickle
from scipy import sparse

# NUM TOTAL: 462 563
# ACTUAL: 462 540
Expand Down Expand Up @@ -49,7 +50,9 @@
START_TOKEN = '<s>'
END_TOKEN = '</s>'

def one_hot(indexed_tokens):
def one_hot_a(indexed_tokens):
print indexed_tokens
print len(indexed_tokens)
one_hot = []
nb_classes = 88
one_hot_targets = np.eye(nb_classes)[indexed_tokens]
Expand All @@ -62,6 +65,9 @@ def one_hot(indexed_tokens):
#one_hot.astype(int)
#print type(one_hot[0][0])
return one_hot

def one_hot(indexed_tokens):
    """Return ``indexed_tokens`` unchanged.

    Despite the name, no one-hot expansion is performed here: the argument
    is handed straight back to the caller as the very same object (no copy
    is made).

    NOTE(review): presumably the vocabulary indices are expanded into
    one-hot vectors somewhere downstream -- confirm against the callers.

    :param indexed_tokens: the value to pass through.
    :returns: the same ``indexed_tokens`` object that was passed in.
    """
    return indexed_tokens


def set_from_json(all_tokens, flag):
Expand Down Expand Up @@ -318,13 +324,14 @@ def perform(curr):
#print len(all_tokens)
#print len(allGood)
one_hot_good = vocabularize_tokens(all_tokens, False)
one_hot_good_out = []
for x in range(len(all_tokens)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1)):
toAdd = []
toAdd = [0] * NUM_BITS_OUTPUT
toAdd[0] = 0
toAdd[1] = 1 # FIRST BIT (01) - INDICATE NO ERROR (1 because rest are 0 and so add up to 1)
one_hot_good_out.append(toAdd)
#one_hot_good_out = []
#for x in range(len(all_tokens)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1)):
#toAdd = []
#toAdd = [0] * NUM_BITS_OUTPUT
#toAdd[0] = 0
#toAdd[1] = 1 # FIRST BIT (01) - INDICATE NO ERROR (1 because rest are 0 and so add up to 1)
#toAdd = [1]
#one_hot_good_out.append(1)


#print "DHVANI"
Expand Down Expand Up @@ -412,42 +419,45 @@ def perform(curr):


#print "NEXT STEP...C"
passInsErrorInd = (bruhInd+1)+(WINDOW_SIZE-1)
#passInsErrorInd = (bruhInd+1)+(WINDOW_SIZE-1)

one_hot_bad_ins_out = []
trueErrorInd = (bruhInd+1)+(WINDOW_SIZE-1)
#one_hot_bad_ins_out = []
#trueErrorInd = (bruhInd+1)+(WINDOW_SIZE-1)

# INSERT OUT_PUT

iterNum = len(new_tokens_ins)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1)
#iterNum = len(new_tokens_ins)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1)
#print "divide"
#print trueErrorInd
#print iterNum
'''
for x in range(iterNum):
#if x <= trueErrorInd <= (x+trueErrorInd):
#if x <= trueErrorInd <= x+(WINDOW_SIZE-1):
if True:
# DIFF - ACTUAL ERROR
#print x
toAdd = []
toAdd = [0] * NUM_BITS_OUTPUT
toAdd[0] = 1 # FIRST BIT (10) - INDICATE ERROR
toAdd[1] = 0
if NO_TOKEN != None:
toAdd[2] = 0
toAdd[3] = 1
if INSERTION != None:
toAdd[4] = 0
toAdd[5] = 0
toAdd[6] = 1
toAdd[7] = 1
#toAdd = []
#toAdd = [0] * NUM_BITS_OUTPUT
#toAdd[0] = 1 # FIRST BIT (10) - INDICATE ERROR
#toAdd[1] = 0
#if NO_TOKEN != None:
# toAdd[2] = 0
# toAdd[3] = 1
#if INSERTION != None:
# toAdd[4] = 0
# toAdd[5] = 0
# toAdd[6] = 1
#toAdd[7] = 1
toAdd = [0,3,6,7]
one_hot_bad_ins_out.append(toAdd)
else:
toAdd = []
toAdd = [0] * NUM_BITS_OUTPUT
toAdd[0] = 1
toAdd[1] = 0 # FIRST BIT (01) - INDICATE NO ERROR (1 because rest are 0 and so add up to 1)
one_hot_bad_ins_out.append(toAdd)
'''
#print "Morning"
#print len(new_tokens_ins)
#print len(one_hot_bad_ins_out)
Expand Down Expand Up @@ -508,11 +518,11 @@ def perform(curr):

one_hot_bad_del = vocabularize_tokens(new_tokens_del, True)

one_hot_bad_del_out = []
trueErrorInd = (bruhInd)+(WINDOW_SIZE-1)
#one_hot_bad_del_out = []
#trueErrorInd = (bruhInd)+(WINDOW_SIZE-1)

# DELETE OUT_PUT
iterNum = len(new_tokens_del)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1)
#iterNum = len(new_tokens_del)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1)
#print "divide"
#print len(send)
#print trueErrorInd
Expand All @@ -523,31 +533,34 @@ def perform(curr):
#oneH_ind_deleted = set_from_json_nonarr(send, True)
#print oneH_ind_deleted
#print "rad"
'''
for x in range(iterNum):
#if x <= trueErrorInd <= (x+trueErrorInd):
if True:
# DIFF - ACTUAL ERROR
#print x
toAdd = []
toAdd = [0] * NUM_BITS_OUTPUT
toAdd[0] = 1 # FIRST BIT (10) - INDICATE ERROR
toAdd[1] = 0
if YES_TOKEN != None:
toAdd[2] = 1
toAdd[3] = 0
if DELETION != None:
toAdd[4] = 0
toAdd[5] = 1
toAdd[6] = 0
toAdd[7] = 1
toAdd[17] = 1
#toAdd = []
#toAdd = [0] * NUM_BITS_OUTPUT
#toAdd[0] = 1 # FIRST BIT (10) - INDICATE ERROR
#toAdd[1] = 0
#if YES_TOKEN != None:
# toAdd[2] = 1
# toAdd[3] = 0
#if DELETION != None:
# toAdd[4] = 0
# toAdd[5] = 1
# toAdd[6] = 0
#toAdd[7] = 1
#toAdd[17] = 1
toAdd = [0,2,5,7,17]
one_hot_bad_del_out.append(toAdd)
else:
toAdd = []
toAdd = [0] * NUM_BITS_OUTPUT
toAdd[0] = 0
toAdd[1] = 1 # FIRST BIT (01) - INDICATE NO ERROR (1 because rest are 0 and so add up to 1)
one_hot_bad_del_out.append(toAdd)
'''
#print "Morning"
#print len(allGood)
#print len(one_hot_bad_del_out)
Expand Down Expand Up @@ -624,10 +637,10 @@ def perform(curr):

one_hot_bad_sub = vocabularize_tokens(new_tokens_sub, True)

one_hot_bad_sub_out = []
trueErrorInd = (bruhInd)+(WINDOW_SIZE-1)
#one_hot_bad_sub_out = []
#trueErrorInd = (bruhInd)+(WINDOW_SIZE-1)
# SUB OUT_PUT
iterNum = len(new_tokens_sub)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1)
#iterNum = len(new_tokens_sub)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1)
#print "divide"
#print len(send)
#print trueErrorInd
Expand All @@ -638,33 +651,38 @@ def perform(curr):
#oneH_sub_switch = set_from_json_nonarr(sendS, True)
#print oneH_sub_switch
#print "rad"

'''
for x in range(iterNum):
#if x <= trueErrorInd <= (x+trueErrorInd):
#if x <= trueErrorInd <= x+(WINDOW_SIZE-1):
if True:
# DIFF - ACTUAL ERROR
#print x
toAdd = []
toAdd = [0] * NUM_BITS_OUTPUT
toAdd[0] = 1 # FIRST BIT (10) - INDICATE ERROR
toAdd[1] = 0
#toAdd = []
#toAdd = [0] * NUM_BITS_OUTPUT
#toAdd[0] = 1 # FIRST BIT (10) - INDICATE ERROR
#toAdd[1] = 0
toAdd[2] = 1
toAdd[3] = 0
#toAdd[2] = 1
#toAdd[3] = 0
toAdd[4] = 1
toAdd[5] = 0
toAdd[6] = 0
#toAdd[4] = 1
#toAdd[5] = 0
#toAdd[6] = 0
toAdd[7] = 1
toAdd[17] = 1
#toAdd[7] = 1
#toAdd[17] = 1
toAdd = [0,2,4,7,17]
one_hot_bad_sub_out.append(toAdd)
else:
toAdd = []
toAdd = [0] * NUM_BITS_OUTPUT
toAdd[0] = 0
toAdd[1] = 1 # FIRST BIT (01) - INDICATE NO ERROR (1 because rest are 0 and so add up to 1)
one_hot_bad_sub_out.append(toAdd)
'''
#print "Morning"
#print len(allGood)
#print len(all_tokens)
Expand Down Expand Up @@ -704,11 +722,20 @@ def perform(curr):
#sizes = [len(one_hot_good), len(one_hot_bad_ins),len(one_hot_bad_del),len(one_hot_bad_sub)]
#minSize = int(min(float(siz) for siz in sizes)) # min of a generator
#return minSize



toPass = [one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out, passInsErrorInd]

#toPassGood = []
#print len(one_hot_good)
#for good in one_hot_good:
# ind = good.index(1.0)
# toPassGood.append(ind)
#print len(toPassGood)
#print one_hot_bad_sub_out
#print type(radha)

toPass = [one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub]
#toPass = sparse.csr_matrix(toPassMatrix)
#print toPass
#print type(radha)

#cPickle.dump(one_hot_bad_ins, fileStore)
#cPickle.dump(one_hot_bad_del, fileStore)
Expand Down Expand Up @@ -738,16 +765,26 @@ def giveItems():
#print allData[3][8]

if __name__ == '__main__':
all_data_to_pass = []
for x in range(2001):
all_train_to_pass = []
for x in range(1000):
print x
if x != -1:
toP = perform(x)
all_data_to_pass.append(toP)
fileStore = open("all_pre_data.txt", 'w')
cPickle.dump(all_data_to_pass, fileStore)
all_train_to_pass.append(toP)
fileStore = open("train_pre_data.txt", 'w')
cPickle.dump(all_train_to_pass, fileStore)
fileStore.close()
giveItems()
all_val_to_pass = []
for x in range(1000):
print x
if x != -1:
toPV = perform(x+1001)
all_val_to_pass.append(toPV)
fileStoreV = open("val_pre_data.txt", 'w')
cPickle.dump(all_val_to_pass, fileStoreV)
fileStoreV.close()
print "FINISHED"
#giveItems()

sys.exit()
for x in range(10):
Expand Down
Binary file modified py_mutations_hub.pyc
Binary file not shown.
Loading

0 comments on commit 3b30480

Please sign in to comment.