From adee5840dc52f9d590dab15705f27d2c06e3fedb Mon Sep 17 00:00:00 2001
From: Dhvani Patel <dhvanipatel_2000@yahoo.ca>
Date: Thu, 13 Jul 2017 12:57:40 -0600
Subject: [PATCH] Add MLP stuff

---
 Untitled Document               |  28 ++++++++++
 __pycache__/toCheck.pypy-41.pyc | Bin 2604 -> 2615 bytes
 keras_model.py                  |  93 +++++++++++++++++++++++---------
 mutate_token_delete.py          |  11 ++--
 mutate_token_delete.pyc         | Bin 3095 -> 3080 bytes
 mutate_token_sub.py             |   1 +
 mutate_token_sub.pyc            | Bin 3562 -> 3580 bytes
 py_mutations_hub.py             |  89 +++++++++++++++++++++++++-----
 py_mutations_hub.pyc            | Bin 5732 -> 6485 bytes
 9 files changed, 177 insertions(+), 45 deletions(-)
 create mode 100644 Untitled Document

diff --git a/Untitled Document b/Untitled Document
new file mode 100644
index 0000000..f8bb320
--- /dev/null
+++ b/Untitled Document	
@@ -0,0 +1,28 @@
+# Copyright 2017 Dhvani Patel
+
+from keras.models import Sequential
+from keras.layers import Dense, Dropout
+import numpy
+from Token import Token
+from py_mutations_hub import perform
+
+# BATCH = 60
+# So 15 of 4 of one window
+# One 4:
+# [Good, Insert, Delete, Sub]
+# WINDOW SIZE = 10
+
+def create_batches():
+	one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = perform()
+	print "Finished..."
+
+	#return train_input, train_output
+
+def initData():
+	print "Start..."
+	create_batches()
+	#train_input, train_output = create_batches()
+
+
+if __name__ == '__main__':
+	initData()
diff --git a/__pycache__/toCheck.pypy-41.pyc b/__pycache__/toCheck.pypy-41.pyc
index 4ab1d06a3ded93c67db16c6fd7050b3486a489be..d8be621ce212247e529842f9b80566aca626250d 100644
GIT binary patch
delta 120
zcmZ1@vRy=p>oYIc<U{F^$qZ1y3Z;w7fs9lJ2F8h6(u~Fv-Rwj-8M3$-(ij<1SQ%Ov
z7@8RvBBQuB&UnYc&jVCh!UrVMbMliCb0+t5MsSqoWtNnr7MDzZ#A(CF!o<udz{txe
Kz$CyZ#tZ<o?ig7B

delta 136
zcmdlkvPQ&>>oYGGYgc+?G6NK_Lh0gqAR`5c7#VUH7@`;$Dg|I%MusRRhVVRD29{K2
zhHNf|BI}8Eb|MT6S)2@Mj0`EP3@r={%?u2YQCu4*yyM{H1{zet0VF2(az;%4#A(jQ
S!^F%ez{txez$CyZ#tZ-(brx9w

diff --git a/keras_model.py b/keras_model.py
index 4a55bbc..ce65084 100644
--- a/keras_model.py
+++ b/keras_model.py
@@ -5,6 +5,10 @@
 import numpy
 from Token import Token
 from py_mutations_hub import perform
+from skimage.io import imread
+from skimage.transform import resize
+import numpy as np
+from itertools import izip_longest
 
 # BATCH = 60
 # So 15 of 4 of one window
@@ -12,34 +16,75 @@
 # [Good, Insert, Delete, Sub]
 # WINDOW SIZE = 10
 
+def chunker(seq, size):
+    return (seq[pos:pos + size] for pos in xrange(0, len(seq), size))
+
+class feedData():
+
+	def __init__(self, x_set, y_set, batch_size):
+    		self.X,self.y = x_set,y_set
+    		self.batch_size = batch_size
+
+	def __len__(self):
+    		return len(self.X) // self.batch_size
+
+	def __getitem__(self,idx):
+    		batch_x = self.X[idx*self.batch_size:(idx+1)*self.batch_size]
+    		batch_y = self.y[idx*self.batch_size:(idx+1)*self.batch_size]
+    		return np.array([batch_x]), np.array(batch_y)
+
+
 def create_batches():
-	# Copyright 2016, 2017 Eddie Antonio Santos <easantos@ualberta.ca>
-        """
-        Return a tuple of infinite training and validation examples,
-        respectively.
-        """
-        training = LoopBatchesEndlessly(
-            filehashes=self.training_set,
-            vectors_path=self.vectors_path,
-            batch_size=self.batch_size,
-            context_length=self.context_length,
-            backwards=self.backwards
-        )
-        validation = LoopBatchesEndlessly(
-            filehashes=self.validation_set,
-            vectors_path=self.vectors_path,
-            batch_size=self.batch_size,
-            context_length=self.context_length,
-            backwards=self.backwards
-        )
-        return training, validation
+	one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = perform()
+	print "Finished..."
+	
+	ohg_g = chunker(one_hot_good, 10)
+	ohbi_g = chunker(one_hot_bad_ins, 10)
+	ohbd_g = chunker(one_hot_bad_del, 10)
+	ohbs_g = chunker(one_hot_bad_sub, 10)
+	
+	ohg_group = []
+	for rad in ohg_g:
+		ohg_group.append(rad)
+
+	ohbi_group = []
+	for rad in ohbi_g:
+		ohbi_group.append(rad)
+
+	ohbd_group = []
+	for rad in ohbd_g:
+		ohbd_group.append(rad)
+
+	ohbs_group = []
+	for rad in ohbs_g:
+		ohbs_group.append(rad)
+			
+	print len(ohg_group)
+	print len(ohbi_group)
+	print len(ohbd_group[53])
+	#print ohbd_group[53]
+	print len(ohbs_group)
+
+	goodA = np.array(ohg_group)
+	insA = np.array(ohbi_group)
+	delA = np.array(ohbd_group)
+	subA = np.array(ohbs_group)
+			
+	temp = np.insert(subA, np.arange(len(delA)), delA)
+	temp2 = np.insert(temp, np.arange(len(insA)), insA)
+	train_input = np.insert(temp2, np.arange(len(goodA)), goodA)
+	
+	# feedData(train_input	
+
+	#return train_input, train_output
 
 def initData():
 	print "Start..."
-	one_hot_all = perform()
-	print len(one_hot_all)
-
-	train_input, train_output = create_batches()
+	create_batches()
+	model = Sequential()
+	# Fit the model
+	#model.fit(iter(train_input), iter(train_output), epochs=150, batch_size=10)
+	#train_input, train_output = create_batches()
 
 
 if __name__ == '__main__':
diff --git a/mutate_token_delete.py b/mutate_token_delete.py
index 33d8b66..aa26c0d 100644
--- a/mutate_token_delete.py
+++ b/mutate_token_delete.py
@@ -127,6 +127,7 @@ def deleteTokMut(raw_tokens, raw_text):
 	if toTest == None:
  		print "Try again..."	
 		deleteTokMut(raw_tokens_pass, raw_text)
+		return new_text, YES_TOKEN, DELETION, out_tokens_loc, send
 	else:
 		print toTest[0]
 		print toTest[0].filename
@@ -135,12 +136,8 @@ def deleteTokMut(raw_tokens, raw_text):
 		print toTest[0].functionname
 		print toTest[0].text
 		print toTest[0].errorname
+		print "-----------FINISHED-------------------"
+		print chosenLineInd+1
+		print out_tokens_loc
 		return new_text, YES_TOKEN, DELETION, out_tokens_loc, send
-	
-	print "-----------FINISHED-------------------"
-	print chosenLineInd+1
-	print out_tokens_loc
-	print len(raw_tokens_pass)
-	print len(out_tokens_loc)
-	print lenD
 
diff --git a/mutate_token_delete.pyc b/mutate_token_delete.pyc
index 1655715b5db6c63c0f17f443030d43bd7a0b4136..203e79447682c22d60632682f18f22c4670d9033 100644
GIT binary patch
delta 199
zcmbO((ILUc{F#?4Z01I`4kkwZ&HYRhnDw?WrwB8+dzA1p)G#yDFfueVF)-FhGnDW#
zl<+guFfr5!F{H6F1lPzgq%bl_P7Y-=wuLaWM1b0$!dap~b_!U4lOctjK^!Q<1d_%y
zX!8lSD8|Xz98!}NIXpMFa-3#loHsd;t5ckpQJ9I7kp~H~GIBDqPX5Z3#was6hFhLd
Jc5)5(Yyh8oDF^@n

delta 238
zcmeB>m@dJ_{F#^QRsBY`4kkvc&HYRhn9Y_jrwB8+dzA1p)G#yDFfueVF)-$(Fx1E}
zq%bl_x_dwwS$sf6Fg8EKWM4L82Wf^95rz^`h8iY@8X<->R)*jd5s)fQh7@)NaUg>U
zWH2Mh;ATcBlL^AC;Ro_J?`4Z(oNUD5Azi}Bz`zjc8XO<u@9pYW!U5#Dxcaz;c>4Qo
yuHrb&$hc^-8&{{a023!84-yn+VrArHWM$-L<evPDD~(ZUvM;whqw?fj?%4ns3o#x5

diff --git a/mutate_token_sub.py b/mutate_token_sub.py
index 1a9e55f..955e4e4 100644
--- a/mutate_token_sub.py
+++ b/mutate_token_sub.py
@@ -152,6 +152,7 @@ def subTokMut(raw_tokens, raw_text):
 	if toTest == None:
  		print "Try again..."	
 		subTokMut(raw_tokens_pass, raw_text)
+		return new_text, YES_TOKEN, SUBSTITUTION, out_tokens_loc, send
 	else:
 		print toTest[0]
 		print toTest[0].filename
diff --git a/mutate_token_sub.pyc b/mutate_token_sub.pyc
index b1f672c0f6e85b216091d38903f3292bdccad4ca..e3cd9c02413a894ebb1068d894a2a0801f92fcf1 100644
GIT binary patch
delta 176
zcmaDQ{YRRO`7<w9z|4(otxSwzn|qk%u;?9NNs(r7_b3r!s9|HMVPt4#VqmP1XDAV7
zC=p?(VP~ijV@P9V2(D3NNMUA>oE*n)EDvF3NdmP&g|nnUY^biyGuT;}<Tx1^7$RMR
z<3s$tUHwXUfV|*Pr{ECJkkAlMf4|AIIXyQsbFniqo|w$dvzD2cQF!uBo*G8w$!5Gd
Kj4G3pdD{UvhbYbf

delta 131
zcmew({YsjR`7<w<QNu>IRwhQb%{@$WSj;xCq)0Qkdz1(<)UYwsFfueVF)-$(Fw`hA
zq%bo`x_dwwS;9a?Ft!N8<S=$)c6o*pNrn=s%`4bhm?od*^q9PwQ+~5B7dsQ<vB}ar
YYZ(P5pXRAy)ST?WtHY=@xs<mZ0ApkzaR2}S

diff --git a/py_mutations_hub.py b/py_mutations_hub.py
index e2aa30b..5e64636 100644
--- a/py_mutations_hub.py
+++ b/py_mutations_hub.py
@@ -25,10 +25,11 @@
 # ONE HOT = 87
 
 BATCH_SIZE = 66
-EPOCHS = 14017
-all_tokens = []
-new_tokens = []
-indexed_tokens = []
+global all_tokens
+new_tokens_ins = []
+new_tokens_del = []
+new_tokens_sub = []
+global indexed_tokens
 data = None
 
 def one_hot(indexed_tokens):
@@ -46,6 +47,7 @@ def set_from_json(all_tokens):
 		#pprint(data)
 	for token in all_tokens:
 		toCompare = token.value
+		global indexed_tokens
 		indexed_tokens.append(data["indexes"].index(toCompare))
 	print indexed_tokens
 	return one_hot(indexed_tokens)
@@ -141,6 +143,7 @@ def handle_token(type, token, (srow, scol), (erow, ecol), line):
     else:
         val = repr(token)[1:len(repr(token))-1]
     send = Token(tokenize.tok_name[type], val, srow, scol, erow, ecol, line)
+    global all_tokens
     all_tokens.append(send)
     print "%d,%d-%d,%d:\t%s\t%s" % \
         (srow, scol, erow, ecol, tokenize.tok_name[type], repr(token))
@@ -165,26 +168,81 @@ def perform():
 		print "CURRENT: "
 		print curr
 		if toTest == None:
+			global all_tokens
+			all_tokens = []
+			global indexed_tokens
+			indexed_tokens = []
 			tokenStream = tokenize.tokenize(StringIO.StringIO(all_rows[curr][0]).readline, handle_token)
-			print "RAW"			
+			print "RAW"		
 			print len(all_tokens)
-		
+			
 			one_hot_good = vocabularize_tokens(all_tokens)
+			print "DHVANI"
+			print len(one_hot_good)
+		
 			raw_tokens = tokenize.generate_tokens(StringIO.StringIO(all_rows[curr][0]).readline)		
 			source_code = str(all_rows[curr][0])
 			
 			#MUTATIONS PER TOKEN
-			new_text, NO_TOKEN, INSERTION, out_tokens_loc = insertTokMut(raw_tokens, source_code)
+
+			# INSERT
+			global all_tokens
+			all_tokens = []
+			global indexed_tokens
+			indexed_tokens = []
+			print "RAW"		
+			print len(all_tokens)
+			new_i_text, NO_TOKEN, INSERTION, out_tokens_loc_i = insertTokMut(raw_tokens, source_code)
+			print "NEXT STEP...C"
+			#print len(new_i_text)
+			#print len(source_code)
+			try:
+				newTokenStream = tokenize.tokenize(StringIO.StringIO(new_i_text).readline, handle_token)
+			except tokenize.TokenError:
+    				pass
+			new_tokens_ins = all_tokens
+			print len(new_tokens_ins)
+			print "CC"		
+			one_hot_bad_ins = vocabularize_tokens(new_tokens_ins)
+			
+
+			# DELETE
+			raw_tokens = tokenize.generate_tokens(StringIO.StringIO(all_rows[curr][0]).readline)	
+			global all_tokens
+			all_tokens = []
+			global indexed_tokens
+			indexed_tokens = []
+			print type(raw_tokens)
+			print type(source_code)
+			new_d_text, YES_TOKEN, DELETION, out_tokens_loc_d, sendD = deleteTokMut(raw_tokens, source_code)
+			
 
 			print "NEXT STEP..."
 			try:
-				newTokenStream = tokenize.tokenize(StringIO.StringIO(new_text).readline, handle_token)
+				newTokenStream = tokenize.tokenize(StringIO.StringIO(new_d_text).readline, handle_token)
 			except tokenize.TokenError:
     				pass
-			one_hot_bad = vocabularize_tokens(new_tokens)
+			new_tokens_del = all_tokens
+			one_hot_bad_del = vocabularize_tokens(new_tokens_del)
+
+		
+			# SUB
+			raw_tokens = tokenize.generate_tokens(StringIO.StringIO(all_rows[curr][0]).readline)	
+			global all_tokens
+			all_tokens = []
+			global indexed_tokens
+			indexed_tokens = []
+			print type(raw_tokens)
 			
-			#deleteTokMut(raw_tokens, source_code)
-			#subTokMut(raw_tokens, source_code)
+			new_s_text, YES_TOKEN, SUBSTITUTION, out_tokens_loc_s, sendS = subTokMut(raw_tokens, source_code)
+
+			print "NEXT STEP..."
+			try:
+				newTokenStream = tokenize.tokenize(StringIO.StringIO(new_s_text).readline, handle_token)
+			except (tokenize.TokenError, IndentationError) as e:
+    				pass	
+			new_tokens_sub = all_tokens
+			one_hot_bad_sub = vocabularize_tokens(new_tokens_sub)
 
 			# MUTATIONS PER CHARACTER
 			# insertMut(source_code)
@@ -195,15 +253,18 @@ def perform():
 			#print one_hot_bad[0]
 			
 			print len(one_hot_good)
-			print len(one_hot_bad)
+			print len(one_hot_bad_ins)
+			print len(one_hot_bad_del)
+			print len(one_hot_bad_sub)
+			
 
-			one_hot_all = np.concatenate((one_hot_good, one_hot_bad), axis=0)
+			#one_hot_all = np.concatenate((one_hot_good, one_hot_bad), axis=0)
 
 			#print len(one_hot_all)
 			#print one_hot_all[538]
 
 			print "SUCCESS"
-			return one_hot_all
+			return one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub
 	
 		else:
 			print "Try again..."
diff --git a/py_mutations_hub.pyc b/py_mutations_hub.pyc
index f1e214a4fac4df44d1225eb68f0377b162a70ca8..5c24141462a12e85eec2fca196733c29a7296f05 100644
GIT binary patch
delta 1589
zcmai!O>7fK6vyA&wH>d$@yB}okuOu?G))yqMOENdQ4$L*s(@^#kO;Ig%tD;Ny0s?)
zq+MAkw3XUk+L5XrdgD;3s)Uf+Yo(rh=28ju(jI!|j{0Vtg*u_IE01U1o0<Rby?L{L
z9eMojl>T%)@$lh|^=q*AX!L)Ie!bIQPd%Br0kBa5*-of-5*Wh9G_WMx2Vvo{X}1C_
z3)>oW-$!>Ab^*`>IjgZ?iA{VY%Gxum3}*BPh>PO4NDj5Z<H!$DJUIvwe<E%Uu9%D3
z(XXUrh8i96SipI$f^TZ$sG|ZqM2<1~Ez4P->f)DFJ^hZ(qrl>@i?DL#ur3PeGyOX{
zp|;`@wPgTLz*Yiy61I}y>)=OVf*CMD3${$)<FKni5CfKiGiP}UxY~-k>Ah|+1}r`F
z`D0Rse6(Zc|90HP6A;9KXZMK(hT;jS4(M6pkMv|eL)+h-9c4~P)fcQOxaipskq|l9
zPC)k&(&HgqBKCl=Q{bnEpmZcElHoWY=2w$LpU6jYXM-Omgu2FF{CR1WBMmM`A;>~t
zL6C#Mh9D2jdPxk~*Tldz6+?d*L;)!MenGAYim!!!1#@8(w4^{MAee+;>SdsP@Jk0t
zr^9UXB3##0&gXOtrm4pFvOc4LXB2Q*oj_PBbQl?FR>kEPUv$gEF3XXN3X5iux8RrI
zdITEGChI4c;UWXNLHWUFnHnDFF{=BOvmbr1TI-EdceQ${`9`Bzy|A>jw9Ms=U0&{)
z`yAq*Y#B=;Zmq?N<f<pdx^+~1X}vdJ-;iUuXI)+@F0Wn9W3I<ryxsAt-EOD5a7g@Z
z-8T~C^4!}_TYQ{5Of?G7+2IcFY`b31x!Jkqw5jOSl%}*bc-L*+3Qgg>w%4OmrKO(h
zY<9SFwZ$B13pZyDckgg+(3JIR!)cyBS6!uwrjV;2R-3i+tBSC*(;rc0bp7dT;CgIj
zzaVpO9uSnaP_Q=?lo@NRoo+O1&9(o?jHvXUt2Fo#qy_GNvEI(r17c(oXs(D!I~n^<
zQhp;A?fJeI(_%P}QA}#4YAa~Rq>U^%g;8C{S<GMrV>FJU8L6lhNg9>)D18h=?{kk<
z)`s?yT4g5WTzcbC%;+kIj&rNk_8ezn4kC2m_rw?WDg0dgZvSl7zbCZ%1KIp2zRkad
fKZ#%STX;geU6?Hr6}_Zy#0}FpX3SB0*TjDT?_obC

delta 864
zcmZ`%%}*0S6n}4bTe=_pqTkdQZK0xQ4H^xeAc2N@QPS3c1hbazf-SI(?HDBK29pw_
zcrp{OUO1R|!}<s4Q4@_9j)sf>f&YMSnxfIf+05HF@AsSEd-G<FuKc)|mVf#+X@C80
z4F*G^{|wD`@n!n^h^51F64n8hQ++$qw`s7D$8d7Y3d0^@S+GaJ8ioxCT61V;p$mWx
z5DUuV5x$Cta{c4=gRq2{=?%*V7`q@cgvbDTFo-e9k1F;)D{nk_t=H0y5!ZSpV?w&L
ze?pWYhH=bESsX(@%GfVBOsJRUtYCdX{x(nwZX@%6<%2H5+`T$-JvhJn0htdPK42;|
zROraiQ6OchkRnn;1Exb)0@n*x0OpI#0Ol9l$qfu#cL;<+@P7)#c1b?;kG87<3qxnu
z2OU+USW&>$z}3Mu!1aR_q#Qe$zVz?nG?DsGQ2|(VqM(TE)L;!-J4nMpSQ5oW1lgss
z4Co9EY9=>0aHG(O5ilVlYZ6!?phI^NU2?*dyAy&k-P?Te;R-BTAa1L7b@(QZv<>R$
zt-?~}Ou156$mjD+p+NO<z0D6Jr!mgIMegv+(es$#mFQHCX@olVw#k|sw$m>8Xh}^4
zs5PBhmDvu>@i2cE-5U}RG5K`tY#~5?^AQ{DGB=wwI#VC62%R!(+10JSPSdf?2Tf+K
zR;|C*)y++QUzy`y<GS}1ZEY9%aXfbebM&lq$-pc|<+S9X_maU7Droq1il;puRAg!?
zfN5@3>yBxTkI|y^gg;Bn;8Xr7@jk)C!zwJ?o-LM32?4$kM(59y=kPT@Og3<epGsv?
S=so!))u-y}q&iA=+Q45)-l*>Y