From adee5840dc52f9d590dab15705f27d2c06e3fedb Mon Sep 17 00:00:00 2001 From: Dhvani Patel Date: Thu, 13 Jul 2017 12:57:40 -0600 Subject: [PATCH] Add MLP stuff --- Untitled Document | 28 ++++++++++ __pycache__/toCheck.pypy-41.pyc | Bin 2604 -> 2615 bytes keras_model.py | 93 +++++++++++++++++++++++--------- mutate_token_delete.py | 11 ++-- mutate_token_delete.pyc | Bin 3095 -> 3080 bytes mutate_token_sub.py | 1 + mutate_token_sub.pyc | Bin 3562 -> 3580 bytes py_mutations_hub.py | 89 +++++++++++++++++++++++++----- py_mutations_hub.pyc | Bin 5732 -> 6485 bytes 9 files changed, 177 insertions(+), 45 deletions(-) create mode 100644 Untitled Document diff --git a/Untitled Document b/Untitled Document new file mode 100644 index 0000000..f8bb320 --- /dev/null +++ b/Untitled Document @@ -0,0 +1,28 @@ +# Copyright 2017 Dhvani Patel + +from keras.models import Sequential +from keras.layers import Dense, Dropout +import numpy +from Token import Token +from py_mutations_hub import perform + +# BATCH = 60 +# So 15 of 4 of one window +# One 4: +# [Good, Insert, Delete, Sub] +# WINDOW SIZE = 10 + +def create_batches(): + one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = perform() + print "Finished..." + + #return train_input, train_output + +def initData(): + print "Start..." + create_batches() + #train_input, train_output = create_batches() + + +if __name__ == '__main__': + initData() diff --git a/__pycache__/toCheck.pypy-41.pyc b/__pycache__/toCheck.pypy-41.pyc index 4ab1d06a3ded93c67db16c6fd7050b3486a489be..d8be621ce212247e529842f9b80566aca626250d 100644 GIT binary patch delta 120 zcmZ1@vRy=p>oYIc>oYGGYgc+?G6NK_Lh0gqAR`5c7#VUH7@`;$Dg|I%MusRRhVVRD29{K2 zhHNf|BI}8Eb|MT6S)2@Mj0`EP3@r={%?u2YQCu4*yyM{H1{zet0VF2(az;%4#A(jQ S!^F%ez{txez$CyZ#tZ-(brx9w diff --git a/keras_model.py b/keras_model.py index 4a55bbc..ce65084 100644 --- a/keras_model.py +++ b/keras_model.py @@ -5,6 +5,10 @@ import numpy from Token import Token from py_mutations_hub import perform +from skimage.io import imread +from skimage.transform import resize +import numpy as np +from itertools import izip_longest # BATCH = 60 # So 15 of 4 of one window @@ -12,34 +16,75 @@ # [Good, Insert, Delete, Sub] # WINDOW SIZE = 10 +def chunker(seq, size): + return (seq[pos:pos + size] for pos in xrange(0, len(seq), size)) + +class feedData(): + + def __init__(self, x_set, y_set, batch_size): + self.X,self.y = x_set,y_set + self.batch_size = batch_size + + def __len__(self): + return len(self.X) // self.batch_size + + def __getitem__(self,idx): + batch_x = self.X[idx*self.batch_size:(idx+1)*self.batch_size] + batch_y = self.y[idx*self.batch_size:(idx+1)*self.batch_size] + return np.array([batch_x]), np.array(batch_y) + + def create_batches(): - # Copyright 2016, 2017 Eddie Antonio Santos - """ - Return a tuple of infinite training and validation examples, - respectively. - """ - training = LoopBatchesEndlessly( - filehashes=self.training_set, - vectors_path=self.vectors_path, - batch_size=self.batch_size, - context_length=self.context_length, - backwards=self.backwards - ) - validation = LoopBatchesEndlessly( - filehashes=self.validation_set, - vectors_path=self.vectors_path, - batch_size=self.batch_size, - context_length=self.context_length, - backwards=self.backwards - ) - return training, validation + one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = perform() + print "Finished..." + + ohg_g = chunker(one_hot_good, 10) + ohbi_g = chunker(one_hot_bad_ins, 10) + ohbd_g = chunker(one_hot_bad_del, 10) + ohbs_g = chunker(one_hot_bad_sub, 10) + + ohg_group = [] + for rad in ohg_g: + ohg_group.append(rad) + + ohbi_group = [] + for rad in ohbi_g: + ohbi_group.append(rad) + + ohbd_group = [] + for rad in ohbd_g: + ohbd_group.append(rad) + + ohbs_group = [] + for rad in ohbs_g: + ohbs_group.append(rad) + + print len(ohg_group) + print len(ohbi_group) + print len(ohbd_group[53]) + #print ohbd_group[53] + print len(ohbs_group) + + goodA = np.array(ohg_group) + insA = np.array(ohbi_group) + delA = np.array(ohbd_group) + subA = np.array(ohbs_group) + + temp = np.insert(subA, np.arange(len(delA)), delA) + temp2 = np.insert(temp, np.arange(len(insA)), insA) + train_input = np.insert(temp2, np.arange(len(goodA)), goodA) + + # feedData(train_input + + #return train_input, train_output def initData(): print "Start..." - one_hot_all = perform() - print len(one_hot_all) - - train_input, train_output = create_batches() + create_batches() + model = Sequential() + # Fit the model + #model.fit(iter(train_input), iter(train_output), epochs=150, batch_size=10) + #train_input, train_output = create_batches() if __name__ == '__main__': diff --git a/mutate_token_delete.py b/mutate_token_delete.py index 33d8b66..aa26c0d 100644 --- a/mutate_token_delete.py +++ b/mutate_token_delete.py @@ -127,6 +127,7 @@ def deleteTokMut(raw_tokens, raw_text): if toTest == None: print "Try again..." deleteTokMut(raw_tokens_pass, raw_text) + return new_text, YES_TOKEN, DELETION, out_tokens_loc, send else: print toTest[0] print toTest[0].filename @@ -135,12 +136,8 @@ def deleteTokMut(raw_tokens, raw_text): print toTest[0].functionname print toTest[0].text print toTest[0].errorname + print "-----------FINISHED-------------------" + print chosenLineInd+1 + print out_tokens_loc return new_text, YES_TOKEN, DELETION, out_tokens_loc, send - - print "-----------FINISHED-------------------" - print chosenLineInd+1 - print out_tokens_loc - print len(raw_tokens_pass) - print len(out_tokens_loc) - print lenD diff --git a/mutate_token_delete.pyc b/mutate_token_delete.pyc index 1655715b5db6c63c0f17f443030d43bd7a0b4136..203e79447682c22d60632682f18f22c4670d9033 100644 GIT binary patch delta 199 zcmbO((ILUc{F#?4Z01I`4kkwZ&HYRhnDw?WrwB8+dzA1p)G#yDFfueVF)-FhGnDW# zl<+guFfr5!F{H6F1lPzgq%bl_P7Y-=wuLaWM1b0$!dap~b_!U4lOctjK^!Q<1d_%y zX!8lSD8|Xz98!}NIXpMFa-3#loHsd;t5ckpQJ9I7kp~H~GIBDqPX5Z3#was6hFhLd Jc5)5(Yyh8oDF^@n delta 238 zcmeB>m@dJ_{F#^QRsBY`4kkvc&HYRhn9Y_jrwB8+dzA1p)G#yDFfueVF)-$(Fx1E} zq%bl_x_dwwS$sf6Fg8EKWM4L82Wf^95rz^`h8iY@8X<->R)*jd5s)fQh7@)NaUg>U zWH2Mh;ATcBlL^AC;Ro_J?`4Z(oNUD5Azi}Bz`zjc8XO4Qo yuHrb&$hc^-8&{{a023!84-yn+VrArHWM$-LoE*n)EDvF3NdmP&g|nnUY^biyGuT;}IRwhQb%{@$WSj;xCq)0Qkdz1(<)UYwsFfueVF)-$(Fw`hA zq%bo`x_dwwS;9a?Ft!N87fK6vyA&wH>d$@yB}okuOu?G))yqMOENdQ4$L*s(@^#kO;Ig%tD;Ny0s?) zq+MAkw3XUk+L5XrdgD;3s)Uf+Yo(rh=28ju(jI!|j{0Vtg*u_IE01U1o0T%)@$lh|^=q*AX!L)Ie!bIQPd%Br0kBa5*-of-5*Wh9G_WMx2Vvo{X}1C_ z3)>oW-$!>Ab^*`>IjgZ?iA{VY%Gxum3}*BPh>PO4NDj5Zf)DFJ^hZ(qrl>@i?DL#ur3PeGyOX{ zp|;`@wPgTLz*Yiy61I}y>)=OVf*CMD3${$)Ah|+1}r`F z`D0Rse6(Zc|90HP6A;9KXZMK(hT;jS4(M6pkMv|eL)+h-9c4~P)fcQOxaipskq|l9 zPC)k&(&HgqBKCl=Q{bnEpmZcElHoWY=2w$LpU6jYXM-Omgu2FF{CR1WBMmM`A;>~t zL6C#Mh9D2jdPxk~*Tldz6+?d*L;)!MenGAYim!!!1#@8(w4^{MAee+;>SdsP@Jk0t zr^9UXB3##0&gXOtrm4pFvOc4LXB2Q*oj_PBbQl?FR>kEPUv$gEF3XXN3X5iux8RrI zdITEGChI4c;UWXNLHWUFnHnDFF{=BOvmbr1TI-EdceQ${`9`Bzy|A>jw9Ms=U0&{) z`yAq*Y#B=;Zmq?Nw%4OmrKO(h zY<9SFwZ$B13pZyDckgg+(3JIR!)cyBS6!uwrjV;2R-3i+tBSC*(;rc0bp7dT;CgIj zzaVpO9uSnaP_Q=?lo@NRoo+O1&9(o?jHvXUt2Fo#qy_GNvEI(r17c(oXs(D!I~n^< zQhp;A?fJeI(_%P}QA}#4YAa~Rq>U^%g;8C{SFJU8L6lhNg9>)D18h=?{kk< z)`s?yT4g5WTzcbC%;+kIj&rNk_8ezn4kC2m_rw?WDg0dgZvSl7zbCZ%1KIp2zRkad fKZ#%STX;geU6?Hr6}_Zy#0}FpX3SB0*TjDT?_obC delta 864 zcmZ`%%}*0S6n}4bTe=_pqTkdQZK0xQ4H^xeAc2N@QPS3c1hbazf-SI(?HDBK29pw_ zcrp{OUO1R|!}f&YMSnxfIf+05HF@AsSEd-G$z}3Mu!1aR_q#Qe$zVz?nG?DsGQ2|(VqM(TE)L;!-J4nMpSQ5oW1lgss z4Co9EY9=>0aHG(O5ilVlYZ6!?phI^NU2?*dyAy&k-P?Te;R-BTAa1L7b@(QZv<>R$ zt-?~}Ou156$mjD+p+NO8Xh}^4 zs5PBhmDvu>@i2cE-5U}RG5K`tY#~5?^AQ{DGB=wwI#VC62%R!(+10JSPSdf?2Tf+K zR;|C*)y++QUzy`yyBxTkI|y^gg;Bn;8Xr7@jk)C!zwJ?o-LM32?4$kM(59y=kPT@Og3Y