diff --git a/__pycache__/toCheck.pypy-41.pyc b/__pycache__/toCheck.pypy-41.pyc index ed4bd45..439cc42 100644 Binary files a/__pycache__/toCheck.pypy-41.pyc and b/__pycache__/toCheck.pypy-41.pyc differ diff --git a/__pycache__/toCheckD.pypy-41.pyc b/__pycache__/toCheckD.pypy-41.pyc index f63c651..a7cf2ca 100644 Binary files a/__pycache__/toCheckD.pypy-41.pyc and b/__pycache__/toCheckD.pypy-41.pyc differ diff --git a/__pycache__/toCheckI.pypy-41.pyc b/__pycache__/toCheckI.pypy-41.pyc index 13cad0e..62dcb77 100644 Binary files a/__pycache__/toCheckI.pypy-41.pyc and b/__pycache__/toCheckI.pypy-41.pyc differ diff --git a/__pycache__/toCheckS.pypy-41.pyc b/__pycache__/toCheckS.pypy-41.pyc index 41018a6..00830f1 100644 Binary files a/__pycache__/toCheckS.pypy-41.pyc and b/__pycache__/toCheckS.pypy-41.pyc differ diff --git a/check_pypy_syntax.py b/check_pypy_syntax.py index 930577b..dfdbd92 100644 --- a/check_pypy_syntax.py +++ b/check_pypy_syntax.py @@ -34,174 +34,6 @@ def find_nth(haystack, needle, n): n -= 1 return start -# Main method -def checkPyPySyntaxDel(src): - myFile = open("toCheckD.py", "w") - myFile.write(src) - myFile.close() - proc = subprocess.Popen(['pypy', '-m', 'py_compile', 'toCheckD.py'], stderr=subprocess.PIPE) - streamdata, err = proc.communicate() - rc = proc.returncode - if rc == 0: - # No errors, all good - if os.path.isfile("toCheckD.py") == True: - os.remove("toCheckD.py") - return None - else: - # Error, disect data for constructor - fileBegInd = find_nth(err, 'File ', 1) - fileEndInd = find_nth(err, ',', 1) - lineInd = find_nth(err, 'line ', 1) - - nextLineInd = find_nth(err, '\n', 1) - - - add = err[lineInd+5:nextLineInd] - add = re.sub("[^0-9]", "", add) - if(add == ''): - add = '-1' - line = int(add) - - textInd = find_nth(err, ' ', 1) - temp2 = err[textInd+4:] - - - nextLineIndTemp = find_nth(temp2, ' ', 1) - textAfter = err[textInd+4:nextLineIndTemp+textInd+3] - - fileName = err[fileBegInd+6:fileEndInd-1] - - colon = ':' - - textBeforeInd = err.rfind(colon.encode()) - textBefore = err[textBeforeInd+2:] - textBefore = textBefore.strip() - - colonTwo = ':' - - text = textBefore + colon.encode() + textAfter - - cutoffInd = find_nth(err, '^', 1) - errorname = err[cutoffInd+2:textBeforeInd] - - errorObj = CompileError(fileName, line, None, None, text, errorname) - if os.path.isfile("toCheckD.py") == True: - os.remove("toCheckD.py") - return [errorObj] - - -# Main method -def checkPyPySyntaxIns(src): - myFile = open("toCheckI.py", "w") - myFile.write(src) - myFile.close() - proc = subprocess.Popen(['pypy', '-m', 'py_compile', 'toCheckI.py'], stderr=subprocess.PIPE) - streamdata, err = proc.communicate() - rc = proc.returncode - if rc == 0: - # No errors, all good - if os.path.isfile("toCheckI.py") == True: - os.remove("toCheckI.py") - return None - else: - # Error, disect data for constructor - fileBegInd = find_nth(err, 'File ', 1) - fileEndInd = find_nth(err, ',', 1) - lineInd = find_nth(err, 'line ', 1) - - nextLineInd = find_nth(err, '\n', 1) - - - add = err[lineInd+5:nextLineInd] - add = re.sub("[^0-9]", "", add) - if(add == ''): - add = '-1' - line = int(add) - - textInd = find_nth(err, ' ', 1) - temp2 = err[textInd+4:] - - - nextLineIndTemp = find_nth(temp2, ' ', 1) - textAfter = err[textInd+4:nextLineIndTemp+textInd+3] - - fileName = err[fileBegInd+6:fileEndInd-1] - - colon = ':' - - textBeforeInd = err.rfind(colon.encode()) - textBefore = err[textBeforeInd+2:] - textBefore = textBefore.strip() - - colonTwo = ':' - - text = textBefore + colon.encode() + textAfter - - cutoffInd = find_nth(err, '^', 1) - errorname = err[cutoffInd+2:textBeforeInd] - - errorObj = CompileError(fileName, line, None, None, text, errorname) - if os.path.isfile("toCheckI.py") == True: - os.remove("toCheckI.py") - return [errorObj] - -# Main method -def checkPyPySyntaxSub(src): - myFile = open("toCheckS.py", "w") - myFile.write(src) - myFile.close() - proc = subprocess.Popen(['pypy', '-m', 'py_compile', 'toCheckS.py'], stderr=subprocess.PIPE) - streamdata, err = proc.communicate() - rc = proc.returncode - if rc == 0: - # No errors, all good - if os.path.isfile("toCheckS.py") == True: - os.remove("toCheckS.py") - return None - else: - # Error, disect data for constructor - fileBegInd = find_nth(err, 'File ', 1) - fileEndInd = find_nth(err, ',', 1) - lineInd = find_nth(err, 'line ', 1) - - nextLineInd = find_nth(err, '\n', 1) - - - add = err[lineInd+5:nextLineInd] - add = re.sub("[^0-9]", "", add) - if(add == ''): - add = '-1' - line = int(add) - - textInd = find_nth(err, ' ', 1) - temp2 = err[textInd+4:] - - - nextLineIndTemp = find_nth(temp2, ' ', 1) - textAfter = err[textInd+4:nextLineIndTemp+textInd+3] - - fileName = err[fileBegInd+6:fileEndInd-1] - - colon = ':' - - textBeforeInd = err.rfind(colon.encode()) - textBefore = err[textBeforeInd+2:] - textBefore = textBefore.strip() - - colonTwo = ':' - - text = textBefore + colon.encode() + textAfter - - cutoffInd = find_nth(err, '^', 1) - errorname = err[cutoffInd+2:textBeforeInd] - - errorObj = CompileError(fileName, line, None, None, text, errorname) - if os.path.isfile("toCheckS.py") == True: - os.remove("toCheckS.py") - return [errorObj] - - - # Main method def checkPyPySyntax(src): myFile = open("toCheck.py", "w") diff --git a/check_pypy_syntax.pyc b/check_pypy_syntax.pyc index b967420..a67041b 100644 Binary files a/check_pypy_syntax.pyc and b/check_pypy_syntax.pyc differ diff --git a/keras_model.py b/keras_model.py index 42f3dd6..3846c70 100644 --- a/keras_model.py +++ b/keras_model.py @@ -34,13 +34,21 @@ def getInputTen(): one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _ = perform(0) windowInd = 0 fileInd = 0 + batchInd = 1 + count = 0 while fileInd <= 1000: # 462540 #while windowInd < int(len(insArr)/10): sizes = [len(one_hot_good), len(one_hot_bad_ins),len(one_hot_bad_del),len(one_hot_bad_sub)] minSize = min(float(siz) for siz in sizes) # min of a generator print "file" print fileInd - if windowInd < int((minSize / 10)): + print minSize + print windowInd + print int((int(minSize) / 10)) + while windowInd < int((int(minSize) / 10)): + print windowInd + print "WINDOW" + batchInd = 1 print len(one_hot_good) print len(one_hot_bad_ins) print len(one_hot_bad_del) @@ -70,53 +78,74 @@ def getInputTen(): print len(toPassTwo) print len(toPassThree) print len(toPassFour) - toPass = np.array((toPassOne, toPassTwo, toPassThree, toPassFour)) + #toPass = np.array((toPassOne, toPassTwo, toPassThree, toPassFour)) #print toPass.shape - a = toPass.astype(int) - #print b.shape - yield a + #toPass = [] + + while(batchInd % 5 != 0): + toPass = [] + print "BATCH IND" + print batchInd + if(batchInd == 1): + toPass = toPassOne[:] + elif(batchInd == 2): + toPass = toPassTwo[:] + elif(batchInd == 3): + toPass = toPassThree[:] + elif(batchInd == 4): + toPass = toPassFour[:] + a = numpy.array(toPass).astype(int) + print a.shape + count+=1 + print "COUNT" + print count + #print b.shape + yield a + batchInd += 1 #print numpy.array(toPass).shape - #print "mine too" + print "mine too" windowInd += 1 - else: - #print "NEXT FILE" - - old_one_hot_good = one_hot_good[:] - old_one_hot_bad_ins = one_hot_bad_ins[:] - old_one_hot_bad_del = one_hot_bad_del[:] - old_one_hot_bad_sub = one_hot_bad_sub[:] - - numGoodLeft = len(one_hot_good) % 10 - numBadInsLeft = len(one_hot_bad_ins) % 10 - numBadDelLeft = len(one_hot_bad_del) % 10 - numBadSubLeft = len(one_hot_bad_sub) % 10 - - fileInd += 1 - windowInd = 0 + #else: + print "NEXT FILE" + print "DONE BRO" + old_one_hot_good = one_hot_good[:] + old_one_hot_bad_ins = one_hot_bad_ins[:] + old_one_hot_bad_del = one_hot_bad_del[:] + old_one_hot_bad_sub = one_hot_bad_sub[:] + + numGoodLeft = len(one_hot_good) % 10 + numBadInsLeft = len(one_hot_bad_ins) % 10 + numBadDelLeft = len(one_hot_bad_del) % 10 + numBadSubLeft = len(one_hot_bad_sub) % 10 + + fileInd += 1 + windowInd = 0 + one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _ = perform(fileInd) + while(one_hot_good == None): + fileInd+=1 one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _ = perform(fileInd) - while(one_hot_good == None): - fileInd+=1 - one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _ = perform(fileInd) - for p in range(numGoodLeft): - one_hot_good.insert(p, old_one_hot_good[len(old_one_hot_good)-numGoodLeft+p]) - for p in range(numBadInsLeft): - one_hot_bad_ins.insert(p, old_one_hot_bad_ins[len(old_one_hot_bad_ins)-numBadInsLeft+p]) - for p in range(numBadDelLeft): - one_hot_bad_del.insert(p, old_one_hot_bad_del[len(old_one_hot_bad_del)-numBadDelLeft+p]) - for p in range(numBadSubLeft): - one_hot_bad_sub.insert(p, old_one_hot_bad_sub[len(old_one_hot_bad_sub)-numBadSubLeft+p]) + for p in range(numGoodLeft): + one_hot_good.insert(p, old_one_hot_good[len(old_one_hot_good)-numGoodLeft+p]) + for p in range(numBadInsLeft): + one_hot_bad_ins.insert(p, old_one_hot_bad_ins[len(old_one_hot_bad_ins)-numBadInsLeft+p]) + for p in range(numBadDelLeft): + one_hot_bad_del.insert(p, old_one_hot_bad_del[len(old_one_hot_bad_del)-numBadDelLeft+p]) + for p in range(numBadSubLeft): + one_hot_bad_sub.insert(p, old_one_hot_bad_sub[len(old_one_hot_bad_sub)-numBadSubLeft+p]) def getOutputTen(): _, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = perform(0) windowInd = 0 fileInd = 0 + batchInd = 1 while fileInd <= 1000: # 462540 #while windowInd < int(len(insArr)/10): sizes = [len(one_hot_good_out), len(one_hot_bad_ins_out),len(one_hot_bad_del_out),len(one_hot_bad_sub_out)] minSize = min(float(siz) for siz in sizes) # min of a generator - if windowInd < int(minSize/10): + while windowInd < int((int(minSize)/10)): + batchInd = 1 toPassOne = [] for x in range(10): y = x + windowInd @@ -138,54 +167,83 @@ def getOutputTen(): if y < len(one_hot_bad_sub_out): toPassFour.append(one_hot_bad_sub_out[y]) #print len(toPass) - toPass = np.array((toPassOne, toPassTwo, toPassThree, toPassFour)) + #toPass = np.array((toPassOne, toPassTwo, toPassThree, toPassFour)) #print toPass.shape - a = toPass.astype(int) - #print b.shape - yield a - #print numpy.array(toPass).shape - #print "mine" + while(batchInd % 5 != 0): + toPass = [] + print "BATCH IND" + print batchInd + if(batchInd == 1): + toPass = toPassOne[:] + elif(batchInd == 2): + toPass = toPassTwo[:] + elif(batchInd == 3): + toPass = toPassThree[:] + elif(batchInd == 4): + toPass = toPassFour[:] + a = numpy.array(toPass).astype(int) + print a.shape + #count+=1 + #print "COUNT" + #print count + #print b.shape + yield a + batchInd += 1 windowInd += 1 - else: - #print "NEXT FILE" - old_one_hot_good_out = one_hot_good_out[:] - old_one_hot_bad_ins_out = one_hot_bad_ins_out[:] - old_one_hot_bad_del_out = one_hot_bad_del_out[:] - old_one_hot_bad_sub_out = one_hot_bad_sub_out[:] + #print "NEXT FILE" - numGoodOutLeft = len(one_hot_good_out) % 10 - numBadInsOutLeft = len(one_hot_bad_ins_out) % 10 - numBadDelOutLeft = len(one_hot_bad_del_out) % 10 - numBadSubOutLeft = len(one_hot_bad_sub_out) % 10 + old_one_hot_good_out = one_hot_good_out[:] + old_one_hot_bad_ins_out = one_hot_bad_ins_out[:] + old_one_hot_bad_del_out = one_hot_bad_del_out[:] + old_one_hot_bad_sub_out = one_hot_bad_sub_out[:] - fileInd += 1 - windowInd = 0 + numGoodOutLeft = len(one_hot_good_out) % 10 + numBadInsOutLeft = len(one_hot_bad_ins_out) % 10 + numBadDelOutLeft = len(one_hot_bad_del_out) % 10 + numBadSubOutLeft = len(one_hot_bad_sub_out) % 10 + + fileInd += 1 + windowInd = 0 + _, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = perform(fileInd) + while(one_hot_good_out == None): + fileInd+=1 _, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = perform(fileInd) - while(one_hot_good_out == None): - fileInd+=1 - _, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = perform(fileInd) - for p in range(numGoodOutLeft): - one_hot_good_out.insert(p, old_one_hot_good_out[len(old_one_hot_good_out)-numGoodOutLeft+p]) - for p in range(numBadInsOutLeft): - one_hot_bad_ins_out.insert(p, old_one_hot_bad_ins_out[len(old_one_hot_bad_ins_out)-numBadInsOutLeft+p]) - for p in range(numBadDelOutLeft): - one_hot_bad_del_out.insert(p, old_one_hot_bad_del_out[len(old_one_hot_bad_del_out)-numBadDelOutLeft+p]) - for p in range(numBadSubOutLeft): - one_hot_bad_sub_out.insert(p, old_one_hot_bad_sub_out[len(old_one_hot_bad_sub_out)-numBadSubOutLeft+p]) + for p in range(numGoodOutLeft): + one_hot_good_out.insert(p, old_one_hot_good_out[len(old_one_hot_good_out)-numGoodOutLeft+p]) + for p in range(numBadInsOutLeft): + one_hot_bad_ins_out.insert(p, old_one_hot_bad_ins_out[len(old_one_hot_bad_ins_out)-numBadInsOutLeft+p]) + for p in range(numBadDelOutLeft): + one_hot_bad_del_out.insert(p, old_one_hot_bad_del_out[len(old_one_hot_bad_del_out)-numBadDelOutLeft+p]) + for p in range(numBadSubOutLeft): + one_hot_bad_sub_out.insert(p, old_one_hot_bad_sub_out[len(old_one_hot_bad_sub_out)-numBadSubOutLeft+p]) def getInputValTen(): one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _ = perform(1001) windowInd = 0 fileInd = 1001 + batchInd = 1 + count = 0 while fileInd <= 2000: # 462540 #while windowInd < int(len(insArr)/10): sizes = [len(one_hot_good), len(one_hot_bad_ins),len(one_hot_bad_del),len(one_hot_bad_sub)] minSize = min(float(siz) for siz in sizes) # min of a generator - if windowInd < int(minSize/10): - toPassOne = [] + print "file" + print fileInd + print minSize + print windowInd + print int((int(minSize) / 10)) + while windowInd < int((int(minSize) / 10)): + print windowInd + print "WINDOW" + batchInd = 1 + print len(one_hot_good) + print len(one_hot_bad_ins) + print len(one_hot_bad_del) + print len(one_hot_bad_sub) + toPassOne = [] for x in range(10): y = x + windowInd if y < len(one_hot_good): @@ -206,52 +264,78 @@ def getInputValTen(): if y < len(one_hot_bad_sub): toPassFour.append(one_hot_bad_sub[y]) #print len(toPass) - toPass = np.array((toPassOne, toPassTwo, toPassThree, toPassFour)) + print len(toPassOne) + print len(toPassTwo) + print len(toPassThree) + print len(toPassFour) + #toPass = np.array((toPassOne, toPassTwo, toPassThree, toPassFour)) #print toPass.shape - a = toPass.astype(int) - #print b.shape - yield a + #toPass = [] + + while(batchInd % 5 != 0): + toPass = [] + print "BATCH IND" + print batchInd + if(batchInd == 1): + toPass = toPassOne[:] + elif(batchInd == 2): + toPass = toPassTwo[:] + elif(batchInd == 3): + toPass = toPassThree[:] + elif(batchInd == 4): + toPass = toPassFour[:] + a = numpy.array(toPass).astype(int) + print a.shape + count+=1 + print "COUNT" + print count + #print b.shape + yield a + batchInd += 1 #print numpy.array(toPass).shape - #print "mine too" + print "mine too" windowInd += 1 - else: - #print "NEXT FILE" - - old_one_hot_good = one_hot_good[:] - old_one_hot_bad_ins = one_hot_bad_ins[:] - old_one_hot_bad_del = one_hot_bad_del[:] - old_one_hot_bad_sub = one_hot_bad_sub[:] - - numGoodLeft = len(one_hot_good) % 10 - numBadInsLeft = len(one_hot_bad_ins) % 10 - numBadDelLeft = len(one_hot_bad_del) % 10 - numBadSubLeft = len(one_hot_bad_sub) % 10 - - fileInd += 1 - windowInd = 0 + #else: + print "NEXT FILE" + print "DONE BRO" + old_one_hot_good = one_hot_good[:] + old_one_hot_bad_ins = one_hot_bad_ins[:] + old_one_hot_bad_del = one_hot_bad_del[:] + old_one_hot_bad_sub = one_hot_bad_sub[:] + + numGoodLeft = len(one_hot_good) % 10 + numBadInsLeft = len(one_hot_bad_ins) % 10 + numBadDelLeft = len(one_hot_bad_del) % 10 + numBadSubLeft = len(one_hot_bad_sub) % 10 + + fileInd += 1 + windowInd = 0 + one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _ = perform(fileInd) + while(one_hot_good == None): + fileInd+=1 one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _ = perform(fileInd) - while(one_hot_good == None): - fileInd+=1 - one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _ = perform(fileInd) + - for p in range(numGoodLeft): - one_hot_good.insert(p, old_one_hot_good[len(old_one_hot_good)-numGoodLeft+p]) - for p in range(numBadInsLeft): - one_hot_bad_ins.insert(p, old_one_hot_bad_ins[len(old_one_hot_bad_ins)-numBadInsLeft+p]) - for p in range(numBadDelLeft): - one_hot_bad_del.insert(p, old_one_hot_bad_del[len(old_one_hot_bad_del)-numBadDelLeft+p]) - for p in range(numBadSubLeft): - one_hot_bad_sub.insert(p, old_one_hot_bad_sub[len(old_one_hot_bad_sub)-numBadSubLeft+p]) + for p in range(numGoodLeft): + one_hot_good.insert(p, old_one_hot_good[len(old_one_hot_good)-numGoodLeft+p]) + for p in range(numBadInsLeft): + one_hot_bad_ins.insert(p, old_one_hot_bad_ins[len(old_one_hot_bad_ins)-numBadInsLeft+p]) + for p in range(numBadDelLeft): + one_hot_bad_del.insert(p, old_one_hot_bad_del[len(old_one_hot_bad_del)-numBadDelLeft+p]) + for p in range(numBadSubLeft): + one_hot_bad_sub.insert(p, old_one_hot_bad_sub[len(old_one_hot_bad_sub)-numBadSubLeft+p]) def getOutputValTen(): _, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = perform(1001) windowInd = 0 fileInd = 1001 + batchInd = 1 while fileInd <= 2000: # 462540 #while windowInd < int(len(insArr)/10): sizes = [len(one_hot_good_out), len(one_hot_bad_ins_out),len(one_hot_bad_del_out),len(one_hot_bad_sub_out)] minSize = min(float(siz) for siz in sizes) # min of a generator - if windowInd < int(minSize/10): + while windowInd < int((int(minSize)/10)): + batchInd = 1 toPassOne = [] for x in range(10): y = x + windowInd @@ -273,49 +357,65 @@ def getOutputValTen(): if y < len(one_hot_bad_sub_out): toPassFour.append(one_hot_bad_sub_out[y]) #print len(toPass) - toPass = np.array((toPassOne, toPassTwo, toPassThree, toPassFour)) + #toPass = np.array((toPassOne, toPassTwo, toPassThree, toPassFour)) #print toPass.shape - a = toPass.astype(int) - #print b.shape - yield a - #print numpy.array(toPass).shape - #print "mine" + while(batchInd % 5 != 0): + toPass = [] + print "BATCH IND" + print batchInd + if(batchInd == 1): + toPass = toPassOne[:] + elif(batchInd == 2): + toPass = toPassTwo[:] + elif(batchInd == 3): + toPass = toPassThree[:] + elif(batchInd == 4): + toPass = toPassFour[:] + a = numpy.array(toPass).astype(int) + print a.shape + #count+=1 + #print "COUNT" + #print count + #print b.shape + yield a + batchInd += 1 windowInd += 1 - else: - #print "NEXT FILE" - old_one_hot_good_out = one_hot_good_out[:] - old_one_hot_bad_ins_out = one_hot_bad_ins_out[:] - old_one_hot_bad_del_out = one_hot_bad_del_out[:] - old_one_hot_bad_sub_out = one_hot_bad_sub_out[:] + #print "NEXT FILE" - numGoodOutLeft = len(one_hot_good_out) % 10 - numBadInsOutLeft = len(one_hot_bad_ins_out) % 10 - numBadDelOutLeft = len(one_hot_bad_del_out) % 10 - numBadSubOutLeft = len(one_hot_bad_sub_out) % 10 + old_one_hot_good_out = one_hot_good_out[:] + old_one_hot_bad_ins_out = one_hot_bad_ins_out[:] + old_one_hot_bad_del_out = one_hot_bad_del_out[:] + old_one_hot_bad_sub_out = one_hot_bad_sub_out[:] - fileInd += 1 - windowInd = 0 + numGoodOutLeft = len(one_hot_good_out) % 10 + numBadInsOutLeft = len(one_hot_bad_ins_out) % 10 + numBadDelOutLeft = len(one_hot_bad_del_out) % 10 + numBadSubOutLeft = len(one_hot_bad_sub_out) % 10 + + fileInd += 1 + windowInd = 0 + _, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = perform(fileInd) + while(one_hot_good_out == None): + fileInd+=1 _, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = perform(fileInd) - - while(one_hot_good_out == None): - fileInd+=1 - _, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = perform(fileInd) - for p in range(numGoodOutLeft): - one_hot_good_out.insert(p, old_one_hot_good_out[len(old_one_hot_good_out)-numGoodOutLeft+p]) - for p in range(numBadInsOutLeft): - one_hot_bad_ins_out.insert(p, old_one_hot_bad_ins_out[len(old_one_hot_bad_ins_out)-numBadInsOutLeft+p]) - for p in range(numBadDelOutLeft): - one_hot_bad_del_out.insert(p, old_one_hot_bad_del_out[len(old_one_hot_bad_del_out)-numBadDelOutLeft+p]) - for p in range(numBadSubOutLeft): - one_hot_bad_sub_out.insert(p, old_one_hot_bad_sub_out[len(old_one_hot_bad_sub_out)-numBadSubOutLeft+p]) + for p in range(numGoodOutLeft): + one_hot_good_out.insert(p, old_one_hot_good_out[len(old_one_hot_good_out)-numGoodOutLeft+p]) + for p in range(numBadInsOutLeft): + one_hot_bad_ins_out.insert(p, old_one_hot_bad_ins_out[len(old_one_hot_bad_ins_out)-numBadInsOutLeft+p]) + for p in range(numBadDelOutLeft): + one_hot_bad_del_out.insert(p, old_one_hot_bad_del_out[len(old_one_hot_bad_del_out)-numBadDelOutLeft+p]) + for p in range(numBadSubOutLeft): + one_hot_bad_sub_out.insert(p, old_one_hot_bad_sub_out[len(old_one_hot_bad_sub_out)-numBadSubOutLeft+p]) def getInputTestTen(): - one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _ = perform(101) + one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _ = perform(2) windowInd = 0 - fileInd = 101 - while fileInd <= 102: # 462540 + fileInd = 2 + batchInd = 1 + #while fileInd <= 2: # 462540 + if True: #while windowInd < int(len(insArr)/10): sizes = [len(one_hot_good), len(one_hot_bad_ins),len(one_hot_bad_del),len(one_hot_bad_sub)] minSize = min(float(siz) for siz in sizes) # min of a generator @@ -351,13 +451,25 @@ def getInputTestTen(): print len(toPassTwo) print len(toPassThree) print len(toPassFour) - toPass = np.array((toPassOne, toPassTwo, toPassThree, toPassFour)) + #toPass = np.array((toPassOne, toPassTwo, toPassThree, toPassFour)) #print toPass.shape - a = toPass.astype(int) + toPass = [] + if(batchInd == 1): + toPass = toPassOne[:] + elif(batchInd == 2): + toPass = toPassTwo[:] + elif(batchInd == 3): + toPass = toPassThree[:] + elif(batchInd == 4): + toPass = toPassFour[:] + if(batchInd % 4 == 0): + batchInd = 1 + a = numpy.array(toPass).astype(int) #print b.shape - yield a + return a #print numpy.array(toPass).shape #print "mine too" + batchInd += 1 windowInd += 1 else: #print "NEXT FILE" @@ -393,6 +505,7 @@ def getOutputTestTen(): _, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = perform(10) windowInd = 0 fileInd = 10 + batchInd = 1 while fileInd <= 11: # 462540 #while windowInd < int(len(insArr)/10): sizes = [len(one_hot_good_out), len(one_hot_bad_ins_out),len(one_hot_bad_del_out),len(one_hot_bad_sub_out)] @@ -419,13 +532,25 @@ def getOutputTestTen(): if y < len(one_hot_bad_sub_out): toPassFour.append(one_hot_bad_sub_out[y]) #print len(toPass) - toPass = np.array((toPassOne, toPassTwo, toPassThree, toPassFour)) + #toPass = np.array((toPassOne, toPassTwo, toPassThree, toPassFour)) #print toPass.shape - a = toPass.astype(int) + toPass = [] + if(batchInd == 1): + toPass = toPassOne[:] + elif(batchInd == 2): + toPass = toPassTwo[:] + elif(batchInd == 3): + toPass = toPassThree[:] + elif(batchInd == 4): + toPass = toPassFour[:] + if(batchInd % 4 == 0): + batchInd = 1 + a = numpy.array(toPass).astype(int) #print b.shape yield a #print numpy.array(toPass).shape #print "mine" + batchInd += 1 windowInd += 1 else: #print "NEXT FILE" @@ -475,11 +600,11 @@ def initData(): model = Sequential() - model.add(Dense(102, activation='relu', input_shape=(10,88))) + model.add(Dense(102, activation='relu', input_shape=(88,))) model.add(Dropout(0.5)) model.add(Dense(102, activation='relu')) model.add(Dropout(0.5)) - model.add(Dense(102, activation='sigmoid')) + model.add(Dense(102, activation='softmax')) # For a binary classification problem @@ -487,20 +612,23 @@ def initData(): # loss='binary_crossentropy', # metrics=['accuracy']) - #opt = optimizers.SGD(lr=0.2) - opt = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0) - model.compile(loss = "binary_crossentropy", optimizer = opt, metrics=['accuracy']) + opt = optimizers.SGD(lr=0.001) + #opt = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0) + model.compile(loss = "categorical_crossentropy", optimizer = opt, metrics=['accuracy']) #zipped = iter() #print type(zipped) history = model.fit_generator( izip(getInputTen(), getOutputTen()), - steps_per_epoch=10, + steps_per_epoch=12, validation_data=izip(getInputValTen(), getOutputValTen()), - validation_steps=15, - epochs=2000, - callbacks=[ + validation_steps=12, + epochs=200, + verbose=2 + ) + ''' + callbacks=[ ModelCheckpoint( str(weight_path_pattern), save_best_only=False, @@ -509,15 +637,15 @@ def initData(): ), CSVLogger(str(log_path), append=True), EarlyStopping(patience=3, mode='auto') - ], - verbose=2 - ) + ], + ''' # list all data in history print(history.history.keys()) print len(history.history.keys()) # summarize history for accuracy + plt.plot(history.history['acc']) plt.plot(history.history['val_acc']) plt.title('model accuracy') @@ -539,16 +667,28 @@ def initData(): print "MODEL FIT" - scores = model.evaluate_generator(izip(getInputTestTen(), getOutputTestTen()), steps=10) - - outPredict = model.predict_generator(getInputTestTen(), steps=10, verbose=1) + #scores = model.evaluate_generator(izip(getInputTestTen(), getOutputTestTen()), steps=10) + genIn = getInputTestTen() + input_testT = [] + for x in genIn: + input_testT.append(x) + + print len(input_testT) + print len(input_testT[0]) + print input_testT[0][0] + from numpy import zeros, newaxis + a = numpy.array(input_testT) + print a.shape + + #outPredict = model.predict_generator(getInputTestTen(), steps=10, verbose=1) + outPredict = model.predict(a, batch_size=4, verbose=1) print "PREDICT" print len(outPredict) print outPredict.shape #outPredict = outPredict.astype(int) for x in outPredict: - print x[0][0] + print x[0] #scores = model.evaluate(a, c, batch_size=10) print "SCORE" diff --git a/mutate_token_delete.py b/mutate_token_delete.py index a4c0b00..c895c50 100644 --- a/mutate_token_delete.py +++ b/mutate_token_delete.py @@ -4,7 +4,6 @@ from pprint import pprint import tokenize from check_pypy_syntax import checkPyPySyntax -from check_pypy_syntax import checkPyPySyntaxDel from compile_error import CompileError import token from Token import Token @@ -122,7 +121,7 @@ def deleteTokMut(raw_tokens, raw_text): #print new_text - toTest = checkPyPySyntaxDel(new_text) + toTest = checkPyPySyntax(new_text) if toTest == None: #print "Try again..." diff --git a/mutate_token_delete.pyc b/mutate_token_delete.pyc index 9700f18..45fda3f 100644 Binary files a/mutate_token_delete.pyc and b/mutate_token_delete.pyc differ diff --git a/mutate_token_insert.py b/mutate_token_insert.py index af1cfa7..1b1ec6d 100644 --- a/mutate_token_insert.py +++ b/mutate_token_insert.py @@ -3,7 +3,7 @@ import json from pprint import pprint import tokenize -from check_pypy_syntax import checkPyPySyntaxIns +from check_pypy_syntax import checkPyPySyntax from compile_error import CompileError import token from Token import Token @@ -177,7 +177,7 @@ def insertTokMut(raw_tokens, raw_text): #print new_text - toTest = checkPyPySyntaxIns(new_text) + toTest = checkPyPySyntax(new_text) if toTest == None: #print "Try again..." diff --git a/mutate_token_insert.pyc b/mutate_token_insert.pyc index c3cc2cc..525ba1e 100644 Binary files a/mutate_token_insert.pyc and b/mutate_token_insert.pyc differ diff --git a/mutate_token_sub.py b/mutate_token_sub.py index 86716e3..4856fbc 100644 --- a/mutate_token_sub.py +++ b/mutate_token_sub.py @@ -3,7 +3,7 @@ import json from pprint import pprint import tokenize -from check_pypy_syntax import checkPyPySyntaxSub +from check_pypy_syntax import checkPyPySyntax from compile_error import CompileError import token from Token import Token @@ -178,7 +178,7 @@ def subTokMut(raw_tokens, raw_text): #print new_text - toTest = checkPyPySyntaxSub(new_text) + toTest = checkPyPySyntax(new_text) if toTest == None: #print "Try again..." diff --git a/mutate_token_sub.pyc b/mutate_token_sub.pyc index 9a6fe15..2af2728 100644 Binary files a/mutate_token_sub.pyc and b/mutate_token_sub.pyc differ diff --git a/py_mutations_hub.py b/py_mutations_hub.py index 852cc42..8630d5d 100644 --- a/py_mutations_hub.py +++ b/py_mutations_hub.py @@ -544,19 +544,19 @@ def perform(curr): #print one_hot_good[0] #print one_hot_bad[0] - print "----------INPUT-------------" + #print "----------INPUT-------------" - print len(one_hot_good) - print len(one_hot_bad_ins) - print len(one_hot_bad_del) - print len(one_hot_bad_sub) + #print len(one_hot_good) + #print len(one_hot_bad_ins) + #print len(one_hot_bad_del) + #print len(one_hot_bad_sub) - print "----------OUTPUT-------------" + #print "----------OUTPUT-------------" - print len(one_hot_good_out) - print len(one_hot_bad_ins_out) - print len(one_hot_bad_del_out) - print len(one_hot_bad_sub_out) + #print len(one_hot_good_out) + #print len(one_hot_bad_ins_out) + #print len(one_hot_bad_del_out) + #print len(one_hot_bad_sub_out) #print curr diff --git a/py_mutations_hub.pyc b/py_mutations_hub.pyc index 86fe534..035b7f3 100644 Binary files a/py_mutations_hub.pyc and b/py_mutations_hub.pyc differ