
Commit 90d696c
Added start and end markers
Dhvani Patel committed Jul 17, 2017
1 parent 1cc2870 commit 90d696c
Showing 4 changed files with 29 additions and 9 deletions.
Binary file modified __pycache__/toCheck.pypy-41.pyc
2 changes: 1 addition & 1 deletion keras_model.py
@@ -35,7 +35,7 @@ def __getitem__(self,idx):


 def create_batches():
-    one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = perform()
+    one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = perform()
     print "Finished..."

     ohg_g = chunker(one_hot_good, 10)
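For context, chunker is defined elsewhere in this repo; a minimal sketch of the usual pattern (the repo's own definition may differ) shows how the one-hot stream is split into fixed-size batches:

# Hypothetical chunker: split a sequence into fixed-size pieces
# (the last chunk may be shorter). Assumed to match the repo's helper.
def chunker(seq, size):
    return [seq[pos:pos + size] for pos in range(0, len(seq), size)]

print(chunker(list(range(25)), 10))  # [[0..9], [10..19], [20..24]]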
34 changes: 27 additions & 7 deletions py_mutations_hub.py
@@ -50,7 +50,7 @@

 def one_hot(indexed_tokens):
     one_hot = []
-    nb_classes = 85
+    nb_classes = 87
     one_hot_targets = np.eye(nb_classes)[indexed_tokens]
     one_hot = one_hot_targets.tolist()
     return one_hot
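The jump from 85 to 87 classes matches the two marker tokens, "<s>" and "</s>", appended to vocabulary.json below. A minimal sketch of the np.eye one-hot trick this function relies on:

import numpy as np

# Row i of the identity matrix is the one-hot vector for class i, so
# indexing np.eye(nb_classes) with a list of token indexes yields one
# one-hot row per token. 85 vocabulary entries + "<s>" + "</s>" = 87.
nb_classes = 87
indexed_tokens = [0, 85, 86]          # "!=", "<s>", "</s>"
one_hot_targets = np.eye(nb_classes)[indexed_tokens]
print(one_hot_targets.shape)          # (3, 87)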
@@ -68,6 +68,10 @@ def set_from_json(all_tokens, flag):
     #print token.line
     global indexed_tokens
     indexed_tokens.append(data["indexes"].index(toCompare))
+    for r in range(9):
+        indexed_tokens.insert(r, data["indexes"].index(START_TOKEN))
+    indexed_tokens.append(data["indexes"].index(END_TOKEN))
+
     print indexed_tokens
     return one_hot(indexed_tokens)
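Since every inserted value is identical, the loop above amounts to prepending nine copies of the start marker (nine is presumably WINDOW_SIZE - 1 for a window size of 10), and a single end marker is appended. A sketch of the resulting layout, assuming START_TOKEN and END_TOKEN name the new "<s>" and "</s>" entries at indexes 85 and 86:

WINDOW_SIZE = 10
START, END = 85, 86                   # "<s>", "</s>" after this commit
indexed_tokens = [4, 29, 42]          # some indexed token stream
padded = [START] * (WINDOW_SIZE - 1) + indexed_tokens + [END]
print(padded)                         # [85]*9 + [4, 29, 42] + [86]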

@@ -232,13 +236,23 @@ def perform():
     tokenStream = tokenize.tokenize(StringIO.StringIO(all_rows[curr][0]).readline, handle_token)
     print "RAW"
     print len(all_tokens)
-    allGood = all_tokens[:]
+    allGood = []
+    global all_tokens
+    allGood = list(all_tokens)
     one_hot_good = vocabularize_tokens(all_tokens, False)
-    one_hot_gOut = [0] * NUM_BITS_OUTPUT
+    one_hot_good_out = []
+    for x in range(len(all_tokens)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1)):
+        toAdd = []
+        toAdd = [0] * NUM_BITS_OUTPUT
+        toAdd[0] = 0
+        toAdd[1] = 1 # FIRST BIT (01) - INDICATE NO ERROR (1 because rest are 0 and so add up to 1)
+        one_hot_good_out.append(toAdd)
+
+
     print "DHVANI"
     print len(one_hot_good)
-    print len(allGood)
+    print len(all_tokens)

     raw_tokens = tokenize.generate_tokens(StringIO.StringIO(all_rows[curr][0]).readline)
     source_code = str(all_rows[curr][0])
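The new loop replaces the single one_hot_gOut vector with one label per sliding window: a token stream of length n gets n + 2*(WINDOW_SIZE - 1) "no error" labels, each one-hot with bit 1 set. A minimal sketch (NUM_BITS_OUTPUT is assumed here; the real constant is defined elsewhere in the file):

WINDOW_SIZE = 10
NUM_BITS_OUTPUT = 4                   # assumed: no-error/insert/delete/sub
n = 3                                 # stand-in for len(all_tokens)
one_hot_good_out = []
for _ in range(n + 2 * (WINDOW_SIZE - 1)):
    toAdd = [0] * NUM_BITS_OUTPUT
    toAdd[1] = 1                      # bit pattern 01... marks "no error"
    one_hot_good_out.append(toAdd)
print(len(one_hot_good_out))          # 21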
@@ -285,7 +299,7 @@ def perform():
     one_hot_bad_ins_out = []
     trueErrorInd = (bruhInd)+(WINDOW_SIZE-1)
     # INSERT OUT_PUT
-    iterNum = len(allGood)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1)
+    iterNum = len(new_tokens_ins)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1)
     #print "divide"
     #print trueErrorInd
     #print iterNum
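This is the substantive fix in the mutation branches (the delete and substitution hunks below get the same treatment): the output length must track the mutated stream, not the pristine allGood copy, because an insertion lengthens the stream by one and a deletion shortens it. A sketch of the off-by-one this avoids:

WINDOW_SIZE = 10
allGood = [7, 8, 9]                   # pristine token stream
new_tokens_ins = [7, 8, 5, 9]         # after one inserted token
old = len(allGood) + 2 * (WINDOW_SIZE - 1)          # 21: one label short
new = len(new_tokens_ins) + 2 * (WINDOW_SIZE - 1)   # 22: matches the windows
print(old)
print(new)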
@@ -362,7 +376,7 @@ def perform():
     trueErrorInd = (bruhInd)+(WINDOW_SIZE-1)

     # DELETE OUT_PUT
-    iterNum = len(allGood)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1)
+    iterNum = len(new_tokens_del)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1)
     #print "divide"
     #print len(send)
     #print trueErrorInd
@@ -452,7 +466,7 @@ def perform():
     one_hot_bad_sub_out = []
     trueErrorInd = (bruhInd)+(WINDOW_SIZE-1)
     # SUB OUT_PUT
-    iterNum = len(allGood)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1)
+    iterNum = len(new_tokens_sub)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1)
     print "divide"
     #print len(send)
     print trueErrorInd
@@ -487,6 +501,7 @@ def perform():
         one_hot_bad_sub_out.append(toAdd)
     print "Morning"
     print len(allGood)
+    print len(all_tokens)
     print len(one_hot_bad_sub_out)
     print one_hot_bad_sub_out[trueErrorInd]

@@ -499,11 +514,16 @@ def perform():
     #print one_hot_good[0]
     #print one_hot_bad[0]

+    print "----------INPUT-------------"
+
     print len(one_hot_good)
     print len(one_hot_bad_ins)
     print len(one_hot_bad_del)
     print len(one_hot_bad_sub)

+    print "----------OUTPUT-------------"
+
+    print len(one_hot_good_out)
     print len(one_hot_bad_ins_out)
     print len(one_hot_bad_del_out)
     print len(one_hot_bad_sub_out)
@@ -512,7 +532,7 @@ def perform():
         #one_hot_all = np.concatenate((one_hot_good, one_hot_bad), axis=0)

         print "SUCCESS"
-        return one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out
+        return one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out

     else:
         print "Try again..."
2 changes: 1 addition & 1 deletion vocabulary.json
@@ -1,2 +1,2 @@
{"indexes": ["!=", "%", "%=", "&", "&=", "(", ")", "*", "**", "**=", "*=", "+", "+=", ",", "-", "-=", "print", ".", "~", "/", "//", "//=", "/=", ":", ";", "<", "<<", "<<=", "<=", "<IDENTIFIER>", "<NUMBER>", "<STRING>", "=", "==", ">", ">=", ">>", ">>=", "@", "DEDENT", "False", "INDENT", "NEWLINE", "None", "True", "[", "]", "^", "^=", "and", "as", "assert", "async", "await", "break", "class", "continue", "def", "del", "elif", "else", "except", "finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "nonlocal", "not", "or", "pass", "raise", "return", "try", "while", "with", "yield", "{", "|", "|=", "}"]
{"indexes": ["!=", "%", "%=", "&", "&=", "(", ")", "*", "**", "**=", "*=", "+", "+=", ",", "-", "-=", "print", ".", "~", "/", "//", "//=", "/=", ":", ";", "<", "<<", "<<=", "<=", "<IDENTIFIER>", "<NUMBER>", "<STRING>", "=", "==", ">", ">=", ">>", ">>=", "@", "DEDENT", "False", "INDENT", "NEWLINE", "None", "True", "[", "]", "^", "^=", "and", "as", "assert", "async", "await", "break", "class", "continue", "def", "del", "elif", "else", "except", "finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "nonlocal", "not", "or", "pass", "raise", "return", "try", "while", "with", "yield", "{", "|", "|=", "}", "<s>", "</s>"]
}
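A quick way to sanity-check the updated vocabulary (assuming the file lives at the repo root):

import json

with open("vocabulary.json") as f:
    vocab = json.load(f)["indexes"]
print(len(vocab))                 # 87, matching nb_classes above
print(vocab.index("<s>"))         # 85
print(vocab.index("</s>"))        # 86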
