Commit

Fixed up token substitution to make one substitution for every window
Dhvani Patel committed Jul 27, 2017
1 parent 9014946 commit 7cdeb5e
Showing 5 changed files with 825 additions and 25 deletions.
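The new subTokMutS in mutate_token_sub.py applies one token substitution per window of ten tokens, instead of tracking a single substitution the way subTokMut did. A minimal sketch of the windowing idea (not the exact boundary arithmetic used in the diff), assuming tokens are plain (type, value) pairs, whereas the committed code works on tokenizer token objects and draws replacements from vocabulary_mutate.json:

from random import randint

# Token kinds that subTokMutS never substitutes.
SKIP_TYPES = {'COMMENT', 'INDENT', 'DEDENT', 'NEWLINE', 'NL', 'ENDMARKER'}
WINDOW = 10  # window size; the committed code hard-codes 10

def pick_targets(tokens):
    """Pick one substitutable token index per WINDOW-token window."""
    windows = {}
    for ind, (tok_type, _tok_value) in enumerate(tokens):
        if tok_type in SKIP_TYPES:
            continue
        windows.setdefault(ind // WINDOW, []).append(ind)
    # one random eligible index from each non-empty window
    return [cands[randint(0, len(cands) - 1)] for cands in windows.values()]

Each chosen index is then given a random replacement token from the vocabulary, and the mutation for that window is kept only when checkPyPySyntax stops returning None; otherwise the window is retried, up to ten times.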
Binary file modified __pycache__/toCheck.pypy-41.pyc
206 changes: 206 additions & 0 deletions mutate_token_sub.py
@@ -39,6 +39,212 @@ def find_nth(haystack, needle, n):
        n -= 1
    return start

def subTokMutS(raw_tokens, all_tokens, raw_text):
    new_text = raw_text
    with open('vocabulary_mutate.json') as data_file:
        data = json.load(data_file)

    out_tokens_loc = []
    raw_tokens_pass = []
    actual_token_len = []
    orig = []
    for token in all_tokens:
        actual_token_len.append(token)

    for token in raw_tokens:
        orig.append(token)
        raw_tokens_pass.append(token)

    num_lines = len(actual_token_len)
    num_encode = len(orig)
    # one substitution per window of ten tokens
    numTokensNeeded = int(num_lines / 10)
    insToks = []
    fixToks = []
    chosens = []

    # indices of tokens that are eligible for substitution
    inds = []
    for ind, i in enumerate(actual_token_len):
        if i.type not in ('COMMENT', 'INDENT', 'DEDENT', 'NEWLINE', 'NL', 'ENDMARKER'):
            inds.append(ind)

    # group the eligible indices into windows of ten tokens
    allInds = []
    for nah in range(numTokensNeeded+1):
        temp = []
        for nahHoi in range(len(inds)):
            if nah != 0:
                flag = nah * 10
                pastFlag = (nah-1) * 10
                if pastFlag < inds[nahHoi] <= flag:
                    temp.append(inds[nahHoi])
        if len(temp) != 0:
            allInds.append(temp)

    curr = 0
    new_text = ''
    haha = -1
    radOut = 0
    while radOut < len(allInds):

        if radOut == (numTokensNeeded-1):
            param_start = haha
            param_end = num_lines-1
        else:
            param_start = radOut * 10
            param_end = param_start + 9
        haha = param_end

        toChooseArr = allInds[radOut]

        # pick one eligible token inside the current window
        chosenLineIndTemp = randint(0, len(toChooseArr)-1)
        chosenLineInd = toChooseArr[chosenLineIndTemp]
        chosens.append(chosenLineInd)

        source_code = raw_text

        send = actual_token_len[chosenLineInd]
        fixToks.append(send)

        # pick a random replacement token from the mutation vocabulary
        chosenInd = randint(0, 84)
        chosenToken = data["indexes_m"][chosenInd]

        global new_token
        new_token = []
        try:
            toksG = tokenize.tokenize(StringIO.StringIO(chosenToken).readline, handle_token)
        except tokenize.TokenError:
            pass

        insEdTok = new_token[0]
        insTok = insEdTok
        insToks.append(insTok)

        indexToRemove = source_code.index(actual_token_len[chosenLineInd].line)

        temp = source_code[indexToRemove:indexToRemove+len(actual_token_len[chosenLineInd].line)+1]

        change = temp.strip()
        shotInd = temp.index(raw_tokens_pass[chosenLineInd][1])
        check = temp.index(change)

        actual_target_ind = indexToRemove + shotInd

        if check == 0 and len(raw_tokens_pass[chosenLineInd][1]) == len(change):
            before = source_code[:indexToRemove]
        else:
            before = source_code[:actual_target_ind]

        after = source_code[actual_target_ind+len(raw_tokens_pass[chosenLineInd][1]):]

        if check == 0:
            new_text = before + chosenToken.encode() + after
        elif chosenInd == data["indexes_m"].index('\n'):
            # keep the original indentation when a newline is substituted in
            if after[0] == ' ':
                space = ' ' * (check-1)
            else:
                space = ' ' * (check)
            new_text = before + chosenToken.encode() + space + after
        else:
            new_text = before + chosenToken.encode() + after

        toTest = checkPyPySyntax(new_text)
        if toTest == None:
            # retry this window: undo the bookkeeping, move on after ten attempts
            curr = curr + 1
            if curr > 10:
                radOut = radOut + 1
            fixToks.remove(send)
            chosens.remove(chosenLineInd)
            insToks.remove(insTok)
        else:
            curr = 0
            radOut = radOut + 1

    return new_text, YES_TOKEN, SUBSTITUTION, chosens, fixToks, insToks

def subTokMut(raw_tokens, raw_text):

    with open('vocabulary_mutate.json') as data_file:
Binary file modified mutate_token_sub.pyc
74 changes: 49 additions & 25 deletions py_mutations_hub.py
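In perform(), the hub now records one position per window in removeInds and swaps the matching replacement token from insToksS into new_tokens_sub at that position. The delete-then-insert pair in the diff below amounts to a plain item assignment; a sketch, assuming the recorded positions are distinct and using a hypothetical helper name:

def swap_tokens(new_tokens_sub, removeInds, insToksS):
    # removeInds and insToksS are parallel lists, one entry per window.
    # perform() walks them in reverse, but since each delete is followed by
    # an insert at the same index, the list length never changes and the
    # iteration order does not affect the result.
    for ind, ins_tok in zip(removeInds, insToksS):
        new_tokens_sub[ind] = ins_tok
    return new_tokens_sub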
@@ -15,7 +15,7 @@
from mutate_deletion import deleteMut
from mutate_token_insert import insertTokMutS
from mutate_token_delete import deleteTokMutS
from mutate_token_sub import subTokMut
from mutate_token_sub import subTokMutS
import sys
import cPickle

@@ -558,11 +558,11 @@ def perform(curr):
    global indexed_tokens
    indexed_tokens = []
    #print type(raw_tokens)

    new_s_text, YES_TOKEN, SUBSTITUTION, out_tokens_loc_s, sendS, insTokS = subTokMut(raw_tokens, source_code)
    passBall = allGood[:]
    new_s_text, YES_TOKEN, SUBSTITUTION, chosensS, fixToksS, insToksS = subTokMutS(raw_tokens, passBall, source_code)

    while isinstance(new_s_text, int):
        new_s_text, YES_TOKEN, SUBSTITUTION, out_tokens_loc_s, sendS, insTokS = subTokMut(YES_TOKEN, SUBSTITUTION)
        new_s_text, YES_TOKEN, SUBSTITUTION, chosensS, fixToksS, insToksS = subTokMutS(YES_TOKEN, SUBSTITUTION)
        if isinstance(new_s_text, str):
            break
@@ -572,31 +572,53 @@ def perform(curr):

    new_tokens_sub = allGood[:]

    vocab_entry = open_closed_tokens(sendS)
    sendS.value = vocab_entry

    bruhInd = -1
    iterInd = 0
    for a in allGood:
        if a == sendS:
            bruhInd = iterInd
        iterInd = iterInd + 1
    #print bruhInd
    #print len(new_tokens_del)
    del new_tokens_sub[bruhInd]
    # normalise NL replacement tokens to NEWLINE
    temp = insToksS[:]
    for insTok in temp:
        if insTok.type == "NL":
            insToksS[insToksS.index(insTok)].type = "NEWLINE"


    tempFix = fixToksS[:]
    for send in tempFix:
        vocab_entry = open_closed_tokens(send)
        fixToksS[fixToksS.index(send)].value = vocab_entry

    # record the position of every substituted token in allGood
    removeInds = []
    for wow in range(len(chosensS)):
        bruhInd = -1
        iterInd = 0
        send = fixToksS[wow]
        #print send.value
        for a in allGood:
            if a == send:
                bruhInd = iterInd
            iterInd = iterInd + 1
        #print bruhInd
        #print "CHECK"
        #print len(new_tokens_del)
        removeInds.append(bruhInd)
        #del new_tokens_del[bruhInd]
        #print len(new_tokens_del)
        #print "DEL ROR"

    # SUB INSERT: swap the replacement token in at each recorded position
    #print len(removeInds)
    #print len(insToksS)
    comeIter = len(insToksS)-1
    for r in reversed(removeInds):
        del new_tokens_sub[r]
        #print insToksS[comeIter].value
        new_tokens_sub.insert(r, insToksS[comeIter])
        comeIter -= 1
    #for x in new_tokens_sub:
        #print x.value
    #print len(new_tokens_del)

    if insTokS.type == "NL":
        insTokS.type = "NEWLINE"
    if insTokS.type == "ENDMARKER":
        insTokS.type = "INDENT"

    #print insTokS.type
    #print insTokS.value
    #print "LUNCH"

    new_tokens_sub.insert(bruhInd, insTokS)

    one_hot_bad_sub = vocabularize_tokens(new_tokens_sub, True)

@@ -611,12 +633,13 @@ def perform(curr):
#print "sub"
#print sendS.type
#print sendS.value
oneH_sub_switch = set_from_json_nonarr(sendS, True)
#oneH_sub_switch = set_from_json_nonarr(sendS, True)
#print oneH_sub_switch
#print "rad"
for x in range(iterNum):
#if x <= trueErrorInd <= (x+trueErrorInd):
if x <= trueErrorInd <= x+(WINDOW_SIZE-1):
#if x <= trueErrorInd <= x+(WINDOW_SIZE-1):
if True:
# DIFF - ACTUAL ERROR
#print x
toAdd = []
@@ -630,8 +653,9 @@ def perform(curr):
            toAdd[4] = 1
            toAdd[5] = 0
            toAdd[6] = 0
            toAdd[7+trueErrorInd-x] = 1
            toAdd[17+oneH_sub_switch] = 1

            toAdd[7] = 1
            toAdd[17] = 1
            one_hot_bad_sub_out.append(toAdd)
        else:
            toAdd = []
