Commit

Fixed up token substitution to make one substitution for every window
Dhvani Patel committed Jul 27, 2017
1 parent 9014946 commit 7cdeb5e
Showing 5 changed files with 825 additions and 25 deletions.
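The new subTokMutS in mutate_token_sub.py applies one token substitution per window of ten tokens, instead of tracking a single substitution the way subTokMut did. A minimal sketch of the windowing idea (not the exact boundary arithmetic used in the diff), assuming tokens are plain (type, value) pairs, whereas the committed code works on tokenizer token objects and draws replacements from vocabulary_mutate.json:

from random import randint

# Token kinds that subTokMutS never substitutes.
SKIP_TYPES = {'COMMENT', 'INDENT', 'DEDENT', 'NEWLINE', 'NL', 'ENDMARKER'}
WINDOW = 10  # window size; the committed code hard-codes 10

def pick_targets(tokens):
    """Pick one substitutable token index per WINDOW-token window."""
    windows = {}
    for ind, (tok_type, _tok_value) in enumerate(tokens):
        if tok_type in SKIP_TYPES:
            continue
        windows.setdefault(ind // WINDOW, []).append(ind)
    # one random eligible index from each non-empty window
    return [cands[randint(0, len(cands) - 1)] for cands in windows.values()]

Each chosen index is then given a random replacement token from the vocabulary, and the mutation for that window is kept only when checkPyPySyntax stops returning None; otherwise the window is retried, up to ten times.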
Binary file modified __pycache__/toCheck.pypy-41.pyc
206 changes: 206 additions & 0 deletions mutate_token_sub.py
@@ -39,6 +39,212 @@ def find_nth(haystack, needle, n):
        n -= 1
    return start

def subTokMutS(raw_tokens, all_tokens, raw_text):
    new_text = raw_text
    with open('vocabulary_mutate.json') as data_file:
        data = json.load(data_file)

    out_tokens_loc = []
    raw_tokens_pass = []
    actual_token_len = []
    orig = []
    for token in all_tokens:
        actual_token_len.append(token)

    for token in raw_tokens:
        orig.append(token)
        raw_tokens_pass.append(token)

    num_lines = len(actual_token_len)
    num_encode = len(orig)
    # one substitution per window of ten tokens
    numTokensNeeded = int(num_lines / 10)
    insToks = []
    fixToks = []
    chosens = []

    # indices of tokens that are eligible for substitution
    inds = []
    for ind, i in enumerate(actual_token_len):
        if i.type not in ('COMMENT', 'INDENT', 'DEDENT', 'NEWLINE', 'NL', 'ENDMARKER'):
            inds.append(ind)

    # group the eligible indices into windows of ten tokens
    allInds = []
    for nah in range(numTokensNeeded+1):
        temp = []
        for nahHoi in range(len(inds)):
            if nah != 0:
                flag = nah * 10
                pastFlag = (nah-1) * 10
                if pastFlag < inds[nahHoi] <= flag:
                    temp.append(inds[nahHoi])
        if len(temp) != 0:
            allInds.append(temp)

    curr = 0
    new_text = ''
    haha = -1
    radOut = 0
    while radOut < len(allInds):

        if radOut == (numTokensNeeded-1):
            param_start = haha
            param_end = num_lines-1
        else:
            param_start = radOut * 10
            param_end = param_start + 9
        haha = param_end

        toChooseArr = allInds[radOut]

        # pick one eligible token inside the current window
        chosenLineIndTemp = randint(0, len(toChooseArr)-1)
        chosenLineInd = toChooseArr[chosenLineIndTemp]
        chosens.append(chosenLineInd)

        source_code = raw_text

        send = actual_token_len[chosenLineInd]
        fixToks.append(send)

        # pick a random replacement token from the mutation vocabulary
        chosenInd = randint(0, 84)
        chosenToken = data["indexes_m"][chosenInd]

        global new_token
        new_token = []
        try:
            toksG = tokenize.tokenize(StringIO.StringIO(chosenToken).readline, handle_token)
        except tokenize.TokenError:
            pass

        insEdTok = new_token[0]
        insTok = insEdTok
        insToks.append(insTok)

        indexToRemove = source_code.index(actual_token_len[chosenLineInd].line)

        temp = source_code[indexToRemove:indexToRemove+len(actual_token_len[chosenLineInd].line)+1]

        change = temp.strip()
        shotInd = temp.index(raw_tokens_pass[chosenLineInd][1])
        check = temp.index(change)

        actual_target_ind = indexToRemove + shotInd

        if check == 0 and len(raw_tokens_pass[chosenLineInd][1]) == len(change):
            before = source_code[:indexToRemove]
        else:
            before = source_code[:actual_target_ind]

        after = source_code[actual_target_ind+len(raw_tokens_pass[chosenLineInd][1]):]

        if check == 0:
            new_text = before + chosenToken.encode() + after
        elif chosenInd == data["indexes_m"].index('\n'):
            # keep the original indentation when a newline is substituted in
            if after[0] == ' ':
                space = ' ' * (check-1)
            else:
                space = ' ' * (check)
            new_text = before + chosenToken.encode() + space + after
        else:
            new_text = before + chosenToken.encode() + after

        toTest = checkPyPySyntax(new_text)
        if toTest == None:
            # retry this window: undo the bookkeeping, move on after ten attempts
            curr = curr + 1
            if curr > 10:
                radOut = radOut + 1
            fixToks.remove(send)
            chosens.remove(chosenLineInd)
            insToks.remove(insTok)
        else:
            curr = 0
            radOut = radOut + 1

    return new_text, YES_TOKEN, SUBSTITUTION, chosens, fixToks, insToks

def subTokMut(raw_tokens, raw_text):

    with open('vocabulary_mutate.json') as data_file:
Binary file modified mutate_token_sub.pyc
74 changes: 49 additions & 25 deletions py_mutations_hub.py
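In perform(), the hub now records one position per window in removeInds and swaps the matching replacement token from insToksS into new_tokens_sub at that position. The delete-then-insert pair in the diff below amounts to a plain item assignment; a sketch, assuming the recorded positions are distinct and using a hypothetical helper name:

def swap_tokens(new_tokens_sub, removeInds, insToksS):
    # removeInds and insToksS are parallel lists, one entry per window.
    # perform() walks them in reverse, but since each delete is followed by
    # an insert at the same index, the list length never changes and the
    # iteration order does not affect the result.
    for ind, ins_tok in zip(removeInds, insToksS):
        new_tokens_sub[ind] = ins_tok
    return new_tokens_sub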
@@ -15,7 +15,7 @@
from mutate_deletion import deleteMut
from mutate_token_insert import insertTokMutS
from mutate_token_delete import deleteTokMutS
from mutate_token_sub import subTokMut
from mutate_token_sub import subTokMutS
import sys
import cPickle

@@ -558,11 +558,11 @@ def perform(curr):
    global indexed_tokens
    indexed_tokens = []
    #print type(raw_tokens)

    new_s_text, YES_TOKEN, SUBSTITUTION, out_tokens_loc_s, sendS, insTokS = subTokMut(raw_tokens, source_code)
    passBall = allGood[:]
    new_s_text, YES_TOKEN, SUBSTITUTION, chosensS, fixToksS, insToksS = subTokMutS(raw_tokens, passBall, source_code)

    while isinstance(new_s_text, int):
        new_s_text, YES_TOKEN, SUBSTITUTION, out_tokens_loc_s, sendS, insTokS = subTokMut(YES_TOKEN, SUBSTITUTION)
        new_s_text, YES_TOKEN, SUBSTITUTION, chosensS, fixToksS, insToksS = subTokMutS(YES_TOKEN, SUBSTITUTION)
        if isinstance(new_s_text, str):
            break
@@ -572,31 +572,53 @@ def perform(curr):

    new_tokens_sub = allGood[:]

    vocab_entry = open_closed_tokens(sendS)
    sendS.value = vocab_entry

    bruhInd = -1
    iterInd = 0
    for a in allGood:
        if a == sendS:
            bruhInd = iterInd
        iterInd = iterInd + 1
    #print bruhInd
    #print len(new_tokens_del)
    del new_tokens_sub[bruhInd]
    # normalise NL replacement tokens to NEWLINE
    temp = insToksS[:]
    for insTok in temp:
        if insTok.type == "NL":
            insToksS[insToksS.index(insTok)].type = "NEWLINE"


    tempFix = fixToksS[:]
    for send in tempFix:
        vocab_entry = open_closed_tokens(send)
        fixToksS[fixToksS.index(send)].value = vocab_entry

    # record the position of every substituted token in allGood
    removeInds = []
    for wow in range(len(chosensS)):
        bruhInd = -1
        iterInd = 0
        send = fixToksS[wow]
        #print send.value
        for a in allGood:
            if a == send:
                bruhInd = iterInd
            iterInd = iterInd + 1
        #print bruhInd
        #print "CHECK"
        #print len(new_tokens_del)
        removeInds.append(bruhInd)
        #del new_tokens_del[bruhInd]
        #print len(new_tokens_del)
        #print "DEL ROR"

    # SUB INSERT: swap the replacement token in at each recorded position
    #print len(removeInds)
    #print len(insToksS)
    comeIter = len(insToksS)-1
    for r in reversed(removeInds):
        del new_tokens_sub[r]
        #print insToksS[comeIter].value
        new_tokens_sub.insert(r, insToksS[comeIter])
        comeIter -= 1
    #for x in new_tokens_sub:
        #print x.value
    #print len(new_tokens_del)

    if insTokS.type == "NL":
        insTokS.type = "NEWLINE"
    if insTokS.type == "ENDMARKER":
        insTokS.type = "INDENT"

    #print insTokS.type
    #print insTokS.value
    #print "LUNCH"

    new_tokens_sub.insert(bruhInd, insTokS)

    one_hot_bad_sub = vocabularize_tokens(new_tokens_sub, True)

@@ -611,12 +633,13 @@ def perform(curr):
#print "sub"
#print sendS.type
#print sendS.value
oneH_sub_switch = set_from_json_nonarr(sendS, True)
#oneH_sub_switch = set_from_json_nonarr(sendS, True)
#print oneH_sub_switch
#print "rad"
for x in range(iterNum):
#if x <= trueErrorInd <= (x+trueErrorInd):
if x <= trueErrorInd <= x+(WINDOW_SIZE-1):
#if x <= trueErrorInd <= x+(WINDOW_SIZE-1):
if True:
# DIFF - ACTUAL ERROR
#print x
toAdd = []
@@ -630,8 +653,9 @@ def perform(curr):
            toAdd[4] = 1
            toAdd[5] = 0
            toAdd[6] = 0
            toAdd[7+trueErrorInd-x] = 1
            toAdd[17+oneH_sub_switch] = 1

            toAdd[7] = 1
            toAdd[17] = 1
            one_hot_bad_sub_out.append(toAdd)
        else:
            toAdd = []
