
Commit

Implemented sub; Fixed bug for insert and sub
Dhvani Patel committed Jul 14, 2017
1 parent 6c63e68 commit 77f4d9e
Showing 5 changed files with 82 additions and 25 deletions.
Binary file modified __pycache__/toCheck.pypy-41.pyc
Binary file modified mutate_token_insert.pyc
34 changes: 31 additions & 3 deletions mutate_token_sub.py
@@ -8,6 +8,7 @@
import token
from Token import Token
from random import randint
import StringIO

#Declaring Global Constants
YES_TOKEN = 0b00
@@ -16,6 +17,20 @@
DELETION = 0b010
SUBSTITUTION = 0b100

global new_token

# Create list of tokens
def handle_token(type, token, (srow, scol), (erow, ecol), line):
if repr(token)[:2] == 'u\'':
val = repr(token)[2:len(repr(token))-1]
else:
val = repr(token)[1:len(repr(token))-1]
send = Token(tokenize.tok_name[type], val, srow, scol, erow, ecol, line)
global new_token
new_token.append(send)
print "%d,%d-%d,%d:\t%s\t%s" % \
(srow, scol, erow, ecol, tokenize.tok_name[type], repr(token))

# Method for finding index of certain characters in a string, n being the nth occurrence of the character/string
def find_nth(haystack, needle, n):
start = haystack.find(needle.encode())
Expand All @@ -34,6 +49,17 @@ def subTokMut(raw_tokens, raw_text):
chosenInd = randint(0,84)
chosenToken = data["indexes_m"][chosenInd]
print chosenToken

global new_token
new_token = []
try:
toksG = tokenize.tokenize(StringIO.StringIO(chosenToken).readline, handle_token)
except tokenize.TokenError:
pass
#print type(toksG)
print len(new_token)
insEdTok = new_token[0]
insTokS = insEdTok

raw_tokens_pass = []
out_tokens_loc = []
@@ -153,8 +179,10 @@ def subTokMut(raw_tokens, raw_text):

if toTest == None:
print "Try again..."
subTokMut(raw_tokens_pass, raw_text)
return new_text, YES_TOKEN, SUBSTITUTION, out_tokens_loc, send
#subTokMut(raw_tokens_pass, raw_text)
lenR = 2
lenK = 2
return lenR, raw_tokens_pass, raw_text, lenK, send, insTokS
else:
print toTest[0]
print toTest[0].filename
@@ -163,7 +191,7 @@ def subTokMut(raw_tokens, raw_text):
print toTest[0].functionname
print toTest[0].text
print toTest[0].errorname
return new_text, YES_TOKEN, SUBSTITUTION, out_tokens_loc, send
return new_text, YES_TOKEN, SUBSTITUTION, out_tokens_loc, send, insTokS

print "-----------FINISHED-------------------"
print chosenLineInd+1
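For reference, the new substitution path leans on Python 2's callback-style tokenize API: tokenize.tokenize(readline, tokeneater) invokes the handler once per token, which is how handle_token fills the global new_token list. Below is a minimal, self-contained sketch of that pattern; SimpleToken is a hypothetical stand-in for the repository's Token class, which this diff does not show.

# Python 2 sketch of the tokeneater-callback pattern used by handle_token.
import tokenize
import StringIO

collected = []

class SimpleToken(object):
    # Hypothetical stand-in for the repository's Token class.
    def __init__(self, type_name, value, srow, scol, erow, ecol, line):
        self.type = type_name
        self.value = value
        self.srow, self.scol = srow, scol
        self.erow, self.ecol = erow, ecol
        self.line = line

def tok_eater(type, token, (srow, scol), (erow, ecol), line):
    # Called once per token by tokenize.tokenize (Python 2 API).
    collected.append(SimpleToken(tokenize.tok_name[type], token,
                                 srow, scol, erow, ecol, line))

try:
    tokenize.tokenize(StringIO.StringIO("while").readline, tok_eater)
except tokenize.TokenError:
    pass  # raised for incomplete input; earlier tokens were already delivered

print [t.type for t in collected]  # e.g. ['NAME', 'NEWLINE', 'ENDMARKER']

This also appears to be why the diff wraps the call in try/except and still reads new_token[0] afterwards: when TokenError is raised at end of input, the tokens seen up to that point have already been passed to the callback.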
Binary file modified mutate_token_sub.pyc
73 changes: 51 additions & 22 deletions py_mutations_hub.py
@@ -245,7 +245,7 @@ def perform():
if isinstance(new_i_text, str):
break

new_tokens_ins = all_tokens
new_tokens_ins = all_tokens[:]

if insTok.type == "NL":
insTok.type = "NEWLINE"
@@ -286,7 +286,7 @@ def perform():

print "NEXT STEP..."

new_tokens_del = allGood
new_tokens_del = allGood[:]

vocab_entry = open_closed_tokens(send)
send.value = vocab_entry
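
The switch from plain assignment to slice copies (new_tokens_ins = all_tokens[:] above, and new_tokens_del = allGood[:] here) appears to be the "fixed bug for insert and sub" part of this commit: without the copy, both names alias one list, so the later del/insert mutates the clean token stream as well. A quick illustration of the difference:

good = ['NAME', 'OP', 'NUMBER', 'NEWLINE']

aliased = good          # no copy: both names point at the same list
del aliased[1]
print len(good)         # 3 -- the original list was mutated too

good = ['NAME', 'OP', 'NUMBER', 'NEWLINE']
copied = good[:]        # shallow copy, as in new_tokens_del = allGood[:]
del copied[1]
print len(good)         # 4 -- the original list is untouched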
@@ -308,20 +308,51 @@ def perform():

# SUB
raw_tokens = tokenize.generate_tokens(StringIO.StringIO(all_rows[curr][0]).readline)
global all_tokens
all_tokens = []
#global all_tokens
#all_tokens = []
global indexed_tokens
indexed_tokens = []
print type(raw_tokens)

new_s_text, YES_TOKEN, SUBSTITUTION, out_tokens_loc_s, sendS = subTokMut(raw_tokens, source_code)
new_s_text, YES_TOKEN, SUBSTITUTION, out_tokens_loc_s, sendS, insTokS = subTokMut(raw_tokens, source_code)

while isinstance(new_s_text, int):
new_s_text, YES_TOKEN, SUBSTITUTION, out_tokens_loc_s, sendS, insTokS = subTokMut(YES_TOKEN, SUBSTITUTION)
if isinstance(new_s_text, str):
break

print "NEXT STEP..."
try:
newTokenStream = tokenize.tokenize(StringIO.StringIO(new_s_text).readline, handle_token)
except (tokenize.TokenError) as e:
pass
new_tokens_sub = all_tokens

# SUB DELETE

new_tokens_sub = allGood[:]

vocab_entry = open_closed_tokens(sendS)
sendS.value = vocab_entry

bruhInd = -1
iterInd = 0
for a in allGood:
if a == sendS:
bruhInd = iterInd
iterInd = iterInd + 1
#print bruhInd
#print len(new_tokens_del)
del new_tokens_sub[bruhInd]

# SUB INSERT

if insTokS.type == "NL":
insTokS.type = "NEWLINE"
if insTokS.type == "ENDMARKER":
insTokS.type = "INDENT"

print insTokS.type
print insTokS.value
print "LUNCH"

new_tokens_sub.insert(bruhInd, insTokS)

one_hot_bad_sub = vocabularize_tokens(new_tokens_sub, True)

# MUTATIONS PER CHARACTER
@@ -339,23 +370,21 @@ def perform():
print source_code
print len(new_i_text)
print len(new_d_text)
print new_i_text
print new_d_text

print len(new_tokens_del)
print len(allGood)


if len(one_hot_bad_del) != len(one_hot_good)-1:
for token in new_tokens_ins:
#print token.type
print token.value
print "<3 <3 <3 GOOD:"
for token in allGood:
#print token.type
print token.value
else:
perform()
return

for token in new_tokens_del:
#print token.type
print token.value
print "<3 <3 <3 GOOD:"
for token in allGood:
#print token.type
print token.value


#one_hot_all = np.concatenate((one_hot_good, one_hot_bad), axis=0)

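Taken together, the SUB DELETE and SUB INSERT steps above splice the substitution into a copy of the clean stream: find the index of the token being replaced, delete it, and insert the replacement (with NL/ENDMARKER types normalized) at that position. A condensed sketch of that splice, using illustrative names rather than the repository's:

# Assumes token objects with a mutable .type attribute, like the repo's Token.
def splice_substitution(good_tokens, old_token, replacement):
    # Normalize tokenizer artifacts the same way the diff does for insTokS.
    if replacement.type == "NL":
        replacement.type = "NEWLINE"
    if replacement.type == "ENDMARKER":
        replacement.type = "INDENT"

    mutated = good_tokens[:]            # copy so the clean stream survives
    target = -1
    for i, tok in enumerate(mutated):   # last match wins, as in the diff's loop
        if tok == old_token:
            target = i
    if target == -1:
        return mutated                  # guard not present in the diff
    del mutated[target]
    mutated.insert(target, replacement)
    return mutated

The guard for a missing match is an addition here; in the diff itself, bruhInd stays -1 when sendS is not found in allGood, so del new_tokens_sub[bruhInd] would drop the last token instead.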
