Script for finding col

naturalness · Aug 9, 2017 · 4597c26 · 4597c26
1 parent 88ffea6
commit 4597c26
Show file tree

Hide file tree

Showing 5 changed files with 231 additions and 14 deletions.
diff --git a/Scripts/.~lock.eclipse_fixes.csv# b/Scripts/.~lock.eclipse_fixes.csv#
diff --git a/Scripts/col_finder.py b/Scripts/col_finder.py
@@ -0,0 +1,86 @@
+import csv
+import os
+from javac_parser import Java
+from io import open
+import sys
+import keyword
+import difflib
+
+def _lexTokenKinds(java, path):
+	"""Lex the Java file at `path` and return (tokens, token-kind names).
+
+	`tokens` is whatever `java.lex` returns for the file's text; the kind
+	name of each token is its first element.  DOUBLELITERAL kinds are
+	recorded as INTLITERAL, so the two literal kinds are treated as the
+	same symbol when the kind sequences are diffed.
+	"""
+	with open(path, 'r') as srcFile:
+		toks = java.lex(srcFile.read())
+	kinds = []
+	for token in toks:
+		if token[0] == "DOUBLELITERAL":
+			kinds.append("INTLITERAL")
+		else:
+			kinds.append(token[0])
+	return toks, kinds
+
+
+def _tokenAt(toks, index):
+	"""Return toks[index], or None when index is one past the end.
+
+	An opcode for a trailing insert/delete starts at len(sequence), which
+	is not a valid token index.
+	"""
+	if index < len(toks):
+		return toks[index]
+	return None
+
+
+def getCol():
+	"""Diff the token-kind sequences of each before/after Java file pair.
+
+	Walks the hard-coded data directory, collecting every path containing
+	"after.java" as an "after" file and every other path (except those
+	containing "mistakes.csv") as a "before" file.  Each pair is lexed with
+	javac_parser and the two kind sequences are compared with
+	difflib.SequenceMatcher (autojunk disabled).
+
+	For every pair the two paths and the pair index are printed.  The first
+	pair whose diff has more than 3 opcodes has each opcode printed together
+	with the "before" token at its i1 index and the "after" token at its j1
+	index, and the run then stops via sys.exit.
+
+	Raises:
+		AssertionError: if the numbers of before and after files differ.
+	"""
+	rootdir = '/home/dhvani/java-mistakes-data'
+
+	strPathsBef = []
+	strPathsAft = []
+
+	# Init java tokenizer:
+	java = Java()
+
+	for subdir, _dirs, fileNames in os.walk(rootdir):
+		for fileName in fileNames:
+			path = os.path.join(subdir, fileName)
+			if "mistakes.csv" in path:
+				continue
+			if "after.java" in path:
+				strPathsAft.append(path)
+			else:
+				strPathsBef.append(path)
+	# Pairs are matched purely by position in the two lists.
+	# NOTE(review): this assumes each directory holds exactly one before
+	# file and one after file; only the total counts are checked here.
+	assert len(strPathsBef) == len(strPathsAft)
+
+	for loopInd, (beforeF, afterF) in enumerate(zip(strPathsBef, strPathsAft)):
+		beforeToks, beforeStrList = _lexTokenKinds(java, beforeF)
+		afterToks, afterStrList = _lexTokenKinds(java, afterF)
+
+		print beforeF
+		print afterF
+		s = difflib.SequenceMatcher(None, beforeStrList, afterStrList, autojunk=False)
+		opcodes = s.get_opcodes()
+		if len(opcodes) > 3:
+			for opcode in opcodes:
+				print opcode
+				# opcode is (tag, i1, i2, j1, j2): i1 indexes the before
+				# sequence, j1 indexes the after sequence.
+				print _tokenAt(beforeToks, opcode[1])
+				print _tokenAt(afterToks, opcode[3])
+				print "-----------"
+			# Stop at the first pair with more than 3 opcodes so it can
+			# be inspected by hand.
+			sys.exit("getCol: stopped at pair %d (more than 3 diff opcodes)" % loopInd)
+		print loopInd
+
+	print "FINISHED"
+
+	#getToks = java.lex(toCompTokD)
+
+
+if __name__ == "__main__":
+	# Run the scan only when this file is executed as a script.
+	getCol()
diff --git a/Scripts/violin_javac_class.py b/Scripts/violin_javac_class.py
@@ -41,17 +41,14 @@ def create_plot_class(file_name):
 			fileName = row[2]
 			if fileName == 'file':
 				continue
-        		sfid = fileName[:fileName.index('/')]
-			meid = fileName[fileName.index('/')+1:]
-			#print sfid
-			#print meid
-			#print row
+        		sfid = row[1]
+			meid = row[2]
 			if sfid == beforeS and meid == beforeM:
 				countRank += 1
-				if row[14] == 'f.kind':
+				if row[6] == 'f.kind':
 					continue
-				toCompClass = row[14]
-				assert toCompClass in valid
+				toCompClass = row[6]
+				#assert toCompClass in valid
 				print toCompClass
 				if toCompClass == actual_class:
 					all_ranks.append(countRank)
@@ -74,8 +71,8 @@ def create_plot_class(file_name):
 				assert actual_class != '-1'
 				beforeS = sfid
 				beforeM = meid
-				toCompClassD = row[14]
-				assert toCompClassD in valid
+				toCompClassD = row[6]
+				#assert toCompClassD in valid
 				print toCompClassD
 				#print row
 				if toCompClassD == actual_class:

diff --git a/Scripts/violin_javac_fix.py b/Scripts/violin_javac_fix.py
@@ -41,7 +41,7 @@ def create_plot_fix(file_name):
 		flagIsDel = False
 		for row in check_reader:
 			#print row
-			checkType = row[6]
+			checkType = row[7]
 			if checkType == 'd':
 				flagIsDel = True
 			else:
@@ -53,6 +53,7 @@ def create_plot_fix(file_name):
 			if sfid == beforeS and meid == beforeM:
 				countRank += 1
 				toCompTok = row[7]
+
 				if toCompTok != '':
 					print "----------------------------------"
 					print toCompTok
@@ -62,6 +63,7 @@ def create_plot_fix(file_name):
 					toCompTok = getToks[0][0]
 
 					print toCompTok
+
 				if toCompTok == actual_tok:
 					all_ranks.append(countRank)
 					actual_tok = ''
@@ -88,6 +90,7 @@ def create_plot_fix(file_name):
 				print row
 
 				toCompTokD = row[7]
+				print toCompTokD
 				# TOKENIZE TOKEN:
 				#print row
 
@@ -96,11 +99,11 @@ def create_plot_fix(file_name):
 					print toCompTokD
 					getToks = java.lex(toCompTokD)
 					print getToks
-					assert len(getToks) <= 3
+					#assert len(getToks) <= 3
 					toCompTokD = getToks[0][0]
 
 					print toCompTokD
-
+				print type(radha)
 
 				if toCompTokD == actual_tok:
 					all_ranks.append(countRank)

diff --git a/Scripts/violin_lstm_class.py b/Scripts/violin_lstm_class.py
@@ -0,0 +1,132 @@
+# Copyright 2017 Dhvani Patel
+
+from __future__ import division
+
+import sys
+import os
+import csv
+import matplotlib.pyplot as plt
+import numpy as np
+import javalang
+def set_axis_style(ax, labels):
+    """Style the x axis of `ax` with one outward tick per entry of `labels`.
+
+    Ticks are placed at 1..len(labels) along the bottom, labelled with
+    `labels`, with x limits of 0.25 to len(labels) + 0.75 and the axis
+    label 'Compiler'.
+    """
+    n_labels = len(labels)
+    x_axis = ax.get_xaxis()
+    x_axis.set_tick_params(direction='out')
+    ax.xaxis.set_ticks_position('bottom')
+    tick_positions = np.arange(1, n_labels + 1)
+    ax.set_xticks(tick_positions)
+    ax.set_xticklabels(labels)
+    ax.set_xlim(0.25, n_labels + 0.75)
+    ax.set_xlabel('Compiler')
+
+def create_plot_class(file_name):
+	"""Compute per-group reciprocal ranks from a CSV and show a violin plot.
+
+	Reference classes are read from the hard-coded mistakes.csv: the first
+	row is skipped, and columns 0 and 1 of each later row identify a group
+	whose class is column 6 (a reference class of 'x' is compared as 'd').
+
+	`file_name` is a CSV whose column 2 has the form '<sfid>/<meid>' and
+	whose column 14 holds the class compared against the reference.  Rows
+	whose column 2 is 'file' are skipped.  Consecutive rows with the same
+	sfid/meid form one group; the 1-based position of the first row whose
+	class equals the reference is the group's rank, and a group with no
+	matching row gets rank 0.  Each rank is turned into 1/rank (0 stays 0),
+	the values and their mean are printed, and a violin plot is shown.
+
+	Args:
+		file_name: path of the CSV to rank.
+
+	Raises:
+		AssertionError: if a group has no entry in mistakes.csv, or a
+			compared class is not one of '', 'x', 'i', 's'.
+		ValueError: if column 2 of a data row contains no '/'.
+	"""
+	# (sfid, meid) -> reference class.  setdefault keeps the first row for a
+	# key, i.e. the same row a top-to-bottom scan would have found first.
+	ref_classes = {}
+	with open("/home/dhvani/java-mistakes-data/mistakes.csv", 'rb') as reffile:
+		ref_reader = csv.reader(reffile, delimiter=',')
+		next(ref_reader, None)  # skip the first (header) row
+		for line in ref_reader:
+			ref_classes.setdefault((line[0], line[1]), line[6])
+
+	# NOTE(review): a reference class of 'x' is rewritten to 'd' below, but
+	# 'd' is not in `valid`, so such a group can never match while the
+	# membership assert holds -- confirm which spelling the input CSV uses.
+	valid = {'', 'x', 'i', 's'}
+	all_ranks = []
+	beforeS = -1
+	beforeM = -1
+	# '-1' means "no unmatched group pending": either nothing has been read
+	# yet or the current group has already been matched.
+	actual_class = '-1'
+	countRank = -1
+	count = 0
+
+	with open(file_name, 'rb') as csvfile:
+		check_reader = csv.reader(csvfile, delimiter=',')
+		for row in check_reader:
+			fileName = row[2]
+			if fileName == 'file':
+				continue
+			count += 1
+			sfid, meid = fileName.split('/', 1)
+
+			if sfid == beforeS and meid == beforeM:
+				# Another candidate for the current group.
+				countRank += 1
+				if row[14] == 'f.kind':
+					continue
+			else:
+				# A new group starts: the previous one, if still
+				# unmatched, is recorded as a miss.
+				if actual_class != '-1':
+					all_ranks.append(0)
+				actual_class = ref_classes.get((sfid, meid), '-1')
+				assert actual_class != '-1'
+				if actual_class == 'x':
+					actual_class = 'd'
+				countRank = 1
+				beforeS = sfid
+				beforeM = meid
+
+			toCompClass = row[14]
+			assert toCompClass in valid
+			print toCompClass
+			if toCompClass == actual_class:
+				all_ranks.append(countRank)
+				actual_class = '-1'
+
+	# The last group has no following group to flush it, so record its miss
+	# here; otherwise it would be silently left out of the mean.
+	if actual_class != '-1':
+		all_ranks.append(0)
+
+	print count
+	print len(all_ranks)
+
+	# Reciprocal rank per group; a miss (rank 0) contributes 0.
+	mean_ranks = []
+	for score in all_ranks:
+		if score == 0:
+			mean_ranks.append(0)
+		else:
+			mean_ranks.append(1/score)
+	print mean_ranks
+	print "Finished"
+	print len(mean_ranks)
+	if not mean_ranks:
+		# No groups were read: there is no mean to compute and nothing to plot.
+		print "No ranked groups found; nothing to plot"
+		return
+	print sum(mean_ranks)/len(mean_ranks)
+
+	fig, (ax1) = plt.subplots(nrows=1, ncols=1, figsize=(7, 7), sharey=True)
+
+	ax1.set_title('MRR For LSTM: Class')
+	ax1.set_ylabel('Reciprocal Rank')
+
+	ax1.violinplot(mean_ranks)
+	ax1.set_xticklabels([])
+	ax1.set_xlabel('LSTM')
+
+	plt.show()
+
+
+if __name__ == '__main__':
+	# The CSV to rank is the single required command-line argument; exit
+	# with a usage message instead of an IndexError when it is missing.
+	if len(sys.argv) < 2:
+		sys.exit("usage: %s <csv_file>" % sys.argv[0])
+	file_name = sys.argv[1]
+	create_plot_class(file_name)