Skip to content

Commit

Permalink
Script for finding col
Browse files Browse the repository at this point in the history
  • Loading branch information
Dhvani Patel committed Aug 9, 2017
1 parent 88ffea6 commit 4597c26
Show file tree
Hide file tree
Showing 5 changed files with 231 additions and 14 deletions.
1 change: 0 additions & 1 deletion Scripts/.~lock.eclipse_fixes.csv#

This file was deleted.

86 changes: 86 additions & 0 deletions Scripts/col_finder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import csv
import os
from javac_parser import Java
from io import open
import sys
import keyword
import difflib

def _token_kinds(tokens):
    """Return the token-type names of *tokens*, one per token.

    Each token is indexed at position 0 for its type name.  DOUBLELITERAL is
    folded into INTLITERAL so that a change between the two numeric literal
    kinds does not show up as a difference.
    """
    return ["INTLITERAL" if tok[0] == "DOUBLELITERAL" else tok[0]
            for tok in tokens]


def _token_at(tokens, index):
    """Return tokens[index], or None when index is one past the end.

    difflib opcodes may start at len(sequence), e.g. an insertion at the very
    end of the file, so a plain subscript can raise IndexError.
    """
    return tokens[index] if index < len(tokens) else None


def getCol(rootdir='/home/dhvani/java-mistakes-data'):
    """Print the token-level diff opcodes for every before/after Java pair.

    Walks *rootdir*, treating every path containing "after.java" as the fixed
    version and every other path (except those containing "mistakes.csv") as
    the broken version.  Pairs are matched by position in walk order, so each
    directory is expected to hold exactly one file of each kind.

    For each pair both files are lexed, reduced to token-type names and
    diffed.  When the diff has more than three opcodes, every opcode is
    printed together with the token at its start in the before file (index
    i1) and the token at its start in the after file (index j1).

    rootdir -- directory to walk; defaults to the original hard-coded path.

    Raises AssertionError when the number of before and after files differ.
    """
    # Init java tokenizer:
    java = Java()

    before_paths = []
    after_paths = []
    for subdir, _dirs, files in os.walk(rootdir):
        for name in files:
            path = os.path.join(subdir, name)
            if "mistakes.csv" in path:
                continue
            if "after.java" in path:
                after_paths.append(path)
            else:
                before_paths.append(path)
    assert len(before_paths) == len(after_paths), \
        "unequal number of before/after files under %s" % rootdir

    for loop_ind, (before_f, after_f) in enumerate(zip(before_paths,
                                                       after_paths)):
        with open(before_f, 'r') as before_file:
            before_toks = java.lex(before_file.read())
        with open(after_f, 'r') as after_file:
            after_toks = java.lex(after_file.read())

        print(before_f)
        print(after_f)

        matcher = difflib.SequenceMatcher(None,
                                          a=_token_kinds(before_toks),
                                          b=_token_kinds(after_toks),
                                          autojunk=False)
        opcodes = matcher.get_opcodes()
        if len(opcodes) > 3:
            for opcode in opcodes:
                print(opcode)
                # opcode is (tag, i1, i2, j1, j2): i1 indexes the before
                # sequence, j1 indexes the after sequence.
                print(_token_at(before_toks, opcode[1]))
                print(_token_at(after_toks, opcode[3]))
                print("-----------")

        print(loop_ind)

    print("FINISHED")


if __name__ == '__main__':
    # An optional first command-line argument overrides the default root.
    getCol(*sys.argv[1:2])
17 changes: 7 additions & 10 deletions Scripts/violin_javac_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,14 @@ def create_plot_class(file_name):
fileName = row[2]
if fileName == 'file':
continue
sfid = fileName[:fileName.index('/')]
meid = fileName[fileName.index('/')+1:]
#print sfid
#print meid
#print row
sfid = row[1]
meid = row[2]
if sfid == beforeS and meid == beforeM:
countRank += 1
if row[14] == 'f.kind':
if row[6] == 'f.kind':
continue
toCompClass = row[14]
assert toCompClass in valid
toCompClass = row[6]
#assert toCompClass in valid
print toCompClass
if toCompClass == actual_class:
all_ranks.append(countRank)
Expand All @@ -74,8 +71,8 @@ def create_plot_class(file_name):
assert actual_class != '-1'
beforeS = sfid
beforeM = meid
toCompClassD = row[14]
assert toCompClassD in valid
toCompClassD = row[6]
#assert toCompClassD in valid
print toCompClassD
#print row
if toCompClassD == actual_class:
Expand Down
9 changes: 6 additions & 3 deletions Scripts/violin_javac_fix.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def create_plot_fix(file_name):
flagIsDel = False
for row in check_reader:
#print row
checkType = row[6]
checkType = row[7]
if checkType == 'd':
flagIsDel = True
else:
Expand All @@ -53,6 +53,7 @@ def create_plot_fix(file_name):
if sfid == beforeS and meid == beforeM:
countRank += 1
toCompTok = row[7]

if toCompTok != '':
print "----------------------------------"
print toCompTok
Expand All @@ -62,6 +63,7 @@ def create_plot_fix(file_name):
toCompTok = getToks[0][0]

print toCompTok

if toCompTok == actual_tok:
all_ranks.append(countRank)
actual_tok = ''
Expand All @@ -88,6 +90,7 @@ def create_plot_fix(file_name):
print row

toCompTokD = row[7]
print toCompTokD
# TOKENIZE TOKEN:
#print row

Expand All @@ -96,11 +99,11 @@ def create_plot_fix(file_name):
print toCompTokD
getToks = java.lex(toCompTokD)
print getToks
assert len(getToks) <= 3
#assert len(getToks) <= 3
toCompTokD = getToks[0][0]

print toCompTokD

print type(radha)

if toCompTokD == actual_tok:
all_ranks.append(countRank)
Expand Down
132 changes: 132 additions & 0 deletions Scripts/violin_lstm_class.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# Copyright 2017 Dhvani Patel

from __future__ import division

import sys
import os
import csv
import matplotlib.pyplot as plt
import numpy as np
import javalang
def set_axis_style(ax, labels, xlabel='Compiler'):
    """Configure the x axis of *ax* with one labelled tick per entry.

    ax     -- axes-like object whose x axis is configured in place.
    labels -- sequence of tick labels; ticks are placed at 1..len(labels).
    xlabel -- x-axis caption; defaults to 'Compiler', the value that used to
              be hard-coded, so existing two-argument calls are unchanged.
    """
    tick_count = len(labels)
    ax.get_xaxis().set_tick_params(direction='out')
    ax.xaxis.set_ticks_position('bottom')
    ax.set_xticks(np.arange(1, tick_count + 1))
    ax.set_xticklabels(labels)
    # Leave a margin on either side of the first and last tick.
    ax.set_xlim(0.25, tick_count + 0.75)
    ax.set_xlabel(xlabel)

def _load_reference_classes(ref_path):
    """Map (column 0, column 1) of the reference CSV to its column 6.

    The first row is skipped as a header.  When a key occurs more than once
    the first occurrence wins, matching a top-to-bottom scan that stops at
    the first hit.
    """
    ref_classes = {}
    # 'rb' is the csv-module convention for the Python 2 this script targets.
    with open(ref_path, 'rb') as reffile:
        ref_reader = csv.reader(reffile, delimiter=',')
        for row_index, line in enumerate(ref_reader):
            if row_index == 0:
                continue
            ref_classes.setdefault((line[0], line[1]), line[6])
    return ref_classes


def _collect_ranks(rows, ref_classes):
    """Return one rank per file group found in *rows*.

    Consecutive rows sharing the same "sfid/meid" value in column 2 form a
    group.  Within a group rows are numbered from 1; the group's rank is the
    number of the first row whose column 14 equals the reference class, or 0
    when no row matches.
    """
    valid = {'', 'x', 'i', 's'}
    all_ranks = []
    current_key = None
    # '-1' means no reference class is waiting to be matched.
    actual_class = '-1'
    rank = -1

    for row in rows:
        file_name = row[2]
        if file_name == 'file':
            # Header row.
            continue
        # Raises ValueError when there is no '/', as str.index() would.
        sfid, meid = file_name.split('/', 1)
        key = (sfid, meid)

        if key == current_key:
            rank += 1
            if row[14] == 'f.kind':
                continue
        else:
            # The previous group ended without a match.
            if actual_class != '-1':
                all_ranks.append(0)
            rank = 1
            current_key = key
            if key not in ref_classes:
                raise ValueError(
                    "no reference entry for %s/%s" % (sfid, meid))
            actual_class = ref_classes[key]
            # NOTE(review): 'x' is rewritten to 'd', but 'd' is not in
            # `valid`, so such groups can never match -- confirm intent.
            if actual_class == 'x':
                actual_class = 'd'

        predicted = row[14]
        if predicted not in valid:
            raise ValueError("unexpected class %r in column 14" % predicted)
        print(predicted)
        if predicted == actual_class:
            all_ranks.append(rank)
            actual_class = '-1'

    # Flush the final group: without this an unmatched last group would be
    # dropped instead of contributing a rank of 0.
    if actual_class != '-1':
        all_ranks.append(0)
    return all_ranks


def create_plot_class(file_name,
                      ref_path="/home/dhvani/java-mistakes-data/mistakes.csv"):
    """Print the mean reciprocal rank for *file_name* and show a violin plot.

    file_name -- CSV of ranked rows; column 2 holds "sfid/meid" and column 14
                 the class compared against the reference.
    ref_path  -- reference CSV whose columns 0, 1 and 6 give the two ids and
                 the expected class; defaults to the original hard-coded
                 path.

    Raises ValueError when a group has no reference entry or a row carries a
    class outside the accepted set.
    """
    ref_classes = _load_reference_classes(ref_path)

    with open(file_name, 'rb') as csvfile:
        check_reader = csv.reader(csvfile, delimiter=',')
        all_ranks = _collect_ranks(check_reader, ref_classes)

    print(len(all_ranks))

    # 1.0 keeps the division true division on Python 2 even without the
    # module-level __future__ import.
    mean_ranks = [0 if score == 0 else 1.0 / score for score in all_ranks]
    print(mean_ranks)
    print("Finished")
    print(len(mean_ranks))
    if not mean_ranks:
        # Nothing to average or plot; avoid dividing by zero.
        print("No ranks collected")
        return
    print(sum(mean_ranks) / len(mean_ranks))

    fig, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(7, 7), sharey=True)

    ax1.set_title('MRR For LSTM: Class')
    ax1.set_ylabel('Reciprocal Rank')

    ax1.violinplot(mean_ranks)
    ax1.set_xticklabels([])
    ax1.set_xlabel('LSTM')

    plt.show()


if __name__ == '__main__':
    # Exactly one argument is expected: the CSV file of ranked rows.
    if len(sys.argv) != 2:
        sys.exit("usage: %s <results.csv>" % sys.argv[0])
    create_plot_class(sys.argv[1])

0 comments on commit 4597c26

Please sign in to comment.