Skip to content

Commit

Permalink
Keras model working
Browse files: browse the repository at this point in the history
  • Loading branch information
Dhvani Patel committed Aug 1, 2017
1 parent 3b30480 commit 83f9bff
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 20 deletions.
89 changes: 89 additions & 0 deletions Scripts/violin_javac.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Copyright 2017 Dhvani Patel

from __future__ import division

import sys
import os
import csv
import matplotlib.pyplot as plt
import numpy as np


def _open_csv(path):
    """Open *path* in the mode the ``csv`` module expects on this interpreter."""
    # Python 2's csv module wants a binary file; Python 3's wants a text file
    # opened with newline='' so that embedded newlines are handled by csv.
    if sys.version_info[0] < 3:
        return open(path, 'rb')
    return open(path, 'r', newline='')


def _load_reference_lines(ref_path):
    """Map (column 0, column 1) of the reference CSV to its column 4.

    The first row is skipped as a header. When a key occurs more than once
    the first occurrence wins, matching a first-match linear scan.
    """
    expected = {}
    with _open_csv(ref_path) as ref_file:
        reader = csv.reader(ref_file, delimiter=',')
        next(reader, None)  # skip the header row
        for record in reader:
            if not record:
                continue  # csv yields [] for blank lines
            expected.setdefault((record[0], record[1]), record[4])
    return expected


def _collect_ranks(rows, expected_lines):
    """Return one rank per run of consecutive rows sharing (row[1], row[2]).

    The rank is the 1-based position, within the run, of the first row whose
    column 5 equals the expected line for that key; 0 means no row matched.
    """
    ranks = []
    current_key = None
    target = None  # expected line of the current run; None once it was found
    rank = 0
    for row in rows:
        if not row:
            continue
        key = (row[1], row[2])
        if key != current_key:
            # A new run starts: the previous one never hit its target.
            if target is not None:
                ranks.append(0)
            if key not in expected_lines:
                raise ValueError(
                    "no reference entry for sfid=%r, meid=%r" % key)
            current_key = key
            target = expected_lines[key]
            rank = 1
        else:
            rank += 1
        if target is not None and row[5] == target:
            ranks.append(rank)
            target = None
    # The final run has no successor to flush it, so record its miss here.
    if target is not None:
        ranks.append(0)
    return ranks


def create_plot(file_name,
                ref_file_name="/home/dhvani/java-mistakes-data/mistakes.csv"):
    """Print reciprocal-rank statistics for a results CSV and violin-plot them.

    Rows of ``file_name`` are grouped into runs of consecutive rows that share
    the same values in columns 1 and 2 (``sfid`` and ``meid``). For each run,
    the expected line is looked up in the reference CSV and compared against
    column 5 of every row in the run; the 1-based position of the first match
    is that run's rank (0 if nothing matched). The reciprocal of each rank
    (0 for a miss) is printed, followed by the number of runs and the mean
    of the reciprocals, and the reciprocals are shown as a violin plot.

    Args:
        file_name: Path to the results CSV (``,`` delimiter, ``|`` quote char).
        ref_file_name: Path to the reference CSV; its first row is a header
            and columns 0, 1 and 4 are read.

    Raises:
        ValueError: If a (sfid, meid) pair from ``file_name`` has no entry in
            the reference CSV.
    """
    expected_lines = _load_reference_lines(ref_file_name)
    with _open_csv(file_name) as csvfile:
        check_reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        all_ranks = _collect_ranks(check_reader, expected_lines)

    # Reciprocal rank per run; a miss (rank 0) contributes 0.
    mean_ranks = [1.0 / rank if rank else 0 for rank in all_ranks]
    print(mean_ranks)
    print("Finished")
    print(len(mean_ranks))
    if not mean_ranks:
        # Nothing to average or plot for an empty results file.
        return
    print(sum(mean_ranks) / float(len(mean_ranks)))

    plt.violinplot(mean_ranks)
    plt.show()


if __name__ == '__main__':
    # The results CSV path is required; fail with a usage message instead of
    # an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: %s RESULTS_CSV" % sys.argv[0])
    create_plot(sys.argv[1])
69 changes: 49 additions & 20 deletions keras_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,28 @@
# WINDOW = 10, SO BATCH = 40 INPUT, 40 OUTPUT
BATCH_SIZE = 66

def one_hot(indexed_tokens, nb_classes=88):
    """Return one-hot encoded rows for a sequence of token indices.

    Args:
        indexed_tokens: Sequence of integer class indices; each must be a
            valid row index into an ``nb_classes`` x ``nb_classes`` identity
            matrix.
        nb_classes: Width of every one-hot row (defaults to 88).

    Returns:
        A list with one entry per index; each entry is a list of
        ``nb_classes`` Python ints containing a single 1 at that index.
        An empty input gives an empty list.
    """
    # Build the identity matrix as ints up front so no per-element
    # conversion pass is needed afterwards.
    indices = np.asarray(indexed_tokens, dtype=int)
    return np.eye(nb_classes, dtype=int)[indices].tolist()

def getInputTen(allTrainData):
#one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _, _ = perform(0)
one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = allTrainData[0][0],allTrainData[0][1], allTrainData[0][2], allTrainData[0][3]



one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = one_hot(allTrainData[0][0]),one_hot(allTrainData[0][1]), one_hot(allTrainData[0][2]), one_hot(allTrainData[0][3])
while(one_hot_good == 1):
one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = allTrainData[0][0],allTrainData[0][1], allTrainData[0][2], allTrainData[0][3]
one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = one_hot(allTrainData[0][0]),one_hot(allTrainData[0][1]), one_hot(allTrainData[0][2]), one_hot(allTrainData[0][3])
#print type(one_hot_good)
#print one_hot_good
windowInd = 0
Expand Down Expand Up @@ -149,13 +165,15 @@ def getInputTen(allTrainData):

fileInd += 1
#print "FILE IND"
print fileInd
#print fileInd
windowInd = 0
#one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, _, _, _, _, _ = perform(fileInd)
one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = allTrainData[fileInd][0],allTrainData[fileInd][1], allTrainData[fileInd][2], allTrainData[fileInd][3]
while(one_hot_good == 1):
one_hot_good = allTrainData[fileInd]
while(one_hot_good == -1):
fileInd+=1
one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = allTrainData[fileInd][0],allTrainData[fileInd][1], allTrainData[fileInd][2], allTrainData[fileInd][3]
one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = one_hot(allTrainData[fileInd][0]),one_hot(allTrainData[fileInd][1]), one_hot(allTrainData[fileInd][2]), one_hot(allTrainData[fileInd][3])
if one_hot_good != -1:
one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub = one_hot(allTrainData[fileInd][0]),one_hot(allTrainData[fileInd][1]), one_hot(allTrainData[fileInd][2]), one_hot(allTrainData[fileInd][3])


for p in range(numGoodLeft):
Expand Down Expand Up @@ -212,11 +230,11 @@ def getInputTen(allTrainData):

def getOutputTen(allTrainData):
#_, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out, _ = perform(0)
one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = allTrainData[0][4],allTrainData[0][5], allTrainData[0][6], allTrainData[0][7]
one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = one_hot(allTrainData[0][0]),one_hot(allTrainData[0][1]), one_hot(allTrainData[0][2]), one_hot(allTrainData[0][3])

while(one_hot_good_out == 1):
#_, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out, _ = perform(0)
one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = allTrainData[0][4],allTrainData[0][5], allTrainData[0][6], allTrainData[0][7]
one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = one_hot(allTrainData[0][0]),one_hot(allTrainData[0][1]), one_hot(allTrainData[0][2]), one_hot(allTrainData[0][3])
#print type(one_hot_good_out)
#print one_hot_good_out
windowInd = 0
Expand All @@ -242,7 +260,7 @@ def getOutputTen(allTrainData):
clasF = one_hot_good_out[y][5]
clasF = one_hot_good_out[y][6]
#bruhOne = []
if(err == 0):
if True:
zero = 1
toPassOne.append(zero)
one = 0
Expand All @@ -266,7 +284,7 @@ def getOutputTen(allTrainData):
clasF = one_hot_bad_ins_out[y][5]
clasF = one_hot_bad_ins_out[y][6]
#bruhTwo = []
if(err == 1):
if True:
zero = 0
toPassTwo.append(zero)
one = 0
Expand All @@ -289,7 +307,7 @@ def getOutputTen(allTrainData):
clasF = one_hot_bad_del_out[y][5]
clasF = one_hot_bad_del_out[y][6]
#bruhThree = []
if(err == 1):
if True:
zero = 0
toPassThree.append(zero)
one = 0
Expand All @@ -312,7 +330,7 @@ def getOutputTen(allTrainData):
clasF = one_hot_bad_sub_out[y][5]
clasF = one_hot_bad_sub_out[y][6]
#bruhFour = []
if(err == 1):
if True:
zero = 0
toPassFour.append(zero)
one = 1
Expand Down Expand Up @@ -388,11 +406,22 @@ def getOutputTen(allTrainData):
fileInd += 1
windowInd = 0
#_, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out, _ = perform(fileInd)
one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = allTrainData[fileInd][4],allTrainData[fileInd][5], allTrainData[fileInd][6], allTrainData[fileInd][7]
while(one_hot_good_out == 1):
print fileInd
#print allTrainData[37]
#print type(allTrainData[fileInd][1])
#print type(allTrainData[fileInd][2])
#print type(allTrainData[fileInd][3])


one_hot_good_out = allTrainData[fileInd]
while(one_hot_good_out == -1):
fileInd+=1
#_, _, _, _, one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out, _ = perform(fileInd)
one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = allTrainData[fileInd][4],allTrainData[fileInd][5], allTrainData[fileInd][6], allTrainData[fileInd][7]
one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = one_hot(allTrainData[fileInd][0]),one_hot(allTrainData[fileInd][1]), one_hot(allTrainData[fileInd][2]), one_hot(allTrainData[fileInd][3])


if one_hot_good_out != -1:
one_hot_good_out, one_hot_bad_ins_out, one_hot_bad_del_out, one_hot_bad_sub_out = one_hot(allTrainData[fileInd][0]),one_hot(allTrainData[fileInd][1]), one_hot(allTrainData[fileInd][2]), one_hot(allTrainData[fileInd][3])

#for p in range(numGoodOutLeft):
# one_hot_good_out.insert(p, old_one_hot_good_out[len(old_one_hot_good_out)-numGoodOutLeft+p])
Expand Down Expand Up @@ -955,11 +984,11 @@ def initData():
allTrainData = cPickle.load( open( "train_pre_data.txt", "rb" ) )
print "GOT DATA"

sum = 0
for x in allTrainData:
sum += len(x[2])
print sum
print "SUM"
#sum = 0
#for x in allTrainData:
# sum += len(x[2])
#print sum
#print "SUM"
model = Sequential()
model.add(Dense(4, activation='relu', input_shape=(10, 88), batch_size=66))
model.add(Dropout(0.5))
Expand Down
Binary file modified py_mutations_hub.pyc
Binary file not shown.

0 comments on commit 83f9bff

Please sign in to comment.