-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathCloneDigger.java
288 lines (240 loc) · 10.8 KB
/
CloneDigger.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.ArrayList;
import java.util.List;
import java.io.File;
import java.nio.file.Files;
import java.io.PrintWriter;
import java.io.IOException;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.*;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.FileUtils;
import java.util.concurrent.TimeUnit;
/**
 * Command-line entry point of the clone digger.
 *
 * <p>Supported options:
 * <ul>
 *   <li>{@code -generateBaseline <path>}: write a baseline config file to {@code path} and exit.</li>
 *   <li>{@code -configPath <path>}: XML configuration file that drives a comparison run.</li>
 * </ul>
 *
 * <p>A comparison run loads the configuration, empties the configured output directory,
 * copies the config file into it, runs either the "full mesh" comparison
 * ({@code matchMode == 1}) or the "between" comparison (any other value), prints the
 * results and writes the elapsed time to a {@code log} file in the output directory.
 */
public class CloneDigger {

    /** Name of the cached database file list, appended to the configured database directory. */
    private static final String CACHED_LIST_NAME = "cachedList.tmp";

    /** Process exit status used when the run is aborted because of an error. */
    private static final int EXIT_FAILURE = 1;

    /**
     * Prints every entry of {@code errorList}, preceded by a header line when the list is
     * not empty.
     *
     * @param errorList file entries reported as failed during cache construction
     */
    private static void displayError(ArrayList<String> errorList) {
        if (!errorList.isEmpty()) {
            System.out.println("Eclipse API error on the following files:");
        }
        for (String str : errorList) {
            System.out.println(str);
        }
    }

    /**
     * Empties {@code outputDir} (without deleting the directory itself) and copies the
     * config file into it as {@code config.xml}. On any failure the error is printed and
     * the process exits with a non-zero status.
     *
     * @param inputPath path of the config file to copy
     * @param outputDir directory to clean and copy into
     */
    private static void clearAndSaveConfig(String inputPath, String outputDir) {
        File from = new File(inputPath);
        File toDir = new File(outputDir);
        // Resolve against the directory so the target is correct whether or not
        // outputDir ends with a path separator.
        File toFile = new File(toDir, "config.xml");
        try {
            // cleans the directory without deleting it
            FileUtils.cleanDirectory(toDir);
            Files.copy(from.toPath(), toFile.toPath());
        } catch (Exception e) {
            System.out.println(e);
            System.out.println("Error while copying config to output dir");
            System.exit(EXIT_FAILURE);
        }
        System.out.println("Saved config to output dir:");
        System.out.println(outputDir);
    }

    /**
     * Reads {@code ./similarityBanList.txt} line by line. If the file cannot be read the
     * error is printed and the process exits with a non-zero status.
     *
     * @param debug when true, each line is echoed to standard output as it is read
     * @return the lines of the file, in file order
     */
    private static ArrayList<String> loadSimilarityBanList(boolean debug) {
        ArrayList<String> banList = new ArrayList<String>();
        // try-with-resources guarantees the reader is closed on every path.
        try (BufferedReader br = new BufferedReader(
                new FileReader("./similarityBanList.txt"))) {
            String line;
            while ((line = br.readLine()) != null) {
                if (debug) {
                    System.out.println(line);
                }
                banList.add(line);
            }
        } catch (IOException e) {
            System.out.println("Error while reading similarity ban file\n" + e);
            System.exit(EXIT_FAILURE);
        }
        return banList;
    }

    /** Formats the current wall-clock time with {@code sdf}. */
    private static String now(SimpleDateFormat sdf) {
        return sdf.format(Calendar.getInstance().getTime());
    }

    /**
     * Returns the list of database files: loaded from the cached list inside
     * {@code databaseDir} when that file exists and loading is enabled, otherwise
     * regenerated through {@code Database.generateFileList}.
     */
    private static List<String> resolveDatabaseFileList(String databaseDir,
            boolean loadDatabaseFilePaths) throws IOException {
        String databaseFilePaths = databaseDir + CACHED_LIST_NAME;
        File cacheFile = new File(databaseFilePaths);
        if (cacheFile.exists() && !cacheFile.isDirectory() && loadDatabaseFilePaths) {
            System.out.println("Path file exists");
            return Database.loadFileList(databaseFilePaths);
        }
        // doesn't exist or forced to create new path file, create it
        System.out.println("Path file doesn't exist or specified to regen path file");
        return Database.generateFileList(databaseDir, CACHED_LIST_NAME);
    }

    /**
     * Full mesh mode: compares the database against itself (or loads previously saved
     * results), optionally runs the query engine, then prints the results.
     */
    private static void runFullMesh(ConfigFile config, Output output,
            List<String> databaseFileList, ArrayList<String> errorList,
            ArrayList<String> banListSim, SimpleDateFormat sdf) throws IOException {
        if (!config.loadResults) {
            System.out.println("Mode: full mesh");
            // build the database cache; the returned entries are reported by displayError
            errorList.addAll(Database.constructCache(config.minNumLines, config.debug,
                    databaseFileList, config.database, config.forceRetokenization));
            System.out.println("Start comparison @ " + now(sdf));
            // full mesh requires both dir paths to be the same
            Compare comp = new Compare(config.minNumLines, config.database, config.database);
            comp.installTextFiles(databaseFileList);
            comp.compareMeshed(output, config.matchAlgorithm, config.gapSize,
                    config.meshBlockSize);
            if (config.exportResults) {
                output.saveResults(config.resultPath);
            }
        } else {
            output.loadResults(config.resultPath);
        }
        // enable the query engine
        if (config.enableQuery) {
            output.search(config.outputDir);
        }
        output.printResults(config.saveEmpty, config.similarityRange,
                config.enableSimilarity, config.matchMode, config.debug, banListSim);
        // NOTE: the frequency-map export of output terms (config.aprioriMinSupport)
        // is currently disabled.
    }

    /**
     * Between mode: compares the project directory against the database (or loads
     * previously saved results), then prints the results.
     */
    private static void runBetweenComparison(ConfigFile config, Output output,
            List<String> databaseFileList, ArrayList<String> errorList,
            ArrayList<String> banListSim, SimpleDateFormat sdf) throws IOException {
        if (!config.loadResults) {
            System.out.println("Mode: between comparison");
            ArrayList<Text> projectTextList = new ArrayList<Text>();
            List<String> projectFilePaths = Database.getFileList(config.project);
            // The project cache is always rebuilt (last argument is true).
            errorList.addAll(Database.constructCache(config.minNumLines, config.debug,
                    projectFilePaths, config.project, true));
            if (!config.loadDatabaseFilePaths) {
                // Record the database errors only when the database cache was actually
                // constructed; otherwise the project errors would be reported twice.
                errorList.addAll(Database.constructCache(config.minNumLines, config.debug,
                        databaseFileList, config.database, config.forceRetokenization));
            }
            // only load the projects into memory
            System.out.println("\nLoading a total of " + projectFilePaths.size() +
                    " cached project files from \n" + config.project);
            Database.loadCache(projectTextList, config.debug, projectFilePaths,
                    config.project);
            System.out.println("Start comparison @ " + now(sdf));
            Compare comp = new Compare(config.minNumLines, config.database, config.project);
            comp.installTextFiles(projectTextList, databaseFileList);
            output = comp.compareBetween(output, config.matchAlgorithm, config.gapSize,
                    config.numberThreads);
            if (config.exportResults) {
                output.saveResults(config.resultPath);
            }
        } else {
            output.loadResults(config.resultPath);
        }
        output.printResults(config.saveEmpty, config.similarityRange,
                config.enableSimilarity, config.matchMode, config.debug, banListSim);
    }

    /**
     * Program entry point; see the class description for the accepted options.
     *
     * <p>Exits with a non-zero status when the arguments cannot be parsed or when
     * {@code -configPath} is missing for a comparison run.
     *
     * @param args command-line arguments
     * @throws IOException if the elapsed-time log cannot be written, or if one of the
     *         project calls made here raises it
     */
    public static void main(String[] args) throws IOException {
        Options options = new Options();
        options.addOption("generateBaseline", true, "generate baseline config file to the provided path");
        options.addOption("configPath", true, "configuration xml file path");
        CommandLineParser parser = new DefaultParser();
        CommandLine cmd;
        try {
            cmd = parser.parse(options, args);
        } catch (ParseException e) {
            // Continuing with unparsed arguments would only fail later with a
            // NullPointerException on the missing config path.
            System.out.println(e);
            System.exit(EXIT_FAILURE);
            return;
        }
        // getOptionValue yields null when the option was not supplied.
        String baseLineOutputPath = cmd.getOptionValue("generateBaseline");
        String configPath = cmd.getOptionValue("configPath");

        // generate a baseline config file
        if (baseLineOutputPath != null) {
            System.out.println("Writing baseline config file..");
            ConfigFile.writeBaseline(baseLineOutputPath);
            System.out.println("Exiting.");
            System.exit(0);
        }
        if (configPath == null) {
            System.out.println("Missing required option: -configPath <configuration xml file path>");
            System.exit(EXIT_FAILURE);
            return;
        }

        // load config file
        ConfigFile config = new ConfigFile();
        config.loadConfig(configPath);
        String outputDir = config.outputDir;
        ArrayList<String> banListSim = loadSimilarityBanList(config.debug);
        System.out.println("Finished parsing XML parameters");

        // clear existing output and save the config to output dir
        clearAndSaveConfig(configPath, outputDir);

        SimpleDateFormat sdf = new SimpleDateFormat("hh:mm:ss a");
        System.out.println("Start @ " + now(sdf));
        long startTime = System.nanoTime();

        List<String> databaseFileList =
                resolveDatabaseFileList(config.database, config.loadDatabaseFilePaths);
        // NOTE: building the TF-IDF term-frequency map (config.buildTFIDF) is
        // currently disabled.

        ArrayList<String> errorList = new ArrayList<String>();
        Output output = new Output(config.matchAlgorithm, config.enableRepetitive,
                config.enableOneMethod, config.matchMode, outputDir,
                config.minNumberStatements, config.debug, config.enablePercentageMatching);
        if (config.matchMode == 1) {
            runFullMesh(config, output, databaseFileList, errorList, banListSim, sdf);
        } else {
            runBetweenComparison(config, output, databaseFileList, errorList, banListSim, sdf);
        }

        // Display all the errors
        displayError(errorList);

        // Display and save elapsed time (whole minutes, truncated)
        long elapsedTime = System.nanoTime() - startTime;
        String msg = "Elapsed for " + TimeUnit.NANOSECONDS.toMinutes(elapsedTime) + " minutes";
        System.out.println(msg);
        try (PrintWriter writer = new PrintWriter(new File(outputDir, "log"), "UTF-8")) {
            writer.println(msg);
        }

        System.out.println("Finish @ " + now(sdf));
        System.out.println("graceful exit...");
    }
}