Skip to content

Commit

Permalink
Improved recovery stats gathering and added R script to process the s…
Browse files Browse the repository at this point in the history
…tats
  • Loading branch information
PieterOlivier committed Nov 18, 2024
1 parent e8bed22 commit ea5658f
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ private TestMeasurement testRecovery(&T (value input, loc origin) standardParser
FileStats updateStats(FileStats stats, TestMeasurement measurement, int referenceParseTime, int recoverySuccessLimit) {
stats.totalParses += 1;

int ratio = referenceParseTime == 0 ? measurement.duration : measurement.duration/referenceParseTime;
int ratio = measurement.duration/referenceParseTime;
int parseTimeRatio = ratio == 0 ? 0 : round(log2(ratio));
switch (measurement) {
Expand Down Expand Up @@ -393,11 +393,15 @@ FileStats testErrorRecovery(loc syntaxFile, str topSort, loc testInput, str inpu
standardParser = parser(begin, allowAmbiguity=true, allowRecovery=false);
recoveryParser = parser(begin, allowAmbiguity=true, allowRecovery=true);

// Initialization run
standardParser(input, testInput);

// Timed run
int startTime = realTime();
standardParser(input, testInput);
int referenceParseTime = realTime() - startTime;
int referenceParseTime = max(1, realTime() - startTime);

recoverySuccessLimit = size(input)/4;
recoverySuccessLimit = size(input)/4;
println("Error recovery of <syntaxFile> (<topSort>) on <testInput>, reference parse time: <referenceParseTime> ms.");
Expand Down Expand Up @@ -429,12 +433,13 @@ TestStats batchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, int m
fileNr = 0;
fromFile = from;
writeFile(statFile, "source,size,result,duration,ratio,disambiguationDuration,errorCount,errorSize\n");
return runBatchRecoveryTest(syntaxFile, topSort, dir, ext, maxFiles, minFileSize, maxFileSize, statFile, testStats());
}
TestStats runBatchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, int maxFiles, int minFileSize, int maxFileSize, loc statFile, TestStats cumulativeStats) {
println("Batch testing in directory <dir> (maxFiles=<maxFiles>, maxFileSize=<maxFileSize>, fromFile=<fromFile>)");
writeFile(statFile, "source,size,result,duration,ratio,disambiguationDuration,errorCount,errorSize\n");
for (entry <- listEntries(dir)) {
loc file = dir + entry;
if (isFile(file)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# nolint start: line_length_linter.

# Summarise error-recovery benchmark statistics from a CSV stats file.
#
# Usage: Rscript <this-script> [stats-file]
#
# When no stats file is given on the command line, the default path below is
# used. The file must have a header row with at least the columns `result`,
# `size` and `errorSize`; the `source` and `result` columns are left out of the
# printed summaries.

options("width" = 60)

library("fs")

# Input file: first trailing command-line argument, or the default benchmark run.
default_input <- "~/stats/benchmark-stats-2024-11-16-0-5120.txt"
cli_args <- commandArgs(trailingOnly = TRUE)
input <- path_expand(if (length(cli_args) >= 1) cli_args[[1]] else default_input)
if (!file.exists(input)) {
  stop("Stats file not found: ", input, call. = FALSE)
}
raw_data <- read.csv(input, header = TRUE)

# Fail early with a clear message instead of silently producing empty subsets
# (`raw_data$missing_column` is NULL, and comparing NULL selects no rows).
required_columns <- c("size", "result", "errorSize")
missing_columns <- setdiff(required_columns, names(raw_data))
if (length(missing_columns) > 0) {
  stop(
    "Stats file ", input, " lacks required column(s): ",
    paste(missing_columns, collapse = ", "),
    call. = FALSE
  )
}

# Select the rows of `data` for which `condition` is TRUE.
# `which()` discards NA comparisons; plain logical indexing would instead
# insert all-NA rows that distort the summaries.
rows_where <- function(data, condition) {
  data[which(condition), , drop = FALSE]
}

# Remove the named `columns` from `data`, always returning a data frame.
without_columns <- function(data, columns) {
  data[, !(names(data) %in% columns), drop = FALSE]
}

# Print a header line followed by the summary of `data`.
# The explicit print() makes the summary visible under source() as well, where
# top-level values are not auto-printed.
report <- function(title, data) {
  print(title)
  print(summary(data))
  invisible(data)
}

# Select interesting data subsets
recovery_data <- rows_where(raw_data, raw_data$result == "recovery")
error_data <- rows_where(raw_data, raw_data$result == "error")
success_data <- rows_where(raw_data, raw_data$result == "success")

drop <- c("source", "result")

# A recovery counts as failed when the error covers at least a quarter of the
# file; keep the divisor in sync with the 25% mentioned in the headers below.
recovery_limit_divisor <- 4
recovery_limit <- recovery_data$size / recovery_limit_divisor
recovery_fail_data <- rows_where(recovery_data, recovery_data$errorSize >= recovery_limit)
recovery_ok_data <- rows_where(recovery_data, recovery_data$errorSize < recovery_limit)

# Drop uninteresting columns
recovery <- without_columns(recovery_data, drop)
error <- without_columns(error_data, drop)
success <- without_columns(success_data, drop)
recovery_fail <- without_columns(recovery_fail_data, drop)
recovery_ok <- without_columns(recovery_ok_data, drop)

report("Total recovery stats", recovery)

report("Successful recovery stats (error size < 25% of file size)", recovery_ok)

report("Failed recovery stats (error size >= 25% of file size)", recovery_fail)

report("Parse error stats", error)

report("Successful parse stats", success)

# nolint end

0 comments on commit ea5658f

Please sign in to comment.