From ea5658ff32f71d3df030d897a70a6a2aac7f386a Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Mon, 18 Nov 2024 08:11:40 +0100 Subject: [PATCH] Improved recovery stats gathering and added R script to process the stats --- .../concrete/recovery/RecoveryTestSupport.rsc | 13 ++++-- .../tests/concrete/recovery/recovery-stats.R | 42 +++++++++++++++++++ 2 files changed, 51 insertions(+), 4 deletions(-) create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/recovery-stats.R diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc index 13dee209b2b..fe0961fd5aa 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc @@ -104,7 +104,7 @@ private TestMeasurement testRecovery(&T (value input, loc origin) standardParser FileStats updateStats(FileStats stats, TestMeasurement measurement, int referenceParseTime, int recoverySuccessLimit) { stats.totalParses += 1; - int ratio = referenceParseTime == 0 ? measurement.duration : measurement.duration/referenceParseTime; + int ratio = measurement.duration/referenceParseTime; int parseTimeRatio = ratio == 0 ? 
0 : round(log2(ratio)); switch (measurement) { @@ -393,11 +393,15 @@ FileStats testErrorRecovery(loc syntaxFile, str topSort, loc testInput, str inpu standardParser = parser(begin, allowAmbiguity=true, allowRecovery=false); recoveryParser = parser(begin, allowAmbiguity=true, allowRecovery=true); + // Initialization run + standardParser(input, testInput); + + // Timed run int startTime = realTime(); standardParser(input, testInput); - int referenceParseTime = realTime() - startTime; + int referenceParseTime = max(1, realTime() - startTime); - recoverySuccessLimit = size(input)/4; + recoverySuccessLimit = size(input)/4; println("Error recovery of () on , reference parse time: ms."); @@ -429,12 +433,13 @@ TestStats batchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, int m fileNr = 0; fromFile = from; + writeFile(statFile, "source,size,result,duration,ratio,disambiguationDuration,errorCount,errorSize\n"); + return runBatchRecoveryTest(syntaxFile, topSort, dir, ext, maxFiles, minFileSize, maxFileSize, statFile, testStats()); } TestStats runBatchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, int maxFiles, int minFileSize, int maxFileSize, loc statFile, TestStats cumulativeStats) { println("Batch testing in directory (maxFiles=, maxFileSize=, fromFile=)"); - writeFile(statFile, "source,size,result,duration,ratio,disambiguationDuration,errorCount,errorSize\n"); for (entry <- listEntries(dir)) { loc file = dir + entry; if (isFile(file)) { diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/recovery-stats.R b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/recovery-stats.R new file mode 100644 index 00000000000..142e01608c1 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/recovery-stats.R @@ -0,0 +1,42 @@ +# nolint start: line_length_linter. 
+
+# Summarizes the recovery stats CSV; uses the columns source, size, result and errorSize.
+# Usage: Rscript recovery-stats.R [stats-file]; without an argument the default file below is read.
+
+options("width" = 60)
+
+library("fs")
+
+args <- commandArgs(trailingOnly = TRUE)
+default_input <- "~/stats/benchmark-stats-2024-11-16-0-5120.txt"
+input <- path_expand(if (length(args) >= 1) args[[1]] else default_input)
+raw_data <- read.csv(input, header = TRUE)
+
+# Columns left out of every summary (named drop_cols to avoid reusing the name of base::drop).
+drop_cols <- c("source", "result")
+
+# Rows of `data` where `keep` is TRUE, without the dropped columns.
+# which() skips rows whose test is NA instead of turning them into all-NA rows.
+select_stats <- function(data, keep) {
+  data[which(keep), !(names(data) %in% drop_cols), drop = FALSE]
+}
+
+# Prints a heading followed by the summary of `stats`.
+# The explicit print() keeps the summary visible when the script is run through source().
+report <- function(title, stats) {
+  print(title)
+  print(summary(stats))
+}
+
+# Select interesting data subsets
+recovery_data <- raw_data[which(raw_data$result == "recovery"), ]
+# A recovery counts as failed when its error size is at least a quarter of the file size.
+recovery_failed <- recovery_data$errorSize >= recovery_data$size / 4
+
+report("Total recovery stats", select_stats(recovery_data, rep(TRUE, nrow(recovery_data))))
+report("Successful recovery stats (error size < 25% of file size)", select_stats(recovery_data, !recovery_failed))
+report("Failed recovery stats (error size >= 25% of file size)", select_stats(recovery_data, recovery_failed))
+report("Parse error stats", select_stats(raw_data, raw_data$result == "error"))
+report("Successful parse stats", select_stats(raw_data, raw_data$result == "success"))
+
+# nolint end