From 5ffc8e3bbedf5bb82e46a6b71b7f1223657fb046 Mon Sep 17 00:00:00 2001
From: Kait Lam <39479354+katrinafyi@users.noreply.github.com>
Date: Thu, 8 Aug 2024 15:52:13 +1000
Subject: [PATCH] add markdown output for github actions display (#224)
add markdown summary, histogram and mean/stdev running time to results table
---
.github/workflows/run-examples.yml | 28 +++-
.gitignore | 5 +-
src/test/scala/SystemTests.scala | 203 +++++++++++++++++++++++++----
3 files changed, 208 insertions(+), 28 deletions(-)
diff --git a/.github/workflows/run-examples.yml b/.github/workflows/run-examples.yml
index 472cc3bd0..55e421e58 100644
--- a/.github/workflows/run-examples.yml
+++ b/.github/workflows/run-examples.yml
@@ -46,10 +46,32 @@ jobs:
- name: System Tests
run: ./mill test.testOnly '*SystemTests*' || true
-
- uses: actions/upload-artifact@v4
with:
name: testresult-${{ github.run_number }}
- path: src/test/*.csv
+ path: |
+ src/test/*.csv
+ src/test/*.svg
+ - run: |
+ pushd src/test
+ tail -n+1 summary-*.csv
+ pasted="$(paste headers.md.part summary-*.md.part)"
+ for part in summary-*.md.part; do
+ # basename, then everything after "summary-", then everything before ".md.part" (via two rev passes)
+ testname="$(basename $part | cut -d- -f2- | rev | cut -d. -f3- | rev)"
+ [[ -n "$testname" ]]
+
+ svg="verifyTime-$testname.svg"
+ ls -l "$svg"
+
+ # 1920 hours = 80 days
+ url="$(curl -F"file=@$svg" -Fexpires=1920 http://0x0.st)"
+ [[ -n "$url" ]]
+
+ pasted="$(echo "$pasted" | sed "s#HISTO${testname}HISTO#$url#g")"
+ done
+ popd
+
+ echo "$pasted" > $GITHUB_STEP_SUMMARY
+ shell: "bash -xe {0}"
- - run: tail -n+1 src/test/summary-*.csv
diff --git a/.gitignore b/.gitignore
index f7b512ec5..5a90429e1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,4 +23,7 @@ examplesold/
src/test/scala/dump/
src/test/analysis/dump/
*.gtirb
-*.json
\ No newline at end of file
+*.json
+src/test/*.svg
+src/test/*.md.part
+src/test/summary-*.md
diff --git a/src/test/scala/SystemTests.scala b/src/test/scala/SystemTests.scala
index 2d1f4d677..26ccf1c05 100644
--- a/src/test/scala/SystemTests.scala
+++ b/src/test/scala/SystemTests.scala
@@ -1,7 +1,7 @@
import org.scalatest.funsuite.AnyFunSuite
import util.{Logger, PerformanceTimer}
-
+import Numeric.Implicits._
import java.io.{BufferedWriter, File, FileWriter}
import scala.collection.mutable
import scala.io.Source
@@ -11,6 +11,10 @@ import scala.sys.process.*
* directory structure and file-name patterns.
*/
+
+ case class TestConfig(val boogieFlags:Seq[String] = Seq("/timeLimit:10", "/useArrayAxioms"), val basilFlags:Seq[String] = Seq(), val useBAPFrontend: Boolean=true, val expectVerify: Boolean=true)
+
+
trait SystemTests extends AnyFunSuite {
val testPath = "./src/test/"
val correctPath = "./src/test/correct"
@@ -28,23 +32,28 @@ trait SystemTests extends AnyFunSuite {
val testResults: mutable.ArrayBuffer[(String, TestResult)] = mutable.ArrayBuffer()
- def runTests(programs: Array[String], path: String, name: String, shouldVerify: Boolean, useADT: Boolean): Unit = {
+ def runTests(programs: Array[String], path: String, name: String, conf: TestConfig): Unit = {
// get all variations of each program
- val testSuffix = if useADT then ":BAP" else ":GTIRB"
+ val testSuffix = if conf.useBAPFrontend then ":BAP" else ":GTIRB"
for (p <- programs) {
val programPath = path + "/" + p
val variations = getSubdirectories(programPath)
variations.foreach(t =>
test(name + "/" + p + "/" + t + testSuffix) {
- runTest(path, p, t, shouldVerify, useADT)
+ runTest(path, p, t, conf)
}
)
}
}
+ /**
+ * Writes test result data into .csv and .md files named according to given filename.
+ */
def summary(filename: String): Unit = {
val csv: String = "testCase," + TestResult.csvHeader + System.lineSeparator() + testResults.map(r => s"${r._1},${r._2.toCsv}").mkString(System.lineSeparator())
- log(csv, testPath + "full-" + filename)
+ log(csv, testPath + "full-" + filename + ".csv")
+
+ val verifTimes = testResults.map(_._2.verifyTime.toDouble)
val numVerified = testResults.count(_._2.verified)
val numCounterexample = testResults.count(x => !x._2.verified && !x._2.timedOut)
@@ -53,6 +62,10 @@ trait SystemTests extends AnyFunSuite {
val numTimeout = testResults.count(_._2.timedOut)
val verifying = testResults.filter(x => !x._2.timedOut && x._2.verified).map(_._2.verifyTime)
val counterExamples = testResults.filter(x => !x._2.timedOut && !x._2.verified).map(_._2.verifyTime)
+ val medianVerifyTime = median(verifTimes)
+ val meanVerifyTime = mean(verifTimes)
+ val stdDevVerifyTime = stdDev(verifTimes)
+
info(s"Test summary: $numSuccess succeeded, $numFail failed: $numVerified verified, $numCounterexample did not verify (including $numTimeout timeouts).")
if (verifying.nonEmpty)
@@ -60,31 +73,85 @@ trait SystemTests extends AnyFunSuite {
if (counterExamples.nonEmpty)
info(s"Average time to counterexample: ${counterExamples.sum / counterExamples.size}")
- val summaryHeader = "passedCount,failedCount,verifiedCount,counterexampleCount,timeoutCount,verifyTotalTime,counterexampleTotalTime"
- val summaryRow = s"$numSuccess,$numFail,$numVerified,${counterExamples.size},$numTimeout,${verifying.sum},${counterExamples.sum}"
- log(summaryHeader + System.lineSeparator() + summaryRow, testPath + "summary-" + filename)
+ val summaryMap = collection.immutable.ListMap(
+ "passedCount" -> numSuccess,
+ "failedCount" -> numFail,
+ "verifiedCount" -> numVerified,
+ "counterexampleCount" -> counterExamples.size,
+ "timeoutCount" -> numTimeout,
+ "verifyTotalTime" -> verifying.sum,
+ "counterexampleTotalTime" -> counterExamples.sum,
+ "meanVerifyTime" -> meanVerifyTime.toInt,
+ "medianVerifyTime" -> medianVerifyTime.toInt,
+ "stdDevVerifyTime" -> stdDevVerifyTime.toInt,
+ )
+ val summaryHeader = summaryMap.keys.mkString(",") + System.lineSeparator
+ val summaryRow = summaryMap.values.mkString(",") + System.lineSeparator
+ log(summaryHeader + summaryRow, testPath + "summary-" + filename + ".csv")
+
+ // generates a verification time histogram
+ val minB = Seq(0, meanVerifyTime - 2.25 * stdDevVerifyTime, verifTimes.min).max
+ val maxB = Seq(meanVerifyTime + 2.25 * stdDevVerifyTime, verifTimes.max).min
+ val nbins = 50
+
+ val histo = histogram(nbins, Some(minB, maxB))(verifTimes.toSeq)
+ val svgHistogram = histoToSvg(filename, 400,300, histo, minB, maxB)
+ log(svgHistogram, testPath + "verifyTime-" + filename + ".svg")
+
+
+ // generates a markdown table in separate parts.
+ // the complete markdown file can be constructed by horizontal (line-wise)
+ // concatenation of leftMarkdown and one or more partMarkdown.
+ val mdMap = summaryMap + ("VerifyTimeHistogram" -> ("![](" + "HISTO" + filename + "HISTO" + ")"))
+ val leftMarkdown =
+ s"""
+ || Metric |
+ ||--------|
+ |""".stripMargin
+ + mdMap.map((k,_) => s"| $k |${System.lineSeparator}").mkString
+
+ val partMarkdown =
+ s"""
+ | $filename |
+ |-------|
+ |""".stripMargin
+ + mdMap.map((k,v) => s" $v |${System.lineSeparator}").mkString
+
+ val summaryMarkdown = leftMarkdown.linesIterator
+ .zip(partMarkdown.linesIterator)
+ .map(_++_)
+ .mkString("", System.lineSeparator, System.lineSeparator)
+
+ log(partMarkdown, testPath + "summary-" + filename + ".md.part")
+ log(leftMarkdown, testPath + "headers.md.part") // XXX likely not thread-safe
+ log(summaryMarkdown, testPath + "summary-" + filename + ".md")
}
- def runTest(path: String, name: String, variation: String, shouldVerify: Boolean, useADT: Boolean): Unit = {
+ def runTest(path: String, name: String, variation: String, conf: TestConfig): Unit = {
+ val shouldVerify = conf.expectVerify
+ val useBAPFrontend = conf.useBAPFrontend
+
val directoryPath = path + "/" + name + "/"
val variationPath = directoryPath + variation + "/" + name
val specPath = directoryPath + name + ".spec"
- val outPath = if useADT then variationPath + "_bap.bpl" else variationPath + "_gtirb.bpl"
- val inputPath = if useADT then variationPath + ".adt" else variationPath + ".gts"
+ val outPath = if useBAPFrontend then variationPath + "_bap.bpl" else variationPath + "_gtirb.bpl"
+ val inputPath = if useBAPFrontend then variationPath + ".adt" else variationPath + ".gts"
val RELFPath = variationPath + ".relf"
Logger.info(outPath)
val timer = PerformanceTimer(s"test $name/$variation")
- val args = mutable.ArrayBuffer("--input", inputPath, "--relf", RELFPath, "--output", outPath)
+ val args = mutable.ArrayBuffer("--input", inputPath, "--relf", RELFPath, "--output", outPath) ++ conf.basilFlags
if (File(specPath).exists) args ++= Seq("--spec", specPath)
Main.main(args.toArray)
val translateTime = timer.checkPoint("translate-boogie")
Logger.info(outPath + " done")
val extraSpec = List.from(File(directoryPath).listFiles()).map(_.toString).filter(_.endsWith(".bpl")).filterNot(_.endsWith(outPath))
- val boogieResult = (Seq("boogie", "/timeLimit:10", "/printVerifiedProceduresCount:0", "/useArrayAxioms", outPath) ++ extraSpec).!!
+ val boogieCmd = (Seq("boogie", "/printVerifiedProceduresCount:0") ++ conf.boogieFlags ++ Seq(outPath) ++ extraSpec)
+ Logger.info(s"Verifying... ${boogieCmd.mkString(" ")}")
+ val boogieResult = boogieCmd.!!
val verifyTime = timer.checkPoint("verify")
- val resultPath = if useADT then variationPath + "_bap_result.txt" else variationPath + "_gtirb_result.txt"
+ val resultPath = if useBAPFrontend then variationPath + "_bap_result.txt" else variationPath + "_gtirb_result.txt"
log(boogieResult, resultPath)
val verified = boogieResult.strip().equals("Boogie program verifier finished with 0 errors")
val proveFailed = boogieResult.contains("could not be proved")
@@ -101,7 +168,7 @@ trait SystemTests extends AnyFunSuite {
case (false, true, true) => "Expected verification success, but got failure."
}
- val expectedOutPath = if useADT then variationPath + ".expected" else variationPath + "_gtirb.expected"
+ val expectedOutPath = if useBAPFrontend then variationPath + ".expected" else variationPath + "_gtirb.expected"
val hasExpected = File(expectedOutPath).exists
var matchesExpected = true
if (hasExpected) {
@@ -149,7 +216,10 @@ trait SystemTests extends AnyFunSuite {
* the names all subdirectories of the given parent directory
*/
def getSubdirectories(directoryName: String): Array[String] = {
- File(directoryName).listFiles.filter(_.isDirectory).map(_.getName)
+ Option(File(directoryName).listFiles(_.isDirectory)) match {
+ case None => throw java.io.IOException(s"failed to read directory '$directoryName'")
+ case Some(subdirs) => subdirs.map(_.getName)
+ }
}
def log(text: String, path: String): Unit = {
@@ -161,18 +231,103 @@ trait SystemTests extends AnyFunSuite {
}
+
+
+def histoToSvg(title: String, imgWidth: Int, imgHeight: Int, bins: List[Int], minBin: Double, maxBin: Double) : String = {
+ def template(width: Int = 300, height: Int = 130, content: String) =
+ s""" """
+ def mkRect(width: Int, height: Int, x: Int, y: Int, crx: Int=0, cry: Int=0, fill: String="Black") = {
+ s""""""
+ }
+ def text(content: String, x: Int, y: Int, cssClass: String = "small") = {
+ s"""$content"""
+ }
+
+
+ val leftMargin = 20
+ val histWidth = imgWidth - leftMargin
+ val bottomMargin = 20
+ val topMargin = 20
+ val histHeight = imgHeight - topMargin - bottomMargin
+ val maxHeight = bins.max
+ val binWidth : Double = (histWidth).doubleValue / bins.size
+ val heightScaling : Double = (histHeight.doubleValue)/(maxHeight)
+ val binPos = (0 to bins.size).map(i => (leftMargin + i * binWidth, binWidth * (i + 1)))
+ .zip(bins.map(bh => heightScaling * bh))
+
+ val rects = binPos.map((binXX, height) =>
+ mkRect(binWidth.ceil.intValue, height.intValue, binXX._1.floor.intValue, histHeight.intValue - height.intValue + topMargin))
+
+ val labels = {
+ (text(title, imgWidth / 8, topMargin - 5),
+ text("0", 0, histHeight + topMargin),
+ text(maxHeight.toInt.toString, 0, topMargin),
+ text(minBin.toInt.toString, 0, imgHeight),
+ text(maxBin.toInt.toString, (binWidth*(bins.size)).intValue - leftMargin, imgHeight))
+ }
+
+ val bg = mkRect(imgWidth, imgHeight, 0, 0, fill="White")
+
+ val content = (Seq(bg) ++ rects ++ labels.toList).mkString("\n")
+ template(imgWidth, imgHeight, content)
+}
+
+
+def loadHisto() = {
+ val source = scala.io.Source.fromFile("src/test/full-testresult-GTIRB.csv").getLines().toList
+ val headers = source.head.split(",")
+
+ val res = headers.map(h => h -> mutable.ArrayBuffer[String]()).toMap[String, mutable.ArrayBuffer[String]]
+
+ source.tail.map(line => {
+ val cols = line.split(",")
+ headers.zip(cols).foreach((h,v) => res(h).append(v))
+ })
+
+ val timeValues = res("verifyTime").map(_.toDouble)
+ val histo = histogram(50, Some(800.0, 1000.0))(timeValues.toSeq)
+ println(histoToSvg("test histogram", 500, 300, histo, 800.0, 1000.0))
+}
+
+
+
class SystemTestsBAP extends SystemTests {
- runTests(correctPrograms, correctPath, "correct", true, true)
- runTests(incorrectPrograms, incorrectPath, "incorrect", false, true)
- test("summary") {
- summary("testresult-BAP.csv")
+ runTests(correctPrograms, correctPath, "correct", TestConfig(useBAPFrontend=true, expectVerify=true))
+ runTests(incorrectPrograms, incorrectPath, "incorrect", TestConfig(useBAPFrontend=true, expectVerify=false))
+ test("summarybap") {
+ summary("testresult-BAP")
}
}
class SystemTestsGTIRB extends SystemTests {
- runTests(correctPrograms, correctPath, "correct", true, false)
- runTests(incorrectPrograms, incorrectPath, "incorrect", false, false)
- test("summary") {
- summary("testresult-GTIRB.csv")
+ runTests(correctPrograms, correctPath, "correct", TestConfig(useBAPFrontend=false, expectVerify=true))
+ runTests(incorrectPrograms, incorrectPath, "incorrect", TestConfig(useBAPFrontend=false, expectVerify=false))
+ test("summarygtirb") {
+ summary("testresult-GTIRB")
}
}
+
+
+def mean(xs: Iterable[Double]): Double = xs.sum.toDouble / xs.size
+
+def variance(xs: Iterable[Double]): Double = {
+ val avg = mean(xs)
+
+ xs.map(a => math.pow(a - avg, 2)).sum / xs.size
+}
+
+def median(xs: Iterable[Double]) = xs.toArray.sorted.apply(xs.size / 2)
+
+def stdDev(xs: Iterable[Double]): Double = math.sqrt(variance(xs))
+
+
+def histogram(numBins: Int, bounds: Option[(Double, Double)] = None)(xs: Seq[Double]) : List[Int] = {
+ val (mn, mx) = bounds.getOrElse(xs.min, xs.max)
+ val binSize = ((mx - mn) / numBins) * (1.000001)
+ val counts = (0 to numBins).map(x => (mn + x * binSize, mn + (x + 1) * binSize))
+ .map((left, right) => xs.count(x => x >= left && x < right))
+ .toList
+ counts
+}