From 4a692d958cda25a952f484d7714c330f42c29866 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Ko=C5=82aczkowski?= <pkolaczk@gmail.com>
Date: Fri, 5 Jul 2024 22:00:14 +0200
Subject: [PATCH] New command: `latte list` for tabular listing of run results

This command is intended for quick comparisons of multiple
benchmark runs or for finding the right run.
It accepts basic filtering by workload, function, or tags.

Additionally, the speed of loading reports has been
significantly improved.

This feature will be enhanced in the future.
---
 Cargo.lock    |   5 +-
 Cargo.toml    |   1 +
 src/config.rs |  27 +++++++++
 src/main.rs   |  85 +++++++++++++++++++++++++--
 src/report.rs | 146 +++++++++++++++++++++++++++++++++++++++--------
 src/table.rs  | 155 ++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 389 insertions(+), 30 deletions(-)
 create mode 100644 src/table.rs
diff --git a/Cargo.lock b/Cargo.lock
index 3fafe35..1ab0af0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1063,6 +1063,7 @@ dependencies = [
  "tracing-subscriber",
  "try-lock",
  "uuid",
+ "walkdir",
 ]
 
 [[package]]
@@ -2451,9 +2452,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
 
 [[package]]
 name = "walkdir"
-version = "2.4.0"
+version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
 dependencies = [
  "same-file",
  "winapi-util",
diff --git a/Cargo.toml b/Cargo.toml
index b27e7ce..a0d4aad 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -54,6 +54,7 @@ tracing = "0.1"
 tracing-subscriber = "0.3"
 try-lock = "0.2.3"
 uuid = { version = "1.1", features = ["v4"] }
+walkdir = "2"
 
 [dev-dependencies]
 tokio = { version = "1", features = ["rt", "test-util", "macros"] }
diff --git a/src/config.rs b/src/config.rs
index a1501bc..a8247eb 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -465,6 +465,29 @@ impl RunCommand {
     }
 }
 
+#[derive(Parser, Debug)]
+pub struct ListCommand {
+    /// Lists only the runs whose workload path contains the given text.
+    #[clap()]
+    pub workload: Option<String>,
+
+    /// Lists only the runs of the given function.
+    #[clap(long, short('f'))]
+    pub function: Option<String>,
+
+    /// Lists only the runs having at least one of the given tags; can be repeated.
+    #[clap(long("tag"), number_of_values = 1)]
+    pub tags: Vec<String>,
+
+    /// Path to a directory to search for JSON reports; can be repeated.
+    #[clap(long, short('o'), default_value = ".", number_of_values = 1)]
+    pub output: Vec<PathBuf>,
+
+    /// Descends into subdirectories recursively.
+    #[clap(short('r'), long)]
+    pub recursive: bool,
+}
+
 #[derive(Parser, Debug)]
 pub struct ShowCommand {
     /// Path to the JSON report file
@@ -545,6 +568,10 @@ pub enum Command {
     /// Additionally dumps all data into a JSON report file.
     Run(RunCommand),
 
+    /// Lists benchmark reports saved in the current or specified directory
+    /// with summaries of their results.
+    List(ListCommand),
+
     /// Displays the report(s) of previously executed benchmark(s).
     ///
     /// Can compare two runs.
diff --git a/src/main.rs b/src/main.rs
index b1ec1d2..13ab787 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,4 +1,5 @@
 use std::env;
+use std::ffi::OsStr;
 use std::fs::File;
 use std::io::{stdout, Write};
 use std::path::{Path, PathBuf};
@@ -6,18 +7,21 @@ use std::process::exit;
 use std::time::Duration;
 
 use clap::Parser;
+use config::RunCommand;
+use futures::stream::FuturesUnordered;
+use futures::StreamExt;
 use hdrhistogram::serialization::interval_log::Tag;
 use hdrhistogram::serialization::{interval_log, V2DeflateSerializer};
 use itertools::Itertools;
 use rune::Source;
 use search_path::SearchPath;
 use tokio::runtime::{Builder, Runtime};
-
-use config::RunCommand;
+use tokio::task::spawn_blocking;
+use walkdir::WalkDir;
 
 use crate::config::{
-    AppConfig, Command, ConnectionConf, EditCommand, HdrCommand, Interval, LoadCommand,
-    SchemaCommand, ShowCommand,
+    AppConfig, Command, ConnectionConf, EditCommand, HdrCommand, Interval, ListCommand,
+    LoadCommand, SchemaCommand, ShowCommand,
 };
 use crate::context::*;
 use crate::context::{CassError, CassErrorKind, Context, SessionStats};
@@ -26,9 +30,10 @@ use crate::error::{LatteError, Result};
 use crate::exec::{par_execute, ExecutionOptions};
 use crate::plot::plot_graph;
 use crate::progress::Progress;
-use crate::report::{Report, RunConfigCmp};
+use crate::report::{PathAndSummary, Report, RunConfigCmp};
 use crate::sampler::Sampler;
 use crate::stats::{BenchmarkCmp, BenchmarkStats, Recorder};
+use crate::table::{Alignment, Table};
 use crate::workload::{FnRef, Program, Workload, WorkloadStats, LOAD_FN};
 
 mod config;
@@ -42,6 +47,7 @@ mod progress;
 mod report;
 mod sampler;
 mod stats;
+mod table;
 mod workload;
 
 const VERSION: &str = env!("CARGO_PKG_VERSION");
@@ -301,6 +307,74 @@ async fn run(conf: RunCommand) -> Result<()> {
     Ok(())
 }
 
+/// Prints a table of the saved reports that match the filters given in `conf`.
+async fn list(conf: ListCommand) -> Result<()> {
+    let max_depth = if conf.recursive { usize::MAX } else { 1 };
+
+    // Loading reports is a bit slow, so we do it in parallel:
+    let mut report_futures = FuturesUnordered::new();
+    for path in &conf.output {
+        let walk = WalkDir::new(path).max_depth(max_depth);
+        for entry in walk.into_iter().flatten() {
+            let is_json = entry.path().extension() == Some(OsStr::new("json"));
+            if !entry.file_type().is_file() || !is_json {
+                continue;
+            }
+            let path = entry.into_path();
+            report_futures.push(spawn_blocking(move || (Report::load(&path), path)));
+        }
+    }
+
+    let mut reports = Vec::new();
+    while let Some(loaded) = report_futures.next().await {
+        match loaded {
+            Ok((Ok(report), path)) if should_list(&report, &conf) => {
+                reports.push(PathAndSummary(path, report.summary()))
+            }
+            Ok((Err(e), path)) => eprintln!("Failed to load report {}: {}", path.display(), e),
+            // A loader task that panicked is reported instead of aborting the whole listing.
+            Err(e) => eprintln!("Failed to load report: {}", e),
+            Ok(_) => {}
+        };
+    }
+
+    if !reports.is_empty() {
+        reports
+            .sort_by_cached_key(|s| (s.1.workload.clone(), s.1.function.clone(), s.1.timestamp));
+        let mut table = Table::new(PathAndSummary::COLUMNS);
+        for column in 7..=9 {
+            table.align(column, Alignment::Right);
+        }
+        for r in reports {
+            table.push(r);
+        }
+        println!("{}", table);
+    }
+    Ok(())
+}
+
+/// Returns true if the report passes all filters given on the command line:
+/// workload substring, exact function name and tags.
+fn should_list(report: &Report, conf: &ListCommand) -> bool {
+    // Every filter that is set must accept the report; unset filters accept everything.
+    let workload_matches = match &conf.workload {
+        Some(pattern) => report
+            .conf
+            .workload
+            .to_string_lossy()
+            .contains(pattern),
+        None => true,
+    };
+    let function_matches = match &conf.function {
+        Some(function) => report.conf.function == *function,
+        None => true,
+    };
+    // With tags given, a single tag in common with the report is enough.
+    let tags_match =
+        conf.tags.is_empty() || conf.tags.iter().any(|t| report.conf.tags.contains(t));
+    workload_matches && function_matches && tags_match
+}
+
 async fn show(conf: ShowCommand) -> Result<()> {
     let report1 = load_report_or_abort(&conf.report);
     let report2 = conf.baseline.map(|p| load_report_or_abort(&p));
@@ -376,6 +450,7 @@ async fn async_main(command: Command) -> Result<()> {
         Command::Schema(config) => schema(config).await?,
         Command::Load(config) => load(config).await?,
         Command::Run(config) => run(config).await?,
+        Command::List(config) => list(config).await?,
         Command::Show(config) => show(config).await?,
         Command::Hdr(config) => export_hdr_log(config).await?,
         Command::Plot(config) => plot_graph(config).await?,
diff --git a/src/report.rs b/src/report.rs
index cfed4f9..46d242d 100644
--- a/src/report.rs
+++ b/src/report.rs
@@ -1,23 +1,23 @@
-use core::fmt;
-use std::collections::BTreeSet;
-use std::fmt::{Display, Formatter};
-use std::num::NonZeroUsize;
-use std::path::Path;
-use std::{fs, io};
-
-use chrono::{Local, TimeZone};
+use crate::config::{RunCommand, PRINT_RETRY_ERROR_LIMIT};
+use crate::stats::{
+    BenchmarkCmp, BenchmarkStats, Bucket, Mean, Percentile, Sample, Significance, TimeDistribution,
+};
+use crate::table::Row;
+use chrono::{DateTime, Local, TimeZone};
 use console::{pad_str, style, Alignment};
+use core::fmt;
 use err_derive::*;
 use itertools::Itertools;
 use serde::{Deserialize, Serialize};
 use statrs::statistics::Statistics;
+use std::collections::BTreeSet;
+use std::fmt::{Display, Formatter};
+use std::io::{BufReader, BufWriter};
+use std::num::NonZeroUsize;
+use std::path::{Path, PathBuf};
+use std::{fs, io};
 use strum::IntoEnumIterator;
 
-use crate::config::{RunCommand, PRINT_RETRY_ERROR_LIMIT};
-use crate::stats::{
-    BenchmarkCmp, BenchmarkStats, Bucket, Mean, Percentile, Sample, Significance, TimeDistribution,
-};
-
 /// A standard error is multiplied by this factor to get the error margin.
 /// For a normally distributed random variable,
 /// this should give us 0.999 confidence the expected value is within the (result +- error) range.
@@ -53,16 +53,47 @@ impl Report {
     /// Loads benchmark results from a JSON file
     pub fn load(path: &Path) -> Result<Report, ReportLoadError> {
         let file = fs::File::open(path)?;
-        let report = serde_json::from_reader(file)?;
+        let reader = BufReader::new(file);
+        let report = serde_json::from_reader(reader)?;
         Ok(report)
     }
 
     /// Saves benchmark results to a JSON file
     pub fn save(&self, path: &Path) -> io::Result<()> {
         let f = fs::File::create(path)?;
-        serde_json::to_writer_pretty(f, &self)?;
+        let writer = BufWriter::new(f); // NOTE(review): never flushed explicitly; a failed flush on drop is ignored
+        serde_json::to_writer_pretty(writer, &self)?;
         Ok(())
     }
+
+    /// Condenses this report into the few figures shown in a tabular listing.
+    ///
+    /// Panics if the P50 or P99 entry is missing from the cycle time percentiles.
+    pub fn summary(&self) -> Summary {
+        let conf = &self.conf;
+        let cycle_time = &self.result.cycle_time_ms;
+        // Percentiles are looked up by the numeric value of the `Percentile` variant.
+        let percentile = |p: Percentile| -> f64 {
+            cycle_time.percentiles.get(p as usize).unwrap().value
+        };
+
+        // A timestamp that does not map to a valid local time is treated as missing.
+        let timestamp = conf
+            .timestamp
+            .and_then(|ts| Local.timestamp_opt(ts, 0).latest());
+
+        Summary {
+            workload: conf.workload.clone(),
+            function: conf.function.clone(),
+            timestamp,
+            tags: conf.tags.clone(),
+            params: conf.params.clone(),
+            rate: conf.rate,
+            throughput: self.result.cycle_throughput.value,
+            latency_p50: percentile(Percentile::P50),
+            latency_p99: percentile(Percentile::P99),
+        }
+    }
 }
 
 /// A displayable, optional value with an optional error.
@@ -436,14 +467,7 @@ impl RunConfigCmp<'_> {
     }
 
     fn format_time(&self, conf: &RunCommand, format: &str) -> String {
-        conf.timestamp
-            .and_then(|ts| {
-                Local
-                    .timestamp_opt(ts, 0)
-                    .latest()
-                    .map(|l| l.format(format).to_string())
-            })
-            .unwrap_or_default()
+        format_time(conf.timestamp, format)
     }
 
     /// Returns the set union of custom user parameters in both configurations.
@@ -787,3 +811,79 @@ impl<'a> Display for BenchmarkCmp<'a> {
         Ok(())
     }
 }
+
+#[derive(Debug)]
+pub struct PathAndSummary(pub PathBuf, pub Summary); // path of a report file and the summary of that report
+
+#[derive(Debug)]
+pub struct Summary { // key figures of one report, as filled in by `Report::summary`
+    pub workload: PathBuf,
+    pub function: String,
+    pub timestamp: Option<DateTime<Local>>, // `None` if the run has no valid timestamp
+    pub tags: Vec<String>,
+    pub params: Vec<(String, String)>, // pairs rendered as `k = v` in listings
+    pub rate: Option<f64>,
+    pub throughput: f64, // NOTE(review): listed under "Thrpt. [req/s]" — confirm the unit
+    pub latency_p50: f64,
+    pub latency_p99: f64,
+}
+
+impl PathAndSummary {
+    pub const COLUMNS: &'static [&'static str] = &[ // titles; each is also a key matched in `cell_value`
+        "File",
+        "Workload",
+        "Function",
+        "Timestamp",
+        "Tags",
+        "Params",
+        "Rate",
+        "Thrpt. [req/s]",
+        "P50 [ms]",
+        "P99 [ms]",
+    ];
+}
+
+impl Row for PathAndSummary {
+    fn cell_value(&self, column: &str) -> Option<String> { // `column` is one of the titles in `COLUMNS`
+        match column { // NOTE(review): P50/P99 are scaled by 1000 but come from `cycle_time_ms` — confirm unit
+            "File" => Some(self.0.display().to_string()),
+            "Workload" => Some( // only the file name of the workload path is shown
+                self.1
+                    .workload
+                    .file_name()
+                    .unwrap_or_default()
+                    .to_string_lossy()
+                    .to_string(),
+            ),
+            "Function" => Some(self.1.function.clone()),
+            "Timestamp" => self // `None` when the summary has no timestamp
+                .1
+                .timestamp
+                .map(|ts| ts.format("%Y-%m-%d %H:%M:%S").to_string()),
+            "Tags" => Some(self.1.tags.join(", ")),
+            "Params" => Some(
+                self.1
+                    .params
+                    .iter()
+                    .map(|(k, v)| format!("{k} = {v}"))
+                    .join(", "),
+            ),
+            "Rate" => self.1.rate.map(|r| r.to_string()), // `None` when the summary has no rate
+            "Thrpt. [req/s]" => Some(format!("{:.0}", self.1.throughput)),
+            "P50 [ms]" => Some(format!("{:.1}", self.1.latency_p50 * 1000.0)),
+            "P99 [ms]" => Some(format!("{:.1}", self.1.latency_p99 * 1000.0)),
+            _ => None, // unknown column title
+        }
+    }
+}
+
+/// Formats a Unix timestamp (in seconds) as local time using the given `format`.
+///
+/// Returns an empty string if the timestamp is missing or has no valid local time.
+fn format_time(timestamp: Option<i64>, format: &str) -> String {
+    let local_time = timestamp.and_then(|ts| Local.timestamp_opt(ts, 0).latest());
+    match local_time {
+        Some(time) => time.format(format).to_string(),
+        None => String::new(),
+    }
+}
diff --git a/src/table.rs b/src/table.rs
new file mode 100644
index 0000000..ad5e6a9
--- /dev/null
+++ b/src/table.rs
@@ -0,0 +1,155 @@
+use console::style;
+use std::fmt::{Display, Formatter};
+
+pub trait Row { // a record that a `Table` can render as one line
+    fn cell_value(&self, column: &str) -> Option<String>; // `None` is rendered as an empty cell
+}
+
+pub struct Table<R> { // rows of type `R` plus the layout of each column
+    columns: Vec<Column>,
+    rows: Vec<R>,
+}
+
+struct Column {
+    name: String, // title; also the key passed to `Row::cell_value`
+    width: usize, // widest of the title and the cells pushed so far
+    alignment: Alignment,
+}
+
+pub enum Alignment { // side on which a column's text is placed; padding goes on the other side
+    Left,
+    Right,
+}
+
+impl<R: Row> Table<R> {
+    /// Creates an empty table with the given column names, all left-aligned.
+    pub fn new<C: AsRef<str>>(columns: &[C]) -> Table<R> {
+        let columns: Vec<Column> = columns
+            .iter()
+            .map(|name| Column {
+                name: name.as_ref().to_owned(),
+                width: name.as_ref().chars().count(),
+                alignment: Alignment::Left,
+            })
+            .collect();
+
+        Table {
+            columns,
+            rows: vec![],
+        }
+    }
+
+    /// Sets the alignment of a column; panics if `column_index` is out of range.
+    pub fn align(&mut self, column_index: usize, alignment: Alignment) {
+        self.columns[column_index].alignment = alignment;
+    }
+
+    /// Appends a row, widening columns to fit its values. Widths are counted in
+    /// characters rather than bytes so that non-ASCII values stay aligned.
+    pub fn push(&mut self, row: R) {
+        for column in self.columns.iter_mut() {
+            let len = row
+                .cell_value(column.name.as_str())
+                .map(|v| v.chars().count())
+                .unwrap_or_default();
+            column.width = column.width.max(len);
+        }
+        self.rows.push(row);
+    }
+
+    /// Renders a column title padded to the column width with a horizontal rule.
+    fn header(&self, column: &Column) -> String {
+        let column_name = column.name.as_str();
+        let padding = column.width.saturating_sub(column_name.chars().count());
+        match column.alignment {
+            Alignment::Left => format!("{}{}", column_name, Self::right_padding(padding)),
+            Alignment::Right => format!("{}{}", Self::left_padding(padding), column_name),
+        }
+    }
+
+    /// Renders a cell padded with spaces to the column width; missing values are blank.
+    fn value(&self, row: &R, column: &Column) -> String {
+        let column_value = row.cell_value(column.name.as_str()).unwrap_or_default();
+        let padding = column.width.saturating_sub(column_value.chars().count());
+        match column.alignment {
+            Alignment::Left => format!("{}{}", column_value, " ".repeat(padding)),
+            Alignment::Right => format!("{}{}", " ".repeat(padding), column_value),
+        }
+    }
+
+    fn left_padding(n: usize) -> String {
+        match n {
+            0 => "".to_string(),
+            1 => " ".to_string(),
+            2.. => format!("{} ", "─".repeat(n - 1)),
+        }
+    }
+
+    fn right_padding(n: usize) -> String {
+        match n {
+            0 => "".to_string(),
+            1 => " ".to_string(),
+            2.. => format!(" {}", "─".repeat(n - 1)),
+        }
+    }
+}
+
+impl<R: Row> Display for Table<R> {
+    /// Writes the styled header line followed by one line per row.
+    /// Every cell, including the last one in a line, is followed by three spaces.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        for column in &self.columns {
+            let title = style(self.header(column))
+                .yellow()
+                .bold()
+                .bright()
+                .for_stdout();
+            write!(f, "{}   ", title)?;
+        }
+        writeln!(f)?;
+
+        for row in &self.rows {
+            let cells = self.columns.iter().map(|column| self.value(row, column));
+            for cell in cells {
+                write!(f, "{}   ", cell)?;
+            }
+            writeln!(f)?;
+        }
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use crate::table::{Alignment, Row, Table};
+
+    #[test]
+    fn render_table() {
+        struct DataPoint {
+            benchmark: &'static str,
+            result: u64,
+        }
+        impl Row for DataPoint {
+            fn cell_value(&self, column: &str) -> Option<String> {
+                match column {
+                    "A" => Some(self.benchmark.to_string()),
+                    "Result" => Some(self.result.to_string()),
+                    _ => None,
+                }
+            }
+        }
+
+        let mut table = Table::new(&["A", "Result"]);
+        table.push(DataPoint {
+            benchmark: "foo",
+            result: 10000000,
+        });
+        table.push(DataPoint {
+            benchmark: "long name",
+            result: 1,
+        });
+        table.align(1, Alignment::Right);
+        let rendered = table.to_string();
+        assert!(rendered.ends_with("foo         10000000   \nlong name          1   \n"));
+    }
+}