From 3325cda83e088768bb7ce6e97c9e2c85f6a2be3f Mon Sep 17 00:00:00 2001
From: Alex Butler <alexheretic@gmail.com>
Date: Mon, 23 Dec 2024 20:29:57 +0000
Subject: [PATCH] Add XPSNR support as a VMAF alternative (#251)

* Add XPSNR support as a VMAF alternative

* Remove some "VMAF" hardcodes

* Use minimum xpsnr score

* xpsnr support ref-vfilter

* remove stats_file quoting causing file creation

* parse xpsnr rgb output

* reduce ScoreKind specific code

* Fix reference, distorted ffmpeg input order!

* Add xpsnr command to readme

* Update readme and xpsnr docs
---
 CHANGELOG.md                       |   6 +-
 Cargo.lock                         |   4 +-
 README.md                          |  23 +-
 src/command.rs                     |   2 +
 src/command/args.rs                |  11 +
 src/command/args/vmaf.rs           |  42 +---
 src/command/auto_encode.rs         |  22 +-
 src/command/crf_search.rs          |  69 ++++--
 src/command/sample_encode.rs       | 360 ++++++++++++++++++++---------
 src/command/sample_encode/cache.rs |  22 +-
 src/command/vmaf.rs                |   7 +-
 src/command/xpsnr.rs               | 119 ++++++++++
 src/main.rs                        |   3 +
 src/process.rs                     |   8 +-
 src/vmaf.rs                        |  13 +-
 src/xpsnr.rs                       | 230 ++++++++++++++++++
 16 files changed, 734 insertions(+), 207 deletions(-)
 create mode 100644 src/command/xpsnr.rs
 create mode 100644 src/xpsnr.rs
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4db76cb..d9888b8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,8 @@
-# Unreleased (0.8.1)
+# Unreleased (0.9.0)
+* Add XPSNR support as a VMAF alternative.
+  - Add sample-encode `--xpsnr` arg which toggles use of XPSNR instead of VMAF.
+  - Add crf-search, auto-encode `--min-xpsnr` arg _(alternative to `--min-vmaf`)_.
+  - Add `xpsnr` command for measuring XPSNR score.
 * Support negative `--preset` args.
 * Add `--vmaf-fps`: Frame rate override used to analyse both reference & distorted videos.
 * Omit data streams when outputting to matroska (.mkv or .webm).
diff --git a/Cargo.lock b/Cargo.lock
index ba5ed22..f140620 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -98,9 +98,9 @@ dependencies = [
 
 [[package]]
 name = "anyhow"
-version = "1.0.94"
+version = "1.0.95"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c1fd03a028ef38ba2276dce7e33fcd6369c158a1bca17946c4b1b701891c1ff7"
+checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
 
 [[package]]
 name = "arrayref"
diff --git a/README.md b/README.md
index e30630a..e316ace 100644
--- a/README.md
+++ b/README.md
@@ -7,19 +7,19 @@ Uses _ffmpeg_, _svt-av1_ & _vmaf_.
 Also supports other ffmpeg compatible encoders like libx265 & libx264.
 
 ### Command: auto-encode
-Automatically determine the best crf to deliver the min-vmaf and use it to encode a video or image.
+Automatically determine the best crf to deliver the `--min-vmaf` and use it to encode a video or image.
 
 Two phases:
 * [crf-search](#command-crf-search) to determine the best --crf value
 * ffmpeg to encode using the settings
 
 ```
-ab-av1 auto-encode [OPTIONS] -i <INPUT> --preset <PRESET>
+ab-av1 auto-encode [OPTIONS] -i <INPUT> --preset <PRESET> --min-vmaf <MIN_VMAF>
 ```
 
 ### Command: crf-search
 Interpolated binary search using [sample-encode](#command-sample-encode) to find the best 
-crf value delivering **min-vmaf** & **max-encoded-percent**.
+crf value delivering `--min-vmaf` & `--max-encoded-percent`.
 
 Outputs:
 * Best crf value
@@ -28,9 +28,12 @@ Outputs:
 * Predicted full encode time
 
 ```
-ab-av1 crf-search [OPTIONS] -i <INPUT> --preset <PRESET>
+ab-av1 crf-search [OPTIONS] -i <INPUT> --preset <PRESET> --min-vmaf <MIN_VMAF>
 ```
 
+#### Notable options
+* `--min-xpsnr <MIN_XPSNR>` may be used as an alternative to `--min-vmaf`, scoring with XPSNR instead of VMAF.
+
 ### Command: sample-encode
 Encode short video samples of an input using provided **crf** & **preset**. 
 This is much quicker than full encode/vmaf run. 
@@ -44,6 +47,9 @@ Outputs:
 ab-av1 sample-encode [OPTIONS] -i <INPUT> --crf <CRF> --preset <PRESET>
 ```
 
+#### Notable options
+* `--xpsnr` specifies calculation of XPSNR score instead of VMAF.
+
 ### Command: encode
 Invoke ffmpeg to encode a video or image.
 
@@ -58,12 +64,19 @@ Works with videos and images.
 * Auto sets model version (4k or 1k) according to resolution.
 * Auto sets _n_threads_ to system threads.
 * Auto upscales lower resolution videos to the model.
-* Converts distorted & reference to appropriate format yuv streams before passing to vmaf.
 
 ```
 ab-av1 vmaf --reference <REFERENCE> --distorted <DISTORTED>
 ```
 
+### Command: xpsnr
+Full XPSNR score calculation, distorted file vs reference file.
+Works with videos and images.
+
+```
+ab-av1 xpsnr --reference <REFERENCE> --distorted <DISTORTED>
+```
+
 ## Install
 ### Arch Linux
 Available in the [AUR](https://aur.archlinux.org/packages/ab-av1).
diff --git a/src/command.rs b/src/command.rs
index 99b930c..9fb6017 100644
--- a/src/command.rs
+++ b/src/command.rs
@@ -5,6 +5,7 @@ pub mod encode;
 pub mod print_completions;
 pub mod sample_encode;
 pub mod vmaf;
+pub mod xpsnr;
 
 pub use auto_encode::auto_encode;
 pub use crf_search::crf_search;
@@ -12,6 +13,7 @@ pub use encode::encode;
 pub use print_completions::print_completions;
 pub use sample_encode::sample_encode;
 pub use vmaf::vmaf;
+pub use xpsnr::xpsnr;
 
 const PROGRESS_CHARS: &str = "##-";
 
diff --git a/src/command/args.rs b/src/command/args.rs
index 411d4f6..d2b1a96 100644
--- a/src/command/args.rs
+++ b/src/command/args.rs
@@ -103,3 +103,14 @@ impl Sample {
         self.extension = output.extension().and_then(|e| e.to_str().map(Into::into));
     }
 }
+
+/// Args for when VMAF/XPSNR are used to score ref vs distorted.
+#[derive(Debug, Parser, Clone, Hash)]
+pub struct ScoreArgs {
+    /// Ffmpeg video filter applied to the VMAF/XPSNR reference before analysis.
+    /// E.g. --reference-vfilter "scale=1280:-1,fps=24".
+    ///
+    /// Overrides --vfilter which would otherwise be used.
+    #[arg(long)]
+    pub reference_vfilter: Option<Arc<str>>,
+}
diff --git a/src/command/args/vmaf.rs b/src/command/args/vmaf.rs
index 1ea4e66..5f9a387 100644
--- a/src/command/args/vmaf.rs
+++ b/src/command/args/vmaf.rs
@@ -40,13 +40,6 @@ pub struct Vmaf {
     /// By default no override is set.
     #[arg(long)]
     pub vmaf_fps: Option<f32>,
-
-    /// Ffmpeg video filter applied to the VMAF reference before analysis.
-    /// E.g. --reference-vfilter "scale=1280:-1,fps=24".
-    ///
-    /// Overrides --vfilter which would otherwise be used.
-    #[arg(long)]
-    pub reference_vfilter: Option<String>,
 }
 
 impl std::hash::Hash for Vmaf {
@@ -54,7 +47,6 @@ impl std::hash::Hash for Vmaf {
         self.vmaf_args.hash(state);
         self.vmaf_scale.hash(state);
         self.vmaf_fps.map(|f| f.to_ne_bytes()).hash(state);
-        self.reference_vfilter.hash(state);
     }
 }
 
@@ -63,19 +55,7 @@ fn parse_vmaf_arg(arg: &str) -> anyhow::Result<Arc<str>> {
 }
 
 impl Vmaf {
-    pub fn is_default(&self) -> bool {
-        let Self {
-            vmaf_args,
-            vmaf_scale,
-            vmaf_fps: _,
-            reference_vfilter,
-        } = self;
-        vmaf_args.is_empty() && *vmaf_scale == VmafScale::Auto && reference_vfilter.is_none()
-    }
-
     /// Returns ffmpeg `filter_complex`/`lavfi` value for calculating vmaf.
-    ///
-    /// Note `ref_vfilter` is ignored if `Self::reference_vfilter` is some.
     pub fn ffmpeg_lavfi(
         &self,
         distorted_res: Option<(u32, u32)>,
@@ -105,7 +85,7 @@ impl Vmaf {
             }
         }
 
-        let ref_vf: Cow<_> = match self.reference_vfilter.as_deref().or(ref_vfilter) {
+        let ref_vf: Cow<_> = match ref_vfilter {
             None => "".into(),
             Some(vf) if vf.ends_with(',') => vf.into(),
             Some(vf) => format!("{vf},").into(),
@@ -241,26 +221,6 @@ fn vmaf_lavfi() {
     );
 }
 
-#[test]
-fn vmaf_lavfi_override_reference_vfilter() {
-    let vmaf = Vmaf {
-        vmaf_args: vec!["n_threads=5".into(), "n_subsample=4".into()],
-        vmaf_scale: VmafScale::Auto,
-        vmaf_fps: None,
-        reference_vfilter: Some("scale=2560:-1".into()),
-    };
-    assert_eq!(
-        vmaf.ffmpeg_lavfi(
-            None,
-            PixelFormat::Yuv420p,
-            Some("scale_vaapi=w=2560:h=1280")
-        ),
-        "[0:v]format=yuv420p,setpts=PTS-STARTPTS,settb=AVTB[dis];\
-         [1:v]format=yuv420p,scale=2560:-1,setpts=PTS-STARTPTS,settb=AVTB[ref];\
-         [dis][ref]libvmaf=shortest=true:ts_sync_mode=nearest:n_threads=5:n_subsample=4"
-    );
-}
-
 #[test]
 fn vmaf_lavfi_default() {
     let vmaf = Vmaf::default();
diff --git a/src/command/auto_encode.rs b/src/command/auto_encode.rs
index 64b7e23..398e83d 100644
--- a/src/command/auto_encode.rs
+++ b/src/command/auto_encode.rs
@@ -68,7 +68,7 @@ pub async fn auto_encode(Args { mut search, encode }: Args) -> anyhow::Result<()
         bar.println(style!("Encoding {out}").dim().to_string());
     }
 
-    let min_vmaf = search.min_vmaf;
+    let min_score = search.min_score();
     let max_encoded_percent = search.max_encoded_percent;
     let enc_args = search.args.clone();
     let thorough = search.thorough;
@@ -86,15 +86,16 @@ pub async fn auto_encode(Args { mut search, encode }: Args) -> anyhow::Result<()
                             .template(SPINNER_FINISHED)?
                             .progress_chars(PROGRESS_CHARS),
                     );
-                    let mut vmaf = style(last.enc.vmaf);
-                    if last.enc.vmaf < min_vmaf {
+                    let mut vmaf = style(last.enc.score);
+                    if last.enc.score < min_score {
                         vmaf = vmaf.red();
                     }
                     let mut percent = style!("{:.0}%", last.enc.encode_percent);
                     if last.enc.encode_percent > max_encoded_percent as _ {
                         percent = percent.red();
                     }
-                    bar.finish_with_message(format!("VMAF {vmaf:.2}, size {percent}"));
+                    let score_kind = last.enc.score_kind;
+                    bar.finish_with_message(format!("{score_kind} {vmaf:.2}, size {percent}"));
                 }
                 bar.finish();
                 return Err(err.into());
@@ -118,11 +119,11 @@ pub async fn auto_encode(Args { mut search, encode }: Args) -> anyhow::Result<()
                     true => bar.set_prefix(format!("crf {crf} full pass")),
                     false => bar.set_prefix(format!("crf {crf} {sample}/{samples}")),
                 }
+                let label = work.fps_label();
                 match work {
                     Work::Encode if fps <= 0.0 => bar.set_message("encoding,  "),
-                    Work::Encode => bar.set_message(format!("enc {fps} fps, ")),
-                    Work::Vmaf if fps <= 0.0 => bar.set_message("vmaf,       "),
-                    Work::Vmaf => bar.set_message(format!("vmaf {fps} fps, ")),
+                    _ if fps <= 0.0 => bar.set_message(format!("{label},       ")),
+                    _ => bar.set_message(format!("{label} {fps} fps, ")),
                 }
             }
             Ok(crf_search::Update::SampleResult {
@@ -142,7 +143,7 @@ pub async fn auto_encode(Args { mut search, encode }: Args) -> anyhow::Result<()
                     .log_level()
                     .is_some_and(|lvl| lvl > log::Level::Error)
                 {
-                    result.print_attempt(&bar, min_vmaf, max_encoded_percent)
+                    result.print_attempt(&bar, min_score, max_encoded_percent)
                 }
             }
             Ok(crf_search::Update::Done(result)) => best = Some(result),
@@ -156,8 +157,9 @@ pub async fn auto_encode(Args { mut search, encode }: Args) -> anyhow::Result<()
             .progress_chars(PROGRESS_CHARS),
     );
     bar.finish_with_message(format!(
-        "VMAF {:.2}, size {}",
-        style(best.enc.vmaf).green(),
+        "{} {:.2}, size {}",
+        best.enc.score_kind,
+        style(best.enc.score).green(),
         style(format!("{:.0}%", best.enc.encode_percent)).green(),
     ));
     temporary::clean_all().await;
diff --git a/src/command/crf_search.rs b/src/command/crf_search.rs
index e20c1f6..25d082a 100644
--- a/src/command/crf_search.rs
+++ b/src/command/crf_search.rs
@@ -26,6 +26,7 @@ use std::{
 };
 
 const BAR_LEN: u64 = 1024 * 1024 * 1024;
+const DEFAULT_MIN_VMAF: f32 = 95.0;
 
 /// Interpolated binary search using sample-encode to find the best crf
 /// value delivering min-vmaf & max-encoded-percent.
@@ -45,8 +46,16 @@ pub struct Args {
     pub args: args::Encode,
 
     /// Desired min VMAF score to deliver.
-    #[arg(long, default_value_t = 95.0)]
-    pub min_vmaf: f32,
+    ///
+    /// [default: 95]
+    #[arg(long, group = "min_score")]
+    pub min_vmaf: Option<f32>,
+
+    /// Desired min XPSNR score to deliver.
+    ///
+    /// Enables use of XPSNR for score analysis instead of VMAF.
+    #[arg(long, group = "min_score")]
+    pub min_xpsnr: Option<f32>,
 
     /// Maximum desired encoded size percentage of the input size.
     #[arg(long, default_value_t = 80.0)]
@@ -92,10 +101,19 @@ pub struct Args {
     #[clap(flatten)]
     pub vmaf: args::Vmaf,
 
+    #[clap(flatten)]
+    pub score: args::ScoreArgs,
+
     #[command(flatten)]
     pub verbose: clap_verbosity_flag::Verbosity,
 }
 
+impl Args {
+    pub fn min_score(&self) -> f32 {
+        self.min_vmaf.or(self.min_xpsnr).unwrap_or(DEFAULT_MIN_VMAF)
+    }
+}
+
 pub async fn crf_search(mut args: Args) -> anyhow::Result<()> {
     let bar = ProgressBar::new(BAR_LEN).with_style(
         ProgressStyle::default_bar()
@@ -109,7 +127,7 @@ pub async fn crf_search(mut args: Args) -> anyhow::Result<()> {
     args.sample
         .set_extension_from_input(&args.args.input, &args.args.encoder, &probe);
 
-    let min_vmaf = args.min_vmaf;
+    let min_score = args.min_score();
     let max_encoded_percent = args.max_encoded_percent;
     let thorough = args.thorough;
     let enc_args = args.args.clone();
@@ -119,7 +137,7 @@ pub async fn crf_search(mut args: Args) -> anyhow::Result<()> {
     while let Some(update) = run.next().await {
         let update = update.inspect_err(|e| {
             if let Error::NoGoodCrf { last } = e {
-                last.print_attempt(&bar, min_vmaf, max_encoded_percent);
+                last.print_attempt(&bar, min_score, max_encoded_percent);
             }
         })?;
         match update {
@@ -142,11 +160,11 @@ pub async fn crf_search(mut args: Args) -> anyhow::Result<()> {
                     true => bar.set_prefix(format!("crf {crf} full pass")),
                     false => bar.set_prefix(format!("crf {crf} {sample}/{samples}")),
                 }
+                let label = work.fps_label();
                 match work {
                     Work::Encode if fps <= 0.0 => bar.set_message("encoding,  "),
-                    Work::Encode => bar.set_message(format!("enc {fps} fps, ")),
-                    Work::Vmaf if fps <= 0.0 => bar.set_message("vmaf,       "),
-                    Work::Vmaf => bar.set_message(format!("vmaf {fps} fps, ")),
+                    _ if fps <= 0.0 => bar.set_message(format!("{label},       ")),
+                    _ => bar.set_message(format!("{label} {fps} fps, ")),
                 }
             }
             Update::SampleResult {
@@ -161,7 +179,7 @@ pub async fn crf_search(mut args: Args) -> anyhow::Result<()> {
                     result.print_attempt(&bar, sample, Some(crf))
                 }
             }
-            Update::RunResult(result) => result.print_attempt(&bar, min_vmaf, max_encoded_percent),
+            Update::RunResult(result) => result.print_attempt(&bar, min_score, max_encoded_percent),
             Update::Done(best) => {
                 info!("crf {} successful", best.crf());
                 bar.finish_with_message("");
@@ -184,6 +202,7 @@ pub fn run(
     Args {
         args,
         min_vmaf,
+        min_xpsnr,
         max_encoded_percent,
         min_crf,
         max_crf,
@@ -192,6 +211,7 @@ pub fn run(
         sample,
         cache,
         vmaf,
+        score,
         verbose: _,
     }: Args,
     input_probe: Arc<Ffprobe>,
@@ -202,6 +222,8 @@ pub fn run(
         let default_min_crf = args.encoder.default_min_crf();
         let min_crf = min_crf.unwrap_or(default_min_crf);
         Error::ensure_other(min_crf < max_crf, "Invalid --min-crf & --max-crf")?;
+        // by default use vmaf 95, otherwise use whatever is specified
+        let min_score = min_vmaf.or(min_xpsnr).unwrap_or(DEFAULT_MIN_VMAF);
 
         // Whether to make the 2nd iteration on the ~20%/~80% crf point instead of the min/max to
         // improve interpolation by narrowing the crf range a 20% (or 30%) subrange.
@@ -228,6 +250,8 @@ pub fn run(
             cache,
             stdout_format: sample_encode::StdoutFormat::Json,
             vmaf: vmaf.clone(),
+            score: score.clone(),
+            xpsnr: min_xpsnr.is_some(),
         };
 
         let mut crf_attempts = Vec::new();
@@ -265,9 +289,9 @@ pub fn run(
             crf_attempts.push(sample.clone());
             let sample_small_enough = sample.enc.encode_percent <= max_encoded_percent as _;
 
-            if sample.enc.vmaf > min_vmaf {
+            if sample.enc.score > min_score {
                 // good
-                if sample_small_enough && sample.enc.vmaf < min_vmaf + higher_tolerance {
+                if sample_small_enough && sample.enc.score < min_score + higher_tolerance {
                     yield Update::Done(sample);
                     return;
                 }
@@ -283,7 +307,7 @@ pub fn run(
                         return;
                     }
                     Some(upper) => {
-                        q = vmaf_lerp_q(min_vmaf, upper, &sample);
+                        q = vmaf_lerp_q(min_score, upper, &sample);
                     }
                     None if sample.q == max_q => {
                         Error::ensure_or_no_good_crf(sample_small_enough, &sample)?;
@@ -314,7 +338,7 @@ pub fn run(
                         return;
                     }
                     Some(lower) => {
-                        q = vmaf_lerp_q(min_vmaf, &sample, lower);
+                        q = vmaf_lerp_q(min_score, &sample, lower);
                     }
                     None if cut_on_iter2 && run == 1 && sample.q > min_q + 1 => {
                         q = (sample.q as f32 * 0.4 + min_q as f32 * 0.6).round() as _;
@@ -340,11 +364,11 @@ impl Sample {
         self.q.to_crf(self.crf_increment)
     }
 
-    pub fn print_attempt(&self, bar: &ProgressBar, min_vmaf: f32, max_encoded_percent: f32) {
+    pub fn print_attempt(&self, bar: &ProgressBar, min_score: f32, max_encoded_percent: f32) {
         let crf_label = style("- crf").dim();
         let mut crf = style(TerseF32(self.crf()));
-        let vmaf_label = style("VMAF").dim();
-        let mut vmaf = style(self.enc.vmaf);
+        let vmaf_label = style(self.enc.score_kind).dim();
+        let mut vmaf = style(self.enc.score);
         let mut percent = style!("{:.0}%", self.enc.encode_percent);
         let open = style("(").dim();
         let close = style(")").dim();
@@ -353,7 +377,7 @@ impl Sample {
             false => style(""),
         };
 
-        if self.enc.vmaf < min_vmaf {
+        if self.enc.score < min_score {
             crf = crf.red().bright();
             vmaf = vmaf.red().bright();
         }
@@ -383,7 +407,8 @@ impl StdoutFormat {
             Self::Human => {
                 let crf = style(TerseF32(sample.crf())).bold().green();
                 let enc = &sample.enc;
-                let vmaf = style(enc.vmaf).bold().green();
+                let score = style(enc.score).bold().green();
+                let score_kind = enc.score_kind;
                 let size = style(HumanBytes(enc.predicted_encode_size)).bold().green();
                 let percent = style!("{}%", enc.encode_percent.round()).bold().green();
                 let time = style(HumanDuration(enc.predicted_encode_time)).bold();
@@ -392,7 +417,7 @@ impl StdoutFormat {
                     false => "video stream",
                 };
                 println!(
-                    "crf {crf} VMAF {vmaf:.2} predicted {enc_description} size {size} ({percent}) taking {time}"
+                    "crf {crf} {score_kind} {score:.2} predicted {enc_description} size {size} ({percent}) taking {time}"
                 );
             }
         }
@@ -412,14 +437,14 @@ impl StdoutFormat {
 /// This would be helpful particularly for small crf-increments.
 fn vmaf_lerp_q(min_vmaf: f32, worse_q: &Sample, better_q: &Sample) -> u64 {
     assert!(
-        worse_q.enc.vmaf <= min_vmaf
-            && worse_q.enc.vmaf < better_q.enc.vmaf
+        worse_q.enc.score <= min_vmaf
+            && worse_q.enc.score < better_q.enc.score
             && worse_q.q > better_q.q,
         "invalid vmaf_lerp_crf usage: ({min_vmaf}, {worse_q:?}, {better_q:?})"
     );
 
-    let vmaf_diff = better_q.enc.vmaf - worse_q.enc.vmaf;
-    let vmaf_factor = (min_vmaf - worse_q.enc.vmaf) / vmaf_diff;
+    let vmaf_diff = better_q.enc.score - worse_q.enc.score;
+    let vmaf_factor = (min_vmaf - worse_q.enc.score) / vmaf_diff;
 
     let q_diff = worse_q.q - better_q.q;
     let lerp = (worse_q.q as f32 - q_diff as f32 * vmaf_factor).round() as u64;
diff --git a/src/command/sample_encode.rs b/src/command/sample_encode.rs
index 34f7c90..866de45 100644
--- a/src/command/sample_encode.rs
+++ b/src/command/sample_encode.rs
@@ -3,6 +3,7 @@ mod cache;
 use crate::{
     command::{
         args::{self, PixelFormat},
+        sample_encode::cache::ScoringInfo,
         SmallDuration, PROGRESS_CHARS,
     },
     console_ext::style,
@@ -12,6 +13,7 @@ use crate::{
     process::FfmpegOut,
     sample, temporary,
     vmaf::{self, VmafOut},
+    xpsnr::{self, XpsnrOut},
 };
 use anyhow::{ensure, Context};
 use clap::{ArgAction, Parser};
@@ -20,6 +22,7 @@ use futures_util::Stream;
 use indicatif::{HumanBytes, HumanDuration, ProgressBar, ProgressStyle};
 use log::info;
 use std::{
+    fmt::Display,
     io::{self, IsTerminal},
     path::{Path, PathBuf},
     pin::pin,
@@ -33,7 +36,7 @@ use tokio_stream::StreamExt;
 /// This is much quicker than a full encode/vmaf run.
 ///
 /// Outputs:
-/// * Mean sample VMAF score
+/// * Mean sample score
 /// * Predicted full encode size
 /// * Predicted full encode time
 #[derive(Parser, Clone)]
@@ -65,6 +68,13 @@ pub struct Args {
 
     #[clap(flatten)]
     pub vmaf: args::Vmaf,
+
+    #[clap(flatten)]
+    pub score: args::ScoreArgs,
+
+    /// Calculate an XPSNR score instead of VMAF.
+    #[arg(long)]
+    pub xpsnr: bool,
 }
 
 pub async fn sample_encode(mut args: Args) -> anyhow::Result<()> {
@@ -102,11 +112,11 @@ pub async fn sample_encode(mut args: Args) -> anyhow::Result<()> {
                     true => bar.set_prefix("Full pass"),
                     false => bar.set_prefix(format!("Sample {sample}/{samples}")),
                 }
+                let label = work.fps_label();
                 match work {
                     Work::Encode if fps <= 0.0 => bar.set_message("encoding,  "),
-                    Work::Encode => bar.set_message(format!("enc {fps} fps, ")),
-                    Work::Vmaf if fps <= 0.0 => bar.set_message("vmaf,       "),
-                    Work::Vmaf => bar.set_message(format!("vmaf {fps} fps, ")),
+                    _ if fps <= 0.0 => bar.set_message(format!("{label},       ")),
+                    _ => bar.set_message(format!("{label} {fps} fps, ")),
                 }
                 bar.set_position((progress * BAR_LEN_F).round() as _);
             }
@@ -120,13 +130,7 @@ pub async fn sample_encode(mut args: Args) -> anyhow::Result<()> {
                         style(enc_args.encode_hint(crf)).dim().italic(),
                     );
                 }
-                stdout_fmt.print_result(
-                    output.vmaf,
-                    output.predicted_encode_size,
-                    output.encode_percent,
-                    output.predicted_encode_time,
-                    input_is_image,
-                );
+                stdout_fmt.print_result(&output, input_is_image);
             }
         }
     }
@@ -141,6 +145,8 @@ pub fn run(
         cache,
         stdout_format: _,
         vmaf,
+        score,
+        xpsnr,
     }: Args,
     input_probe: Arc<Ffprobe>,
 ) -> impl Stream<Item = anyhow::Result<Update>> {
@@ -155,6 +161,10 @@ pub fn run(
         let samples = sample_args.sample_count(duration).max(1);
         let keep = sample_args.keep;
         let temp_dir = sample_args.temp_dir;
+        let scoring = match xpsnr {
+            true => ScoringInfo::Xpsnr(&score),
+            _ => ScoringInfo::Vmaf(&vmaf, &score),
+        };
 
         let (samples, sample_duration, full_pass) = {
             if input_is_image {
@@ -232,17 +242,13 @@ pub fn run(
                 input_len,
                 full_pass,
                 &enc_args,
-                &vmaf,
+                scoring,
             )
             .await
             {
                 (Some(result), _) => {
                     if samples > 1 {
-                        info!(
-                            "sample {sample_n}/{samples} crf {crf} VMAF {:.2} ({:.0}%) (cache)",
-                            result.vmaf_score,
-                            100.0 * result.encoded_size as f32 / sample_size as f32,
-                        );
+                        result.log_attempt(sample_n, samples, crf);
                     }
                     result
                 }
@@ -275,76 +281,132 @@ pub fn run(
                     let encoded_size = fs::metadata(&encoded_sample).await?.len();
                     let encoded_probe = ffprobe::probe(&encoded_sample);
 
-                    // calculate vmaf
-                    yield Update::Status(Status {
-                        work: Work::Vmaf,
-                        fps: 0.0,
-                        progress: (sample_idx as f32 + 0.5) / samples as f32,
-                        full_pass,
-                        sample: sample_n,
-                        samples,
-                    });
-                    let vmaf = vmaf::run(
-                        &sample,
-                        &encoded_sample,
-                        &vmaf.ffmpeg_lavfi(
-                            encoded_probe.resolution,
-                            enc_args
-                                .pix_fmt
-                                .max(input_pixel_format.unwrap_or(PixelFormat::Yuv444p10le)),
-                            args.vfilter.as_deref(),
-                        ),
-                        vmaf.vmaf_fps,
-                    )?;
-                    let mut vmaf = pin!(vmaf);
-                    let mut logger = ProgressLogger::new("ab_av1::vmaf", Instant::now());
-                    let mut vmaf_score = None;
-                    while let Some(vmaf) = vmaf.next().await {
-                        match vmaf {
-                            VmafOut::Done(score) => {
-                                vmaf_score = Some(score);
-                                break;
+                    let result = match scoring {
+                        ScoringInfo::Vmaf(..) => {
+                            yield Update::Status(Status {
+                                work: Work::Score(ScoreKind::Vmaf),
+                                fps: 0.0,
+                                progress: (sample_idx as f32 + 0.5) / samples as f32,
+                                full_pass,
+                                sample: sample_n,
+                                samples,
+                            });
+                            let vmaf = vmaf::run(
+                                &sample,
+                                &encoded_sample,
+                                &vmaf.ffmpeg_lavfi(
+                                    encoded_probe.resolution,
+                                    enc_args
+                                        .pix_fmt
+                                        .max(input_pixel_format.unwrap_or(PixelFormat::Yuv444p10le)),
+                                    score.reference_vfilter.as_deref().or(args.vfilter.as_deref()),
+                                ),
+                                vmaf.vmaf_fps,
+                            )?;
+                            let mut vmaf = pin!(vmaf);
+                            let mut logger = ProgressLogger::new("ab_av1::vmaf", Instant::now());
+                            let mut vmaf_score = None;
+                            while let Some(vmaf) = vmaf.next().await {
+                                match vmaf {
+                                    VmafOut::Done(score) => {
+                                        vmaf_score = Some(score);
+                                        break;
+                                    }
+                                    VmafOut::Progress(FfmpegOut::Progress { time, fps, .. }) => {
+                                        yield Update::Status(Status {
+                                            work: Work::Score(ScoreKind::Vmaf),
+                                            fps,
+                                            progress: (sample_duration_us +
+                                                time.as_micros_u64() +
+                                                sample_idx * sample_duration_us * 2) as f32
+                                                / (sample_duration_us * samples * 2) as f32,
+                                            full_pass,
+                                            sample: sample_n,
+                                            samples,
+                                        });
+                                        logger.update(sample_duration, time, fps);
+                                    }
+                                    VmafOut::Progress(_) => {}
+                                    VmafOut::Err(e) => Err(e)?,
+                                }
                             }
-                            VmafOut::Progress(FfmpegOut::Progress { time, fps, .. }) => {
-                                yield Update::Status(Status {
-                                    work: Work::Vmaf,
-                                    fps,
-                                    progress: (sample_duration_us +
-                                        time.as_micros_u64() +
-                                        sample_idx * sample_duration_us * 2) as f32
-                                        / (sample_duration_us * samples * 2) as f32,
-                                    full_pass,
-                                    sample: sample_n,
-                                    samples,
-                                });
-                                logger.update(sample_duration, time, fps);
+
+                            EncodeResult {
+                                score: vmaf_score.context("no vmaf score")?,
+                                score_kind: ScoreKind::Vmaf,
+                                sample_size,
+                                encoded_size,
+                                encode_time,
+                                sample_duration: encoded_probe
+                                    .duration
+                                    .ok()
+                                    .filter(|d| !d.is_zero())
+                                    .unwrap_or(sample_duration),
+                                from_cache: false,
                             }
-                            VmafOut::Progress(_) => {}
-                            VmafOut::Err(e) => Err(e)?,
                         }
-                    }
-                    let vmaf_score = vmaf_score.context("no vmaf score")?;
+                        ScoringInfo::Xpsnr(..) => {
+                            yield Update::Status(Status {
+                                work: Work::Score(ScoreKind::Xpsnr),
+                                fps: 0.0,
+                                progress: (sample_idx as f32 + 0.5) / samples as f32,
+                                full_pass,
+                                sample: sample_n,
+                                samples,
+                            });
 
-                    if samples > 1 {
-                        info!(
-                            "sample {sample_n}/{samples} crf {crf} VMAF {vmaf_score:.2} ({:.0}%)",
-                            100.0 * encoded_size as f32 / sample_size as f32,
-                        );
-                    }
+                            let lavfi = super::xpsnr::lavfi(
+                                score.reference_vfilter.as_deref().or(args.vfilter.as_deref())
+                            );
+                            let xpsnr_out = xpsnr::run(&sample, &encoded_sample, &lavfi)?;
+                            let mut xpsnr_out = pin!(xpsnr_out);
+                            let mut logger = ProgressLogger::new("ab_av1::xpsnr", Instant::now());
+                            let mut score = None;
+                            while let Some(next) = xpsnr_out.next().await {
+                                match next {
+                                    XpsnrOut::Done(s) => {
+                                        score = Some(s);
+                                        break;
+                                    }
+                                    XpsnrOut::Progress(FfmpegOut::Progress { time, fps, .. }) => {
+                                        yield Update::Status(Status {
+                                            work: Work::Score(ScoreKind::Xpsnr),
+                                            fps,
+                                            progress: (sample_duration_us +
+                                                time.as_micros_u64() +
+                                                sample_idx * sample_duration_us * 2) as f32
+                                                / (sample_duration_us * samples * 2) as f32,
+                                            full_pass,
+                                            sample: sample_n,
+                                            samples,
+                                        });
+                                        logger.update(sample_duration, time, fps);
+                                    }
+                                    XpsnrOut::Progress(_) => {}
+                                    XpsnrOut::Err(e) => Err(e)?,
+                                }
+                            }
 
-                    let result = EncodeResult {
-                        vmaf_score,
-                        sample_size,
-                        encoded_size,
-                        encode_time,
-                        sample_duration: encoded_probe
-                            .duration
-                            .ok()
-                            .filter(|d| !d.is_zero())
-                            .unwrap_or(sample_duration),
-                        from_cache: false,
+                            EncodeResult {
+                                score: score.context("no xpsnr score")?,
+                                score_kind: ScoreKind::Xpsnr,
+                                sample_size,
+                                encoded_size,
+                                encode_time,
+                                sample_duration: encoded_probe
+                                    .duration
+                                    .ok()
+                                    .filter(|d| !d.is_zero())
+                                    .unwrap_or(sample_duration),
+                                from_cache: false,
+                            }
+                        }
                     };
 
+                    if samples > 1 {
+                        result.log_attempt(sample_n, samples, crf);
+                    }
+
                     if let Some(k) = key {
                         cache::cache_result(k, &result).await?;
                     }
@@ -363,8 +425,10 @@ pub fn run(
             yield Update::SampleResult { sample: sample_n, result };
         }
 
+        let score_kind = results.score_kind();
         let output = Output {
-            vmaf: results.mean_vmaf(),
+            score: results.mean_score(),
+            score_kind,
             // Using file size * encode_percent can over-estimate. However, if it ends up less
             // than the duration estimation it may turn out to be more accurate.
             predicted_encode_size: results
@@ -375,8 +439,8 @@ pub fn run(
             from_cache: results.iter().all(|r| r.from_cache),
         };
         info!(
-            "crf {crf} VMAF {:.2} predicted video stream size {} ({:.0}%) taking {}{}",
-            output.vmaf,
+            "crf {crf} {score_kind} {:.2} predicted video stream size {} ({:.0}%) taking {}{}",
+            output.score,
             HumanBytes(output.predicted_encode_size),
             output.encode_percent,
             HumanDuration(output.predicted_encode_time),
@@ -421,7 +485,8 @@ async fn sample(
 pub struct EncodeResult {
     pub sample_size: u64,
     pub encoded_size: u64,
-    pub vmaf_score: f32,
+    pub score: f32,
+    pub score_kind: ScoreKind,
     pub encode_time: Duration,
     /// Duration of the sample.
     ///
@@ -436,13 +501,14 @@ impl EncodeResult {
         let Self {
             sample_size,
             encoded_size,
-            vmaf_score,
+            score,
+            score_kind,
             from_cache,
             ..
         } = self;
         bar.println(
             style!(
-                "- {}Sample {sample_n} ({:.0}%) vmaf {vmaf_score:.2}{}",
+                "- {}Sample {sample_n} ({:.0}%) {score_kind} {score:.2}{}",
                 crf.map(|crf| format!("crf {crf}: ")).unwrap_or_default(),
                 100.0 * *encoded_size as f32 / *sample_size as f32,
                 if *from_cache { " (cache)" } else { "" },
@@ -451,12 +517,60 @@ impl EncodeResult {
             .to_string(),
         );
     }
+
+    /// Log this sample's score and encoded size percentage at info level.
+    ///
+    /// The message ends with " (cache)" when `from_cache` is set.
+    pub fn log_attempt(&self, sample_n: u64, samples: u64, crf: f32) {
+        let (score, score_kind) = (self.score, self.score_kind);
+        let size_percent = 100.0 * self.encoded_size as f32 / self.sample_size as f32;
+        let cache_note = match self.from_cache {
+            true => " (cache)",
+            false => "",
+        };
+        info!(
+            "sample {sample_n}/{samples} crf {crf} {score_kind} {score:.2} ({:.0}%){}",
+            size_percent, cache_note
+        );
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+pub enum ScoreKind {
+    Vmaf,
+    Xpsnr,
+}
+
+impl ScoreKind {
+    /// Lowercase label shown alongside fps in the progress bar.
+    pub fn fps_label(&self) -> &'static str {
+        match *self {
+            ScoreKind::Xpsnr => "xpsnr",
+            ScoreKind::Vmaf => "vmaf",
+        }
+    }
+
+    /// Uppercase name for general display; also what `Display` writes.
+    pub fn display_str(&self) -> &'static str {
+        match *self {
+            ScoreKind::Xpsnr => "XPSNR",
+            ScoreKind::Vmaf => "VMAF",
+        }
+    }
+}
+
+impl Display for ScoreKind {
+    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        out.write_str(Self::display_str(self))
+    }
 }
 
 trait EncodeResults {
     fn encoded_percent_size(&self) -> f64;
 
-    fn mean_vmaf(&self) -> f32;
+    fn score_kind(&self) -> ScoreKind;
+
+    fn mean_score(&self) -> f32;
 
     /// Return estimated encoded **video stream** size by multiplying sample size by duration.
     fn estimate_encode_size_by_duration(
@@ -477,11 +591,17 @@ impl EncodeResults for Vec<EncodeResult> {
         encoded * 100.0 / sample
     }
 
-    fn mean_vmaf(&self) -> f32 {
+    /// Score kind of the first result, falling back to VMAF when empty.
+    fn score_kind(&self) -> ScoreKind {
+        let first_kind = self.first().map(|result| result.score_kind);
+        first_kind.unwrap_or(ScoreKind::Vmaf)
+    }
+
+    fn mean_score(&self) -> f32 {
         if self.is_empty() {
             return 0.0;
         }
-        self.iter().map(|r| r.vmaf_score).sum::<f32>() / self.len() as f32
+        self.iter().map(|r| r.score).sum::<f32>() / self.len() as f32
     }
 
     fn estimate_encode_size_by_duration(
@@ -551,16 +671,27 @@ pub enum StdoutFormat {
 }
 
 impl StdoutFormat {
-    fn print_result(self, vmaf: f32, size: u64, percent: f64, time: Duration, image: bool) {
+    fn print_result(
+        self,
+        Output {
+            score,
+            score_kind,
+            predicted_encode_size,
+            encode_percent,
+            predicted_encode_time,
+            from_cache: _,
+        }: &Output,
+        image: bool,
+    ) {
         match self {
             Self::Human => {
-                let vmaf = match vmaf {
-                    v if v >= 95.0 => style(v).bold().green(),
-                    v if v < 80.0 => style(v).bold().red(),
-                    v => style(v).bold(),
+                let score = match (*score, score_kind) {
+                    (v, ScoreKind::Vmaf) if v >= 95.0 => style(v).bold().green(),
+                    (v, ScoreKind::Vmaf) if v < 80.0 => style(v).bold().red(),
+                    (v, _) => style(v).bold(),
                 };
-                let percent = percent.round();
-                let size = match size {
+                let percent = encode_percent.round();
+                let size = match *predicted_encode_size {
                     v if percent < 80.0 => style(HumanBytes(v)).bold().green(),
                     v if percent >= 100.0 => style(HumanBytes(v)).bold().red(),
                     v => style(HumanBytes(v)).bold(),
@@ -570,23 +701,26 @@ impl StdoutFormat {
                     v if v >= 100.0 => style!("{}%", v).bold().red(),
                     v => style!("{}%", v).bold(),
                 };
-                let time = style(HumanDuration(time)).bold();
+                let time = style(HumanDuration(*predicted_encode_time)).bold();
                 let enc_description = match image {
                     true => "image",
                     false => "video stream",
                 };
                 println!(
-                    "VMAF {vmaf:.2} predicted {enc_description} size {size} ({percent}) taking {time}"
+                    "{score_kind} {score:.2} predicted {enc_description} size {size} ({percent}) taking {time}"
                 );
             }
             Self::Json => {
-                let json = serde_json::json!({
-                    "vmaf": vmaf,
-                    "predicted_encode_size": size,
-                    "predicted_encode_percent": percent,
-                    "predicted_encode_seconds": time.as_secs(),
+                let mut json = serde_json::json!({
+                    "predicted_encode_size": predicted_encode_size,
+                    "predicted_encode_percent": encode_percent,
+                    "predicted_encode_seconds": predicted_encode_time.as_secs(),
                 });
-                println!("{}", serde_json::to_string(&json).unwrap());
+                match score_kind {
+                    ScoreKind::Vmaf => json["vmaf"] = (*score).into(),
+                    ScoreKind::Xpsnr => json["xpsnr"] = (*score).into(),
+                }
+                println!("{json}");
             }
         }
     }
@@ -595,8 +729,9 @@ impl StdoutFormat {
 /// Sample encode result.
 #[derive(Debug, Clone)]
 pub struct Output {
-    /// Sample mean VMAF score.
-    pub vmaf: f32,
+    /// Sample mean score.
+    pub score: f32,
+    pub score_kind: ScoreKind,
     /// Estimated full encoded **video stream** size.
     ///
     /// Encoded sample size multiplied by duration.
@@ -611,11 +746,22 @@ pub struct Output {
     pub from_cache: bool,
 }
 
+/// Kinds of sample-encode work.
 #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
 pub enum Work {
     #[default]
     Encode,
-    Vmaf,
+    Score(ScoreKind),
+}
+
+impl Work {
+    /// Short label shown with fps in the progress bar for this kind of work.
+    pub fn fps_label(&self) -> &'static str {
+        match *self {
+            Work::Score(score_kind) => score_kind.fps_label(),
+            Work::Encode => "enc",
+        }
+    }
 }
 
 #[derive(Debug)]
diff --git a/src/command/sample_encode/cache.rs b/src/command/sample_encode/cache.rs
index eecf6c2..d9e32b8 100644
--- a/src/command/sample_encode/cache.rs
+++ b/src/command/sample_encode/cache.rs
@@ -1,5 +1,8 @@
 //! _sample-encode_ file system caching logic.
-use crate::{command::args::Vmaf, ffmpeg::FfmpegEncodeArgs};
+use crate::{
+    command::args::{ScoreArgs, Vmaf},
+    ffmpeg::FfmpegEncodeArgs,
+};
 use anyhow::Context;
 use std::{
     ffi::OsStr,
@@ -18,7 +21,7 @@ pub async fn cached_encode(
     input_size: u64,
     full_pass: bool,
     enc_args: &FfmpegEncodeArgs<'_>,
-    vmaf_args: &Vmaf,
+    scoring: ScoringInfo<'_>,
 ) -> (Option<super::EncodeResult>, Option<Key>) {
     if !cache {
         return (None, None);
@@ -36,7 +39,7 @@ pub async fn cached_encode(
             full_pass,
         ),
         enc_args,
-        vmaf_args,
+        scoring,
     );
 
     let key = Key(hash);
@@ -65,6 +68,12 @@ pub async fn cached_encode(
     }
 }
 
+#[derive(Debug, Hash, Clone, Copy)]
+pub enum ScoringInfo<'a> {
+    Vmaf(&'a Vmaf, &'a ScoreArgs), // VMAF-specific args plus the common score args
+    Xpsnr(&'a ScoreArgs), // XPSNR carries only the common score args
+}
+
 pub async fn cache_result(key: Key, result: &super::EncodeResult) -> anyhow::Result<()> {
     let data = serde_json::to_vec(result)?;
     let insert = tokio::task::spawn_blocking(move || {
@@ -103,16 +112,13 @@ pub struct Key(blake3::Hash);
 fn hash_encode(
     input_info: impl Hash,
     enc_args: &FfmpegEncodeArgs<'_>,
-    vmaf_args: &Vmaf,
+    scoring_info: impl Hash,
 ) -> blake3::Hash {
     let mut hasher = blake3::Hasher::new();
     let mut std_hasher = BlakeStdHasher(&mut hasher);
     input_info.hash(&mut std_hasher);
     enc_args.sample_encode_hash(&mut std_hasher);
-    if !vmaf_args.is_default() {
-        // avoid hashing if default for back compat
-        vmaf_args.hash(&mut std_hasher);
-    }
+    scoring_info.hash(&mut std_hasher);
     hasher.finalize()
 }
 
diff --git a/src/command/vmaf.rs b/src/command/vmaf.rs
index 82e9acb..92ae281 100644
--- a/src/command/vmaf.rs
+++ b/src/command/vmaf.rs
@@ -24,7 +24,6 @@ use tokio_stream::StreamExt;
 /// * Auto sets model version (4k or 1k) according to resolution.
 /// * Auto sets `n_threads` to system threads.
 /// * Auto upscales lower resolution videos to the model.
-/// * Converts distorted & reference to appropriate format yuv streams before passing to vmaf.
 #[derive(Parser)]
 #[clap(verbatim_doc_comment)]
 #[group(skip)]
@@ -39,6 +38,9 @@ pub struct Args {
 
     #[clap(flatten)]
     pub vmaf: args::Vmaf,
+
+    #[clap(flatten)]
+    pub score: args::ScoreArgs,
 }
 
 pub async fn vmaf(
@@ -46,6 +48,7 @@ pub async fn vmaf(
         reference,
         distorted,
         vmaf,
+        score,
     }: Args,
 ) -> anyhow::Result<()> {
     let bar = ProgressBar::new(1).with_style(
@@ -72,7 +75,7 @@ pub async fn vmaf(
         &vmaf.ffmpeg_lavfi(
             dprobe.resolution,
             dpix_fmt.max(rpix_fmt),
-            vmaf.reference_vfilter.as_deref(),
+            score.reference_vfilter.as_deref(),
         ),
         vmaf.vmaf_fps,
     )?);
diff --git a/src/command/xpsnr.rs b/src/command/xpsnr.rs
new file mode 100644
index 0000000..37f1a10
--- /dev/null
+++ b/src/command/xpsnr.rs
@@ -0,0 +1,119 @@
+use crate::{
+    command::{args, PROGRESS_CHARS},
+    ffprobe,
+    log::ProgressLogger,
+    process::FfmpegOut,
+    xpsnr::{self, XpsnrOut},
+};
+use anyhow::Context;
+use clap::Parser;
+use indicatif::{ProgressBar, ProgressStyle};
+use std::{
+    borrow::Cow,
+    path::PathBuf,
+    pin::pin,
+    sync::LazyLock,
+    time::{Duration, Instant},
+};
+use tokio_stream::StreamExt;
+
+/// Full XPSNR score calculation, distorted file vs reference file.
+/// Works with videos and images.
+#[derive(Parser)]
+#[clap(verbatim_doc_comment)] // keep the doc comment's line breaks in help output
+#[group(skip)] // no implicit clap arg group for this struct
+pub struct Args {
+    /// Reference video file.
+    #[arg(long)]
+    pub reference: PathBuf,
+
+    /// Re-encoded/distorted video file.
+    #[arg(long)]
+    pub distorted: PathBuf,
+
+    #[clap(flatten)]
+    pub score: args::ScoreArgs, // `xpsnr` reads only `reference_vfilter` from this
+}
+
+/// Run the `xpsnr` command: score `distorted` against `reference` using ffmpeg.
+///
+/// Shows a progress bar while ffmpeg runs & prints the resulting score to stdout.
+pub async fn xpsnr(args: Args) -> anyhow::Result<()> {
+    let bar_style = ProgressStyle::default_bar()
+        .template("{spinner:.cyan.bold} {elapsed_precise:.bold} {wide_bar:.cyan/blue} ({msg}eta {eta})")?
+        .progress_chars(PROGRESS_CHARS);
+    let bar = ProgressBar::new(1).with_style(bar_style);
+    bar.enable_steady_tick(Duration::from_millis(100));
+    bar.set_message("xpsnr running, ");
+
+    // The reference is only probed if the distorted probe lacks frame count / duration.
+    let dprobe = ffprobe::probe(&args.distorted);
+    let rprobe = LazyLock::new(|| ffprobe::probe(&args.reference));
+    let nframes = dprobe.nframes().or_else(|_| rprobe.nframes());
+    let duration = dprobe
+        .duration
+        .as_ref()
+        .or_else(|_| rprobe.duration.as_ref());
+    if let Ok(nframes) = nframes {
+        bar.set_length(nframes);
+    }
+
+    // `reference_vfilter`, when set, is built into the filter graph.
+    let filter = lavfi(args.score.reference_vfilter.as_deref());
+    let mut xpsnr_out = pin!(xpsnr::run(&args.reference, &args.distorted, &filter)?);
+    let mut logger = ProgressLogger::new(module_path!(), Instant::now());
+    let mut score = None;
+    // Consume ffmpeg output, updating the bar & logger until the score arrives.
+    while let Some(out) = xpsnr_out.next().await {
+        match out {
+            XpsnrOut::Done(final_score) => {
+                score = Some(final_score);
+                break;
+            }
+            XpsnrOut::Progress(FfmpegOut::Progress {
+                frame, fps, time, ..
+            }) => {
+                // Keep the "running" message until a non-zero frame rate is reported.
+                if fps > 0.0 {
+                    bar.set_message(format!("xpsnr {fps} fps, "));
+                }
+                // Position is in frames, so only set it when a frame count was probed.
+                if nframes.is_ok() {
+                    bar.set_position(frame);
+                }
+                if let Ok(total) = duration {
+                    logger.update(*total, time, fps);
+                }
+            }
+            // Stream size output has nothing to display here.
+            XpsnrOut::Progress(FfmpegOut::StreamSizes { .. }) => {}
+            XpsnrOut::Err(e) => return Err(e),
+        }
+    }
+    bar.finish();
+
+    // A stream that ended without `Done` leaves no score to print.
+    println!("{}", score.context("no xpsnr score")?);
+    Ok(())
+}
+
+/// Build the ffmpeg `-filter_complex` value for an xpsnr run.
+///
+/// `xpsnr::run` passes the reference as input 0, so `ref_vfilter` is applied to `[0:v]`.
+pub fn lavfi(ref_vfilter: Option<&str>) -> Cow<'static, str> {
+    match ref_vfilter {
+        None => "xpsnr=stats_file=-".into(),
+        Some(vf) => format!("[0:v]{vf}[ref];[ref][1:v]xpsnr=stats_file=-").into(),
+    }
+}
+
+#[test]
+fn test_lavfi_default() {
+    assert_eq!(lavfi(None), "xpsnr=stats_file=-");
+}
+
+#[test]
+fn test_lavfi_ref_vfilter() {
+    let filter = lavfi(Some("scale=1280:-1"));
+    assert_eq!(filter, "[0:v]scale=1280:-1[ref];[ref][1:v]xpsnr=stats_file=-");
+}
diff --git a/src/main.rs b/src/main.rs
index 4303902..bfaff34 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -8,6 +8,7 @@ mod process;
 mod sample;
 mod temporary;
 mod vmaf;
+mod xpsnr;
 
 use ::log::LevelFilter;
 use anyhow::anyhow;
@@ -21,6 +22,7 @@ use tokio::signal;
 enum Command {
     SampleEncode(command::sample_encode::Args),
     Vmaf(command::vmaf::Args),
+    Xpsnr(command::xpsnr::Args),
     Encode(command::encode::Args),
     CrfSearch(command::crf_search::Args),
     AutoEncode(command::auto_encode::Args),
@@ -47,6 +49,7 @@ async fn main() {
     let command = local.run_until(match action {
         Command::SampleEncode(args) => command::sample_encode(args).boxed_local(),
         Command::Vmaf(args) => command::vmaf(args).boxed_local(),
+        Command::Xpsnr(args) => command::xpsnr(args).boxed_local(),
         Command::Encode(args) => command::encode(args).boxed_local(),
         Command::CrfSearch(args) => command::crf_search(args).boxed_local(),
         Command::AutoEncode(args) => command::auto_encode(args).boxed_local(),
diff --git a/src/process.rs b/src/process.rs
index 9a3ab14..8a9244a 100644
--- a/src/process.rs
+++ b/src/process.rs
@@ -207,14 +207,18 @@ impl Chunks {
     }
 
     pub fn rfind_line(&self, predicate: impl Fn(&str) -> bool) -> Option<&str> {
+        self.rfind_line_map(|line| predicate(line).then_some(line))
+    }
+
+    pub fn rfind_line_map<'a, T>(&'a self, f: impl Fn(&'a str) -> Option<T>) -> Option<T> {
         let lines = self
             .out
             .rsplit(|b| *b == b'\n')
             .flat_map(|l| l.rsplit(|b| *b == b'\r'));
         for line in lines {
             if let Ok(line) = std::str::from_utf8(line) {
-                if predicate(line) {
-                    return Some(line);
+                if let Some(out) = f(line) {
+                    return Some(out);
                 }
             }
         }
diff --git a/src/vmaf.rs b/src/vmaf.rs
index b7240fc..bb0afd2 100644
--- a/src/vmaf.rs
+++ b/src/vmaf.rs
@@ -7,8 +7,7 @@ use tokio::process::Command;
 use tokio_process_stream::{Item, ProcessChunkStream};
 use tokio_stream::{Stream, StreamExt};
 
-/// Calculate VMAF score by converting the original first to yuv.
-/// This can produce more accurate results than testing directly from original source.
+/// Calculate VMAF score using ffmpeg.
 pub fn run(
     reference: &Path,
     distorted: &Path,
@@ -33,7 +32,7 @@ pub fn run(
 
     let cmd_str = cmd.to_cmd_str();
     debug!("cmd `{cmd_str}`");
-    let mut vmaf: ProcessChunkStream = cmd.try_into().context("ffmpeg vmaf")?;
+    let mut vmaf = ProcessChunkStream::try_from(cmd).context("ffmpeg vmaf")?;
 
     Ok(async_stream::stream! {
         let mut chunks = Chunks::default();
@@ -75,14 +74,14 @@ pub enum VmafOut {
 
 impl VmafOut {
     fn try_from_chunk(chunk: &[u8], chunks: &mut Chunks) -> Option<Self> {
-        const VMAF_SCORE_PRE: &str = "VMAF score: ";
+        const SCORE_PREFIX: &str = "VMAF score: ";
 
         chunks.push(chunk);
 
-        if let Some(line) = chunks.rfind_line(|l| l.contains(VMAF_SCORE_PRE)) {
-            let idx = line.find(VMAF_SCORE_PRE).unwrap();
+        if let Some(line) = chunks.rfind_line(|l| l.contains(SCORE_PREFIX)) {
+            let idx = line.find(SCORE_PREFIX).unwrap();
             return Some(Self::Done(
-                line[idx + VMAF_SCORE_PRE.len()..].trim().parse().ok()?,
+                line[idx + SCORE_PREFIX.len()..].trim().parse().ok()?,
             ));
         }
         if let Some(progress) = FfmpegOut::try_parse(chunks.last_line()) {
diff --git a/src/xpsnr.rs b/src/xpsnr.rs
new file mode 100644
index 0000000..14e5db8
--- /dev/null
+++ b/src/xpsnr.rs
@@ -0,0 +1,230 @@
+//! xpsnr logic
+use crate::process::{cmd_err, exit_ok_stderr, Chunks, CommandExt, FfmpegOut};
+use anyhow::Context;
+use log::{debug, info};
+use std::{path::Path, process::Stdio};
+use tokio::process::Command;
+use tokio_process_stream::{Item, ProcessChunkStream};
+use tokio_stream::{Stream, StreamExt};
+
+/// Calculate XPSNR score using ffmpeg.
+///
+/// `reference` is passed as ffmpeg input 0 & `distorted` as input 1. The stream yields
+/// progress parsed from stderr and the score, or an error if ffmpeg fails or no score parses.
+pub fn run(
+    reference: &Path,
+    distorted: &Path,
+    filter_complex: &str,
+) -> anyhow::Result<impl Stream<Item = XpsnrOut>> {
+    // File name for logging, empty when absent or not valid UTF-8.
+    fn name_of(path: &Path) -> &str {
+        path.file_name().and_then(|n| n.to_str()).unwrap_or("")
+    }
+    info!("xpsnr {} vs reference {}", name_of(distorted), name_of(reference));
+
+    let mut cmd = Command::new("ffmpeg");
+    cmd.arg2("-i", reference)
+        .arg2("-i", distorted)
+        .arg2("-filter_complex", filter_complex)
+        .arg2("-f", "null")
+        .arg("-")
+        .stdin(Stdio::null());
+
+    let cmd_str = cmd.to_cmd_str();
+    debug!("cmd `{cmd_str}`");
+    let mut xpsnr = ProcessChunkStream::try_from(cmd).context("ffmpeg xpsnr")?;
+
+    // Only stderr chunks are parsed; stdout chunks are ignored.
+    Ok(async_stream::stream! {
+        let mut chunks = Chunks::default();
+        let mut parsed_done = false;
+        while let Some(item) = xpsnr.next().await {
+            match item {
+                Item::Stdout(_) => {}
+                Item::Stderr(chunk) => {
+                    if let Some(out) = XpsnrOut::try_from_chunk(&chunk, &mut chunks) {
+                        parsed_done |= matches!(out, XpsnrOut::Done(_));
+                        yield out;
+                    }
+                }
+                Item::Done(code) => {
+                    if let Err(err) = exit_ok_stderr("ffmpeg xpsnr", code, &cmd_str, &chunks) {
+                        yield XpsnrOut::Err(err);
+                    }
+                }
+            }
+        }
+        if !parsed_done {
+            // The process stream ended without any score being parsed.
+            let err = cmd_err("could not parse ffmpeg xpsnr score", &cmd_str, &chunks);
+            yield XpsnrOut::Err(err);
+        }
+    })
+}
+
+/// Item yielded by the stream returned from [`run`].
+#[derive(Debug)]
+pub enum XpsnrOut {
+    Progress(FfmpegOut),
+    Done(f32),
+    Err(anyhow::Error),
+}
+
+impl XpsnrOut {
+    /// Buffer `chunk`, then look for a score in the buffered lines, falling back to
+    /// parsing ffmpeg progress from the last line.
+    fn try_from_chunk(chunk: &[u8], chunks: &mut Chunks) -> Option<Self> {
+        chunks.push(chunk);
+
+        match chunks.rfind_line_map(score_from_line) {
+            Some(score) => Some(Self::Done(score)),
+            None => FfmpegOut::try_parse(chunks.last_line()).map(Self::Progress),
+        }
+    }
+}
+
+/// Parse the "minimum" score from an XPSNR summary line, including `inf`.
+// E.g. "[Parsed_xpsnr_0 @ 0x711494004cc0] XPSNR  y: 33.6547  u: 41.8741  v: 42.2571  (minimum: 33.6547)"
+fn score_from_line(line: &str) -> Option<f32> {
+    const MIN_PREFIX: &str = "minimum: ";
+
+    if !line.contains("XPSNR") {
+        return None;
+    }
+
+    let (_, tail) = line.split_once(MIN_PREFIX)?;
+    if tail.starts_with("inf") {
+        return Some(f32::INFINITY); // ffmpeg prints an infinite score as "inf"
+    }
+    // ASCII only: `is_numeric` also takes multi-byte digits that break slicing & parsing.
+    let len = tail.bytes().take_while(|b| *b == b'.' || b.is_ascii_digit()).count();
+    tail[..len].parse().ok()
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn parse_rgb_line() {
+        let score = score_from_line(
+            "XPSNR average, 1 frames  r: 40.6130  g: 41.0275  b: 40.6961  (minimum: 40.6130)",
+        );
+        assert_eq!(score, Some(40.6130));
+    }
+
+    #[test]
+    fn parse_xpsnr_score() {
+        // Note: some lines omitted for brevity
+        const FFMPEG_OUT: &str = r#"Input #0, matroska,webm, from 'tmp.mkv':
+  Metadata:
+    COMPATIBLE_BRANDS: isomiso2avc1mp41
+    MAJOR_BRAND     : isom
+    MINOR_VERSION   : 512
+    ENCODER         : Lavf61.7.100
+  Duration: 00:00:53.77, start: -0.007000, bitrate: 2698 kb/s
+  Stream #0:0(eng): Video: av1 (libdav1d) (Main), yuv420p10le(tv, progressive), 3840x2160, 25 fps, 25 tbr, 1k tbn (default)
+      Metadata:
+        HANDLER_NAME    : ?Mainconcept Video Media Handler
+        VENDOR_ID       : [0][0][0][0]
+        ENCODER         : Lavc61.19.100 libsvtav1
+        DURATION        : 00:00:53.760000000
+  Stream #0:1(eng): Audio: opus, 48000 Hz, stereo, fltp (default)
+      Metadata:
+        title           : Opus 96Kbps
+        HANDLER_NAME    : #Mainconcept MP4 Sound Media Handler
+        VENDOR_ID       : [0][0][0][0]
+        ENCODER         : Lavc61.19.100 libopus
+        DURATION        : 00:00:53.768000000
+Input #1, mov,mp4,m4a,3gp,3g2,mj2, from 'pixabay-lemon-82602.mp4':
+  Metadata:
+    major_brand     : isom
+    minor_version   : 512
+    compatible_brands: isomiso2avc1mp41
+    encoder         : Lavf58.20.100
+  Duration: 00:00:53.76, start: 0.000000, bitrate: 14109 kb/s
+  Stream #1:0[0x1](eng): Video: h264 (High) (avc1 / 0x31637661), yuv420p(progressive), 3840x2160, 14101 kb/s, 25 fps, 25 tbr, 12800 tbn (default)
+      Metadata:
+        handler_name    : ?Mainconcept Video Media Handler
+        vendor_id       : [0][0][0][0]
+  Stream #1:1[0x2](eng): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, stereo, fltp, 2 kb/s (default)
+      Metadata:
+        handler_name    : #Mainconcept MP4 Sound Media Handler
+        vendor_id       : [0][0][0][0]
+Stream mapping:
+  Stream #0:0 (libdav1d) -> xpsnr
+  Stream #1:0 (h264) -> xpsnr
+  xpsnr:default -> Stream #0:0 (wrapped_avframe)
+  Stream #0:1 -> #0:1 (opus (native) -> pcm_s16le (native))
+Press [q] to stop, [?] for help
+[Parsed_xpsnr_0 @ 0x78341c004d00] not matching timebases found between first input: 1/1000 and second input 1/12800, results may be incorrect!
+Output #0, null, to 'pipe:':
+  Metadata:
+    COMPATIBLE_BRANDS: isomiso2avc1mp41
+    MAJOR_BRAND     : isom
+    MINOR_VERSION   : 512
+    encoder         : Lavf61.7.100
+  Stream #0:0: Video: wrapped_avframe, yuv420p10le(tv, progressive), 3840x2160 [SAR 1:1 DAR 16:9], q=2-31, 200 kb/s, 25 fps, 25 tbn
+      Metadata:
+        encoder         : Lavc61.19.100 wrapped_avframe
+  Stream #0:1(eng): Audio: pcm_s16le, 48000 Hz, stereo, s16, 1536 kb/s (default)
+      Metadata:
+        title           : Opus 96Kbps
+        HANDLER_NAME    : #Mainconcept MP4 Sound Media Handler
+        VENDOR_ID       : [0][0][0][0]
+        DURATION        : 00:00:53.768000000
+        encoder         : Lavc61.19.100 pcm_s16le
+frame=    9 fps=0.0 q=-0.0 size=N/A time=00:00:00.32 bitrate=N/A speed=0.64x    
+frame=   28 fps= 28 q=-0.0 size=N/A time=00:00:01.08 bitrate=N/A speed=1.08x    
+frame=   46 fps= 31 q=-0.0 size=N/A time=00:00:01.80 bitrate=N/A speed= 1.2x    
+frame=   65 fps= 32 q=-0.0 size=N/A time=00:00:02.56 bitrate=N/A speed=1.28x    
+n:    1  XPSNR y: 54.5266  XPSNR u: 56.3886  XPSNR v: 58.7794
+n:    2  XPSNR y: 40.6035  XPSNR u: 39.3487  XPSNR v: 42.3634
+n:    3  XPSNR y: 40.9764  XPSNR u: 38.8791  XPSNR v: 41.8961
+n:   64  XPSNR y: 41.0726  XPSNR u: 39.7731  XPSNR v: 42.5210
+n:   65  XPSNR y: 41.3476  XPSNR u: 39.6055  XPSNR v: 42.4262
+n:   66  XPSNR y: 41.1029  XPSNR u: 39.8779  XPSNR v: 42.6400
+frame=   84 fps= 34 q=-0.0 size=N/A time=00:00:03.32 bitrate=N/A speed=1.33x    
+frame=  102 fps= 34 q=-0.0 size=N/A time=00:00:04.04 bitrate=N/A speed=1.35x    
+frame=  120 fps= 34 q=-0.0 size=N/A time=00:00:04.76 bitrate=N/A speed=1.36x    
+n:   67  XPSNR y: 40.9642  XPSNR u: 39.5204  XPSNR v: 42.1316
+n:   68  XPSNR y: 40.2677  XPSNR u: 38.9371  XPSNR v: 41.9560
+n:   69  XPSNR y: 40.6431  XPSNR u: 38.8864  XPSNR v: 41.6902
+n: 1319  XPSNR y: 41.4316  XPSNR u: 40.5146  XPSNR v: 42.1970
+n: 1320  XPSNR y: 41.4623  XPSNR u: 40.5527  XPSNR v: 42.3358
+n: 1321  XPSNR y: 42.5312  XPSNR u: 41.2487  XPSNR v: 42.8495
+frame= 1328 fps= 37 q=-0.0 size=N/A time=00:00:53.08 bitrate=N/A speed=1.47x    
+[Parsed_xpsnr_0 @ 0x78341c004d00] XPSNR  y: 40.7139  u: 39.1440  v: 41.7907  (minimum: 39.1440)
+[out#0/null @ 0x64006e11b1c0] video:578KiB audio:10080KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: unknown
+frame= 1344 fps= 37 q=-0.0 Lsize=N/A time=00:00:53.72 bitrate=N/A speed=1.48x    
+n: 1342  XPSNR y: 40.6841  XPSNR u: 39.0209  XPSNR v: 40.9250
+n: 1343  XPSNR y: 41.0269  XPSNR u: 39.2465  XPSNR v: 41.1238
+n: 1344  XPSNR y: 39.8468  XPSNR u: 38.4587  XPSNR v: 40.5844
+
+XPSNR average, 1344 frames  y: 40.7139
+"#;
+
+        const CHUNK_SIZE: usize = 64;
+
+        let ffmpeg = FFMPEG_OUT.as_bytes();
+
+        let mut chunks = Chunks::default();
+        let mut start_idx = 0;
+        let mut xpsnr_score = None;
+        while start_idx < ffmpeg.len() {
+            let chunk = &ffmpeg[start_idx..(start_idx + CHUNK_SIZE).min(FFMPEG_OUT.len())];
+            // println!("* {}", String::from_utf8_lossy(chunk).trim());
+
+            if let Some(xpsnr) = XpsnrOut::try_from_chunk(chunk, &mut chunks) {
+                println!("{xpsnr:?}");
+                if let XpsnrOut::Done(score) = xpsnr {
+                    xpsnr_score = Some(score);
+                }
+            }
+
+            start_idx += CHUNK_SIZE;
+        }
+
+        assert_eq!(xpsnr_score, Some(39.1440), "failed to parse xpsnr score");
+    }
+}