Add XPSNR support as a VMAF alternative (#251)

* Add XPSNR support as a VMAF alternative
* Remove some "VMAF" hardcodes
* Use minimum xpsnr score
* xpsnr support ref-vfilter
* remove stats_file quoting causing file creation
* parse xpsnr rgb output
* reduce ScoreKind specific code
* Fix reference, distorted ffmpeg input order!
* Add xpsnr command to readme
* Update readme and xpsnr docs
alexheretic · Dec 23, 2024 · 3325cda · 3325cda
1 parent 746a0b0
commit 3325cda
Show file tree

Hide file tree

Showing 16 changed files with 734 additions and 207 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,4 +1,8 @@
-# Unreleased (0.8.1)
+# Unreleased (0.9.0)
+* Add XPSNR support as a VMAF alternative.
+  - Add sample-encode `--xpsnr` arg which toggles use of XPSNR instead of VMAF.
+  - Add crf-search, auto-encode `--min-xpsnr` arg _(alternative to `--min-vmaf`)_.
+  - Add `xpsnr` command for measuring XPSNR score.
 * Support negative `--preset` args.
 * Add `--vmaf-fps`: Frame rate override used to analyse both reference & distorted videos.
 * Omit data streams when outputting to matroska (.mkv or .webm).

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/README.md b/README.md
@@ -7,19 +7,19 @@ Uses _ffmpeg_, _svt-av1_ & _vmaf_.
 Also supports other ffmpeg compatible encoders like libx265 & libx264.
 
 ### Command: auto-encode
-Automatically determine the best crf to deliver the min-vmaf and use it to encode a video or image.
+Automatically determine the best crf to deliver the `--min-vmaf` and use it to encode a video or image.
 
 Two phases:
 * [crf-search](#command-crf-search) to determine the best --crf value
 * ffmpeg to encode using the settings
 
 ```
-ab-av1 auto-encode [OPTIONS] -i <INPUT> --preset <PRESET>
+ab-av1 auto-encode [OPTIONS] -i <INPUT> --preset <PRESET> --min-vmaf <MIN_VMAF>
 ```
 
 ### Command: crf-search
 Interpolated binary search using [sample-encode](#command-sample-encode) to find the best 
-crf value delivering **min-vmaf** & **max-encoded-percent**.
+crf value delivering `--min-vmaf` & `--max-encoded-percent`.
 
 Outputs:
 * Best crf value
@@ -28,9 +28,12 @@ Outputs:
 * Predicted full encode time
 
 ```
-ab-av1 crf-search [OPTIONS] -i <INPUT> --preset <PRESET>
+ab-av1 crf-search [OPTIONS] -i <INPUT> --preset <PRESET> --min-vmaf <MIN_VMAF>
 ```
 
+#### Notable options
+* `--min-xpsnr <MIN_XPSNR>` may be used as an alternative to VMAF.
+
 ### Command: sample-encode
 Encode short video samples of an input using provided **crf** & **preset**. 
 This is much quicker than full encode/vmaf run. 
@@ -44,6 +47,9 @@ Outputs:
 ab-av1 sample-encode [OPTIONS] -i <INPUT> --crf <CRF> --preset <PRESET>
 ```
 
+#### Notable options
+* `--xpsnr` specifies calculation of XPSNR score instead of VMAF.
+
 ### Command: encode
 Invoke ffmpeg to encode a video or image.
 
@@ -58,12 +64,19 @@ Works with videos and images.
 * Auto sets model version (4k or 1k) according to resolution.
 * Auto sets _n_threads_ to system threads.
 * Auto upscales lower resolution videos to the model.
-* Converts distorted & reference to appropriate format yuv streams before passing to vmaf.
 
 ```
 ab-av1 vmaf --reference <REFERENCE> --distorted <DISTORTED>
 ```
 
+### Command: xpsnr
+Full XPSNR score calculation, distorted file vs reference file.
+Works with videos and images.
+
+```
+ab-av1 xpsnr --reference <REFERENCE> --distorted <DISTORTED>
+```
+
 ## Install
 ### Arch Linux
 Available in the [AUR](https://aur.archlinux.org/packages/ab-av1).

diff --git a/src/command.rs b/src/command.rs
@@ -5,13 +5,15 @@ pub mod encode;
 pub mod print_completions;
 pub mod sample_encode;
 pub mod vmaf;
+pub mod xpsnr;
 
 pub use auto_encode::auto_encode;
 pub use crf_search::crf_search;
 pub use encode::encode;
 pub use print_completions::print_completions;
 pub use sample_encode::sample_encode;
 pub use vmaf::vmaf;
+pub use xpsnr::xpsnr;
 
 const PROGRESS_CHARS: &str = "##-";
 

diff --git a/src/command/args.rs b/src/command/args.rs
@@ -103,3 +103,14 @@ impl Sample {
         self.extension = output.extension().and_then(|e| e.to_str().map(Into::into));
     }
 }
+
+/// Args for when VMAF/XPSNR are used to score ref vs distorted.
+#[derive(Debug, Parser, Clone, Hash)]
+pub struct ScoreArgs {
+    /// Ffmpeg video filter applied to the VMAF/XPSNR reference before analysis.
+    /// E.g. --reference-vfilter "scale=1280:-1,fps=24".
+    ///
+    /// Overrides --vfilter which would otherwise be used.
+    #[arg(long)]
+    pub reference_vfilter: Option<Arc<str>>,
+}
diff --git a/src/command/args/vmaf.rs b/src/command/args/vmaf.rs
@@ -40,21 +40,13 @@ pub struct Vmaf {
     /// By default no override is set.
     #[arg(long)]
     pub vmaf_fps: Option<f32>,
-
-    /// Ffmpeg video filter applied to the VMAF reference before analysis.
-    /// E.g. --reference-vfilter "scale=1280:-1,fps=24".
-    ///
-    /// Overrides --vfilter which would otherwise be used.
-    #[arg(long)]
-    pub reference_vfilter: Option<String>,
 }
 
 impl std::hash::Hash for Vmaf {
     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
         self.vmaf_args.hash(state);
         self.vmaf_scale.hash(state);
         self.vmaf_fps.map(|f| f.to_ne_bytes()).hash(state);
-        self.reference_vfilter.hash(state);
     }
 }
 
@@ -63,19 +55,7 @@ fn parse_vmaf_arg(arg: &str) -> anyhow::Result<Arc<str>> {
 }
 
 impl Vmaf {
-    pub fn is_default(&self) -> bool {
-        let Self {
-            vmaf_args,
-            vmaf_scale,
-            vmaf_fps: _,
-            reference_vfilter,
-        } = self;
-        vmaf_args.is_empty() && *vmaf_scale == VmafScale::Auto && reference_vfilter.is_none()
-    }
-
     /// Returns ffmpeg `filter_complex`/`lavfi` value for calculating vmaf.
-    ///
-    /// Note `ref_vfilter` is ignored if `Self::reference_vfilter` is some.
     pub fn ffmpeg_lavfi(
         &self,
         distorted_res: Option<(u32, u32)>,
@@ -105,7 +85,7 @@ impl Vmaf {
             }
         }
 
-        let ref_vf: Cow<_> = match self.reference_vfilter.as_deref().or(ref_vfilter) {
+        let ref_vf: Cow<_> = match ref_vfilter {
             None => "".into(),
             Some(vf) if vf.ends_with(',') => vf.into(),
             Some(vf) => format!("{vf},").into(),
@@ -241,26 +221,6 @@ fn vmaf_lavfi() {
     );
 }
 
-#[test]
-fn vmaf_lavfi_override_reference_vfilter() {
-    let vmaf = Vmaf {
-        vmaf_args: vec!["n_threads=5".into(), "n_subsample=4".into()],
-        vmaf_scale: VmafScale::Auto,
-        vmaf_fps: None,
-        reference_vfilter: Some("scale=2560:-1".into()),
-    };
-    assert_eq!(
-        vmaf.ffmpeg_lavfi(
-            None,
-            PixelFormat::Yuv420p,
-            Some("scale_vaapi=w=2560:h=1280")
-        ),
-        "[0:v]format=yuv420p,setpts=PTS-STARTPTS,settb=AVTB[dis];\
-         [1:v]format=yuv420p,scale=2560:-1,setpts=PTS-STARTPTS,settb=AVTB[ref];\
-         [dis][ref]libvmaf=shortest=true:ts_sync_mode=nearest:n_threads=5:n_subsample=4"
-    );
-}
-
 #[test]
 fn vmaf_lavfi_default() {
     let vmaf = Vmaf::default();

diff --git a/src/command/auto_encode.rs b/src/command/auto_encode.rs
@@ -68,7 +68,7 @@ pub async fn auto_encode(Args { mut search, encode }: Args) -> anyhow::Result<()
         bar.println(style!("Encoding {out}").dim().to_string());
     }
 
-    let min_vmaf = search.min_vmaf;
+    let min_score = search.min_score();
     let max_encoded_percent = search.max_encoded_percent;
     let enc_args = search.args.clone();
     let thorough = search.thorough;
@@ -86,15 +86,16 @@ pub async fn auto_encode(Args { mut search, encode }: Args) -> anyhow::Result<()
                             .template(SPINNER_FINISHED)?
                             .progress_chars(PROGRESS_CHARS),
                     );
-                    let mut vmaf = style(last.enc.vmaf);
-                    if last.enc.vmaf < min_vmaf {
+                    let mut vmaf = style(last.enc.score);
+                    if last.enc.score < min_score {
                         vmaf = vmaf.red();
                     }
                     let mut percent = style!("{:.0}%", last.enc.encode_percent);
                     if last.enc.encode_percent > max_encoded_percent as _ {
                         percent = percent.red();
                     }
-                    bar.finish_with_message(format!("VMAF {vmaf:.2}, size {percent}"));
+                    let score_kind = last.enc.score_kind;
+                    bar.finish_with_message(format!("{score_kind} {vmaf:.2}, size {percent}"));
                 }
                 bar.finish();
                 return Err(err.into());
@@ -118,11 +119,11 @@ pub async fn auto_encode(Args { mut search, encode }: Args) -> anyhow::Result<()
                     true => bar.set_prefix(format!("crf {crf} full pass")),
                     false => bar.set_prefix(format!("crf {crf} {sample}/{samples}")),
                 }
+                let label = work.fps_label();
                 match work {
                     Work::Encode if fps <= 0.0 => bar.set_message("encoding,  "),
-                    Work::Encode => bar.set_message(format!("enc {fps} fps, ")),
-                    Work::Vmaf if fps <= 0.0 => bar.set_message("vmaf,       "),
-                    Work::Vmaf => bar.set_message(format!("vmaf {fps} fps, ")),
+                    _ if fps <= 0.0 => bar.set_message(format!("{label},       ")),
+                    _ => bar.set_message(format!("{label} {fps} fps, ")),
                 }
             }
             Ok(crf_search::Update::SampleResult {
@@ -142,7 +143,7 @@ pub async fn auto_encode(Args { mut search, encode }: Args) -> anyhow::Result<()
                     .log_level()
                     .is_some_and(|lvl| lvl > log::Level::Error)
                 {
-                    result.print_attempt(&bar, min_vmaf, max_encoded_percent)
+                    result.print_attempt(&bar, min_score, max_encoded_percent)
                 }
             }
             Ok(crf_search::Update::Done(result)) => best = Some(result),
@@ -156,8 +157,9 @@ pub async fn auto_encode(Args { mut search, encode }: Args) -> anyhow::Result<()
             .progress_chars(PROGRESS_CHARS),
     );
     bar.finish_with_message(format!(
-        "VMAF {:.2}, size {}",
-        style(best.enc.vmaf).green(),
+        "{} {:.2}, size {}",
+        best.enc.score_kind,
+        style(best.enc.score).green(),
         style(format!("{:.0}%", best.enc.encode_percent)).green(),
     ));
     temporary::clean_all().await;