From 3325cda83e088768bb7ce6e97c9e2c85f6a2be3f Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Mon, 23 Dec 2024 20:29:57 +0000 Subject: [PATCH] Add XPSNR support as a VMAF alternative (#251) * Add XPSNR support as a VMAF alternative * Remove some "VMAF" hardcodes * Use minimum xpsnr score * xpsnr support ref-vfilter * remove stats_file quoting causing file creation * parse xpsnr rgb output * reduce ScoreKind specific code * Fix reference, distorted ffmpeg input order! * Add xpsnr command to readme * Update readme and xpsnr docs --- CHANGELOG.md | 6 +- Cargo.lock | 4 +- README.md | 23 +- src/command.rs | 2 + src/command/args.rs | 11 + src/command/args/vmaf.rs | 42 +--- src/command/auto_encode.rs | 22 +- src/command/crf_search.rs | 69 ++++-- src/command/sample_encode.rs | 360 ++++++++++++++++++++--------- src/command/sample_encode/cache.rs | 22 +- src/command/vmaf.rs | 7 +- src/command/xpsnr.rs | 119 ++++++++++ src/main.rs | 3 + src/process.rs | 8 +- src/vmaf.rs | 13 +- src/xpsnr.rs | 230 ++++++++++++++++++ 16 files changed, 734 insertions(+), 207 deletions(-) create mode 100644 src/command/xpsnr.rs create mode 100644 src/xpsnr.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 4db76cb..d9888b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,8 @@ -# Unreleased (0.8.1) +# Unreleased (0.9.0) +* Add XPSNR support as a VMAF alternative. + - Add sample-encode `--xpsnr` arg which toggles use of XPSNR instead of VMAF. + - Add crf-search, auto-encode `--min-xpsnr` arg _(alternative to `--min-vmaf`)_. + - Add `xpsnr` command for measuring XPSNR score. * Support negative `--preset` args. * Add `--vmaf-fps`: Frame rate override used to analyse both reference & distorted videos. * Omit data streams when outputting to matroska (.mkv or .webm). 
diff --git a/Cargo.lock b/Cargo.lock index ba5ed22..f140620 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -98,9 +98,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.94" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1fd03a028ef38ba2276dce7e33fcd6369c158a1bca17946c4b1b701891c1ff7" +checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" [[package]] name = "arrayref" diff --git a/README.md b/README.md index e30630a..e316ace 100644 --- a/README.md +++ b/README.md @@ -7,19 +7,19 @@ Uses _ffmpeg_, _svt-av1_ & _vmaf_. Also supports other ffmpeg compatible encoders like libx265 & libx264. ### Command: auto-encode -Automatically determine the best crf to deliver the min-vmaf and use it to encode a video or image. +Automatically determine the best crf to deliver the `--min-vmaf` and use it to encode a video or image. Two phases: * [crf-search](#command-crf-search) to determine the best --crf value * ffmpeg to encode using the settings ``` -ab-av1 auto-encode [OPTIONS] -i <INPUT> --preset <PRESET> +ab-av1 auto-encode [OPTIONS] -i <INPUT> --preset <PRESET> --min-vmaf <MIN_VMAF> ``` ### Command: crf-search Interpolated binary search using [sample-encode](#command-sample-encode) to find the best -crf value delivering **min-vmaf** & **max-encoded-percent**. +crf value delivering `--min-vmaf` & `--max-encoded-percent`. Outputs: * Best crf value @@ -28,9 +28,12 @@ Outputs: * Predicted full encode time ``` -ab-av1 crf-search [OPTIONS] -i <INPUT> --preset <PRESET> +ab-av1 crf-search [OPTIONS] -i <INPUT> --preset <PRESET> --min-vmaf <MIN_VMAF> ``` +#### Notable options +* `--min-xpsnr <MIN_XPSNR>` may be used as an alternative to VMAF. + ### Command: sample-encode Encode short video samples of an input using provided **crf** & **preset**. This is much quicker than full encode/vmaf run. @@ -44,6 +47,9 @@ Outputs: ab-av1 sample-encode [OPTIONS] -i <INPUT> --crf <CRF> --preset <PRESET> ``` +#### Notable options +* `--xpsnr` specifies calculation of XPSNR score instead of VMAF.
+ ### Command: encode Invoke ffmpeg to encode a video or image. @@ -58,12 +64,19 @@ Works with videos and images. * Auto sets model version (4k or 1k) according to resolution. * Auto sets _n_threads_ to system threads. * Auto upscales lower resolution videos to the model. -* Converts distorted & reference to appropriate format yuv streams before passing to vmaf. ``` ab-av1 vmaf --reference <REFERENCE> --distorted <DISTORTED> ``` +### Command: xpsnr +Full XPSNR score calculation, distorted file vs reference file. +Works with videos and images. + +``` +ab-av1 xpsnr --reference <REFERENCE> --distorted <DISTORTED> +``` + ## Install ### Arch Linux Available in the [AUR](https://aur.archlinux.org/packages/ab-av1). diff --git a/src/command.rs b/src/command.rs index 99b930c..9fb6017 100644 --- a/src/command.rs +++ b/src/command.rs @@ -5,6 +5,7 @@ pub mod encode; pub mod print_completions; pub mod sample_encode; pub mod vmaf; +pub mod xpsnr; pub use auto_encode::auto_encode; pub use crf_search::crf_search; @@ -12,6 +13,7 @@ pub use encode::encode; pub use print_completions::print_completions; pub use sample_encode::sample_encode; pub use vmaf::vmaf; +pub use xpsnr::xpsnr; const PROGRESS_CHARS: &str = "##-"; diff --git a/src/command/args.rs b/src/command/args.rs index 411d4f6..d2b1a96 100644 --- a/src/command/args.rs +++ b/src/command/args.rs @@ -103,3 +103,14 @@ impl Sample { self.extension = output.extension().and_then(|e| e.to_str().map(Into::into)); } } + +/// Args for when VMAF/XPSNR are used to score ref vs distorted. +#[derive(Debug, Parser, Clone, Hash)] +pub struct ScoreArgs { + /// Ffmpeg video filter applied to the VMAF/XPSNR reference before analysis. + /// E.g. --reference-vfilter "scale=1280:-1,fps=24". + /// + /// Overrides --vfilter which would otherwise be used.
+ #[arg(long)] + pub reference_vfilter: Option>, +} diff --git a/src/command/args/vmaf.rs b/src/command/args/vmaf.rs index 1ea4e66..5f9a387 100644 --- a/src/command/args/vmaf.rs +++ b/src/command/args/vmaf.rs @@ -40,13 +40,6 @@ pub struct Vmaf { /// By default no override is set. #[arg(long)] pub vmaf_fps: Option, - - /// Ffmpeg video filter applied to the VMAF reference before analysis. - /// E.g. --reference-vfilter "scale=1280:-1,fps=24". - /// - /// Overrides --vfilter which would otherwise be used. - #[arg(long)] - pub reference_vfilter: Option, } impl std::hash::Hash for Vmaf { @@ -54,7 +47,6 @@ impl std::hash::Hash for Vmaf { self.vmaf_args.hash(state); self.vmaf_scale.hash(state); self.vmaf_fps.map(|f| f.to_ne_bytes()).hash(state); - self.reference_vfilter.hash(state); } } @@ -63,19 +55,7 @@ fn parse_vmaf_arg(arg: &str) -> anyhow::Result> { } impl Vmaf { - pub fn is_default(&self) -> bool { - let Self { - vmaf_args, - vmaf_scale, - vmaf_fps: _, - reference_vfilter, - } = self; - vmaf_args.is_empty() && *vmaf_scale == VmafScale::Auto && reference_vfilter.is_none() - } - /// Returns ffmpeg `filter_complex`/`lavfi` value for calculating vmaf. - /// - /// Note `ref_vfilter` is ignored if `Self::reference_vfilter` is some. 
pub fn ffmpeg_lavfi( &self, distorted_res: Option<(u32, u32)>, @@ -105,7 +85,7 @@ impl Vmaf { } } - let ref_vf: Cow<_> = match self.reference_vfilter.as_deref().or(ref_vfilter) { + let ref_vf: Cow<_> = match ref_vfilter { None => "".into(), Some(vf) if vf.ends_with(',') => vf.into(), Some(vf) => format!("{vf},").into(), @@ -241,26 +221,6 @@ fn vmaf_lavfi() { ); } -#[test] -fn vmaf_lavfi_override_reference_vfilter() { - let vmaf = Vmaf { - vmaf_args: vec!["n_threads=5".into(), "n_subsample=4".into()], - vmaf_scale: VmafScale::Auto, - vmaf_fps: None, - reference_vfilter: Some("scale=2560:-1".into()), - }; - assert_eq!( - vmaf.ffmpeg_lavfi( - None, - PixelFormat::Yuv420p, - Some("scale_vaapi=w=2560:h=1280") - ), - "[0:v]format=yuv420p,setpts=PTS-STARTPTS,settb=AVTB[dis];\ - [1:v]format=yuv420p,scale=2560:-1,setpts=PTS-STARTPTS,settb=AVTB[ref];\ - [dis][ref]libvmaf=shortest=true:ts_sync_mode=nearest:n_threads=5:n_subsample=4" - ); -} - #[test] fn vmaf_lavfi_default() { let vmaf = Vmaf::default(); diff --git a/src/command/auto_encode.rs b/src/command/auto_encode.rs index 64b7e23..398e83d 100644 --- a/src/command/auto_encode.rs +++ b/src/command/auto_encode.rs @@ -68,7 +68,7 @@ pub async fn auto_encode(Args { mut search, encode }: Args) -> anyhow::Result<() bar.println(style!("Encoding {out}").dim().to_string()); } - let min_vmaf = search.min_vmaf; + let min_score = search.min_score(); let max_encoded_percent = search.max_encoded_percent; let enc_args = search.args.clone(); let thorough = search.thorough; @@ -86,15 +86,16 @@ pub async fn auto_encode(Args { mut search, encode }: Args) -> anyhow::Result<() .template(SPINNER_FINISHED)? 
.progress_chars(PROGRESS_CHARS), ); - let mut vmaf = style(last.enc.vmaf); - if last.enc.vmaf < min_vmaf { + let mut vmaf = style(last.enc.score); + if last.enc.score < min_score { vmaf = vmaf.red(); } let mut percent = style!("{:.0}%", last.enc.encode_percent); if last.enc.encode_percent > max_encoded_percent as _ { percent = percent.red(); } - bar.finish_with_message(format!("VMAF {vmaf:.2}, size {percent}")); + let score_kind = last.enc.score_kind; + bar.finish_with_message(format!("{score_kind} {vmaf:.2}, size {percent}")); } bar.finish(); return Err(err.into()); @@ -118,11 +119,11 @@ pub async fn auto_encode(Args { mut search, encode }: Args) -> anyhow::Result<() true => bar.set_prefix(format!("crf {crf} full pass")), false => bar.set_prefix(format!("crf {crf} {sample}/{samples}")), } + let label = work.fps_label(); match work { Work::Encode if fps <= 0.0 => bar.set_message("encoding, "), - Work::Encode => bar.set_message(format!("enc {fps} fps, ")), - Work::Vmaf if fps <= 0.0 => bar.set_message("vmaf, "), - Work::Vmaf => bar.set_message(format!("vmaf {fps} fps, ")), + _ if fps <= 0.0 => bar.set_message(format!("{label}, ")), + _ => bar.set_message(format!("{label} {fps} fps, ")), } } Ok(crf_search::Update::SampleResult { @@ -142,7 +143,7 @@ pub async fn auto_encode(Args { mut search, encode }: Args) -> anyhow::Result<() .log_level() .is_some_and(|lvl| lvl > log::Level::Error) { - result.print_attempt(&bar, min_vmaf, max_encoded_percent) + result.print_attempt(&bar, min_score, max_encoded_percent) } } Ok(crf_search::Update::Done(result)) => best = Some(result), @@ -156,8 +157,9 @@ pub async fn auto_encode(Args { mut search, encode }: Args) -> anyhow::Result<() .progress_chars(PROGRESS_CHARS), ); bar.finish_with_message(format!( - "VMAF {:.2}, size {}", - style(best.enc.vmaf).green(), + "{} {:.2}, size {}", + best.enc.score_kind, + style(best.enc.score).green(), style(format!("{:.0}%", best.enc.encode_percent)).green(), )); temporary::clean_all().await; diff 
--git a/src/command/crf_search.rs b/src/command/crf_search.rs index e20c1f6..25d082a 100644 --- a/src/command/crf_search.rs +++ b/src/command/crf_search.rs @@ -26,6 +26,7 @@ use std::{ }; const BAR_LEN: u64 = 1024 * 1024 * 1024; +const DEFAULT_MIN_VMAF: f32 = 95.0; /// Interpolated binary search using sample-encode to find the best crf /// value delivering min-vmaf & max-encoded-percent. @@ -45,8 +46,16 @@ pub struct Args { pub args: args::Encode, /// Desired min VMAF score to deliver. - #[arg(long, default_value_t = 95.0)] - pub min_vmaf: f32, + /// + /// [default: 95] + #[arg(long, group = "min_score")] + pub min_vmaf: Option, + + /// Desired min XPSNR score to deliver. + /// + /// Enables use of XPSNR for score analysis instead of VMAF. + #[arg(long, group = "min_score")] + pub min_xpsnr: Option, /// Maximum desired encoded size percentage of the input size. #[arg(long, default_value_t = 80.0)] @@ -92,10 +101,19 @@ pub struct Args { #[clap(flatten)] pub vmaf: args::Vmaf, + #[clap(flatten)] + pub score: args::ScoreArgs, + #[command(flatten)] pub verbose: clap_verbosity_flag::Verbosity, } +impl Args { + pub fn min_score(&self) -> f32 { + self.min_vmaf.or(self.min_xpsnr).unwrap_or(DEFAULT_MIN_VMAF) + } +} + pub async fn crf_search(mut args: Args) -> anyhow::Result<()> { let bar = ProgressBar::new(BAR_LEN).with_style( ProgressStyle::default_bar() @@ -109,7 +127,7 @@ pub async fn crf_search(mut args: Args) -> anyhow::Result<()> { args.sample .set_extension_from_input(&args.args.input, &args.args.encoder, &probe); - let min_vmaf = args.min_vmaf; + let min_score = args.min_score(); let max_encoded_percent = args.max_encoded_percent; let thorough = args.thorough; let enc_args = args.args.clone(); @@ -119,7 +137,7 @@ pub async fn crf_search(mut args: Args) -> anyhow::Result<()> { while let Some(update) = run.next().await { let update = update.inspect_err(|e| { if let Error::NoGoodCrf { last } = e { - last.print_attempt(&bar, min_vmaf, max_encoded_percent); + 
last.print_attempt(&bar, min_score, max_encoded_percent); } })?; match update { @@ -142,11 +160,11 @@ pub async fn crf_search(mut args: Args) -> anyhow::Result<()> { true => bar.set_prefix(format!("crf {crf} full pass")), false => bar.set_prefix(format!("crf {crf} {sample}/{samples}")), } + let label = work.fps_label(); match work { Work::Encode if fps <= 0.0 => bar.set_message("encoding, "), - Work::Encode => bar.set_message(format!("enc {fps} fps, ")), - Work::Vmaf if fps <= 0.0 => bar.set_message("vmaf, "), - Work::Vmaf => bar.set_message(format!("vmaf {fps} fps, ")), + _ if fps <= 0.0 => bar.set_message(format!("{label}, ")), + _ => bar.set_message(format!("{label} {fps} fps, ")), } } Update::SampleResult { @@ -161,7 +179,7 @@ pub async fn crf_search(mut args: Args) -> anyhow::Result<()> { result.print_attempt(&bar, sample, Some(crf)) } } - Update::RunResult(result) => result.print_attempt(&bar, min_vmaf, max_encoded_percent), + Update::RunResult(result) => result.print_attempt(&bar, min_score, max_encoded_percent), Update::Done(best) => { info!("crf {} successful", best.crf()); bar.finish_with_message(""); @@ -184,6 +202,7 @@ pub fn run( Args { args, min_vmaf, + min_xpsnr, max_encoded_percent, min_crf, max_crf, @@ -192,6 +211,7 @@ pub fn run( sample, cache, vmaf, + score, verbose: _, }: Args, input_probe: Arc, @@ -202,6 +222,8 @@ pub fn run( let default_min_crf = args.encoder.default_min_crf(); let min_crf = min_crf.unwrap_or(default_min_crf); Error::ensure_other(min_crf < max_crf, "Invalid --min-crf & --max-crf")?; + // by default use vmaf 95, otherwise use whatever is specified + let min_score = min_vmaf.or(min_xpsnr).unwrap_or(DEFAULT_MIN_VMAF); // Whether to make the 2nd iteration on the ~20%/~80% crf point instead of the min/max to // improve interpolation by narrowing the crf range a 20% (or 30%) subrange. 
@@ -228,6 +250,8 @@ pub fn run( cache, stdout_format: sample_encode::StdoutFormat::Json, vmaf: vmaf.clone(), + score: score.clone(), + xpsnr: min_xpsnr.is_some(), }; let mut crf_attempts = Vec::new(); @@ -265,9 +289,9 @@ pub fn run( crf_attempts.push(sample.clone()); let sample_small_enough = sample.enc.encode_percent <= max_encoded_percent as _; - if sample.enc.vmaf > min_vmaf { + if sample.enc.score > min_score { // good - if sample_small_enough && sample.enc.vmaf < min_vmaf + higher_tolerance { + if sample_small_enough && sample.enc.score < min_score + higher_tolerance { yield Update::Done(sample); return; } @@ -283,7 +307,7 @@ pub fn run( return; } Some(upper) => { - q = vmaf_lerp_q(min_vmaf, upper, &sample); + q = vmaf_lerp_q(min_score, upper, &sample); } None if sample.q == max_q => { Error::ensure_or_no_good_crf(sample_small_enough, &sample)?; @@ -314,7 +338,7 @@ pub fn run( return; } Some(lower) => { - q = vmaf_lerp_q(min_vmaf, &sample, lower); + q = vmaf_lerp_q(min_score, &sample, lower); } None if cut_on_iter2 && run == 1 && sample.q > min_q + 1 => { q = (sample.q as f32 * 0.4 + min_q as f32 * 0.6).round() as _; @@ -340,11 +364,11 @@ impl Sample { self.q.to_crf(self.crf_increment) } - pub fn print_attempt(&self, bar: &ProgressBar, min_vmaf: f32, max_encoded_percent: f32) { + pub fn print_attempt(&self, bar: &ProgressBar, min_score: f32, max_encoded_percent: f32) { let crf_label = style("- crf").dim(); let mut crf = style(TerseF32(self.crf())); - let vmaf_label = style("VMAF").dim(); - let mut vmaf = style(self.enc.vmaf); + let vmaf_label = style(self.enc.score_kind).dim(); + let mut vmaf = style(self.enc.score); let mut percent = style!("{:.0}%", self.enc.encode_percent); let open = style("(").dim(); let close = style(")").dim(); @@ -353,7 +377,7 @@ impl Sample { false => style(""), }; - if self.enc.vmaf < min_vmaf { + if self.enc.score < min_score { crf = crf.red().bright(); vmaf = vmaf.red().bright(); } @@ -383,7 +407,8 @@ impl StdoutFormat { 
Self::Human => { let crf = style(TerseF32(sample.crf())).bold().green(); let enc = &sample.enc; - let vmaf = style(enc.vmaf).bold().green(); + let score = style(enc.score).bold().green(); + let score_kind = enc.score_kind; let size = style(HumanBytes(enc.predicted_encode_size)).bold().green(); let percent = style!("{}%", enc.encode_percent.round()).bold().green(); let time = style(HumanDuration(enc.predicted_encode_time)).bold(); @@ -392,7 +417,7 @@ impl StdoutFormat { false => "video stream", }; println!( - "crf {crf} VMAF {vmaf:.2} predicted {enc_description} size {size} ({percent}) taking {time}" + "crf {crf} {score_kind} {score:.2} predicted {enc_description} size {size} ({percent}) taking {time}" ); } } @@ -412,14 +437,14 @@ impl StdoutFormat { /// This would be helpful particularly for small crf-increments. fn vmaf_lerp_q(min_vmaf: f32, worse_q: &Sample, better_q: &Sample) -> u64 { assert!( - worse_q.enc.vmaf <= min_vmaf - && worse_q.enc.vmaf < better_q.enc.vmaf + worse_q.enc.score <= min_vmaf + && worse_q.enc.score < better_q.enc.score && worse_q.q > better_q.q, "invalid vmaf_lerp_crf usage: ({min_vmaf}, {worse_q:?}, {better_q:?})" ); - let vmaf_diff = better_q.enc.vmaf - worse_q.enc.vmaf; - let vmaf_factor = (min_vmaf - worse_q.enc.vmaf) / vmaf_diff; + let vmaf_diff = better_q.enc.score - worse_q.enc.score; + let vmaf_factor = (min_vmaf - worse_q.enc.score) / vmaf_diff; let q_diff = worse_q.q - better_q.q; let lerp = (worse_q.q as f32 - q_diff as f32 * vmaf_factor).round() as u64; diff --git a/src/command/sample_encode.rs b/src/command/sample_encode.rs index 34f7c90..866de45 100644 --- a/src/command/sample_encode.rs +++ b/src/command/sample_encode.rs @@ -3,6 +3,7 @@ mod cache; use crate::{ command::{ args::{self, PixelFormat}, + sample_encode::cache::ScoringInfo, SmallDuration, PROGRESS_CHARS, }, console_ext::style, @@ -12,6 +13,7 @@ use crate::{ process::FfmpegOut, sample, temporary, vmaf::{self, VmafOut}, + xpsnr::{self, XpsnrOut}, }; use anyhow::{ensure, 
Context}; use clap::{ArgAction, Parser}; @@ -20,6 +22,7 @@ use futures_util::Stream; use indicatif::{HumanBytes, HumanDuration, ProgressBar, ProgressStyle}; use log::info; use std::{ + fmt::Display, io::{self, IsTerminal}, path::{Path, PathBuf}, pin::pin, @@ -33,7 +36,7 @@ use tokio_stream::StreamExt; /// This is much quicker than a full encode/vmaf run. /// /// Outputs: -/// * Mean sample VMAF score +/// * Mean sample score /// * Predicted full encode size /// * Predicted full encode time #[derive(Parser, Clone)] @@ -65,6 +68,13 @@ pub struct Args { #[clap(flatten)] pub vmaf: args::Vmaf, + + #[clap(flatten)] + pub score: args::ScoreArgs, + + /// Calculate a XPSNR score instead of VMAF. + #[arg(long)] + pub xpsnr: bool, } pub async fn sample_encode(mut args: Args) -> anyhow::Result<()> { @@ -102,11 +112,11 @@ pub async fn sample_encode(mut args: Args) -> anyhow::Result<()> { true => bar.set_prefix("Full pass"), false => bar.set_prefix(format!("Sample {sample}/{samples}")), } + let label = work.fps_label(); match work { Work::Encode if fps <= 0.0 => bar.set_message("encoding, "), - Work::Encode => bar.set_message(format!("enc {fps} fps, ")), - Work::Vmaf if fps <= 0.0 => bar.set_message("vmaf, "), - Work::Vmaf => bar.set_message(format!("vmaf {fps} fps, ")), + _ if fps <= 0.0 => bar.set_message(format!("{label}, ")), + _ => bar.set_message(format!("{label} {fps} fps, ")), } bar.set_position((progress * BAR_LEN_F).round() as _); } @@ -120,13 +130,7 @@ pub async fn sample_encode(mut args: Args) -> anyhow::Result<()> { style(enc_args.encode_hint(crf)).dim().italic(), ); } - stdout_fmt.print_result( - output.vmaf, - output.predicted_encode_size, - output.encode_percent, - output.predicted_encode_time, - input_is_image, - ); + stdout_fmt.print_result(&output, input_is_image); } } } @@ -141,6 +145,8 @@ pub fn run( cache, stdout_format: _, vmaf, + score, + xpsnr, }: Args, input_probe: Arc, ) -> impl Stream> { @@ -155,6 +161,10 @@ pub fn run( let samples = 
sample_args.sample_count(duration).max(1); let keep = sample_args.keep; let temp_dir = sample_args.temp_dir; + let scoring = match xpsnr { + true => ScoringInfo::Xpsnr(&score), + _ => ScoringInfo::Vmaf(&vmaf, &score), + }; let (samples, sample_duration, full_pass) = { if input_is_image { @@ -232,17 +242,13 @@ pub fn run( input_len, full_pass, &enc_args, - &vmaf, + scoring, ) .await { (Some(result), _) => { if samples > 1 { - info!( - "sample {sample_n}/{samples} crf {crf} VMAF {:.2} ({:.0}%) (cache)", - result.vmaf_score, - 100.0 * result.encoded_size as f32 / sample_size as f32, - ); + result.log_attempt(sample_n, samples, crf); } result } @@ -275,76 +281,132 @@ pub fn run( let encoded_size = fs::metadata(&encoded_sample).await?.len(); let encoded_probe = ffprobe::probe(&encoded_sample); - // calculate vmaf - yield Update::Status(Status { - work: Work::Vmaf, - fps: 0.0, - progress: (sample_idx as f32 + 0.5) / samples as f32, - full_pass, - sample: sample_n, - samples, - }); - let vmaf = vmaf::run( - &sample, - &encoded_sample, - &vmaf.ffmpeg_lavfi( - encoded_probe.resolution, - enc_args - .pix_fmt - .max(input_pixel_format.unwrap_or(PixelFormat::Yuv444p10le)), - args.vfilter.as_deref(), - ), - vmaf.vmaf_fps, - )?; - let mut vmaf = pin!(vmaf); - let mut logger = ProgressLogger::new("ab_av1::vmaf", Instant::now()); - let mut vmaf_score = None; - while let Some(vmaf) = vmaf.next().await { - match vmaf { - VmafOut::Done(score) => { - vmaf_score = Some(score); - break; + let result = match scoring { + ScoringInfo::Vmaf(..) 
=> { + yield Update::Status(Status { + work: Work::Score(ScoreKind::Vmaf), + fps: 0.0, + progress: (sample_idx as f32 + 0.5) / samples as f32, + full_pass, + sample: sample_n, + samples, + }); + let vmaf = vmaf::run( + &sample, + &encoded_sample, + &vmaf.ffmpeg_lavfi( + encoded_probe.resolution, + enc_args + .pix_fmt + .max(input_pixel_format.unwrap_or(PixelFormat::Yuv444p10le)), + score.reference_vfilter.as_deref().or(args.vfilter.as_deref()), + ), + vmaf.vmaf_fps, + )?; + let mut vmaf = pin!(vmaf); + let mut logger = ProgressLogger::new("ab_av1::vmaf", Instant::now()); + let mut vmaf_score = None; + while let Some(vmaf) = vmaf.next().await { + match vmaf { + VmafOut::Done(score) => { + vmaf_score = Some(score); + break; + } + VmafOut::Progress(FfmpegOut::Progress { time, fps, .. }) => { + yield Update::Status(Status { + work: Work::Score(ScoreKind::Vmaf), + fps, + progress: (sample_duration_us + + time.as_micros_u64() + + sample_idx * sample_duration_us * 2) as f32 + / (sample_duration_us * samples * 2) as f32, + full_pass, + sample: sample_n, + samples, + }); + logger.update(sample_duration, time, fps); + } + VmafOut::Progress(_) => {} + VmafOut::Err(e) => Err(e)?, + } } - VmafOut::Progress(FfmpegOut::Progress { time, fps, .. }) => { - yield Update::Status(Status { - work: Work::Vmaf, - fps, - progress: (sample_duration_us + - time.as_micros_u64() + - sample_idx * sample_duration_us * 2) as f32 - / (sample_duration_us * samples * 2) as f32, - full_pass, - sample: sample_n, - samples, - }); - logger.update(sample_duration, time, fps); + + EncodeResult { + score: vmaf_score.context("no vmaf score")?, + score_kind: ScoreKind::Vmaf, + sample_size, + encoded_size, + encode_time, + sample_duration: encoded_probe + .duration + .ok() + .filter(|d| !d.is_zero()) + .unwrap_or(sample_duration), + from_cache: false, } - VmafOut::Progress(_) => {} - VmafOut::Err(e) => Err(e)?, } - } - let vmaf_score = vmaf_score.context("no vmaf score")?; + ScoringInfo::Xpsnr(..) 
=> { + yield Update::Status(Status { + work: Work::Score(ScoreKind::Xpsnr), + fps: 0.0, + progress: (sample_idx as f32 + 0.5) / samples as f32, + full_pass, + sample: sample_n, + samples, + }); - if samples > 1 { - info!( - "sample {sample_n}/{samples} crf {crf} VMAF {vmaf_score:.2} ({:.0}%)", - 100.0 * encoded_size as f32 / sample_size as f32, - ); - } + let lavfi = super::xpsnr::lavfi( + score.reference_vfilter.as_deref().or(args.vfilter.as_deref()) + ); + let xpsnr_out = xpsnr::run(&sample, &encoded_sample, &lavfi)?; + let mut xpsnr_out = pin!(xpsnr_out); + let mut logger = ProgressLogger::new("ab_av1::xpsnr", Instant::now()); + let mut score = None; + while let Some(next) = xpsnr_out.next().await { + match next { + XpsnrOut::Done(s) => { + score = Some(s); + break; + } + XpsnrOut::Progress(FfmpegOut::Progress { time, fps, .. }) => { + yield Update::Status(Status { + work: Work::Score(ScoreKind::Xpsnr), + fps, + progress: (sample_duration_us + + time.as_micros_u64() + + sample_idx * sample_duration_us * 2) as f32 + / (sample_duration_us * samples * 2) as f32, + full_pass, + sample: sample_n, + samples, + }); + logger.update(sample_duration, time, fps); + } + XpsnrOut::Progress(_) => {} + XpsnrOut::Err(e) => Err(e)?, + } + } - let result = EncodeResult { - vmaf_score, - sample_size, - encoded_size, - encode_time, - sample_duration: encoded_probe - .duration - .ok() - .filter(|d| !d.is_zero()) - .unwrap_or(sample_duration), - from_cache: false, + EncodeResult { + score: score.context("no xpsnr score")?, + score_kind: ScoreKind::Xpsnr, + sample_size, + encoded_size, + encode_time, + sample_duration: encoded_probe + .duration + .ok() + .filter(|d| !d.is_zero()) + .unwrap_or(sample_duration), + from_cache: false, + } + } }; + if samples > 1 { + result.log_attempt(sample_n, samples, crf); + } + if let Some(k) = key { cache::cache_result(k, &result).await?; } @@ -363,8 +425,10 @@ pub fn run( yield Update::SampleResult { sample: sample_n, result }; } + let score_kind = 
results.score_kind(); let output = Output { - vmaf: results.mean_vmaf(), + score: results.mean_score(), + score_kind, // Using file size * encode_percent can over-estimate. However, if it ends up less // than the duration estimation it may turn out to be more accurate. predicted_encode_size: results @@ -375,8 +439,8 @@ pub fn run( from_cache: results.iter().all(|r| r.from_cache), }; info!( - "crf {crf} VMAF {:.2} predicted video stream size {} ({:.0}%) taking {}{}", - output.vmaf, + "crf {crf} {score_kind} {:.2} predicted video stream size {} ({:.0}%) taking {}{}", + output.score, HumanBytes(output.predicted_encode_size), output.encode_percent, HumanDuration(output.predicted_encode_time), @@ -421,7 +485,8 @@ async fn sample( pub struct EncodeResult { pub sample_size: u64, pub encoded_size: u64, - pub vmaf_score: f32, + pub score: f32, + pub score_kind: ScoreKind, pub encode_time: Duration, /// Duration of the sample. /// @@ -436,13 +501,14 @@ impl EncodeResult { let Self { sample_size, encoded_size, - vmaf_score, + score, + score_kind, from_cache, .. } = self; bar.println( style!( - "- {}Sample {sample_n} ({:.0}%) vmaf {vmaf_score:.2}{}", + "- {}Sample {sample_n} ({:.0}%) {score_kind} {score:.2}{}", crf.map(|crf| format!("crf {crf}: ")).unwrap_or_default(), 100.0 * *encoded_size as f32 / *sample_size as f32, if *from_cache { " (cache)" } else { "" }, @@ -451,12 +517,60 @@ impl EncodeResult { .to_string(), ); } + + pub fn log_attempt(&self, sample_n: u64, samples: u64, crf: f32) { + let Self { + sample_size, + encoded_size, + score, + score_kind, + from_cache, + .. + } = self; + info!( + "sample {sample_n}/{samples} crf {crf} {score_kind} {score:.2} ({:.0}%){}", + 100.0 * *encoded_size as f32 / *sample_size as f32, + if *from_cache { " (cache)" } else { "" } + ); + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +pub enum ScoreKind { + Vmaf, + Xpsnr, +} + +impl ScoreKind { + /// Display label for fps in progress bar. 
+ pub fn fps_label(&self) -> &'static str { + match self { + Self::Vmaf => "vmaf", + Self::Xpsnr => "xpsnr", + } + } + + /// General display name. + pub fn display_str(&self) -> &'static str { + match self { + Self::Vmaf => "VMAF", + Self::Xpsnr => "XPSNR", + } + } +} + +impl Display for ScoreKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.display_str()) + } } trait EncodeResults { fn encoded_percent_size(&self) -> f64; - fn mean_vmaf(&self) -> f32; + fn score_kind(&self) -> ScoreKind; + + fn mean_score(&self) -> f32; /// Return estimated encoded **video stream** size by multiplying sample size by duration. fn estimate_encode_size_by_duration( @@ -477,11 +591,17 @@ impl EncodeResults for Vec { encoded * 100.0 / sample } - fn mean_vmaf(&self) -> f32 { + fn score_kind(&self) -> ScoreKind { + self.first() + .map(|r| r.score_kind) + .unwrap_or(ScoreKind::Vmaf) + } + + fn mean_score(&self) -> f32 { if self.is_empty() { return 0.0; } - self.iter().map(|r| r.vmaf_score).sum::() / self.len() as f32 + self.iter().map(|r| r.score).sum::() / self.len() as f32 } fn estimate_encode_size_by_duration( @@ -551,16 +671,27 @@ pub enum StdoutFormat { } impl StdoutFormat { - fn print_result(self, vmaf: f32, size: u64, percent: f64, time: Duration, image: bool) { + fn print_result( + self, + Output { + score, + score_kind, + predicted_encode_size, + encode_percent, + predicted_encode_time, + from_cache: _, + }: &Output, + image: bool, + ) { match self { Self::Human => { - let vmaf = match vmaf { - v if v >= 95.0 => style(v).bold().green(), - v if v < 80.0 => style(v).bold().red(), - v => style(v).bold(), + let score = match (*score, score_kind) { + (v, ScoreKind::Vmaf) if v >= 95.0 => style(v).bold().green(), + (v, ScoreKind::Vmaf) if v < 80.0 => style(v).bold().red(), + (v, _) => style(v).bold(), }; - let percent = percent.round(); - let size = match size { + let percent = encode_percent.round(); + let size = match 
*predicted_encode_size { v if percent < 80.0 => style(HumanBytes(v)).bold().green(), v if percent >= 100.0 => style(HumanBytes(v)).bold().red(), v => style(HumanBytes(v)).bold(), @@ -570,23 +701,26 @@ impl StdoutFormat { v if v >= 100.0 => style!("{}%", v).bold().red(), v => style!("{}%", v).bold(), }; - let time = style(HumanDuration(time)).bold(); + let time = style(HumanDuration(*predicted_encode_time)).bold(); let enc_description = match image { true => "image", false => "video stream", }; println!( - "VMAF {vmaf:.2} predicted {enc_description} size {size} ({percent}) taking {time}" + "{score_kind} {score:.2} predicted {enc_description} size {size} ({percent}) taking {time}" ); } Self::Json => { - let json = serde_json::json!({ - "vmaf": vmaf, - "predicted_encode_size": size, - "predicted_encode_percent": percent, - "predicted_encode_seconds": time.as_secs(), + let mut json = serde_json::json!({ + "predicted_encode_size": predicted_encode_size, + "predicted_encode_percent": encode_percent, + "predicted_encode_seconds": predicted_encode_time.as_secs(), }); - println!("{}", serde_json::to_string(&json).unwrap()); + match score_kind { + ScoreKind::Vmaf => json["vmaf"] = (*score).into(), + ScoreKind::Xpsnr => json["xpsnr"] = (*score).into(), + } + println!("{json}"); } } } @@ -595,8 +729,9 @@ impl StdoutFormat { /// Sample encode result. #[derive(Debug, Clone)] pub struct Output { - /// Sample mean VMAF score. - pub vmaf: f32, + /// Sample mean score. + pub score: f32, + pub score_kind: ScoreKind, /// Estimated full encoded **video stream** size. /// /// Encoded sample size multiplied by duration. @@ -611,11 +746,22 @@ pub struct Output { pub from_cache: bool, } +/// Kinds of sample-encode work. #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] pub enum Work { #[default] Encode, - Vmaf, + Score(ScoreKind), +} + +impl Work { + /// Display label for fps in progress bar. 
+ pub fn fps_label(&self) -> &'static str { + match self { + Self::Encode => "enc", + Self::Score(kind) => kind.fps_label(), + } + } } #[derive(Debug)] diff --git a/src/command/sample_encode/cache.rs b/src/command/sample_encode/cache.rs index eecf6c2..d9e32b8 100644 --- a/src/command/sample_encode/cache.rs +++ b/src/command/sample_encode/cache.rs @@ -1,5 +1,8 @@ //! _sample-encode_ file system caching logic. -use crate::{command::args::Vmaf, ffmpeg::FfmpegEncodeArgs}; +use crate::{ + command::args::{ScoreArgs, Vmaf}, + ffmpeg::FfmpegEncodeArgs, +}; use anyhow::Context; use std::{ ffi::OsStr, @@ -18,7 +21,7 @@ pub async fn cached_encode( input_size: u64, full_pass: bool, enc_args: &FfmpegEncodeArgs<'_>, - vmaf_args: &Vmaf, + scoring: ScoringInfo<'_>, ) -> (Option, Option) { if !cache { return (None, None); @@ -36,7 +39,7 @@ pub async fn cached_encode( full_pass, ), enc_args, - vmaf_args, + scoring, ); let key = Key(hash); @@ -65,6 +68,12 @@ pub async fn cached_encode( } } +#[derive(Debug, Hash, Clone, Copy)] +pub enum ScoringInfo<'a> { + Vmaf(&'a Vmaf, &'a ScoreArgs), + Xpsnr(&'a ScoreArgs), +} + pub async fn cache_result(key: Key, result: &super::EncodeResult) -> anyhow::Result<()> { let data = serde_json::to_vec(result)?; let insert = tokio::task::spawn_blocking(move || { @@ -103,16 +112,13 @@ pub struct Key(blake3::Hash); fn hash_encode( input_info: impl Hash, enc_args: &FfmpegEncodeArgs<'_>, - vmaf_args: &Vmaf, + scoring_info: impl Hash, ) -> blake3::Hash { let mut hasher = blake3::Hasher::new(); let mut std_hasher = BlakeStdHasher(&mut hasher); input_info.hash(&mut std_hasher); enc_args.sample_encode_hash(&mut std_hasher); - if !vmaf_args.is_default() { - // avoid hashing if default for back compat - vmaf_args.hash(&mut std_hasher); - } + scoring_info.hash(&mut std_hasher); hasher.finalize() } diff --git a/src/command/vmaf.rs b/src/command/vmaf.rs index 82e9acb..92ae281 100644 --- a/src/command/vmaf.rs +++ b/src/command/vmaf.rs @@ -24,7 +24,6 @@ use 
tokio_stream::StreamExt; /// * Auto sets model version (4k or 1k) according to resolution. /// * Auto sets `n_threads` to system threads. /// * Auto upscales lower resolution videos to the model. -/// * Converts distorted & reference to appropriate format yuv streams before passing to vmaf. #[derive(Parser)] #[clap(verbatim_doc_comment)] #[group(skip)] @@ -39,6 +38,9 @@ pub struct Args { #[clap(flatten)] pub vmaf: args::Vmaf, + + #[clap(flatten)] + pub score: args::ScoreArgs, } pub async fn vmaf( @@ -46,6 +48,7 @@ pub async fn vmaf( reference, distorted, vmaf, + score, }: Args, ) -> anyhow::Result<()> { let bar = ProgressBar::new(1).with_style( @@ -72,7 +75,7 @@ pub async fn vmaf( &vmaf.ffmpeg_lavfi( dprobe.resolution, dpix_fmt.max(rpix_fmt), - vmaf.reference_vfilter.as_deref(), + score.reference_vfilter.as_deref(), ), vmaf.vmaf_fps, )?); diff --git a/src/command/xpsnr.rs b/src/command/xpsnr.rs new file mode 100644 index 0000000..37f1a10 --- /dev/null +++ b/src/command/xpsnr.rs @@ -0,0 +1,119 @@ +use crate::{ + command::{args, PROGRESS_CHARS}, + ffprobe, + log::ProgressLogger, + process::FfmpegOut, + xpsnr::{self, XpsnrOut}, +}; +use anyhow::Context; +use clap::Parser; +use indicatif::{ProgressBar, ProgressStyle}; +use std::{ + borrow::Cow, + path::PathBuf, + pin::pin, + sync::LazyLock, + time::{Duration, Instant}, +}; +use tokio_stream::StreamExt; + +/// Full XPSNR score calculation, distorted file vs reference file. +/// Works with videos and images. +#[derive(Parser)] +#[clap(verbatim_doc_comment)] +#[group(skip)] +pub struct Args { + /// Reference video file. + #[arg(long)] + pub reference: PathBuf, + + /// Re-encoded/distorted video file. 
+ #[arg(long)] + pub distorted: PathBuf, + + #[clap(flatten)] + pub score: args::ScoreArgs, +} + +pub async fn xpsnr( + Args { + reference, + distorted, + score, + }: Args, +) -> anyhow::Result<()> { + let bar = ProgressBar::new(1).with_style( + ProgressStyle::default_bar() + .template("{spinner:.cyan.bold} {elapsed_precise:.bold} {wide_bar:.cyan/blue} ({msg}eta {eta})")? + .progress_chars(PROGRESS_CHARS) + ); + bar.enable_steady_tick(Duration::from_millis(100)); + bar.set_message("xpsnr running, "); + + let dprobe = ffprobe::probe(&distorted); + let rprobe = LazyLock::new(|| ffprobe::probe(&reference)); + let nframes = dprobe.nframes().or_else(|_| rprobe.nframes()); + let duration = dprobe + .duration + .as_ref() + .or_else(|_| rprobe.duration.as_ref()); + if let Ok(nframes) = nframes { + bar.set_length(nframes); + } + + let mut xpsnr_out = pin!(xpsnr::run( + &reference, + &distorted, + &lavfi(score.reference_vfilter.as_deref()), + )?); + let mut logger = ProgressLogger::new(module_path!(), Instant::now()); + let mut score = None; + while let Some(next) = xpsnr_out.next().await { + match next { + XpsnrOut::Done(s) => { + score = Some(s); + break; + } + XpsnrOut::Progress(FfmpegOut::Progress { + frame, fps, time, .. + }) => { + if fps > 0.0 { + bar.set_message(format!("xpsnr {fps} fps, ")); + } + if nframes.is_ok() { + bar.set_position(frame); + } + if let Ok(total) = duration { + logger.update(*total, time, fps); + } + } + XpsnrOut::Progress(FfmpegOut::StreamSizes { .. 
}) => {} + XpsnrOut::Err(e) => return Err(e), + } + } + bar.finish(); + + println!("{}", score.context("no xpsnr score")?); + Ok(()) +} + +pub fn lavfi(ref_vfilter: Option<&str>) -> Cow<'static, str> { + match ref_vfilter { + None => "xpsnr=stats_file=-".into(), + Some(vf) => format!("[1:v]{vf}[ref];[0:v][ref]xpsnr=stats_file=-").into(), + } +} + +#[test] +fn test_lavfi_default() { + assert_eq!(lavfi(None), "xpsnr=stats_file=-"); +} + +#[test] +fn test_lavfi_ref_vfilter() { + assert_eq!( + lavfi(Some("scale=1280:-1")), + "[1:v]scale=1280:-1[ref];\ + [0:v][ref]xpsnr=stats_file=-" + ); +} diff --git a/src/main.rs b/src/main.rs index 4303902..bfaff34 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,6 +8,7 @@ mod process; mod sample; mod temporary; mod vmaf; +mod xpsnr; use ::log::LevelFilter; use anyhow::anyhow; @@ -21,6 +22,7 @@ use tokio::signal; enum Command { SampleEncode(command::sample_encode::Args), Vmaf(command::vmaf::Args), + Xpsnr(command::xpsnr::Args), Encode(command::encode::Args), CrfSearch(command::crf_search::Args), AutoEncode(command::auto_encode::Args), @@ -47,6 +49,7 @@ async fn main() { let command = local.run_until(match action { Command::SampleEncode(args) => command::sample_encode(args).boxed_local(), Command::Vmaf(args) => command::vmaf(args).boxed_local(), + Command::Xpsnr(args) => command::xpsnr(args).boxed_local(), Command::Encode(args) => command::encode(args).boxed_local(), Command::CrfSearch(args) => command::crf_search(args).boxed_local(), Command::AutoEncode(args) => command::auto_encode(args).boxed_local(), diff --git a/src/process.rs b/src/process.rs index 9a3ab14..8a9244a 100644 --- a/src/process.rs +++ b/src/process.rs @@ -207,14 +207,18 @@ impl Chunks { } pub fn rfind_line(&self, predicate: impl Fn(&str) -> bool) -> Option<&str> { + self.rfind_line_map(|line| predicate(line).then_some(line)) + } + + pub fn rfind_line_map<'a, T>(&'a self, f: impl Fn(&'a str) -> Option) -> Option { let lines = self .out .rsplit(|b| *b == b'\n') 
.flat_map(|l| l.rsplit(|b| *b == b'\r')); for line in lines { if let Ok(line) = std::str::from_utf8(line) { - if predicate(line) { - return Some(line); + if let Some(out) = f(line) { + return Some(out); } } } diff --git a/src/vmaf.rs b/src/vmaf.rs index b7240fc..bb0afd2 100644 --- a/src/vmaf.rs +++ b/src/vmaf.rs @@ -7,8 +7,7 @@ use tokio::process::Command; use tokio_process_stream::{Item, ProcessChunkStream}; use tokio_stream::{Stream, StreamExt}; -/// Calculate VMAF score by converting the original first to yuv. -/// This can produce more accurate results than testing directly from original source. +/// Calculate VMAF score using ffmpeg. pub fn run( reference: &Path, distorted: &Path, @@ -33,7 +32,7 @@ pub fn run( let cmd_str = cmd.to_cmd_str(); debug!("cmd `{cmd_str}`"); - let mut vmaf: ProcessChunkStream = cmd.try_into().context("ffmpeg vmaf")?; + let mut vmaf = ProcessChunkStream::try_from(cmd).context("ffmpeg vmaf")?; Ok(async_stream::stream! { let mut chunks = Chunks::default(); @@ -75,14 +74,14 @@ pub enum VmafOut { impl VmafOut { fn try_from_chunk(chunk: &[u8], chunks: &mut Chunks) -> Option { - const VMAF_SCORE_PRE: &str = "VMAF score: "; + const SCORE_PREFIX: &str = "VMAF score: "; chunks.push(chunk); - if let Some(line) = chunks.rfind_line(|l| l.contains(VMAF_SCORE_PRE)) { - let idx = line.find(VMAF_SCORE_PRE).unwrap(); + if let Some(line) = chunks.rfind_line(|l| l.contains(SCORE_PREFIX)) { + let idx = line.find(SCORE_PREFIX).unwrap(); return Some(Self::Done( - line[idx + VMAF_SCORE_PRE.len()..].trim().parse().ok()?, + line[idx + SCORE_PREFIX.len()..].trim().parse().ok()?, )); } if let Some(progress) = FfmpegOut::try_parse(chunks.last_line()) { diff --git a/src/xpsnr.rs b/src/xpsnr.rs new file mode 100644 index 0000000..14e5db8 --- /dev/null +++ b/src/xpsnr.rs @@ -0,0 +1,230 @@ +//! 
xpsnr logic +use crate::process::{cmd_err, exit_ok_stderr, Chunks, CommandExt, FfmpegOut}; +use anyhow::Context; +use log::{debug, info}; +use std::{path::Path, process::Stdio}; +use tokio::process::Command; +use tokio_process_stream::{Item, ProcessChunkStream}; +use tokio_stream::{Stream, StreamExt}; + +/// Calculate XPSNR score using ffmpeg. +pub fn run( + reference: &Path, + distorted: &Path, + filter_complex: &str, +) -> anyhow::Result> { + info!( + "xpsnr {} vs reference {}", + distorted.file_name().and_then(|n| n.to_str()).unwrap_or(""), + reference.file_name().and_then(|n| n.to_str()).unwrap_or(""), + ); + + let mut cmd = Command::new("ffmpeg"); + cmd.arg2("-i", reference) + .arg2("-i", distorted) + .arg2("-filter_complex", filter_complex) + .arg2("-f", "null") + .arg("-") + .stdin(Stdio::null()); + + let cmd_str = cmd.to_cmd_str(); + debug!("cmd `{cmd_str}`"); + let mut xpsnr = ProcessChunkStream::try_from(cmd).context("ffmpeg xpsnr")?; + + Ok(async_stream::stream! { + let mut chunks = Chunks::default(); + let mut parsed_done = false; + while let Some(next) = xpsnr.next().await { + match next { + Item::Stderr(chunk) => { + if let Some(out) = XpsnrOut::try_from_chunk(&chunk, &mut chunks) { + if matches!(out, XpsnrOut::Done(_)) { + parsed_done = true; + } + yield out; + } + } + Item::Stdout(_) => {} + Item::Done(code) => { + if let Err(err) = exit_ok_stderr("ffmpeg xpsnr", code, &cmd_str, &chunks) { + yield XpsnrOut::Err(err); + } + } + } + } + if !parsed_done { + yield XpsnrOut::Err(cmd_err( + "could not parse ffmpeg xpsnr score", + &cmd_str, + &chunks, + )); + } + }) +} + +#[derive(Debug)] +pub enum XpsnrOut { + Progress(FfmpegOut), + Done(f32), + Err(anyhow::Error), +} + +impl XpsnrOut { + fn try_from_chunk(chunk: &[u8], chunks: &mut Chunks) -> Option { + chunks.push(chunk); + + if let Some(score) = chunks.rfind_line_map(score_from_line) { + return Some(Self::Done(score)); + } + if let Some(progress) = FfmpegOut::try_parse(chunks.last_line()) { + return 
Some(Self::Progress(progress)); + } + None + } +} + +// E.g. "[Parsed_xpsnr_0 @ 0x711494004cc0] XPSNR y: 33.6547 u: 41.8741 v: 42.2571 (minimum: 33.6547)" +fn score_from_line(line: &str) -> Option { + const MIN_PREFIX: &str = "minimum: "; + + if !line.contains("XPSNR") { + return None; + } + + let yidx = line.find(MIN_PREFIX)?; + let tail = &line[yidx + MIN_PREFIX.len()..]; + let end_idx = tail + .char_indices() + .take_while(|(_, c)| *c == '.' || c.is_numeric()) + .last()? + .0; + tail[..=end_idx].parse().ok() +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn parse_rgb_line() { + let score = score_from_line( + "XPSNR average, 1 frames r: 40.6130 g: 41.0275 b: 40.6961 (minimum: 40.6130)", + ); + assert_eq!(score, Some(40.6130)); + } + + #[test] + fn parse_xpsnr_score() { + // Note: some lines omitted for brevity + const FFMPEG_OUT: &str = r#"Input #0, matroska,webm, from 'tmp.mkv': + Metadata: + COMPATIBLE_BRANDS: isomiso2avc1mp41 + MAJOR_BRAND : isom + MINOR_VERSION : 512 + ENCODER : Lavf61.7.100 + Duration: 00:00:53.77, start: -0.007000, bitrate: 2698 kb/s + Stream #0:0(eng): Video: av1 (libdav1d) (Main), yuv420p10le(tv, progressive), 3840x2160, 25 fps, 25 tbr, 1k tbn (default) + Metadata: + HANDLER_NAME : ?Mainconcept Video Media Handler + VENDOR_ID : [0][0][0][0] + ENCODER : Lavc61.19.100 libsvtav1 + DURATION : 00:00:53.760000000 + Stream #0:1(eng): Audio: opus, 48000 Hz, stereo, fltp (default) + Metadata: + title : Opus 96Kbps + HANDLER_NAME : #Mainconcept MP4 Sound Media Handler + VENDOR_ID : [0][0][0][0] + ENCODER : Lavc61.19.100 libopus + DURATION : 00:00:53.768000000 +Input #1, mov,mp4,m4a,3gp,3g2,mj2, from 'pixabay-lemon-82602.mp4': + Metadata: + major_brand : isom + minor_version : 512 + compatible_brands: isomiso2avc1mp41 + encoder : Lavf58.20.100 + Duration: 00:00:53.76, start: 0.000000, bitrate: 14109 kb/s + Stream #1:0[0x1](eng): Video: h264 (High) (avc1 / 0x31637661), yuv420p(progressive), 3840x2160, 14101 kb/s, 25 fps, 25 tbr, 12800 
tbn (default) + Metadata: + handler_name : ?Mainconcept Video Media Handler + vendor_id : [0][0][0][0] + Stream #1:1[0x2](eng): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, stereo, fltp, 2 kb/s (default) + Metadata: + handler_name : #Mainconcept MP4 Sound Media Handler + vendor_id : [0][0][0][0] +Stream mapping: + Stream #0:0 (libdav1d) -> xpsnr + Stream #1:0 (h264) -> xpsnr + xpsnr:default -> Stream #0:0 (wrapped_avframe) + Stream #0:1 -> #0:1 (opus (native) -> pcm_s16le (native)) +Press [q] to stop, [?] for help +[Parsed_xpsnr_0 @ 0x78341c004d00] not matching timebases found between first input: 1/1000 and second input 1/12800, results may be incorrect! +Output #0, null, to 'pipe:': + Metadata: + COMPATIBLE_BRANDS: isomiso2avc1mp41 + MAJOR_BRAND : isom + MINOR_VERSION : 512 + encoder : Lavf61.7.100 + Stream #0:0: Video: wrapped_avframe, yuv420p10le(tv, progressive), 3840x2160 [SAR 1:1 DAR 16:9], q=2-31, 200 kb/s, 25 fps, 25 tbn + Metadata: + encoder : Lavc61.19.100 wrapped_avframe + Stream #0:1(eng): Audio: pcm_s16le, 48000 Hz, stereo, s16, 1536 kb/s (default) + Metadata: + title : Opus 96Kbps + HANDLER_NAME : #Mainconcept MP4 Sound Media Handler + VENDOR_ID : [0][0][0][0] + DURATION : 00:00:53.768000000 + encoder : Lavc61.19.100 pcm_s16le +frame= 9 fps=0.0 q=-0.0 size=N/A time=00:00:00.32 bitrate=N/A speed=0.64x +frame= 28 fps= 28 q=-0.0 size=N/A time=00:00:01.08 bitrate=N/A speed=1.08x +frame= 46 fps= 31 q=-0.0 size=N/A time=00:00:01.80 bitrate=N/A speed= 1.2x +frame= 65 fps= 32 q=-0.0 size=N/A time=00:00:02.56 bitrate=N/A speed=1.28x +n: 1 XPSNR y: 54.5266 XPSNR u: 56.3886 XPSNR v: 58.7794 +n: 2 XPSNR y: 40.6035 XPSNR u: 39.3487 XPSNR v: 42.3634 +n: 3 XPSNR y: 40.9764 XPSNR u: 38.8791 XPSNR v: 41.8961 +n: 64 XPSNR y: 41.0726 XPSNR u: 39.7731 XPSNR v: 42.5210 +n: 65 XPSNR y: 41.3476 XPSNR u: 39.6055 XPSNR v: 42.4262 +n: 66 XPSNR y: 41.1029 XPSNR u: 39.8779 XPSNR v: 42.6400 +frame= 84 fps= 34 q=-0.0 size=N/A time=00:00:03.32 bitrate=N/A speed=1.33x +frame= 102 
fps= 34 q=-0.0 size=N/A time=00:00:04.04 bitrate=N/A speed=1.35x +frame= 120 fps= 34 q=-0.0 size=N/A time=00:00:04.76 bitrate=N/A speed=1.36x +n: 67 XPSNR y: 40.9642 XPSNR u: 39.5204 XPSNR v: 42.1316 +n: 68 XPSNR y: 40.2677 XPSNR u: 38.9371 XPSNR v: 41.9560 +n: 69 XPSNR y: 40.6431 XPSNR u: 38.8864 XPSNR v: 41.6902 +n: 1319 XPSNR y: 41.4316 XPSNR u: 40.5146 XPSNR v: 42.1970 +n: 1320 XPSNR y: 41.4623 XPSNR u: 40.5527 XPSNR v: 42.3358 +n: 1321 XPSNR y: 42.5312 XPSNR u: 41.2487 XPSNR v: 42.8495 +frame= 1328 fps= 37 q=-0.0 size=N/A time=00:00:53.08 bitrate=N/A speed=1.47x +[Parsed_xpsnr_0 @ 0x78341c004d00] XPSNR y: 40.7139 u: 39.1440 v: 41.7907 (minimum: 39.1440) +[out#0/null @ 0x64006e11b1c0] video:578KiB audio:10080KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: unknown +frame= 1344 fps= 37 q=-0.0 Lsize=N/A time=00:00:53.72 bitrate=N/A speed=1.48x +n: 1342 XPSNR y: 40.6841 XPSNR u: 39.0209 XPSNR v: 40.9250 +n: 1343 XPSNR y: 41.0269 XPSNR u: 39.2465 XPSNR v: 41.1238 +n: 1344 XPSNR y: 39.8468 XPSNR u: 38.4587 XPSNR v: 40.5844 + +XPSNR average, 1344 frames y: 40.7139 +"#; + + const CHUNK_SIZE: usize = 64; + + let ffmpeg = FFMPEG_OUT.as_bytes(); + + let mut chunks = Chunks::default(); + let mut start_idx = 0; + let mut xpsnr_score = None; + while start_idx < ffmpeg.len() { + let chunk = &ffmpeg[start_idx..(start_idx + CHUNK_SIZE).min(FFMPEG_OUT.len())]; + // println!("* {}", String::from_utf8_lossy(chunk).trim()); + + if let Some(xpsnr) = XpsnrOut::try_from_chunk(chunk, &mut chunks) { + println!("{xpsnr:?}"); + if let XpsnrOut::Done(score) = xpsnr { + xpsnr_score = Some(score); + } + } + + start_idx += CHUNK_SIZE; + } + + assert_eq!(xpsnr_score, Some(39.1440), "failed to parse xpsnr score"); + } +}