From d279c9f4cd5f81469d206a3de296afaa72bd533a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Padarian?= Date: Sun, 3 Dec 2023 02:06:02 +1100 Subject: [PATCH] Add histogram calculation for RasterBand (#468) * Add histogram calculation for RasterBand --------- Co-authored-by: Simeon H.K. Fitch Co-authored-by: Even Rouault --- CHANGES.md | 4 + src/raster/mod.rs | 2 +- src/raster/rasterband.rs | 167 ++++++++++++++++++++++++++++++++++++++- src/raster/tests.rs | 37 +++++++++ 4 files changed, 206 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index ea8a57561..7e7a8e4a7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -50,6 +50,10 @@ - +- Added histogram calculation + + - + ## 0.16 - **Breaking**: `Dataset::close` now consumes `self` diff --git a/src/raster/mod.rs b/src/raster/mod.rs index 712902f1d..d921bc9bf 100644 --- a/src/raster/mod.rs +++ b/src/raster/mod.rs @@ -88,7 +88,7 @@ pub use mdarray::{ }; pub use rasterband::{ Buffer, ByteBuffer, CmykEntry, ColorEntry, ColorInterpretation, ColorTable, GrayEntry, - HlsEntry, PaletteInterpretation, RasterBand, ResampleAlg, RgbaEntry, StatisticsAll, + Histogram, HlsEntry, PaletteInterpretation, RasterBand, ResampleAlg, RgbaEntry, StatisticsAll, StatisticsMinMax, }; pub use rasterize::{rasterize, BurnSource, MergeAlgorithm, OptimizeMode, RasterizeOptions}; diff --git a/src/raster/rasterband.rs b/src/raster/rasterband.rs index 207486054..92c0d4fd6 100644 --- a/src/raster/rasterband.rs +++ b/src/raster/rasterband.rs @@ -5,9 +5,10 @@ use crate::raster::{GdalDataType, GdalType}; use crate::utils::{_last_cpl_err, _last_null_pointer_err, _string}; use gdal_sys::{ self, CPLErr, GDALColorEntry, GDALColorInterp, GDALColorTableH, GDALComputeRasterMinMax, - GDALCreateColorRamp, GDALCreateColorTable, GDALDestroyColorTable, GDALGetPaletteInterpretation, - GDALGetRasterStatistics, GDALMajorObjectH, GDALPaletteInterp, GDALRIOResampleAlg, GDALRWFlag, - GDALRasterBandH, GDALRasterIOExtraArg, GDALSetColorEntry, GDALSetRasterColorTable, + GDALCreateColorRamp, GDALCreateColorTable, GDALDestroyColorTable, GDALGetDefaultHistogramEx, + GDALGetPaletteInterpretation, GDALGetRasterHistogramEx, GDALGetRasterStatistics, + GDALMajorObjectH, GDALPaletteInterp, GDALRIOResampleAlg, GDALRWFlag, GDALRasterBandH, + GDALRasterIOExtraArg, GDALSetColorEntry, GDALSetRasterColorTable, }; use libc::c_int; use std::ffi::CString; @@ -847,6 +848,90 @@ impl<'a> RasterBand<'a> { max: min_max[1], }) } + + /// Fetch default raster histogram. + /// + /// # Arguments + /// + /// * `force` - If `true`, force the computation. If `false` and no default histogram is available, the method will return `Ok(None)` + pub fn default_histogram(&self, force: bool) -> Result> { + let mut counts = std::ptr::null_mut(); + let mut min = 0.0; + let mut max = 0.0; + let mut n_buckets = 0i32; + + let rv = unsafe { + GDALGetDefaultHistogramEx( + self.c_rasterband, + &mut min, + &mut max, + &mut n_buckets, + &mut counts as *mut *mut u64, + libc::c_int::from(force), + None, + std::ptr::null_mut(), + ) + }; + + match CplErrType::from(rv) { + CplErrType::None => Ok(Some(Histogram { + min, + max, + counts: HistogramCounts::GdalAllocated(counts, n_buckets as usize), + })), + CplErrType::Warning => Ok(None), + _ => Err(_last_cpl_err(rv)), + } + } + + /// Compute raster histogram. + /// + /// # Arguments + /// + /// * `min` - Histogram lower bound + /// * `max` - Histogram upper bound + /// * `n_buckets` - Number of buckets in the histogram + /// * `include_out_of_range` - if `true`, values below the histogram range will be mapped into the first bucket, and values above will be mapped into the last one. If `false`, out of range values are discarded + /// * `is_approx_ok` - If an approximate, or incomplete histogram is OK + pub fn histogram( + &self, + min: f64, + max: f64, + n_buckets: usize, + include_out_of_range: bool, + is_approx_ok: bool, + ) -> Result { + if n_buckets == 0 { + return Err(GdalError::BadArgument( + "n_buckets should be > 0".to_string(), + )); + } + + let mut counts = vec![0; n_buckets]; + + let rv = unsafe { + GDALGetRasterHistogramEx( + self.c_rasterband, + min, + max, + n_buckets as i32, + counts.as_mut_ptr(), + libc::c_int::from(include_out_of_range), + libc::c_int::from(is_approx_ok), + None, + std::ptr::null_mut(), + ) + }; + + match CplErrType::from(rv) { + CplErrType::None => Ok(Histogram { + min, + max, + counts: HistogramCounts::RustAllocated(counts), + }), + _ => Err(_last_cpl_err(rv)), + } + } } #[derive(Debug, PartialEq)] @@ -863,6 +948,82 @@ pub struct StatisticsAll { pub std_dev: f64, } +#[derive(Debug)] +pub struct Histogram { + min: f64, + max: f64, + counts: HistogramCounts, +} + +impl Histogram { + /// Histogram lower bound + pub fn min(&self) -> f64 { + self.min + } + + /// Histogram upper bound + pub fn max(&self) -> f64 { + self.max + } + + /// Histogram values for each bucket + pub fn counts(&self) -> &[u64] { + self.counts.as_slice() + } + + /// Number of buckets in histogram + pub fn n_buckets(&self) -> usize { + self.counts().len() + } + + /// Histogram bucket size + pub fn bucket_size(&self) -> f64 { + (self.max - self.min) / self.counts().len() as f64 + } +} + +/// Union type over histogram storage mechanisms. +/// +/// This private enum exists to normalize over the two different ways +/// [`GDALGetRasterHistogram`] and [`GDALGetDefaultHistogram`] return data: +/// * `GDALGetRasterHistogram`: requires a pre-allocated array, stored in `HistogramCounts::RustAllocated`. +/// * `GDALGetDefaultHistogram`: returns a pointer (via an 'out' parameter) to a GDAL allocated array, +/// stored in `HistogramCounts::GdalAllocated`, to be deallocated with [`VSIFree`][gdal_sys::VSIFree]. +enum HistogramCounts { + /// Pointer to GDAL allocated array and length of histogram counts. + /// + /// Requires freeing with [`VSIFree`][gdal_sys::VSIFree]. + GdalAllocated(*mut u64, usize), + /// Rust-allocated vector into which GDAL stores histogram counts. + RustAllocated(Vec), +} + +impl HistogramCounts { + fn as_slice(&self) -> &[u64] { + match &self { + Self::GdalAllocated(p, n) => unsafe { std::slice::from_raw_parts(*p, *n) }, + Self::RustAllocated(v) => v.as_slice(), + } + } +} + +impl Debug for HistogramCounts { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.as_slice().fmt(f) + } +} + +impl Drop for HistogramCounts { + fn drop(&mut self) { + match self { + HistogramCounts::GdalAllocated(p, _) => unsafe { + gdal_sys::VSIFree(*p as *mut libc::c_void); + }, + HistogramCounts::RustAllocated(_) => {} + } + } +} + impl<'a> MajorObject for RasterBand<'a> { fn gdal_object_ptr(&self) -> GDALMajorObjectH { self.c_rasterband diff --git a/src/raster/tests.rs b/src/raster/tests.rs index 3c97665ad..bbfc4e7f5 100644 --- a/src/raster/tests.rs +++ b/src/raster/tests.rs @@ -751,6 +751,43 @@ fn test_raster_stats() { ); } +#[test] +fn test_raster_histogram() { + let fixture = TempFixture::fixture("tinymarble.tif"); + + let dataset = Dataset::open(&fixture).unwrap(); + let rb = dataset.rasterband(1).unwrap(); + + let hist = rb.default_histogram(false).unwrap(); + assert!(hist.is_none()); + + let hist = rb.default_histogram(true).unwrap().unwrap(); + let expected = &[ + 548, 104, 133, 127, 141, 125, 156, 129, 130, 117, 94, 94, 80, 81, 78, 63, 50, 66, 48, 48, + 33, 38, 41, 35, 41, 39, 32, 40, 26, 27, 25, 24, 18, 25, 29, 27, 20, 34, 17, 24, 29, 11, 20, + 21, 12, 19, 16, 16, 11, 10, 19, 5, 11, 10, 6, 9, 7, 12, 13, 6, 8, 7, 8, 14, 9, 14, 4, 8, 5, + 12, 6, 10, 7, 9, 8, 6, 3, 7, 5, 8, 9, 5, 4, 8, 3, 9, 3, 6, 11, 7, 6, 3, 9, 9, 7, 6, 9, 10, + 10, 4, 7, 2, 4, 7, 2, 12, 7, 10, 4, 6, 5, 2, 4, 5, 7, 3, 5, 7, 7, 14, 9, 12, 6, 6, 8, 5, 8, + 3, 3, 5, 11, 4, 9, 7, 14, 7, 10, 11, 6, 6, 5, 4, 9, 6, 6, 9, 5, 12, 11, 9, 3, 8, 5, 6, 4, + 2, 9, 7, 9, 9, 9, 6, 6, 8, 5, 9, 13, 4, 9, 4, 7, 13, 10, 5, 7, 8, 11, 12, 5, 17, 9, 11, 9, + 8, 9, 5, 8, 9, 5, 6, 9, 11, 8, 7, 7, 6, 7, 8, 8, 8, 5, 6, 7, 5, 8, 5, 6, 8, 7, 4, 8, 6, 5, + 11, 8, 8, 5, 4, 6, 4, 9, 7, 6, 6, 7, 7, 12, 6, 9, 17, 12, 20, 18, 17, 21, 24, 30, 29, 57, + 72, 83, 21, 11, 9, 18, 7, 13, 10, 2, 4, 0, 1, 3, 4, 1, 1, + ]; + assert_eq!(hist.counts(), expected); + + let hist = rb.histogram(-0.5, 255.5, 128, true, true).unwrap(); + let expected_small = (0..expected.len()) + .step_by(2) + .map(|i| expected[i] + expected[i + 1]) + .collect::>(); + assert_eq!(hist.counts(), &expected_small); + + // n_buckets = 0 is not allowed + let hist = rb.histogram(-0.5, 255.5, 0, true, true); + hist.expect_err("histogram with 0 buckets should panic"); +} + #[test] fn test_resample_str() { assert!(ResampleAlg::from_str("foobar").is_err());