From 860073ae10c3e84dc9b04a7eb2179235ab74fbb8 Mon Sep 17 00:00:00 2001
From: EricLBuehler
Date: Mon, 15 Jul 2024 10:46:09 -0400
Subject: [PATCH] Update preprocessor config

---
 .../src/vision_models/preprocessor_config.rs | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/mistralrs-core/src/vision_models/preprocessor_config.rs b/mistralrs-core/src/vision_models/preprocessor_config.rs
index c4a6b7cd5..b1711520d 100644
--- a/mistralrs-core/src/vision_models/preprocessor_config.rs
+++ b/mistralrs-core/src/vision_models/preprocessor_config.rs
@@ -4,6 +4,26 @@ use candle_core::Result;
 use image::imageops::FilterType;
 use serde::Deserialize;
 
+#[derive(Deserialize, Debug, Clone)]
+pub(crate) struct VisionCropParams {
+    pub(crate) output_size: Vec,
+}
+
+#[derive(Deserialize, Debug, Clone)]
+pub(crate) struct VisionNormalizeParams {
+    pub(crate) inplace: bool,
+    pub(crate) mean: Option<[f64; 3]>,
+    pub(crate) std: Option<[f64; 3]>,
+}
+
+#[derive(Deserialize, Debug, Clone)]
+pub(crate) struct VisionResizeParams {
+    pub(crate) antialias: bool,
+    pub(crate) interpolation: usize,
+    pub(crate) max_size: Option,
+    pub(crate) size: (usize, usize),
+}
+
 #[derive(Deserialize, Debug, Clone)]
 #[allow(dead_code)]
 pub struct PreProcessorConfig {
@@ -22,6 +42,17 @@ pub struct PreProcessorConfig {
     pub(crate) crop_size: Option>,
     pub(crate) num_img_tokens: Option,
     pub(crate) num_crops: Option,
+    // OpenVLA
+    pub(crate) means: Option>,
+    pub(crate) stds: Option>,
+    pub(crate) input_sizes: Option>,
+    pub(crate) tvf_crop_params: Option>,
+    pub(crate) tvf_do_letterbox: Option,
+    pub(crate) tvf_letterbox_fill: Option<(usize, usize, usize)>,
+    pub(crate) tvf_normalize_params: Option>,
+    pub(crate) tvf_resize_params: Option>,
+    pub(crate) use_fused_vision_backbone: Option,
+    pub(crate) interpolations: Option>,
 }
 
 #[allow(dead_code)]
@@ -43,3 +74,17 @@ impl ToFilter for Option {
         }
     }
 }
+
+impl ToFilter for String {
+    // https://github.com/python-pillow/Pillow/blob/4b68563e8a818fb9c528fa159ddf3f4eaefa35e6/src/PIL/Image.py#L164-L170
+    // Default: https://github.com/huggingface/transformers/blob/0df888ffb72ea370555efdef45985378d3cc7b2b/src/transformers/models/idefics2/image_processing_idefics2.py#L226
+    fn to_filter(self) -> Result {
+        match self.to_lowercase().as_str() {
+            "nearest" => Ok(FilterType::Nearest),
+            "lanczos" => Ok(FilterType::Lanczos3),
+            "bilinear" => Ok(FilterType::Triangle), // BiLinear
+            "bicubic" => Ok(FilterType::CatmullRom), // BiCubic
+            x => candle_core::bail!("Filter {x} not supported"),
+        }
+    }
+}