Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(metadata): add codec_metadata_v2_to_v3 #141

Merged
merged 2 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions zarrs_metadata/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added
- Derive `Copy` for `ArrayMetadataV2Order`
- Add `codec_metadata_v2_to_v3`

### Fixed
- Interpret a `0` fill value as `""` for Zarr V2 string arrays (for `zarr-python` compatibility) ([#140] by [@zqfang])

Expand Down
2 changes: 1 addition & 1 deletion zarrs_metadata/src/v2/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ impl Serialize for FillValueMetadataV2 {
}

/// The layout of bytes within each chunk of the array.
#[derive(Serialize, Deserialize, Clone, PartialEq, Eq, Debug)]
#[derive(Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Debug)]
pub enum ArrayMetadataV2Order {
/// Row-major order. The last dimension varies fastest.
C,
Expand Down
173 changes: 99 additions & 74 deletions zarrs_metadata/src/v2_to_v3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
data_type_metadata_v2_to_endianness, ArrayMetadataV2Order, DataTypeMetadataV2,
DataTypeMetadataV2InvalidEndiannessError, FillValueMetadataV2,
},
ArrayMetadataV2, GroupMetadataV2,
ArrayMetadataV2, GroupMetadataV2, MetadataV2,
},
v3::{
array::{
Expand All @@ -25,6 +25,7 @@
},
ArrayMetadataV3, GroupMetadataV3, MetadataV3,
},
Endianness,
};

use super::v3::array::data_type::DataTypeMetadataV3;
Expand Down Expand Up @@ -61,85 +62,27 @@
Other(String),
}

/// Convert Zarr V2 array metadata to V3.
/// Convert Zarr V2 codec metadata to the equivalent Zarr V3 codec metadata.
///
/// # Errors
/// Returns an [`ArrayMetadataV2ToV3ConversionError`] if the metadata is invalid or is not compatible with Zarr V3 metadata.
#[allow(clippy::too_many_lines)]
pub fn array_metadata_v2_to_v3(
array_metadata_v2: &ArrayMetadataV2,
) -> Result<ArrayMetadataV3, ArrayMetadataV2ToV3ConversionError> {
let shape = array_metadata_v2.shape.clone();
let chunk_grid = MetadataV3::new_with_serializable_configuration(
crate::v3::array::chunk_grid::regular::IDENTIFIER,
&RegularChunkGridConfiguration {
chunk_shape: array_metadata_v2.chunks.clone(),
},
)?;

let (Ok(data_type), endianness) = (
data_type_metadata_v2_to_v3_data_type(&array_metadata_v2.dtype),
data_type_metadata_v2_to_endianness(&array_metadata_v2.dtype)
.map_err(ArrayMetadataV2ToV3ConversionError::InvalidEndianness)?,
) else {
return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedDataType(
match &array_metadata_v2.dtype {
DataTypeMetadataV2::Simple(dtype) => dtype.clone(),
DataTypeMetadataV2::Structured(dtype) => {
return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedDataType(
format!("{dtype:?}"),
))
}
},
));
};

// Fill value
let mut fill_value = array_metadata_fill_value_v2_to_v3(&array_metadata_v2.fill_value)
.or_else(|| {
// Support zarr-python encoded string arrays with a `null` fill value
match data_type.name().as_str() {
"string" => Some(FillValueMetadataV3::String(String::new())),
_ => None,
}
})
.ok_or_else(|| {
// TODO: How best to deal with null fill values? What do other implementations do?
ArrayMetadataV2ToV3ConversionError::UnsupportedFillValue(
data_type.to_string(),
array_metadata_v2.fill_value.clone(),
)
})?;
if data_type.name() == "bool" {
// Map a 0/1 scalar fill value to a bool
if let Some(fill_value_uint) = fill_value.try_as_uint::<u64>() {
if fill_value_uint == 0 {
fill_value = FillValueMetadataV3::Bool(false);
} else if fill_value_uint == 1 {
fill_value = FillValueMetadataV3::Bool(true);
} else {
return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedFillValue(
data_type.to_string(),
array_metadata_v2.fill_value.clone(),
));
}
}
} else if data_type.name() == "string" {
// Add a special case for `zarr-python` string data with a 0 fill value -> empty string
if let Some(0) = fill_value.try_as_uint::<u64>() {
fill_value = FillValueMetadataV3::String(String::new());
}
}

pub fn codec_metadata_v2_to_v3(
order: ArrayMetadataV2Order,
dimensionality: usize,
data_type: &DataTypeMetadataV3,
endianness: Option<Endianness>,
filters: &Option<Vec<MetadataV2>>,
compressor: &Option<MetadataV2>,
) -> Result<Vec<MetadataV3>, ArrayMetadataV2ToV3ConversionError> {
let mut codecs: Vec<MetadataV3> = vec![];

// Array-to-array codecs
if array_metadata_v2.order == ArrayMetadataV2Order::F {
if order == ArrayMetadataV2Order::F {
let transpose_metadata = MetadataV3::new_with_serializable_configuration(
crate::v3::array::codec::transpose::IDENTIFIER,
&TransposeCodecConfigurationV1 {
order: {
let f_order: Vec<usize> = (0..array_metadata_v2.shape.len()).rev().collect();
let f_order: Vec<usize> = (0..dimensionality).rev().collect();
unsafe {
// SAFETY: f_order is valid
TransposeOrder::new(&f_order).unwrap_unchecked()
Expand All @@ -152,7 +95,7 @@

// Filters (array to array or array to bytes codecs)
let mut has_array_to_bytes = false;
if let Some(filters) = &array_metadata_v2.filters {
if let Some(filters) = filters {
for filter in filters {
// TODO: Add a V2 registry with V2 to V3 conversion functions
match filter.id() {
Expand All @@ -175,7 +118,7 @@
}

// Compressor (array to bytes codec)
if let Some(compressor) = &array_metadata_v2.compressor {
if let Some(compressor) = compressor {
#[allow(clippy::single_match)]
match compressor.id() {
crate::v2::array::codec::zfpy::IDENTIFIER => {
Expand Down Expand Up @@ -211,7 +154,7 @@
}

// Compressor (bytes to bytes codec)
if let Some(compressor) = &array_metadata_v2.compressor {
if let Some(compressor) = compressor {
match compressor.id() {
crate::v2::array::codec::zfpy::IDENTIFIER
| crate::v3::array::codec::pcodec::IDENTIFIER => {
Expand All @@ -221,7 +164,7 @@
let blosc = serde_json::from_value::<BloscCodecConfigurationNumcodecs>(
serde_json::to_value(compressor.configuration())?,
)?;
let configuration = codec_blosc_v2_numcodecs_to_v3(&blosc, &data_type);
let configuration = codec_blosc_v2_numcodecs_to_v3(&blosc, data_type);
codecs.push(MetadataV3::new_with_serializable_configuration(
crate::v3::array::codec::blosc::IDENTIFIER,
&configuration,
Expand All @@ -244,6 +187,88 @@
};
}

Ok(codecs)
}

/// Convert Zarr V2 array metadata to V3.
///
/// # Errors
/// Returns an [`ArrayMetadataV2ToV3ConversionError`] if the metadata is invalid or is not compatible with Zarr V3 metadata.
#[allow(clippy::too_many_lines)]
pub fn array_metadata_v2_to_v3(
array_metadata_v2: &ArrayMetadataV2,
) -> Result<ArrayMetadataV3, ArrayMetadataV2ToV3ConversionError> {
let shape = array_metadata_v2.shape.clone();
let chunk_grid = MetadataV3::new_with_serializable_configuration(
crate::v3::array::chunk_grid::regular::IDENTIFIER,
&RegularChunkGridConfiguration {
chunk_shape: array_metadata_v2.chunks.clone(),
},
)?;

let (Ok(data_type), endianness) = (
data_type_metadata_v2_to_v3_data_type(&array_metadata_v2.dtype),
data_type_metadata_v2_to_endianness(&array_metadata_v2.dtype)
.map_err(ArrayMetadataV2ToV3ConversionError::InvalidEndianness)?,
) else {
return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedDataType(
match &array_metadata_v2.dtype {
DataTypeMetadataV2::Simple(dtype) => dtype.clone(),
DataTypeMetadataV2::Structured(dtype) => {
return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedDataType(
format!("{dtype:?}"),
))

Check warning on line 220 in zarrs_metadata/src/v2_to_v3.rs

View check run for this annotation

Codecov / codecov/patch

zarrs_metadata/src/v2_to_v3.rs#L215-L220

Added lines #L215 - L220 were not covered by tests
}
},
));
};

// Fill value
let mut fill_value = array_metadata_fill_value_v2_to_v3(&array_metadata_v2.fill_value)
.or_else(|| {
// Support zarr-python encoded string arrays with a `null` fill value
match data_type.name().as_str() {
"string" => Some(FillValueMetadataV3::String(String::new())),
_ => None,

Check warning on line 232 in zarrs_metadata/src/v2_to_v3.rs

View check run for this annotation

Codecov / codecov/patch

zarrs_metadata/src/v2_to_v3.rs#L232

Added line #L232 was not covered by tests
}
})
.ok_or_else(|| {
// TODO: How best to deal with null fill values? What do other implementations do?
ArrayMetadataV2ToV3ConversionError::UnsupportedFillValue(
data_type.to_string(),
array_metadata_v2.fill_value.clone(),
)

Check warning on line 240 in zarrs_metadata/src/v2_to_v3.rs

View check run for this annotation

Codecov / codecov/patch

zarrs_metadata/src/v2_to_v3.rs#L236-L240

Added lines #L236 - L240 were not covered by tests
})?;
if data_type.name() == "bool" {
// Map a 0/1 scalar fill value to a bool
if let Some(fill_value_uint) = fill_value.try_as_uint::<u64>() {
if fill_value_uint == 0 {
fill_value = FillValueMetadataV3::Bool(false);
} else if fill_value_uint == 1 {
fill_value = FillValueMetadataV3::Bool(true);
} else {
return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedFillValue(
data_type.to_string(),
array_metadata_v2.fill_value.clone(),
));

Check warning on line 253 in zarrs_metadata/src/v2_to_v3.rs

View check run for this annotation

Codecov / codecov/patch

zarrs_metadata/src/v2_to_v3.rs#L244-L253

Added lines #L244 - L253 were not covered by tests
}
}

Check warning on line 255 in zarrs_metadata/src/v2_to_v3.rs

View check run for this annotation

Codecov / codecov/patch

zarrs_metadata/src/v2_to_v3.rs#L255

Added line #L255 was not covered by tests
} else if data_type.name() == "string" {
// Add a special case for `zarr-python` string data with a 0 fill value -> empty string
if let Some(0) = fill_value.try_as_uint::<u64>() {
fill_value = FillValueMetadataV3::String(String::new());
}
}

let codecs = codec_metadata_v2_to_v3(
array_metadata_v2.order,
array_metadata_v2.shape.len(),
&data_type,
endianness,
&array_metadata_v2.filters,
&array_metadata_v2.compressor,
)?;

let chunk_key_encoding = MetadataV3::new_with_serializable_configuration(
crate::v3::array::chunk_key_encoding::v2::IDENTIFIER,
&V2ChunkKeyEncodingConfiguration {
Expand Down