diff --git a/packages/app/src/providers/h5grove/__snapshots__/h5grove-api.test.ts.snap b/packages/app/src/providers/h5grove/__snapshots__/h5grove-api.test.ts.snap index 518f29ab4..cca906387 100644 --- a/packages/app/src/providers/h5grove/__snapshots__/h5grove-api.test.ts.snap +++ b/packages/app/src/providers/h5grove/__snapshots__/h5grove-api.test.ts.snap @@ -4,7 +4,13 @@ exports[`test file matches snapshot 1`] = ` [ { "name": "int8_scalar", - "rawType": "|i1", + "rawType": { + "class": 0, + "dtype": "|i1", + "order": 0, + "sign": 1, + "size": 1, + }, "shape": [], "type": { "class": "Integer", @@ -15,7 +21,13 @@ exports[`test file matches snapshot 1`] = ` }, { "name": "int8_2D", - "rawType": "|i1", + "rawType": { + "class": 0, + "dtype": "|i1", + "order": 0, + "sign": 1, + "size": 1, + }, "shape": [ 2, 3, @@ -36,7 +48,13 @@ exports[`test file matches snapshot 1`] = ` }, { "name": "int16_scalar", - "rawType": "i4", + "rawType": { + "class": 0, + "dtype": ">i4", + "order": 1, + "sign": 1, + "size": 4, + }, "shape": [], "type": { "class": "Integer", @@ -90,7 +126,13 @@ exports[`test file matches snapshot 1`] = ` }, { "name": "int32_2D", - "rawType": "f4", + "rawType": { + "class": 1, + "dtype": ">f4", + "order": 1, + "size": 4, + }, "shape": [], "type": { "class": "Float", @@ -339,7 +466,12 @@ exports[`test file matches snapshot 1`] = ` }, { "name": "float32_2D", - "rawType": "c16", + "rawType": { + "class": 6, + "dtype": ">c16", + "members": { + "i": { + "class": 1, + "dtype": ">f8", + "order": 1, + "size": 8, + }, + "r": { + "class": 1, + "dtype": ">f8", + "order": 1, + "size": 8, + }, + }, + "size": 16, + }, "shape": [], "type": { "class": "Complex", @@ -699,7 +1007,25 @@ exports[`test file matches snapshot 1`] = ` }, { "name": "complex128_2D", - "rawType": "", }, { "name": "reference_region_scalar", - "rawType": "|O", + "rawType": { + "class": 7, + "dtype": "|O", + "size": 12, + }, "shape": [], "type": { - "charSet": "UTF-8", - "class": "String", + "class": "Reference", }, "value": "", }, { "name": "bool_empty", - "rawType": "|b1", + "rawType": { + "base": { + "class": 0, + "dtype": "|u1", + "order": 0, + "sign": 0, + "size": 1, + }, + "class": 8, + "dtype": "|b1", + "members": { + "FALSE": 0, + "TRUE": 1, + }, + "size": 1, + }, "shape": null, "type": { "class": "Boolean", @@ -987,7 +1517,22 @@ exports[`test file matches snapshot 1`] = ` }, { "name": "bool_false_scalar", - "rawType": "|b1", + "rawType": { + "base": { + "class": 0, + "dtype": "|i1", + "order": 0, + "sign": 1, + "size": 1, + }, + "class": 8, + "dtype": "|b1", + "members": { + "FALSE": 0, + "TRUE": 1, + }, + "size": 1, + }, "shape": [], "type": { "class": "Boolean", @@ -996,7 +1541,22 @@ exports[`test file matches snapshot 1`] = ` }, { "name": "bool_true_scalar", - "rawType": "|b1", + "rawType": { + "base": { + "class": 0, + "dtype": "|i1", + "order": 0, + "sign": 1, + "size": 1, + }, + "class": 8, + "dtype": "|b1", + "members": { + "FALSE": 0, + "TRUE": 1, + }, + "size": 1, + }, "shape": [], "type": { "class": "Boolean", @@ -1005,7 +1565,22 @@ exports[`test file matches snapshot 1`] = ` }, { "name": "bool_2D", - "rawType": "|b1", + "rawType": { + "base": { + "class": 0, + "dtype": "|i1", + "order": 0, + "sign": 1, + "size": 1, + }, + "class": 8, + "dtype": "|b1", + "members": { + "FALSE": 0, + "TRUE": 1, + }, + "size": 1, + }, "shape": [ 2, 4, @@ -1026,33 +1601,92 @@ exports[`test file matches snapshot 1`] = ` }, { "name": "enum_uint8_scalar", - "rawType": "|u1", + "rawType": { + "base": { + "class": 0, + "dtype": "|u1", + "order": 0, + "sign": 0, + "size": 1, + }, + "class": 8, + "dtype": "|u1", + "members": { + "A": 0, + "B": 1, + }, + "size": 1, + }, "shape": [], "type": { - "class": "Integer (unsigned)", - "endianness": "little-endian", - "size": 8, + "base": { + "class": "Integer (unsigned)", + "endianness": "little-endian", + "size": 8, + }, + "class": "Enumeration", + "mapping": { + "A": 0, + "B": 1, + }, }, "value": 1, }, { "name": "enum_int32_scalar", - "rawType": "; +} + +export interface H5GroveReferenceType extends H5GroveBaseType { + class: 7; +} + +export interface H5GroveEnumType extends H5GroveBaseType { + class: 8; + members: Record; + base: H5GroveType; +} + +export interface H5GroveVlenType extends H5GroveBaseType { + class: 9; + base: H5GroveType; +} + +export interface H5GroveArrayType extends H5GroveBaseType { + class: 10; + dims: number[]; + base: H5GroveType; +} diff --git a/packages/app/src/providers/h5grove/utils.test.ts b/packages/app/src/providers/h5grove/utils.test.ts index a48ec1815..ad887503f 100644 --- a/packages/app/src/providers/h5grove/utils.test.ts +++ b/packages/app/src/providers/h5grove/utils.test.ts @@ -1,61 +1,133 @@ import { Endianness } from '@h5web/shared/hdf5-models'; import { + arrayType, + bitfieldType, boolType, compoundType, cplxType, + enumType, floatType, intType, + opaqueType, + referenceType, strType, + timeType, uintType, unknownType, } from '@h5web/shared/hdf5-utils'; import { describe, expect, it } from 'vitest'; +import type { H5GroveType } from './models'; import { parseDType } from './utils'; describe('parseDType', () => { - it('should convert integer dtypes', () => { - expect(parseDType('u8')).toStrictEqual(uintType(64, Endianness.BE)); - }); - - it('should convert float dtypes', () => { - expect(parseDType('f8')).toStrictEqual(floatType(64, Endianness.BE)); + it('should convert integer types', () => { + expect(parseDType({ class: 0, size: 1, order: 0, sign: 1 })).toStrictEqual( + intType(8, Endianness.LE), + ); + expect(parseDType({ class: 0, size: 8, order: 1, sign: 0 })).toStrictEqual( + uintType(64, Endianness.BE), + ); }); - it('should convert complex dtypes', () => { - expect(parseDType(' { + expect(parseDType({ class: 1, size: 4, order: 0 })).toStrictEqual( + floatType(32, Endianness.LE), + ); + expect(parseDType({ class: 1, size: 8, order: 1 })).toStrictEqual( + floatType(64, Endianness.BE), ); }); - it('should convert bytes string dtypes', () => { - expect(parseDType('|S6')).toStrictEqual(strType('ASCII', 6)); + it('should convert string types', () => { + expect( + parseDType({ class: 3, size: 6, cset: 0, vlen: false }), + ).toStrictEqual(strType('ASCII', 6)); + expect( + parseDType({ class: 3, size: 6, cset: 0, vlen: true }), + ).toStrictEqual(strType('ASCII')); + expect( + parseDType({ class: 3, size: 6, cset: 1, vlen: false }), + ).toStrictEqual(strType('UTF-8', 6)); + expect( + parseDType({ class: 3, size: 6, cset: 1, vlen: true }), + ).toStrictEqual(strType('UTF-8')); }); - it('should interpret objects as strings', () => { - expect(parseDType('|O')).toStrictEqual(strType('UTF-8')); - }); + it('should convert compound and complex types', () => { + expect( + parseDType({ + class: 6, + size: 4, + members: { foo: { class: 1, size: 4, order: 0 } }, + }), + ).toStrictEqual(compoundType({ foo: floatType() })); - it('should interpret |b1 as booleans', () => { - expect(parseDType('|b1')).toStrictEqual(boolType()); + expect( + parseDType({ + class: 6, + size: 8, + members: { + r: { class: 1, size: 4, order: 0 }, + i: { class: 1, size: 4, order: 0 }, + }, + }), + ).toStrictEqual(cplxType(floatType(), floatType())); }); - it('should handle "not applicable" endianness symbol', () => { - expect(parseDType('|f8')).toStrictEqual(floatType(64)); + it('should convert enum and boolean types', () => { + expect( + parseDType({ + class: 8, + size: 8, + base: { class: 0, size: 4, order: 0, sign: 0 }, + members: { FOO: 41, BAR: 42 }, + }), + ).toStrictEqual(enumType(uintType(), { FOO: 41, BAR: 42 })); + + expect( + parseDType({ + class: 8, + size: 2, + base: { class: 0, size: 1, order: 0, sign: 0 }, + members: { FALSE: 0, TRUE: 1 }, + }), + ).toStrictEqual(boolType()); }); - it('should convert compound dtype', () => { - expect(parseDType({ country: '|S10', population: ' { + expect( + parseDType({ + class: 9, + size: 1, + base: { class: 1, size: 4, order: 0 }, + }), + ).toStrictEqual(arrayType(floatType())); + + expect( + parseDType({ + class: 10, + size: 1, + base: { class: 1, size: 4, order: 0 }, + dims: [2, 3], }), + ).toStrictEqual(arrayType(floatType(), [2, 3])); + }); + + it('should convert other types', () => { + expect(parseDType({ class: 2, size: 1 })).toStrictEqual(timeType()); + expect(parseDType({ class: 4, size: 1, order: 0 })).toStrictEqual( + bitfieldType(Endianness.LE), + ); + expect(parseDType({ class: 5, size: 1, tag: 'foo' })).toStrictEqual( + opaqueType('foo'), ); + expect(parseDType({ class: 7, size: 1 })).toStrictEqual(referenceType()); }); - it('should handle unknown type', () => { - expect(parseDType('>notAType')).toStrictEqual(unknownType()); + it('should handle unknown types', () => { + expect( + parseDType({ class: 100, size: 1 } as unknown as H5GroveType), + ).toStrictEqual(unknownType()); }); }); diff --git a/packages/app/src/providers/h5grove/utils.ts b/packages/app/src/providers/h5grove/utils.ts index d9fb1c9d3..31f51e8c4 100644 --- a/packages/app/src/providers/h5grove/utils.ts +++ b/packages/app/src/providers/h5grove/utils.ts @@ -1,3 +1,4 @@ +import { isNumericType } from '@h5web/shared/guards'; import type { Attribute, ChildEntity, @@ -5,20 +6,25 @@ import type { Group, ProvidedEntity, } from '@h5web/shared/hdf5-models'; -import { Endianness, EntityKind } from '@h5web/shared/hdf5-models'; +import { EntityKind, H5TClass, H5TSign } from '@h5web/shared/hdf5-models'; import { - boolType, + arrayType, + bitfieldType, buildEntityPath, - compoundType, - cplxType, + compoundOrCplxType, + enumOrBoolType, floatType, - intType, + intOrUintType, + opaqueType, + referenceType, strType, - uintType, + timeType, + toCharSet, + toEndianness, unknownType, } from '@h5web/shared/hdf5-utils'; -import type { H5GroveAttribute, H5GroveDtype, H5GroveEntity } from './models'; +import type { H5GroveAttribute, H5GroveEntity, H5GroveType } from './models'; export function parseEntity( path: string, @@ -40,7 +46,7 @@ export function parseEntity( const { name } = h5gEntity; const baseEntity = { name, path }; - if (h5gEntity.type === EntityKind.Group) { + if (h5gEntity.kind === EntityKind.Group) { const { children = [], attributes: attrsMetadata } = h5gEntity; const attributes = parseAttributes(attrsMetadata); const baseGroup: Group = { @@ -62,10 +68,10 @@ export function parseEntity( }; } - if (h5gEntity.type === EntityKind.Dataset) { + if (h5gEntity.kind === EntityKind.Dataset) { const { attributes: attrsMetadata, - dtype, + type: dtype, shape, chunks, filters, @@ -82,7 +88,7 @@ export function parseEntity( }; } - if (h5gEntity.type === 'soft_link') { + if (h5gEntity.kind === 'soft_link') { const { target_path } = h5gEntity; return { ...baseEntity, @@ -92,7 +98,7 @@ export function parseEntity( }; } - if (h5gEntity.type === 'external_link') { + if (h5gEntity.kind === 'external_link') { const { target_file, target_path } = h5gEntity; return { ...baseEntity, @@ -115,79 +121,80 @@ export function parseEntity( } function parseAttributes(attrsMetadata: H5GroveAttribute[]): Attribute[] { - return attrsMetadata.map(({ name, dtype, shape }) => ({ + return attrsMetadata.map(({ name, type: dtype, shape }) => ({ name, shape, type: parseDType(dtype), })); } -// https://numpy.org/doc/stable/reference/generated/numpy.dtype.byteorder.html#numpy.dtype.byteorder -const ENDIANNESS_MAPPING: Record = { - '<': Endianness.LE, - '>': Endianness.BE, -}; - -export function parseDType(dtype: H5GroveDtype): DType { - if (typeof dtype === 'string') { - return parseDTypeFromString(dtype); - } - - return compoundType( - Object.fromEntries( - Object.entries(dtype).map(([k, v]) => [k, parseDType(v)]), - ), - ); +export function hasErrorMessage(error: unknown): error is { message: string } { + return !!error && typeof error === 'object' && 'message' in error; } -function parseDTypeFromString(dtype: string): DType { - const regexp = /([<>=|])?([A-Za-z])(\d*)/u; - const matches = regexp.exec(dtype); +export function parseDType(type: H5GroveType): DType { + const { class: h5tClass, size } = type; - if (matches === null) { - throw new Error(`Invalid dtype string: ${dtype}`); + if (h5tClass === H5TClass.Integer) { + return intOrUintType( + type.sign === H5TSign.Signed, + size * 8, + toEndianness(type.order), + ); } - const [, endianMatch, dataType, lengthMatch] = matches; + if (h5tClass === H5TClass.Float) { + return floatType(size * 8, toEndianness(type.order)); + } - const length = lengthMatch ? Number.parseInt(lengthMatch, 10) : 0; - const endianness = ENDIANNESS_MAPPING[endianMatch] || undefined; + if (h5tClass === H5TClass.Time) { + return timeType(); + } - switch (dataType) { - case 'b': - // Booleans are stored as bytes but numpy represents them distinctly from "normal" bytes: - // `|b1` for booleans vs. `|i1` for normal bytes - // https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.bool - return boolType(); + if (h5tClass === H5TClass.String) { + const { cset, vlen } = type; + return strType(toCharSet(cset), vlen ? undefined : size); + } - case 'f': - return floatType(length * 8, endianness); + if (h5tClass === H5TClass.Bitfield) { + return bitfieldType(); + } - case 'i': - return intType(length * 8, endianness); + if (h5tClass === H5TClass.Opaque) { + return opaqueType(type.tag); + } - case 'u': - return uintType(length * 8, endianness); + if (h5tClass === H5TClass.Compound) { + return compoundOrCplxType( + Object.fromEntries( + Object.entries(type.members).map(([mName, mType]) => [ + mName, + parseDType(mType), + ]), + ), + ); + } - case 'c': - return cplxType( - floatType( - (length / 2) * 8, // bytes are equally distributed between real and imag - endianness, - ), - ); + if (h5tClass === H5TClass.Reference) { + return referenceType(); + } - case 'S': - return strType('ASCII', length); + if (h5tClass === H5TClass.Enum) { + const base = parseDType(type.base); + if (!isNumericType(base)) { + throw new Error('Expected enum type to have numeric base type'); + } - case 'O': - return strType('UTF-8'); + return enumOrBoolType(base, type.members); + } - default: - return unknownType(); + if (h5tClass === H5TClass.Vlen) { + return arrayType(parseDType(type.base)); } -} -export function hasErrorMessage(error: unknown): error is { message: string } { - return !!error && typeof error === 'object' && 'message' in error; + if (h5tClass === H5TClass.Array) { + return arrayType(parseDType(type.base), type.dims); + } + + return unknownType(); } diff --git a/packages/shared/src/hdf5-models.ts b/packages/shared/src/hdf5-models.ts index 0a92b449b..06b06f65b 100644 --- a/packages/shared/src/hdf5-models.ts +++ b/packages/shared/src/hdf5-models.ts @@ -244,6 +244,16 @@ export enum H5TClass { Array = 10, } +export enum H5TOrder { + LE = 0, + BE = 1, +} + +export enum H5TSign { + Unsigned = 0, + Signed = 1, +} + export enum H5TCharSet { ASCII = 0, UTF8 = 1, diff --git a/packages/shared/src/hdf5-utils.ts b/packages/shared/src/hdf5-utils.ts index 705fedfd1..b5e0bf178 100644 --- a/packages/shared/src/hdf5-utils.ts +++ b/packages/shared/src/hdf5-utils.ts @@ -19,7 +19,7 @@ import type { TimeType, UnknownType, } from './hdf5-models'; -import { DTypeClass, Endianness, H5TCharSet } from './hdf5-models'; +import { DTypeClass, Endianness, H5TCharSet, H5TOrder } from './hdf5-models'; export function getChildEntity( group: GroupWithChildren, @@ -171,6 +171,10 @@ export function cplx(real: number, imag: number): H5WebComplex { /* ------------------------- */ /* --- HDF5 ENUM HELPERS --- */ +export function toEndianness(h5tOrder: number): Endianness { + return h5tOrder === H5TOrder.BE ? Endianness.BE : Endianness.LE; +} + export function toCharSet(h5tCharSet: number): CharSet { return h5tCharSet === H5TCharSet.ASCII ? 'ASCII' : 'UTF-8'; }