Skip to content

Commit

Permalink
add string dataset condition for data type conversion on export (#1205)
Browse files: browse the repository at this point in the history
* add strdataset condition for dtype conversion

* add strdataset conversion test

* update CHANGELOG
  • Loading branch information
stephprince authored Nov 11, 2024
1 parent ea6504f commit 6cf7752
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

### Bug fixes
- Fixed inaccurate error message when validating reference data types. @stephprince [#1199](https://github.com/hdmf-dev/hdmf/pull/1199)
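- Fixed incorrect dtype conversion of a StrDataset on export: a StrDataset with a text spec dtype is now returned unchanged instead of being cast with `astype('U')`. @stephprince [#1205](https://github.com/hdmf-dev/hdmf/pull/1205)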

## HDMF 3.14.5 (October 6, 2024)

Expand Down
7 changes: 5 additions & 2 deletions src/hdmf/build/objectmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from ..query import ReferenceResolver
from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, RefSpec
from ..spec.spec import BaseStorageSpec
from ..utils import docval, getargs, ExtenderMeta, get_docval, get_data_shape
from ..utils import docval, getargs, ExtenderMeta, get_docval, get_data_shape, StrDataset

_const_arg = '__constructor_arg'

Expand Down Expand Up @@ -212,7 +212,10 @@ def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901
if (isinstance(value, np.ndarray) or
(hasattr(value, 'astype') and hasattr(value, 'dtype'))):
if spec_dtype_type is _unicode:
ret = value.astype('U')
if isinstance(value, StrDataset):
ret = value
else:
ret = value.astype('U')
ret_dtype = "utf8"
elif spec_dtype_type is _ascii:
ret = value.astype('S')
Expand Down
18 changes: 18 additions & 0 deletions tests/unit/build_tests/test_convert_dtype.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
from datetime import datetime, date

import numpy as np
import h5py
import unittest

from hdmf.backends.hdf5 import H5DataIO
from hdmf.build import ObjectMapper
from hdmf.data_utils import DataChunkIterator
from hdmf.spec import DatasetSpec, RefSpec, DtypeSpec
from hdmf.testing import TestCase
from hdmf.utils import StrDataset

# True when the installed h5py is major version 3 or later. The major version is
# compared numerically so that a future h5py 4.x still counts as "3+" (a plain
# startswith('3') check would silently skip the StrDataset tests there).
H5PY_3 = int(h5py.__version__.split('.')[0]) >= 3

class TestConvertDtype(TestCase):

Expand Down Expand Up @@ -321,6 +326,19 @@ def test_text_spec(self):
self.assertIs(ret, value)
self.assertEqual(ret_dtype, 'utf8')

@unittest.skipIf(not H5PY_3, "Use StrDataset only for h5py 3+")
def test_text_spec_str_dataset(self):
    """Check that convert_dtype hands back a StrDataset untouched for every text dtype alias."""
    for alias in ('text', 'utf', 'utf8', 'utf-8'):
        with self.subTest(spec_type=alias):
            # "core" driver with backing_store=False: the HDF5 file is held in memory only
            with h5py.File("test.h5", "w", driver="core", backing_store=False) as h5file:
                dataset_spec = DatasetSpec('an example dataset', alias, name='data')

                raw_dataset = h5file.create_dataset('data', data=['a', 'b', 'c'])
                value = StrDataset(raw_dataset, None)

                # no conversion: the very same object must come back, tagged as utf8
                converted, converted_dtype = ObjectMapper.convert_dtype(dataset_spec, value)
                self.assertIs(converted, value)
                self.assertEqual(converted_dtype, 'utf8')

def test_ascii_spec(self):
ascii_spec_types = ['ascii', 'bytes']
for spec_type in ascii_spec_types:
Expand Down

0 comments on commit 6cf7752

Please sign in to comment.