diff --git a/src/geometamaker/geometamaker.py b/src/geometamaker/geometamaker.py
index 9993259..e8a40bc 100644
--- a/src/geometamaker/geometamaker.py
+++ b/src/geometamaker/geometamaker.py
@@ -261,7 +261,7 @@ def describe_raster(source_dataset_path, scheme):
         b = i + 1
         bands.append(models.BandSchema(
             index=b,
-            gdal_type=info['datatype'],
+            gdal_type=gdal.GetDataTypeName(info['datatype']),
             numpy_type=numpy.dtype(info['numpy_type']).name,
             nodata=info['nodata'][i]))
     description['schema'] = models.RasterSchema(
diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py
index bbe432f..67e62f3 100644
--- a/src/geometamaker/models.py
+++ b/src/geometamaker/models.py
@@ -8,23 +8,12 @@
 import yaml
 
 import geometamaker
+from . import utils
 
 
 LOGGER = logging.getLogger(__name__)
 
 
-# https://stackoverflow.com/questions/13518819/avoid-references-in-pyyaml
-class _NoAliasDumper(yaml.SafeDumper):
-    """Keep the yaml human-readable by avoiding anchors and aliases."""
-
-    def ignore_aliases(self, data):
-        return True
-
-
-def _yaml_dump(data):
-    return yaml.dump(data, allow_unicode=True, Dumper=_NoAliasDumper)
-
-
 @dataclass(frozen=True)
 class BoundingBox:
     """Class for a spatial bounding box."""
@@ -107,7 +96,7 @@ class BandSchema:
     """Class for metadata for a raster band."""
 
     index: int
-    gdal_type: int
+    gdal_type: str
     numpy_type: str
     nodata: int | float
     description: str = ''
@@ -275,7 +264,7 @@ def write(self, target_path):
 
         """
         with open(target_path, 'w') as file:
-            file.write(_yaml_dump(dataclasses.asdict(self)))
+            file.write(utils.yaml_dump(dataclasses.asdict(self)))
 
 
 @dataclass()
@@ -520,7 +509,7 @@ def write(self, workspace=None):
                 workspace, os.path.basename(self.metadata_path))
 
         with open(target_path, 'w') as file:
-            file.write(_yaml_dump(dataclasses.asdict(self)))
+            file.write(utils.yaml_dump(dataclasses.asdict(self)))
 
     def to_string(self):
         pass
diff --git a/src/geometamaker/utils.py b/src/geometamaker/utils.py
new file mode 100644
index 0000000..9bcd363
--- /dev/null
+++ b/src/geometamaker/utils.py
@@ -0,0 +1,52 @@
+"""Utilities for serializing metadata to YAML."""
+
+import yaml
+
+
+def _represent_str(dumper, data):
+    """Represent a string, requesting literal block style if multiline.
+
+    Args:
+        dumper: the yaml dumper instance doing the representing.
+        data (str): the string to represent.
+
+    Returns:
+        The scalar node created for ``data``.
+
+    """
+    scalar = yaml.representer.SafeRepresenter.represent_str(dumper, data)
+    if len(data.splitlines()) > 1:
+        scalar.style = '|'  # literal style, newline chars will be new lines
+    return scalar
+
+
+class _SafeDumper(yaml.SafeDumper):
+    """A SafeDumper that favors human-readable output."""
+
+    # https://stackoverflow.com/questions/13518819/avoid-references-in-pyyaml
+    def ignore_aliases(self, data):
+        """Keep the yaml human-readable by avoiding anchors and aliases."""
+        return True
+
+
+# Patch the default string representer to use a literal block
+# style when the data contain newline characters. add_representer
+# is a classmethod that updates the class-level registry, so it is
+# called once here instead of in __init__ on every dump.
+_SafeDumper.add_representer(str, _represent_str)
+
+
+def yaml_dump(data):
+    """Serialize data to a YAML string.
+
+    Args:
+        data: object to serialize with the safe dumper.
+
+    Returns:
+        str: the YAML document.
+
+    """
+    return yaml.dump(
+        data,
+        allow_unicode=True,
+        Dumper=_SafeDumper)
diff --git a/tests/test_geometamaker.py b/tests/test_geometamaker.py
index 0c4b688..3853585 100644
--- a/tests/test_geometamaker.py
+++ b/tests/test_geometamaker.py
@@ -255,7 +255,8 @@ def test_raster_attributes(self):
         band = resource.schema.bands[band_idx]
         self.assertEqual(band.title, title)
         self.assertEqual(band.description, description)
-        self.assertEqual(band.gdal_type, raster_info['datatype'])
+        self.assertEqual(
+            band.gdal_type, gdal.GetDataTypeName(raster_info['datatype']))
         self.assertEqual(band.numpy_type, numpy.dtype(numpy_type).name)
         self.assertEqual(band.nodata, raster_info['nodata'][band_idx])
         self.assertEqual(band.units, units)
@@ -395,11 +396,28 @@ def test_set_and_get_lineage(self):
         import geometamaker
 
         resource = geometamaker.models.Resource()
-        statement = 'a lineage statment'
+        statement = 'a lineage statment\n is long and has\n many lines.'
         resource.set_lineage(statement)
 
         self.assertEqual(resource.get_lineage(), statement)
 
+    def test_lineage_roundtrip(self):
+        """Test writing and loading yaml with block indicator."""
+        import geometamaker
+
+        datasource_path = os.path.join(self.workspace_dir, 'raster.tif')
+        numpy_type = numpy.int16
+        create_raster(numpy_type, datasource_path)
+
+        resource = geometamaker.describe(datasource_path)
+
+        statement = 'a lineage statment\n is long and has\n many lines.'
+        resource.set_lineage(statement)
+        resource.write()
+
+        new_resource = geometamaker.describe(datasource_path)
+        self.assertEqual(new_resource.get_lineage(), statement)
+
     def test_set_and_get_purpose(self):
         """Test set and get purpose of resource."""
         import geometamaker