Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: respect newline characters in strings #51

Merged
merged 7 commits into from
Nov 5, 2024
2 changes: 1 addition & 1 deletion src/geometamaker/geometamaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def describe_raster(source_dataset_path, scheme):
b = i + 1
bands.append(models.BandSchema(
index=b,
gdal_type=info['datatype'],
gdal_type=gdal.GetDataTypeName(info['datatype']),
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not related to the rest of the PR, but we should have been using the human-readable names all along.

numpy_type=numpy.dtype(info['numpy_type']).name,
nodata=info['nodata'][i]))
description['schema'] = models.RasterSchema(
Expand Down
19 changes: 4 additions & 15 deletions src/geometamaker/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,12 @@
import yaml

import geometamaker
from . import utils


LOGGER = logging.getLogger(__name__)


# https://stackoverflow.com/questions/13518819/avoid-references-in-pyyaml
class _NoAliasDumper(yaml.SafeDumper):
    """A ``yaml.SafeDumper`` that opts out of anchors and aliases.

    Avoiding anchors and aliases keeps the dumped yaml human-readable.
    """

    def ignore_aliases(self, data):
        """Tell the dumper to ignore aliases for every object.

        Args:
            data: the object being represented; it is not inspected.

        Returns:
            bool: always ``True``.
        """
        return True


def _yaml_dump(data):
    """Dump ``data`` with ``yaml.dump`` using the alias-free dumper.

    Args:
        data: the object to serialize.

    Returns:
        Whatever ``yaml.dump`` returns for ``data`` when unicode output is
        allowed and ``_NoAliasDumper`` is the dumper class.
    """
    dump_options = {
        'allow_unicode': True,
        'Dumper': _NoAliasDumper,
    }
    return yaml.dump(data, **dump_options)


@dataclass(frozen=True)
class BoundingBox:
"""Class for a spatial bounding box."""
Expand Down Expand Up @@ -107,7 +96,7 @@ class BandSchema:
"""Class for metadata for a raster band."""

index: int
gdal_type: int
gdal_type: str
numpy_type: str
nodata: int | float
description: str = ''
Expand Down Expand Up @@ -275,7 +264,7 @@ def write(self, target_path):

"""
with open(target_path, 'w') as file:
file.write(_yaml_dump(dataclasses.asdict(self)))
file.write(utils.yaml_dump(dataclasses.asdict(self)))


@dataclass()
Expand Down Expand Up @@ -520,7 +509,7 @@ def write(self, workspace=None):
workspace, os.path.basename(self.metadata_path))

with open(target_path, 'w') as file:
file.write(_yaml_dump(dataclasses.asdict(self)))
file.write(utils.yaml_dump(dataclasses.asdict(self)))

def to_string(self):
pass
Expand Down
29 changes: 29 additions & 0 deletions src/geometamaker/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import yaml


def _represent_str(dumper, data):
    """Represent a string, using literal block style when it is multi-line.

    Args:
        dumper: the dumper instance doing the representing; passed through
            to ``SafeRepresenter.represent_str``.
        data (str): the string to represent.

    Returns:
        The scalar node built by ``SafeRepresenter.represent_str``, with
        its ``style`` set to ``'|'`` when ``data`` splits into more than
        one line.
    """
    node = yaml.representer.SafeRepresenter.represent_str(dumper, data)
    line_count = len(data.splitlines())
    if line_count >= 2:
        # literal style, newline chars will be new lines
        node.style = '|'
    return node


class _SafeDumper(yaml.SafeDumper):
    """``yaml.SafeDumper`` variant tuned for human-readable output.

    Strings are represented with ``_represent_str`` (registered on this
    class immediately after its definition), and anchors/aliases are never
    emitted.
    """

    # https://stackoverflow.com/questions/13518819/avoid-references-in-pyyaml
    def ignore_aliases(self, data):
        """Keep the yaml human-readable by avoiding anchors and aliases."""
        return True


# Patch the default string representer to use a literal block style when
# the data contain newline characters. ``add_representer`` is a classmethod
# that stores the representer on the class itself, so it is registered once
# here instead of being re-registered every time a dumper is instantiated.
_SafeDumper.add_representer(str, _represent_str)


def yaml_dump(data):
    """Dump ``data`` with ``yaml.dump`` using this module's custom dumper.

    Args:
        data: the object to serialize.

    Returns:
        Whatever ``yaml.dump`` returns for ``data`` when unicode output is
        allowed and ``_SafeDumper`` is the dumper class.
    """
    dump_options = {
        'allow_unicode': True,
        'Dumper': _SafeDumper,
    }
    return yaml.dump(data, **dump_options)
22 changes: 20 additions & 2 deletions tests/test_geometamaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,8 @@ def test_raster_attributes(self):
band = resource.schema.bands[band_idx]
self.assertEqual(band.title, title)
self.assertEqual(band.description, description)
self.assertEqual(band.gdal_type, raster_info['datatype'])
self.assertEqual(
band.gdal_type, gdal.GetDataTypeName(raster_info['datatype']))
self.assertEqual(band.numpy_type, numpy.dtype(numpy_type).name)
self.assertEqual(band.nodata, raster_info['nodata'][band_idx])
self.assertEqual(band.units, units)
Expand Down Expand Up @@ -395,11 +396,28 @@ def test_set_and_get_lineage(self):
import geometamaker

resource = geometamaker.models.Resource()
statement = 'a lineage statment'
statement = 'a lineage statment\n is long and has\n many lines.'

resource.set_lineage(statement)
self.assertEqual(resource.get_lineage(), statement)

def test_lineage_roundtrip(self):
    """Test that a multi-line lineage survives writing and re-describing."""
    import geometamaker

    raster_path = os.path.join(self.workspace_dir, 'raster.tif')
    create_raster(numpy.int16, raster_path)

    lineage = 'a lineage statment\n is long and has\n many lines.'

    # Describe the raster, attach the multi-line lineage, and write it out.
    described = geometamaker.describe(raster_path)
    described.set_lineage(lineage)
    described.write()

    # Describing the same raster again should give back the same lineage.
    reloaded = geometamaker.describe(raster_path)
    self.assertEqual(reloaded.get_lineage(), lineage)

def test_set_and_get_purpose(self):
"""Test set and get purpose of resource."""
import geometamaker
Expand Down
Loading