Skip to content

Commit

Permalink
Merge pull request #60 from davemfish/task/58-minor-model-updates
Browse files: browse the repository at this point in the history
Minor updates to data models
  • Loading branch information
phargogh authored Jan 9, 2025
2 parents 1fb8abd + f41e17f commit cbd923d
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 15 deletions.
15 changes: 10 additions & 5 deletions src/geometamaker/geometamaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
'https',
]

DT_FMT = '%Y-%m-%d %H:%M:%S'
DT_FMT = '%Y-%m-%d %H:%M:%S %Z'


# TODO: In the future we can remove these exception managers in favor of the
Expand Down Expand Up @@ -179,6 +179,10 @@ def describe_file(source_dataset_path, scheme):
f'{description["bytes"]}{description["last_modified"]}\
{description["path"]}'.encode('ascii'))
description['uid'] = f'sizetimestamp:{hash_func.hexdigest()}'

# We don't have a use for including these attributes in our metadata:
description.pop('mediatype', None)
description.pop('name', None)
return description


Expand All @@ -196,7 +200,7 @@ def describe_archive(source_dataset_path, scheme):
description = describe_file(source_dataset_path, scheme)
# innerpath is from frictionless and not useful because
# it does not include all the files contained in the zip
del description['innerpath']
description.pop('innerpath', None)

ZFS = fsspec.get_filesystem_class('zip')
zfs = ZFS(source_dataset_path)
Expand Down Expand Up @@ -269,7 +273,8 @@ def describe_raster(source_dataset_path, scheme):
description['data_model'] = models.RasterSchema(
bands=bands,
pixel_size=info['pixel_size'],
raster_size=info['raster_size'])
raster_size={'width': info['raster_size'][0],
'height': info['raster_size'][1]})
# Some values of raster info are numpy types, which the
# yaml dumper doesn't know how to represent.
bbox = models.BoundingBox(*[float(x) for x in info['bounding_box']])
Expand Down Expand Up @@ -425,8 +430,8 @@ def validate(filepath):
with fsspec.open(filepath, 'r') as file:
yaml_string = file.read()
yaml_dict = yaml.safe_load(yaml_string)
if not yaml_dict or 'metadata_version' not in yaml_dict \
or not yaml_dict['metadata_version'].startswith('geometamaker'):
if not yaml_dict or ('metadata_version' not in yaml_dict
and 'geometamaker_version' not in yaml_dict):
message = (f'{filepath} exists but is not compatible with '
f'geometamaker.')
raise ValueError(message)
Expand Down
32 changes: 22 additions & 10 deletions src/geometamaker/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,13 @@ class RasterSchema(Parent):

bands: List[BandSchema]
pixel_size: list
raster_size: list
raster_size: Union[dict, list]

def model_post_init(self, __context):
# Migrate from previous model where we stored this as a list
if isinstance(self.raster_size, list):
self.raster_size = {'width': self.raster_size[0],
'height': self.raster_size[1]}


class BaseMetadata(Parent):
Expand Down Expand Up @@ -255,16 +261,14 @@ class Resource(BaseMetadata):
"""

# A version string we can use to identify geometamaker compliant documents
metadata_version: str = ''
geometamaker_version: str = ''
metadata_path: str = ''

# These are populated by geometamaker.describe()
bytes: int = 0
encoding: str = ''
format: str = ''
uid: str = ''
mediatype: str = ''
name: str = ''
path: str = ''
scheme: str = ''
type: str = ''
Expand All @@ -289,7 +293,7 @@ class Resource(BaseMetadata):

def model_post_init(self, __context):
self.metadata_path = f'{self.path}.yml'
self.metadata_version: str = f'geometamaker.{geometamaker.__version__}'
self.geometamaker_version: str = geometamaker.__version__
self.path = self.path.replace('\\', '/')
self.sources = [x.replace('\\', '/') for x in self.sources]

Expand All @@ -312,14 +316,22 @@ def load(cls, filepath):
with fsspec.open(filepath, 'r') as file:
yaml_string = file.read()
yaml_dict = yaml.safe_load(yaml_string)
if 'metadata_version' not in yaml_dict \
or not yaml_dict['metadata_version'].startswith('geometamaker'):
if not yaml_dict or ('metadata_version' not in yaml_dict
and 'geometamaker_version' not in yaml_dict):
message = (f'{filepath} exists but is not compatible with '
f'geometamaker.')
raise ValueError(message)
# delete this property so that geometamaker can initialize it itself
# with the current version info.
del yaml_dict['metadata_version']

deprecated_attrs = ['metadata_version', 'mediatype', 'name']
for attr in deprecated_attrs:
if attr in yaml_dict:
warnings.warn(
f'"{attr}" exists in {filepath} but is no longer part of '
f'the geometamaker specification. "{attr}" will be '
f'removed from this document. In the future, presence '
f' of "{attr}" will raise a ValidationError',
category=FutureWarning)
del yaml_dict[attr]

# migrate from 'schema' to 'data_model', if needed.
if 'schema' in yaml_dict:
Expand Down
1 change: 1 addition & 0 deletions src/geometamaker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,5 @@ def yaml_dump(data):
return yaml.dump(
data,
allow_unicode=True,
sort_keys=False,
Dumper=_SafeDumper)

0 comments on commit cbd923d

Please sign in to comment.