Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor updates to data models #60

Merged
merged 7 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions src/geometamaker/geometamaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
'https',
]

DT_FMT = '%Y-%m-%d %H:%M:%S'
DT_FMT = '%Y-%m-%d %H:%M:%S %Z'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!



# TODO: In the future we can remove these exception managers in favor of the
Expand Down Expand Up @@ -179,6 +179,10 @@ def describe_file(source_dataset_path, scheme):
f'{description["bytes"]}{description["last_modified"]}\
{description["path"]}'.encode('ascii'))
description['uid'] = f'sizetimestamp:{hash_func.hexdigest()}'

# We don't have a use for including these attributes in our metadata:
description.pop('mediatype', None)
description.pop('name', None)
return description


Expand All @@ -196,7 +200,7 @@ def describe_archive(source_dataset_path, scheme):
description = describe_file(source_dataset_path, scheme)
# innerpath is from frictionless and not useful because
# it does not include all the files contained in the zip
del description['innerpath']
description.pop('innerpath', None)

ZFS = fsspec.get_filesystem_class('zip')
zfs = ZFS(source_dataset_path)
Expand Down Expand Up @@ -269,7 +273,8 @@ def describe_raster(source_dataset_path, scheme):
description['data_model'] = models.RasterSchema(
bands=bands,
pixel_size=info['pixel_size'],
raster_size=info['raster_size'])
raster_size={'width': info['raster_size'][0],
'height': info['raster_size'][1]})
Comment on lines +276 to +277
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also like that this could be expanded in the future to include the number of bands or any other attributes that express the dimensionality of the dataset, should it become necessary.

# Some values of raster info are numpy types, which the
# yaml dumper doesn't know how to represent.
bbox = models.BoundingBox(*[float(x) for x in info['bounding_box']])
Expand Down Expand Up @@ -425,8 +430,8 @@ def validate(filepath):
with fsspec.open(filepath, 'r') as file:
yaml_string = file.read()
yaml_dict = yaml.safe_load(yaml_string)
if not yaml_dict or 'metadata_version' not in yaml_dict \
or not yaml_dict['metadata_version'].startswith('geometamaker'):
if not yaml_dict or ('metadata_version' not in yaml_dict
and 'geometamaker_version' not in yaml_dict):
message = (f'{filepath} exists but is not compatible with '
f'geometamaker.')
raise ValueError(message)
Expand Down
32 changes: 22 additions & 10 deletions src/geometamaker/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,13 @@ class RasterSchema(Parent):

bands: List[BandSchema]
pixel_size: list
raster_size: list
raster_size: Union[dict, list]

def model_post_init(self, __context):
# Migrate from previous model where we stored this as a list
if isinstance(self.raster_size, list):
self.raster_size = {'width': self.raster_size[0],
'height': self.raster_size[1]}


class BaseMetadata(Parent):
Expand Down Expand Up @@ -255,16 +261,14 @@ class Resource(BaseMetadata):
"""

# A version string we can use to identify geometamaker compliant documents
metadata_version: str = ''
geometamaker_version: str = ''
metadata_path: str = ''

# These are populated by geometamaker.describe()
bytes: int = 0
encoding: str = ''
format: str = ''
uid: str = ''
mediatype: str = ''
name: str = ''
path: str = ''
scheme: str = ''
type: str = ''
Expand All @@ -289,7 +293,7 @@ class Resource(BaseMetadata):

def model_post_init(self, __context):
self.metadata_path = f'{self.path}.yml'
self.metadata_version: str = f'geometamaker.{geometamaker.__version__}'
self.geometamaker_version: str = geometamaker.__version__
self.path = self.path.replace('\\', '/')
self.sources = [x.replace('\\', '/') for x in self.sources]

Expand All @@ -312,14 +316,22 @@ def load(cls, filepath):
with fsspec.open(filepath, 'r') as file:
yaml_string = file.read()
yaml_dict = yaml.safe_load(yaml_string)
if 'metadata_version' not in yaml_dict \
or not yaml_dict['metadata_version'].startswith('geometamaker'):
if not yaml_dict or ('metadata_version' not in yaml_dict
and 'geometamaker_version' not in yaml_dict):
message = (f'{filepath} exists but is not compatible with '
f'geometamaker.')
raise ValueError(message)
# delete this property so that geometamaker can initialize it itself
# with the current version info.
del yaml_dict['metadata_version']

deprecated_attrs = ['metadata_version', 'mediatype', 'name']
for attr in deprecated_attrs:
if attr in yaml_dict:
warnings.warn(
f'"{attr}" exists in {filepath} but is no longer part of '
f'the geometamaker specification. "{attr}" will be '
f'removed from this document. In the future, presence '
f' of "{attr}" will raise a ValidationError',
category=FutureWarning)
del yaml_dict[attr]

# migrate from 'schema' to 'data_model', if needed.
if 'schema' in yaml_dict:
Expand Down
1 change: 1 addition & 0 deletions src/geometamaker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,5 @@ def yaml_dump(data):
return yaml.dump(
data,
allow_unicode=True,
sort_keys=False,
Dumper=_SafeDumper)
Loading