From a3378bc1a3133943697eadcd77fc5fe9860e887e Mon Sep 17 00:00:00 2001 From: davemfish Date: Tue, 7 Jan 2025 10:44:41 -0500 Subject: [PATCH 1/7] change metadata_version to geometamaker_version. #58 --- src/geometamaker/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py index 8a5df74..ffb9149 100644 --- a/src/geometamaker/models.py +++ b/src/geometamaker/models.py @@ -255,7 +255,7 @@ class Resource(BaseMetadata): """ # A version string we can use to identify geometamaker compliant documents - metadata_version: str = '' + geometamaker_version: str = '' metadata_path: str = '' # These are populated geometamaker.describe() @@ -289,7 +289,7 @@ class Resource(BaseMetadata): def model_post_init(self, __context): self.metadata_path = f'{self.path}.yml' - self.metadata_version: str = f'geometamaker.{geometamaker.__version__}' + self.geometamaker_version: str = geometamaker.__version__ self.path = self.path.replace('\\', '/') self.sources = [x.replace('\\', '/') for x in self.sources] From 1f152aa26e9566f881991fcb0f1f4bf7f69e5044 Mon Sep 17 00:00:00 2001 From: davemfish Date: Thu, 9 Jan 2025 10:22:16 -0500 Subject: [PATCH 2/7] deprecate some unused attributes and issue FutureWarnings. #58 --- src/geometamaker/geometamaker.py | 9 ++++++--- src/geometamaker/models.py | 19 +++++++++++++------ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/geometamaker/geometamaker.py b/src/geometamaker/geometamaker.py index 7a97a21..0e1f748 100644 --- a/src/geometamaker/geometamaker.py +++ b/src/geometamaker/geometamaker.py @@ -179,6 +179,9 @@ def describe_file(source_dataset_path, scheme): f'{description["bytes"]}{description["last_modified"]}\ {description["path"]}'.encode('ascii')) description['uid'] = f'sizetimestamp:{hash_func.hexdigest()}' + + # We don't have a use for including this attribute in our metadata: + description.pop('mediatype', None) return description @@ -196,7 +199,7 @@ def describe_archive(source_dataset_path, scheme): description = describe_file(source_dataset_path, scheme) # innerpath is from frictionless and not useful because # it does not include all the files contained in the zip - del description['innerpath'] + description.pop('innerpath', None) ZFS = fsspec.get_filesystem_class('zip') zfs = ZFS(source_dataset_path) @@ -425,8 +428,8 @@ def validate(filepath): with fsspec.open(filepath, 'r') as file: yaml_string = file.read() yaml_dict = yaml.safe_load(yaml_string) - if not yaml_dict or 'metadata_version' not in yaml_dict \ - or not yaml_dict['metadata_version'].startswith('geometamaker'): + if not yaml_dict or ('metadata_version' not in yaml_dict + and 'geometamaker_version' not in yaml_dict): message = (f'{filepath} exists but is not compatible with ' f'geometamaker.') raise ValueError(message) diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py index ffb9149..02a46b8 100644 --- a/src/geometamaker/models.py +++ b/src/geometamaker/models.py @@ -263,7 +263,6 @@ class Resource(BaseMetadata): encoding: str = '' format: str = '' uid: str = '' - mediatype: str = '' name: str = '' path: str = '' scheme: str = '' @@ -312,14 +311,22 @@ def load(cls, filepath): with fsspec.open(filepath, 'r') as file: yaml_string = file.read() yaml_dict = yaml.safe_load(yaml_string) - if 'metadata_version' not in yaml_dict \ - or not yaml_dict['metadata_version'].startswith('geometamaker'): + if not yaml_dict or ('metadata_version' not in yaml_dict + and 'geometamaker_version' not in yaml_dict): message = (f'{filepath} exists but is not compatible with ' f'geometamaker.') raise ValueError(message) - # delete this property so that geometamaker can initialize it itself - # with the current version info. - del yaml_dict['metadata_version'] + + deprecated_attrs = ['metadata_version', 'mediatype'] + for attr in deprecated_attrs: + if attr in yaml_dict: + warnings.warn( + f'"{attr}" exists in {filepath} but is no longer part of ' + f'the geometamaker specification. "{attr}" will be ' + f'removed from this document. In the future, presence ' + f' of "{attr}" will raise a ValidationError', + category=FutureWarning) + del yaml_dict[attr] # migrate from 'schema' to 'data_model', if needed. if 'schema' in yaml_dict: From 819fdf9cd7bc60590f52d72fd63694b579a07f10 Mon Sep 17 00:00:00 2001 From: davemfish Date: Thu, 9 Jan 2025 10:29:43 -0500 Subject: [PATCH 3/7] add timezone to datetime stamp. #58 --- src/geometamaker/geometamaker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/geometamaker/geometamaker.py b/src/geometamaker/geometamaker.py index 0e1f748..da492c5 100644 --- a/src/geometamaker/geometamaker.py +++ b/src/geometamaker/geometamaker.py @@ -28,7 +28,7 @@ 'https', ] -DT_FMT = '%Y-%m-%d %H:%M:%S' +DT_FMT = '%Y-%m-%d %H:%M:%S %Z' # TODO: In the future we can remove these exception managers in favor of the From 84d2bca405ebd244668a01ce9d72aff349049dc3 Mon Sep 17 00:00:00 2001 From: davemfish Date: Thu, 9 Jan 2025 10:40:02 -0500 Subject: [PATCH 4/7] add 'name' to list of deprecated attrs. #58 --- src/geometamaker/geometamaker.py | 3 ++- src/geometamaker/models.py | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/geometamaker/geometamaker.py b/src/geometamaker/geometamaker.py index da492c5..27b556d 100644 --- a/src/geometamaker/geometamaker.py +++ b/src/geometamaker/geometamaker.py @@ -180,8 +180,9 @@ def describe_file(source_dataset_path, scheme): {description["path"]}'.encode('ascii')) description['uid'] = f'sizetimestamp:{hash_func.hexdigest()}' - # We don't have a use for including this attribute in our metadata: + # We don't have a use for including these attributes in our metadata: description.pop('mediatype', None) + description.pop('name', None) return description diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py index 02a46b8..caeed86 100644 --- a/src/geometamaker/models.py +++ b/src/geometamaker/models.py @@ -263,7 +263,6 @@ class Resource(BaseMetadata): encoding: str = '' format: str = '' uid: str = '' - name: str = '' path: str = '' scheme: str = '' type: str = '' @@ -317,7 +316,7 @@ def load(cls, filepath): f'geometamaker.') raise ValueError(message) - deprecated_attrs = ['metadata_version', 'mediatype'] + deprecated_attrs = ['metadata_version', 'mediatype', 'name'] for attr in deprecated_attrs: if attr in yaml_dict: warnings.warn( From b9633b3856ddeb18b56dcae5e6603ec16d72335e Mon Sep 17 00:00:00 2001 From: davemfish Date: Thu, 9 Jan 2025 11:24:30 -0500 Subject: [PATCH 5/7] migrate raster size from list to dict. #58 --- src/geometamaker/geometamaker.py | 3 ++- src/geometamaker/models.py | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/geometamaker/geometamaker.py b/src/geometamaker/geometamaker.py index 27b556d..0230ca7 100644 --- a/src/geometamaker/geometamaker.py +++ b/src/geometamaker/geometamaker.py @@ -273,7 +273,8 @@ def describe_raster(source_dataset_path, scheme): description['data_model'] = models.RasterSchema( bands=bands, pixel_size=info['pixel_size'], - raster_size=info['raster_size']) + raster_size={'width': info['raster_size'][0], + 'height': info['raster_size'][1]}) # Some values of raster info are numpy types, which the # yaml dumper doesn't know how to represent. bbox = models.BoundingBox(*[float(x) for x in info['bounding_box']]) diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py index caeed86..512372a 100644 --- a/src/geometamaker/models.py +++ b/src/geometamaker/models.py @@ -102,7 +102,13 @@ class RasterSchema(Parent): bands: List[BandSchema] pixel_size: list - raster_size: list + raster_size: dict | list + + def model_post_init(self, __context): + # Migrate from previous model where we stored this as a list + if isinstance(self.raster_size, list): + self.raster_size = {'width': self.raster_size[0], + 'height': self.raster_size[1]} class BaseMetadata(Parent): From c6235f0767b8af3dbe9a170a4e2646d1aa53757f Mon Sep 17 00:00:00 2001 From: davemfish Date: Thu, 9 Jan 2025 11:57:36 -0500 Subject: [PATCH 6/7] use Python3.9 type syntax. #58 --- src/geometamaker/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py index 512372a..b6e381b 100644 --- a/src/geometamaker/models.py +++ b/src/geometamaker/models.py @@ -102,7 +102,7 @@ class RasterSchema(Parent): bands: List[BandSchema] pixel_size: list - raster_size: dict | list + raster_size: Union[dict, list] def model_post_init(self, __context): # Migrate from previous model where we stored this as a list From f41e17fc4d7699bb48e1b27aa81d68e5ffe29000 Mon Sep 17 00:00:00 2001 From: davemfish Date: Thu, 9 Jan 2025 12:28:59 -0500 Subject: [PATCH 7/7] don't sort the yaml keys alphabetically. #58 --- src/geometamaker/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/geometamaker/utils.py b/src/geometamaker/utils.py index 9bcd363..8cab2a5 100644 --- a/src/geometamaker/utils.py +++ b/src/geometamaker/utils.py @@ -26,4 +26,5 @@ def yaml_dump(data): return yaml.dump( data, allow_unicode=True, + sort_keys=False, Dumper=_SafeDumper)