From dc7acaf90b8708f1b7570037e60df4dc79ae2f4e Mon Sep 17 00:00:00 2001 From: davemfish Date: Wed, 6 Mar 2024 12:53:37 -0500 Subject: [PATCH 1/6] use fsspec to open remote MCF files. #18 --- requirements.txt | 6 +++-- src/geometamaker/geometamaker.py | 40 +++++++++++++++++++++++--------- tests/test_geometamaker.py | 7 ++++++ 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/requirements.txt b/requirements.txt index 95ba3cc..564356b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,9 +3,11 @@ # This file records the packages and requirements needed in order for # the library to work as expected. And to run tests. GDAL +httpaio jsonschema numpy pygeometa pygeoprocessing>=2.4.2 -shapely -pyyaml \ No newline at end of file +pyyaml +requests +shapely \ No newline at end of file diff --git a/src/geometamaker/geometamaker.py b/src/geometamaker/geometamaker.py index 836d5a6..10fe8ca 100644 --- a/src/geometamaker/geometamaker.py +++ b/src/geometamaker/geometamaker.py @@ -3,6 +3,7 @@ import uuid from datetime import datetime +import fsspec import jsonschema from jsonschema.exceptions import ValidationError import pygeometa.core @@ -198,23 +199,38 @@ def __init__(self, source_dataset_path=None): self.datasource = source_dataset_path self.mcf_path = f'{self.datasource}.yml' - if os.path.exists(self.mcf_path): - try: - # pygeometa.core.read_mcf can parse nested MCF documents, - # where one MCF refers to another - self.mcf = pygeometa.core.read_mcf(self.mcf_path) - self.validate() - except (pygeometa.core.MCFReadError, ValidationError, - AttributeError) as err: - # AttributeError in read_mcf not caught by pygeometa - LOGGER.warning(err) - self.mcf = None + # Despite naming, this does not open a resource that must be closed + of = fsspec.open(self.datasource) + if not of.fs.exists(self.datasource): + raise FileNotFoundError(f'{self.datasource} does not exist') + + try: + with fsspec.open(self.mcf_path, 'r') as file: + yaml_string = file.read() + + # pygeometa.core.read_mcf can parse nested MCF documents, + # where one MCF refers to another + self.mcf = pygeometa.core.read_mcf(yaml_string) + LOGGER.info(f'loaded existing metadata from {self.mcf_path}') + self.validate() + + # Common path: MCF often does not already exist + except FileNotFoundError as err: + LOGGER.debug(err) + + # Uncommon path: MCF already exists but cannot be used + except (pygeometa.core.MCFReadError, + ValidationError, AttributeError) as err: + # AttributeError in read_mcf not caught by pygeometa + LOGGER.warning(err) + self.mcf = None if self.mcf is None: self.mcf = _get_template(MCF_SCHEMA) self.mcf['metadata']['identifier'] = str(uuid.uuid4()) # fill all values that can be derived from the dataset + LOGGER.debug(f'getting properties from {source_dataset_path}') self._set_spatial_info() else: @@ -561,6 +577,7 @@ def _set_spatial_info(self): self.mcf['metadata']['hierarchylevel'] = 'dataset' if gis_type == pygeoprocessing.VECTOR_TYPE: + LOGGER.debug('opening as GDAL vector') self.mcf['spatial']['datatype'] = 'vector' self.mcf['content_info']['type'] = 'coverage' @@ -611,6 +628,7 @@ def _set_spatial_info(self): gis_info = pygeoprocessing.get_vector_info(self.datasource) if gis_type == pygeoprocessing.RASTER_TYPE: + LOGGER.debug('opening as GDAL raster') self.mcf['spatial']['datatype'] = 'grid' self.mcf['spatial']['geomtype'] = 'surface' self.mcf['content_info']['type'] = 'image' diff --git a/tests/test_geometamaker.py b/tests/test_geometamaker.py index 75ea870..00e8fe5 100644 --- a/tests/test_geometamaker.py +++ b/tests/test_geometamaker.py @@ -96,6 +96,13 @@ def tearDown(self): """Override tearDown function to remove temporary directory.""" shutil.rmtree(self.workspace_dir) + def test_file_does_not_exist(self): + """MetadataControl: raises exception if given file does not exist.""" + from geometamaker import MetadataControl + + with self.assertRaises(FileNotFoundError): + _ = MetadataControl('foo.tif') + def test_blank_MetadataControl(self): """MetadataControl: template has expected properties.""" from geometamaker import MetadataControl From 6618b788bfb0aa0a51f7e8768627658852308d70 Mon Sep 17 00:00:00 2001 From: davemfish Date: Wed, 6 Mar 2024 16:26:49 -0500 Subject: [PATCH 2/6] allow writing metadata docs to locations other than the same location as the dataset. #18 --- src/geometamaker/geometamaker.py | 16 +++++++++++++--- tests/test_geometamaker.py | 18 ++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/geometamaker/geometamaker.py b/src/geometamaker/geometamaker.py index 10fe8ca..916d2a7 100644 --- a/src/geometamaker/geometamaker.py +++ b/src/geometamaker/geometamaker.py @@ -541,7 +541,7 @@ def _write_mcf(self, target_path): with open(target_path, 'w') as file: file.write(yaml.dump(self.mcf, Dumper=_NoAliasDumper)) - def write(self): + def write(self, workspace=None): """Write MCF and ISO-19139 XML to disk. This creates sidecar files with '.yml' and '.xml' extensions @@ -552,12 +552,22 @@ def write(self): - 'myraster.tif.xml' """ + if workspace is None: + target_mcf_path = self.mcf_path + target_xml_path = f'{self.datasource}.xml' + else: + target_mcf_path = os.path.join( + workspace, f'{os.path.basename(self.datasource)}.yml') + target_xml_path = os.path.join( + workspace, f'{os.path.basename(self.datasource)}.xml') + self.mcf['metadata']['datestamp'] = datetime.utcnow().strftime( '%Y-%m-%d') - self._write_mcf(self.mcf_path) + self._write_mcf(target_mcf_path) + schema_obj = load_schema('iso19139') xml_string = schema_obj.write(self.mcf) - with open(f'{self.datasource}.xml', 'w') as xmlfile: + with open(target_xml_path, 'w') as xmlfile: xmlfile.write(xml_string) def validate(self): diff --git a/tests/test_geometamaker.py b/tests/test_geometamaker.py index 00e8fe5..b64bef5 100644 --- a/tests/test_geometamaker.py +++ b/tests/test_geometamaker.py @@ -658,3 +658,21 @@ def test_invalid_preexisting_mcf(self): self.fail( 'unexpected write error occurred\n' f'{e}') + + def test_write_to_local_workspace(self): + """MetadataControl: test write metadata to a different location.""" + from geometamaker import MetadataControl + + datasource_path = os.path.join(self.workspace_dir, 'raster.tif') + create_raster(numpy.int16, datasource_path) + mc = MetadataControl(datasource_path) + + temp_dir = tempfile.mkdtemp(dir=self.workspace_dir) + mc.write(workspace=temp_dir) + + self.assertTrue( + os.path.exists(os.path.join( + temp_dir, f'{os.path.basename(datasource_path)}.yml'))) + self.assertTrue( + os.path.exists(os.path.join( + temp_dir, f'{os.path.basename(datasource_path)}.xml'))) \ No newline at end of file From c06622462aaf7d9f9fa7b750044562e338fb8fef Mon Sep 17 00:00:00 2001 From: davemfish Date: Thu, 7 Mar 2024 08:26:34 -0500 Subject: [PATCH 3/6] bump pygeoprocessing requirement. #18 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 564356b..526a9f3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ httpaio jsonschema numpy pygeometa -pygeoprocessing>=2.4.2 +pygeoprocessing>=2.4.3 pyyaml requests shapely \ No newline at end of file From e21288bc9a4896e1b77b5ba2c9bed0fbf4a06d53 Mon Sep 17 00:00:00 2001 From: davemfish Date: Thu, 7 Mar 2024 08:43:41 -0500 Subject: [PATCH 4/6] edits to docstrings. #18 --- src/geometamaker/geometamaker.py | 11 +++++++++-- tests/test_geometamaker.py | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/geometamaker/geometamaker.py b/src/geometamaker/geometamaker.py index 916d2a7..8bc6c39 100644 --- a/src/geometamaker/geometamaker.py +++ b/src/geometamaker/geometamaker.py @@ -190,8 +190,8 @@ def __init__(self, source_dataset_path=None): Instantiating without a ``source_dataset_path`` creates an MCF template. Args: - source_dataset_path (string): path to dataset to which the metadata - applies + source_dataset_path (string): path or URL to dataset to which the + metadata applies """ self.mcf = None @@ -551,6 +551,13 @@ def write(self, workspace=None): - 'myraster.tif.yml' - 'myraster.tif.xml' + Args: + workspace (str): if ``None``, files write to the same location + as the source data. If not ``None``, a path to a local directory + to write files. They will still be named to match the source + filename. Use this option if the source data is not on the local + filesystem. + """ if workspace is None: target_mcf_path = self.mcf_path diff --git a/tests/test_geometamaker.py b/tests/test_geometamaker.py index b64bef5..b4cfc6c 100644 --- a/tests/test_geometamaker.py +++ b/tests/test_geometamaker.py @@ -675,4 +675,4 @@ def test_write_to_local_workspace(self): temp_dir, f'{os.path.basename(datasource_path)}.yml'))) self.assertTrue( os.path.exists(os.path.join( - temp_dir, f'{os.path.basename(datasource_path)}.xml'))) \ No newline at end of file + temp_dir, f'{os.path.basename(datasource_path)}.xml'))) From 88630175e441187d97943e7efbd773e1fbc9cdd3 Mon Sep 17 00:00:00 2001 From: davemfish Date: Thu, 7 Mar 2024 09:22:14 -0500 Subject: [PATCH 5/6] incorrect spelling in requirements. #18 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 526a9f3..0f1e878 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,8 +2,8 @@ # -------------------- # This file records the packages and requirements needed in order for # the library to work as expected. And to run tests. +aiohttp GDAL -httpaio jsonschema numpy pygeometa From 47a797b97fef8bbb9b7c7e17143b85f156d66264 Mon Sep 17 00:00:00 2001 From: davemfish Date: Thu, 7 Mar 2024 09:26:42 -0500 Subject: [PATCH 6/6] missing requirement. #18 --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 0f1e878..a0a431d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ # This file records the packages and requirements needed in order for # the library to work as expected. And to run tests. aiohttp +fsspec GDAL jsonschema numpy