From e0186efe904232aac0c6f7973d7dbd6bfad162c0 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Wed, 26 Jun 2024 13:14:51 +0100 Subject: [PATCH 01/10] Update GeometryMetadata with country ID for filename_stem --- python/popgetter/assets/be/census_geometry.py | 1 + python/popgetter/assets/ni/__init__.py | 1 + python/popgetter/metadata.py | 6 +++++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/python/popgetter/assets/be/census_geometry.py b/python/popgetter/assets/be/census_geometry.py index c07cf94..51fecf0 100644 --- a/python/popgetter/assets/be/census_geometry.py +++ b/python/popgetter/assets/be/census_geometry.py @@ -115,6 +115,7 @@ def geometry(context, sector_geometries) -> list[GeometryOutput]: for level_details in BELGIUM_GEOMETRY_LEVELS.values(): geometry_metadata = GeometryMetadata( + country_id=asset_prefix, validity_period_start=date(2023, 1, 1), validity_period_end=date(2023, 12, 31), level=level_details.level, diff --git a/python/popgetter/assets/ni/__init__.py b/python/popgetter/assets/ni/__init__.py index 06c3715..c13ca40 100644 --- a/python/popgetter/assets/ni/__init__.py +++ b/python/popgetter/assets/ni/__init__.py @@ -394,6 +394,7 @@ def _geometry(self, context) -> list[GeometryOutput]: for level_details in NI_GEO_LEVELS.values(): # TODO: get correct values geometry_metadata = GeometryMetadata( + country_id=self.key_prefix, validity_period_start=CENSUS_COLLECTION_DATE, validity_period_end=CENSUS_COLLECTION_DATE, level=level_details.level, diff --git a/python/popgetter/metadata.py b/python/popgetter/metadata.py index a49032a..679f461 100644 --- a/python/popgetter/metadata.py +++ b/python/popgetter/metadata.py @@ -122,7 +122,11 @@ def id(self) -> str: def filename_stem(self) -> str: level = "_".join(self.level.lower().split()) year = self.validity_period_start.year - return f"{level}_{year}" + return f"{self.country_id}/geometries/{level}_{year}" + + country_id: str = Field( + "The country ID that is required to generate a complete `filename_stem`." + ) validity_period_start: date = Field( description="The start of the range of time for which the regions are valid (inclusive)" From 50578129b6dbe911214573b89a22d95fe630718c Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Wed, 26 Jun 2024 13:24:49 +0100 Subject: [PATCH 02/10] Update metrics parquet_file_name --- python/popgetter/assets/be/census_derived.py | 5 ++++- python/popgetter/assets/ni/__init__.py | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/python/popgetter/assets/be/census_derived.py b/python/popgetter/assets/be/census_derived.py index 3aec6c9..2f4986d 100644 --- a/python/popgetter/assets/be/census_derived.py +++ b/python/popgetter/assets/be/census_derived.py @@ -275,7 +275,10 @@ def derived_metrics_by_partition( derived_metrics: list[pd.DataFrame] = [] derived_mmd: list[MetricMetadata] = [] - parquet_file_name = "".join(c for c in node if c.isalnum()) + ".parquet" + parquet_file_name = ( + f"{asset_prefix}/metrics/" + f"{''.join(c for c in node if c.isalnum()) + '.parquet'}" + ) for metric_spec in metric_specs: new_table = ( diff --git a/python/popgetter/assets/ni/__init__.py b/python/popgetter/assets/ni/__init__.py index c13ca40..fab9ad4 100644 --- a/python/popgetter/assets/ni/__init__.py +++ b/python/popgetter/assets/ni/__init__.py @@ -539,7 +539,8 @@ def _derived_metrics( ) parquet_file_name = ( - "".join(c for c in partition_key if c.isalnum()) + ".parquet" + f"{self.key_prefix}/metrics/" + f"{''.join(c for c in partition_key if c.isalnum()) + '.parquet'}" ) derived_metrics: list[pd.DataFrame] = [] derived_mmd: list[MetricMetadata] = [] From 50b2dc962e30b5812b19a00306f33e09cc482780 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Wed, 26 Jun 2024 13:40:46 +0100 Subject: [PATCH 03/10] Rename modules and asset prefixes --- python/popgetter/assets/{be => bel}/__init__.py | 0 python/popgetter/assets/{be => bel}/belgium.py | 2 +- python/popgetter/assets/{be => bel}/census_derived.py | 0 python/popgetter/assets/{be => bel}/census_geometry.py | 0 python/popgetter/assets/{be => bel}/census_tables.py | 0 python/popgetter/assets/{ni => gb-nir}/README.md | 0 python/popgetter/assets/{ni => gb-nir}/__init__.py | 2 +- 7 files changed, 2 insertions(+), 2 deletions(-) rename python/popgetter/assets/{be => bel}/__init__.py (100%) rename python/popgetter/assets/{be => bel}/belgium.py (93%) rename python/popgetter/assets/{be => bel}/census_derived.py (100%) rename python/popgetter/assets/{be => bel}/census_geometry.py (100%) rename python/popgetter/assets/{be => bel}/census_tables.py (100%) rename python/popgetter/assets/{ni => gb-nir}/README.md (100%) rename python/popgetter/assets/{ni => gb-nir}/__init__.py (99%) diff --git a/python/popgetter/assets/be/__init__.py b/python/popgetter/assets/bel/__init__.py similarity index 100% rename from python/popgetter/assets/be/__init__.py rename to python/popgetter/assets/bel/__init__.py diff --git a/python/popgetter/assets/be/belgium.py b/python/popgetter/assets/bel/belgium.py similarity index 93% rename from python/popgetter/assets/be/belgium.py rename to python/popgetter/assets/bel/belgium.py index 45403af..d3d0f89 100644 --- a/python/popgetter/assets/be/belgium.py +++ b/python/popgetter/assets/bel/belgium.py @@ -13,4 +13,4 @@ ) WORKING_DIR = Path("belgium") -asset_prefix = "be" +asset_prefix = "bel" diff --git a/python/popgetter/assets/be/census_derived.py b/python/popgetter/assets/bel/census_derived.py similarity index 100% rename from python/popgetter/assets/be/census_derived.py rename to python/popgetter/assets/bel/census_derived.py diff --git a/python/popgetter/assets/be/census_geometry.py b/python/popgetter/assets/bel/census_geometry.py similarity index 100% rename from python/popgetter/assets/be/census_geometry.py rename to python/popgetter/assets/bel/census_geometry.py diff --git a/python/popgetter/assets/be/census_tables.py b/python/popgetter/assets/bel/census_tables.py similarity index 100% rename from python/popgetter/assets/be/census_tables.py rename to python/popgetter/assets/bel/census_tables.py diff --git a/python/popgetter/assets/ni/README.md b/python/popgetter/assets/gb-nir/README.md similarity index 100% rename from python/popgetter/assets/ni/README.md rename to python/popgetter/assets/gb-nir/README.md diff --git a/python/popgetter/assets/ni/__init__.py b/python/popgetter/assets/gb-nir/__init__.py similarity index 99% rename from python/popgetter/assets/ni/__init__.py rename to python/popgetter/assets/gb-nir/__init__.py index fab9ad4..dd5972a 100644 --- a/python/popgetter/assets/ni/__init__.py +++ b/python/popgetter/assets/gb-nir/__init__.py @@ -241,7 +241,7 @@ def census_table_metadata( class NorthernIreland(Country): - key_prefix: ClassVar[str] = "uk-ni" + key_prefix: ClassVar[str] = "gb-nir" geo_levels: ClassVar[list[str]] = list(NI_GEO_LEVELS.keys()) tables_to_process: list[str] | None = TABLES_TO_PROCESS From 5129d8c18f0f188c811cafbb0f8368fd2dcae59e Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Wed, 26 Jun 2024 15:29:38 +0100 Subject: [PATCH 04/10] Rename modules with ISO codes, update GeometryMetadata to have CountryMetadata --- python/popgetter/assets/__init__.py | 4 ++-- python/popgetter/assets/bel/census_geometry.py | 4 ++-- python/popgetter/assets/country.py | 4 +++- .../assets/{gb-nir => gb_nir}/README.md | 0 .../assets/{gb-nir => gb_nir}/__init__.py | 18 +++++++++--------- python/popgetter/metadata.py | 10 +++++----- 6 files changed, 21 insertions(+), 19 deletions(-) rename python/popgetter/assets/{gb-nir => gb_nir}/README.md (100%) rename python/popgetter/assets/{gb-nir => gb_nir}/__init__.py (98%) diff --git a/python/popgetter/assets/__init__.py b/python/popgetter/assets/__init__.py index 55e91dc..1cb7eb6 100644 --- a/python/popgetter/assets/__init__.py +++ b/python/popgetter/assets/__init__.py @@ -1,7 +1,7 @@ from __future__ import annotations -from . import be, ni, uk, us +from . import bel, gb_nir, uk, us -countries = [(mod, mod.__name__.split(".")[-1]) for mod in [be, ni, uk, us]] +countries = [(mod, mod.__name__.split(".")[-1]) for mod in [bel, gb_nir, uk, us]] __all__ = ["countries"] diff --git a/python/popgetter/assets/bel/census_geometry.py b/python/popgetter/assets/bel/census_geometry.py index 51fecf0..a0c22e2 100644 --- a/python/popgetter/assets/bel/census_geometry.py +++ b/python/popgetter/assets/bel/census_geometry.py @@ -23,7 +23,7 @@ ) from popgetter.utils import markdown_from_plot -from .belgium import asset_prefix +from .belgium import asset_prefix, country from .census_tables import publisher @@ -115,7 +115,7 @@ def geometry(context, sector_geometries) -> list[GeometryOutput]: for level_details in BELGIUM_GEOMETRY_LEVELS.values(): geometry_metadata = GeometryMetadata( - country_id=asset_prefix, + country_metadata=country, validity_period_start=date(2023, 1, 1), validity_period_end=date(2023, 12, 31), level=level_details.level, diff --git a/python/popgetter/assets/country.py b/python/popgetter/assets/country.py index 448d3a9..745dd10 100644 --- a/python/popgetter/assets/country.py +++ b/python/popgetter/assets/country.py @@ -38,11 +38,13 @@ class Country(ABC): """ - key_prefix: ClassVar[str] + country_metadata: ClassVar[CountryMetadata] + key_prefix: str partition_name: str dataset_node_partition: DynamicPartitionsDefinition def __init__(self): + self.key_prefix = self.country_metadata.id self.partition_name = f"{self.key_prefix}_nodes" self.dataset_node_partition = DynamicPartitionsDefinition( name=self.partition_name diff --git a/python/popgetter/assets/gb-nir/README.md b/python/popgetter/assets/gb_nir/README.md similarity index 100% rename from python/popgetter/assets/gb-nir/README.md rename to python/popgetter/assets/gb_nir/README.md diff --git a/python/popgetter/assets/gb-nir/__init__.py b/python/popgetter/assets/gb_nir/__init__.py similarity index 98% rename from python/popgetter/assets/gb-nir/__init__.py rename to python/popgetter/assets/gb_nir/__init__.py index dd5972a..c4685e9 100644 --- a/python/popgetter/assets/gb-nir/__init__.py +++ b/python/popgetter/assets/gb_nir/__init__.py @@ -241,18 +241,18 @@ def census_table_metadata( class NorthernIreland(Country): - key_prefix: ClassVar[str] = "gb-nir" + country_metadata: CountryMetadata = CountryMetadata( + name_short_en="Northern Ireland", + name_official="Northern Ireland", + iso3="GBR", + iso2="GB", + iso3166_2="GB-NIR", + ) geo_levels: ClassVar[list[str]] = list(NI_GEO_LEVELS.keys()) tables_to_process: list[str] | None = TABLES_TO_PROCESS def _country_metadata(self, _context) -> CountryMetadata: - return CountryMetadata( - name_short_en="Northern Ireland", - name_official="Northern Ireland", - iso3="GBR", - iso2="GB", - iso3166_2="GB-NIR", - ) + return self.country_metadata def _data_publisher( self, _context, country_metadata: CountryMetadata @@ -394,7 +394,7 @@ def _geometry(self, context) -> list[GeometryOutput]: for level_details in NI_GEO_LEVELS.values(): # TODO: get correct values geometry_metadata = GeometryMetadata( - country_id=self.key_prefix, + country_metadata=self.country_metadata, validity_period_start=CENSUS_COLLECTION_DATE, validity_period_end=CENSUS_COLLECTION_DATE, level=level_details.level, diff --git a/python/popgetter/metadata.py b/python/popgetter/metadata.py index 679f461..4e7a074 100644 --- a/python/popgetter/metadata.py +++ b/python/popgetter/metadata.py @@ -67,8 +67,8 @@ class CountryMetadata(MetadataBaseModel): @property def id(self) -> str: if self.iso3166_2 is not None: - return self.iso3166_2.lower() - return self.iso3.lower() + return self.iso3166_2.lower().replace("-", "_") + return self.iso3.lower().replace("-", "_") name_short_en: str = Field( description="The short name of the country in English (for example 'Belgium')." @@ -122,10 +122,10 @@ def id(self) -> str: def filename_stem(self) -> str: level = "_".join(self.level.lower().split()) year = self.validity_period_start.year - return f"{self.country_id}/geometries/{level}_{year}" + return f"{self.country_metadata.id}/geometries/{level}_{year}" - country_id: str = Field( - "The country ID that is required to generate a complete `filename_stem`." + country_metadata: CountryMetadata = Field( + "The `CountryMetadata` associated with the geometry.", exclude=True ) validity_period_start: date = Field( From c9d2215c2c07e171cdd0fc1d2b50542adff9dc46 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Wed, 26 Jun 2024 16:28:01 +0100 Subject: [PATCH 05/10] Update source_data_releases and ensure serializable vars --- python/popgetter/metadata.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/python/popgetter/metadata.py b/python/popgetter/metadata.py index 4e7a074..bd92716 100644 --- a/python/popgetter/metadata.py +++ b/python/popgetter/metadata.py @@ -19,15 +19,28 @@ def hash_class_vars(self): Note that `vars()` does not include properties, so the IDs themselves are not part of the hash, which avoids self-reference issues. """ + # Must copy the dict to avoid overriding the actual instance attributes! # Because we're only modifying dates -> strings, we don't need to perform a - # deepcopy - variables = dict(**vars(self)) - # Python doesn't serialise dates to JSON, have to convert to ISO 8601 first - for key, val in variables.items(): - if isinstance(val, date): - variables[key] = val.isoformat() - return sha256(jcs.canonicalize(variables)).hexdigest() + # deepcopy but all variables must be serializable + def serializable_vars(obj: object) -> dict: + variables = {} + # Check if variables are serializable + for key, val in vars(obj).items(): + try: + jcs.canonicalize(val) + variables[key] = val + except Exception: + pass + + # Python doesn't serialise dates to JSON, have to convert to ISO 8601 first + for key, val in variables.items(): + if isinstance(val, date): + variables[key] = val.isoformat() + + return variables + + return sha256(jcs.canonicalize(serializable_vars(self))).hexdigest() @classmethod def fix_types(cls, df: pd.DataFrame) -> pd.DataFrame: From bdeb74809c09859593cc372785413149025dd170 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Wed, 26 Jun 2024 20:20:35 +0100 Subject: [PATCH 06/10] Remove directories from geometries and metrics IO manager --- python/popgetter/io_managers/__init__.py | 28 ++++++------------------ 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/python/popgetter/io_managers/__init__.py b/python/popgetter/io_managers/__init__.py index 3a524e7..d393499 100644 --- a/python/popgetter/io_managers/__init__.py +++ b/python/popgetter/io_managers/__init__.py @@ -132,27 +132,15 @@ class GeometryOutputPaths: def get_full_paths_geoms( self, - context: OutputContext, geo_metadata: GeometryMetadata, ) -> GeometryOutputPaths: filename_stem = geo_metadata.filename_stem - asset_prefix = list(context.partition_key.split("/"))[:-1] # e.g. ['be'] base_path = self.get_base_path() return self.GeometryOutputPaths( - flatgeobuf=base_path - / UPath("/".join([*asset_prefix, "geometries", f"{filename_stem}.fgb"])), - pmtiles=base_path - / UPath( - "/".join([*asset_prefix, "geometries", f"TODO_{filename_stem}.pmtiles"]) - ), - geojsonseq=base_path - / UPath( - "/".join([*asset_prefix, "geometries", f"{filename_stem}.geojsonseq"]) - ), - names=base_path - / UPath( - "/".join([*asset_prefix, "geometries", f"{filename_stem}.parquet"]) - ), + flatgeobuf=base_path / UPath(f"{filename_stem}.fgb"), + pmtiles=base_path / UPath(f"TODO_{filename_stem}.pmtiles"), + geojsonseq=base_path / UPath(f"{filename_stem}.geojsonseq"), + names=base_path / UPath(f"{filename_stem}.parquet"), ) def get_full_path_metadata( @@ -217,7 +205,7 @@ def handle_output( output.gdf["GEO_ID"] = output.gdf["GEO_ID"].astype("string") output.names_df = output.names_df.astype("string") - full_paths = self.get_full_paths_geoms(context, output.metadata) + full_paths = self.get_full_paths_geoms(output.metadata) self.handle_flatgeobuf(context, output.gdf, full_paths.flatgeobuf) self.handle_geojsonseq(context, output.gdf, full_paths.geojsonseq) @@ -253,12 +241,10 @@ def get_full_path_metadata( def get_full_path_metrics( self, - context: OutputContext, parquet_path: str, ) -> UPath: base_path = self.get_base_path() - asset_prefix = list(context.partition_key.split("/"))[:-1] - return base_path / UPath("/".join([*asset_prefix, "metrics", parquet_path])) + return base_path / UPath(parquet_path) def handle_output( self, @@ -329,7 +315,7 @@ def handle_output( # of the tuple for metrics_output in obj: rel_path = metrics_output.metadata[0].metric_parquet_path - full_path = self.get_full_path_metrics(context, rel_path) + full_path = self.get_full_path_metrics(rel_path) self.handle_df(context, metrics_output.metrics, full_path) # Add metadata From 9ad9a6622a891b95121e090ca1b16365aa2cdb9d Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Thu, 27 Jun 2024 09:06:51 +0100 Subject: [PATCH 07/10] Fix test module --- tests/test_be.py | 20 ++++++++++---------- tests/test_metadata.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/test_be.py b/tests/test_be.py index fae975b..b08a6da 100644 --- a/tests/test_be.py +++ b/tests/test_be.py @@ -12,7 +12,7 @@ from rdflib import Graph from rdflib.namespace import DCAT -from popgetter.assets import be +from popgetter.assets import bel @pytest.fixture(scope="module") @@ -36,7 +36,7 @@ def demo_catalog() -> Graph: @pytest.fixture(scope="module") def demo_catalog_df(demo_catalog) -> pd.DataFrame: context = build_asset_context() - return be.census_tables.catalog_as_dataframe(context, demo_catalog) + return bel.census_tables.catalog_as_dataframe(context, demo_catalog) @pytest.mark.skip( @@ -46,7 +46,7 @@ def test_aggregate_sectors_to_municipalities(demo_sectors): # Test the that the row count is correctly added to the metadata context = build_asset_context() - actual_municipalities = be.census_geometry.aggregate_sectors_to_municipalities( + actual_municipalities = bel.census_geometry.aggregate_sectors_to_municipalities( context, demo_sectors ) @@ -62,7 +62,7 @@ def test_aggregate_sectors_to_municipalities(demo_sectors): @pytest.mark.skip(reason="Fix test_get_population_details_per_municipality first") def test_get_population_details_per_municipality(): with build_asset_context() as muni_context: - stat_muni = be.census_tables.get_population_details_per_municipality( + stat_muni = bel.census_tables.get_population_details_per_municipality( muni_context ) @@ -87,7 +87,7 @@ def test_pivot_population(): ) # Get the geometries - stat_muni = be.census_tables.get_population_details_per_municipality( + stat_muni = bel.census_tables.get_population_details_per_municipality( muni_context ) @@ -99,7 +99,7 @@ def test_pivot_population(): with build_asset_context() as pivot_context: # Pivot the population - pivoted = be.pivot_population(pivot_context, stat_muni) + pivoted = bel.pivot_population(pivot_context, stat_muni) expected_number_of_municipalities = 581 @@ -115,7 +115,7 @@ def test_demo_catalog(demo_catalog): actual_length = len( list( demo_catalog.objects( - subject=be.census_tables.opendata_catalog_root, + subject=bel.census_tables.opendata_catalog_root, predicate=DCAT.dataset, unique=False, ) @@ -128,7 +128,7 @@ def test_demo_catalog(demo_catalog): def test_catalog_metadata_details(demo_catalog_df): # Get the metadata for a specific dataset in the demo catalogue: # https://statbel.fgov.be/node/4151 "Population by Statistical sector" - # mmd = be.census_tables.get_mmd_from_dataset_node( + # mmd = bel.census_tables.get_mmd_from_dataset_node( # demo_catalog, dataset_node=URIRef("https://statbel.fgov.be/node/4151") # ) @@ -179,7 +179,7 @@ def test_catalog_as_dataframe(demo_catalog_df): # # Convert the demo catalog to a DataFrame # with build_asset_context() as context: - # catalog_df = be.census_tables.catalog_as_dataframe(context, demo_catalog_df) + # catalog_df = bel.census_tables.catalog_as_dataframe(context, demo_catalog_df) # # Check that the catalog has been converted to a DataFrame # assert isinstance(catalog_df, pd.DataFrame) @@ -228,7 +228,7 @@ def test_filter_known_failing_datasets(): "2676", ] - actual_list = be.census_tables.filter_known_failing_datasets(mock_catalog) + actual_list = bel.census_tables.filter_known_failing_datasets(mock_catalog) assert mock_catalog != expected_list assert actual_list != mock_catalog diff --git a/tests/test_metadata.py b/tests/test_metadata.py index bc272b2..502f186 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -57,7 +57,7 @@ def test_source_data_release_hash(): ) assert ( source_data_release.id - == "9ec7e234d73664339e4c1f04bfa485dbb17e204dd72dc3ffbb9cab6870475597" + == "4d61bfe401ba17becd02d6b3912152c135daa9ecaebc9bd45a589dc831a85217" ) source_data_release2 = SourceDataRelease( From 250e6acac72a030678878b1504cdbe3cc0fdb378 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Thu, 27 Jun 2024 11:24:00 +0100 Subject: [PATCH 08/10] Rename test_be module as test_bel --- tests/{test_be.py => test_bel.py} | 0 tests/test_cloud_outputs.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/{test_be.py => test_bel.py} (100%) diff --git a/tests/test_be.py b/tests/test_bel.py similarity index 100% rename from tests/test_be.py rename to tests/test_bel.py diff --git a/tests/test_cloud_outputs.py b/tests/test_cloud_outputs.py index 7e65cfd..73df5d2 100644 --- a/tests/test_cloud_outputs.py +++ b/tests/test_cloud_outputs.py @@ -9,7 +9,7 @@ # ) # generate_pmtiles, # TODO, Move this to a fixture to somewhere more universal -from .test_be import demo_sectors # noqa: F401 +from .test_bel import demo_sectors # noqa: F401 # Commented out test as part of #92 as functions no longer importable # @pytest.mark.skip( From 9d3c758980af1f1a7af2e4632cd39945cb9490cd Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Mon, 1 Jul 2024 09:15:09 +0100 Subject: [PATCH 09/10] Rename variable filename_stem to filepath_stem, add todo --- python/popgetter/io_managers/__init__.py | 10 +++++----- python/popgetter/metadata.py | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/python/popgetter/io_managers/__init__.py b/python/popgetter/io_managers/__init__.py index d393499..611f313 100644 --- a/python/popgetter/io_managers/__init__.py +++ b/python/popgetter/io_managers/__init__.py @@ -134,13 +134,13 @@ def get_full_paths_geoms( self, geo_metadata: GeometryMetadata, ) -> GeometryOutputPaths: - filename_stem = geo_metadata.filename_stem + filepath_stem = geo_metadata.filename_stem base_path = self.get_base_path() return self.GeometryOutputPaths( - flatgeobuf=base_path / UPath(f"{filename_stem}.fgb"), - pmtiles=base_path / UPath(f"TODO_{filename_stem}.pmtiles"), - geojsonseq=base_path / UPath(f"{filename_stem}.geojsonseq"), - names=base_path / UPath(f"{filename_stem}.parquet"), + flatgeobuf=base_path / UPath(f"{filepath_stem}.fgb"), + pmtiles=base_path / UPath(f"TODO_{filepath_stem}.pmtiles"), + geojsonseq=base_path / UPath(f"{filepath_stem}.geojsonseq"), + names=base_path / UPath(f"{filepath_stem}.parquet"), ) def get_full_path_metadata( diff --git a/python/popgetter/metadata.py b/python/popgetter/metadata.py index bd92716..16107d9 100644 --- a/python/popgetter/metadata.py +++ b/python/popgetter/metadata.py @@ -132,6 +132,7 @@ def id(self) -> str: @computed_field @property + # TODO: update metadata field name to `filepath_stem` (https://github.com/Urban-Analytics-Technology-Platform/popgetter/issues/129) def filename_stem(self) -> str: level = "_".join(self.level.lower().split()) year = self.validity_period_start.year From 7921e9099b800e46568124f7cd861f85307417b8 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Mon, 1 Jul 2024 09:17:01 +0100 Subject: [PATCH 10/10] Add _country_metadata default implementation --- python/popgetter/assets/country.py | 5 ++--- python/popgetter/assets/gb_nir/__init__.py | 3 --- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/python/popgetter/assets/country.py b/python/popgetter/assets/country.py index 745dd10..4349c97 100644 --- a/python/popgetter/assets/country.py +++ b/python/popgetter/assets/country.py @@ -87,9 +87,8 @@ def country_metadata(context): return country_metadata - @abstractmethod - def _country_metadata(self, context) -> CountryMetadata: - ... + def _country_metadata(self, _context) -> CountryMetadata: + return self.country_metadata def create_data_publisher(self): """Creates an asset providing the data publisher metadata.""" diff --git a/python/popgetter/assets/gb_nir/__init__.py b/python/popgetter/assets/gb_nir/__init__.py index c4685e9..740538b 100644 --- a/python/popgetter/assets/gb_nir/__init__.py +++ b/python/popgetter/assets/gb_nir/__init__.py @@ -251,9 +251,6 @@ class NorthernIreland(Country): geo_levels: ClassVar[list[str]] = list(NI_GEO_LEVELS.keys()) tables_to_process: list[str] | None = TABLES_TO_PROCESS - def _country_metadata(self, _context) -> CountryMetadata: - return self.country_metadata - def _data_publisher( self, _context, country_metadata: CountryMetadata ) -> DataPublisher: