From 01dba805763651b12d917ebfa5374ce72b57a994 Mon Sep 17 00:00:00 2001 From: Mojmir Vinkler Date: Tue, 16 Apr 2024 18:58:10 +0200 Subject: [PATCH] :sparkles: compute checksums from ingredients only (#2514) * :sparkles: compute checksums from ingredients only --- apps/owidbot/etldiff.py | 5 ++++- etl/steps/__init__.py | 10 +++------- .../garden/nasa/2023-03-06/ozone_hole_area.meta.yml | 3 --- .../war/2023-01-18/dunnigan_martel_1987.meta.yml | 1 - .../data/garden/war/2023-01-18/eckhardt_1991.meta.yml | 1 - .../data/garden/war/2023-01-18/kaye_1985.meta.yml | 1 - .../data/garden/war/2023-01-18/sutton_1971.meta.yml | 4 ---- 7 files changed, 7 insertions(+), 18 deletions(-) diff --git a/apps/owidbot/etldiff.py b/apps/owidbot/etldiff.py index a721b43bd0f..504e53c48b6 100644 --- a/apps/owidbot/etldiff.py +++ b/apps/owidbot/etldiff.py @@ -55,6 +55,10 @@ def cli( nbranch = _normalise_branch(branch) if branch else "dry-run" + # TODO: only include site-screenshots if the PR is from owid-grapher. Similarly, don't + # run etl diff if the PR is from etl repo. + # - **Site-screenshots**: https://github.com/owid/site-screenshots/compare/{nbranch} + body = f"""
@@ -63,7 +67,6 @@ def cli( - **Admin**: http://staging-site-{nbranch}/admin/login - **Site**: http://staging-site-{nbranch}/ - **Login**: `ssh owid@staging-site-{nbranch}` -- **Site-screenshots**: https://github.com/owid/site-screenshots/compare/{nbranch}
diff --git a/etl/steps/__init__.py b/etl/steps/__init__.py index 652cd42ac0a..b47f18ea2e8 100644 --- a/etl/steps/__init__.py +++ b/etl/steps/__init__.py @@ -518,7 +518,8 @@ def _output_dataset(self) -> catalog.Dataset: return catalog.Dataset(self._dest_dir.as_posix()) def checksum_output(self) -> str: - return self._output_dataset.checksum() + # output checksum is checksum of all ingredients + return self.checksum_input() def _step_files(self) -> List[str]: "Return a list of code files defining this step." @@ -714,12 +715,7 @@ def has_existing_data(self) -> bool: return True def checksum_output(self) -> str: - # NOTE: we could use the checksum from `_dvc_path` to - # speed this up. Test the performance on - # time poetry run etl run garden --dry-run - # Make sure that the checksum below is the same as DVC checksum! It - # looks like it might be different for some reason - return files.checksum_file(self._dvc_path) + return Snapshot(self.path).m.outs[0]["md5"] @property def _dvc_path(self) -> str: diff --git a/etl/steps/data/garden/nasa/2023-03-06/ozone_hole_area.meta.yml b/etl/steps/data/garden/nasa/2023-03-06/ozone_hole_area.meta.yml index dc0290db97b..7ccf28e173a 100644 --- a/etl/steps/data/garden/nasa/2023-03-06/ozone_hole_area.meta.yml +++ b/etl/steps/data/garden/nasa/2023-03-06/ozone_hole_area.meta.yml @@ -16,9 +16,6 @@ dataset: Minimum and mean Southern Hemisphere daily ozone concentrations, measured in Dobson Units (DU). This dataset should be next updated by the source every year. We will update it on Our World in Data soon after the new version is published. At the link above you can directly access the source page and see the latest available data. - licenses: - - name: # TO BE FILLED. Example: Testing License Name - url: # TO BE FILLED. Example: https://url_of_testing_source.com/license sources: - *source-testing diff --git a/etl/steps/data/garden/war/2023-01-18/dunnigan_martel_1987.meta.yml b/etl/steps/data/garden/war/2023-01-18/dunnigan_martel_1987.meta.yml index 03e7ade1190..d4722538299 100644 --- a/etl/steps/data/garden/war/2023-01-18/dunnigan_martel_1987.meta.yml +++ b/etl/steps/data/garden/war/2023-01-18/dunnigan_martel_1987.meta.yml @@ -17,7 +17,6 @@ dataset: This dataset provides information on military and civilian deaths from wars, drawn from the book by Dunnigan and Martel (1987). licenses: - name: Doubleday (1987) - url: # TO BE FILLED. Example: https://url_of_testing_source.com/license sources: - *source-testing diff --git a/etl/steps/data/garden/war/2023-01-18/eckhardt_1991.meta.yml b/etl/steps/data/garden/war/2023-01-18/eckhardt_1991.meta.yml index 790bc299ceb..7f77a80f561 100644 --- a/etl/steps/data/garden/war/2023-01-18/eckhardt_1991.meta.yml +++ b/etl/steps/data/garden/war/2023-01-18/eckhardt_1991.meta.yml @@ -17,7 +17,6 @@ dataset: This dataset provides information on military and civilian deaths from wars, drawn from the chapter by Eckhardt (1991). licenses: - name: World Priorities - url: # TO BE FILLED. Example: https://url_of_testing_source.com/license sources: - *source-testing diff --git a/etl/steps/data/garden/war/2023-01-18/kaye_1985.meta.yml b/etl/steps/data/garden/war/2023-01-18/kaye_1985.meta.yml index 2f4f3db8fed..3ca1fb6a42a 100644 --- a/etl/steps/data/garden/war/2023-01-18/kaye_1985.meta.yml +++ b/etl/steps/data/garden/war/2023-01-18/kaye_1985.meta.yml @@ -17,7 +17,6 @@ dataset: This dataset provides information on direct and indirect military and civilian deaths from major armed conflicts, drawn from the report by Kaye et al. (1985). licenses: - name: Department of National Defence, Canada, Operational Research and Analysis Establishment, 1985 - url: # TO BE FILLED. Example: https://url_of_testing_source.com/license sources: - *source-testing diff --git a/etl/steps/data/garden/war/2023-01-18/sutton_1971.meta.yml b/etl/steps/data/garden/war/2023-01-18/sutton_1971.meta.yml index 6a12a6987e2..540549a7a8a 100644 --- a/etl/steps/data/garden/war/2023-01-18/sutton_1971.meta.yml +++ b/etl/steps/data/garden/war/2023-01-18/sutton_1971.meta.yml @@ -4,7 +4,6 @@ all_sources: published_by: Sutton, Antony. 1972. Wars and Revolutions in the Nineteenth Century. Hoover Institution Archives. url: https://searchworks.stanford.edu/view/3023823 date_accessed: 2023-01-09 - publication_date: # TO BE FILLED. Example: 2023-01-01 publication_year: 1971 # description: Source description. @@ -15,9 +14,6 @@ dataset: version: 2023-01-18 description: | This dataset provides information on deaths from wars and revolutions, using data from Sutton (1972). - licenses: - - name: Unknown - url: # TO BE FILLED. Example: https://url_of_testing_source.com/license sources: - *source-testing