From 4e9a2a0ab30a08ea41478b0a8e49ee1ab791df3c Mon Sep 17 00:00:00 2001
From: ErnestaP
Date: Wed, 17 Jul 2024 16:58:00 +0200
Subject: [PATCH] Elsevier Parser: remove date_published simulation

* ref: https://github.com/cern-sis/issues-scoap3/issues/338
---
 dags/elsevier/metadata_parser.py             | 5 -----
 tests/units/elsevier/test_metadata_parser.py | 4 ++--
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/dags/elsevier/metadata_parser.py b/dags/elsevier/metadata_parser.py
index 080888e6..042c293e 100644
--- a/dags/elsevier/metadata_parser.py
+++ b/dags/elsevier/metadata_parser.py
@@ -31,7 +31,6 @@ def __init__(self, file_path):
             CustomExtractor(
                 destination="date_published",
                 extraction_function=self._get_published_date,
-                required=True,
             ),
             CustomExtractor(
                 destination="journal_year",
@@ -88,10 +87,6 @@ def _get_published_date(self, article):
             field_name="published_date",
             dois=self.dois,
         )
-        if not date:
-            self.published_date = datetime.now().strftime("%Y-%m-%d")
-            self.year = datetime.now().strftime("%Y")
-            return self.published_date
         date = datetime.fromisoformat(date[:-1])
         self.published_date = date.strftime("%Y-%m-%d")
         self.year = date.strftime("%Y")
diff --git a/tests/units/elsevier/test_metadata_parser.py b/tests/units/elsevier/test_metadata_parser.py
index 04f9c617..732d5367 100644
--- a/tests/units/elsevier/test_metadata_parser.py
+++ b/tests/units/elsevier/test_metadata_parser.py
@@ -75,7 +75,7 @@ def parsed_articles(parser, article):
             id="test_publication_info",
         ),
         param(
-            ["2023-11-02", "2023-11-02", "2023-02-04", "2023-11-02"],
+            ["", "", "2023-02-04", ""],
             "date_published",
             id="test_published_date",
         ),
@@ -150,7 +150,7 @@ def parsed_articles(parser, article):
 @freeze_time("2023-11-02")
 def test_elsevier_dataset_parsing(parsed_articles, expected, key):
     for (parsed_article, expected_article) in zip(parsed_articles, expected):
-        assert expected_article == parsed_article[key]
+        assert expected_article == parsed_article.get(key, "")
 
 
 @fixture