From d5a2b987fec4930ef608d4fbabd55c69f1435b02 Mon Sep 17 00:00:00 2001 From: ErnestaP Date: Thu, 18 Jul 2024 15:38:02 +0200 Subject: [PATCH] Elsevier Parser: remove date_published simulation * ref: https://github.com/cern-sis/issues-scoap3/issues/338 --- dags/elsevier/metadata_parser.py | 12 ++++-------- tests/units/elsevier/test_metadata_parser.py | 4 ++-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/dags/elsevier/metadata_parser.py b/dags/elsevier/metadata_parser.py index 080888e6..9e590382 100644 --- a/dags/elsevier/metadata_parser.py +++ b/dags/elsevier/metadata_parser.py @@ -31,7 +31,6 @@ def __init__(self, file_path): CustomExtractor( destination="date_published", extraction_function=self._get_published_date, - required=True, ), CustomExtractor( destination="journal_year", @@ -88,14 +87,11 @@ def _get_published_date(self, article): field_name="published_date", dois=self.dois, ) - if not date: - self.published_date = datetime.now().strftime("%Y-%m-%d") - self.year = datetime.now().strftime("%Y") + if date: + date = datetime.fromisoformat(date[:-1]) + self.published_date = date.strftime("%Y-%m-%d") + self.year = date.strftime("%Y") return self.published_date - date = datetime.fromisoformat(date[:-1]) - self.published_date = date.strftime("%Y-%m-%d") - self.year = date.strftime("%Y") - return self.published_date def _get_journal_year(self, article): return self.year diff --git a/tests/units/elsevier/test_metadata_parser.py b/tests/units/elsevier/test_metadata_parser.py index 04f9c617..732d5367 100644 --- a/tests/units/elsevier/test_metadata_parser.py +++ b/tests/units/elsevier/test_metadata_parser.py @@ -75,7 +75,7 @@ def parsed_articles(parser, article): id="test_publication_info", ), param( - ["2023-11-02", "2023-11-02", "2023-02-04", "2023-11-02"], + ["", "", "2023-02-04", ""], "date_published", id="test_published_date", ), @@ -150,7 +150,7 @@ def parsed_articles(parser, article): @freeze_time("2023-11-02") def test_elsevier_dataset_parsing(parsed_articles, expected, key): for (parsed_article, expected_article) in zip(parsed_articles, expected): - assert expected_article == parsed_article[key] + assert expected_article == parsed_article.get(key, "") @fixture