diff --git a/dags/elsevier/metadata_parser.py b/dags/elsevier/metadata_parser.py index 080888e6..9e590382 100644 --- a/dags/elsevier/metadata_parser.py +++ b/dags/elsevier/metadata_parser.py @@ -31,7 +31,6 @@ def __init__(self, file_path): CustomExtractor( destination="date_published", extraction_function=self._get_published_date, - required=True, ), CustomExtractor( destination="journal_year", @@ -88,14 +87,11 @@ def _get_published_date(self, article): field_name="published_date", dois=self.dois, ) - if not date: - self.published_date = datetime.now().strftime("%Y-%m-%d") - self.year = datetime.now().strftime("%Y") + if date: + date = datetime.fromisoformat(date[:-1]) + self.published_date = date.strftime("%Y-%m-%d") + self.year = date.strftime("%Y") return self.published_date - date = datetime.fromisoformat(date[:-1]) - self.published_date = date.strftime("%Y-%m-%d") - self.year = date.strftime("%Y") - return self.published_date def _get_journal_year(self, article): return self.year diff --git a/tests/units/elsevier/test_metadata_parser.py b/tests/units/elsevier/test_metadata_parser.py index 04f9c617..732d5367 100644 --- a/tests/units/elsevier/test_metadata_parser.py +++ b/tests/units/elsevier/test_metadata_parser.py @@ -75,7 +75,7 @@ def parsed_articles(parser, article): id="test_publication_info", ), param( - ["2023-11-02", "2023-11-02", "2023-02-04", "2023-11-02"], + ["", "", "2023-02-04", ""], "date_published", id="test_published_date", ), @@ -150,7 +150,7 @@ def parsed_articles(parser, article): @freeze_time("2023-11-02") def test_elsevier_dataset_parsing(parsed_articles, expected, key): for (parsed_article, expected_article) in zip(parsed_articles, expected): - assert expected_article == parsed_article[key] + assert expected_article == parsed_article.get(key, "") @fixture