From 20dd44ed2f650d10738310c9847c8932f919ea39 Mon Sep 17 00:00:00 2001 From: EverVino Date: Wed, 14 Feb 2024 09:28:38 -0400 Subject: [PATCH] fix: solve bug in extracting date from pmc source (#9) --- src/pymedx/article.py | 9 +++++++-- tests/test_pmc.py | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/src/pymedx/article.py b/src/pymedx/article.py index 5614acf7..4544289a 100644 --- a/src/pymedx/article.py +++ b/src/pymedx/article.py @@ -285,19 +285,24 @@ def _extractPublicationDate( # Get the publication elements publication_date = xml_element.find(".//pub-date[@pub-type='epub']") - if not publication_date: # Check this part + if publication_date is None: publication_date = xml_element.find(".//pub-date") if publication_date is not None: publication_year = getContent(publication_date, ".//year", None) + if not publication_year or publication_year is None: + return None + publication_month = getContent(publication_date, ".//month", "1") publication_day = getContent(publication_date, ".//day", "1") # Construct a datetime object from the info date_str: str = ( - f"{publication_year}/{publication_month}/{publication_day}" + f"{str(publication_year).strip()}/" + f"{str(publication_month).strip()}/" + f"{str(publication_day).strip()}" ) return datetime.datetime.strptime(date_str, "%Y/%m/%d") diff --git a/tests/test_pmc.py b/tests/test_pmc.py index 0a013658..cc5efa61 100644 --- a/tests/test_pmc.py +++ b/tests/test_pmc.py @@ -1,7 +1,9 @@ """Test PubMedCentral class.""" +import datetime -from pymedx.api import PubMedCentral +from lxml import etree as xml +from pymedx.api import PubMedCentral, PubMedCentralArticle class TestPMC: @@ -21,3 +23,33 @@ def test_query_results(self): assert len(listed) > 0 assert len(listed[0].title) > 0 assert len(listed[0].pmc_id) > 0 + + def test_extracting_date(self): + """Test date extraction.""" + root = xml.Element("root") + date = xml.SubElement(root, "pub-date") + xml.SubElement(date, "year").text = "\n2024" + xml.SubElement(date, "month").text = "2\n" + xml.SubElement(date, "day").text = "9" + + test_collector = PubMedCentralArticle() + + result = test_collector._extractPublicationDate(root) + expected = datetime.datetime.strptime("2024/2/9", "%Y/%m/%d") + + assert result == expected + + def test_extracting_date_None(self): + """Test date extraction.""" + root = xml.Element("root") + date = xml.SubElement(root, "pub-date") + xml.SubElement(date, "year").text = "" + xml.SubElement(date, "month").text = "2\n" + xml.SubElement(date, "day").text = "9" + + test_collector = PubMedCentralArticle() + + result = test_collector._extractPublicationDate(root) + expected = None + + assert result == expected