Skip to content

Commit

Permalink
fix: solve bug in extracting date from pmc source (#9)
Browse files: browse the repository at this point in the history
  • Loading branch information
EverVino authored Feb 14, 2024
1 parent 4472cb6 commit 20dd44e
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 3 deletions.
9 changes: 7 additions & 2 deletions src/pymedx/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,19 +285,24 @@ def _extractPublicationDate(
# Get the publication elements
publication_date = xml_element.find(".//pub-date[@pub-type='epub']")

if not publication_date: # Check this part
if publication_date is None:
publication_date = xml_element.find(".//pub-date")

if publication_date is not None:
publication_year = getContent(publication_date, ".//year", None)

if not publication_year or publication_year is None:
return None

publication_month = getContent(publication_date, ".//month", "1")

publication_day = getContent(publication_date, ".//day", "1")

# Construct a datetime object from the info
date_str: str = (
f"{publication_year}/{publication_month}/{publication_day}"
f"{str(publication_year).strip()}/"
f"{str(publication_month).strip()}/"
f"{str(publication_day).strip()}"
)

return datetime.datetime.strptime(date_str, "%Y/%m/%d")
Expand Down
34 changes: 33 additions & 1 deletion tests/test_pmc.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Test PubMedCentral class."""

import datetime

from pymedx.api import PubMedCentral
from lxml import etree as xml
from pymedx.api import PubMedCentral, PubMedCentralArticle


class TestPMC:
Expand All @@ -21,3 +23,33 @@ def test_query_results(self):
assert len(listed) > 0
assert len(listed[0].title) > 0
assert len(listed[0].pmc_id) > 0

def test_extracting_date(self):
    """Check that date parts padded with newlines still parse.

    Builds a ``pub-date`` element whose year and month texts carry stray
    newlines and expects the extracted date to equal 2024/2/9.
    """
    document = xml.Element("root")
    pub_date = xml.SubElement(document, "pub-date")

    # Year and month deliberately include leading/trailing newlines.
    date_parts = (
        ("year", "\n2024"),
        ("month", "2\n"),
        ("day", "9"),
    )
    for tag, raw_text in date_parts:
        xml.SubElement(pub_date, tag).text = raw_text

    article = PubMedCentralArticle()
    parsed = article._extractPublicationDate(document)

    assert parsed == datetime.datetime.strptime("2024/2/9", "%Y/%m/%d")

def test_extracting_date_None(self):
    """Check that an empty ``year`` element yields no publication date."""
    root = xml.Element("root")
    date = xml.SubElement(root, "pub-date")
    # The year is empty on purpose; month and day are still populated.
    xml.SubElement(date, "year").text = ""
    xml.SubElement(date, "month").text = "2\n"
    xml.SubElement(date, "day").text = "9"

    test_collector = PubMedCentralArticle()

    result = test_collector._extractPublicationDate(root)

    # Identity check: only a genuine None may satisfy this test.
    assert result is None

0 comments on commit 20dd44e

Please sign in to comment.