diff --git a/libs/langchain/langchain/utilities/pubmed.py b/libs/langchain/langchain/utilities/pubmed.py index 2bb26b7a4176e..9400d2082a5a0 100644 --- a/libs/langchain/langchain/utilities/pubmed.py +++ b/libs/langchain/langchain/utilities/pubmed.py @@ -158,15 +158,30 @@ def retrieve_article(self, uid: str, webenv: str) -> dict: return self._parse_article(uid, text_dict) def _parse_article(self, uid: str, text_dict: dict) -> dict: - ar = text_dict["PubmedArticleSet"]["PubmedArticle"]["MedlineCitation"][ - "Article" - ] - summary = "\n".join( - [ - f"{txt['@Label']}: {txt['#text']}" - for txt in ar.get("Abstract", {}).get("AbstractText", []) - if "#text" in txt and "@Label" in txt + try: + ar = text_dict["PubmedArticleSet"]["PubmedArticle"]["MedlineCitation"][ + "Article" ] + except KeyError: + ar = text_dict["PubmedArticleSet"]["PubmedBookArticle"]["BookDocument"] + abstract_text = ar.get("Abstract", {}).get("AbstractText", []) + summaries = [ + f"{txt['@Label']}: {txt['#text']}" + for txt in abstract_text + if "#text" in txt and "@Label" in txt + ] + summary = ( + "\n".join(summaries) + if summaries + else ( + abstract_text + if isinstance(abstract_text, str) + else ( + "\n".join(str(value) for value in abstract_text.values()) + if isinstance(abstract_text, dict) + else "No abstract available" + ) + ) ) a_d = ar.get("ArticleDate", {}) pub_date = "-".join( diff --git a/libs/langchain/tests/integration_tests/utilities/test_pubmed.py b/libs/langchain/tests/integration_tests/utilities/test_pubmed.py index cda90258d3fa8..d015cb06fd98f 100644 --- a/libs/langchain/tests/integration_tests/utilities/test_pubmed.py +++ b/libs/langchain/tests/integration_tests/utilities/test_pubmed.py @@ -20,8 +20,16 @@ def api_client() -> PubMedAPIWrapper: def test_run_success(api_client: PubMedAPIWrapper) -> None: """Test that returns the correct answer""" - output = api_client.run("chatgpt") - assert "Performance of ChatGPT on the Situational Judgement Test-A" in output + search_string = ( + "Examining the Validity of ChatGPT in Identifying " + "Relevant Nephrology Literature" + ) + output = api_client.run(search_string) + test_string = ( + "Examining the Validity of ChatGPT in Identifying " + "Relevant Nephrology Literature: Findings and Implications" + ) + assert test_string in output assert len(output) == api_client.doc_content_chars_max @@ -32,6 +40,53 @@ def test_run_returns_no_result(api_client: PubMedAPIWrapper) -> None: assert "No good PubMed Result was found" == output +def test_retrieve_article_returns_book_abstract(api_client: PubMedAPIWrapper) -> None: + """Test that returns the excerpt of a book.""" + + output_nolabel = api_client.retrieve_article("25905357", "") + output_withlabel = api_client.retrieve_article("29262144", "") + test_string_nolabel = ( + "Osteoporosis is a multifactorial disorder associated with low bone mass and " + "enhanced skeletal fragility. Although" + ) + assert test_string_nolabel in output_nolabel["Summary"] + assert ( + "Wallenberg syndrome was first described in 1808 by Gaspard Vieusseux. However," + in output_withlabel["Summary"] + ) + + +def test_retrieve_article_returns_article_abstract( + api_client: PubMedAPIWrapper, +) -> None: + """Test that returns the abstract of an article.""" + + output_nolabel = api_client.retrieve_article("37666905", "") + output_withlabel = api_client.retrieve_article("37666551", "") + test_string_nolabel = ( + "This work aims to: (1) Provide maximal hand force data on six different " + "grasp types for healthy subjects; (2) detect grasp types with maximal " + "force significantly affected by hand osteoarthritis (HOA) in women; (3) " + "look for predictors to detect HOA from the maximal forces using discriminant " + "analyses." + ) + assert test_string_nolabel in output_nolabel["Summary"] + test_string_withlabel = ( + "OBJECTIVES: To assess across seven hospitals from six different countries " + "the extent to which the COVID-19 pandemic affected the volumes of orthopaedic " + "hospital admissions and patient outcomes for non-COVID-19 patients admitted " + "for orthopaedic care." + ) + assert test_string_withlabel in output_withlabel["Summary"] + + +def test_retrieve_article_no_abstract_available(api_client: PubMedAPIWrapper) -> None: + """Test that returns 'No abstract available'.""" + + output = api_client.retrieve_article("10766884", "") + assert "No abstract available" == output["Summary"] + + def assert_docs(docs: List[Document]) -> None: for doc in docs: assert doc.metadata @@ -87,8 +142,16 @@ def _load_pubmed_from_universal_entry(**kwargs: Any) -> BaseTool: def test_load_pupmed_from_universal_entry() -> None: pubmed_tool = _load_pubmed_from_universal_entry() - output = pubmed_tool("chatgpt") - assert "Performance of ChatGPT on the Situational Judgement Test-A" in output + search_string = ( + "Examining the Validity of ChatGPT in Identifying " + "Relevant Nephrology Literature" + ) + output = pubmed_tool(search_string) + test_string = ( + "Examining the Validity of ChatGPT in Identifying " + "Relevant Nephrology Literature: Findings and Implications" + ) + assert test_string in output def test_load_pupmed_from_universal_entry_with_params() -> None: