diff --git a/camel/toolkits/__init__.py b/camel/toolkits/__init__.py index 30909f9f4d..af7e00ff47 100644 --- a/camel/toolkits/__init__.py +++ b/camel/toolkits/__init__.py @@ -45,6 +45,7 @@ from .stripe_toolkit import StripeToolkit from .video_toolkit import VideoDownloaderToolkit from .dappier_toolkit import DappierToolkit +from .semanticscholar_toolkit import SemanticScholarToolkit __all__ = [ 'BaseToolkit', @@ -77,4 +78,5 @@ 'MeshyToolkit', 'OpenBBToolkit', 'DappierToolkit', + 'SemanticScholarToolkit', ] diff --git a/camel/toolkits/semanticscholar_toolkit.py b/camel/toolkits/semanticscholar_toolkit.py new file mode 100644 index 0000000000..627f9393a2 --- /dev/null +++ b/camel/toolkits/semanticscholar_toolkit.py @@ -0,0 +1,265 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. 
import json
from typing import List

import requests

from camel.toolkits import FunctionTool
from camel.toolkits.base import BaseToolkit


class SemanticScholarToolkit(BaseToolkit):
    r"""A toolkit for interacting with the Semantic Scholar Graph API to
    fetch paper and author data.

    All network failures are reported as ``{"error": ..., "message": ...}``
    dictionaries instead of raising, so a tool-calling agent always receives
    a structured result.
    """

    # A request without a timeout can hang the agent forever; cap every call.
    _TIMEOUT: int = 30

    def __init__(self):
        r"""Initializes the toolkit with the Graph API base URL."""
        self.base_url = "https://api.semanticscholar.org/graph/v1"

    def fetch_paper_data_title(
        self,
        paperTitle: str,
        # NOTE: the default must be a single string with no embedded
        # whitespace -- the API rejects field names containing spaces or
        # newlines (the previous triple-quoted default sent both).
        fields: str = (
            "title,abstract,authors,year,citationCount,"
            "publicationTypes,publicationDate,openAccessPdf"
        ),
    ) -> dict:
        r"""Fetches a SINGLE paper from the Semantic Scholar API based on
        a paper title.

        Args:
            paperTitle (str): The title of the paper to fetch.
            fields (str): A comma-separated list of fields to include in
                the response. The default requests title, abstract,
                authors, year, citation count, publication types,
                publication date and the open-access PDF link.

        Returns:
            dict: The parsed JSON response from the API, or an error
                dictionary with ``error``/``message`` keys if the request
                fails or the body is not valid JSON.
        """
        url = f"{self.base_url}/paper/search"
        query_params = {"query": paperTitle, "fields": fields}
        try:
            response = requests.get(
                url, params=query_params, timeout=self._TIMEOUT
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            return {
                "error": f"Request failed: {e!s}",
                "message": str(e),
            }
        except ValueError:
            # raise_for_status() passed but the body is not JSON.
            return {
                "error": "Response is not valid JSON",
                "message": response.text,
            }

    def fetch_paper_data_id(
        self,
        paperID: str,
        fields: str = (
            "title,abstract,authors,year,citationCount,"
            "publicationTypes,publicationDate,openAccessPdf"
        ),
    ) -> dict:
        r"""Fetches a SINGLE paper from the Semantic Scholar API based on
        a paper ID.

        Args:
            paperID (str): The ID of the paper to fetch.
            fields (str): A comma-separated list of fields to include in
                the response (see ``fetch_paper_data_title`` for the
                default selection).

        Returns:
            dict: The parsed JSON response from the API, or an error
                dictionary if the request fails.
        """
        url = f"{self.base_url}/paper/{paperID}"
        query_params = {"fields": fields}
        try:
            response = requests.get(
                url, params=query_params, timeout=self._TIMEOUT
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            return {
                "error": f"Request failed: {e!s}",
                "message": str(e),
            }
        except ValueError:
            return {
                "error": "Response is not valid JSON",
                "message": response.text,
            }

    def fetch_bulk_paper_data(
        self,
        query: str,
        year: str = "2023-",
        fields: str = (
            "title,url,publicationTypes,publicationDate,openAccessPdf"
        ),
    ) -> dict:
        r"""Fetches MULTIPLE papers at once from the Semantic Scholar API
        based on a related topic.

        Args:
            query (str): The text query to match against the paper's title
                and abstract. Boolean operators are supported, e.g.
                ``((cloud computing) | virtualization) +security -privacy``
                matches papers mentioning "cloud computing" or
                "virtualization" that include "security" but exclude
                "privacy".
            year (str): The year filter for papers (default is "2023-",
                i.e. 2023 onwards).
            fields (str): A comma-separated list of fields to include in
                the response.

        Returns:
            dict: The parsed JSON response from the API, or an error
                dictionary if the request fails.
        """
        url = f"{self.base_url}/paper/search/bulk"
        query_params = {"query": query, "fields": fields, "year": year}
        try:
            response = requests.get(
                url, params=query_params, timeout=self._TIMEOUT
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            return {
                "error": f"Request failed: {e!s}",
                "message": str(e),
            }
        except ValueError:
            return {
                "error": "Response is not valid JSON",
                "message": response.text,
            }

    def fetch_recommended_papers(
        self,
        positive_paper_ids: List[str],
        negative_paper_ids: List[str],
        fields: str = (
            "title,url,citationCount,authors,"
            "publicationTypes,publicationDate,openAccessPdf"
        ),
        limit: int = 500,
        save_to_file: bool = False,
    ) -> dict:
        r"""Fetches recommended papers from the Semantic Scholar API based
        on positive and negative example paper IDs.

        Args:
            positive_paper_ids (List[str]): Paper IDs that are positively
                correlated with the desired recommendations.
            negative_paper_ids (List[str]): Paper IDs that are negatively
                correlated with the desired recommendations.
            fields (str): A comma-separated list of fields to include in
                the response.
            limit (int): The maximum number of recommended papers to
                return (default 500, the API maximum).
            save_to_file (bool): If True, also writes the response to
                ``recommended_papers.json`` (default False).

        Returns:
            dict: The parsed JSON response containing the recommended
                papers, or an error dictionary if the request fails.
        """
        # The recommendations endpoint lives outside the /graph/v1 base.
        url = "https://api.semanticscholar.org/recommendations/v1/papers"
        query_params = {"fields": fields, "limit": str(limit)}
        data = {
            "positivePaperIds": positive_paper_ids,
            "negativePaperIds": negative_paper_ids,
        }
        try:
            response = requests.post(
                url, params=query_params, json=data, timeout=self._TIMEOUT
            )
            response.raise_for_status()
            papers = response.json()
            if save_to_file:
                with open('recommended_papers.json', 'w') as output:
                    json.dump(papers, output)
            return papers
        except requests.exceptions.RequestException as e:
            return {
                "error": f"Request failed: {e!s}",
                "message": str(e),
            }
        except ValueError:
            return {
                "error": "Response is not valid JSON",
                "message": response.text,
            }

    def fetch_author_data(
        self,
        ids: List[str],
        fields: str = "name,url,paperCount,hIndex,papers",
        save_to_file: bool = False,
    ) -> dict:
        r"""Fetches author information from the Semantic Scholar API based
        on author IDs.

        Args:
            ids (List[str]): A list of author IDs (as strings) to fetch
                data for.
            fields (str): A comma-separated list of fields to include in
                the response (default includes name, URL, paper count,
                h-index, and papers).
            save_to_file (bool): If True, also writes the response to
                ``author_information.json`` (default False).

        Returns:
            dict: The parsed JSON response from the API, or an error
                dictionary if the request fails.
        """
        url = f"{self.base_url}/author/batch"
        query_params = {"fields": fields}
        data = {"ids": ids}
        try:
            response = requests.post(
                url, params=query_params, json=data, timeout=self._TIMEOUT
            )
            response.raise_for_status()
            response_data = response.json()
            if save_to_file:
                with open('author_information.json', 'w') as output:
                    json.dump(response_data, output)
            return response_data
        except requests.exceptions.RequestException as e:
            return {
                "error": f"Request failed: {e!s}",
                "message": str(e),
            }
        except ValueError:
            return {
                "error": "Response is not valid JSON",
                "message": response.text,
            }

    def get_tools(self) -> List[FunctionTool]:
        r"""Returns a list of FunctionTool objects representing the
        functions in the toolkit.

        Returns:
            List[FunctionTool]: A list of FunctionTool objects
                representing the functions in the toolkit.
        """
        return [
            FunctionTool(self.fetch_paper_data_title),
            FunctionTool(self.fetch_paper_data_id),
            FunctionTool(self.fetch_bulk_paper_data),
            FunctionTool(self.fetch_recommended_papers),
            FunctionTool(self.fetch_author_data),
        ]
from camel.agents import ChatAgent
from camel.configs import ChatGPTConfig
from camel.messages import BaseMessage
from camel.models import ModelFactory
from camel.toolkits import SemanticScholarToolkit
from camel.types import ModelPlatformType, ModelType

# Build the backing LLM (the platform default, e.g. gpt-4o) with
# temperature 0 so the tool calls in this demo are reproducible.
model = ModelFactory.create(
    model_platform=ModelPlatformType.DEFAULT,
    model_type=ModelType.DEFAULT,
    model_config_dict=ChatGPTConfig(temperature=0.0).as_dict(),
)

sys_msg = BaseMessage.make_assistant_message(
    role_name='Tools calling operator', content='You are a helpful assistant'
)

# Expose every SemanticScholarToolkit function to the agent.
toolkit = SemanticScholarToolkit()
tools = toolkit.get_tools()

camel_agent = ChatAgent(
    system_message=sys_msg,
    model=model,
    tools=tools,
)
camel_agent.reset()


def ask(content: str, preview: int = 1000) -> None:
    """Send one user message to the agent and print a truncated record of
    the tool calls it made."""
    usr_msg = BaseMessage.make_user_message(
        role_name="CAMEL User", content=content
    )
    response = camel_agent.step(usr_msg)
    print(str(response.info['tool_calls'])[:preview])


# Ask the agent to describe the tools it now has; here we print the
# assistant's text rather than the tool-call record.
usr_msg = BaseMessage.make_user_message(
    role_name="CAMEL User", content="""Describe the tools you've added"""
)
response = camel_agent.step(usr_msg)
print(response.msgs[0].content)
# Expected: a summary of the five tools (fetch_paper_data_title,
# fetch_paper_data_id, fetch_bulk_paper_data, fetch_recommended_papers,
# fetch_author_data).

# Search a paper by its TITLE (asking for the paperId in the result).
ask(
    """search the paper 'Construction of the Literature
    Graph in Semantic Scholar' for me including its paperid"""
)
# Expected: fetch_paper_data_title is called and returns paperId
# '649def34f8be52c8b66281af98ae884c09aef38b'.

# Search a paper by its paper ID.
ask(
    """search the paper with paper id of
    '649def34f8be52c8b66281af98ae884c09aef38b' for me"""
)
# Expected: fetch_paper_data_id returns the same paper with its abstract,
# authors, year, citation count, etc.

# Search papers through a related topic.
ask(
    """search 10 papers with topic related to
    'generative ai' from 2024 for me"""
)
# Expected: fetch_bulk_paper_data with query 'generative ai' and
# year '2024-'.

# Search papers through a related topic using a boolean operator.
ask(
    """search 20 papers with topic related to
    'ai and bio' from 2024 for me"""
)
# Expected: fetch_bulk_paper_data with query 'ai and bio'.

# Recommend papers from positive and negative paper IDs.
ask(
    """recommend 20 papers with positive paper id
    of "02138d6d094d1e7511c157f0b1a3dd4e5b20ebee",
    "018f58247a20ec6b3256fd3119f57980a6f37748" and negative
    paper id of "0045ad0c1e14a4d1f4b011c92eb36b8df63d65bc"
    for me"""
)
# Expected: fetch_recommended_papers with the two positive IDs, one
# negative ID and limit 20.

# Search author information through author IDs.
ask(
    """search the authors of author ids of "2281351310",
    "2281342663","2300302076","2300141520" for me"""
)
# Expected: fetch_author_data returning name, url, paperCount, hIndex and
# papers for each author.

# Recommend papers and save the result in a file.
ask(
    """recommend 20 papers with positive paper id
    of "02138d6d094d1e7511c157f0b1a3dd4e5b20ebee",
    "018f58247a20ec6b3256fd3119f57980a6f37748" and negative paper
    id of "0045ad0c1e14a4d1f4b011c92eb36b8df63d65bc" for me,
    and please save the result in a file."""
)
# Expected: fetch_recommended_papers with save_to_file=True; the result is
# written to recommended_papers.json.

# Search author information and save the result in a file.
ask(
    """search the authors of author ids of "2281351310"
    ,"2281342663","2300302076","2300141520" for me, and please
    save the record in a file."""
)
# Expected: fetch_author_data with save_to_file=True; the result is
# written to author_information.json.
import unittest
from unittest.mock import MagicMock, patch

from requests.exceptions import HTTPError

from camel.toolkits.semanticscholar_toolkit import SemanticScholarToolkit


class TestSemanticScholarToolkit(unittest.TestCase):
    """Unit tests for SemanticScholarToolkit with all HTTP traffic mocked.

    Failure-path tests raise ``HTTPError`` from ``raise_for_status`` so the
    toolkit's error branch actually executes: setting only ``status_code``
    on a MagicMock is not enough, because a mocked ``raise_for_status`` is
    a no-op and the error handling would never run.
    """

    def setUp(self):
        """Create a fresh toolkit instance before each test."""
        self.toolkit = SemanticScholarToolkit()

    @patch("requests.get")
    def test_fetch_paper_data_title_success(self, mock_get):
        """fetch_paper_data_title returns the parsed JSON on success."""
        mock_response_data = {"data": "some paper data"}
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = mock_response_data
        mock_get.return_value = mock_response

        paper_title = "A Test Paper"
        response = self.toolkit.fetch_paper_data_title(paper_title)

        # Verify the call details of requests.get.
        mock_get.assert_called_once()
        called_url = mock_get.call_args[0][0]  # First positional arg: URL.
        called_params = mock_get.call_args[1]["params"]
        self.assertIn("paper/search", called_url)
        self.assertEqual(called_params["query"], paper_title)

        self.assertEqual(response, mock_response_data)

    @patch("requests.get")
    def test_fetch_paper_data_title_error(self, mock_get):
        """A 404 surfaces as an error dictionary, not an exception."""
        mock_response = MagicMock()
        mock_response.status_code = 404
        # Trigger the RequestException branch for real.
        mock_response.raise_for_status.side_effect = HTTPError(
            "404 Not Found"
        )
        mock_get.return_value = mock_response

        response = self.toolkit.fetch_paper_data_title("Nonexistent Paper")

        self.assertIn("error", response)
        self.assertIn("404", response["error"])
        self.assertEqual(response["message"], "404 Not Found")

    @patch("requests.get")
    def test_fetch_paper_data_id_success(self, mock_get):
        """fetch_paper_data_id hits the per-paper endpoint and returns
        the parsed JSON."""
        mock_response_data = {"title": "Paper Title by ID"}
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = mock_response_data
        mock_get.return_value = mock_response

        paper_id = "abcdef123456"
        response = self.toolkit.fetch_paper_data_id(paper_id)

        mock_get.assert_called_once()
        self.assertEqual(response, mock_response_data)
        # The paper ID is part of the URL path, not a query parameter.
        called_url = mock_get.call_args[0][0]
        self.assertIn(paper_id, called_url)

    @patch("requests.get")
    def test_fetch_paper_data_id_failure(self, mock_get):
        """A 500 surfaces as an error dictionary."""
        mock_response = MagicMock()
        mock_response.status_code = 500
        mock_response.raise_for_status.side_effect = HTTPError(
            "500 Internal Server Error"
        )
        mock_get.return_value = mock_response

        response = self.toolkit.fetch_paper_data_id("xyz789")

        self.assertIn("error", response)
        self.assertIn("500", response["error"])
        self.assertEqual(response["message"], "500 Internal Server Error")

    @patch("requests.get")
    def test_fetch_bulk_paper_data_success(self, mock_get):
        """fetch_bulk_paper_data hits the bulk endpoint with the query."""
        mock_response_data = {"data": ["paper1", "paper2"]}
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = mock_response_data
        mock_get.return_value = mock_response

        query_str = "cloud computing"
        response = self.toolkit.fetch_bulk_paper_data(query_str)

        mock_get.assert_called_once()
        self.assertEqual(response, mock_response_data)

        called_url = mock_get.call_args[0][0]
        called_params = mock_get.call_args[1]["params"]
        self.assertIn("bulk", called_url)
        self.assertEqual(called_params["query"], query_str)

    @patch("requests.get")
    def test_fetch_bulk_paper_data_failure(self, mock_get):
        """A 403 surfaces as an error dictionary."""
        mock_response = MagicMock()
        mock_response.status_code = 403
        mock_response.raise_for_status.side_effect = HTTPError(
            "403 Forbidden"
        )
        mock_get.return_value = mock_response

        response = self.toolkit.fetch_bulk_paper_data("quantum computing")

        self.assertIn("error", response)
        self.assertIn("403", response["error"])
        self.assertEqual(response["message"], "403 Forbidden")

    @patch("requests.post")
    def test_fetch_recommended_papers_success(self, mock_post):
        """fetch_recommended_papers POSTs the positive/negative IDs."""
        mock_response_data = {"papers": [{"id": "123"}, {"id": "456"}]}
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = mock_response_data
        mock_post.return_value = mock_response

        pos_ids = ["111", "222"]
        neg_ids = ["333"]
        result = self.toolkit.fetch_recommended_papers(
            positive_paper_ids=pos_ids,
            negative_paper_ids=neg_ids,
            save_to_file=False,
        )

        mock_post.assert_called_once()
        called_url = mock_post.call_args[0][0]
        self.assertIn("recommendations/v1/papers", called_url)

        called_json = mock_post.call_args[1]["json"]
        self.assertEqual(called_json["positivePaperIds"], pos_ids)
        self.assertEqual(called_json["negativePaperIds"], neg_ids)

        self.assertEqual(result, mock_response_data)

    @patch("requests.post")
    def test_fetch_recommended_papers_failure(self, mock_post):
        """A 400 surfaces as an error dictionary."""
        mock_response = MagicMock()
        mock_response.status_code = 400
        mock_response.raise_for_status.side_effect = HTTPError(
            "400 Bad Request"
        )
        mock_post.return_value = mock_response

        result = self.toolkit.fetch_recommended_papers(
            positive_paper_ids=["p1"], negative_paper_ids=["p2"]
        )

        self.assertIn("error", result)
        self.assertIn("400", result["error"])

    @patch("requests.post")
    def test_fetch_author_data_success(self, mock_post):
        """fetch_author_data POSTs the author IDs to the batch endpoint."""
        mock_response_data = {
            "data": [
                {"authorId": "A1", "name": "Author One"},
                {"authorId": "A2", "name": "Author Two"},
            ]
        }
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = mock_response_data
        mock_post.return_value = mock_response

        author_ids = ["A1", "A2"]
        result = self.toolkit.fetch_author_data(author_ids, save_to_file=False)

        mock_post.assert_called_once()
        self.assertEqual(result, mock_response_data)
        called_json = mock_post.call_args[1]["json"]
        self.assertEqual(called_json["ids"], author_ids)

    @patch("requests.post")
    def test_fetch_author_data_failure(self, mock_post):
        """An HTTPError from raise_for_status surfaces as an error dict."""
        mock_response = MagicMock()
        mock_response.status_code = 404
        mock_response.raise_for_status.side_effect = HTTPError(
            "404 Client Error"
        )
        mock_post.return_value = mock_response

        result = self.toolkit.fetch_author_data(["A999"])

        self.assertIn("error", result)
        self.assertIn("404 Client Error", result["error"])

    def test_get_tools(self):
        """get_tools exposes exactly the five toolkit methods, in order."""
        tools = self.toolkit.get_tools()
        self.assertEqual(len(tools), 5)
        self.assertEqual(tools[0].func, self.toolkit.fetch_paper_data_title)
        self.assertEqual(tools[1].func, self.toolkit.fetch_paper_data_id)
        self.assertEqual(tools[2].func, self.toolkit.fetch_bulk_paper_data)
        self.assertEqual(tools[3].func, self.toolkit.fetch_recommended_papers)
        self.assertEqual(tools[4].func, self.toolkit.fetch_author_data)


if __name__ == "__main__":
    unittest.main()