Skip to content

Commit

Permalink
add more reasonable arxiv retriever (#13327)
Browse files Browse the repository at this point in the history
  • Loading branch information
hwchase17 authored Nov 14, 2023
1 parent 4b7a858 commit be85422
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 1 deletion.
7 changes: 6 additions & 1 deletion libs/langchain/langchain/retrievers/arxiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@ class ArxivRetriever(BaseRetriever, ArxivAPIWrapper):
It uses all ArxivAPIWrapper arguments without any change.
"""

# Selects the retrieval path used by `_get_relevant_documents`: when true the
# retriever calls `self.load`, otherwise `self.get_summaries_as_docs`.
get_full_documents: bool = False

def _get_relevant_documents(
    self, query: str, *, run_manager: CallbackManagerForRetrieverRun
) -> List[Document]:
    """Return arXiv documents for ``query``.

    Args:
        query: search text passed through to the arXiv wrapper.
        run_manager: callback manager for this retriever run; not used by
            this implementation.

    Returns:
        The result of ``self.load(query=query)`` when
        ``self.get_full_documents`` is true, otherwise the result of
        ``self.get_summaries_as_docs(query)``.
    """
    # The flag must be consulted before anything is returned; an
    # unconditional return ahead of this check would make the summary
    # path unreachable.
    if self.get_full_documents:
        return self.load(query=query)
    return self.get_summaries_as_docs(query)
37 changes: 37 additions & 0 deletions libs/langchain/langchain/utilities/arxiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,43 @@ def validate_environment(cls, values: Dict) -> Dict:
)
return values

def get_summaries_as_docs(self, query: str) -> List[Document]:
    """Search arXiv and return one Document per result, summary as content.

    Wrapper for https://lukasschwab.me/arxiv.py/index.html#Search

    Args:
        query: a plaintext search query. When ``self.is_arxiv_identifier``
            accepts it, the query is split on whitespace and used as an
            id list instead; otherwise it is truncated to
            ``self.ARXIV_MAX_QUERY_LENGTH`` characters.

    Returns:
        A list with one Document per search result. ``page_content`` is the
        result summary; metadata carries "Published" (the date part of the
        result's ``updated`` timestamp), "Title" and "Authors" (names
        joined with ", "). The list is empty when the search yields
        nothing. If the search raises one of ``self.arxiv_exceptions``, a
        single Document whose content is the error text is returned.
    """
    try:
        if self.is_arxiv_identifier(query):
            search = self.arxiv_search(
                id_list=query.split(),
                max_results=self.top_k_results,
            )
        else:
            search = self.arxiv_search(  # type: ignore
                query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
            )
        # Consume the results inside the ``try``: ``results()`` can be a
        # lazy generator, in which case fetch errors only surface during
        # iteration and would otherwise bypass the handler below.
        results = list(search.results())
    except self.arxiv_exceptions as ex:
        return [Document(page_content=f"Arxiv exception: {ex}")]
    return [
        Document(
            page_content=result.summary,
            metadata={
                "Published": result.updated.date(),
                "Title": result.title,
                "Authors": ", ".join(a.name for a in result.authors),
            },
        )
        for result in results
    ]

def run(self, query: str) -> str:
"""
Performs an arxiv search and A single string
Expand Down

0 comments on commit be85422

Please sign in to comment.