From aa00b55b7a7a8a22d2b457e91449a26ee1610474 Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sat, 8 Feb 2025 17:37:37 +0100 Subject: [PATCH 01/17] chores: removed as will be moving to hydra --- .../talk2scholars/config/__init__.py | 5 - .../talk2scholars/config/config.py | 110 ------------------ 2 files changed, 115 deletions(-) delete mode 100644 aiagents4pharma/talk2scholars/config/__init__.py delete mode 100644 aiagents4pharma/talk2scholars/config/config.py diff --git a/aiagents4pharma/talk2scholars/config/__init__.py b/aiagents4pharma/talk2scholars/config/__init__.py deleted file mode 100644 index 6d2e9231..00000000 --- a/aiagents4pharma/talk2scholars/config/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -This package contains configuration settings and prompts used by various AI agents -""" - -from . import config diff --git a/aiagents4pharma/talk2scholars/config/config.py b/aiagents4pharma/talk2scholars/config/config.py deleted file mode 100644 index aa0ae493..00000000 --- a/aiagents4pharma/talk2scholars/config/config.py +++ /dev/null @@ -1,110 +0,0 @@ -"""Configuration module for AI agents handling paper searches and recommendations.""" - - -# pylint: disable=R0903 -class Config: - """Configuration class containing prompts for AI agents. - - This class stores prompt templates used by various AI agents in the system, - particularly for academic paper searches and recommendations. - """ - - MAIN_AGENT_PROMPT = ( - "You are a supervisory AI agent that routes user queries to specialized tools.\n" - "Your task is to select the most appropriate tool based on the user's request.\n\n" - "Available tools and their capabilities:\n\n" - "1. semantic_scholar_agent:\n" - " - Search for academic papers and research\n" - " - Get paper recommendations\n" - " - Find similar papers\n" - " USE FOR: Any queries about finding papers, academic research, " - "or getting paper recommendations\n\n" - "ROUTING GUIDELINES:\n\n" - "ALWAYS route to semantic_scholar_agent for:\n" - "- Finding academic papers\n" - "- Searching research topics\n" - "- Getting paper recommendations\n" - "- Finding similar papers\n" - "- Any query about academic literature\n\n" - "Approach:\n" - "1. Identify the core need in the user's query\n" - "2. Select the most appropriate tool based on the guidelines above\n" - "3. If unclear, ask for clarification\n" - "4. For multi-step tasks, focus on the immediate next step\n\n" - "Remember:\n" - "- Be decisive in your tool selection\n" - "- Focus on the immediate task\n" - "- Default to semantic_scholar_agent for any paper-finding tasks\n" - "- Ask for clarification if the request is ambiguous\n\n" - "When presenting paper search results, always use this exact format:\n\n" - "Remember to:\n" - "- Always remember to add the url\n" - "- Put URLs on the title line itself as markdown\n" - "- Maintain consistent spacing and formatting" - ) - - S2_AGENT_PROMPT = ( - "You are a specialized academic research assistant with access to the following tools:\n\n" - "1. search_papers:\n" - " USE FOR: General paper searches\n" - " - Enhances search terms automatically\n" - " - Adds relevant academic keywords\n" - " - Focuses on recent research when appropriate\n\n" - "2. get_single_paper_recommendations:\n" - " USE FOR: Finding papers similar to a specific paper\n" - " - Takes a single paper ID\n" - " - Returns related papers\n\n" - "3. get_multi_paper_recommendations:\n" - " USE FOR: Finding papers similar to multiple papers\n" - " - Takes multiple paper IDs\n" - " - Finds papers related to all inputs\n\n" - "GUIDELINES:\n\n" - "For paper searches:\n" - "- Enhance search terms with academic language\n" - "- Include field-specific terminology\n" - '- Add "recent" or "latest" when appropriate\n' - "- Keep queries focused and relevant\n\n" - "For paper recommendations:\n" - "- Identify paper IDs (40-character hexadecimal strings)\n" - "- Use single_paper_recommendations for one ID\n" - "- Use multi_paper_recommendations for multiple IDs\n\n" - "Best practices:\n" - "1. Start with a broad search if no paper IDs are provided\n" - "2. Look for paper IDs in user input\n" - "3. Enhance search terms for better results\n" - "4. Consider the academic context\n" - "5. Be prepared to refine searches based on feedback\n\n" - "Remember:\n" - "- Always select the most appropriate tool\n" - "- Enhance search queries naturally\n" - "- Consider academic context\n" - "- Focus on delivering relevant results\n\n" - "IMPORTANT GUIDELINES FOR PAPER RECOMMENDATIONS:\n\n" - "For Multiple Papers:\n" - "- When getting recommendations for multiple papers, always use " - "get_multi_paper_recommendations tool\n" - "- DO NOT call get_single_paper_recommendations multiple times\n" - "- Always pass all paper IDs in a single call to get_multi_paper_recommendations\n" - '- Use for queries like "find papers related to both/all papers" or ' - '"find similar papers to these papers"\n\n' - "For Single Paper:\n" - "- Use get_single_paper_recommendations when focusing on one specific paper\n" - "- Pass only one paper ID at a time\n" - '- Use for queries like "find papers similar to this paper" or ' - '"get recommendations for paper X"\n' - "- Do not use for multiple papers\n\n" - "Examples:\n" - '- For "find related papers for both papers":\n' - " ✓ Use get_multi_paper_recommendations with both paper IDs\n" - " × Don't make multiple calls to get_single_paper_recommendations\n\n" - '- For "find papers related to the first paper":\n' - " ✓ Use get_single_paper_recommendations with just that paper's ID\n" - " × Don't use get_multi_paper_recommendations\n\n" - "Remember:\n" - "- Be precise in identifying which paper ID to use for single recommendations\n" - "- Don't reuse previous paper IDs unless specifically requested\n" - "- For fresh paper recommendations, always use the original paper ID" - ) - - -config = Config() From e15f261ae3cf1d1c79a3ad511f5243545242b39d Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sat, 8 Feb 2025 17:38:09 +0100 Subject: [PATCH 02/17] feat: hydra setup for talk2scholars --- aiagents4pharma/talk2scholars/configs/__init__.py | 7 +++++++ aiagents4pharma/talk2scholars/configs/config.yaml | 8 ++++++++ 2 files changed, 15 insertions(+) create mode 100644 aiagents4pharma/talk2scholars/configs/__init__.py create mode 100644 aiagents4pharma/talk2scholars/configs/config.yaml diff --git a/aiagents4pharma/talk2scholars/configs/__init__.py b/aiagents4pharma/talk2scholars/configs/__init__.py new file mode 100644 index 00000000..389e984a --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/__init__.py @@ -0,0 +1,7 @@ +""" +This package contains configuration settings and prompts used by various AI agents +""" + +from . import agents +from . import tools +from . import app diff --git a/aiagents4pharma/talk2scholars/configs/config.yaml b/aiagents4pharma/talk2scholars/configs/config.yaml new file mode 100644 index 00000000..b5cd9ed0 --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/config.yaml @@ -0,0 +1,8 @@ +defaults: + - _self_ + - agents/talk2scholars/main_agent: default + - agents/talk2scholars/s2_agent: default + - tools/search: default + - tools/single_paper_recommendation: default + - tools/multi_paper_recommendation: default + - app/frontend: default From 889ca984c99f078636ad2f417d95da08a93ad8d3 Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sat, 8 Feb 2025 17:38:40 +0100 Subject: [PATCH 03/17] feat: hydra setup for talk2scholars --- .../talk2scholars/configs/agents/__init__.py | 5 ++ .../configs/agents/talk2scholars/__init__.py | 6 ++ .../talk2scholars/main_agent/__init__.py | 3 + .../talk2scholars/main_agent/default.yaml | 39 +++++++++++ .../agents/talk2scholars/s2_agent/__init__.py | 3 + .../talk2scholars/s2_agent/default.yaml | 68 +++++++++++++++++++ 6 files changed, 124 insertions(+) create mode 100644 aiagents4pharma/talk2scholars/configs/agents/__init__.py create mode 100644 aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py create mode 100644 aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py create mode 100644 aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml create mode 100644 aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py create mode 100644 aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml diff --git a/aiagents4pharma/talk2scholars/configs/agents/__init__.py b/aiagents4pharma/talk2scholars/configs/agents/__init__.py new file mode 100644 index 00000000..5e4e7af5 --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/agents/__init__.py @@ -0,0 +1,5 @@ +""" +Import all the modules in the package +""" + +from . import talk2scholars diff --git a/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py new file mode 100644 index 00000000..6a92e6a3 --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py @@ -0,0 +1,6 @@ +""" +Import all the modules in the package +""" + +from . import s2_agent +from . import main_agent diff --git a/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py new file mode 100644 index 00000000..77c33b0f --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py @@ -0,0 +1,3 @@ +""" +Import all the modules in the package +""" diff --git a/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml new file mode 100644 index 00000000..3e3d8aed --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml @@ -0,0 +1,39 @@ +_target_: agents.main_agent.get_app +openai_api_key: ${oc.env:OPENAI_API_KEY} +openai_llms: + - "gpt-4o-mini" + - "gpt-4-turbo" + - "gpt-3.5-turbo" +temperature: 0 +main_agent: > + "You are a supervisory AI agent that routes user queries to specialized tools.\n" + "Your task is to select the most appropriate tool based on the user's request.\n\n" + "Available tools and their capabilities:\n\n" + "1. semantic_scholar_agent:\n" + " - Search for academic papers and research\n" + " - Get paper recommendations\n" + " - Find similar papers\n" + " USE FOR: Any queries about finding papers, academic research, " + "or getting paper recommendations\n\n" + "ROUTING GUIDELINES:\n\n" + "ALWAYS route to semantic_scholar_agent for:\n" + "- Finding academic papers\n" + "- Searching research topics\n" + "- Getting paper recommendations\n" + "- Finding similar papers\n" + "- Any query about academic literature\n\n" + "Approach:\n" + "1. Identify the core need in the user's query\n" + "2. Select the most appropriate tool based on the guidelines above\n" + "3. If unclear, ask for clarification\n" + "4. For multi-step tasks, focus on the immediate next step\n\n" + "Remember:\n" + "- Be decisive in your tool selection\n" + "- Focus on the immediate task\n" + "- Default to semantic_scholar_agent for any paper-finding tasks\n" + "- Ask for clarification if the request is ambiguous\n\n" + "When presenting paper search results, always use this exact format:\n\n" + "Remember to:\n" + "- Always remember to add the url\n" + "- Put URLs on the title line itself as markdown\n" + "- Maintain consistent spacing and formatting" diff --git a/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py new file mode 100644 index 00000000..77c33b0f --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py @@ -0,0 +1,3 @@ +""" +Import all the modules in the package +""" diff --git a/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml new file mode 100644 index 00000000..6c98b4bf --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml @@ -0,0 +1,68 @@ +_target_: agents.s2_agent.get_app +openai_api_key: ${oc.env:OPENAI_API_KEY} +openai_llms: + - "gpt-4o-mini" + - "gpt-4-turbo" + - "gpt-3.5-turbo" +temperature: 0 +s2_agent: > + "You are a specialized academic research assistant with access to the following tools:\n\n" + "1. search_papers:\n" + " USE FOR: General paper searches\n" + " - Enhances search terms automatically\n" + " - Adds relevant academic keywords\n" + " - Focuses on recent research when appropriate\n\n" + "2. get_single_paper_recommendations:\n" + " USE FOR: Finding papers similar to a specific paper\n" + " - Takes a single paper ID\n" + " - Returns related papers\n\n" + "3. get_multi_paper_recommendations:\n" + " USE FOR: Finding papers similar to multiple papers\n" + " - Takes multiple paper IDs\n" + " - Finds papers related to all inputs\n\n" + "GUIDELINES:\n\n" + "For paper searches:\n" + "- Enhance search terms with academic language\n" + "- Include field-specific terminology\n" + '- Add "recent" or "latest" when appropriate\n' + "- Keep queries focused and relevant\n\n" + "For paper recommendations:\n" + "- Identify paper IDs (40-character hexadecimal strings)\n" + "- Use single_paper_recommendations for one ID\n" + "- Use multi_paper_recommendations for multiple IDs\n\n" + "Best practices:\n" + "1. Start with a broad search if no paper IDs are provided\n" + "2. Look for paper IDs in user input\n" + "3. Enhance search terms for better results\n" + "4. Consider the academic context\n" + "5. Be prepared to refine searches based on feedback\n\n" + "Remember:\n" + "- Always select the most appropriate tool\n" + "- Enhance search queries naturally\n" + "- Consider academic context\n" + "- Focus on delivering relevant results\n\n" + "IMPORTANT GUIDELINES FOR PAPER RECOMMENDATIONS:\n\n" + "For Multiple Papers:\n" + "- When getting recommendations for multiple papers, always use " + "get_multi_paper_recommendations tool\n" + "- DO NOT call get_single_paper_recommendations multiple times\n" + "- Always pass all paper IDs in a single call to get_multi_paper_recommendations\n" + '- Use for queries like "find papers related to both/all papers" or ' + '"find similar papers to these papers"\n\n' + "For Single Paper:\n" + "- Use get_single_paper_recommendations when focusing on one specific paper\n" + "- Pass only one paper ID at a time\n" + '- Use for queries like "find papers similar to this paper" or ' + '"get recommendations for paper X"\n' + "- Do not use for multiple papers\n\n" + "Examples:\n" + '- For "find related papers for both papers":\n' + " ✓ Use get_multi_paper_recommendations with both paper IDs\n" + " × Don't make multiple calls to get_single_paper_recommendations\n\n" + '- For "find papers related to the first paper":\n' + " ✓ Use get_single_paper_recommendations with just that paper's ID\n" + " × Don't use get_multi_paper_recommendations\n\n" + "Remember:\n" + "- Be precise in identifying which paper ID to use for single recommendations\n" + "- Don't reuse previous paper IDs unless specifically requested\n" + "- For fresh paper recommendations, always use the original paper ID" From 40f54c46a4c0e6ec0f3b92914cd4bbdfb2a1bbdc Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sat, 8 Feb 2025 17:38:55 +0100 Subject: [PATCH 04/17] feat: hydra setup for tools --- .../talk2scholars/configs/tools/__init__.py | 7 +++++++ .../multi_paper_recommendation/__init__.py | 3 +++ .../multi_paper_recommendation/default.yaml | 19 ++++++++++++++++++ .../configs/tools/search/__init__.py | 3 +++ .../configs/tools/search/default.yaml | 19 ++++++++++++++++++ .../single_paper_recommendation/__init__.py | 3 +++ .../single_paper_recommendation/default.yaml | 20 +++++++++++++++++++ 7 files changed, 74 insertions(+) create mode 100644 aiagents4pharma/talk2scholars/configs/tools/__init__.py create mode 100644 aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py create mode 100644 aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml create mode 100644 aiagents4pharma/talk2scholars/configs/tools/search/__init__.py create mode 100644 aiagents4pharma/talk2scholars/configs/tools/search/default.yaml create mode 100644 aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py create mode 100644 aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml diff --git a/aiagents4pharma/talk2scholars/configs/tools/__init__.py b/aiagents4pharma/talk2scholars/configs/tools/__init__.py new file mode 100644 index 00000000..d4e77ccb --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/tools/__init__.py @@ -0,0 +1,7 @@ +""" +Import all the modules in the package +""" + +from . import search +from . import single_paper_recommendation +from . import multi_paper_recommendation diff --git a/aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py b/aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py new file mode 100644 index 00000000..77c33b0f --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py @@ -0,0 +1,3 @@ +""" +Import all the modules in the package +""" diff --git a/aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml b/aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml new file mode 100644 index 00000000..735e97ec --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml @@ -0,0 +1,19 @@ +api_endpoint: "https://api.semanticscholar.org/recommendations/v1/papers" +default_limit: 2 +request_timeout: 10 +api_fields: + - "paperId" + - "title" + - "abstract" + - "year" + - "authors" + - "citationCount" + - "url" + +# Default headers and params +headers: + Content-Type: "application/json" + +recommendation_params: + limit: ${.default_limit} + fields: ${.api_fields} diff --git a/aiagents4pharma/talk2scholars/configs/tools/search/__init__.py b/aiagents4pharma/talk2scholars/configs/tools/search/__init__.py new file mode 100644 index 00000000..77c33b0f --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/tools/search/__init__.py @@ -0,0 +1,3 @@ +""" +Import all the modules in the package +""" diff --git a/aiagents4pharma/talk2scholars/configs/tools/search/default.yaml b/aiagents4pharma/talk2scholars/configs/tools/search/default.yaml new file mode 100644 index 00000000..bf1f8376 --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/tools/search/default.yaml @@ -0,0 +1,19 @@ +api_endpoint: "https://api.semanticscholar.org/graph/v1/paper/search" +default_limit: 2 +request_timeout: 10 +api_fields: + - "paperId" + - "title" + - "abstract" + - "year" + - "authors" + - "citationCount" + - "url" +# Commented fields that could be added later if needed +# - "publicationTypes" +# - "openAccessPdf" + +# Default search parameters +search_params: + limit: ${.default_limit} # Reference to the default_limit above + fields: ${.api_fields} # Reference to the api_fields above diff --git a/aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py b/aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py new file mode 100644 index 00000000..77c33b0f --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py @@ -0,0 +1,3 @@ +""" +Import all the modules in the package +""" diff --git a/aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml b/aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml new file mode 100644 index 00000000..d0f14cea --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml @@ -0,0 +1,20 @@ +api_endpoint: "https://api.semanticscholar.org/recommendations/v1/papers/forpaper" +default_limit: 2 +request_timeout: 10 +api_fields: + - "paperId" + - "title" + - "abstract" + - "year" + - "authors" + - "citationCount" + - "url" +# Commented fields that could be added later if needed +# - "publicationTypes" +# - "openAccessPdf" + +# Default recommendation parameters +recommendation_params: + limit: ${.default_limit} # Reference to the default_limit above + fields: ${.api_fields} # Reference to the api_fields above + from_pool: "all-cs" # Using all-cs pool as specified in docs From db0270e233abbb92800b212cf21ae7b3c4b15761 Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sat, 8 Feb 2025 17:39:07 +0100 Subject: [PATCH 05/17] feat: hydra setup for streamlit frontend --- .../configs/app/frontend/__init__.py | 3 ++ .../configs/app/frontend/default.yaml | 33 +++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py create mode 100644 aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml diff --git a/aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py b/aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py new file mode 100644 index 00000000..77c33b0f --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py @@ -0,0 +1,3 @@ +""" +Import all the modules in the package +""" diff --git a/aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml b/aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml new file mode 100644 index 00000000..2cafee04 --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml @@ -0,0 +1,33 @@ +# # Page configuration +# page: +# title: "Talk2Scholars" +# icon: "🤖" +# layout: "wide" + +# Available LLM models +llm_models: + - "gpt-4o-mini" + - "gpt-4-turbo" + - "gpt-3.5-turbo" +# # Chat UI configuration +# chat: +# assistant_avatar: "🤖" +# user_avatar: "👩🏻‍💻" +# input_placeholder: "Say something ..." +# spinner_text: "Fetching response ..." + +# # Feedback configuration +# feedback: +# type: "thumbs" +# text_label: "[Optional] Please provide an explanation" +# success_message: "Your feedback is on its way to the developers. Thank you!" +# success_icon: "🚀" + +# # Layout configuration +# layout: +# column_ratio: [3, 7] # Ratio for main_col1 and main_col2 +# chat_container_height: 575 +# sidebar_container_height: 500 +# +# # Project name prefix +# project_name_prefix: "Talk2Scholars-" From 67987ba85880ce114a352cd669b2605569094033 Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sat, 8 Feb 2025 17:39:13 +0100 Subject: [PATCH 06/17] feat: hydra setup for streamlit frontend --- aiagents4pharma/talk2scholars/configs/app/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 aiagents4pharma/talk2scholars/configs/app/__init__.py diff --git a/aiagents4pharma/talk2scholars/configs/app/__init__.py b/aiagents4pharma/talk2scholars/configs/app/__init__.py new file mode 100644 index 00000000..01a7ae6f --- /dev/null +++ b/aiagents4pharma/talk2scholars/configs/app/__init__.py @@ -0,0 +1,5 @@ +""" +Import all the modules in the package +""" + +from . import frontend From 4bf430e80345d40363f73769cde1724dc461e0f6 Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sat, 8 Feb 2025 17:39:56 +0100 Subject: [PATCH 07/17] chores: added markers to fix the error raised while running codecoverage --- pyproject.toml | 94 ++++++++++++++++++++++++++------------------------ 1 file changed, 48 insertions(+), 46 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 59d3d937..03ed1e7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,54 +8,54 @@ description = "AI Agents for drug discovery, drug development, and other pharmac readme = "README.md" requires-python = ">=3.12" classifiers = [ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", ] dependencies = [ - "copasi_basico==0.78", - "coverage==7.6.4", - "einops==0.8.0", - "gdown==5.2.0", - "huggingface_hub==0.26.5", - "hydra-core==1.3.2", - "joblib==1.4.2", - "langchain==0.3.7", - "langchain-community==0.3.5", - "langchain-core==0.3.31", - "langchain-experimental==0.3.3", - "langchain-openai==0.2.5", - "langchain_ollama==0.2.2", - "langgraph==0.2.66", - "matplotlib==3.9.2", - "openai==1.59.4", - "ollama==0.4.6", - "pandas==2.2.3", - "plotly==5.24.1", - "pydantic==2.9.2", - "pylint==3.3.1", - "pypdf==5.2.0", - "pytest==8.3.3", - "pytest-asyncio==0.25.2", - "streamlit==1.39.0", - "sentence_transformers==3.3.1", - "tabulate==0.9.0", - "torch==2.2.2", - "torch_geometric==2.6.1", - "tqdm==4.66.6", - "transformers==4.48.0", - "mkdocs==1.6.1", - "mkdocs-jupyter==0.25.1", - "mkdocs-material==9.5.47", - "mkdocstrings-python==1.12.2", - "mkdocs-include-markdown-plugin==7.1.2", - "mkdocstrings==0.27.0", - "streamlit-feedback" + "copasi_basico==0.78", + "coverage==7.6.4", + "einops==0.8.0", + "gdown==5.2.0", + "huggingface_hub==0.26.5", + "hydra-core==1.3.2", + "joblib==1.4.2", + "langchain==0.3.7", + "langchain-community==0.3.5", + "langchain-core==0.3.31", + "langchain-experimental==0.3.3", + "langchain-openai==0.2.5", + "langchain_ollama==0.2.2", + "langgraph==0.2.66", + "matplotlib==3.9.2", + "openai==1.59.4", + "ollama==0.4.6", + "pandas==2.2.3", + "plotly==5.24.1", + "pydantic==2.9.2", + "pylint==3.3.1", + "pypdf==5.2.0", + "pytest==8.3.3", + "pytest-asyncio==0.25.2", + "streamlit==1.39.0", + "sentence_transformers==3.3.1", + "tabulate==0.9.0", + "torch==2.2.2", + "torch_geometric==2.6.1", + "tqdm==4.66.6", + "transformers==4.48.0", + "mkdocs==1.6.1", + "mkdocs-jupyter==0.25.1", + "mkdocs-material==9.5.47", + "mkdocstrings-python==1.12.2", + "mkdocs-include-markdown-plugin==7.1.2", + "mkdocstrings==0.27.0", + "streamlit-feedback", ] dynamic = ["version"] [tool.setuptools.dynamic] -version = {file = "release_version.txt"} +version = { file = "release_version.txt" } # find packages [tool.setuptools] @@ -67,10 +67,12 @@ include = ["aiagents4pharma*"] [tool.setuptools.package-data] aiagents4pharma = [ - "configs/*", - "configs/talk2biomodels/agents/t2b_agent/*", - "configs/talk2biomodels/tools/ask_question/*", - "configs/talk2biomodels/tools/get_annotation/*"] + "configs/*", + "configs/talk2biomodels/agents/t2b_agent/*", + "configs/talk2biomodels/tools/ask_question/*", + "configs/talk2biomodels/tools/get_annotation/*", +] [tool.pytest.ini_options] asyncio_default_fixture_loop_scope = "function" +markers = ["integration: marks tests as integration tests"] From 9c305491659e848b5ff2d08c695292dfac145216 Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sat, 8 Feb 2025 17:40:16 +0100 Subject: [PATCH 08/17] feat: added hydra config for the streamlit app --- app/frontend/streamlit_app_talk2scholars.py | 126 ++++++++++++-------- 1 file changed, 77 insertions(+), 49 deletions(-) diff --git a/app/frontend/streamlit_app_talk2scholars.py b/app/frontend/streamlit_app_talk2scholars.py index f6b0f680..083ca09d 100644 --- a/app/frontend/streamlit_app_talk2scholars.py +++ b/app/frontend/streamlit_app_talk2scholars.py @@ -1,13 +1,14 @@ #!/usr/bin/env python3 -''' +""" Talk2Scholars: A Streamlit app for the Talk2Scholars graph. -''' +""" import os import sys import random import streamlit as st +import hydra from streamlit_feedback import streamlit_feedback from langchain_core.messages import SystemMessage, HumanMessage, AIMessage from langchain_core.messages import ChatMessage @@ -15,24 +16,43 @@ from langchain_core.tracers.context import collect_runs from langchain.callbacks.tracers import LangChainTracer from langsmith import Client -sys.path.append('./') + +sys.path.append("./") from aiagents4pharma.talk2scholars.agents.main_agent import get_app st.set_page_config(page_title="Talk2Scholars", page_icon="🤖", layout="wide") +# Initialize configuration +hydra.core.global_hydra.GlobalHydra.instance().clear() +if "config" not in st.session_state: + # Load Hydra configuration + with hydra.initialize( + version_base=None, + config_path="../../aiagents4pharma/talk2scholars/configs", + ): + cfg = hydra.compose(config_name="config", overrides=["app/frontend=default"]) + cfg = cfg.app.frontend + st.session_state.config = cfg +else: + cfg = st.session_state.config + # Check if env variable OPENAI_API_KEY exists if "OPENAI_API_KEY" not in os.environ: - st.error("Please set the OPENAI_API_KEY environment \ - variable in the terminal where you run the app.") + st.error( + "Please set the OPENAI_API_KEY environment \ + variable in the terminal where you run the app." + ) st.stop() # Create a chat prompt template -prompt = ChatPromptTemplate.from_messages([ +prompt = ChatPromptTemplate.from_messages( + [ ("system", "Welcome to Talk2Scholars!"), - MessagesPlaceholder(variable_name='chat_history', optional=True), + MessagesPlaceholder(variable_name="chat_history", optional=True), ("human", "{input}"), ("placeholder", "{agent_scratchpad}"), -]) + ] +) # Initialize chat history if "messages" not in st.session_state: @@ -41,7 +61,7 @@ # Initialize project_name for Langsmith if "project_name" not in st.session_state: # st.session_state.project_name = str(st.session_state.user_name) + '@' + str(uuid.uuid4()) - st.session_state.project_name = 'Talk2Scholars-' + str(random.randint(1000, 9999)) + st.session_state.project_name = "Talk2Scholars-" + str(random.randint(1000, 9999)) # Initialize run_id for Langsmith if "run_id" not in st.session_state: @@ -55,55 +75,63 @@ if "llm_model" not in st.session_state: st.session_state.app = get_app(st.session_state.unique_id) else: - st.session_state.app = get_app(st.session_state.unique_id, - llm_model=st.session_state.llm_model) + st.session_state.app = get_app( + st.session_state.unique_id, llm_model=st.session_state.llm_model + ) # Get the app app = st.session_state.app + def _submit_feedback(user_response): - ''' + """ Function to submit feedback to the developers. - ''' + """ client = Client() client.create_feedback( st.session_state.run_id, key="feedback", - score=1 if user_response['score'] == "👍" else 0, - comment=user_response['text'] + score=1 if user_response["score"] == "👍" else 0, + comment=user_response["text"], ) st.info("Your feedback is on its way to the developers. Thank you!", icon="🚀") + @st.dialog("Warning ⚠️") def update_llm_model(): """ Function to update the LLM model. """ llm_model = st.session_state.llm_model - st.warning(f"Clicking 'Continue' will reset all agents, \ + st.warning( + f"Clicking 'Continue' will reset all agents, \ set the selected LLM to {llm_model}. \ This action will reset the entire app, \ and agents will lose access to the \ conversation history. Are you sure \ - you want to proceed?") + you want to proceed?" + ) if st.button("Continue"): # Delete all the messages and the app key for key in st.session_state.keys(): if key in ["messages", "app"]: del st.session_state[key] + # Main layout of the app split into two columns main_col1, main_col2 = st.columns([3, 7]) # First column with main_col1: with st.container(border=True): # Title - st.write(""" + st.write( + """

🤖 Talk2Scholars

""", - unsafe_allow_html=True) + unsafe_allow_html=True, + ) # LLM panel (Only at the front-end for now) llms = ["gpt-4o-mini", "gpt-4-turbo", "gpt-3.5-turbo"] @@ -118,7 +146,7 @@ def update_llm_model(): llms, index=0, key="llm_model", - on_change=update_llm_model + on_change=update_llm_model, ) # Upload files (placeholder) @@ -141,10 +169,10 @@ def update_llm_model(): # Display chat messages for count, message in enumerate(st.session_state.messages): - with st.chat_message(message["content"].role, - avatar="🤖" - if message["content"].role != 'user' - else "👩🏻‍💻"): + with st.chat_message( + message["content"].role, + avatar="🤖" if message["content"].role != "user" else "👩🏻‍💻", + ): st.markdown(message["content"].content) st.empty() @@ -156,12 +184,7 @@ def update_llm_model(): # Display user prompt prompt_msg = ChatMessage(prompt, role="user") - st.session_state.messages.append( - { - "type": "message", - "content": prompt_msg - } - ) + st.session_state.messages.append({"type": "message", "content": prompt_msg}) with st.chat_message("user", avatar="👩🏻‍💻"): st.markdown(prompt) st.empty() @@ -170,14 +193,22 @@ def update_llm_model(): # with st.spinner("Fetching response ..."): with st.spinner(): # Get chat history - history = [(m["content"].role, m["content"].content) - for m in st.session_state.messages - if m["type"] == "message"] + history = [ + (m["content"].role, m["content"].content) + for m in st.session_state.messages + if m["type"] == "message" + ] # Convert chat history to ChatMessage objects chat_history = [ - SystemMessage(content=m[1]) if m[0] == "system" else - HumanMessage(content=m[1]) if m[0] == "human" else - AIMessage(content=m[1]) + ( + SystemMessage(content=m[1]) + if m[0] == "system" + else ( + HumanMessage(content=m[1]) + if m[0] == "human" + else AIMessage(content=m[1]) + ) + ) for m in history ] @@ -186,32 +217,29 @@ def update_llm_model(): # Update the agent state with the selected LLM model current_state = app.get_state(config) - app.update_state( - config, - {"llm_model": st.session_state.llm_model} - ) + app.update_state(config, {"llm_model": st.session_state.llm_model}) with collect_runs() as cb: # Add Langsmith tracer tracer = LangChainTracer( project_name=st.session_state.project_name - ) + ) # Get response from the agent response = app.invoke( {"messages": [HumanMessage(content=prompt)]}, - config=config|{"callbacks": [tracer]} + config=config | {"callbacks": [tracer]}, ) st.session_state.run_id = cb.traced_runs[-1].id # Print the response # print (response) # Add assistant response to chat history - assistant_msg = ChatMessage(response["messages"][-1].content, - role="assistant") - st.session_state.messages.append({ - "type": "message", - "content": assistant_msg - }) + assistant_msg = ChatMessage( + response["messages"][-1].content, role="assistant" + ) + st.session_state.messages.append( + {"type": "message", "content": assistant_msg} + ) # Display the response in the chat st.markdown(response["messages"][-1].content) st.empty() @@ -221,5 +249,5 @@ def update_llm_model(): feedback_type="thumbs", optional_text_label="[Optional] Please provide an explanation", on_submit=_submit_feedback, - key=f"feedback_{st.session_state.run_id}" + key=f"feedback_{st.session_state.run_id}", ) From 368a1a985c6ea972ba664c2882f7161b74eada0d Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sat, 8 Feb 2025 17:40:29 +0100 Subject: [PATCH 09/17] chores: __init__ update --- aiagents4pharma/talk2scholars/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiagents4pharma/talk2scholars/__init__.py b/aiagents4pharma/talk2scholars/__init__.py index e59e56e2..6d2291f7 100644 --- a/aiagents4pharma/talk2scholars/__init__.py +++ b/aiagents4pharma/talk2scholars/__init__.py @@ -2,4 +2,4 @@ This file is used to import all the modules in the package. """ -from . import agents, config, state, tests, tools +from . import agents, configs, state, tests, tools From 4b5b01e1b8f58bd9725b0e266ede201fbc4f3d98 Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sat, 8 Feb 2025 17:40:49 +0100 Subject: [PATCH 10/17] feat: updated test for hydra integration --- .../talk2scholars/tests/test_langgraph.py | 130 ++++++++++-------- 1 file changed, 76 insertions(+), 54 deletions(-) diff --git a/aiagents4pharma/talk2scholars/tests/test_langgraph.py b/aiagents4pharma/talk2scholars/tests/test_langgraph.py index d58e20a3..c6df9e54 100644 --- a/aiagents4pharma/talk2scholars/tests/test_langgraph.py +++ b/aiagents4pharma/talk2scholars/tests/test_langgraph.py @@ -5,12 +5,14 @@ """ from unittest.mock import Mock, patch - import pytest from langchain_core.messages import AIMessage, HumanMessage +import hydra +from hydra.core.global_hydra import GlobalHydra +from omegaconf import DictConfig, OmegaConf from ..agents.main_agent import get_app, make_supervisor_node -from ..state.state_talk2scholars import replace_dict +from ..state.state_talk2scholars import replace_dict, Talk2Scholars from ..tools.s2.display_results import display_results from ..tools.s2.multi_paper_rec import get_multi_paper_recommendations from ..tools.s2.search import search_tool @@ -18,6 +20,47 @@ # pylint: disable=redefined-outer-name + +def pytest_configure(config): + """Register integration marker.""" + config.addinivalue_line("markers", "integration: mark test as an integration test") + + +@pytest.fixture(autouse=True) +def hydra_setup(): + """Setup and cleanup Hydra for tests.""" + GlobalHydra.instance().clear() + with hydra.initialize(version_base=None, config_path="../configs"): + yield + + +@pytest.fixture +def mock_cfg() -> DictConfig: + """Create a mock configuration for testing.""" + config = { + "agents": { + "talk2scholars": { + "main_agent": { + "state_modifier": "Test prompt for main agent", + "temperature": 0, + }, + "s2_agent": { + "temperature": 0, + "s2_agent": "Test prompt for s2 agent", + }, + } + }, + "tools": { + "search": { + "api_endpoint": "https://api.semanticscholar.org/graph/v1/paper/search", + "default_limit": 2, + "api_fields": ["paperId", "title", "abstract", "year", "authors"], + } + }, + } + return OmegaConf.create(config) + + # Fixed test data for deterministic results MOCK_SEARCH_RESPONSE = { "data": [ @@ -45,27 +88,33 @@ @pytest.fixture -def initial_state(): +def initial_state() -> Talk2Scholars: """Create a base state for tests""" - return { - "messages": [], - "papers": {}, - "is_last_step": False, - "current_agent": None, - "llm_model": "gpt-4o-mini", - } + return Talk2Scholars( + messages=[], + papers={}, + is_last_step=False, + current_agent=None, + llm_model="gpt-4o-mini", + next="", + ) class TestMainAgent: """Unit tests for main agent functionality""" - def test_supervisor_routes_search_to_s2(self, initial_state): + def test_supervisor_routes_search_to_s2( + self, initial_state: Talk2Scholars, mock_cfg + ): """Verifies that search-related queries are routed to S2 agent""" llm_mock = Mock() llm_mock.invoke.return_value = AIMessage(content="Search initiated") - supervisor = make_supervisor_node(llm_mock) - state = initial_state.copy() + # Extract the main_agent config + supervisor = make_supervisor_node( + llm_mock, mock_cfg.agents.talk2scholars.main_agent + ) + state = initial_state state["messages"] = [HumanMessage(content="search for papers")] result = supervisor(state) @@ -73,13 +122,18 @@ def test_supervisor_routes_search_to_s2(self, initial_state): assert not result.update["is_last_step"] assert result.update["current_agent"] == "s2_agent" - def test_supervisor_routes_general_to_end(self, initial_state): + def test_supervisor_routes_general_to_end( + self, initial_state: Talk2Scholars, mock_cfg + ): """Verifies that non-search queries end the conversation""" llm_mock = Mock() llm_mock.invoke.return_value = AIMessage(content="General response") - supervisor = make_supervisor_node(llm_mock) - state = initial_state.copy() + # Extract the main_agent config + supervisor = make_supervisor_node( + llm_mock, mock_cfg.agents.talk2scholars.main_agent + ) + state = initial_state state["messages"] = [HumanMessage(content="What is ML?")] result = supervisor(state) @@ -90,9 +144,9 @@ def test_supervisor_routes_general_to_end(self, initial_state): class TestS2Tools: """Unit tests for individual S2 tools""" - def test_display_results_shows_papers(self, initial_state): + def test_display_results_shows_papers(self, initial_state: Talk2Scholars): """Verifies display_results tool correctly returns papers from state""" - state = initial_state.copy() + state = initial_state state["papers"] = MOCK_STATE_PAPER result = display_results.invoke(input={"state": state}) assert result == MOCK_STATE_PAPER @@ -199,40 +253,6 @@ def test_multi_paper_rec_with_optional_params(self, mock_post): assert "papers" in result.update assert len(result.update["messages"]) == 1 - @patch("requests.get") - def test_single_paper_rec_empty_response(self, mock_get): - """Tests single paper recommendations with empty response""" - mock_get.return_value.json.return_value = {"recommendedPapers": []} - mock_get.return_value.status_code = 200 - - result = get_single_paper_recommendations.invoke( - input={ - "paper_id": "123", - "limit": 1, - "tool_call_id": "test123", - "id": "test123", - } - ) - assert "papers" in result.update - assert len(result.update["papers"]) == 0 - - @patch("requests.post") - def test_multi_paper_rec_empty_response(self, mock_post): - """Tests multi-paper recommendations with empty response""" - mock_post.return_value.json.return_value = {"recommendedPapers": []} - mock_post.return_value.status_code = 200 - - result = get_multi_paper_recommendations.invoke( - input={ - "paper_ids": ["123", "456"], - "limit": 1, - "tool_call_id": "test123", - "id": "test123", - } - ) - assert "papers" in result.update - assert len(result.update["papers"]) == 0 - def test_state_replace_dict(): """Verifies state dictionary replacement works correctly""" @@ -244,11 +264,13 @@ def test_state_replace_dict(): @pytest.mark.integration -def test_end_to_end_search_workflow(initial_state): +def test_end_to_end_search_workflow(initial_state: Talk2Scholars, mock_cfg): """Integration test: Complete search workflow""" with ( patch("requests.get") as mock_get, patch("langchain_openai.ChatOpenAI") as mock_llm, + patch("hydra.compose", return_value=mock_cfg), + patch("hydra.initialize"), ): mock_get.return_value.json.return_value = MOCK_SEARCH_RESPONSE mock_get.return_value.status_code = 200 @@ -258,7 +280,7 @@ def test_end_to_end_search_workflow(initial_state): mock_llm.return_value = llm_instance app = get_app("test_integration") - test_state = initial_state.copy() + test_state = initial_state test_state["messages"] = [HumanMessage(content="search for ML papers")] config = { From 5ae215448b5f2f1d3f21ff40934bc46d6dfdd530 Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sat, 8 Feb 2025 17:41:09 +0100 Subject: [PATCH 11/17] feat: updated tools for hydra setup --- .../talk2scholars/tools/s2/multi_paper_rec.py | 18 ++++++++++++----- .../talk2scholars/tools/s2/search.py | 12 ++++++++--- .../tools/s2/single_paper_rec.py | 20 ++++++++++++------- 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py b/aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py index 2474c078..486cfebf 100644 --- a/aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +++ b/aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py @@ -8,7 +8,7 @@ import json import logging from typing import Annotated, Any, Dict, List, Optional - +import hydra import pandas as pd import requests from langchain_core.messages import ToolMessage @@ -40,6 +40,14 @@ class MultiPaperRecInput(BaseModel): model_config = {"arbitrary_types_allowed": True} +# Load hydra configuration +with hydra.initialize(version_base=None, config_path="../../configs"): + cfg = hydra.compose( + config_name="config", overrides=["tools/multi_paper_recommendation=default"] + ) + cfg = cfg.tools.multi_paper_recommendation + + @tool(args_schema=MultiPaperRecInput) def get_multi_paper_recommendations( paper_ids: List[str], @@ -62,12 +70,12 @@ def get_multi_paper_recommendations( """ logging.info("Starting multi-paper recommendations search.") - endpoint = "https://api.semanticscholar.org/recommendations/v1/papers" - headers = {"Content-Type": "application/json"} + endpoint = cfg.api_endpoint + headers = cfg.headers payload = {"positivePaperIds": paper_ids, "negativePaperIds": []} params = { "limit": min(limit, 500), - "fields": "paperId,title,abstract,year,authors,citationCount,url", + "fields": ",".join(cfg.api_fields), } # Add year parameter if provided @@ -80,7 +88,7 @@ def get_multi_paper_recommendations( headers=headers, params=params, data=json.dumps(payload), - timeout=10, + timeout=cfg.request_timeout, ) logging.info( "API Response Status for multi-paper recommendations: %s", response.status_code diff --git a/aiagents4pharma/talk2scholars/tools/s2/search.py b/aiagents4pharma/talk2scholars/tools/s2/search.py index a4649612..b4fde86b 100644 --- a/aiagents4pharma/talk2scholars/tools/s2/search.py +++ b/aiagents4pharma/talk2scholars/tools/s2/search.py @@ -6,7 +6,7 @@ import logging from typing import Annotated, Any, Dict, Optional - +import hydra import pandas as pd import requests from langchain_core.messages import ToolMessage @@ -34,6 +34,12 @@ class SearchInput(BaseModel): tool_call_id: Annotated[str, InjectedToolCallId] +# Load hydra configuration +with hydra.initialize(version_base=None, config_path="../../configs"): + cfg = hydra.compose(config_name="config", overrides=["tools/search=default"]) + cfg = cfg.tools.search + + @tool(args_schema=SearchInput) def search_tool( query: str, @@ -55,13 +61,13 @@ def search_tool( Dict[str, Any]: The search results and related information. """ print("Starting paper search...") - endpoint = "https://api.semanticscholar.org/graph/v1/paper/search" + endpoint = cfg.api_endpoint params = { "query": query, "limit": min(limit, 100), # "fields": "paperId,title,abstract,year,authors, # citationCount,url,publicationTypes,openAccessPdf", - "fields": "paperId,title,abstract,year,authors,citationCount,url", + "fields": ",".join(cfg.api_fields), } # Add year parameter if provided diff --git a/aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py b/aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py index c9ed59aa..e514fcb4 100644 --- a/aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +++ b/aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py @@ -6,7 +6,7 @@ import logging from typing import Annotated, Any, Dict, Optional - +import hydra import pandas as pd import requests from langchain_core.messages import ToolMessage @@ -41,6 +41,14 @@ class SinglePaperRecInput(BaseModel): model_config = {"arbitrary_types_allowed": True} +# Load hydra configuration +with hydra.initialize(version_base=None, config_path="../../configs"): + cfg = hydra.compose( + config_name="config", overrides=["tools/single_paper_recommendation=default"] + ) + cfg = cfg.tools.single_paper_recommendation + + @tool(args_schema=SinglePaperRecInput) def get_single_paper_recommendations( paper_id: str, @@ -63,20 +71,18 @@ def get_single_paper_recommendations( """ logger.info("Starting single paper recommendations search.") - endpoint = ( - f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{paper_id}" - ) + endpoint = f"{cfg.api_endpoint}/{paper_id}" params = { "limit": min(limit, 500), # Max 500 per API docs - "fields": "paperId,title,abstract,year,authors,citationCount,url", - "from": "all-cs", # Using all-cs pool as specified in docs + "fields": ",".join(cfg.api_fields), + "from": cfg.recommendation_params.from_pool, } # Add year parameter if provided if year: params["year"] = year - response = requests.get(endpoint, params=params, timeout=10) + response = requests.get(endpoint, params=params, timeout=cfg.request_timeout) data = response.json() papers = data.get("data", []) response = requests.get(endpoint, params=params, timeout=10) From 3638e5911e9f0777bb63bd9ce4b365460b33c85c Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sat, 8 Feb 2025 17:41:41 +0100 Subject: [PATCH 12/17] feat: updated main and sub-agent for hydra setup --- .../talk2scholars/agents/main_agent.py | 30 ++++++--- .../talk2scholars/agents/s2_agent.py | 67 +++++++++++-------- 2 files changed, 59 insertions(+), 38 deletions(-) diff --git a/aiagents4pharma/talk2scholars/agents/main_agent.py b/aiagents4pharma/talk2scholars/agents/main_agent.py index 12abab36..6d26df86 100644 --- a/aiagents4pharma/talk2scholars/agents/main_agent.py +++ b/aiagents4pharma/talk2scholars/agents/main_agent.py @@ -6,7 +6,7 @@ import logging from typing import Literal -from dotenv import load_dotenv +import hydra from langchain_core.language_models.chat_models import BaseChatModel from langchain_core.messages import AIMessage from langchain_openai import ChatOpenAI @@ -14,15 +14,13 @@ from langgraph.graph import END, START, StateGraph from langgraph.types import Command from ..agents import s2_agent -from ..config.config import config from ..state.state_talk2scholars import Talk2Scholars logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -load_dotenv() -def make_supervisor_node(llm: BaseChatModel) -> str: +def make_supervisor_node(llm: BaseChatModel, cfg) -> str: """ Creates a supervisor node following LangGraph patterns. @@ -34,7 +32,9 @@ def make_supervisor_node(llm: BaseChatModel) -> str: """ # options = ["FINISH", "s2_agent"] - def supervisor_node(state: Talk2Scholars) -> Command[Literal["s2_agent", "__end__"]]: + def supervisor_node( + state: Talk2Scholars, + ) -> Command[Literal["s2_agent", "__end__"]]: """ Supervisor node that routes to appropriate sub-agents. @@ -46,7 +46,7 @@ def supervisor_node(state: Talk2Scholars) -> Command[Literal["s2_agent", "__end_ """ logger.info("Supervisor node called") - messages = [{"role": "system", "content": config.MAIN_AGENT_PROMPT}] + state[ + messages = [{"role": "system", "content": cfg.state_modifier}] + state[ "messages" ] response = llm.invoke(messages) @@ -81,7 +81,8 @@ def supervisor_node(state: Talk2Scholars) -> Command[Literal["s2_agent", "__end_ return supervisor_node -def get_app(thread_id: str, llm_model ='gpt-4o-mini') -> StateGraph: + +def get_app(thread_id: str, llm_model="gpt-4o-mini") -> StateGraph: """ Returns the langraph app with hierarchical structure. @@ -91,6 +92,15 @@ def get_app(thread_id: str, llm_model ='gpt-4o-mini') -> StateGraph: Returns: The compiled langraph app. """ + + # Load hydra configuration + logger.log(logging.INFO, "Load Hydra configuration for Talk2Scholars main agent.") + with hydra.initialize(version_base=None, config_path="../../configs"): + cfg = hydra.compose( + config_name="config", overrides=["agents/talk2scholars/main_agent=default"] + ) + cfg = cfg.agents.talk2scholars.main_agent + def call_s2_agent(state: Talk2Scholars) -> Command[Literal["__end__"]]: """ Node for calling the S2 agent. @@ -114,10 +124,12 @@ def call_s2_agent(state: Talk2Scholars) -> Command[Literal["__end__"]]: "current_agent": "s2_agent", }, ) - llm = ChatOpenAI(model=llm_model, temperature=0) + + logger.log(logging.INFO, "Using OpenAI model %s", llm_model) + llm = ChatOpenAI(model=llm_model, temperature=cfg.temperature) workflow = StateGraph(Talk2Scholars) - supervisor = make_supervisor_node(llm) + supervisor = make_supervisor_node(llm, cfg) workflow.add_node("supervisor", supervisor) workflow.add_node("s2_agent", call_s2_agent) diff --git a/aiagents4pharma/talk2scholars/agents/s2_agent.py b/aiagents4pharma/talk2scholars/agents/s2_agent.py index 60e67f91..5dbb057e 100644 --- a/aiagents4pharma/talk2scholars/agents/s2_agent.py +++ b/aiagents4pharma/talk2scholars/agents/s2_agent.py @@ -1,56 +1,65 @@ -#/usr/bin/env python3 +# /usr/bin/env python3 -''' +""" Agent for interacting with Semantic Scholar -''' +""" import logging -from dotenv import load_dotenv +import hydra from langchain_openai import ChatOpenAI from langgraph.graph import START, StateGraph -from langgraph.prebuilt import create_react_agent +from langgraph.prebuilt import create_react_agent, ToolNode from langgraph.checkpoint.memory import MemorySaver -from ..config.config import config from ..state.state_talk2scholars import Talk2Scholars -# from ..tools.s2 import s2_tools -from ..tools.s2.search import search_tool -from ..tools.s2.display_results import display_results -from ..tools.s2.single_paper_rec import get_single_paper_recommendations -from ..tools.s2.multi_paper_rec import get_multi_paper_recommendations +from ..tools.s2.search import search_tool as s2_search +from ..tools.s2.display_results import display_results as s2_display +from ..tools.s2.single_paper_rec import ( + get_single_paper_recommendations as s2_single_rec, +) +from ..tools.s2.multi_paper_rec import get_multi_paper_recommendations as s2_multi_rec -load_dotenv() # Initialize logger logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -def get_app(uniq_id, llm_model='gpt-4o-mini'): - ''' + +def get_app(uniq_id, llm_model="gpt-4o-mini"): + """ This function returns the langraph app. - ''' + """ + def agent_s2_node(state: Talk2Scholars): - ''' + """ This function calls the model. - ''' + """ logger.log(logging.INFO, "Creating Agent_S2 node with thread_id %s", uniq_id) response = model.invoke(state, {"configurable": {"thread_id": uniq_id}}) return response + # Load hydra configuration + logger.log(logging.INFO, "Load Hydra configuration for Talk2Scholars S2 agent.") + with hydra.initialize(version_base=None, config_path="../../configs"): + cfg = hydra.compose( + config_name="config", overrides=["agents/talk2scholars/s2_agent=default"] + ) + cfg = cfg.agents.talk2scholars.s2_agent + # Define the tools - tools = [search_tool, - display_results, - get_single_paper_recommendations, - get_multi_paper_recommendations] + tools = ToolNode([s2_search, s2_display, s2_single_rec, s2_multi_rec]) + + # Define the model + logger.log(logging.INFO, "Using OpenAI model %s", llm_model) + llm = ChatOpenAI(model=llm_model, temperature=cfg.temperature) - # Create the LLM - llm = ChatOpenAI(model=llm_model, temperature=0) + # Create the agent model = create_react_agent( - llm, - tools=tools, - state_schema=Talk2Scholars, - state_modifier=config.S2_AGENT_PROMPT, - checkpointer=MemorySaver() - ) + llm, + tools=tools, + state_schema=Talk2Scholars, + state_modifier=cfg.s2_agent, + checkpointer=MemorySaver(), + ) # Define a new graph workflow = StateGraph(Talk2Scholars) From 74d67b18cacfedb62a4c79d75658abd7249596c7 Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sat, 8 Feb 2025 18:38:12 +0100 Subject: [PATCH 13/17] chores: markers already added in root toml file --- aiagents4pharma/talk2scholars/tests/test_langgraph.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/aiagents4pharma/talk2scholars/tests/test_langgraph.py b/aiagents4pharma/talk2scholars/tests/test_langgraph.py index c6df9e54..d7f86c27 100644 --- a/aiagents4pharma/talk2scholars/tests/test_langgraph.py +++ b/aiagents4pharma/talk2scholars/tests/test_langgraph.py @@ -21,11 +21,6 @@ # pylint: disable=redefined-outer-name -def pytest_configure(config): - """Register integration marker.""" - config.addinivalue_line("markers", "integration: mark test as an integration test") - - @pytest.fixture(autouse=True) def hydra_setup(): """Setup and cleanup Hydra for tests.""" From 048340cfdf8afd8f32ba61225cf5f49de5a6cc02 Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sun, 9 Feb 2025 14:12:50 +0100 Subject: [PATCH 14/17] chores: Updated system prompt --- .../configs/agents/talk2scholars/main_agent/default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml index 3e3d8aed..62a4d26b 100644 --- a/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +++ b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml @@ -34,6 +34,6 @@ main_agent: > "- Ask for clarification if the request is ambiguous\n\n" "When presenting paper search results, always use this exact format:\n\n" "Remember to:\n" - "- Always remember to add the url\n" + "- To always add the url\n" "- Put URLs on the title line itself as markdown\n" "- Maintain consistent spacing and formatting" From d2e7f29b2db327ea8610419da2bbc4e25e7def2a Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sun, 9 Feb 2025 14:19:16 +0100 Subject: [PATCH 15/17] fix: update logging for hydra --- .../talk2scholars/agents/main_agent.py | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/aiagents4pharma/talk2scholars/agents/main_agent.py b/aiagents4pharma/talk2scholars/agents/main_agent.py index 6d26df86..f0df9e93 100644 --- a/aiagents4pharma/talk2scholars/agents/main_agent.py +++ b/aiagents4pharma/talk2scholars/agents/main_agent.py @@ -5,7 +5,7 @@ """ import logging -from typing import Literal +from typing import Literal, Any import hydra from langchain_core.language_models.chat_models import BaseChatModel from langchain_core.messages import AIMessage @@ -20,18 +20,17 @@ logger = logging.getLogger(__name__) -def make_supervisor_node(llm: BaseChatModel, cfg) -> str: +def make_supervisor_node(llm: BaseChatModel, cfg: Any) -> str: """ Creates a supervisor node following LangGraph patterns. Args: llm (BaseChatModel): The language model to use for generating responses. + cfg (Any): The configuration object. Returns: str: The supervisor node function. """ - # options = ["FINISH", "s2_agent"] - def supervisor_node( state: Talk2Scholars, ) -> Command[Literal["s2_agent", "__end__"]]: @@ -44,7 +43,7 @@ def supervisor_node( Returns: Command[Literal["s2_agent", "__end__"]]: The command to execute next. """ - logger.info("Supervisor node called") + logger.info("Supervisor node called with state: %s", state) messages = [{"role": "system", "content": cfg.state_modifier}] + state[ "messages" @@ -100,6 +99,7 @@ def get_app(thread_id: str, llm_model="gpt-4o-mini") -> StateGraph: config_name="config", overrides=["agents/talk2scholars/main_agent=default"] ) cfg = cfg.agents.talk2scholars.main_agent + logger.info("Hydra configuration loaded with values: %s", cfg) def call_s2_agent(state: Talk2Scholars) -> Command[Literal["__end__"]]: """ @@ -111,10 +111,10 @@ def call_s2_agent(state: Talk2Scholars) -> Command[Literal["__end__"]]: Returns: Command[Literal["__end__"]]: The command to execute next. """ - logger.info("Calling S2 agent") + logger.info("Calling S2 agent with state: %s", state) app = s2_agent.get_app(thread_id, llm_model) response = app.invoke(state) - logger.info("S2 agent completed") + logger.info("S2 agent completed with response: %s", response) return Command( goto=END, update={ @@ -125,7 +125,12 @@ def call_s2_agent(state: Talk2Scholars) -> Command[Literal["__end__"]]: }, ) - logger.log(logging.INFO, "Using OpenAI model %s", llm_model) + logger.log( + logging.INFO, + "Using OpenAI model %s with temperature %s", + llm_model, + cfg.temperature + ) llm = ChatOpenAI(model=llm_model, temperature=cfg.temperature) workflow = StateGraph(Talk2Scholars) From 2adb79f2eb6263e5676aecbedf6dc2face889fc1 Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sun, 9 Feb 2025 18:54:50 +0100 Subject: [PATCH 16/17] fix: updated loggs to include cirrent agent and message count --- aiagents4pharma/talk2scholars/agents/main_agent.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/aiagents4pharma/talk2scholars/agents/main_agent.py b/aiagents4pharma/talk2scholars/agents/main_agent.py index f0df9e93..980c65bc 100644 --- a/aiagents4pharma/talk2scholars/agents/main_agent.py +++ b/aiagents4pharma/talk2scholars/agents/main_agent.py @@ -43,7 +43,11 @@ def supervisor_node( Returns: Command[Literal["s2_agent", "__end__"]]: The command to execute next. """ - logger.info("Supervisor node called with state: %s", state) + logger.info( + "Supervisor node called - Messages count: %d, Current Agent: %s", + len(state["messages"]), + state.get("current_agent", "None"), + ) messages = [{"role": "system", "content": cfg.state_modifier}] + state[ "messages" From 2b995afcb41cabb3a3c288f094959dc3eb8e6f93 Mon Sep 17 00:00:00 2001 From: Ansh-info Date: Sun, 9 Feb 2025 18:55:24 +0100 Subject: [PATCH 17/17] fix: added path to talk2scholar hydra configs --- pyproject.toml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 03ed1e7d..e2659bd6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,14 @@ aiagents4pharma = [ "configs/talk2biomodels/agents/t2b_agent/*", "configs/talk2biomodels/tools/ask_question/*", "configs/talk2biomodels/tools/get_annotation/*", + "talk2scholars/configs/*", + "talk2scholars/configs/agents/talk2scholars/main_agent/*", + "talk2scholars/configs/agents/talk2scholars/s2_agent/*", + "talk2scholars/configs/app/frontend/*", + "talk2scholars/configs/tools/multi_paper_recommendation/*", + "talk2scholars/configs/tools/search/*", + "talk2scholars/configs/tools/single_paper_recommendation/*", + ] [tool.pytest.ini_options]