diff --git a/aiagents4pharma/talk2scholars/__init__.py b/aiagents4pharma/talk2scholars/__init__.py
index e59e56e2..6d2291f7 100644
--- a/aiagents4pharma/talk2scholars/__init__.py
+++ b/aiagents4pharma/talk2scholars/__init__.py
@@ -2,4 +2,4 @@
 This file is used to import all the modules in the package.
 """
 
-from . import agents, config, state, tests, tools
+from . import agents, configs, state, tests, tools
diff --git a/aiagents4pharma/talk2scholars/agents/main_agent.py b/aiagents4pharma/talk2scholars/agents/main_agent.py
index 12abab36..980c65bc 100644
--- a/aiagents4pharma/talk2scholars/agents/main_agent.py
+++ b/aiagents4pharma/talk2scholars/agents/main_agent.py
@@ -5,8 +5,8 @@
 """
 
 import logging
-from typing import Literal
-from dotenv import load_dotenv
+from typing import Literal, Any
+import hydra
 from langchain_core.language_models.chat_models import BaseChatModel
 from langchain_core.messages import AIMessage
 from langchain_openai import ChatOpenAI
@@ -14,27 +14,26 @@
 from langgraph.graph import END, START, StateGraph
 from langgraph.types import Command
 
 from ..agents import s2_agent
-from ..config.config import config
 from ..state.state_talk2scholars import Talk2Scholars
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-load_dotenv()
 
-def make_supervisor_node(llm: BaseChatModel) -> str:
+def make_supervisor_node(llm: BaseChatModel, cfg: Any) -> str:
    """
    Creates a supervisor node following LangGraph patterns.

    Args:
        llm (BaseChatModel): The language model to use for generating responses.
+        cfg (Any): The configuration object.

    Returns:
        str: The supervisor node function.
    """
-    # options = ["FINISH", "s2_agent"]
-
-    def supervisor_node(state: Talk2Scholars) -> Command[Literal["s2_agent", "__end__"]]:
+    def supervisor_node(
+        state: Talk2Scholars,
+    ) -> Command[Literal["s2_agent", "__end__"]]:
        """
        Supervisor node that routes to appropriate sub-agents.
@@ -44,9 +43,13 @@ def supervisor_node(state: Talk2Scholars) -> Command[Literal["s2_agent", "__end_
        Returns:
            Command[Literal["s2_agent", "__end__"]]: The command to execute next.
        """
-        logger.info("Supervisor node called")
+        logger.info(
+            "Supervisor node called - Messages count: %d, Current Agent: %s",
+            len(state["messages"]),
+            state.get("current_agent", "None"),
+        )

-        messages = [{"role": "system", "content": config.MAIN_AGENT_PROMPT}] + state[
+        messages = [{"role": "system", "content": cfg.state_modifier}] + state[
            "messages"
        ]
        response = llm.invoke(messages)
@@ -81,7 +84,8 @@ def supervisor_node(state: Talk2Scholars) -> Command[Literal["s2_agent", "__end_

    return supervisor_node

-def get_app(thread_id: str, llm_model ='gpt-4o-mini') -> StateGraph:
+
+def get_app(thread_id: str, llm_model="gpt-4o-mini") -> StateGraph:
    """
    Returns the langraph app with hierarchical structure.

@@ -91,6 +95,16 @@ def get_app(thread_id: str, llm_model="gpt-4o-mini") -> StateGraph:
    Returns:
        The compiled langraph app.
    """
+
+    # Load hydra configuration
+    logger.log(logging.INFO, "Load Hydra configuration for Talk2Scholars main agent.")
+    with hydra.initialize(version_base=None, config_path="../../configs"):
+        cfg = hydra.compose(
+            config_name="config", overrides=["agents/talk2scholars/main_agent=default"]
+        )
+        cfg = cfg.agents.talk2scholars.main_agent
+    logger.info("Hydra configuration loaded with values: %s", cfg)
+
    def call_s2_agent(state: Talk2Scholars) -> Command[Literal["__end__"]]:
        """
        Node for calling the S2 agent.
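Every module this patch touches loads its settings through the same Hydra composition idiom shown in the hunk above. A minimal standalone sketch of that idiom — the override string, attribute path, and printed keys mirror the main agent's usage and assume the `configs` tree added later in this diff:

```python
# Minimal sketch of the Hydra composition idiom this patch uses in every module.
# Assumes the aiagents4pharma/talk2scholars/configs tree added later in this diff;
# config_path is resolved relative to the module calling hydra.initialize().
import hydra

with hydra.initialize(version_base=None, config_path="../../configs"):
    cfg = hydra.compose(
        config_name="config",
        overrides=["agents/talk2scholars/main_agent=default"],
    )
# Narrow the composed tree to the group this module actually needs.
cfg = cfg.agents.talk2scholars.main_agent
print(cfg.temperature)     # 0, from main_agent/default.yaml
print(cfg.state_modifier)  # the supervisor's system prompt
```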
@@ -101,10 +115,10 @@ def call_s2_agent(state: Talk2Scholars) -> Command[Literal["__end__"]]:
        Returns:
            Command[Literal["__end__"]]: The command to execute next.
        """
-        logger.info("Calling S2 agent")
+        logger.info("Calling S2 agent with state: %s", state)
        app = s2_agent.get_app(thread_id, llm_model)
        response = app.invoke(state)
-        logger.info("S2 agent completed")
+        logger.info("S2 agent completed with response: %s", response)
        return Command(
            goto=END,
            update={
@@ -114,10 +128,17 @@ def call_s2_agent(state: Talk2Scholars) -> Command[Literal["__end__"]]:
            "current_agent": "s2_agent",
        },
    )
-    llm = ChatOpenAI(model=llm_model, temperature=0)
+
+    logger.log(
+        logging.INFO,
+        "Using OpenAI model %s with temperature %s",
+        llm_model,
+        cfg.temperature
+    )
+    llm = ChatOpenAI(model=llm_model, temperature=cfg.temperature)
    workflow = StateGraph(Talk2Scholars)
-    supervisor = make_supervisor_node(llm)
+    supervisor = make_supervisor_node(llm, cfg)
    workflow.add_node("supervisor", supervisor)
    workflow.add_node("s2_agent", call_s2_agent)
diff --git a/aiagents4pharma/talk2scholars/agents/s2_agent.py b/aiagents4pharma/talk2scholars/agents/s2_agent.py
index 60e67f91..5dbb057e 100644
--- a/aiagents4pharma/talk2scholars/agents/s2_agent.py
+++ b/aiagents4pharma/talk2scholars/agents/s2_agent.py
@@ -1,56 +1,65 @@
-#/usr/bin/env python3
+# /usr/bin/env python3
 
-'''
+"""
 Agent for interacting with Semantic Scholar
-'''
+"""
 
 import logging
-from dotenv import load_dotenv
+import hydra
 from langchain_openai import ChatOpenAI
 from langgraph.graph import START, StateGraph
-from langgraph.prebuilt import create_react_agent
+from langgraph.prebuilt import create_react_agent, ToolNode
 from langgraph.checkpoint.memory import MemorySaver
-from ..config.config import config
 from ..state.state_talk2scholars import Talk2Scholars
-# from ..tools.s2 import s2_tools
-from ..tools.s2.search import search_tool
-from ..tools.s2.display_results import display_results
-from ..tools.s2.single_paper_rec import get_single_paper_recommendations
-from ..tools.s2.multi_paper_rec import get_multi_paper_recommendations
+from ..tools.s2.search import search_tool as s2_search
+from ..tools.s2.display_results import display_results as s2_display
+from ..tools.s2.single_paper_rec import (
+    get_single_paper_recommendations as s2_single_rec,
+)
+from ..tools.s2.multi_paper_rec import get_multi_paper_recommendations as s2_multi_rec
 
-load_dotenv()
 # Initialize logger
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-def get_app(uniq_id, llm_model='gpt-4o-mini'):
-    '''
+
+def get_app(uniq_id, llm_model="gpt-4o-mini"):
+    """
    This function returns the langraph app.
-    '''
+    """
+
    def agent_s2_node(state: Talk2Scholars):
-        '''
+        """
        This function calls the model.
-        '''
+        """
        logger.log(logging.INFO, "Creating Agent_S2 node with thread_id %s", uniq_id)
        response = model.invoke(state, {"configurable": {"thread_id": uniq_id}})
        return response

+    # Load hydra configuration
+    logger.log(logging.INFO, "Load Hydra configuration for Talk2Scholars S2 agent.")
+    with hydra.initialize(version_base=None, config_path="../../configs"):
+        cfg = hydra.compose(
+            config_name="config", overrides=["agents/talk2scholars/s2_agent=default"]
+        )
+        cfg = cfg.agents.talk2scholars.s2_agent
+
    # Define the tools
-    tools = [search_tool,
-            display_results,
-            get_single_paper_recommendations,
-            get_multi_paper_recommendations]
+    tools = ToolNode([s2_search, s2_display, s2_single_rec, s2_multi_rec])
+
+    # Define the model
+    logger.log(logging.INFO, "Using OpenAI model %s", llm_model)
+    llm = ChatOpenAI(model=llm_model, temperature=cfg.temperature)

-    # Create the LLM
-    llm = ChatOpenAI(model=llm_model, temperature=0)
+    # Create the agent
    model = create_react_agent(
-                llm,
-                tools=tools,
-                state_schema=Talk2Scholars,
-                state_modifier=config.S2_AGENT_PROMPT,
-                checkpointer=MemorySaver()
-            )
+        llm,
+        tools=tools,
+        state_schema=Talk2Scholars,
+        state_modifier=cfg.s2_agent,
+        checkpointer=MemorySaver(),
+    )

    # Define a new graph
    workflow = StateGraph(Talk2Scholars)
diff --git a/aiagents4pharma/talk2scholars/config/config.py b/aiagents4pharma/talk2scholars/config/config.py
deleted file mode 100644
index aa0ae493..00000000
--- a/aiagents4pharma/talk2scholars/config/config.py
+++ /dev/null
@@ -1,110 +0,0 @@
-"""Configuration module for AI agents handling paper searches and recommendations."""
-
-
-# pylint: disable=R0903
-class Config:
-    """Configuration class containing prompts for AI agents.
-
-    This class stores prompt templates used by various AI agents in the system,
-    particularly for academic paper searches and recommendations.
-    """
-
-    MAIN_AGENT_PROMPT = (
-        "You are a supervisory AI agent that routes user queries to specialized tools.\n"
-        "Your task is to select the most appropriate tool based on the user's request.\n\n"
-        "Available tools and their capabilities:\n\n"
-        "1. semantic_scholar_agent:\n"
-        "   - Search for academic papers and research\n"
-        "   - Get paper recommendations\n"
-        "   - Find similar papers\n"
-        "   USE FOR: Any queries about finding papers, academic research, "
-        "or getting paper recommendations\n\n"
-        "ROUTING GUIDELINES:\n\n"
-        "ALWAYS route to semantic_scholar_agent for:\n"
-        "- Finding academic papers\n"
-        "- Searching research topics\n"
-        "- Getting paper recommendations\n"
-        "- Finding similar papers\n"
-        "- Any query about academic literature\n\n"
-        "Approach:\n"
-        "1. Identify the core need in the user's query\n"
-        "2. Select the most appropriate tool based on the guidelines above\n"
-        "3. If unclear, ask for clarification\n"
-        "4. For multi-step tasks, focus on the immediate next step\n\n"
-        "Remember:\n"
-        "- Be decisive in your tool selection\n"
-        "- Focus on the immediate task\n"
-        "- Default to semantic_scholar_agent for any paper-finding tasks\n"
-        "- Ask for clarification if the request is ambiguous\n\n"
-        "When presenting paper search results, always use this exact format:\n\n"
-        "Remember to:\n"
-        "- Always remember to add the url\n"
-        "- Put URLs on the title line itself as markdown\n"
-        "- Maintain consistent spacing and formatting"
-    )
-
-    S2_AGENT_PROMPT = (
-        "You are a specialized academic research assistant with access to the following tools:\n\n"
-        "1. search_papers:\n"
-        "   USE FOR: General paper searches\n"
-        "   - Enhances search terms automatically\n"
-        "   - Adds relevant academic keywords\n"
-        "   - Focuses on recent research when appropriate\n\n"
-        "2. get_single_paper_recommendations:\n"
-        "   USE FOR: Finding papers similar to a specific paper\n"
-        "   - Takes a single paper ID\n"
-        "   - Returns related papers\n\n"
-        "3. get_multi_paper_recommendations:\n"
-        "   USE FOR: Finding papers similar to multiple papers\n"
-        "   - Takes multiple paper IDs\n"
-        "   - Finds papers related to all inputs\n\n"
-        "GUIDELINES:\n\n"
-        "For paper searches:\n"
-        "- Enhance search terms with academic language\n"
-        "- Include field-specific terminology\n"
-        '- Add "recent" or "latest" when appropriate\n'
-        "- Keep queries focused and relevant\n\n"
-        "For paper recommendations:\n"
-        "- Identify paper IDs (40-character hexadecimal strings)\n"
-        "- Use single_paper_recommendations for one ID\n"
-        "- Use multi_paper_recommendations for multiple IDs\n\n"
-        "Best practices:\n"
-        "1. Start with a broad search if no paper IDs are provided\n"
-        "2. Look for paper IDs in user input\n"
-        "3. Enhance search terms for better results\n"
-        "4. Consider the academic context\n"
-        "5. Be prepared to refine searches based on feedback\n\n"
-        "Remember:\n"
-        "- Always select the most appropriate tool\n"
-        "- Enhance search queries naturally\n"
-        "- Consider academic context\n"
-        "- Focus on delivering relevant results\n\n"
-        "IMPORTANT GUIDELINES FOR PAPER RECOMMENDATIONS:\n\n"
-        "For Multiple Papers:\n"
-        "- When getting recommendations for multiple papers, always use "
-        "get_multi_paper_recommendations tool\n"
-        "- DO NOT call get_single_paper_recommendations multiple times\n"
-        "- Always pass all paper IDs in a single call to get_multi_paper_recommendations\n"
-        '- Use for queries like "find papers related to both/all papers" or '
-        '"find similar papers to these papers"\n\n'
-        "For Single Paper:\n"
-        "- Use get_single_paper_recommendations when focusing on one specific paper\n"
-        "- Pass only one paper ID at a time\n"
-        '- Use for queries like "find papers similar to this paper" or '
-        '"get recommendations for paper X"\n'
-        "- Do not use for multiple papers\n\n"
-        "Examples:\n"
-        '- For "find related papers for both papers":\n'
-        "  ✓ Use get_multi_paper_recommendations with both paper IDs\n"
-        "  × Don't make multiple calls to get_single_paper_recommendations\n\n"
-        '- For "find papers related to the first paper":\n'
-        "  ✓ Use get_single_paper_recommendations with just that paper's ID\n"
-        "  × Don't use get_multi_paper_recommendations\n\n"
-        "Remember:\n"
-        "- Be precise in identifying which paper ID to use for single recommendations\n"
-        "- Don't reuse previous paper IDs unless specifically requested\n"
-        "- For fresh paper recommendations, always use the original paper ID"
-    )
-
-
-config = Config()
diff --git a/aiagents4pharma/talk2scholars/config/__init__.py b/aiagents4pharma/talk2scholars/configs/__init__.py
similarity index 60%
rename from aiagents4pharma/talk2scholars/config/__init__.py
rename to aiagents4pharma/talk2scholars/configs/__init__.py
index 6d2e9231..389e984a 100644
--- a/aiagents4pharma/talk2scholars/config/__init__.py
+++ b/aiagents4pharma/talk2scholars/configs/__init__.py
@@ -2,4 +2,6 @@
 This package contains configuration settings and prompts used by various AI agents
 """
 
-from . import config
+from . import agents
+from . import tools
+from . import app
diff --git a/aiagents4pharma/talk2scholars/configs/agents/__init__.py b/aiagents4pharma/talk2scholars/configs/agents/__init__.py
new file mode 100644
index 00000000..5e4e7af5
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/agents/__init__.py
@@ -0,0 +1,5 @@
+"""
+Import all the modules in the package
+"""
+
+from . import talk2scholars
diff --git a/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py
new file mode 100644
index 00000000..6a92e6a3
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py
@@ -0,0 +1,6 @@
+"""
+Import all the modules in the package
+"""
+
+from . import s2_agent
+from . import main_agent
diff --git a/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py
new file mode 100644
index 00000000..77c33b0f
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py
@@ -0,0 +1,3 @@
+"""
+Import all the modules in the package
+"""
diff --git a/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml
new file mode 100644
index 00000000..62a4d26b
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml
@@ -0,0 +1,39 @@
+_target_: agents.main_agent.get_app
+openai_api_key: ${oc.env:OPENAI_API_KEY}
+openai_llms:
+  - "gpt-4o-mini"
+  - "gpt-4-turbo"
+  - "gpt-3.5-turbo"
+temperature: 0
+state_modifier: >
+  "You are a supervisory AI agent that routes user queries to specialized tools.\n"
+  "Your task is to select the most appropriate tool based on the user's request.\n\n"
+  "Available tools and their capabilities:\n\n"
+  "1. semantic_scholar_agent:\n"
+  "   - Search for academic papers and research\n"
+  "   - Get paper recommendations\n"
+  "   - Find similar papers\n"
+  "   USE FOR: Any queries about finding papers, academic research, "
+  "or getting paper recommendations\n\n"
+  "ROUTING GUIDELINES:\n\n"
+  "ALWAYS route to semantic_scholar_agent for:\n"
+  "- Finding academic papers\n"
+  "- Searching research topics\n"
+  "- Getting paper recommendations\n"
+  "- Finding similar papers\n"
+  "- Any query about academic literature\n\n"
+  "Approach:\n"
+  "1. Identify the core need in the user's query\n"
+  "2. Select the most appropriate tool based on the guidelines above\n"
+  "3. If unclear, ask for clarification\n"
+  "4. For multi-step tasks, focus on the immediate next step\n\n"
+  "Remember:\n"
+  "- Be decisive in your tool selection\n"
+  "- Focus on the immediate task\n"
+  "- Default to semantic_scholar_agent for any paper-finding tasks\n"
+  "- Ask for clarification if the request is ambiguous\n\n"
+  "When presenting paper search results, always use this exact format:\n\n"
+  "Remember to:\n"
+  "- Always remember to add the url\n"
+  "- Put URLs on the title line itself as markdown\n"
+  "- Maintain consistent spacing and formatting"
diff --git a/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py
new file mode 100644
index 00000000..77c33b0f
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py
@@ -0,0 +1,3 @@
+"""
+Import all the modules in the package
+"""
diff --git a/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml
new file mode 100644
index 00000000..6c98b4bf
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml
@@ -0,0 +1,68 @@
+_target_: agents.s2_agent.get_app
+openai_api_key: ${oc.env:OPENAI_API_KEY}
+openai_llms:
+  - "gpt-4o-mini"
+  - "gpt-4-turbo"
+  - "gpt-3.5-turbo"
+temperature: 0
+s2_agent: >
+  "You are a specialized academic research assistant with access to the following tools:\n\n"
+  "1. search_papers:\n"
+  "   USE FOR: General paper searches\n"
+  "   - Enhances search terms automatically\n"
+  "   - Adds relevant academic keywords\n"
+  "   - Focuses on recent research when appropriate\n\n"
+  "2. get_single_paper_recommendations:\n"
+  "   USE FOR: Finding papers similar to a specific paper\n"
+  "   - Takes a single paper ID\n"
+  "   - Returns related papers\n\n"
+  "3. get_multi_paper_recommendations:\n"
+  "   USE FOR: Finding papers similar to multiple papers\n"
+  "   - Takes multiple paper IDs\n"
+  "   - Finds papers related to all inputs\n\n"
+  "GUIDELINES:\n\n"
+  "For paper searches:\n"
+  "- Enhance search terms with academic language\n"
+  "- Include field-specific terminology\n"
+  '- Add "recent" or "latest" when appropriate\n'
+  "- Keep queries focused and relevant\n\n"
+  "For paper recommendations:\n"
+  "- Identify paper IDs (40-character hexadecimal strings)\n"
+  "- Use single_paper_recommendations for one ID\n"
+  "- Use multi_paper_recommendations for multiple IDs\n\n"
+  "Best practices:\n"
+  "1. Start with a broad search if no paper IDs are provided\n"
+  "2. Look for paper IDs in user input\n"
+  "3. Enhance search terms for better results\n"
+  "4. Consider the academic context\n"
+  "5. Be prepared to refine searches based on feedback\n\n"
+  "Remember:\n"
+  "- Always select the most appropriate tool\n"
+  "- Enhance search queries naturally\n"
+  "- Consider academic context\n"
+  "- Focus on delivering relevant results\n\n"
+  "IMPORTANT GUIDELINES FOR PAPER RECOMMENDATIONS:\n\n"
+  "For Multiple Papers:\n"
+  "- When getting recommendations for multiple papers, always use "
+  "get_multi_paper_recommendations tool\n"
+  "- DO NOT call get_single_paper_recommendations multiple times\n"
+  "- Always pass all paper IDs in a single call to get_multi_paper_recommendations\n"
+  '- Use for queries like "find papers related to both/all papers" or '
+  '"find similar papers to these papers"\n\n'
+  "For Single Paper:\n"
+  "- Use get_single_paper_recommendations when focusing on one specific paper\n"
+  "- Pass only one paper ID at a time\n"
+  '- Use for queries like "find papers similar to this paper" or '
+  '"get recommendations for paper X"\n'
+  "- Do not use for multiple papers\n\n"
+  "Examples:\n"
+  '- For "find related papers for both papers":\n'
+  "  ✓ Use get_multi_paper_recommendations with both paper IDs\n"
+  "  × Don't make multiple calls to get_single_paper_recommendations\n\n"
+  '- For "find papers related to the first paper":\n'
+  "  ✓ Use get_single_paper_recommendations with just that paper's ID\n"
+  "  × Don't use get_multi_paper_recommendations\n\n"
+  "Remember:\n"
+  "- Be precise in identifying which paper ID to use for single recommendations\n"
+  "- Don't reuse previous paper IDs unless specifically requested\n"
+  "- For fresh paper recommendations, always use the original paper ID"
diff --git a/aiagents4pharma/talk2scholars/configs/app/__init__.py b/aiagents4pharma/talk2scholars/configs/app/__init__.py
new file mode 100644
index 00000000..01a7ae6f
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/app/__init__.py
@@ -0,0 +1,5 @@
+"""
+Import all the modules in the package
+"""
+
+from . import frontend
diff --git a/aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py b/aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py
new file mode 100644
index 00000000..77c33b0f
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py
@@ -0,0 +1,3 @@
+"""
+Import all the modules in the package
+"""
diff --git a/aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml b/aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml
new file mode 100644
index 00000000..2cafee04
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml
@@ -0,0 +1,33 @@
+# # Page configuration
+# page:
+#   title: "Talk2Scholars"
+#   icon: "🤖"
+#   layout: "wide"
+
+# Available LLM models
+llm_models:
+  - "gpt-4o-mini"
+  - "gpt-4-turbo"
+  - "gpt-3.5-turbo"
+# # Chat UI configuration
+# chat:
+#   assistant_avatar: "🤖"
+#   user_avatar: "👩🏻💻"
+#   input_placeholder: "Say something ..."
+#   spinner_text: "Fetching response ..."
+
+# # Feedback configuration
+# feedback:
+#   type: "thumbs"
+#   text_label: "[Optional] Please provide an explanation"
+#   success_message: "Your feedback is on its way to the developers. Thank you!"
+#   success_icon: "🚀"
+
+# # Layout configuration
+# layout:
+#   column_ratio: [3, 7] # Ratio for main_col1 and main_col2
+#   chat_container_height: 575
+#   sidebar_container_height: 500
+#
+# # Project name prefix
+# project_name_prefix: "Talk2Scholars-"
diff --git a/aiagents4pharma/talk2scholars/configs/config.yaml b/aiagents4pharma/talk2scholars/configs/config.yaml
new file mode 100644
index 00000000..b5cd9ed0
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/config.yaml
@@ -0,0 +1,8 @@
+defaults:
+  - _self_
+  - agents/talk2scholars/main_agent: default
+  - agents/talk2scholars/s2_agent: default
+  - tools/search: default
+  - tools/single_paper_recommendation: default
+  - tools/multi_paper_recommendation: default
+  - app/frontend: default
diff --git a/aiagents4pharma/talk2scholars/configs/tools/__init__.py b/aiagents4pharma/talk2scholars/configs/tools/__init__.py
new file mode 100644
index 00000000..d4e77ccb
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/tools/__init__.py
@@ -0,0 +1,7 @@
+"""
+Import all the modules in the package
+"""
+
+from . import search
+from . import single_paper_recommendation
+from . import multi_paper_recommendation
diff --git a/aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py b/aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py
new file mode 100644
index 00000000..77c33b0f
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py
@@ -0,0 +1,3 @@
+"""
+Import all the modules in the package
+"""
diff --git a/aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml b/aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml
new file mode 100644
index 00000000..735e97ec
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml
@@ -0,0 +1,19 @@
+api_endpoint: "https://api.semanticscholar.org/recommendations/v1/papers"
+default_limit: 2
+request_timeout: 10
+api_fields:
+  - "paperId"
+  - "title"
+  - "abstract"
+  - "year"
+  - "authors"
+  - "citationCount"
+  - "url"
+
+# Default headers and params
+headers:
+  Content-Type: "application/json"
+
+recommendation_params:
+  limit: ${.default_limit}
+  fields: ${.api_fields}
diff --git a/aiagents4pharma/talk2scholars/configs/tools/search/__init__.py b/aiagents4pharma/talk2scholars/configs/tools/search/__init__.py
new file mode 100644
index 00000000..77c33b0f
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/tools/search/__init__.py
@@ -0,0 +1,3 @@
+"""
+Import all the modules in the package
+"""
diff --git a/aiagents4pharma/talk2scholars/configs/tools/search/default.yaml b/aiagents4pharma/talk2scholars/configs/tools/search/default.yaml
new file mode 100644
index 00000000..bf1f8376
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/tools/search/default.yaml
@@ -0,0 +1,19 @@
+api_endpoint: "https://api.semanticscholar.org/graph/v1/paper/search"
+default_limit: 2
+request_timeout: 10
+api_fields:
+  - "paperId"
+  - "title"
+  - "abstract"
+  - "year"
+  - "authors"
+  - "citationCount"
+  - "url"
+# Commented fields that could be added later if needed
+# - "publicationTypes"
+# - "openAccessPdf"
+
+# Default search parameters
+search_params:
+  limit: ${.default_limit} # Reference to the default_limit above
+  fields: ${.api_fields} # Reference to the api_fields above
diff --git a/aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py b/aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py
new file mode 100644
index 00000000..77c33b0f
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py
@@ -0,0 +1,3 @@
+"""
+Import all the modules in the package
+"""
diff --git a/aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml b/aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml
new file mode 100644
index 00000000..d0f14cea
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml
@@ -0,0 +1,20 @@
+api_endpoint: "https://api.semanticscholar.org/recommendations/v1/papers/forpaper"
+default_limit: 2
+request_timeout: 10
+api_fields:
+  - "paperId"
+  - "title"
+  - "abstract"
+  - "year"
+  - "authors"
+  - "citationCount"
+  - "url"
+# Commented fields that could be added later if needed
+# - "publicationTypes"
+# - "openAccessPdf"
+
+# Default recommendation parameters
+recommendation_params:
+  limit: ${.default_limit} # Reference to the default_limit above
+  fields: ${.api_fields} # Reference to the api_fields above
+  from_pool: "all-cs" # Using all-cs pool as specified in docs
diff --git a/aiagents4pharma/talk2scholars/tests/test_langgraph.py b/aiagents4pharma/talk2scholars/tests/test_langgraph.py
index d58e20a3..d7f86c27 100644
--- a/aiagents4pharma/talk2scholars/tests/test_langgraph.py
+++ b/aiagents4pharma/talk2scholars/tests/test_langgraph.py
@@ -5,12 +5,14 @@
 """
 
 from unittest.mock import Mock, patch
-
 import pytest
 from langchain_core.messages import AIMessage, HumanMessage
+import hydra
+from hydra.core.global_hydra import GlobalHydra
+from omegaconf import DictConfig, OmegaConf
 
 from ..agents.main_agent import get_app, make_supervisor_node
-from ..state.state_talk2scholars import replace_dict
+from ..state.state_talk2scholars import replace_dict, Talk2Scholars
 from ..tools.s2.display_results import display_results
 from ..tools.s2.multi_paper_rec import get_multi_paper_recommendations
 from ..tools.s2.search import search_tool
@@ -18,6 +20,42 @@
 
 # pylint: disable=redefined-outer-name
 
+
+@pytest.fixture(autouse=True)
+def hydra_setup():
+    """Setup and cleanup Hydra for tests."""
+    GlobalHydra.instance().clear()
+    with hydra.initialize(version_base=None, config_path="../configs"):
+        yield
+
+
+@pytest.fixture
+def mock_cfg() -> DictConfig:
+    """Create a mock configuration for testing."""
+    config = {
+        "agents": {
+            "talk2scholars": {
+                "main_agent": {
+                    "state_modifier": "Test prompt for main agent",
+                    "temperature": 0,
+                },
+                "s2_agent": {
+                    "temperature": 0,
+                    "s2_agent": "Test prompt for s2 agent",
+                },
+            }
+        },
+        "tools": {
+            "search": {
+                "api_endpoint": "https://api.semanticscholar.org/graph/v1/paper/search",
+                "default_limit": 2,
+                "api_fields": ["paperId", "title", "abstract", "year", "authors"],
+            }
+        },
+    }
+    return OmegaConf.create(config)
+
+
 # Fixed test data for deterministic results
 MOCK_SEARCH_RESPONSE = {
     "data": [
@@ -45,27 +83,33 @@
 
 
 @pytest.fixture
-def initial_state():
+def initial_state() -> Talk2Scholars:
    """Create a base state for tests"""
-    return {
-        "messages": [],
-        "papers": {},
-        "is_last_step": False,
-        "current_agent": None,
-        "llm_model": "gpt-4o-mini",
-    }
+    return Talk2Scholars(
+        messages=[],
+        papers={},
+        is_last_step=False,
+        current_agent=None,
+        llm_model="gpt-4o-mini",
+        next="",
+    )


 class TestMainAgent:
    """Unit tests for main agent functionality"""

-    def test_supervisor_routes_search_to_s2(self, initial_state):
+    def test_supervisor_routes_search_to_s2(
+        self, initial_state: Talk2Scholars, mock_cfg
+    ):
        """Verifies that search-related queries are routed to S2 agent"""
        llm_mock = Mock()
        llm_mock.invoke.return_value = AIMessage(content="Search initiated")

-        supervisor = make_supervisor_node(llm_mock)
-        state = initial_state.copy()
+        # Extract the main_agent config
+        supervisor = make_supervisor_node(
+            llm_mock, mock_cfg.agents.talk2scholars.main_agent
+        )
+        state = initial_state
        state["messages"] = [HumanMessage(content="search for papers")]

        result = supervisor(state)
@@ -73,13 +117,18 @@
        assert not result.update["is_last_step"]
        assert result.update["current_agent"] == "s2_agent"

-    def test_supervisor_routes_general_to_end(self, initial_state):
+    def test_supervisor_routes_general_to_end(
+        self, initial_state: Talk2Scholars, mock_cfg
+    ):
        """Verifies that non-search queries end the conversation"""
        llm_mock = Mock()
        llm_mock.invoke.return_value = AIMessage(content="General response")

-        supervisor = make_supervisor_node(llm_mock)
-        state = initial_state.copy()
+        # Extract the main_agent config
+        supervisor = make_supervisor_node(
+            llm_mock, mock_cfg.agents.talk2scholars.main_agent
+        )
+        state = initial_state
        state["messages"] = [HumanMessage(content="What is ML?")]

        result = supervisor(state)
@@ -90,9 +139,9 @@
 class TestS2Tools:
    """Unit tests for individual S2 tools"""

-    def test_display_results_shows_papers(self, initial_state):
+    def test_display_results_shows_papers(self, initial_state: Talk2Scholars):
        """Verifies display_results tool correctly returns papers from state"""
-        state = initial_state.copy()
+        state = initial_state
        state["papers"] = MOCK_STATE_PAPER
        result = display_results.invoke(input={"state": state})
        assert result == MOCK_STATE_PAPER
@@ -199,40 +248,6 @@ def test_multi_paper_rec_with_optional_params(self, mock_post):
        assert "papers" in result.update
        assert len(result.update["messages"]) == 1

-    @patch("requests.get")
-    def test_single_paper_rec_empty_response(self, mock_get):
-        """Tests single paper recommendations with empty response"""
-        mock_get.return_value.json.return_value = {"recommendedPapers": []}
-        mock_get.return_value.status_code = 200
-
-        result = get_single_paper_recommendations.invoke(
-            input={
-                "paper_id": "123",
-                "limit": 1,
-                "tool_call_id": "test123",
-                "id": "test123",
-            }
-        )
-        assert "papers" in result.update
-        assert len(result.update["papers"]) == 0
-
-    @patch("requests.post")
-    def test_multi_paper_rec_empty_response(self, mock_post):
-        """Tests multi-paper recommendations with empty response"""
-        mock_post.return_value.json.return_value = {"recommendedPapers": []}
-        mock_post.return_value.status_code = 200
-
-        result = get_multi_paper_recommendations.invoke(
-            input={
-                "paper_ids": ["123", "456"],
-                "limit": 1,
-                "tool_call_id": "test123",
-                "id": "test123",
-            }
-        )
-        assert "papers" in result.update
-        assert len(result.update["papers"]) == 0
-

 def test_state_replace_dict():
    """Verifies state dictionary replacement works correctly"""
@@ -244,11 +259,13 @@

 @pytest.mark.integration
-def test_end_to_end_search_workflow(initial_state):
+def test_end_to_end_search_workflow(initial_state: Talk2Scholars, mock_cfg):
    """Integration test: Complete search workflow"""
    with (
        patch("requests.get") as mock_get,
        patch("langchain_openai.ChatOpenAI") as mock_llm,
+        patch("hydra.compose", return_value=mock_cfg),
+        patch("hydra.initialize"),
    ):
        mock_get.return_value.json.return_value = MOCK_SEARCH_RESPONSE
        mock_get.return_value.status_code = 200
@@ -258,7 +275,7 @@
        mock_llm.return_value = llm_instance

        app = get_app("test_integration")
-        test_state = initial_state.copy()
+        test_state = initial_state
        test_state["messages"] = [HumanMessage(content="search for ML papers")]

        config = {
diff --git a/aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py b/aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py
index 2474c078..486cfebf 100644
--- a/aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py
+++ b/aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py
@@ -8,7 +8,7 @@
 import json
 import logging
 from typing import Annotated, Any, Dict, List, Optional
-
+import hydra
 import pandas as pd
 import requests
 from langchain_core.messages import ToolMessage
@@ -40,6 +40,14 @@ class MultiPaperRecInput(BaseModel):
     model_config = {"arbitrary_types_allowed": True}
 
 
+# Load hydra configuration
+with hydra.initialize(version_base=None, config_path="../../configs"):
+    cfg = hydra.compose(
+        config_name="config", overrides=["tools/multi_paper_recommendation=default"]
+    )
+    cfg = cfg.tools.multi_paper_recommendation
+
+
 @tool(args_schema=MultiPaperRecInput)
 def get_multi_paper_recommendations(
     paper_ids: List[str],
@@ -62,12 +70,12 @@ def get_multi_paper_recommendations(
    """
    logging.info("Starting multi-paper recommendations search.")

-    endpoint = "https://api.semanticscholar.org/recommendations/v1/papers"
-    headers = {"Content-Type": "application/json"}
+    endpoint = cfg.api_endpoint
+    headers = cfg.headers
    payload = {"positivePaperIds": paper_ids, "negativePaperIds": []}
    params = {
        "limit": min(limit, 500),
-        "fields": "paperId,title,abstract,year,authors,citationCount,url",
+        "fields": ",".join(cfg.api_fields),
    }

    # Add year parameter if provided
@@ -80,7 +88,7 @@ def get_multi_paper_recommendations(
        headers=headers,
        params=params,
        data=json.dumps(payload),
-        timeout=10,
+        timeout=cfg.request_timeout,
    )
    logging.info(
        "API Response Status for multi-paper recommendations: %s", response.status_code
diff --git a/aiagents4pharma/talk2scholars/tools/s2/search.py b/aiagents4pharma/talk2scholars/tools/s2/search.py
index a4649612..b4fde86b 100644
--- a/aiagents4pharma/talk2scholars/tools/s2/search.py
+++ b/aiagents4pharma/talk2scholars/tools/s2/search.py
@@ -6,7 +6,7 @@
 
 import logging
 from typing import Annotated, Any, Dict, Optional
-
+import hydra
 import pandas as pd
 import requests
 from langchain_core.messages import ToolMessage
@@ -34,6 +34,12 @@ class SearchInput(BaseModel):
     tool_call_id: Annotated[str, InjectedToolCallId]
 
 
+# Load hydra configuration
+with hydra.initialize(version_base=None, config_path="../../configs"):
+    cfg = hydra.compose(config_name="config", overrides=["tools/search=default"])
+    cfg = cfg.tools.search
+
+
 @tool(args_schema=SearchInput)
 def search_tool(
     query: str,
@@ -55,13 +61,13 @@ def search_tool(
        Dict[str, Any]: The search results and related information.
""" print("Starting paper search...") - endpoint = "https://api.semanticscholar.org/graph/v1/paper/search" + endpoint = cfg.api_endpoint params = { "query": query, "limit": min(limit, 100), # "fields": "paperId,title,abstract,year,authors, # citationCount,url,publicationTypes,openAccessPdf", - "fields": "paperId,title,abstract,year,authors,citationCount,url", + "fields": ",".join(cfg.api_fields), } # Add year parameter if provided diff --git a/aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py b/aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py index c9ed59aa..e514fcb4 100644 --- a/aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +++ b/aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py @@ -6,7 +6,7 @@ import logging from typing import Annotated, Any, Dict, Optional - +import hydra import pandas as pd import requests from langchain_core.messages import ToolMessage @@ -41,6 +41,14 @@ class SinglePaperRecInput(BaseModel): model_config = {"arbitrary_types_allowed": True} +# Load hydra configuration +with hydra.initialize(version_base=None, config_path="../../configs"): + cfg = hydra.compose( + config_name="config", overrides=["tools/single_paper_recommendation=default"] + ) + cfg = cfg.tools.single_paper_recommendation + + @tool(args_schema=SinglePaperRecInput) def get_single_paper_recommendations( paper_id: str, @@ -63,20 +71,18 @@ def get_single_paper_recommendations( """ logger.info("Starting single paper recommendations search.") - endpoint = ( - f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{paper_id}" - ) + endpoint = f"{cfg.api_endpoint}/{paper_id}" params = { "limit": min(limit, 500), # Max 500 per API docs - "fields": "paperId,title,abstract,year,authors,citationCount,url", - "from": "all-cs", # Using all-cs pool as specified in docs + "fields": ",".join(cfg.api_fields), + "from": cfg.recommendation_params.from_pool, } # Add year parameter if provided if year: params["year"] = year - response = requests.get(endpoint, params=params, timeout=10) + response = requests.get(endpoint, params=params, timeout=cfg.request_timeout) data = response.json() papers = data.get("data", []) response = requests.get(endpoint, params=params, timeout=10) diff --git a/app/frontend/streamlit_app_talk2scholars.py b/app/frontend/streamlit_app_talk2scholars.py index f6b0f680..083ca09d 100644 --- a/app/frontend/streamlit_app_talk2scholars.py +++ b/app/frontend/streamlit_app_talk2scholars.py @@ -1,13 +1,14 @@ #!/usr/bin/env python3 -''' +""" Talk2Scholars: A Streamlit app for the Talk2Scholars graph. 
-'''
+"""
 
 import os
 import sys
 import random
 import streamlit as st
+import hydra
 from streamlit_feedback import streamlit_feedback
 from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
 from langchain_core.messages import ChatMessage
@@ -15,24 +16,43 @@
 from langchain_core.tracers.context import collect_runs
 from langchain.callbacks.tracers import LangChainTracer
 from langsmith import Client
-sys.path.append('./')
+
+sys.path.append("./")
 from aiagents4pharma.talk2scholars.agents.main_agent import get_app
 
 st.set_page_config(page_title="Talk2Scholars", page_icon="🤖", layout="wide")
 
+# Initialize configuration
+hydra.core.global_hydra.GlobalHydra.instance().clear()
+if "config" not in st.session_state:
+    # Load Hydra configuration
+    with hydra.initialize(
+        version_base=None,
+        config_path="../../aiagents4pharma/talk2scholars/configs",
+    ):
+        cfg = hydra.compose(config_name="config", overrides=["app/frontend=default"])
+        cfg = cfg.app.frontend
+        st.session_state.config = cfg
+else:
+    cfg = st.session_state.config
+
 # Check if env variable OPENAI_API_KEY exists
 if "OPENAI_API_KEY" not in os.environ:
-    st.error("Please set the OPENAI_API_KEY environment \
-        variable in the terminal where you run the app.")
+    st.error(
+        "Please set the OPENAI_API_KEY environment \
+        variable in the terminal where you run the app."
+    )
     st.stop()
 
 # Create a chat prompt template
-prompt = ChatPromptTemplate.from_messages([
+prompt = ChatPromptTemplate.from_messages(
+    [
        ("system", "Welcome to Talk2Scholars!"),
-        MessagesPlaceholder(variable_name='chat_history', optional=True),
+        MessagesPlaceholder(variable_name="chat_history", optional=True),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),
-])
+    ]
+)
 
 # Initialize chat history
 if "messages" not in st.session_state:
@@ -41,7 +61,7 @@
 # Initialize project_name for Langsmith
 if "project_name" not in st.session_state:
    # st.session_state.project_name = str(st.session_state.user_name) + '@' + str(uuid.uuid4())
-    st.session_state.project_name = 'Talk2Scholars-' + str(random.randint(1000, 9999))
+    st.session_state.project_name = "Talk2Scholars-" + str(random.randint(1000, 9999))
 
 # Initialize run_id for Langsmith
 if "run_id" not in st.session_state:
@@ -55,55 +75,63 @@
 if "llm_model" not in st.session_state:
    st.session_state.app = get_app(st.session_state.unique_id)
 else:
-    st.session_state.app = get_app(st.session_state.unique_id,
-                                   llm_model=st.session_state.llm_model)
+    st.session_state.app = get_app(
+        st.session_state.unique_id, llm_model=st.session_state.llm_model
+    )
 # Get the app
 app = st.session_state.app
 
+
 def _submit_feedback(user_response):
-    '''
+    """
    Function to submit feedback to the developers.
-    '''
+    """
    client = Client()
    client.create_feedback(
        st.session_state.run_id,
        key="feedback",
-        score=1 if user_response['score'] == "👍" else 0,
-        comment=user_response['text']
+        score=1 if user_response["score"] == "👍" else 0,
+        comment=user_response["text"],
    )
    st.info("Your feedback is on its way to the developers. Thank you!", icon="🚀")
 
+
 @st.dialog("Warning ⚠️")
 def update_llm_model():
    """
    Function to update the LLM model.
    """
    llm_model = st.session_state.llm_model
-    st.warning(f"Clicking 'Continue' will reset all agents, \
+    st.warning(
+        f"Clicking 'Continue' will reset all agents, \
        set the selected LLM to {llm_model}. \
        This action will reset the entire app, \
        and agents will lose access to the \
        conversation history. Are you sure \
-        you want to proceed?")
+        you want to proceed?"
+    )
     if st.button("Continue"):
        # Delete all the messages and the app key
        for key in st.session_state.keys():
            if key in ["messages", "app"]:
                del st.session_state[key]
 
+
 # Main layout of the app split into two columns
 main_col1, main_col2 = st.columns([3, 7])
 # First column
 with main_col1:
    with st.container(border=True):
        # Title
-        st.write("""
+        st.write(
+            """
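The frontend's `GlobalHydra.instance().clear()` call in the Streamlit hunk above is what makes this initialization safe across Streamlit's top-to-bottom script re-runs; a second `hydra.initialize()` against an already-initialized GlobalHydra raises an error. A minimal re-run-safe sketch, assuming the same config tree (the helper name is illustrative):

```python
# Re-run-safe Hydra loading for a Streamlit script (illustrative helper).
# GlobalHydra.instance().clear() resets global state and is safe to call even
# if Hydra was never initialized; without it, a script re-run fails.
import hydra
from hydra.core.global_hydra import GlobalHydra


def load_frontend_config():
    GlobalHydra.instance().clear()
    with hydra.initialize(
        version_base=None,
        config_path="../../aiagents4pharma/talk2scholars/configs",
    ):
        cfg = hydra.compose(config_name="config", overrides=["app/frontend=default"])
    return cfg.app.frontend
```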