From 06bde0435c4490e6ad6cf2578be45df3f51d10d1 Mon Sep 17 00:00:00 2001 From: andthattoo Date: Sun, 21 Jul 2024 01:34:29 +0300 Subject: [PATCH] feat: now multiple post-process can be done feat: "ALL" keyword adds all tools based on availability of API keys --- docs/workflow.md | 63 ++++++++++++++++++++-- src/program/executor.rs | 53 ++++++++++++------ tests/run.rs | 24 +++++++++ tests/test_workflows/all.json | 75 ++++++++++++++++++++++++++ tests/test_workflows/post_process.json | 64 ++++++++++++++++++++++ 5 files changed, 259 insertions(+), 20 deletions(-) create mode 100644 tests/test_workflows/all.json create mode 100644 tests/test_workflows/post_process.json diff --git a/docs/workflow.md b/docs/workflow.md index fdcb1a4..b7e4447 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -39,9 +39,6 @@ pub static TOOLS: [&str; 6] = [ Main functionalities of tools are `Search` & `Scrape`. #### Search tools -`jina` - -Jina utilizies [Reader API](https://jina.ai/reader/ ) by Jina. **This can work with or without paid API keys.** Has higher rates when used with API. It searches the internet, yielding well formatted results. `serper` @@ -52,6 +49,11 @@ Serper utilizes [Serper's](https://serper.dev/) unmatched Google Search API to s Utilizes duckduckgo to search the web. Doesn't require an API key. #### Scrape tools + +`jina` + +Jina utilizes [Reader API](https://jina.ai/reader/) by Jina. **This can work with or without paid API keys.** Has higher rates when used with API. It searches the internet, yielding well formatted results. + `browserless` [Browserless](https://www.browserless.io/) offers a docker image that runs a headless browser to help scraping. Renders dynamic webpages. To use, you need to run a browserless image. **It doesn't require a paid service yet you need to make up a token and use it in your .env file.** @@ -75,6 +77,16 @@ Scraper is a request based scraping tool, **doesn't require API keys**. 
Helps receiving tickers values and **doesn't require an API key**. +You can also pass in the "ALL" keyword to add all possible tool functionalities. + +```json + "config":{ + "max_steps": 5, + "max_time": 100, + "tools": ["ALL"] + }, +``` + ## Tasks The `tasks` field is an array of task objects. Tasks are designed to be the tasks to help reach your objective. Workflows help you outline the execution flow of each task. Each task object has the following properties: @@ -247,6 +259,30 @@ Memory operations are divided by I/O - `size`: Get the size of the stack. - `search`: Search the file system. +Input operations help fill variables in prompts using memory operations. + +```json + "prompt": "Write down a single search query to collect useful information to answer to given question. Be creative. Avoid asking previously asked questions, keep it concise and clear. \n\nQuery: {query} \n\n Previous Questions: {history} \n\n Search Query:", + "inputs": [ + { + "name": "query", + "value": { + "type": "input", + "key": "" + }, + "required": true + }, + { + "name": "history", + "value": { + "type": "get_all", + "key": "history" + }, + "required": false + } + ], +``` + **Outputs** - `write`: Write to the cache. @@ -265,7 +301,28 @@ This example writes the output of the task to cache. `__result` is the reserverd ``` - `push`: Push to the stack. + +Example +```json +{ + "type": "push", + "key": "queries", + "value": "__result" +}, +``` + +Equivalent to a push operation to a list + - `insert`: Insert into the file system. +Example +```json +{ + "type": "insert", + "key": "", + "value": "__result" +}, +``` +Insert adds a `String` to the file system for embedding-based vector search. If the string is large, it is automatically chunked up to smaller documents. These memory operations can be used in the `inputs` and `outputs` fields of the tasks to manipulate and access data during the workflow execution. 
\ No newline at end of file diff --git a/src/program/executor.rs b/src/program/executor.rs index 5106c55..49d741e 100644 --- a/src/program/executor.rs +++ b/src/program/executor.rs @@ -174,29 +174,29 @@ impl Executor { let mut return_string = return_value.to_string().clone(); if let Some(post_pr) = rv.post_process.clone() { - if let Some(proccess) = post_pr.into_iter().next() { - return match proccess.process_type { + for process in post_pr { + return_string = match process.process_type { PostProcessType::Replace => { - if proccess.lhs.is_none() || proccess.rhs.is_none() { + if process.lhs.is_none() || process.rhs.is_none() { error!("lhs and rhs are required for replace post process"); - return return_string; + continue; } - return_string.replace(&proccess.lhs.unwrap(), &proccess.rhs.unwrap()) + return_string.replace(&process.lhs.unwrap(), &process.rhs.unwrap()) } PostProcessType::Append => { - if proccess.lhs.is_none() { + if process.lhs.is_none() { error!("lhs is required for append post process"); - return return_string; + continue; } - return_string.push_str(&proccess.lhs.unwrap()); + return_string.push_str(&process.lhs.unwrap()); return_string } PostProcessType::Prepend => { - if proccess.lhs.is_none() { + if process.lhs.is_none() { error!("lhs is required for prepend post process"); - return return_string; + continue; } - format!("{}{}", proccess.lhs.unwrap(), return_string) + format!("{}{}", process.lhs.unwrap(), return_string) } PostProcessType::ToLower => return_string.to_lowercase(), PostProcessType::ToUpper => return_string.to_uppercase(), @@ -346,18 +346,37 @@ impl Executor { tool_names: Vec, custom_template: Option, ) -> Result>, ToolError> { - if !in_tools(&tool_names) { - return Err(ToolError::ToolDoesNotExist); + let mut tools: Vec> = vec![]; + + if tool_names.len() == 1 && tool_names[0] == *"ALL".to_string() { + // Check if serper API is set + // ALL results in [jina, serper, stock] or [jina, duckduckgo, stock] + let serper_key = 
std::env::var("SERPER_API_KEY"); + if serper_key.is_err() { + tools.push(Arc::new(DDGSearcher::new())); + } else { + tools.push(Arc::new(SearchTool {})); + } + tools.push(Arc::new(StockScraper::new())); + tools.push(Arc::new(Jina {})); + } else { + if !in_tools(&tool_names) { + return Err(ToolError::ToolDoesNotExist); + } + + let _tools: Vec> = tool_names + .iter() + .map(|tool| self.get_tool_by_name(tool)) + .collect(); + + tools.extend(_tools); } - let mut tools: Vec> = tool_names - .iter() - .map(|tool| self.get_tool_by_name(tool)) - .collect(); if let Some(template) = custom_template { let custom_tool = Arc::new(CustomTool::new_from_template(template)); tools.push(custom_tool); } + Ok(tools) } diff --git a/tests/run.rs b/tests/run.rs index 5559d9f..479eefb 100644 --- a/tests/run.rs +++ b/tests/run.rs @@ -1,5 +1,6 @@ use dotenv::dotenv; use env_logger::Env; +use log::info; use ollama_workflows::{Entry, Executor, Model, ProgramMemory, Workflow}; #[tokio::test] @@ -26,6 +27,29 @@ async fn test_search_workflow_openai() { exe.execute(Some(&input), workflow, &mut memory).await; } +#[tokio::test] +async fn test_search_workflow_openai_all_tools() { + dotenv().ok(); + let env = Env::default().filter_or("LOG_LEVEL", "info"); + env_logger::Builder::from_env(env).init(); + let exe = Executor::new(Model::GPT4oMini); + let workflow = Workflow::new_from_json("./tests/test_workflows/all.json").unwrap(); + let mut memory = ProgramMemory::new(); + exe.execute(None, workflow, &mut memory).await; +} + +#[tokio::test] +async fn test_post_process() { + dotenv().ok(); + let env = Env::default().filter_or("LOG_LEVEL", "info"); + env_logger::Builder::from_env(env).init(); + let exe = Executor::new(Model::GPT4oMini); + let workflow = Workflow::new_from_json("./tests/test_workflows/post_process.json").unwrap(); + let mut memory = ProgramMemory::new(); + let res = exe.execute(None, workflow, &mut memory).await; + info!("Result: {:?}", res); +} + #[tokio::test] async fn 
test_ticker_workflow_openai() { dotenv().ok(); diff --git a/tests/test_workflows/all.json b/tests/test_workflows/all.json new file mode 100644 index 0000000..0e9714d --- /dev/null +++ b/tests/test_workflows/all.json @@ -0,0 +1,75 @@ +{ + "name": "Simple", + "description": "This is a simple workflow", + "config":{ + "max_steps": 5, + "max_time": 100, + "tools": ["ALL"] + }, + "tasks":[ + { + "id": "A", + "name": "Random Query", + "description": "Writes a random question about Kapadokya.", + "prompt": "Please write a random question about Kapadokya.", + "inputs":[], + "operator": "generation", + "outputs":[ + { + "type": "write", + "key": "query", + "value": "__result" + } + ] + }, + { + "id": "B", + "name": "Search", + "description": "Searches query.", + "prompt": "Find useful information about the query: {query}", + "inputs":[ + { + "name": "query", + "value": { + "type": "read", + "key": "query" + }, + "required": true + } + ], + "operator": "function_calling", + "outputs":[ + { + "type": "write", + "key": "search", + "value": "__result" + } + ] + }, + { + "id": "__end", + "name": "end", + "description": "End of the task", + "prompt": "End of the task", + "inputs": [], + "operator": "end", + "outputs": [] + } + ], + "steps":[ + { + "source":"A", + "target":"B" + }, + { + "source":"B", + "target":"end" + } + ], + "return_value":{ + "input":{ + "type": "read", + "key": "search" + } + } +} \ No newline at end of file diff --git a/tests/test_workflows/post_process.json b/tests/test_workflows/post_process.json new file mode 100644 index 0000000..ea4e12a --- /dev/null +++ b/tests/test_workflows/post_process.json @@ -0,0 +1,64 @@ +{ + "name": "Simple", + "description": "This is a simple workflow", + "config":{ + "max_steps": 5, + "max_time": 100, + "tools": [] + }, + "tasks":[ + { + "id": "A", + "name": "Random Poem", + "description": "Writes a poem about Kapadokya.", + "prompt": "Please write a poem about Kapadokya.", + "inputs":[], + "operator": "generation", + 
"outputs":[ + { + "type": "write", + "key": "poem", + "value": "__result" + } + ] + }, + { + "id": "__end", + "name": "end", + "description": "End of the task", + "prompt": "End of the task", + "inputs": [], + "operator": "end", + "outputs": [] + } + ], + "steps":[ + { + "source":"A", + "target":"end" + } + ], + "return_value":{ + "input":{ + "type": "read", + "key": "poem" + }, + "post_process":[ + { + "process_type": "replace", + "lhs": "a", + "rhs": "#" + }, + { + "process_type": "replace", + "lhs": "b", + "rhs": "$" + }, + { + "process_type": "to_upper", + "lhs": "", + "rhs": "" + } + ] + } +}