feat: now multiple post-process can be done
feat: "ALL" keyword adds all tools based on availability of API keys
andthattoo committed Jul 20, 2024
1 parent 67bf423 commit 06bde04
Showing 5 changed files with 259 additions and 20 deletions.
63 changes: 60 additions & 3 deletions docs/workflow.md
@@ -39,9 +39,6 @@ pub static TOOLS: [&str; 6] = [
The main tool functionalities are `Search` & `Scrape`.

#### Search tools
`jina`

Jina utilizes the [Reader API](https://jina.ai/reader/) by Jina. **This can work with or without paid API keys.** Rates are higher when used with an API key. It searches the internet, yielding well-formatted results.

`serper`

@@ -52,6 +49,11 @@ Serper utilizes [Serper's](https://serper.dev/) unmatched Google Search API to s
Utilizes DuckDuckGo to search the web. Doesn't require an API key.

#### Scrape tools

`jina`

Jina utilizes the [Reader API](https://jina.ai/reader/) by Jina. **This can work with or without paid API keys.** Rates are higher when used with an API key. It searches the internet, yielding well-formatted results.

`browserless`

[Browserless](https://www.browserless.io/) offers a Docker image that runs a headless browser to help with scraping. It renders dynamic webpages. To use it, you need to run a browserless image. **It doesn't require a paid service, but you need to make up a token and set it in your .env file.**
@@ -75,6 +77,16 @@ Scraper is a request based scraping tool, **doesn't require API keys**.

Helps retrieve ticker values and **doesn't require an API key**.
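
Tools are enabled by listing their names in the config's `tools` array; a minimal sketch using two of the tool names documented above (the selection here is only illustrative):

```json
"config":{
    "max_steps": 5,
    "max_time": 100,
    "tools": ["jina", "duckduckgo"]
},
```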

You can also pass the "ALL" keyword to add all available tools; which tools are included depends on which API keys are set.

```json
"config":{
"max_steps": 5,
"max_time": 100,
"tools": ["ALL"]
},
```

## Tasks

The `tasks` field is an array of task objects. Tasks are the units of work that help you reach your objective, and workflows outline the execution flow across them. Each task object has the following properties:
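
For illustration, here is a minimal task object taken from the test workflow added in this commit (`tests/test_workflows/all.json`):

```json
{
    "id": "A",
    "name": "Random Query",
    "description": "Writes a random question about Kapadokya.",
    "prompt": "Please write a random question about Kapadokya.",
    "inputs": [],
    "operator": "generation",
    "outputs": [
        {
            "type": "write",
            "key": "query",
            "value": "__result"
        }
    ]
}
```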
@@ -247,6 +259,30 @@ Memory operations are divided by I/O
- `size`: Get the size of the stack.
- `search`: Search the file system.

Input operations fill variables in prompts with values read from memory.

```json
"prompt": "Write down a single search query to collect useful information to answer to given question. Be creative. Avoid asking previously asked questions, keep it concise and clear. \n\nQuery: {query} \n\n Previous Questions: {history} \n\n Search Query:",
"inputs": [
{
"name": "query",
"value": {
"type": "input",
"key": ""
},
"required": true
},
{
"name": "history",
"value": {
"type": "get_all",
"key": "history"
},
"required": false
}
],
```

**Outputs**

- `write`: Write to the cache.
@@ -265,7 +301,28 @@ This example writes the output of the task to cache. `__result` is the reserved
```

- `push`: Push to the stack.

Example
```json
{
"type": "push",
"key": "queries",
"value": "__result"
},
```

This is equivalent to a push operation onto a list.
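
Values pushed this way can later be read back as an input using the `get_all` type shown earlier; a sketch (the variable name `history` is illustrative):

```json
{
    "name": "history",
    "value": {
        "type": "get_all",
        "key": "queries"
    },
    "required": false
}
```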

- `insert`: Insert into the file system.

Example
```json
{
"type": "insert",
"key": "",
"value": "__result"
},
```
Insert adds a `String` to the file system for embedding-based vector search. If the string is large, it is automatically chunked into smaller documents.
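
Documents added with `insert` can presumably be retrieved with the `search` input type listed above; the sketch below is an assumption about its shape, and in particular treating `key` as the search query is not confirmed by this commit:

```json
{
    "name": "related_docs",
    "value": {
        "type": "search",
        "key": "What does the text say about Kapadokya?"
    },
    "required": true
}
```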

These memory operations can be used in the `inputs` and `outputs` fields of the tasks to manipulate and access data during the workflow execution.
53 changes: 36 additions & 17 deletions src/program/executor.rs
@@ -174,29 +174,29 @@ impl Executor {
let mut return_string = return_value.to_string().clone();

if let Some(post_pr) = rv.post_process.clone() {
if let Some(proccess) = post_pr.into_iter().next() {
return match proccess.process_type {
for process in post_pr {
return_string = match process.process_type {
PostProcessType::Replace => {
if proccess.lhs.is_none() || proccess.rhs.is_none() {
if process.lhs.is_none() || process.rhs.is_none() {
error!("lhs and rhs are required for replace post process");
return return_string;
continue;
}
return_string.replace(&proccess.lhs.unwrap(), &proccess.rhs.unwrap())
return_string.replace(&process.lhs.unwrap(), &process.rhs.unwrap())
}
PostProcessType::Append => {
if proccess.lhs.is_none() {
if process.lhs.is_none() {
error!("lhs is required for append post process");
return return_string;
continue;
}
return_string.push_str(&proccess.lhs.unwrap());
return_string.push_str(&process.lhs.unwrap());
return_string
}
PostProcessType::Prepend => {
if proccess.lhs.is_none() {
if process.lhs.is_none() {
error!("lhs is required for prepend post process");
return return_string;
continue;
}
format!("{}{}", proccess.lhs.unwrap(), return_string)
format!("{}{}", process.lhs.unwrap(), return_string)
}
PostProcessType::ToLower => return_string.to_lowercase(),
PostProcessType::ToUpper => return_string.to_uppercase(),
@@ -346,18 +346,37 @@ impl Executor {
tool_names: Vec<String>,
custom_template: Option<CustomToolTemplate>,
) -> Result<Vec<Arc<dyn Tool>>, ToolError> {
if !in_tools(&tool_names) {
return Err(ToolError::ToolDoesNotExist);
let mut tools: Vec<Arc<dyn Tool>> = vec![];

if tool_names.len() == 1 && tool_names[0] == *"ALL".to_string() {
// Check if serper API is set
// ALL results in [jina, serper, stock] or [jina, duckduckgo, stock]
let serper_key = std::env::var("SERPER_API_KEY");
if serper_key.is_err() {
tools.push(Arc::new(DDGSearcher::new()));
} else {
tools.push(Arc::new(SearchTool {}));
}
tools.push(Arc::new(StockScraper::new()));
tools.push(Arc::new(Jina {}));
} else {
if !in_tools(&tool_names) {
return Err(ToolError::ToolDoesNotExist);
}

let _tools: Vec<Arc<dyn Tool>> = tool_names
.iter()
.map(|tool| self.get_tool_by_name(tool))
.collect();

tools.extend(_tools);
}
let mut tools: Vec<Arc<dyn Tool>> = tool_names
.iter()
.map(|tool| self.get_tool_by_name(tool))
.collect();

if let Some(template) = custom_template {
let custom_tool = Arc::new(CustomTool::new_from_template(template));
tools.push(custom_tool);
}

Ok(tools)
}

24 changes: 24 additions & 0 deletions tests/run.rs
@@ -1,5 +1,6 @@
use dotenv::dotenv;
use env_logger::Env;
use log::info;
use ollama_workflows::{Entry, Executor, Model, ProgramMemory, Workflow};

#[tokio::test]
@@ -26,6 +27,29 @@ async fn test_search_workflow_openai() {
exe.execute(Some(&input), workflow, &mut memory).await;
}

#[tokio::test]
async fn test_search_workflow_openai_all_tools() {
dotenv().ok();
let env = Env::default().filter_or("LOG_LEVEL", "info");
env_logger::Builder::from_env(env).init();
let exe = Executor::new(Model::GPT4oMini);
let workflow = Workflow::new_from_json("./tests/test_workflows/all.json").unwrap();
let mut memory = ProgramMemory::new();
exe.execute(None, workflow, &mut memory).await;
}

#[tokio::test]
async fn test_post_process() {
dotenv().ok();
let env = Env::default().filter_or("LOG_LEVEL", "info");
env_logger::Builder::from_env(env).init();
let exe = Executor::new(Model::GPT4oMini);
let workflow = Workflow::new_from_json("./tests/test_workflows/post_process.json").unwrap();
let mut memory = ProgramMemory::new();
let res = exe.execute(None, workflow, &mut memory).await;
info!("Result: {:?}", res);
}

#[tokio::test]
async fn test_ticker_workflow_openai() {
dotenv().ok();
75 changes: 75 additions & 0 deletions tests/test_workflows/all.json
@@ -0,0 +1,75 @@
{
"name": "Simple",
"description": "This is a simple workflow",
"config":{
"max_steps": 5,
"max_time": 100,
"tools": ["ALL"]
},
"tasks":[
{
"id": "A",
"name": "Random Query",
"description": "Writes a random question about Kapadokya.",
"prompt": "Please write a random question about Kapadokya.",
"inputs":[],
"operator": "generation",
"outputs":[
{
"type": "write",
"key": "query",
"value": "__result"
}
]
},
{
"id": "B",
"name": "Search",
"description": "Searches query.",
"prompt": "Find useful information about the query: {query}",
"inputs":[
{
"name": "query",
"value": {
"type": "read",
"key": "query"
},
"required": true
}
],
"operator": "function_calling",
"outputs":[
{
"type": "write",
"key": "search",
"value": "__result"
}
]
},
{
"id": "__end",
"name": "end",
"description": "End of the task",
"prompt": "End of the task",
"inputs": [],
"operator": "end",
"outputs": []
}
],
"steps":[
{
"source":"A",
"target":"B"
},
{
"source":"B",
"target":"end"
}
],
"return_value":{
"input":{
"type": "read",
"key": "search"
}
}
}
64 changes: 64 additions & 0 deletions tests/test_workflows/post_process.json
@@ -0,0 +1,64 @@
{
"name": "Simple",
"description": "This is a simple workflow",
"config":{
"max_steps": 5,
"max_time": 100,
"tools": []
},
"tasks":[
{
"id": "A",
"name": "Random Poem",
"description": "Writes a poem about Kapadokya.",
"prompt": "Please write a poem about Kapadokya.",
"inputs":[],
"operator": "generation",
"outputs":[
{
"type": "write",
"key": "poem",
"value": "__result"
}
]
},
{
"id": "__end",
"name": "end",
"description": "End of the task",
"prompt": "End of the task",
"inputs": [],
"operator": "end",
"outputs": []
}
],
"steps":[
{
"source":"A",
"target":"end"
}
],
"return_value":{
"input":{
"type": "read",
"key": "poem"
},
"post_process":[
{
"process_type": "replace",
"lhs": "a",
"rhs": "#"
},
{
"process_type": "replace",
"lhs": "b",
"rhs": "$"
},
{
"process_type": "to_upper",
"lhs": "",
"rhs": ""
}
]
}
}
