From ae99c08a83198322f17132ea59bb63dea8fa93ab Mon Sep 17 00:00:00 2001
From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com>
Date: Mon, 7 Oct 2024 15:02:43 -0700
Subject: [PATCH 1/4] Share a thread again

---
 src/chatbot/graph.py | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/src/chatbot/graph.py b/src/chatbot/graph.py
index 93aefa4..b234061 100644
--- a/src/chatbot/graph.py
+++ b/src/chatbot/graph.py
@@ -1,6 +1,5 @@
 """Example chatbot that incorporates user memories."""

-import uuid
 from dataclasses import dataclass
 from datetime import datetime, timezone

@@ -48,24 +47,20 @@ async def schedule_memories(state: ChatState, config: RunnableConfig) -> None:
     """Prompt the bot to respond to the user, incorporating memories (if provided)."""
     configurable = ChatConfigurable.from_runnable_config(config)
     memory_client = get_client()
-    mem_thread_id = str(
-        uuid.uuid5(
-            uuid.NAMESPACE_URL,
-            configurable.mem_assistant_id + config["configurable"]["thread_id"],
-        )
-    )
-    await memory_client.threads.create(thread_id=mem_thread_id, if_exists="do_nothing")
     await memory_client.runs.create(
-        # Generate a thread so we can run the memory service on a separate
-        # but consistent thread. This lets us cancel scheduled runs if
-        # a new message arrives to our chatbot before the memory service
-        # begins processing.
-        thread_id=mem_thread_id,
+        # We enqueue the memory formation process on the same thread.
+        # This means that IF this thread doesn't receive more messages before `after_seconds`,
+        # it will read from the shared state and extract memories for us.
+        # If a new request comes in for this thread before the scheduled run is executed,
+        # that run will be canceled, and a **new** one will be scheduled once
+        # this node is executed again.
+        thread_id=config["configurable"]["thread_id"],
         # Rollback & cancel any scheduled runs for the target thread
         # that haven't completed
-        multitask_strategy="rollback",
+        multitask_strategy="enqueue",
         # This lets us "debounce" repeated requests to the memory graph
-        # if the user is actively engaging in a conversation
+        # if the user is actively engaging in a conversation. This saves us $$ and
+        # can help reduce the occurrence of duplicate memories.
         after_seconds=configurable.delay_seconds,
         # Specify the graph and/or graph configuration to handle the memory processing
         assistant_id=configurable.mem_assistant_id,

From 98828a657021707519fe6f5fa27be1b4fed22ef4 Mon Sep 17 00:00:00 2001
From: Vadym Barda
Date: Mon, 7 Oct 2024 18:11:49 -0400
Subject: [PATCH 2/4] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b1bbb0a..f17eb72 100644
--- a/README.md
+++ b/README.md
@@ -134,7 +134,7 @@ The first schema is the `User` profile schema, copied below:

 The schema has a name and description, as well as JSON schema parameters that are all passed to an LLM. The LLM infers the values for the schema based on the conversations you send to the memory service.

-The schema also has an `update_mode` parameter that defines **how** the service should update its memory when new information is provided. The **patch** update_mode instructs the graph that we should always have a single JSON object to represent this user. When new information is provided, the model can generate "patches", or small updates to extend, delete, or replace content in the current memory document. This type of `update_mode` is useful if you want strict visibility into a user's representation at any given point or if you want to let the end user directly view and update their own representation for the bot. By defining these specific parameters, we are decideing that this (and only this) information is relevant to track and excluding other information (like "relationships" or "religion", etc.) from being tracked. It's an easy way for us to bias the service into focusing on what we think is important for our specific bot.
+The schema also has an `update_mode` parameter that defines **how** the service should update its memory when new information is provided. The **patch** update_mode instructs the graph that we should always have a single JSON object to represent this user. When new information is provided, the model can generate "patches", or small updates to extend, delete, or replace content in the current memory document. This type of `update_mode` is useful if you want strict visibility into a user's representation at any given point or if you want to let the end user directly view and update their own representation for the bot. By defining these specific parameters, we are deciding that this (and only this) information is relevant to track and excluding other information (like "relationships" or "religion", etc.) from being tracked. It's an easy way for us to bias the service into focusing on what we think is important for our specific bot.

 The second memory schema we provide is the **Note** schema, shown below:

From 16a06a06bc201ed54d75717dd1e6de950b661bad Mon Sep 17 00:00:00 2001
From: Vadym Barda
Date: Mon, 7 Oct 2024 18:17:13 -0400
Subject: [PATCH 3/4] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f17eb72..673f5d8 100644
--- a/README.md
+++ b/README.md
@@ -174,7 +174,7 @@ In the previous section we showed how the memory schemas define how memories sho

 If no memory has been saved yet, `trust_call` prompts the model to populate the document. It additionally does schema validation to ensure the output is correct.

-If a memory already exists, you _could_ simply prompt the model to re-geerate the schema anew on each round. Doing so, however, leads to frequent information loss, especially on complicated schemas, since LLMs are wont to forget or omit previously stored details when regenerating information from scratch if it doesn't happen to be immediately relevant.
+If a memory already exists, you _could_ simply prompt the model to re-generate the schema anew on each round. Doing so, however, leads to frequent information loss, especially on complicated schemas, since LLMs are wont to forget or omit previously stored details when regenerating information from scratch if it doesn't happen to be immediately relevant.

 To avoid memory loss, your memory schema is placed in the system prompt but **not** made available as a tool for the model to call. Instead, the LLM is provided a `PatchDoc` tool. This forces the model to generate a chain-of-thought of 0 or more planned edits, along with patches to individual JSON paths to be modified.
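To make the "patch" `update_mode` discussed in these README changes concrete, here is a small illustrative sketch, not code from this repository: it assumes the third-party `jsonpatch` package, and the profile fields are hypothetical. The template itself routes such edits through the `PatchDoc` tool and `trust_call` validation described above.

```python
# Illustrative sketch only (not from this repo): assumes the `jsonpatch` package
# and a hypothetical user-profile document.
import jsonpatch

# The single JSON document the "patch" update_mode maintains for a user.
profile = {"preferred_name": "Ada", "interests": ["chess"]}

# Small edits the model might emit instead of rewriting the whole profile.
ops = [
    {"op": "replace", "path": "/preferred_name", "value": "Ada L."},
    {"op": "add", "path": "/interests/-", "value": "bouldering"},
]

# Apply the patch; fields the model didn't mention are left untouched.
updated = jsonpatch.apply_patch(profile, ops)
print(updated)  # {'preferred_name': 'Ada L.', 'interests': ['chess', 'bouldering']}
```

Because each operation targets a specific JSON path, fields the model never mentions survive the update, which is exactly the information-loss problem the patch approach is meant to avoid.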
From 0d25f37c3e4311eff270ab339dfe6a6c8fed2d8a Mon Sep 17 00:00:00 2001
From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com>
Date: Mon, 7 Oct 2024 16:05:28 -0700
Subject: [PATCH 4/4] Update comment

---
 README.md                 | 10 +++++-----
 src/chatbot/graph.py      |  5 +++--
 src/memory_graph/graph.py |  2 +-
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 673f5d8..6118117 100644
--- a/README.md
+++ b/README.md
@@ -160,7 +160,7 @@ The second memory schema we provide is the **Note** schema, shown below:
 }
 ```

-Just like the previous example, this schema has a name, description, and parameters. Notic that the `update_mode` this time is "insert". This instructs the LLM in the memory service to **insert new memories to the list or update existing ones**. The number of memories for this `update_mode` is **unbound** since the model can continue to store new notes any time something interesting shows up in the conversation. Each time the service runs, the model can generate multiple schemas, some to update or re-contextualize existing memories, some to document new information. Note taht these memory schemas tend to have fewer parameters and are usually most effective if you have a field to let the service provide contextual information (so that if your bot fetches this memory, it isn't taken out-of-context).
+Just like the previous example, this schema has a name, description, and parameters. Notice that the `update_mode` this time is "insert". This instructs the LLM in the memory service to **insert new memories to the list or update existing ones**. The number of memories for this `update_mode` is **unbound** since the model can continue to store new notes any time something interesting shows up in the conversation. Each time the service runs, the model can generate multiple schemas, some to update or re-contextualize existing memories, some to document new information. Note that these memory schemas tend to have fewer parameters and are usually most effective if you have a field to let the service provide contextual information (so that if your bot fetches this memory, it isn't taken out-of-context).

 To wrap up this section: `memory_schemas` provide a name, description, and parameters that the LLM populates to store in the database. The `update_mode` controls whether new information should always overwrite an existing memory or whether it should insert new memories (while optionally updating existing ones).

@@ -174,7 +174,7 @@ In the previous section we showed how the memory schemas define how memories sho

 If no memory has been saved yet, `trust_call` prompts the model to populate the document. It additionally does schema validation to ensure the output is correct.

-If a memory already exists, you _could_ simply prompt the model to re-generate the schema anew on each round. Doing so, however, leads to frequent information loss, especially on complicated schemas, since LLMs are wont to forget or omit previously stored details when regenerating information from scratch if it doesn't happen to be immediately relevant.
+If a memory already exists, you _could_ simply prompt the model to re-generate the schema anew on each round. Doing so, however, leads to frequent information loss, especially on complicated schemas, since LLMs are won't to forget or omit previously stored details when regenerating information from scratch if it doesn't happen to be immediately relevant.

 To avoid memory loss, your memory schema is placed in the system prompt but **not** made available as a tool for the model to call. Instead, the LLM is provided a `PatchDoc` tool. This forces the model to generate a chain-of-thought of 0 or more planned edits, along with patches to individual JSON paths to be modified.

@@ -182,9 +182,9 @@ Applying updates as JSON patches helps minimize information loss, save token cos

 #### insert

-If no memories have been saved yet, the model is given a single tool (the schema from your memory config). It is prompted to use multi-tool callint to generate 0 or more instances of your schema depending on the conversation context.
+If no memories have been saved yet, the model is given a single tool (the schema from your memory config). It is prompted to use multi-tool calling to generate 0 or more instances of your schema depending on the conversation context.

-If memories exist for this user, the memory graph searches for existing ones to provide additional context. These are put in the system promt along with two two tools: your memory schema as well as a "PatchDoc" tool. The LLM is prompted to invoke whichever tools are appropriate given the conversational context. The LLM can call the PatchDoc tool to update existing memories in case they are no longer correct or require additional context. It can also call your memory schema tool any number of times to save new memories or notes. Either way, it calls these tools in a single generation step, and the graph upserts the results to the memory store.
+If memories exist for this user, the memory graph searches for existing ones to provide additional context. These are put in the system prompt along with two tools: your memory schema as well as a "PatchDoc" tool. The LLM is prompted to invoke whichever tools are appropriate given the conversational context. The LLM can call the PatchDoc tool to update existing memories in case they are no longer correct or require additional context. It can also call your memory schema tool any number of times to save new memories or notes. Either way, it calls these tools in a single generation step, and the graph upserts the results to the memory store.

 ![Memory Diagram](./static/memory_graph.png)

@@ -206,7 +206,7 @@ All these memories need to go somewhere reliable. All LangGraph deployments come

 You can learn more about Storage in LangGraph [here](https://langchain-ai.github.io/langgraph/how-tos/memory/shared-state/).

-In our case, we are saving all memories namespaced by `user_id` and by the memory scheam you provide. That way you can easily search for memories for a given user and of a particualr type. This diagram shows how these pieces fit together:
+In our case, we are saving all memories namespaced by `user_id` and by the memory schema you provide. That way you can easily search for memories for a given user and of a particular type. This diagram shows how these pieces fit together:

 ![Memory types](./static/memory_types.png)

diff --git a/src/chatbot/graph.py b/src/chatbot/graph.py
index b234061..165669f 100644
--- a/src/chatbot/graph.py
+++ b/src/chatbot/graph.py
@@ -55,8 +55,9 @@ async def schedule_memories(state: ChatState, config: RunnableConfig) -> None:
         # that run will be canceled, and a **new** one will be scheduled once
         # this node is executed again.
         thread_id=config["configurable"]["thread_id"],
-        # Rollback & cancel any scheduled runs for the target thread
-        # that haven't completed
+        # This memory-formation run will be enqueued and run later
+        # If a new run comes in before it is scheduled, it will be cancelled,
+        # then when this node is executed again, a *new* run will be scheduled
         multitask_strategy="enqueue",
         # This lets us "debounce" repeated requests to the memory graph
         # if the user is actively engaging in a conversation. This saves us $$ and
diff --git a/src/memory_graph/graph.py b/src/memory_graph/graph.py
index 9ead059..793344d 100644
--- a/src/memory_graph/graph.py
+++ b/src/memory_graph/graph.py
@@ -31,7 +31,7 @@ async def handle_patch_memory(

     # Fetch existing memories from the store for this (patch) memory schema
     existing_item = await store.aget(namespace, "memory")
-    existing = {existing_item.key: existing_item.value} if existing_item else None
+    existing = {state.function_name: existing_item.value} if existing_item else None

     # Get the configuration for this memory schema (identified by function_name)
     memory_config = next(
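As a companion to the diffs above, here is a minimal sketch of the debounced scheduling pattern they describe. It is not a module from this repository: the function name, the default delay, and the `langgraph_sdk` import are assumptions, while the `runs.create` arguments mirror the ones shown in `schedule_memories`.

```python
# Minimal sketch of the debounced memory-run scheduling shown in the diffs above.
# Function name, default delay, and the SDK import are assumptions, not repo code.
from langgraph_sdk import get_client


async def schedule_memory_run(
    thread_id: str, mem_assistant_id: str, delay_seconds: int = 60
) -> None:
    """Enqueue a memory-formation run on the chat thread itself."""
    client = get_client()
    await client.runs.create(
        # Reuse the chat thread so the memory graph reads the shared conversation state.
        thread_id=thread_id,
        # The assistant (memory graph) that will extract and store memories.
        assistant_id=mem_assistant_id,
        # Wait before starting; repeated calls while the user keeps chatting
        # effectively debounce the work, per the comments in the diff above.
        after_seconds=delay_seconds,
        multitask_strategy="enqueue",
    )
```

Per the comments in the patches above, a new user turn cancels the pending run and the node schedules a fresh one, so extraction tends to happen only after the conversation goes quiet.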