donotmerge #1865

Closed
wants to merge 28 commits
28 commits
90bd12a  Implement new Store interface (nfcampos, Sep 25, 2024)
a72afdc  Fix test (nfcampos, Sep 25, 2024)
595c5ea  Lint (nfcampos, Sep 25, 2024)
cab8a24  Spelling (hinthornw, Sep 25, 2024)
d449ab5  Support stringized annotations (hinthornw, Sep 25, 2024)
6b1bf06  String Match (hinthornw, Sep 25, 2024)
fd226c6  Merge pull request #1835 from langchain-ai/wfh/other_annos (nfcampos, Sep 25, 2024)
3bf4270  Inject store (nfcampos, Sep 25, 2024)
0a9cd3a  Merge BaseStore and Store (nfcampos, Sep 25, 2024)
4ab823f  Slots for MemoryStore (nfcampos, Sep 25, 2024)
ebdc989  Add tests for MemoryStore in graph (hinthornw, Sep 26, 2024)
6af537b  Fixes (nfcampos, Sep 26, 2024)
c056300  Remove unimplemented args (nfcampos, Sep 26, 2024)
3ee8758  Update test signature (hinthornw, Sep 26, 2024)
7225cc2  Lint (nfcampos, Sep 26, 2024)
615b06b  Remove (nfcampos, Sep 26, 2024)
22b7cf4  Merge branch 'wfh/add_test' of https://github.com/langchain-ai/langgr… (bracesproul, Sep 26, 2024)
f97db1b  Update etst (hinthornw, Sep 26, 2024)
2b05c41  Merge branch 'wfh/add_test' of https://github.com/langchain-ai/langgr… (bracesproul, Sep 26, 2024)
944676a  Update kafka test (hinthornw, Sep 26, 2024)
a6b4c0c  Mv to checkpoint lib (hinthornw, Sep 26, 2024)
ba7f706  Move to checkpoint package (hinthornw, Sep 26, 2024)
4664b59  Merge branch 'wfh/add_test' of https://github.com/langchain-ai/langgr… (bracesproul, Sep 26, 2024)
99a88eb  Lint fixes (hinthornw, Sep 26, 2024)
d309f58  py.typed (hinthornw, Sep 26, 2024)
3daae2d  Update (hinthornw, Sep 26, 2024)
b9cf088  doc (hinthornw, Sep 26, 2024)
5c07393  Merge branch 'wfh/add_test' of https://github.com/langchain-ai/langgr… (bracesproul, Sep 26, 2024)
225 changes: 225 additions & 0 deletions libs/checkpoint/langgraph/store/base.py
@@ -0,0 +1,225 @@
"""Base classes and types for persistent key-value stores.

Stores enable persistence and memory that can be shared across threads,
scoped to user IDs, assistant IDs, or other arbitrary namespaces.
"""

from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Iterable, NamedTuple, Optional, Union


@dataclass
class Item:
"""Represents a stored item with metadata."""

value: dict[str, Any]
"""The stored data as a dictionary.

Keys are filterable.
"""

scores: dict[str, float]
"""Relevance scores for the item.

Keys can include built-in scores like 'recency' and 'relevance',
as well as any key present in the 'value' dictionary. This allows
for multi-dimensional scoring of items.
"""

id: str
"""Unique identifier within the namespace."""

namespace: tuple[str, ...]
"""Hierarchical path defining the collection in which this document resides.

Represented as a tuple of strings, allowing for nested categorization.
For example: ("documents", 'user123')
"""

created_at: datetime
"""Timestamp of item creation."""

updated_at: datetime
"""Timestamp of last update."""

last_accessed_at: datetime
"""Timestamp of last access."""


class GetOp(NamedTuple):
"""Operation to retrieve an item by namespace and ID."""

namespace: tuple[str, ...]
"""Hierarchical path for the item."""
id: str
"""Unique identifier within the namespace."""


class SearchOp(NamedTuple):
"""Operation to search for items within a namespace prefix."""

namespace_prefix: tuple[str, ...]
"""Hierarchical path prefix to search within."""
filter: Optional[dict[str, Any]] = None
"""Key-value pairs to filter results."""
limit: int = 10
"""Maximum number of items to return."""
offset: int = 0
"""Number of items to skip before returning results."""


class PutOp(NamedTuple):
"""Operation to store, update, or delete an item."""

namespace: tuple[str, ...]
"""Hierarchical path for the item.

Represented as a tuple of strings, allowing for nested categorization.
For example: ("documents", "user123")
"""

id: str
"""Unique identifier for the document.

Should be distinct within its namespace.
"""

value: Optional[dict[str, Any]]
"""Data to be stored, or None to delete the item.

Schema:
- Should be a dictionary where:
- Keys are strings representing field names
- Values can be of any serializable type
- If None, it indicates that the item should be deleted
"""


Op = Union[GetOp, SearchOp, PutOp]
Result = Union[Item, list[Item], None]


class BaseStore(ABC):
"""Abstract base class for key-value stores."""

__slots__ = ("__weakref__",)

@abstractmethod
def batch(self, ops: Iterable[Op]) -> list[Result]:
"""Execute a batch of operations synchronously."""

@abstractmethod
async def abatch(self, ops: Iterable[Op]) -> list[Result]:
"""Execute a batch of operations asynchronously."""

def get(self, namespace: tuple[str, ...], id: str) -> Optional[Item]:
"""Retrieve a single item.

Args:
namespace: Hierarchical path for the item.
id: Unique identifier within the namespace.

Returns:
The retrieved item or None if not found.
"""
return self.batch([GetOp(namespace, id)])[0]

def search(
self,
namespace_prefix: tuple[str, ...],
/,
*,
filter: Optional[dict[str, Any]] = None,
limit: int = 10,
offset: int = 0,
) -> list[Item]:
"""Search for items within a namespace prefix.

Args:
namespace_prefix: Hierarchical path prefix to search within.
filter: Key-value pairs to filter results.
limit: Maximum number of items to return.
offset: Number of items to skip before returning results.

Returns:
List of items matching the search criteria.
"""
return self.batch([SearchOp(namespace_prefix, filter, limit, offset)])[0]

def put(self, namespace: tuple[str, ...], id: str, value: dict[str, Any]) -> None:
"""Store or update an item.

Args:
namespace: Hierarchical path for the item.
id: Unique identifier within the namespace.
value: Dictionary containing the item's data.
"""
self.batch([PutOp(namespace, id, value)])

def delete(self, namespace: tuple[str, ...], id: str) -> None:
"""Delete an item.

Args:
namespace: Hierarchical path for the item.
id: Unique identifier within the namespace.
"""
self.batch([PutOp(namespace, id, None)])

async def aget(self, namespace: tuple[str, ...], id: str) -> Optional[Item]:
"""Asynchronously retrieve a single item.

Args:
namespace: Hierarchical path for the item.
id: Unique identifier within the namespace.

Returns:
The retrieved item or None if not found.
"""
return (await self.abatch([GetOp(namespace, id)]))[0]

async def asearch(
self,
namespace_prefix: tuple[str, ...],
/,
*,
filter: Optional[dict[str, Any]] = None,
limit: int = 10,
offset: int = 0,
) -> list[Item]:
"""Asynchronously search for items within a namespace prefix.

Args:
namespace_prefix: Hierarchical path prefix to search within.
filter: Key-value pairs to filter results.
limit: Maximum number of items to return.
offset: Number of items to skip before returning results.

Returns:
List of items matching the search criteria.
"""
return (await self.abatch([SearchOp(namespace_prefix, filter, limit, offset)]))[
0
]

async def aput(
self, namespace: tuple[str, ...], id: str, value: dict[str, Any]
) -> None:
"""Asynchronously store or update an item.

Args:
namespace: Hierarchical path for the item.
id: Unique identifier within the namespace.
value: Dictionary containing the item's data.
"""
await self.abatch([PutOp(namespace, id, value)])

async def adelete(self, namespace: tuple[str, ...], id: str) -> None:
"""Asynchronously delete an item.

Args:
namespace: Hierarchical path for the item.
id: Unique identifier within the namespace.
"""
await self.abatch([PutOp(namespace, id, None)])
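
A minimal usage sketch of the store API defined above (not part of the diff). It uses the InMemoryStore added in libs/checkpoint/langgraph/store/memory.py later in this PR; the namespace, ids, and values are illustrative.

# Illustrative only: exercises get/put/search/delete from BaseStore,
# all of which funnel into batch() under the hood.
from langgraph.store.memory import InMemoryStore

store = InMemoryStore()
namespace = ("documents", "user123")  # hypothetical namespace

# put() stores or updates; delete() is a put() with value=None.
store.put(namespace, "doc-1", {"title": "Notes", "topic": "stores"})
store.put(namespace, "doc-2", {"title": "Draft", "topic": "agents"})

item = store.get(namespace, "doc-1")
assert item is not None and item.value["title"] == "Notes"

# search() matches any namespace starting with the given prefix and
# filters on exact key/value pairs of Item.value.
results = store.search(("documents",), filter={"topic": "stores"})
assert [it.id for it in results] == ["doc-1"]

store.delete(namespace, "doc-2")
assert store.get(namespace, "doc-2") is None
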
90 changes: 90 additions & 0 deletions libs/checkpoint/langgraph/store/batch.py
@@ -0,0 +1,90 @@
import asyncio
import weakref
from typing import Any, Optional

from langgraph.store.base import BaseStore, GetOp, Item, Op, PutOp, SearchOp


class AsyncBatchedBaseStore(BaseStore):
"""Efficiently batch operations in a background task."""

__slots__ = ("_loop", "_aqueue", "_task")

def __init__(self) -> None:
self._loop = asyncio.get_running_loop()
self._aqueue: dict[asyncio.Future, Op] = {}
self._task = self._loop.create_task(_run(self._aqueue, weakref.ref(self)))

def __del__(self) -> None:
self._task.cancel()

async def aget(
self,
namespace: tuple[str, ...],
id: str,
) -> Optional[Item]:
fut = self._loop.create_future()
self._aqueue[fut] = GetOp(namespace, id)
return await fut

async def asearch(
self,
namespace_prefix: tuple[str, ...],
/,
*,
query: Optional[str] = None,
filter: Optional[dict[str, Any]] = None,
weights: Optional[dict[str, float]] = None,
limit: int = 10,
offset: int = 0,
) -> list[Item]:
fut = self._loop.create_future()
self._aqueue[fut] = SearchOp(namespace_prefix, filter, limit, offset)
return await fut

async def aput(
self,
namespace: tuple[str, ...],
id: str,
value: dict[str, Any],
) -> None:
fut = self._loop.create_future()
self._aqueue[fut] = PutOp(namespace, id, value)
return await fut

async def adelete(
self,
namespace: tuple[str, ...],
id: str,
) -> None:
fut = self._loop.create_future()
self._aqueue[fut] = PutOp(namespace, id, None)
return await fut


async def _run(
    aqueue: dict[asyncio.Future, Op], store: weakref.ReferenceType[BaseStore]
) -> None:
    while True:
        await asyncio.sleep(0)
        if not aqueue:
            continue
        if s := store():
            # get the operations to run
            taken = aqueue.copy()
            # action each operation
            try:
                results = await s.abatch(taken.values())
                # set the results of each operation
                for fut, result in zip(taken, results):
                    fut.set_result(result)
            except Exception as e:
                for fut in taken:
                    fut.set_exception(e)
            # remove the operations from the queue
            for fut in taken:
                del aqueue[fut]
        else:
            break
        # remove strong ref to store
        del s
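
A minimal sketch (an assumption, not code from this PR) of how AsyncBatchedBaseStore is intended to be used: subclass it, implement batch()/abatch(), and awaits on the async convenience methods are queued as futures and resolved by the background _run task via abatch(). The BatchedMemoryStore wrapper and its delegation to InMemoryStore (defined in memory.py below) are hypothetical.

import asyncio
from typing import Iterable

from langgraph.store.base import Op, Result
from langgraph.store.batch import AsyncBatchedBaseStore
from langgraph.store.memory import InMemoryStore


class BatchedMemoryStore(AsyncBatchedBaseStore):
    """Hypothetical subclass delegating each drained batch to an InMemoryStore."""

    def __init__(self) -> None:
        super().__init__()  # captures the running loop and starts the drain task
        self._inner = InMemoryStore()

    def batch(self, ops: Iterable[Op]) -> list[Result]:
        return self._inner.batch(ops)

    async def abatch(self, ops: Iterable[Op]) -> list[Result]:
        # For the sketch, async batching just reuses the sync in-memory batch.
        return self._inner.batch(ops)


async def main() -> None:
    store = BatchedMemoryStore()
    ns = ("assistants", "a1")  # hypothetical namespace
    await store.aput(ns, "k1", {"v": 1})
    # Both gets enqueue futures; the background task drains the queue and
    # resolves them with the results of abatch().
    items = await asyncio.gather(store.aget(ns, "k1"), store.aget(ns, "missing"))
    assert items[0].value == {"v": 1} and items[1] is None


asyncio.run(main())
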
68 changes: 68 additions & 0 deletions libs/checkpoint/langgraph/store/memory.py
@@ -0,0 +1,68 @@
from collections import defaultdict
from datetime import datetime, timezone
from typing import Iterable

from langgraph.store.base import BaseStore, GetOp, Item, Op, PutOp, Result, SearchOp


class InMemoryStore(BaseStore):
"""A KV store backed by an in-memory python dictionary.

Useful for testing/experimentation and lightweight PoC's.
For actual persistence, use a Store backed by a proper database.
"""

__slots__ = ("_data",)

def __init__(self) -> None:
self._data: dict[tuple[str, ...], dict[str, Item]] = defaultdict(dict)

def batch(self, ops: Iterable[Op]) -> list[Result]:
results: list[Result] = []
for op in ops:
if isinstance(op, GetOp):
item = self._data[op.namespace].get(op.id)
if item is not None:
item.last_accessed_at = datetime.now(timezone.utc)
results.append(item)
elif isinstance(op, SearchOp):
candidates = [
item
for namespace, items in self._data.items()
if (
namespace[: len(op.namespace_prefix)] == op.namespace_prefix
if len(namespace) >= len(op.namespace_prefix)
else False
)
for item in items.values()
]
if op.filter:
candidates = [
item
for item in candidates
if item.value.items() >= op.filter.items()
]
results.append(candidates[op.offset : op.offset + op.limit])
elif isinstance(op, PutOp):
if op.value is None:
self._data[op.namespace].pop(op.id, None)
elif op.id in self._data[op.namespace]:
self._data[op.namespace][op.id].value = op.value
self._data[op.namespace][op.id].updated_at = datetime.now(
timezone.utc
)
else:
self._data[op.namespace][op.id] = Item(
value=op.value,
scores={},
id=op.id,
namespace=op.namespace,
created_at=datetime.now(timezone.utc),
updated_at=datetime.now(timezone.utc),
last_accessed_at=datetime.now(timezone.utc),
)
results.append(None)
return results

async def abatch(self, ops: Iterable[Op]) -> list[Result]:
return self.batch(ops)
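
A short illustrative sketch (not part of the diff) of calling InMemoryStore.batch() directly with mixed operations, and of how namespace prefixes scope SearchOp results; the namespaces and payloads are made up.

from langgraph.store.base import GetOp, PutOp, SearchOp
from langgraph.store.memory import InMemoryStore

store = InMemoryStore()

# One batch() call can mix writes, reads, and searches; results come back
# in the same order as the operations.
results = store.batch(
    [
        PutOp(("users", "u1", "memories"), "m1", {"kind": "preference"}),
        PutOp(("users", "u2", "memories"), "m1", {"kind": "preference"}),
        GetOp(("users", "u1", "memories"), "m1"),
        SearchOp(("users",)),       # prefix matches both namespaces
        SearchOp(("users", "u2")),  # prefix matches only u2's namespace
    ]
)

assert results[0] is None and results[1] is None  # PutOp always yields None
assert results[2].namespace == ("users", "u1", "memories")
assert len(results[3]) == 2
assert [it.namespace for it in results[4]] == [("users", "u2", "memories")]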