Skip to content

Commit

Permalink
x
Browse files Browse the repository at this point in the history
  • Loading branch information
eyurtsev committed Apr 24, 2024
1 parent bcecb18 commit cc5acdb
Show file tree
Hide file tree
Showing 8 changed files with 169 additions and 9 deletions.
3 changes: 2 additions & 1 deletion libs/core/tests/unit_tests/stores/test_filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

import pytest

from langchain_core.stores import InvalidKeyException, LocalFileStore
from langchain_core.stores import InvalidKeyException
from langchain.storage.file_system import LocalFileStore


@pytest.fixture
Expand Down
2 changes: 1 addition & 1 deletion libs/core/tests/unit_tests/stores/test_in_memory.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from langchain_core.stores import InMemoryStore
from langchain.storage.in_memory import InMemoryStore


def test_mget() -> None:
Expand Down
2 changes: 1 addition & 1 deletion libs/langchain/langchain/storage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
InMemoryByteStore,
InMemoryStore,
InvalidKeyException,
LocalFileStore,
)

from langchain.storage._lc_store import create_kv_docstore, create_lc_store
from langchain.storage.encoder_backed import EncoderBackedStore
from langchain.storage.file_system import LocalFileStore
from langchain.utils.interactive_env import is_interactive_env


Expand Down
163 changes: 161 additions & 2 deletions libs/langchain/langchain/storage/file_system.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,162 @@
from langchain_core.stores import LocalFileStore
import os
import re
from pathlib import Path
from typing import Iterator, List, Optional, Sequence, Tuple, Union

__all__ = ["LocalFileStore"]
from langchain_core.stores import ByteStore

from langchain.storage.exceptions import InvalidKeyException


class LocalFileStore(ByteStore):
    """BaseStore interface that works on the local file system.

    Every key is interpreted as a path relative to ``root_path``; the value
    stored under a key is the raw byte content of the file at that path.

    Examples:
        Create a LocalFileStore instance and perform operations on it:

        .. code-block:: python

            from langchain.storage import LocalFileStore

            # Instantiate the LocalFileStore with the root path
            file_store = LocalFileStore("/path/to/root")

            # Set values for keys
            file_store.mset([("key1", b"value1"), ("key2", b"value2")])

            # Get values for keys
            values = file_store.mget(["key1", "key2"])  # Returns [b"value1", b"value2"]

            # Delete keys
            file_store.mdelete(["key1"])

            # Iterate over keys
            for key in file_store.yield_keys():
                print(key)  # noqa: T201
    """

    def __init__(
        self,
        root_path: Union[str, Path],
        *,
        chmod_file: Optional[int] = None,
        chmod_dir: Optional[int] = None,
    ) -> None:
        """Implement the BaseStore interface for the local file system.

        Args:
            root_path (Union[str, Path]): The root path of the file store. All keys are
                interpreted as paths relative to this root.
            chmod_file: (optional, defaults to `None`) If specified, sets permissions
                for newly created files, overriding the current `umask` if needed.
            chmod_dir: (optional, defaults to `None`) If specified, sets permissions
                for newly created dirs, overriding the current `umask` if needed.
        """
        # Stored as an absolute path so the containment check in
        # `_get_full_path` compares like with like.
        self.root_path = Path(root_path).absolute()
        self.chmod_file = chmod_file
        self.chmod_dir = chmod_dir

    def _get_full_path(self, key: str) -> Path:
        """Get the full path for a given key relative to the root path.

        Args:
            key (str): The key relative to the root path.

        Returns:
            Path: The full path for the given key.

        Raises:
            InvalidKeyException: If the key contains characters other than
                ASCII letters, digits, ``_``, ``.``, ``-`` and ``/``, or if it
                resolves to a location outside of the root path.
        """
        # `fullmatch` is used instead of `match` with a trailing `$`: `$` also
        # matches just before a final newline, which would let a key such as
        # "name\n" slip through validation.
        if not re.fullmatch(r"[a-zA-Z0-9_.\-/]+", key):
            raise InvalidKeyException(f"Invalid characters in key: {key}")
        # `abspath` normalises ".." segments, so a key that climbs out of the
        # root is detected by the common-path comparison below.
        full_path = os.path.abspath(self.root_path / key)
        common_path = os.path.commonpath([str(self.root_path), full_path])
        if common_path != str(self.root_path):
            raise InvalidKeyException(
                f"Invalid key: {key}. Key should be relative to the full path. "
                f"{self.root_path} vs. {common_path} and full path of {full_path}"
            )

        return Path(full_path)

    def _mkdir_for_store(self, dir: Path) -> None:
        """Makes a store directory path (including parents) with specified permissions

        This is needed because `Path.mkdir()` is restricted by the current `umask`,
        whereas the explicit `os.chmod()` used here is not.

        Args:
            dir: (Path) The store directory to make

        Returns:
            None
        """
        if not dir.exists():
            # Create missing ancestors first so each newly created level gets
            # `chmod_dir` applied; directories that already exist are left alone.
            self._mkdir_for_store(dir.parent)
            dir.mkdir(exist_ok=True)
            if self.chmod_dir is not None:
                os.chmod(dir, self.chmod_dir)

    def mget(self, keys: Sequence[str]) -> List[Optional[bytes]]:
        """Get the values associated with the given keys.

        Args:
            keys: A sequence of keys.

        Returns:
            A sequence of optional values associated with the keys.
            If a key is not found, the corresponding value will be None.

        Raises:
            InvalidKeyException: If any key fails validation.
        """
        values: List[Optional[bytes]] = []
        for key in keys:
            full_path = self._get_full_path(key)
            # Read first and handle absence afterwards: checking `exists()`
            # beforehand leaves a window in which the file can disappear.
            try:
                values.append(full_path.read_bytes())
            except (FileNotFoundError, NotADirectoryError):
                # NotADirectoryError: a parent segment of the key is a regular
                # file, so the key cannot exist; treated the same as missing.
                values.append(None)
        return values

    def mset(self, key_value_pairs: Sequence[Tuple[str, bytes]]) -> None:
        """Set the values for the given keys.

        Args:
            key_value_pairs: A sequence of key-value pairs.

        Returns:
            None

        Raises:
            InvalidKeyException: If any key fails validation.
        """
        for key, value in key_value_pairs:
            full_path = self._get_full_path(key)
            self._mkdir_for_store(full_path.parent)
            full_path.write_bytes(value)
            if self.chmod_file is not None:
                os.chmod(full_path, self.chmod_file)

    def mdelete(self, keys: Sequence[str]) -> None:
        """Delete the given keys and their associated values.

        Keys that do not exist are silently ignored.

        Args:
            keys (Sequence[str]): A sequence of keys to delete.

        Returns:
            None

        Raises:
            InvalidKeyException: If any key fails validation.
        """
        for key in keys:
            full_path = self._get_full_path(key)
            try:
                full_path.unlink()
            except (FileNotFoundError, NotADirectoryError):
                # Deleting a key that is already absent is a no-op; catching
                # the error avoids the exists()/unlink() race.
                pass

    def yield_keys(self, prefix: Optional[str] = None) -> Iterator[str]:
        """Get an iterator over keys that match the given prefix.

        The prefix is resolved as a path under the root, and every regular
        file found recursively beneath it is yielded as a key relative to the
        root.

        Args:
            prefix (Optional[str]): The prefix to match.

        Returns:
            Iterator[str]: An iterator over keys that match the given prefix.

        Raises:
            InvalidKeyException: If the prefix fails key validation.
        """
        prefix_path = self._get_full_path(prefix) if prefix else self.root_path
        for file in prefix_path.rglob("*"):
            if file.is_file():
                relative_path = file.relative_to(self.root_path)
                # Always emit "/" separators: key validation rejects
                # backslashes, so a native Windows path could not be passed
                # back to mget/mset/mdelete.
                yield relative_path.as_posix()
2 changes: 1 addition & 1 deletion libs/langchain/tests/unit_tests/embeddings/test_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

import pytest
from langchain_core.embeddings import Embeddings
from langchain_core.stores import InMemoryStore

from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage.in_memory import InMemoryStore


class MockEmbeddings(Embeddings):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from typing import Any, List

from langchain_core.documents import Document
from langchain_core.stores import InMemoryStore

from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import InMemoryStore
from tests.unit_tests.indexes.test_indexing import InMemoryVectorStore


Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from typing import Any, List, Sequence

from langchain_core.documents import Document
from langchain_core.stores import InMemoryStore
from langchain_text_splitters.character import CharacterTextSplitter

from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
from tests.unit_tests.indexes.test_indexing import InMemoryVectorStore


Expand Down
2 changes: 1 addition & 1 deletion libs/langchain/tests/unit_tests/storage/test_lc_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

import pytest
from langchain_core.documents import Document
from langchain_core.stores import LocalFileStore

from langchain.storage._lc_store import create_kv_docstore, create_lc_store
from langchain.storage.file_system import LocalFileStore


@pytest.fixture
Expand Down

0 comments on commit cc5acdb

Please sign in to comment.