Skip to content

Commit

Permalink
vectorstore[minor]: Add support to filter by IS NULL and IS NOT NULL …
Browse files Browse the repository at this point in the history
…criteria (#40)

Description: Add support to filter by IS NULL criteria in the metadata
Issue: N/A
Dependencies: N/A
Twitter handle: @martinferenaz

This PR addresses the case where you want to search on whether a tag has a
value (exists) in the metadata. Checking that requires an "IS NULL"-style
comparison, because the "$eq" operator only checks that the tag exists and
has a null value assigned.

In addition, a test for the $nin condition was added, which exposed a bug;
the bug was resolved by applying a NOT to the in_ condition.
  • Loading branch information
MartinGotelli authored May 16, 2024
1 parent f6ea1b2 commit a995b4c
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 3 deletions.
5 changes: 3 additions & 2 deletions examples/vectorstore.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,8 @@
"\n",
"The vectorstore supports a set of filters that can be applied against the metadata fields of the documents.\n",
"\n",
"| Operator | Meaning/Category |\n",
"|----------|-------------------------|\n",
"| Operator | Meaning/Category |\n",
"|-----------|-------------------------|\n",
"| \\$eq | Equality (==) |\n",
"| \\$ne | Inequality (!=) |\n",
"| \\$lt | Less than (<) |\n",
Expand All @@ -246,6 +246,7 @@
"| \\$in | Special Cased (in) |\n",
"| \\$nin | Special Cased (not in) |\n",
"| \\$between | Special Cased (between) |\n",
"| \\$exists | Special Cased (is null) |\n",
"| \\$like | Text (like) |\n",
"| \\$ilike | Text (case-insensitive like) |\n",
"| \\$and | Logical (and) |\n",
Expand Down
14 changes: 13 additions & 1 deletion langchain_postgres/vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class DistanceStrategy(str, enum.Enum):
"$in",
"$nin",
"$between",
"$exists",
}

TEXT_OPERATORS = {
Expand Down Expand Up @@ -702,13 +703,24 @@ def _handle_field_filter(
if operator in {"$in"}:
return queried_field.in_([str(val) for val in filter_value])
elif operator in {"$nin"}:
return queried_field.nin_([str(val) for val in filter_value])
return ~queried_field.in_([str(val) for val in filter_value])
elif operator in {"$like"}:
return queried_field.like(filter_value)
elif operator in {"$ilike"}:
return queried_field.ilike(filter_value)
else:
raise NotImplementedError()
elif operator == "$exists":
if not isinstance(filter_value, bool):
raise ValueError(
"Expected a boolean value for $exists "
f"operator, but got: {filter_value}"
)
condition = func.jsonb_exists(
self.EmbeddingStore.cmetadata,
field,
)
return ~condition if filter_value else condition
else:
raise NotImplementedError()

Expand Down
26 changes: 26 additions & 0 deletions tests/unit_tests/fixtures/filtering_test_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,16 @@
{"id": {"$between": (1, 1)}},
[1],
),
# Test in
(
{"name": {"$in": ["adam", "bob"]}},
[1, 2],
),
# Test nin
(
{"name": {"$nin": ["adam", "bob"]}},
[3],
),
]

TYPE_5_FILTERING_TEST_CASES = [
Expand All @@ -216,3 +222,23 @@
[1, 3],
),
]

# Parametrized cases for the `$exists` metadata operator. Each entry is a
# (filter, expected ids) pair: the test that consumes this list runs a
# similarity search with the filter and compares the returned documents'
# metadata "id" values, in order, against the expected list.
# NOTE(review): the expected ids depend on the document fixtures defined
# elsewhere — confirm which documents carry "happiness" / "sadness" keys.
# NOTE(review): in the handler hunk of this commit, `$exists: True` negates
# `jsonb_exists`, i.e. it appears to select rows where the key is absent —
# confirm this polarity is intended.
TYPE_6_FILTERING_TEST_CASES = [
# These involve the special operator $exists
(
{"happiness": {"$exists": True}},
[],
),
(
{"happiness": {"$exists": False}},
[1, 2, 3],
),
(
{"sadness": {"$exists": True}},
[3],
),
(
{"sadness": {"$exists": False}},
[1, 2],
),
]
15 changes: 15 additions & 0 deletions tests/unit_tests/test_vectorstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
TYPE_3_FILTERING_TEST_CASES,
TYPE_4_FILTERING_TEST_CASES,
TYPE_5_FILTERING_TEST_CASES,
TYPE_6_FILTERING_TEST_CASES,
)
from tests.utils import VECTORSTORE_CONNECTION_STRING as CONNECTION_STRING

Expand Down Expand Up @@ -484,6 +485,17 @@ def test_pgvector_with_with_metadata_filters_5(
assert [doc.metadata["id"] for doc in docs] == expected_ids, test_filter


@pytest.mark.parametrize("test_filter, expected_ids", TYPE_6_FILTERING_TEST_CASES)
def test_pgvector_with_with_metadata_filters_6(
    pgvector: PGVector,
    test_filter: Dict[str, Any],
    expected_ids: List[int],
) -> None:
    """Run a similarity search for each $exists filter case and compare ids."""
    matches = pgvector.similarity_search("meow", k=5, filter=test_filter)
    # Collect ids in result order; the comparison below is order-sensitive.
    found_ids = [match.metadata["id"] for match in matches]
    assert found_ids == expected_ids, test_filter


@pytest.mark.parametrize(
"invalid_filter",
[
Expand All @@ -496,6 +508,8 @@ def test_pgvector_with_with_metadata_filters_5(
{"$and": {}},
{"$between": {}},
{"$eq": {}},
{"$exists": {}},
{"$exists": 1},
],
)
def test_invalid_filters(pgvector: PGVector, invalid_filter: Any) -> None:
Expand All @@ -510,6 +524,7 @@ def test_validate_operators() -> None:
"$and",
"$between",
"$eq",
"$exists",
"$gt",
"$gte",
"$ilike",
Expand Down

0 comments on commit a995b4c

Please sign in to comment.