From 47312c01790915d7a219f3082c53ede30ebc2a90 Mon Sep 17 00:00:00 2001 From: Estelle Scifo Date: Mon, 21 Oct 2024 11:48:52 +0200 Subject: [PATCH] Harmonize return properties in Vector and Hybrid retrievers (#193) * Harmonize retriever results * Undo changes to external retriever * Update CHANGELOG --- CHANGELOG.md | 3 +++ src/neo4j_graphrag/neo4j_queries.py | 2 +- tests/e2e/qdrant_e2e/test_qdrant_e2e.py | 1 - tests/unit/retrievers/external/test_weaviate.py | 2 +- tests/unit/test_neo4j_queries.py | 8 ++++---- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ba50441..693d96f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ - Made `relations` and `potential_schema` optional in `SchemaBuilder`. - Added a check to prevent the use of deprecated Cypher syntax for Neo4j versions 5.23.0 and above. +### Changed +- Vector and Hybrid retrievers used with `return_properties` now also return the node labels (`nodeLabels`) and the node's element ID (`id`). + ## 1.1.0 ### Added diff --git a/src/neo4j_graphrag/neo4j_queries.py b/src/neo4j_graphrag/neo4j_queries.py index 243f74e1..6d7ad347 100644 --- a/src/neo4j_graphrag/neo4j_queries.py +++ b/src/neo4j_graphrag/neo4j_queries.py @@ -253,5 +253,5 @@ def get_query_tail( return retrieval_query if return_properties: return_properties_cypher = ", ".join([f".{prop}" for prop in return_properties]) - return f"RETURN node {{{return_properties_cypher}}} as node, score" + return f"RETURN node {{{return_properties_cypher}}} AS node, labels(node) AS nodeLabels, elementId(node) AS id, score" return fallback_return if fallback_return else "" diff --git a/tests/e2e/qdrant_e2e/test_qdrant_e2e.py b/tests/e2e/qdrant_e2e/test_qdrant_e2e.py index 8355abd7..5bb43977 100644 --- a/tests/e2e/qdrant_e2e/test_qdrant_e2e.py +++ b/tests/e2e/qdrant_e2e/test_qdrant_e2e.py @@ -67,7 +67,6 @@ def test_qdrant_neo4j_vector_input(driver: Driver, qdrant_client: QdrantClient) assert isinstance(results, RetrieverResult) assert len(results.items) == top_k assert isinstance(results.items[0], RetrieverResultItem) - print("Results are: ", results.items) pattern = ( r" None: "WITH match_param[0] AS match_id_value, match_param[1] AS score " "MATCH (node) " "WHERE node[$id_property] = match_id_value " - "RETURN node {.name, .age} as node, score" + "RETURN node {.name, .age} AS node, labels(node) AS nodeLabels, elementId(node) AS id, score" ) assert match_query.strip() == expected.strip() diff --git a/tests/unit/test_neo4j_queries.py b/tests/unit/test_neo4j_queries.py index 4e790337..20b4cdbe 100644 --- a/tests/unit/test_neo4j_queries.py +++ b/tests/unit/test_neo4j_queries.py @@ -56,7 +56,7 @@ def test_vector_search_with_properties() -> None: expected = ( "CALL db.index.vector.queryNodes($vector_index_name, $top_k, $query_vector) " "YIELD node, score " - "RETURN node {.name, .age} as node, score" + "RETURN node {.name, .age} AS node, labels(node) AS nodeLabels, elementId(node) AS id, score" ) result, _ = get_search_query(SearchType.VECTOR, return_properties=properties) assert result.strip() == expected.strip() @@ -159,7 +159,7 @@ def test_hybrid_search_with_properties() -> None: "RETURN n.node AS node, (n.score / ft_index_max_score) AS score " "} " "WITH node, max(score) AS score ORDER BY score DESC LIMIT $top_k " - "RETURN node {.name, .age} as node, score" + "RETURN node {.name, .age} AS node, labels(node) AS nodeLabels, elementId(node) AS id, score" ) result, _ = get_search_query(SearchType.HYBRID, return_properties=properties) assert result.strip() == expected.strip() @@ -174,7 +174,7 @@ def test_get_query_tail_with_retrieval_query() -> None: def test_get_query_tail_with_properties() -> None: properties = ["name", "age"] - expected = "RETURN node {.name, .age} as node, score" + expected = "RETURN node {.name, .age} AS node, labels(node) AS nodeLabels, elementId(node) AS id, score" result = get_query_tail(return_properties=properties) assert result.strip() == expected.strip() @@ -204,7 +204,7 @@ def test_get_query_tail_ordering_no_retrieval_query() -> None: properties = ["name", "age"] fallback = "HELLO" - expected = "RETURN node {.name, .age} as node, score" + expected = "RETURN node {.name, .age} AS node, labels(node) AS nodeLabels, elementId(node) AS id, score" result = get_query_tail( return_properties=properties, fallback_return=fallback,