From 8e35a0d50c60add6a5298422ac798c2cb9c74470 Mon Sep 17 00:00:00 2001 From: leila-messallem Date: Thu, 16 May 2024 16:39:55 +0200 Subject: [PATCH 1/4] Method for getting the database schema --- .gitignore | 1 + src/neo4j_genai/schema.py | 135 ++++++++++++++++++++++++++++++++++++++ tests/unit/test_schema.py | 84 ++++++++++++++++++++++++ 3 files changed, 220 insertions(+) create mode 100644 src/neo4j_genai/schema.py create mode 100644 tests/unit/test_schema.py diff --git a/.gitignore b/.gitignore index 16c7907e..489b0412 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ htmlcov/ .idea/ .env docs/build/ +.vscode/ diff --git a/src/neo4j_genai/schema.py b/src/neo4j_genai/schema.py new file mode 100644 index 00000000..0eea47dd --- /dev/null +++ b/src/neo4j_genai/schema.py @@ -0,0 +1,135 @@ +# Copyright (c) "Neo4j" +# Neo4j Sweden AB [https://neo4j.com] +# # +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# # +# https://www.apache.org/licenses/LICENSE-2.0 +# # +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any + +import neo4j + + +BASE_ENTITY_LABEL = "__Entity__" +EXCLUDED_LABELS = ["_Bloom_Perspective_", "_Bloom_Scene_"] +EXCLUDED_RELS = ["_Bloom_HAS_SCENE_"] + +NODE_PROPERTIES_QUERY = """ +CALL apoc.meta.data() +YIELD label, other, elementType, type, property +WHERE NOT type = "RELATIONSHIP" AND elementType = "node" + AND NOT label IN $EXCLUDED_LABELS +WITH label AS nodeLabels, collect({property:property, type:type}) AS properties +RETURN {labels: nodeLabels, properties: properties} AS output +""" + +REL_PROPERTIES_QUERY = """ +CALL apoc.meta.data() +YIELD label, other, elementType, type, property +WHERE NOT type = "RELATIONSHIP" AND elementType = "relationship" + AND NOT label in $EXCLUDED_LABELS +WITH label AS nodeLabels, collect({property:property, type:type}) AS properties +RETURN {type: nodeLabels, properties: properties} AS output +""" + +REL_QUERY = """ +CALL apoc.meta.data() +YIELD label, other, elementType, type, property +WHERE type = "RELATIONSHIP" AND elementType = "node" +UNWIND other AS other_node +WITH * WHERE NOT label IN $EXCLUDED_LABELS + AND NOT other_node IN $EXCLUDED_LABELS +RETURN {start: label, type: property, end: toString(other_node)} AS output +""" + + +def _query(driver: neo4j.Driver, query: str, params: dict = {}) -> list[dict[str, Any]]: + """ + Queries the database. + + Args: + driver (neo4j.Driver): Neo4j Python driver instance. + query (str): The cypher query. + params (dict, optional): The query parameters. Defaults to {}. + + Returns: + List[Dict[str, Any]]: the result of the query in json format. + """ + data = driver.execute_query(query, params) + return [r.data() for r in data.records] + + +def get_schema( + driver: neo4j.Driver, +) -> str: + """ + Returns the schema of the graph. + + Args: + driver (neo4j.Driver): Neo4j Python driver instance. + + Returns: + str: the graph schema information in a serialized format. + """ + node_properties = [ + el["output"] + for el in _query( + driver, + NODE_PROPERTIES_QUERY, + params={"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]}, + ) + ] + + rel_properties = [ + el["output"] + for el in _query( + driver, REL_PROPERTIES_QUERY, params={"EXCLUDED_LABELS": EXCLUDED_RELS} + ) + ] + relationships = [ + el["output"] + for el in _query( + driver, + REL_QUERY, + params={"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]}, + ) + ] + + # Format node properties + formatted_node_props = [] + for el in node_properties: + props_str = ", ".join( + [f"{prop['property']}: {prop['type']}" for prop in el["properties"]] + ) + formatted_node_props.append(f"{el['labels']} {{{props_str}}}") + + # Format relationship properties + formatted_rel_props = [] + for el in rel_properties: + props_str = ", ".join( + [f"{prop['property']}: {prop['type']}" for prop in el["properties"]] + ) + formatted_rel_props.append(f"{el['type']} {{{props_str}}}") + + # Format relationships + formatted_rels = [ + f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" for el in relationships + ] + + return "\n".join( + [ + "Node properties:", + "\n".join(formatted_node_props), + "Relationship properties:", + "\n".join(formatted_rel_props), + "The relationships:", + "\n".join(formatted_rels), + ] + ) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py new file mode 100644 index 00000000..9880ffbe --- /dev/null +++ b/tests/unit/test_schema.py @@ -0,0 +1,84 @@ +# Copyright (c) "Neo4j" +# Neo4j Sweden AB [https://neo4j.com] +# # +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# # +# https://www.apache.org/licenses/LICENSE-2.0 +# # +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from unittest.mock import patch +from neo4j_genai.schema import ( + get_schema, + NODE_PROPERTIES_QUERY, + REL_PROPERTIES_QUERY, + REL_QUERY, + EXCLUDED_LABELS, + BASE_ENTITY_LABEL, + EXCLUDED_RELS, +) + + +def _query_return_value(*args, **kwargs): + if NODE_PROPERTIES_QUERY in args[1]: + return [ + { + "output": { + "properties": [{"property": "property_a", "type": "STRING"}], + "labels": "LabelA", + } + } + ] + if REL_PROPERTIES_QUERY in args[1]: + return [ + { + "output": { + "type": "REL_TYPE", + "properties": [{"property": "rel_prop", "type": "STRING"}], + } + } + ] + if REL_QUERY in args[1]: + return [ + {"output": {"start": "LabelA", "type": "REL_TYPE", "end": "LabelB"}}, + {"output": {"start": "LabelA", "type": "REL_TYPE", "end": "LabelC"}}, + ] + + raise AssertionError("Unexpected query") + + +def test_get_schema_happy_path(driver): + get_schema(driver) + assert 3 == driver.execute_query.call_count + driver.execute_query.assert_any_call( + NODE_PROPERTIES_QUERY, + {"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]}, + ) + driver.execute_query.assert_any_call( + REL_PROPERTIES_QUERY, + {"EXCLUDED_LABELS": EXCLUDED_RELS}, + ) + driver.execute_query.assert_any_call( + REL_QUERY, + {"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]}, + ) + + +@patch("neo4j_genai.schema._query", side_effect=_query_return_value) +def test_get_schema_ensure_formatted_response(driver): + result = get_schema(driver) + assert ( + result + == """Node properties: +LabelA {property_a: STRING} +Relationship properties: +REL_TYPE {rel_prop: STRING} +The relationships: +(:LabelA)-[:REL_TYPE]->(:LabelB) +(:LabelA)-[:REL_TYPE]->(:LabelC)""" + ) From 2eaacadf5539564c12443315526e4e9c644fac62 Mon Sep 17 00:00:00 2001 From: Leila Messallem Date: Mon, 20 May 2024 11:38:32 +0200 Subject: [PATCH 2/4] Address review comments --- src/neo4j_genai/schema.py | 31 +++++++++++++++++-------------- tests/unit/test_schema.py | 2 +- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/neo4j_genai/schema.py b/src/neo4j_genai/schema.py index 0eea47dd..426a8d13 100644 --- a/src/neo4j_genai/schema.py +++ b/src/neo4j_genai/schema.py @@ -50,7 +50,9 @@ """ -def _query(driver: neo4j.Driver, query: str, params: dict = {}) -> list[dict[str, Any]]: +def _query_database( + driver: neo4j.Driver, query: str, params: dict = {} +) -> list[dict[str, Any]]: """ Queries the database. @@ -79,8 +81,8 @@ def get_schema( str: the graph schema information in a serialized format. """ node_properties = [ - el["output"] - for el in _query( + data["output"] + for data in _query_database( driver, NODE_PROPERTIES_QUERY, params={"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]}, @@ -88,14 +90,14 @@ def get_schema( ] rel_properties = [ - el["output"] - for el in _query( + data["output"] + for data in _query_database( driver, REL_PROPERTIES_QUERY, params={"EXCLUDED_LABELS": EXCLUDED_RELS} ) ] relationships = [ - el["output"] - for el in _query( + data["output"] + for data in _query_database( driver, REL_QUERY, params={"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]}, @@ -104,23 +106,24 @@ def get_schema( # Format node properties formatted_node_props = [] - for el in node_properties: + for element in node_properties: props_str = ", ".join( - [f"{prop['property']}: {prop['type']}" for prop in el["properties"]] + [f"{prop['property']}: {prop['type']}" for prop in element["properties"]] ) - formatted_node_props.append(f"{el['labels']} {{{props_str}}}") + formatted_node_props.append(f"{element['labels']} {{{props_str}}}") # Format relationship properties formatted_rel_props = [] - for el in rel_properties: + for element in rel_properties: props_str = ", ".join( - [f"{prop['property']}: {prop['type']}" for prop in el["properties"]] + [f"{prop['property']}: {prop['type']}" for prop in element["properties"]] ) - formatted_rel_props.append(f"{el['type']} {{{props_str}}}") + formatted_rel_props.append(f"{element['type']} {{{props_str}}}") # Format relationships formatted_rels = [ - f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" for el in relationships + f"(:{element['start']})-[:{element['type']}]->(:{element['end']})" + for element in relationships ] return "\n".join( diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 9880ffbe..8a38b9e1 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -69,7 +69,7 @@ def test_get_schema_happy_path(driver): ) -@patch("neo4j_genai.schema._query", side_effect=_query_return_value) +@patch("neo4j_genai.schema._query_database", side_effect=_query_return_value) def test_get_schema_ensure_formatted_response(driver): result = get_schema(driver) assert ( From bbfb30c248b16258d00db34c236a574a8eacb21e Mon Sep 17 00:00:00 2001 From: Leila Messallem Date: Mon, 20 May 2024 16:39:44 +0200 Subject: [PATCH 3/4] Change default argument from empty dict to None --- src/neo4j_genai/schema.py | 16 +++++++++------- tests/unit/test_schema.py | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/neo4j_genai/schema.py b/src/neo4j_genai/schema.py index 426a8d13..6f5fdc37 100644 --- a/src/neo4j_genai/schema.py +++ b/src/neo4j_genai/schema.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any +from typing import Any, Optional import neo4j @@ -50,8 +50,8 @@ """ -def _query_database( - driver: neo4j.Driver, query: str, params: dict = {} +def query_database( + driver: neo4j.Driver, query: str, params: Optional[dict] = None ) -> list[dict[str, Any]]: """ Queries the database. @@ -59,11 +59,13 @@ def _query_database( Args: driver (neo4j.Driver): Neo4j Python driver instance. query (str): The cypher query. - params (dict, optional): The query parameters. Defaults to {}. + params (dict, optional): The query parameters. Defaults to None. Returns: List[Dict[str, Any]]: the result of the query in json format. """ + if params is None: + params = {} data = driver.execute_query(query, params) return [r.data() for r in data.records] @@ -82,7 +84,7 @@ def get_schema( """ node_properties = [ data["output"] - for data in _query_database( + for data in query_database( driver, NODE_PROPERTIES_QUERY, params={"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]}, @@ -91,13 +93,13 @@ def get_schema( rel_properties = [ data["output"] - for data in _query_database( + for data in query_database( driver, REL_PROPERTIES_QUERY, params={"EXCLUDED_LABELS": EXCLUDED_RELS} ) ] relationships = [ data["output"] - for data in _query_database( + for data in query_database( driver, REL_QUERY, params={"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]}, diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 8a38b9e1..24d3ed8a 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -69,7 +69,7 @@ def test_get_schema_happy_path(driver): ) -@patch("neo4j_genai.schema._query_database", side_effect=_query_return_value) +@patch("neo4j_genai.schema.query_database", side_effect=_query_return_value) def test_get_schema_ensure_formatted_response(driver): result = get_schema(driver) assert ( From eed26a0c4aeee51cc3eddf5f4f3355a5d9aca9af Mon Sep 17 00:00:00 2001 From: leila-messallem <59602030+leila-messallem@users.noreply.github.com> Date: Tue, 21 May 2024 11:00:10 +0200 Subject: [PATCH 4/4] Change types in docstring Co-authored-by: willtai --- src/neo4j_genai/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neo4j_genai/schema.py b/src/neo4j_genai/schema.py index 6f5fdc37..7a2982b0 100644 --- a/src/neo4j_genai/schema.py +++ b/src/neo4j_genai/schema.py @@ -62,7 +62,7 @@ def query_database( params (dict, optional): The query parameters. Defaults to None. Returns: - List[Dict[str, Any]]: the result of the query in json format. + list[dict[str, Any]]: the result of the query in json format. """ if params is None: params = {}