diff --git a/examples/build_graph/simple_kg_builder_from_text.py b/examples/build_graph/simple_kg_builder_from_text.py
index 901744a7..288a21ab 100644
--- a/examples/build_graph/simple_kg_builder_from_text.py
+++ b/examples/build_graph/simple_kg_builder_from_text.py
@@ -22,8 +22,8 @@
 from neo4j_graphrag.llm.openai_llm import OpenAILLM
 
 logging.basicConfig()
-# logging.getLogger("neo4j_graphrag").setLevel(logging.DEBUG)
-logging.getLogger("neo4j_graphrag").setLevel(logging.INFO)
+logging.getLogger("neo4j_graphrag").setLevel(logging.DEBUG)
+# logging.getLogger("neo4j_graphrag").setLevel(logging.INFO)
 
 
 # Neo4j db infos
diff --git a/src/neo4j_graphrag/experimental/components/entity_relation_extractor.py b/src/neo4j_graphrag/experimental/components/entity_relation_extractor.py
index 6d53e469..7127a36e 100644
--- a/src/neo4j_graphrag/experimental/components/entity_relation_extractor.py
+++ b/src/neo4j_graphrag/experimental/components/entity_relation_extractor.py
@@ -38,7 +38,7 @@
 from neo4j_graphrag.experimental.pipeline.exceptions import InvalidJSONError
 from neo4j_graphrag.generation.prompts import ERExtractionTemplate, PromptTemplate
 from neo4j_graphrag.llm import LLMInterface
-from neo4j_graphrag.utils import prettyfier
+from neo4j_graphrag.utils.logging import prettyfier
 
 logger = logging.getLogger(__name__)
 
diff --git a/src/neo4j_graphrag/experimental/pipeline/config/runner.py b/src/neo4j_graphrag/experimental/pipeline/config/runner.py
index c1321754..07b04dff 100644
--- a/src/neo4j_graphrag/experimental/pipeline/config/runner.py
+++ b/src/neo4j_graphrag/experimental/pipeline/config/runner.py
@@ -48,7 +48,7 @@
 from neo4j_graphrag.experimental.pipeline.config.types import PipelineType
 from neo4j_graphrag.experimental.pipeline.pipeline import PipelineResult
 from neo4j_graphrag.experimental.pipeline.types import PipelineDefinition
-from neo4j_graphrag.utils import prettyfier
+from neo4j_graphrag.utils.logging import prettyfier
 
 logger = logging.getLogger(__name__)
 
diff --git a/src/neo4j_graphrag/experimental/pipeline/pipeline.py b/src/neo4j_graphrag/experimental/pipeline/pipeline.py
index a3f9b3c1..007d3ca4 100644
--- a/src/neo4j_graphrag/experimental/pipeline/pipeline.py
+++ b/src/neo4j_graphrag/experimental/pipeline/pipeline.py
@@ -24,7 +24,7 @@
 from timeit import default_timer
 from typing import Any, AsyncGenerator, Optional
 
-from neo4j_graphrag.utils import prettyfier
+from neo4j_graphrag.utils.logging import prettyfier
 
 try:
     import pygraphviz as pgv
diff --git a/src/neo4j_graphrag/types.py b/src/neo4j_graphrag/types.py
index 5a45141d..3b2286ac 100644
--- a/src/neo4j_graphrag/types.py
+++ b/src/neo4j_graphrag/types.py
@@ -26,7 +26,7 @@
     model_validator,
 )
 
-from neo4j_graphrag.utils import validate_search_query_input
+from neo4j_graphrag.utils.validation import validate_search_query_input
 
 
 class RawSearchResult(BaseModel):
diff --git a/src/neo4j_graphrag/utils/__init__.py b/src/neo4j_graphrag/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/neo4j_graphrag/utils.py b/src/neo4j_graphrag/utils/logging.py
similarity index 55%
rename from src/neo4j_graphrag/utils.py
rename to src/neo4j_graphrag/utils/logging.py
index b4331c88..eee12209 100644
--- a/src/neo4j_graphrag/utils.py
+++ b/src/neo4j_graphrag/utils/logging.py
@@ -14,26 +14,47 @@
 # limitations under the License.
 from __future__ import annotations
 
-from typing import Optional, Any
+import os
+from typing import Any, Optional
 
 from pydantic import BaseModel
 
+DEFAULT_MAX_LIST_LENGTH: int = 5
+DEFAULT_MAX_STRING_LENGTH: int = 200
 
-def validate_search_query_input(
-    query_text: Optional[str] = None, query_vector: Optional[list[float]] = None
-) -> None:
-    if not (bool(query_vector) ^ bool(query_text)):
-        raise ValueError("You must provide exactly one of query_vector or query_text.")
 
+def _int_from_env(name: str, default: int) -> int:
+    """Read a non-negative int from the env variable `name`.
+
+    Fall back to `default` when the variable is unset, is not a valid
+    integer or is negative, so a bad setting never breaks logging.
+    """
+    try:
+        parsed = int(os.environ.get(name, default))
+    except ValueError:
+        return default
+    return parsed if parsed >= 0 else default
+
+
+class Prettyfier:
+    """Prettyfy any object for logging.
 
+    I.e.: truncate long lists and strings, even nested.
 
-class Prettyfier:
-    """Prettyfy object for logging.
+    Max list and string length can be configured using env variables:
+    - LOGGING__MAX_LIST_LENGTH (int)
+    - LOGGING__MAX_STRING_LENGTH (int)
+
+    Invalid or negative values fall back to the defaults.
+    """
 
-    I.e.: truncate long lists.
-    """
-    def __init__(self, max_items_in_list: int = 5):
-        self.max_items_in_list = max_items_in_list
+    def __init__(self) -> None:
+        self.max_list_length = _int_from_env(
+            "LOGGING__MAX_LIST_LENGTH", DEFAULT_MAX_LIST_LENGTH
+        )
+        self.max_string_length = _int_from_env(
+            "LOGGING__MAX_STRING_LENGTH", DEFAULT_MAX_STRING_LENGTH
+        )
 
     def _prettyfy_dict(self, value: dict[Any, Any]) -> dict[Any, Any]:
         return {
@@ -44,20 +65,30 @@ def _prettyfy_dict(self, value: dict[Any, Any]) -> dict[Any, Any]:
     def _prettyfy_list(self, value: list[Any]) -> list[Any]:
         items = [
             self(v)  # prettify each item
-            for v in value[:self.max_items_in_list]
+            for v in value[: self.max_list_length]
         ]
         remaining_items = len(value) - len(items)
         if remaining_items > 0:
-            items.append(f"...truncated {remaining_items} items...")
+            items.append(f"... ({remaining_items} items)")
         return items
 
+    def _prettyfy_str(self, value: str) -> str:
+        new_value = value[: self.max_string_length]
+        remaining_chars = len(value) - len(new_value)
+        if remaining_chars > 0:
+            new_value += f"... ({remaining_chars} chars)"
+        return new_value
+
     def __call__(self, value: Any) -> Any:
+        """Takes any value and returns a prettified version for logging."""
         if isinstance(value, dict):
             return self._prettyfy_dict(value)
         if isinstance(value, BaseModel):
             return self(value.model_dump())
         if isinstance(value, list):
             return self._prettyfy_list(value)
+        if isinstance(value, str):
+            return self._prettyfy_str(value)
         return value
 
 
diff --git a/src/neo4j_graphrag/utils/validation.py b/src/neo4j_graphrag/utils/validation.py
new file mode 100644
index 00000000..e86f7588
--- /dev/null
+++ b/src/neo4j_graphrag/utils/validation.py
@@ -0,0 +1,24 @@
+# Copyright (c) "Neo4j"
+# Neo4j Sweden AB [https://neo4j.com]
+# #
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# #
+# https://www.apache.org/licenses/LICENSE-2.0
+# #
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+from typing import Optional
+
+
+def validate_search_query_input(
+    query_text: Optional[str] = None, query_vector: Optional[list[float]] = None
+) -> None:
+    if not (bool(query_vector) ^ bool(query_text)):
+        raise ValueError("You must provide exactly one of query_vector or query_text.")