Skip to content

Commit

Permalink
Cut long string, configure via env vars, restructure utils folder
Browse files Browse the repository at this point in the history
  • Loading branch information
stellasia committed Jan 3, 2025
1 parent 3033ef8 commit 80708e4
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 20 deletions.
4 changes: 2 additions & 2 deletions examples/build_graph/simple_kg_builder_from_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
from neo4j_graphrag.llm.openai_llm import OpenAILLM

logging.basicConfig()
# logging.getLogger("neo4j_graphrag").setLevel(logging.DEBUG)
logging.getLogger("neo4j_graphrag").setLevel(logging.INFO)
logging.getLogger("neo4j_graphrag").setLevel(logging.DEBUG)
# logging.getLogger("neo4j_graphrag").setLevel(logging.INFO)


# Neo4j db infos
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from neo4j_graphrag.experimental.pipeline.exceptions import InvalidJSONError
from neo4j_graphrag.generation.prompts import ERExtractionTemplate, PromptTemplate
from neo4j_graphrag.llm import LLMInterface
from neo4j_graphrag.utils import prettyfier
from neo4j_graphrag.utils.logging import prettyfier

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion src/neo4j_graphrag/experimental/pipeline/config/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
from neo4j_graphrag.experimental.pipeline.config.types import PipelineType
from neo4j_graphrag.experimental.pipeline.pipeline import PipelineResult
from neo4j_graphrag.experimental.pipeline.types import PipelineDefinition
from neo4j_graphrag.utils import prettyfier
from neo4j_graphrag.utils.logging import prettyfier

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion src/neo4j_graphrag/experimental/pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from timeit import default_timer
from typing import Any, AsyncGenerator, Optional

from neo4j_graphrag.utils import prettyfier
from neo4j_graphrag.utils.logging import prettyfier

try:
import pygraphviz as pgv
Expand Down
2 changes: 1 addition & 1 deletion src/neo4j_graphrag/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
model_validator,
)

from neo4j_graphrag.utils import validate_search_query_input
from neo4j_graphrag.utils.validation import validate_search_query_input


class RawSearchResult(BaseModel):
Expand Down
Empty file.
44 changes: 30 additions & 14 deletions src/neo4j_graphrag/utils.py → src/neo4j_graphrag/utils/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,32 @@
# limitations under the License.
from __future__ import annotations

from typing import Optional, Any
import os
from typing import Any, Optional

from pydantic import BaseModel

DEFAULT_MAX_LIST_LENGTH: int = 5
DEFAULT_MAX_STRING_LENGTH: int = 200

def validate_search_query_input(
query_text: Optional[str] = None, query_vector: Optional[list[float]] = None
) -> None:
if not (bool(query_vector) ^ bool(query_text)):
raise ValueError("You must provide exactly one of query_vector or query_text.")

class Prettyfier:
"""Prettyfy any object for logging.
I.e.: truncate long lists and strings, even nested.
class Prettyfier:
"""Prettyfy object for logging.
Max list and string length can be configured using env variables:
- LOGGING__MAX_LIST_LENGTH (int)
- LOGGING__MAX_STRING_LENGTH (int)
"""

I.e.: truncate long lists.
"""
def __init__(self, max_items_in_list: int = 5):
self.max_items_in_list = max_items_in_list
def __init__(self) -> None:
self.max_list_length = int(os.environ.get(
"LOGGING__MAX_LIST_LENGTH", DEFAULT_MAX_LIST_LENGTH
))
self.max_string_length = int(os.environ.get(
"LOGGING__MAX_STRING_LENGTH", DEFAULT_MAX_STRING_LENGTH
))

def _prettyfy_dict(self, value: dict[Any, Any]) -> dict[Any, Any]:
return {
Expand All @@ -44,20 +50,30 @@ def _prettyfy_dict(self, value: dict[Any, Any]) -> dict[Any, Any]:
def _prettyfy_list(self, value: list[Any]) -> list[Any]:
items = [
self(v) # prettify each item
for v in value[:self.max_items_in_list]
for v in value[: self.max_list_length]
]
remaining_items = len(value) - len(items)
if remaining_items > 0:
items.append(f"...truncated {remaining_items} items...")
items.append(f"... ({remaining_items} items)")
return items

def _prettyfy_str(self, value: str) -> str:
new_value = value[: self.max_string_length]
remaining_chars = len(value) - len(new_value)
if remaining_chars > 0:
new_value += f"... ({remaining_chars} chars)"
return new_value

def __call__(self, value: Any) -> Any:
    """Return a log-friendly version of ``value``.

    Dicts, lists and strings are handed to the matching ``_prettyfy_*``
    helper. A pydantic ``BaseModel`` is first dumped with ``model_dump()``
    and the result is prettified again. Any other value is returned as is.
    """
    # Checked in order; the first matching type wins.
    handlers = (
        (dict, self._prettyfy_dict),
        (BaseModel, lambda model: self(model.model_dump())),
        (list, self._prettyfy_list),
        (str, self._prettyfy_str),
    )
    for kind, handler in handlers:
        if isinstance(value, kind):
            return handler(value)
    return value


Expand Down
24 changes: 24 additions & 0 deletions src/neo4j_graphrag/utils/validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright (c) "Neo4j"
# Neo4j Sweden AB [https://neo4j.com]
# #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# #
# https://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from typing import Optional


def validate_search_query_input(
    query_text: Optional[str] = None,
    query_vector: Optional[list[float]] = None,
) -> None:
    """Check that exactly one of the two search inputs was supplied.

    An input counts as supplied when it is truthy, so ``None``, an empty
    string and an empty list are all treated as missing.

    Raises:
        ValueError: If both inputs are supplied, or if neither is.
    """
    has_vector = bool(query_vector)
    has_text = bool(query_text)
    # Equal flags mean "both" or "neither": each case is invalid.
    if has_vector == has_text:
        raise ValueError("You must provide exactly one of query_vector or query_text.")

0 comments on commit 80708e4

Please sign in to comment.