From 2f99260eebf7bee07422156d3bbb3438544faf80 Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Wed, 18 Sep 2024 09:18:59 +0100 Subject: [PATCH] Updated E2E tests --- .../text_splitters/fixed_size_splitter.py | 2 +- tests/e2e/test_kg_builder_pipeline_e2e.py | 13 +++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/neo4j_graphrag/experimental/components/text_splitters/fixed_size_splitter.py b/src/neo4j_graphrag/experimental/components/text_splitters/fixed_size_splitter.py index d47f6def3..f50650a57 100644 --- a/src/neo4j_graphrag/experimental/components/text_splitters/fixed_size_splitter.py +++ b/src/neo4j_graphrag/experimental/components/text_splitters/fixed_size_splitter.py @@ -23,7 +23,7 @@ class FixedSizeSplitter(TextSplitter): Args: chunk_size (int): The number of characters in each chunk. - chunk_overlap (int): The number of characters to overlap between chunks. Must be less than `chunk_size`. + chunk_overlap (int): The number of characters from the previous chunk to overlap with each chunk. Must be less than `chunk_size`. Example: diff --git a/tests/e2e/test_kg_builder_pipeline_e2e.py b/tests/e2e/test_kg_builder_pipeline_e2e.py index 5ad9dfd9f..91671d29a 100644 --- a/tests/e2e/test_kg_builder_pipeline_e2e.py +++ b/tests/e2e/test_kg_builder_pipeline_e2e.py @@ -20,7 +20,6 @@ import neo4j import pytest -from langchain_text_splitters import CharacterTextSplitter from neo4j_graphrag.embedder import Embedder from neo4j_graphrag.exceptions import LLMGenerationError from neo4j_graphrag.experimental.components.embedder import TextChunkEmbedder @@ -35,8 +34,8 @@ SchemaProperty, SchemaRelation, ) -from neo4j_graphrag.experimental.components.text_splitters.langchain import ( - LangChainTextSplitterAdapter, +from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import ( + FixedSizeSplitter, ) from neo4j_graphrag.experimental.pipeline import Pipeline from neo4j_graphrag.experimental.pipeline.pipeline import PipelineResult @@ -63,10 +62,8 @@ def schema_builder() -> SchemaBuilder: @pytest.fixture -def text_splitter() -> LangChainTextSplitterAdapter: - return LangChainTextSplitterAdapter( - CharacterTextSplitter(chunk_size=50, chunk_overlap=10, separator="\n\n") - ) +def text_splitter() -> FixedSizeSplitter: + return FixedSizeSplitter(chunk_size=500, chunk_overlap=100) @pytest.fixture @@ -89,7 +86,7 @@ def kg_writer(driver: neo4j.Driver) -> Neo4jWriter: @pytest.fixture def kg_builder_pipeline( - text_splitter: LangChainTextSplitterAdapter, + text_splitter: FixedSizeSplitter, chunk_embedder: TextChunkEmbedder, schema_builder: SchemaBuilder, entity_relation_extractor: LLMEntityRelationExtractor,