Skip to content

Commit

Permalink
Updated fixed size splitter defaults
Browse files Browse the repository at this point in the history
  • Loading branch information
alexthomas93 committed Sep 18, 2024
1 parent bf64758 commit 4cf46d6
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 5 deletions.
4 changes: 2 additions & 2 deletions docs/source/user_guide_kg_builder.rst
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ that can be processed within the LLM token limits:
from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import FixedSizeSplitter
splitter = FixedSizeSplitter(chunk_size=500, chunk_overlap=100)
splitter = FixedSizeSplitter(chunk_size=4000, chunk_overlap=200)
splitter.run(text="Hello World. Life is beautiful.")
Expand All @@ -113,7 +113,7 @@ Wrappers for LangChain and LlamaIndex text splitters are included in this packag
from langchain_text_splitters import CharacterTextSplitter
from neo4j_graphrag.experimental.components.text_splitters.langchain import LangChainTextSplitterAdapter
splitter = LangChainTextSplitterAdapter(
CharacterTextSplitter(chunk_size=500, chunk_overlap=100, separator=".")
CharacterTextSplitter(chunk_size=4000, chunk_overlap=200, separator=".")
)
splitter.run(text="Hello World. Life is beautiful.")
Expand Down
4 changes: 3 additions & 1 deletion examples/pipeline/kg_builder_from_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,9 @@ async def main(neo4j_driver: neo4j.Driver) -> PipelineResult:
# Set up the pipeline
pipe = Pipeline()
pipe.add_component(PdfLoader(), "pdf_loader")
pipe.add_component(FixedSizeSplitter(), "splitter")
pipe.add_component(
FixedSizeSplitter(chunk_size=4000, chunk_overlap=200), "splitter"
)
pipe.add_component(SchemaBuilder(), "schema")
pipe.add_component(
LLMEntityRelationExtractor(
Expand Down
2 changes: 1 addition & 1 deletion examples/pipeline/kg_builder_from_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ async def main(neo4j_driver: neo4j.Driver) -> PipelineResult:
# define the components
pipe.add_component(
# chunk_size was 50 for the sake of this demo; this commit raises it to 4000 (chunk_overlap 10 -> 200)
FixedSizeSplitter(chunk_size=50, chunk_overlap=10),
FixedSizeSplitter(chunk_size=4000, chunk_overlap=200),
"splitter",
)
pipe.add_component(TextChunkEmbedder(embedder=OpenAIEmbeddings()), "chunk_embedder")
Expand Down
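2 changes: 1 addition & 1 deletion [file path missing from this capture; presumably neo4j_graphrag/experimental/components/text_splitters/fixed_size_splitter.py]
Original file line number Diff line number Diff line change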
Expand Up @@ -33,7 +33,7 @@ class FixedSizeSplitter(TextSplitter):
from neo4j_graphrag.experimental.pipeline import Pipeline
pipeline = Pipeline()
text_splitter = FixedSizeSplitter(chunk_size=500, chunk_overlap=100)
text_splitter = FixedSizeSplitter(chunk_size=4000, chunk_overlap=200)
pipeline.add_component(text_splitter, "text_splitter")
"""

Expand Down

0 comments on commit 4cf46d6

Please sign in to comment.