From 8565cd66c5aeecd50ba6f577e37d65211909ae27 Mon Sep 17 00:00:00 2001 From: Chris Trevino Date: Fri, 26 Jul 2024 14:38:44 -0700 Subject: [PATCH] Update the ConfigReader to allow for empty chunk-by arrays (#742) --- .../next-release/patch-20240726200425411495.json | 4 ++++ graphrag/config/create_graphrag_config.py | 8 +++++--- graphrag/config/environment_reader.py | 5 +++-- 3 files changed, 12 insertions(+), 5 deletions(-) create mode 100644 .semversioner/next-release/patch-20240726200425411495.json diff --git a/.semversioner/next-release/patch-20240726200425411495.json b/.semversioner/next-release/patch-20240726200425411495.json new file mode 100644 index 0000000000..5525220710 --- /dev/null +++ b/.semversioner/next-release/patch-20240726200425411495.json @@ -0,0 +1,4 @@ +{ + "type": "patch", + "description": "update config-reader to allow for empty chunk-by arrays" +} diff --git a/graphrag/config/create_graphrag_config.py b/graphrag/config/create_graphrag_config.py index a3c49d71bf..54c154668f 100644 --- a/graphrag/config/create_graphrag_config.py +++ b/graphrag/config/create_graphrag_config.py @@ -382,12 +382,14 @@ def hydrate_parallelization_params( base_dir=reader.str(Fragment.base_dir) or defs.STORAGE_BASE_DIR, ) with reader.envvar_prefix(Section.chunk), reader.use(values.get("chunks")): + group_by_columns = reader.list("group_by_columns", "BY_COLUMNS") + if group_by_columns is None: + group_by_columns = defs.CHUNK_GROUP_BY_COLUMNS + chunks_model = ChunkingConfig( size=reader.int("size") or defs.CHUNK_SIZE, overlap=reader.int("overlap") or defs.CHUNK_OVERLAP, - group_by_columns=reader.list("group_by_columns", "BY_COLUMNS") - or defs.CHUNK_GROUP_BY_COLUMNS, - encoding_model=reader.str(Fragment.encoding_model), + group_by_columns=group_by_columns, ) with ( reader.envvar_prefix(Section.snapshot), diff --git a/graphrag/config/environment_reader.py b/graphrag/config/environment_reader.py index dc61301f8b..258422666c 100644 --- a/graphrag/config/environment_reader.py +++ b/graphrag/config/environment_reader.py @@ -149,6 +149,7 @@ def list( if result is None: result = self.str(key, env_key) - if result: - return [s.strip() for s in result.split(",")] + if result is not None: + result = [s.strip() for s in result.split(",")] + return [s for s in result if s] return default_value