From 342e3a43be274de58250769ca43c246a688ca88f Mon Sep 17 00:00:00 2001
From: Theo LEBRUN
Date: Tue, 20 Feb 2024 18:09:54 -0500
Subject: [PATCH] Skip nested directories when using S3DirectoryLoader

---
 .../langchain_community/document_loaders/s3_directory.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libs/community/langchain_community/document_loaders/s3_directory.py b/libs/community/langchain_community/document_loaders/s3_directory.py
index 9885418ec8ae2..545d2059b43ae 100644
--- a/libs/community/langchain_community/document_loaders/s3_directory.py
+++ b/libs/community/langchain_community/document_loaders/s3_directory.py
@@ -120,6 +120,9 @@ def load(self) -> List[Document]:
         bucket = s3.Bucket(self.bucket)
         docs = []
         for obj in bucket.objects.filter(Prefix=self.prefix):
+            # Skip directories
+            if obj.size == 0 and obj.key.endswith('/'):
+                continue
             loader = S3FileLoader(
                 self.bucket,
                 obj.key,