From 32917a0b98cb8edcfb8d0e84f0878434e1c3f192 Mon Sep 17 00:00:00 2001
From: Darien Schettler <50381286+darien-schettler@users.noreply.github.com>
Date: Sun, 22 Dec 2024 19:16:16 -0500
Subject: [PATCH] Update dataframe.py (#28871)

community: optimize DataFrame document loader

**Description:** Simplify the `lazy_load` method in the DataFrame document loader by combining text extraction and metadata cleanup into a single operation. This makes the code more concise while maintaining the same functionality.

**Issue:** N/A

**Dependencies:** None

**Twitter handle:** N/A
---
 .../langchain_community/document_loaders/dataframe.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libs/community/langchain_community/document_loaders/dataframe.py b/libs/community/langchain_community/document_loaders/dataframe.py
index 1b508533f8d93..74ad56b53f783 100644
--- a/libs/community/langchain_community/document_loaders/dataframe.py
+++ b/libs/community/langchain_community/document_loaders/dataframe.py
@@ -21,9 +21,8 @@ def lazy_load(self) -> Iterator[Document]:
         """Lazy load records from dataframe."""
         for _, row in self.data_frame.iterrows():
-            text = row[self.page_content_column]
             metadata = row.to_dict()
-            metadata.pop(self.page_content_column)
+            text = metadata.pop(self.page_content_column)
             yield Document(page_content=text, metadata=metadata)