From b002702af6103f3da31662c7e4171d9b383bdb50 Mon Sep 17 00:00:00 2001 From: Dristy Srivastava <58721149+dristysrivastava@users.noreply.github.com> Date: Wed, 21 Aug 2024 18:40:14 +0530 Subject: [PATCH] [Community][minor]: Updating metadata with full_path in SharePoint loader (#25593) - **Description:** Updating metadata for sharepoint loader with full path i.e., webUrl - **Issue:** NA - **Dependencies:** NA - **Tests:** NA - **Docs** NA Co-authored-by: dristy.cd Co-authored-by: ccurme --- .../langchain_community/document_loaders/sharepoint.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libs/community/langchain_community/document_loaders/sharepoint.py b/libs/community/langchain_community/document_loaders/sharepoint.py index 41ccf0147e680..e589a58447c4d 100644 --- a/libs/community/langchain_community/document_loaders/sharepoint.py +++ b/libs/community/langchain_community/document_loaders/sharepoint.py @@ -78,6 +78,7 @@ def lazy_load(self) -> Iterator[Document]: auth_identities = self.authorized_identities(file_id) if self.load_extended_metadata is True: extended_metadata = self.get_extended_metadata(file_id) + extended_metadata.update({"source_full_url": target_folder.web_url}) for parsed_blob in blob_parser.lazy_parse(blob): if self.load_auth is True: parsed_blob.metadata["authorized_identities"] = auth_identities @@ -94,6 +95,7 @@ def lazy_load(self) -> Iterator[Document]: auth_identities = self.authorized_identities(file_id) if self.load_extended_metadata is True: extended_metadata = self.get_extended_metadata(file_id) + extended_metadata.update({"source_full_url": target_folder.web_url}) for parsed_blob in blob_parser.lazy_parse(blob): if self.load_auth is True: parsed_blob.metadata["authorized_identities"] = auth_identities @@ -130,6 +132,9 @@ def lazy_load(self) -> Iterator[Document]: blob_part.metadata["authorized_identities"] = auth_identities if self.load_extended_metadata is True: blob_part.metadata.update(extended_metadata) + blob_part.metadata.update( + {"source_full_url": target_folder.web_url} + ) yield blob_part def authorized_identities(self, file_id: str) -> List: