From 33a3db69353d74b7c925344eabf676254d8b479c Mon Sep 17 00:00:00 2001 From: richarddwang Date: Fri, 15 Sep 2023 09:41:23 +0800 Subject: [PATCH 1/2] Let notion loader support utf-8 and make it default. --- libs/langchain/langchain/document_loaders/notion.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libs/langchain/langchain/document_loaders/notion.py b/libs/langchain/langchain/document_loaders/notion.py index 15678ace9e7ec..0b5b9d15d4c09 100644 --- a/libs/langchain/langchain/document_loaders/notion.py +++ b/libs/langchain/langchain/document_loaders/notion.py @@ -8,16 +8,17 @@ class NotionDirectoryLoader(BaseLoader): """Load `Notion directory` dump.""" - def __init__(self, path: str): + def __init__(self, path: str, encoding: str = 'utf-8'): """Initialize with a file path.""" self.file_path = path + self.encoding = encoding def load(self) -> List[Document]: """Load documents.""" ps = list(Path(self.file_path).glob("**/*.md")) docs = [] for p in ps: - with open(p) as f: + with open(p, encoding=self.encoding) as f: text = f.read() metadata = {"source": str(p)} docs.append(Document(page_content=text, metadata=metadata)) From d2f36fbe1d33da5716d031dcb864bf0474fdbb5b Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Thu, 12 Oct 2023 11:48:37 -0400 Subject: [PATCH 2/2] x --- libs/langchain/langchain/document_loaders/notion.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/langchain/langchain/document_loaders/notion.py b/libs/langchain/langchain/document_loaders/notion.py index 0b5b9d15d4c09..d1081301ade21 100644 --- a/libs/langchain/langchain/document_loaders/notion.py +++ b/libs/langchain/langchain/document_loaders/notion.py @@ -8,16 +8,16 @@ class NotionDirectoryLoader(BaseLoader): """Load `Notion directory` dump.""" - def __init__(self, path: str, encoding: str = 'utf-8'): + def __init__(self, path: str, *, encoding: str = "utf-8") -> None: """Initialize with a file path.""" self.file_path = path self.encoding = encoding def load(self) -> List[Document]: """Load documents.""" - ps = list(Path(self.file_path).glob("**/*.md")) + paths = list(Path(self.file_path).glob("**/*.md")) docs = [] - for p in ps: + for p in paths: with open(p, encoding=self.encoding) as f: text = f.read() metadata = {"source": str(p)}