From 33a3db69353d74b7c925344eabf676254d8b479c Mon Sep 17 00:00:00 2001
From: richarddwang
Date: Fri, 15 Sep 2023 09:41:23 +0800
Subject: [PATCH] Let notion loader support utf-8 and make it default.

---
 libs/langchain/langchain/document_loaders/notion.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libs/langchain/langchain/document_loaders/notion.py b/libs/langchain/langchain/document_loaders/notion.py
index 15678ace9e7ec..0b5b9d15d4c09 100644
--- a/libs/langchain/langchain/document_loaders/notion.py
+++ b/libs/langchain/langchain/document_loaders/notion.py
@@ -8,16 +8,17 @@
 class NotionDirectoryLoader(BaseLoader):
     """Load `Notion directory` dump."""
 
-    def __init__(self, path: str):
+    def __init__(self, path: str, encoding: str = 'utf-8'):
         """Initialize with a file path."""
         self.file_path = path
+        self.encoding = encoding
 
     def load(self) -> List[Document]:
         """Load documents."""
         ps = list(Path(self.file_path).glob("**/*.md"))
         docs = []
         for p in ps:
-            with open(p) as f:
+            with open(p, encoding=self.encoding) as f:
                 text = f.read()
             metadata = {"source": str(p)}
             docs.append(Document(page_content=text, metadata=metadata))