From 44d6703a3b07944f667b6383f35cee46d210e248 Mon Sep 17 00:00:00 2001 From: Ben Cox <1038350+ind1go@users.noreply.github.com> Date: Thu, 14 Sep 2023 10:58:26 +0100 Subject: [PATCH 1/2] Fix typing of RecursiveUrlLoader's exclude_dirs --- .../langchain/document_loaders/recursive_url_loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/langchain/langchain/document_loaders/recursive_url_loader.py b/libs/langchain/langchain/document_loaders/recursive_url_loader.py index 61b9c7032e821..cfb5cd0180111 100644 --- a/libs/langchain/langchain/document_loaders/recursive_url_loader.py +++ b/libs/langchain/langchain/document_loaders/recursive_url_loader.py @@ -1,6 +1,6 @@ import asyncio import re -from typing import Callable, Iterator, List, Optional, Set, Union +from typing import Callable, Iterable, Iterator, List, Optional, Set, Union from urllib.parse import urljoin, urlparse import requests @@ -18,7 +18,7 @@ def __init__( max_depth: Optional[int] = None, use_async: Optional[bool] = None, extractor: Optional[Callable[[str], str]] = None, - exclude_dirs: Optional[str] = None, + exclude_dirs: Optional[Iterable[str]] = None, timeout: Optional[int] = None, prevent_outside: Optional[bool] = None, ) -> None: From e7235377b496bf24aa49f7cd83015753cbc85903 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Wed, 27 Sep 2023 11:11:23 -0700 Subject: [PATCH 2/2] cr --- .../langchain/document_loaders/recursive_url_loader.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libs/langchain/langchain/document_loaders/recursive_url_loader.py b/libs/langchain/langchain/document_loaders/recursive_url_loader.py index 212cfc58bd76a..ea5e730d9e78b 100644 --- a/libs/langchain/langchain/document_loaders/recursive_url_loader.py +++ b/libs/langchain/langchain/document_loaders/recursive_url_loader.py @@ -6,14 +6,13 @@ from typing import ( TYPE_CHECKING, Callable, + Iterable, Iterator, List, Optional, - Sequence, Set, Union, ) -from urllib.parse import urljoin, urlparse import requests