diff --git a/Makefile b/Makefile index 9bd2847e4..4b6e817e7 100644 --- a/Makefile +++ b/Makefile @@ -21,13 +21,13 @@ clean-docs: ## Run format against the project documentation. format-docs: - poetry run ruff format docs/docs - poetry run ruff check --fix docs/docs + poetry run ruff format docs/ + poetry run ruff check --fix docs/ # Check the docs for linting violations lint-docs: - poetry run ruff format --check docs/docs - poetry run ruff check docs/docs + poetry run ruff format --check docs + poetry run ruff check docs/ codespell: ./docs/codespell_notebooks.sh . diff --git a/docs/_scripts/download_tiktoken.py b/docs/_scripts/download_tiktoken.py index dbdf455ca..121f738ef 100644 --- a/docs/_scripts/download_tiktoken.py +++ b/docs/_scripts/download_tiktoken.py @@ -2,4 +2,4 @@ # This will trigger the download and caching of the necessary files for encoding in ("gpt2", "gpt-3.5"): - tiktoken.encoding_for_model(encoding) \ No newline at end of file + tiktoken.encoding_for_model(encoding) diff --git a/docs/_scripts/generate_api_reference_links.py b/docs/_scripts/generate_api_reference_links.py index 714dc641f..ea18f6efd 100644 --- a/docs/_scripts/generate_api_reference_links.py +++ b/docs/_scripts/generate_api_reference_links.py @@ -250,7 +250,8 @@ def update_markdown_with_imports(markdown: str) -> str: This function will append an API reference link to the `TextGenerator` class from the `langchain.nlp` module if it's recognized. """ code_block_pattern = re.compile( - r'(?P[ \t]*)```(?Ppython|py)\n(?P.*?)\n(?P=indent)```', re.DOTALL + r"(?P[ \t]*)```(?Ppython|py)\n(?P.*?)\n(?P=indent)```", + re.DOTALL, ) def replace_code_block(match: re.Match) -> str: @@ -262,9 +263,9 @@ def replace_code_block(match: re.Match) -> str: Returns: str: The modified code block with API reference links appended if applicable. """ - indent = match.group('indent') - code_block = match.group('code') - language = match.group('language') # Preserve the language from the regex match + indent = match.group("indent") + code_block = match.group("code") + language = match.group("language") # Preserve the language from the regex match # Retrieve import information from the code block imports = get_imports(code_block, "__unused__") @@ -274,12 +275,12 @@ def replace_code_block(match: re.Match) -> str: return original_code_block # Generate API reference links for each import - api_links = ' | '.join( + api_links = " | ".join( f'{imp["imported"]}' for imp in imports ) # Return the code block with appended API reference links - return f'{original_code_block}\n\n{indent}API Reference: {api_links}' + return f"{original_code_block}\n\n{indent}API Reference: {api_links}" # Apply the replace_code_block function to all matches in the markdown updated_markdown = code_block_pattern.sub(replace_code_block, markdown) - return updated_markdown \ No newline at end of file + return updated_markdown diff --git a/docs/_scripts/prepare_notebooks_for_ci.py b/docs/_scripts/prepare_notebooks_for_ci.py index dfa1e1c70..45b737732 100644 --- a/docs/_scripts/prepare_notebooks_for_ci.py +++ b/docs/_scripts/prepare_notebooks_for_ci.py @@ -7,7 +7,7 @@ import nbformat logger = logging.getLogger(__name__) -NOTEBOOK_DIRS = ("docs/docs/how-tos","docs/docs/tutorials") +NOTEBOOK_DIRS = ("docs/docs/how-tos", "docs/docs/tutorials") DOCS_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) CASSETTES_PATH = os.path.join(DOCS_PATH, "cassettes") @@ -20,7 +20,7 @@ NOTEBOOKS_NO_CASSETTES = ( "docs/docs/how-tos/visualization.ipynb", - "docs/docs/how-tos/many-tools.ipynb" + "docs/docs/how-tos/many-tools.ipynb", ) NOTEBOOKS_NO_EXECUTION = [ @@ -43,7 +43,7 @@ "docs/docs/tutorials/lats/lats.ipynb", # issues only when running with VCR "docs/docs/tutorials/rag/langgraph_crag.ipynb", # flakiness from tavily "docs/docs/tutorials/rag/langgraph_adaptive_rag.ipynb", # Cannot create a consistent method resolution error from VCR - "docs/docs/how-tos/map-reduce.ipynb" # flakiness from structured output, only when running with VCR + "docs/docs/how-tos/map-reduce.ipynb", # flakiness from structured output, only when running with VCR ] @@ -71,9 +71,9 @@ def is_comment(code: str) -> bool: def has_blocklisted_command(code: str, metadata: dict) -> bool: - if 'hide_from_vcr' in metadata: + if "hide_from_vcr" in metadata: return True - + code = code.strip() for blocklisted_pattern in BLOCKLIST_COMMANDS: if blocklisted_pattern in code: @@ -116,8 +116,9 @@ def add_vcr_to_notebook( cell_id = cell.get("id", idx) cassette_name = f"{cassette_prefix}_{cell_id}.msgpack.zlib" - cell.source = f"with custom_vcr.use_cassette('{cassette_name}', filter_headers=['x-api-key', 'authorization'], record_mode='once', serializer='advanced_compressed'):\n" + "\n".join( - f" {line}" for line in lines + cell.source = ( + f"with custom_vcr.use_cassette('{cassette_name}', filter_headers=['x-api-key', 'authorization'], record_mode='once', serializer='advanced_compressed'):\n" + + "\n".join(f" {line}" for line in lines) ) # Add import statement @@ -129,7 +130,7 @@ def add_vcr_to_notebook( "import base64", "import zlib", "import os", - "os.environ.pop(\"LANGCHAIN_TRACING_V2\", None)", + 'os.environ.pop("LANGCHAIN_TRACING_V2", None)', "custom_vcr = vcr.VCR()", "", "def compress_data(data, compression_level=9):", @@ -188,13 +189,15 @@ def process_notebooks(should_comment_install_cells: bool) -> None: # Add a special tag to the first code cell if notebook.cells and notebook.cells[1].cell_type == "code": - notebook.cells[1].metadata["tags"] = notebook.cells[1].metadata.get("tags", []) + ["no_execution"] + notebook.cells[1].metadata["tags"] = notebook.cells[ + 1 + ].metadata.get("tags", []) + ["no_execution"] nbformat.write(notebook, notebook_path) logger.info(f"Processed: {notebook_path}") except Exception as e: logger.error(f"Error processing {notebook_path}: {e}") - + with open(os.path.join(DOCS_PATH, "notebooks_no_execution.json"), "w") as f: json.dump(NOTEBOOKS_NO_EXECUTION, f)