From f993dfcfcb468bad3a30ae2ffdf2509890514294 Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Sun, 22 Dec 2024 22:07:59 -0500
Subject: [PATCH] x

---
Notes (ignored by `git am`; everything between `---` and the first `diff --git`
is free-form commentary):

  * generate_api_reference_links.py: registers `interrupt` and `Command` in the
    import-mapping table, rewords the `ImportInformation` field comments, and
    adds `get_imports` / `update_markdown_with_imports`, which append an
    "API Reference" line after each fenced python/py code block in markdown.
  * notebook_hooks.py: calls `update_markdown_with_imports` for pages that are
    not `.ipynb` notebooks.
  * NOTE(review): the named groups in `code_block_pattern`
    (`indent`, `language`, `code`) and the anchor markup in `api_links` were
    restored after markup stripping had removed them; `match.group('indent')`,
    `match.group('code')` and `match.group('language')` require those names.
    Confirm the anchor format against the upstream commit.
  * NOTE(review): the local `language` in `replace_code_block` is assigned but
    never used.

 docs/_scripts/generate_api_reference_links.py | 82 ++++++++++++++++++-
 docs/_scripts/notebook_hooks.py               |  7 +-
 2 files changed, 84 insertions(+), 5 deletions(-)

diff --git a/docs/_scripts/generate_api_reference_links.py b/docs/_scripts/generate_api_reference_links.py
index 30a75440d..bbd440a1a 100644
--- a/docs/_scripts/generate_api_reference_links.py
+++ b/docs/_scripts/generate_api_reference_links.py
@@ -47,6 +47,8 @@
     (["langgraph.graph"], "langgraph.constants", "END", "constants"),
     (["langgraph.constants"], "langgraph.types", "Send", "types"),
     (["langgraph.constants"], "langgraph.types", "Interrupt", "types"),
+    (["langgraph.constants"], "langgraph.types", "interrupt", "types"),
+    (["langgraph.constants"], "langgraph.types", "Command", "types"),
     ([], "langgraph.types", "RetryPolicy", "types"),
     ([], "langgraph.checkpoint.base", "Checkpoint", "checkpoints"),
     ([], "langgraph.checkpoint.base", "CheckpointMetadata", "checkpoints"),
@@ -115,10 +117,10 @@ def _get_doc_title(data: str, file_name: str) -> str:
 
 
 class ImportInformation(TypedDict):
-    imported: str  # imported class name
-    source: str  # module path
-    docs: str  # URL to the documentation
-    title: str  # Title of the document
+    imported: str  # The name of the class that was imported.
+    source: str  # The full module path from which the class was imported.
+    docs: str  # The URL pointing to the class's documentation.
+    title: str  # The title of the document where the import is used.
 
 
 def _get_imports(
@@ -244,3 +246,75 @@ def preprocess(self, nb, resources):
             cells.append(cell)
         nb.cells = cells
         return nb, resources
+
+
+def get_imports(code: str, doc_title: str) -> List[ImportInformation]:
+    """Retrieve all import references from the given code for specified ecosystems.
+
+    Args:
+        code: The source code from which to extract import references.
+        doc_title: The documentation title associated with the code.
+
+    Returns:
+        A list of import information for each import found.
+    """
+    ecosystems = ["langchain", "langgraph"]
+    all_imports = []
+    for package_ecosystem in ecosystems:
+        all_imports.extend(_get_imports(code, doc_title, package_ecosystem))
+    return all_imports
+
+
+def update_markdown_with_imports(markdown: str) -> str:
+    """Update markdown to include API reference links for imports in Python code blocks.
+
+    This function scans the markdown content for Python code blocks, extracts any imports, and appends links to their API documentation.
+
+    Args:
+        markdown: The markdown content to process.
+
+    Returns:
+        Updated markdown with API reference links appended to Python code blocks.
+
+    Example:
+        Given a markdown with a Python code block:
+
+        ```python
+        from langchain.nlp import TextGenerator
+        ```
+        This function will append an API reference link to the `TextGenerator` class from the `langchain.nlp` module if it's recognized.
+    """
+    code_block_pattern = re.compile(
+        r'(?P<indent>[ \t]*)```(?P<language>python|py)\n(?P<code>.*?)\n(?P=indent)```', re.DOTALL
+    )
+
+    def replace_code_block(match: re.Match) -> str:
+        """Replace the matched code block with additional API reference links if imports are found.
+
+        Args:
+            match (re.Match): The regex match object containing the code block.
+
+        Returns:
+            str: The modified code block with API reference links appended if applicable.
+        """
+        indent = match.group('indent')
+        code_block = match.group('code')
+        language = match.group('language')  # Preserve the language from the regex match
+        # Retrieve import information from the code block
+        imports = get_imports(code_block, "__unused__")
+
+        original_code_block = match.group(0)
+        # If no imports are found, return the original code block
+        if not imports:
+            return original_code_block
+
+        # Generate API reference links for each import
+        api_links = ' | '.join(
+            f'<a href="{imp["docs"]}">{imp["imported"]}</a>' for imp in imports
+        )
+        # Return the code block with appended API reference links
+        return f'{original_code_block}\n\n{indent}API Reference: {api_links}'
+
+    # Apply the replace_code_block function to all matches in the markdown
+    updated_markdown = code_block_pattern.sub(replace_code_block, markdown)
+    return updated_markdown
\ No newline at end of file
diff --git a/docs/_scripts/notebook_hooks.py b/docs/_scripts/notebook_hooks.py
index b18e3dad5..8d3649bb1 100644
--- a/docs/_scripts/notebook_hooks.py
+++ b/docs/_scripts/notebook_hooks.py
@@ -7,6 +7,7 @@
 from mkdocs.structure.pages import Page
 
 from notebook_convert import convert_notebook
+from generate_api_reference_links import update_markdown_with_imports
 
 logger = logging.getLogger(__name__)
 logging.basicConfig()
@@ -111,7 +112,11 @@ def on_page_markdown(markdown: str, page: Page, **kwargs: Dict[str, Any]):
     if page.file.src_path.endswith(".ipynb"):
         logger.info("Processing Jupyter notebook: %s", page.file.src_path)
         markdown = convert_notebook(page.file.abs_src_path)
+    else:
+        # Append API reference links to code blocks
+        # This logic is already applied for notebooks in `convert_notebook`.
+        # We add it here to apply it to regular markdown files.
+        markdown = update_markdown_with_imports(markdown)
     # Apply highlight comments to code blocks
     markdown = _highlight_code_blocks(markdown)
-
     return markdown