Skip to content

Commit

Permalink
docs: improve api reference generation in code blocks (#2857)
Browse files Browse the repository at this point in the history
Generate API reference links from any markdown -- this handles markdown
converted from notebooks as well as code blocks in plain markdown files
  • Loading branch information
eyurtsev authored Dec 23, 2024
2 parents 9e31b82 + 5183484 commit 4b0c53f
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 38 deletions.
107 changes: 73 additions & 34 deletions docs/_scripts/generate_api_reference_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
(["langgraph.graph"], "langgraph.constants", "END", "constants"),
(["langgraph.constants"], "langgraph.types", "Send", "types"),
(["langgraph.constants"], "langgraph.types", "Interrupt", "types"),
(["langgraph.constants"], "langgraph.types", "interrupt", "types"),
(["langgraph.constants"], "langgraph.types", "Command", "types"),
([], "langgraph.types", "RetryPolicy", "types"),
([], "langgraph.checkpoint.base", "Checkpoint", "checkpoints"),
([], "langgraph.checkpoint.base", "CheckpointMetadata", "checkpoints"),
Expand Down Expand Up @@ -115,10 +117,10 @@ def _get_doc_title(data: str, file_name: str) -> str:


class ImportInformation(TypedDict):
    """Describe a single import and where its documentation lives."""

    imported: str  # The name of the class that was imported.
    source: str  # The full module path from which the class was imported.
    docs: str  # The URL pointing to the class's documentation.
    title: str  # The title of the document where the import is used.


def _get_imports(
Expand Down Expand Up @@ -211,36 +213,73 @@ def _get_imports(
return imports


class ImportPreprocessor(Preprocessor):
"""A preprocessor to replace imports in each Python code cell with links to their
documentation and append the import info in a comment."""
def get_imports(code: str, doc_title: str) -> List[ImportInformation]:
"""Retrieve all import references from the given code for specified ecosystems.
def preprocess(self, nb, resources):
self.all_imports = []
file_name = os.path.basename(resources.get("metadata", {}).get("name", ""))
_DOC_TITLE = _get_doc_title(nb.cells[0].source, file_name)
Args:
code: The source code from which to extract import references.
doc_title: The documentation title associated with the code.
cells = []
for cell in nb.cells:
if cell.cell_type == "code":
cells.append(cell)
imports = _get_imports(
cell.source, _DOC_TITLE, "langchain"
) + _get_imports(cell.source, _DOC_TITLE, "langgraph")
if not imports:
continue
Returns:
A list of import information for each import found.
"""
ecosystems = ["langchain", "langgraph"]
all_imports = []
for package_ecosystem in ecosystems:
all_imports.extend(_get_imports(code, doc_title, package_ecosystem))
return all_imports

cells.append(
nbformat.v4.new_markdown_cell(
source=f"""
<div>
<b>API Reference:</b>
{' | '.join(f'<a href="{imp["docs"]}">{imp["imported"]}</a>' for imp in imports)}
</div>
"""
)
)
else:
cells.append(cell)
nb.cells = cells
return nb, resources

def update_markdown_with_imports(markdown: str) -> str:
    """Append API reference links after Python code blocks in markdown.

    Scans the markdown for fenced code blocks tagged ``python`` or ``py``,
    passes the code of each block to ``get_imports`` and, when that returns
    any imports, appends an ``API Reference:`` line of HTML links after the
    block. Blocks for which no imports are returned are left unchanged.

    A block is only matched when the opening fence is immediately followed by
    a newline and the closing fence is preceded by exactly the same
    indentation as the opening fence.

    Args:
        markdown: The markdown content to process.

    Returns:
        The markdown with API reference links appended after every Python code
        block for which imports were found.

    Example:
        Given a markdown with a Python code block:

        ```python
        from langchain.nlp import TextGenerator
        ```

        This function will append an API reference link to the `TextGenerator`
        class from the `langchain.nlp` module if it's recognized.
    """
    # DOTALL lets the lazy `code` group span multiple lines; the `indent`
    # backreference ties the closing fence to the opening fence's indentation.
    code_block_pattern = re.compile(
        r'(?P<indent>[ \t]*)```(?P<language>python|py)\n(?P<code>.*?)\n(?P=indent)```', re.DOTALL
    )

    def replace_code_block(match: re.Match) -> str:
        """Return the matched code block, followed by API links when imports are found.

        Args:
            match: The regex match object containing the code block.

        Returns:
            The original block text, with an ``API Reference:`` line appended
            when ``get_imports`` returns at least one import.
        """
        original_code_block = match.group(0)
        # The doc title is not used for link generation here, hence the placeholder.
        imports = get_imports(match.group('code'), "__unused__")
        if not imports:
            return original_code_block

        api_links = ' | '.join(
            f'<a href="{imp["docs"]}">{imp["imported"]}</a>' for imp in imports
        )
        # Prefix the appended line with the fence's indentation so it sits at
        # the same nesting level as the code block itself.
        indent = match.group('indent')
        return f'{original_code_block}\n\n{indent}API Reference: {api_links}'

    return code_block_pattern.sub(replace_code_block, markdown)
3 changes: 0 additions & 3 deletions docs/_scripts/notebook_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
from nbconvert.exporters import MarkdownExporter
from nbconvert.preprocessors import Preprocessor

from generate_api_reference_links import ImportPreprocessor


class EscapePreprocessor(Preprocessor):
def preprocess_cell(self, cell, resources, cell_index):
Expand Down Expand Up @@ -107,7 +105,6 @@ def preprocess_cell(self, cell, resources, cell_index):
preprocessors=[
EscapePreprocessor,
ExtractAttachmentsPreprocessor,
ImportPreprocessor,
],
template_name="mdoutput",
extra_template_basedirs=[
Expand Down
5 changes: 4 additions & 1 deletion docs/_scripts/notebook_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from mkdocs.structure.pages import Page

from notebook_convert import convert_notebook
from generate_api_reference_links import update_markdown_with_imports

logger = logging.getLogger(__name__)
logging.basicConfig()
Expand Down Expand Up @@ -111,7 +112,9 @@ def on_page_markdown(markdown: str, page: Page, **kwargs: Dict[str, Any]):
if page.file.src_path.endswith(".ipynb"):
logger.info("Processing Jupyter notebook: %s", page.file.src_path)
markdown = convert_notebook(page.file.abs_src_path)

# Append API reference links to code blocks
markdown = update_markdown_with_imports(markdown)
# Apply highlight comments to code blocks
markdown = _highlight_code_blocks(markdown)

return markdown

0 comments on commit 4b0c53f

Please sign in to comment.