From dedd780825ebde01e19948b0b6520ef882a0f299 Mon Sep 17 00:00:00 2001 From: "E. Evstafiev" Date: Sat, 4 Jan 2025 09:31:11 +0000 Subject: [PATCH] feat: add langchain-pull-md document loader documentation --- .../document_loaders/pull_md.ipynb | 131 ++++++++++++++++++ libs/packages.yml | 4 + 2 files changed, 135 insertions(+) create mode 100644 docs/docs/integrations/document_loaders/pull_md.ipynb diff --git a/docs/docs/integrations/document_loaders/pull_md.ipynb b/docs/docs/integrations/document_loaders/pull_md.ipynb new file mode 100644 index 0000000000000..401719c4aa6f5 --- /dev/null +++ b/docs/docs/integrations/document_loaders/pull_md.ipynb @@ -0,0 +1,131 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: PullMdLoader\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PullMdLoader\n", + "\n", + "Loader for converting URLs into Markdown using the pull.md service.\n", + "\n", + "This package implements a [document loader](/docs/concepts/document_loaders/) for web content. 
Unlike traditional web scrapers, PullMdLoader can handle web pages built with dynamic JavaScript frameworks like React, Angular, or Vue.js, converting them into Markdown without local rendering.\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | Serializable | JS Support |\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "| PullMdLoader | langchain-pull-md | ✅ | ✅ | ❌ |\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "### Installation\n", + "\n", + "```bash\n", + "pip install langchain-pull-md\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialization" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_pull_md.markdown_loader import PullMdLoader\n", + "\n", + "# Instantiate the loader with a URL\n", + "loader = PullMdLoader(url='https://example.com')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "documents = loader.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'source': 'https://example.com',\n", + " 'page_content': '# Example Domain\\nThis domain is used for illustrative examples in documents. 
You may use this domain in literature without prior coordination or asking for permission.'}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "documents[0].metadata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API Reference\n", + "\n", + "- [GitHub](https://github.com/chigwell/langchain-pull-md)\n", + "- [PyPI](https://pypi.org/project/langchain-pull-md/)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/packages.yml b/libs/packages.yml index 34d8184fad178..5fd27064a365f 100644 --- a/libs/packages.yml +++ b/libs/packages.yml @@ -320,4 +320,8 @@ packages: - name: langchain-dappier path: . repo: DappierAI/langchain-dappier + downloads: 0 +- name: langchain-pull-md + path: . + repo: chigwell/langchain-pull-md downloads: 0 \ No newline at end of file