docs: sorted by downloads [wip] #28869

Merged 7 commits on Dec 23, 2024
71 changes: 71 additions & 0 deletions docs/scripts/packages_yml_get_downloads.py
@@ -0,0 +1,71 @@
from datetime import datetime, timedelta, timezone
from pathlib import Path

import requests
from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap

yaml = YAML()

PACKAGE_YML = Path(__file__).parents[2] / "libs" / "packages.yml"


def _get_downloads(p: dict) -> int:
    url = f"https://pypistats.org/api/packages/{p['name']}/recent?period=month"
    r = requests.get(url)
    r.raise_for_status()
    return r.json()["data"]["last_month"]


current_datetime = datetime.now(timezone.utc)
yesterday = current_datetime - timedelta(days=1)

with open(PACKAGE_YML) as f:
    data = yaml.load(f)


def _reorder_keys(p):
    keys = p.keys()
    key_order = [
        "name",
        "name_title",
        "path",
        "repo",
        "type",
        "provider_page",
        "js",
        "downloads",
        "downloads_updated_at",
    ]
    if set(keys) - set(key_order):
        raise ValueError(f"Unexpected keys: {set(keys) - set(key_order)}")
    return CommentedMap((k, p[k]) for k in key_order if k in p)


data["packages"] = [_reorder_keys(p) for p in data["packages"]]

seen = set()
for p in data["packages"]:
    if p["name"] in seen:
        raise ValueError(f"Duplicate package: {p['name']}")
    seen.add(p["name"])
    downloads_updated_at_str = p.get("downloads_updated_at")
    downloads_updated_at = (
        datetime.fromisoformat(downloads_updated_at_str)
        if downloads_updated_at_str
        else None
    )

    if downloads_updated_at is not None and downloads_updated_at > yesterday:
        print(f"done: {p['name']}: {p['downloads']}")
        continue

    p["downloads"] = _get_downloads(p)
    p["downloads_updated_at"] = current_datetime.isoformat()
    with open(PACKAGE_YML, "w") as f:
        yaml.dump(data, f)
    print(f"{p['name']}: {p['downloads']}")


with open(PACKAGE_YML, "w") as f:
    yaml.dump(data, f)
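
For context, the script above refreshes download counts at most once a day per package: entries whose downloads_updated_at is less than 24 hours old are skipped, otherwise the count is fetched from pypistats.org and packages.yml is rewritten after each package (an incremental save). A minimal sketch of that update step on a single in-memory entry, assuming a hypothetical package name and an invented API response; only the field names mirror the script:

from datetime import datetime, timedelta, timezone

# Invented entry and a response shaped like the pypistats "recent" endpoint used above.
entry = {
    "name": "langchain-example",  # hypothetical package
    "downloads": 0,
    "downloads_updated_at": "2024-01-01T00:00:00+00:00",
}
recent = {"data": {"last_day": 1_000, "last_week": 7_000, "last_month": 30_000}}

now = datetime.now(timezone.utc)
updated_at = datetime.fromisoformat(entry["downloads_updated_at"])
if updated_at <= now - timedelta(days=1):  # stale, so refresh (mirrors the skip logic above)
    entry["downloads"] = recent["data"]["last_month"]
    entry["downloads_updated_at"] = now.isoformat()
print(entry)  # {'name': 'langchain-example', 'downloads': 30000, ...}
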
160 changes: 78 additions & 82 deletions docs/scripts/partner_pkg_table.py
@@ -2,110 +2,106 @@
import sys
from pathlib import Path

import requests
import yaml

#################
# CONFIGURATION #
#################

# packages to ignore / exclude from the table
IGNORE_PACKGAGES = {
    # top-level packages
    "langchain-core",
    "langchain-text-splitters",
    "langchain",
    "langchain-community",
    "langchain-experimental",
    "langchain-cli",
    "langchain-tests",
    # integration packages that don't have a provider index
    # do NOT add to these. These were merged before having a
    # provider index was required
    # can remove these once they have a provider index
    "langchain-yt-dlp",
}

#####################
# END CONFIGURATION #
#####################

DOCS_DIR = Path(__file__).parents[1]
PACKAGE_YML = Path(__file__).parents[2] / "libs" / "packages.yml"
IGNORE_PACKGAGES = {"langchain-experimental"}

# for now, only include packages that are in the langchain-ai org
# because we don't have a policy for inclusion in this table yet,
# and including all packages will make the list too long
with open(PACKAGE_YML) as f:
    data = yaml.safe_load(f)
    EXTERNAL_PACKAGES = set(
        p["name"][10:]
        for p in data["packages"]
        if p["repo"].startswith("langchain-ai/")
        and p["repo"] != "langchain-ai/langchain"
        and p["name"] not in IGNORE_PACKGAGES


def _get_type(package: dict) -> str:
    if package["name"] in IGNORE_PACKGAGES:
        return "ignore"
    if package["repo"] == "langchain-ai/langchain":
        return "B"
    if package["repo"].startswith("langchain-ai/"):
        return "C"
    return "D"


def _enrich_package(p: dict) -> dict | None:
    p["name_short"] = (
        p["name"][10:] if p["name"].startswith("langchain-") else p["name"]
    )
    p["name_title"] = p.get("name_title") or p["name_short"].title().replace(
        "-", " "
    ).replace("db", "DB").replace("Db", "DB").replace("ai", "AI").replace("Ai", "AI")
    p["type"] = _get_type(p)

    if p["type"] == "ignore":
        return None

    p["js_exists"] = bool(p.get("js"))
    custom_provider_page = p.get("provider_page")
    default_provider_page = f"/docs/integrations/providers/{p['name_short']}/"
    default_provider_page_exists = bool(
        glob.glob(str(DOCS_DIR / f"docs/integrations/providers/{p['name_short']}.*"))
    )
    IN_REPO_PACKAGES = set(
        p["name"][10:]
        for p in data["packages"]
        if p["repo"] == "langchain-ai/langchain"
        and p["path"].startswith("libs/partners")
        and p["name"] not in IGNORE_PACKGAGES
    p["provider_page"] = custom_provider_page or (
        default_provider_page if default_provider_page_exists else None
    )
    if p["provider_page"] is None:
        msg = (
            f"Provider page not found for {p['name_short']}. "
            f"Please add one at docs/integrations/providers/{p['name_short']}.{{mdx,ipynb}}"
        )
        raise ValueError(msg)

JS_PACKAGES = {
    "google-gauth",
    "openai",
    "anthropic",
    "google-genai",
    "pinecone",
    "aws",
    "google-vertexai",
    "qdrant",
    "azure-dynamic-sessions",
    "google-vertexai-web",
    "redis",
    "azure-openai",
    "google-webauth",
    "baidu-qianfan",
    "groq",
    "standard-tests",
    "cloudflare",
    "mistralai",
    "textsplitters",
    "cohere",
    "mixedbread-ai",
    "weaviate",
    "mongodb",
    "yandex",
    "exa",
    "nomic",
    "google-common",
    "ollama",
    "ibm",
}
    return p

ALL_PACKAGES = IN_REPO_PACKAGES.union(EXTERNAL_PACKAGES)

CUSTOM_NAME = {
    "google-genai": "Google Generative AI",
    "aws": "AWS",
    "ibm": "IBM",
}
CUSTOM_PROVIDER_PAGES = {
    "azure-dynamic-sessions": "/docs/integrations/providers/microsoft/",
    "prompty": "/docs/integrations/providers/microsoft/",
    "sqlserver": "/docs/integrations/providers/microsoft/",
    "google-community": "/docs/integrations/providers/google/",
    "google-genai": "/docs/integrations/providers/google/",
    "google-vertexai": "/docs/integrations/providers/google/",
    "nvidia-ai-endpoints": "/docs/integrations/providers/nvidia/",
    "exa": "/docs/integrations/providers/exa_search/",
    "mongodb": "/docs/integrations/providers/mongodb_atlas/",
    "sema4": "/docs/integrations/providers/robocorp/",
    "postgres": "/docs/integrations/providers/pgvector/",
}
PROVIDER_PAGES = {
    name: f"/docs/integrations/providers/{name}/"
    for name in ALL_PACKAGES
    if glob.glob(str(DOCS_DIR / f"docs/integrations/providers/{name}.*"))
}
PROVIDER_PAGES = {
    **PROVIDER_PAGES,
    **CUSTOM_PROVIDER_PAGES,
}
with open(PACKAGE_YML) as f:
    data = yaml.safe_load(f)

packages_n = [_enrich_package(p) for p in data["packages"]]
packages = [p for p in packages_n if p is not None]

# sort by downloads
packages_sorted = sorted(packages, key=lambda p: p["downloads"], reverse=True)


def package_row(name: str) -> str:
    js = "✅" if name in JS_PACKAGES else "❌"
    link = PROVIDER_PAGES.get(name)
    title = CUSTOM_NAME.get(name) or name.title().replace("-", " ").replace(
        "db", "DB"
    ).replace("Db", "DB").replace("ai", "AI").replace("Ai", "AI")
def package_row(p: dict) -> str:
    js = "✅" if p["js_exists"] else "❌"
    link = p["provider_page"]
    title = p["name_title"]
    provider = f"[{title}]({link})" if link else title
    return f"| {provider} | [langchain-{name}](https://python.langchain.com/api_reference/{name.replace('-', '_')}/) | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-{name}?style=flat-square&label=%20&color=blue) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-{name}?style=flat-square&label=%20&color=orange) | {js} |"
    return f"| {provider} | [{p['name']}](https://python.langchain.com/api_reference/{p['name_short'].replace('-', '_')}/) | ![PyPI - Downloads](https://img.shields.io/pypi/dm/{p['name']}?style=flat-square&label=%20&color=blue) | ![PyPI - Version](https://img.shields.io/pypi/v/{p['name']}?style=flat-square&label=%20&color=orange) | {js} |"


def table() -> str:
    header = """| Provider | Package | Downloads | Latest | [JS](https://js.langchain.com/docs/integrations/providers/) |
| :--- | :---: | :---: | :---: | :---: |
"""
    return header + "\n".join(package_row(name) for name in sorted(ALL_PACKAGES))
    return header + "\n".join(package_row(p) for p in packages_sorted)


def doc() -> str:
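
Taken together, the two scripts drive the new table ordering: packages_yml_get_downloads.py stamps each entry in packages.yml with a downloads count, and partner_pkg_table.py enriches, filters, and sorts those entries by downloads before rendering one markdown row per package. A rough, self-contained sketch of that pipeline on invented data (the package names, provider pages, and counts below are hypothetical):

# Hypothetical entries; the real ones come from libs/packages.yml.
packages = [
    {"name": "langchain-foo", "name_title": "Foo", "provider_page": "/docs/integrations/providers/foo/", "js_exists": True, "downloads": 250},
    {"name": "langchain-bar", "name_title": "Bar", "provider_page": None, "js_exists": False, "downloads": 900},
]


def row(p: dict) -> str:
    js = "✅" if p["js_exists"] else "❌"
    provider = f"[{p['name_title']}]({p['provider_page']})" if p["provider_page"] else p["name_title"]
    return f"| {provider} | {p['name']} | {p['downloads']} | {js} |"


# Highest download count first, as with packages_sorted above.
for p in sorted(packages, key=lambda p: p["downloads"], reverse=True):
    print(row(p))
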