Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Query objects.inv from homepage url #40

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
55 changes: 31 additions & 24 deletions seed_intersphinx_mapping/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,14 @@
# stdlib
import functools
import json
import os.path
import re
from typing import Dict, Optional, Tuple, Union
from typing import Dict, List, Optional, Tuple, Union
from urllib.parse import urlparse

# 3rd party
import dist_meta
import requests
from dist_meta.metadata_mapping import MetadataMapping
from domdf_python_tools.compat import importlib_resources
from domdf_python_tools.utils import stderr_writer
from packaging.requirements import Requirement
Expand All @@ -60,7 +61,7 @@
_DOCUMENTATION_RE = re.compile(r"^[dD]oc(s|umentation)")


def _get_project_links(project_name: str) -> MetadataMapping:
def _get_project_links(project_name: str) -> List[str]:
"""
Returns the web links for the given project.

Expand All @@ -69,29 +70,36 @@ def _get_project_links(project_name: str) -> MetadataMapping:
:param project_name:
"""

urls = MetadataMapping()
urls = []

# Try a local package first
try:
dist = dist_meta.distributions.get_distribution(project_name)
raw_urls = dist.get_metadata().get_all("Project-URL", default=())
metadata = dist.get_metadata()
raw_urls = metadata.get_all("Project-URL", default=())

for url in raw_urls:
label, url, *_ = map(str.strip, url.split(','))
label, url = url.split(',', 1)
if _DOCUMENTATION_RE.match(label):
urls[label] = url
urls.append(url)

urls.append(metadata.get("Home-Page", ''))

except dist_meta.distributions.DistributionNotFoundError:
# Fall back to PyPI

with PyPIJSON() as client:
metadata = client.get_metadata(project_name).info
pypi_metadata = client.get_metadata(project_name).info

if "project_urls" in pypi_metadata and pypi_metadata["project_urls"]:

for label, url in pypi_metadata["project_urls"].items():
if _DOCUMENTATION_RE.match(label):
urls.append(url)

if "project_urls" in metadata and metadata["project_urls"]:
for label, url in metadata["project_urls"].items():
if _DOCUMENTATION_RE.match(label):
urls[label] = url
urls.append(pypi_metadata["home_page"])

urls = [url.strip() for url in filter(None, urls)]
return urls


Expand Down Expand Up @@ -127,25 +135,24 @@ def get_sphinx_doc_url(pypi_name: str) -> str:
:exc:`apeye.slumber_url.exceptions.HttpNotFoundError` if the project could not be found on PyPI.
"""

for key, value in _get_project_links(pypi_name).items():

docs_urls = []
for value in _get_project_links(pypi_name):
# Follow redirects to get actual URL
r = requests.head(value, allow_redirects=True, timeout=10)
if r.status_code != 200: # pragma: no cover
raise ValueError(f"Documentation URL not found: HTTP Status {r.status_code}.")

docs_url = r.url
if r.status_code == 200:
has_extension = os.path.splitext(urlparse(r.url).path)[-1]
url = os.path.dirname(r.url) if has_extension else r.url
docs_urls.append(url)

if docs_url.endswith('/'):
objects_inv_url = f"{docs_url}objects.inv"
else: # pragma: no cover
objects_inv_url = f"{docs_url}/objects.inv"
for docs_url in docs_urls:
objects_inv_url = f"{docs_url.rstrip('/')}/objects.inv"

r = requests.head(objects_inv_url, allow_redirects=True, timeout=10)
if r.status_code != 200:
raise ValueError(f"objects.inv not found at url {objects_inv_url}: HTTP Status {r.status_code}.")

return docs_url
stderr_writer(f"WARNING: objects.inv not found at url {objects_inv_url}: HTTP Status {r.status_code}.")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm reluctant to accept this change as get_sphinx_doc_url is a public function that other modules can use to query documentation URLs. I'd like the caller to still be able to catch the ValueError and display the message to the user themselves.

If you're wanting to make the warning on line 197 more specific -- to say why the documentation URL couldn't be obtained -- how about including the exception message text? E.g.:

except (ValueError, requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
	# Couldn't get it from PyPI, trying fallback mapping
	if project_name in fallback_mapping():
		doc_url = fallback_mapping()[project_name]
		intersphinx_mapping[project_name] = (doc_url, None)
	else:
        stderr_writer(f"WARNING: Unable to determine documentation url for project {project_name}: {str(e)}")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand your point; the raise was removed here to ensure all URLs are queried until a good one is found.
Due to the addition of the home-page URL in docs_urls, there may be more than one URL to iterate over. Keeping the raise makes get_sphinx_doc_url always return or fail on the first one, so if the documentation URL is bad but the homepage is good, it will not work.

else:
return docs_url

raise ValueError("Documentation URL not found in data from PyPI.")

Expand Down
2 changes: 1 addition & 1 deletion seed_intersphinx_mapping/fallback_mapping.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"scikit-learn": "https://scikit-learn.org/stable/",
"six": "https://six.readthedocs.io/",
"slumber": "https://slumber.readthedocs.io/en/v0.6.0/",
"sphinx": "https://www.sphinx-doc.org/en/3.x/",
"sphinx": "https://www.sphinx-doc.org/en/master/",
"typing": "https://docs.python.org/3/",
"typing-extensions": "https://typing-extensions.readthedocs.io/en/latest/",
"typing_extensions": "https://typing-extensions.readthedocs.io/en/latest/",
Expand Down
7 changes: 2 additions & 5 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_get_sphinx_doc_url():
with pytest.raises(ValueError, match="Documentation URL not found in data from PyPI."):
get_sphinx_doc_url("slumber")

with pytest.raises(ValueError, match="objects.inv not found at url .*: HTTP Status 404"):
with pytest.raises(ValueError, match="Documentation URL not found in data from PyPI."):
get_sphinx_doc_url("isort")

assert cache.clear(get_sphinx_doc_url)
Expand All @@ -35,10 +35,7 @@ def test_get_sphinx_doc_url():

if sys.version_info[:2] != (3, 8):
# Latest numpy's "Documentation" url doesn't point to Sphinx docs.
with pytest.raises(
ValueError,
match="objects.inv not found at url https://numpy.org/doc/objects.inv: HTTP Status 404."
):
with pytest.raises(ValueError, match="Documentation URL not found in data from PyPI."):
get_sphinx_doc_url("numpy")
else:
assert re.match(r"https://numpy\.org/doc/1\.\d\d/", get_sphinx_doc_url("numpy"))
Expand Down
15 changes: 8 additions & 7 deletions tests/test_seeding.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@
"packaging": ("https://packaging.pypa.io/en/stable/", None),
"requests": ("https://requests.readthedocs.io/en/latest/", None),
"slumber": ("https://slumber.readthedocs.io/en/v0.6.0/", None),
"sphinx": ("https://www.sphinx-doc.org/en/3.x/", None),
"sphinx": ("https://www.sphinx-doc.org/en/master/", None),
}
bad_expected_mapping = {
"domdf-python-tools": ("https://domdf-python-tools.readthedocs.io/en/latest/", None),
"packaging": ("https://packaging.pypa.io/en/stable/", None),
"sphinx": ("https://www.sphinx-doc.org/en/3.x/", None),
"sphinx": ("https://www.sphinx-doc.org/en/master/", None),
}


Expand All @@ -50,12 +50,12 @@ def test_seed_intersphinx_mapping(

assert seed_intersphinx_mapping(*parse_requirements_txt(tmp_pathplus)) == expects
err = capsys.readouterr().err
assert err == "WARNING: Unable to determine documentation url for project sphinxcontrib-domaintools\n"
assert "WARNING: Unable to determine documentation url for project sphinxcontrib-domaintools\n" in err

requirements, comments, invalid = read_requirements(tmp_pathplus / "requirements.txt", include_invalid=True)
assert seed_intersphinx_mapping(*requirements) == expects
err = capsys.readouterr().err
assert err == "WARNING: Unable to determine documentation url for project sphinxcontrib-domaintools\n"
assert "WARNING: Unable to determine documentation url for project sphinxcontrib-domaintools\n" in err


@pytest.mark.parametrize(
Expand All @@ -70,7 +70,7 @@ def test_seed_intersphinx_mapping_pyproject(tmp_pathplus: PathPlus, contents: st

assert seed_intersphinx_mapping(*parse_pyproject_toml(tmp_pathplus)) == expects
err = capsys.readouterr().err
assert err == "WARNING: Unable to determine documentation url for project sphinxcontrib-domaintools\n"
assert err.endswith("WARNING: Unable to determine documentation url for project sphinxcontrib-domaintools\n")


@pytest.mark.parametrize(
Expand All @@ -85,7 +85,7 @@ def test_seed_intersphinx_mapping_flit(tmp_pathplus: PathPlus, contents: str, ex

assert seed_intersphinx_mapping(*parse_flit_requirements(tmp_pathplus)) == expects
err = capsys.readouterr().err
assert err == "WARNING: Unable to determine documentation url for project sphinxcontrib-domaintools\n"
assert err.endswith("WARNING: Unable to determine documentation url for project sphinxcontrib-domaintools\n")


@pytest.mark.parametrize("pkg_requirements_source", ["requirements", "flit", "pyproject", "pyproject.toml"])
Expand Down Expand Up @@ -125,7 +125,8 @@ def test_sphinx_seed_intersphinx_mapping_mocked(
advanced_data_regression.check(config.intersphinx_mapping)

err = capsys.readouterr().err
assert err == "WARNING: Unable to determine documentation url for project sphinxcontrib-domaintools\n"

assert err.endswith("WARNING: Unable to determine documentation url for project sphinxcontrib-domaintools\n")


def test_sphinx_seed_intersphinx_mapping_list_mocked(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ slumber:
- - null
sphinx:
- sphinx
- - https://www.sphinx-doc.org/en/3.x/
- - https://www.sphinx-doc.org/en/master/
- - null
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ packaging:
- - null
sphinx:
- sphinx
- - https://www.sphinx-doc.org/en/3.x/
- - https://www.sphinx-doc.org/en/master/
- - null
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ packaging:
- - null
sphinx:
- sphinx
- - https://www.sphinx-doc.org/en/3.x/
- - https://www.sphinx-doc.org/en/master/
- - null
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ packaging:
- - null
sphinx:
- sphinx
- - https://www.sphinx-doc.org/en/3.x/
- - https://www.sphinx-doc.org/en/master/
- - null
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ packaging:
- - null
sphinx:
- sphinx
- - https://www.sphinx-doc.org/en/3.x/
- - https://www.sphinx-doc.org/en/master/
- - null
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ slumber:
- - null
sphinx:
- sphinx
- - https://www.sphinx-doc.org/en/3.x/
- - https://www.sphinx-doc.org/en/master/
- - null
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ slumber:
- - null
sphinx:
- sphinx
- - https://www.sphinx-doc.org/en/3.x/
- - https://www.sphinx-doc.org/en/master/
- - null
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ slumber:
- - null
sphinx:
- sphinx
- - https://www.sphinx-doc.org/en/3.x/
- - https://www.sphinx-doc.org/en/master/
- - null
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ slumber:
- - null
sphinx:
- sphinx
- - https://www.sphinx-doc.org/en/3.x/
- - https://www.sphinx-doc.org/en/master/
- - null
Loading