refactor(download): remove asset/artifact utilities (MODFLOW-USGS#175)

Remove the functions for listing release assets and for listing and downloading GitHub Actions artifacts. The GitHub CLI/API, or the `actions/download-artifact` action in CI workflows, can be used instead. PyGithub remains an option if similar functionality is really needed.
wpbonelli · Jan 17, 2025 · 6957554 · 6957554
1 parent 5074b21
commit 6957554
Show file tree

Hide file tree

Showing 3 changed files with 4 additions and 302 deletions.
diff --git a/autotest/test_download.py b/autotest/test_download.py
@@ -3,10 +3,8 @@
 
 from modflow_devtools.download import (
     download_and_unzip,
-    download_artifact,
     get_release,
     get_releases,
-    list_artifacts,
 )
 from modflow_devtools.markers import requires_github
 
@@ -60,47 +58,6 @@ def test_get_release(repo):
         assert set(actual_names) >= set(expected_names)
 
 
-@flaky
-@requires_github
-@pytest.mark.parametrize("name", [None, "rtd-files", "run-time-comparison"])
-@pytest.mark.parametrize("per_page", [None, 100])
-def test_list_artifacts(name, per_page):
-    artifacts = list_artifacts(
-        "MODFLOW-USGS/modflow6",
-        name=name,
-        per_page=per_page,
-        max_pages=2,
-        verbose=True,
-    )
-
-    if any(artifacts) and name:
-        assert all(name == a["name"] for a in artifacts)
-
-
-@flaky
-@requires_github
-@pytest.mark.parametrize("delete_zip", [True, False])
-def test_download_artifact(function_tmpdir, delete_zip):
-    repo = "MODFLOW-USGS/modflow6"
-    artifacts = list_artifacts(repo, max_pages=1, verbose=True)
-    first = next(iter(artifacts), None)
-
-    if not first:
-        pytest.skip(f"No artifacts found for repo: {repo}")
-
-    artifact_id = first["id"]
-    download_artifact(
-        repo=repo,
-        id=artifact_id,
-        path=function_tmpdir,
-        delete_zip=delete_zip,
-        verbose=False,
-    )
-
-    assert len(list(function_tmpdir.rglob("*"))) >= (0 if delete_zip else 1)
-    assert any(list(function_tmpdir.rglob("*.zip"))) != delete_zip
-
-
 @flaky
 @requires_github
 @pytest.mark.parametrize("delete_zip", [True, False])

diff --git a/docs/md/download.md b/docs/md/download.md
@@ -1,6 +1,6 @@
 # Web utilities 
 
-Some utility functions are provided for common web requests. Most use the GitHub API to query information or download artifacts and assets. See this project's test cases (in particular `test_download.py`) for detailed usage examples.
+Some utility functions are provided for GitHub-related web requests. See this project's test cases (in particular `test_download.py`) for detailed usage examples.
 
 **Note:** to avoid GitHub API rate limits when using these functions, it is recommended to set the `GITHUB_TOKEN` environment variable. If this variable is set, the token will be borne on requests sent to the API.
 
@@ -18,56 +18,11 @@ assets = release["assets"]
 print([asset["name"] for asset in assets])
 ```
 
-This yields `['code.json', 'linux.zip', 'mac.zip', 'win64.zip']`.
-
-Equivalently, using the `get_release_assets()` function to list the latest release assets directly:
-
-```python
-from modflow_devtools.download import get_release_assets
-
-assets = get_release_assets("MODFLOW-USGS/executables")
-print([asset["name"] for asset in assets])
-```
-
-The `simple` parameter, defaulting to `False`, can be toggled to return a simple dictionary mapping asset names to download URLs:
-
-```python
-from pprint import pprint
-
-assets = get_release_assets("MODFLOW-USGS/executables", simple=True)
-pprint(assets)
-```
-
-This prints:
-
-```
-{'code.json': 'https://github.com/MODFLOW-USGS/executables/releases/download/12.0/code.json',
- 'linux.zip': 'https://github.com/MODFLOW-USGS/executables/releases/download/12.0/linux.zip',
- 'mac.zip': 'https://github.com/MODFLOW-USGS/executables/releases/download/12.0/mac.zip',
- 'win64.zip': 'https://github.com/MODFLOW-USGS/executables/releases/download/12.0/win64.zip'}
-```
+This prints `['code.json', 'linux.zip', 'mac.zip', 'win64.zip']`.
 
 ## Downloads
 
-The `download_artifact` function downloads and unzips the GitHub Actions artifact with the given ID to the given path, optionally deleting the zipfile afterwards. The `repo` format is `owner/name`, as in GitHub URLs. For instance:
-
-```python
-from modflow_devtools.download import list_artifacts, download_artifact
-
-repo = "MODFLOW-USGS/modflow6"
-artifacts = list_artifacts(repo, max_pages=1, verbose=True)
-artifact = next(iter(artifacts), None)
-if artifact:
-    download_artifact(
-        repo=repo,
-        id=artifact["id"],
-        path=function_tmpdir,
-        delete_zip=False,
-        verbose=False,
-    )
-```
-
-The `download_and_unzip` function is a more generic alternative for downloading and unzipping files from arbitrary URLs.
+The `download_and_unzip` function downloads and unzips zip files.
 
 For instance, to download a MODFLOW 6.4.1 Linux distribution and delete the zipfile after extracting:
 

diff --git a/modflow_devtools/download.py b/modflow_devtools/download.py
@@ -6,8 +6,7 @@
 import urllib.request
 from os import PathLike
 from pathlib import Path
-from typing import Optional, Union
-from uuid import uuid4
+from typing import Optional
 from warnings import warn
 
 from modflow_devtools.zip import MFZipFile
@@ -219,215 +218,6 @@ def get_latest_version(repo, retries=3, verbose=False) -> str:
     return release["tag_name"]
 
 
-def get_release_assets(
-    repo, tag="latest", simple=False, retries=3, verbose=False
-) -> Union[dict, list[dict]]:
-    """
-    Get assets corresponding to the given release.
-
-    Parameters
-    ----------
-    repo : str
-        The repository (format must be owner/name)
-    tag : str
-        The release tag to retrieve assets for
-    simple : bool
-        If True, return a dict mapping asset names to download URLs, otherwise (by
-        default) a list of dicts containing asset info as returned by the GitHub API
-    retries : int
-        The maximum number of retries for each request
-    verbose : bool
-        Whether to show verbose output
-
-    Returns
-    -------
-        A list of dicts if simple is False, one per release asset.
-        If simple is True, a dict mapping asset names to download URLs.
-    """
-
-    if "/" not in repo:
-        raise ValueError("repo format must be owner/name")
-
-    if not isinstance(tag, str) or not any(tag):
-        raise ValueError("tag must be a non-empty string")
-
-    if not isinstance(retries, int) or retries < 1:
-        raise ValueError("retries must be a positive int")
-
-    release = get_release(repo, tag=tag, retries=retries, verbose=verbose)
-    return (
-        {a["name"]: a["browser_download_url"] for a in release["assets"]}
-        if simple
-        else release["assets"]
-    )
-
-
-def list_artifacts(
-    repo, name=None, per_page=30, max_pages=10, retries=3, verbose=False
-) -> list[dict]:
-    """
-    List artifacts for the given repository, optionally filtering by name (exact match).
-    If more artifacts are available than will fit within the given page size, by default
-    requests are made until all artifacts are retrieved. The number of requests made can
-    be limited with the max_pages parameter.
-
-    Parameters
-    ----------
-    repo : str
-        The repository (format must be owner/name)
-    name : str
-        The artifact name (must be an exact match)
-    per_page : int
-        The number of artifacts to return per page (must be between 1-100, inclusive)
-    max_pages : int
-        The maximum number of pages to retrieve (i.e. the number of requests to make)
-    retries : int
-        The maximum number of retries for each request
-    verbose : bool
-        Whether to show verbose output
-
-    Returns
-    -------
-        A list of dictionaries, each containing information
-        about an artifact as returned by the GitHub API.
-    """
-
-    if "/" not in repo:
-        raise ValueError("repo format must be owner/name")
-
-    if not isinstance(retries, int) or retries < 1:
-        raise ValueError("retries must be a positive int")
-
-    msg = f"artifact(s) for {repo}" + (f" matching name {name}" if name else "")
-    req_url = f"https://api.github.com/repos/{repo}/actions/artifacts"
-    page = 1
-    params = {}
-
-    if name is not None:
-        if not isinstance(name, str) or len(name) == 0:
-            raise ValueError("name must be a non-empty string")
-        params["name"] = name
-
-    if per_page is not None:
-        if per_page < 1 or per_page > 100:
-            raise ValueError("per_page must be between 1 and 100")
-        params["per_page"] = int(per_page)
-
-    def get_response_json():
-        tries = 0
-        params["page"] = page
-        request = get_request(req_url, params=params)
-        while True:
-            tries += 1
-            try:
-                if verbose:
-                    print(f"Fetching {msg} (page {page}, {per_page} per page)")
-                with urllib.request.urlopen(request, timeout=10) as resp:
-                    return json.loads(resp.read().decode())
-            except urllib.error.HTTPError as err:
-                if err.code == 401 and os.environ.get("GITHUB_TOKEN"):
-                    raise ValueError("GITHUB_TOKEN env is invalid") from err
-                elif err.code == 403 and "rate limit exceeded" in err.reason:
-                    raise ValueError(
-                        f"use GITHUB_TOKEN env to bypass rate limit ({err})"
-                    ) from err
-                elif err.code in (404, 503) and tries < retries:
-                    # GitHub sometimes returns this error for valid URLs, so retry
-                    warn(f"URL request try {tries} failed ({err})")
-                    continue
-                raise RuntimeError(f"cannot retrieve data from {req_url}") from err
-
-    artifacts = []
-    diff = 1
-    max_pages = max_pages if max_pages else sys.maxsize
-    while diff > 0 and page <= max_pages:
-        result = get_response_json()
-        total = result["total_count"]
-        if page == 1:
-            print(f"Repo {repo} has {total} artifact(s)")
-
-        page += 1
-        artifacts.extend(result["artifacts"])
-        diff = total - len(artifacts)
-
-    if verbose:
-        print(f"Found {len(artifacts)} {msg}")
-
-    return artifacts
-
-
-def download_artifact(
-    repo,
-    id,
-    path: Optional[PathLike] = None,
-    delete_zip=True,
-    retries=3,
-    verbose=False,
-):
-    """
-    Download and unzip a GitHub Actions artifact, selected by its ID.
-
-    Parameters
-    ----------
-    repo : str
-        The repository (format must be owner/name)
-    id : str
-        The artifact ID
-    path : PathLike
-        Path where the zip file will be saved (default is current path)
-    delete_zip : bool
-        Whether the zip file should be deleted after it is unzipped (default is True)
-    retries : int
-        The maximum number of retries for each request
-    verbose : bool
-        Whether to show verbose output
-    """
-
-    if "/" not in repo:
-        raise ValueError("repo format must be owner/name")
-
-    if not isinstance(retries, int) or retries < 1:
-        raise ValueError("retries must be a positive int")
-
-    req_url = f"https://api.github.com/repos/{repo}/actions/artifacts/{id}/zip"
-    request = urllib.request.Request(req_url)
-    if "github.com" in req_url:
-        github_token = os.environ.get("GITHUB_TOKEN", None)
-        if github_token:
-            request.add_header("Authorization", f"Bearer {github_token}")
-
-    zip_path = Path(path).expanduser().absolute() / f"{uuid4()!s}.zip"
-    tries = 0
-    while True:
-        tries += 1
-        try:
-            with (
-                urllib.request.urlopen(request) as url_file,
-                zip_path.open("wb") as out_file,
-            ):
-                content = url_file.read()
-                out_file.write(content)
-                break
-        except urllib.error.HTTPError as err:
-            if tries < retries:
-                warn(f"URL request try {tries} failed ({err})")
-                continue
-            else:
-                raise RuntimeError(f"cannot retrieve data from {req_url}") from err
-
-    if verbose:
-        print(f"Uncompressing: {zip_path}")
-
-    z = MFZipFile(zip_path)
-    z.extractall(str(path))
-    z.close()
-
-    if delete_zip:
-        if verbose:
-            print(f"Deleting zipfile {zip_path}")
-        zip_path.unlink()
-
-
 def download_and_unzip(
     url: str,
     path: Optional[PathLike] = None,