diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c883b2e..a9d2584 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -49,7 +49,12 @@ jobs: pixi add "python=${{ matrix.python-version }}.*=*cpython*" pixi run dev pixi run conda info + - name: Configure conda + run: | + echo "channels: [conda-forge]" > .pixi/envs/default/.condarc + - name: Patch history file (temporary) + run: echo "//fix" > .pixi/envs/default/conda-meta/history - name: Run tests - run: pixi run test + run: pixi run test --basetemp=${{ runner.os == 'Windows' && 'D:\\temp' || runner.temp }} - name: Build recipe run: pixi run build diff --git a/conda_pip/cli.py b/conda_pip/cli.py index f4df09a..7e11703 100644 --- a/conda_pip/cli.py +++ b/conda_pip/cli.py @@ -1,6 +1,8 @@ """ conda pip subcommand for CLI """ +from __future__ import annotations + import argparse import os import sys @@ -17,6 +19,8 @@ def configure_parser(parser: argparse.ArgumentParser): + from .dependencies import BACKENDS + add_parser_help(parser) add_parser_prefix(parser) add_output_and_prompt_options(parser) @@ -51,14 +55,22 @@ def configure_parser(parser: argparse.ArgumentParser): default="conda-forge", help="Where to look for conda dependencies.", ) + install.add_argument( + "--backend", + metavar="TOOL", + default="pip", + choices=BACKENDS, + help="Which tool to use for PyPI packaging dependency resolution.", + ) install.add_argument("packages", metavar="package", nargs="+") -def execute(args: argparse.Namespace) -> None: +def execute(args: argparse.Namespace) -> int: from conda.common.io import Spinner from conda.models.match_spec import MatchSpec from .dependencies import analyze_dependencies - from .main import (validate_target_env, get_prefix, ensure_externally_managed, run_conda_install, run_pip_install,) + from .main import (validate_target_env, ensure_externally_managed, run_conda_install, run_pip_install) + from .utils import get_prefix prefix = get_prefix(args.prefix, args.name) packages_not_installed = validate_target_env(prefix, args.packages) @@ -73,6 +85,9 @@ def execute(args: argparse.Namespace) -> None: *packages_to_process, prefer_on_conda=not args.force_with_pip, channel=args.conda_channel, + backend=args.backend, + prefix=prefix, + force_reinstall=args.force_reinstall, ) conda_match_specs = [] @@ -104,7 +119,11 @@ def execute(args: argparse.Namespace) -> None: print(" -", spec) if not args.yes and not args.json: - confirm_yn(dry_run=args.dry_run) + if conda_match_specs or pypi_specs: + confirm_yn(dry_run=False) # we let conda handle the dry-run exit below + else: + print("Nothing to do.", file=sys.stderr) + return 0 if conda_match_specs: if not args.quiet or not args.json: diff --git a/conda_pip/dependencies/__init__.py b/conda_pip/dependencies/__init__.py new file mode 100644 index 0000000..49ba314 --- /dev/null +++ b/conda_pip/dependencies/__init__.py @@ -0,0 +1,155 @@ +""" """ + +from __future__ import annotations + +import os +from collections import defaultdict +from logging import getLogger +from functools import lru_cache +from io import BytesIO +from typing import Literal + +import requests +from conda.models.match_spec import MatchSpec +from conda_libmamba_solver.index import LibMambaIndexHelper as Index +from ruamel.yaml import YAML + +yaml = YAML(typ="safe") +logger = getLogger(f"conda.{__name__}") + +BACKENDS = ( + "grayskull", + "pip", +) +NAME_MAPPINGS = { + "grayskull": "https://github.com/conda/grayskull/raw/main/grayskull/strategy/config.yaml", + "cf-graph-countyfair": 
"https://github.com/regro/cf-graph-countyfair/raw/master/mappings/pypi/grayskull_pypi_mapping.yaml", +} + + +def analyze_dependencies( + *packages: str, + prefer_on_conda: bool = True, + channel: str = "conda-forge", + backend: Literal["grayskull", "pip"] = "pip", + prefix: str | os.PathLike | None = None, + force_reinstall: bool = False, +) -> tuple[dict[str, list[str]], dict[str, list[str]]]: + conda_deps = defaultdict(list) + needs_analysis = [] + for package in packages: + match_spec = MatchSpec(package) + pkg_name = match_spec.name + # pkg_version = match_spec.version + if prefer_on_conda and _is_pkg_on_conda(pkg_name, channel=channel): + # TODO: check if version is available too + logger.info("Package %s is available on %s. Skipping analysis.", pkg_name, channel) + conda_spec = _pypi_spec_to_conda_spec(package) + conda_deps[pkg_name].append(conda_spec) + continue + needs_analysis.append(package) + + if not needs_analysis: + return conda_deps, {} + + if backend == "grayskull": + from .grayskull import _analyze_with_grayskull + + found_conda_deps, pypi_deps = _analyze_with_grayskull( + *needs_analysis, prefer_on_conda=prefer_on_conda, channel=channel + ) + elif backend == "pip": + from .pip import _analyze_with_pip + + python_deps, pypi_deps = _analyze_with_pip( + *needs_analysis, + prefix=prefix, + force_reinstall=force_reinstall, + ) + found_conda_deps, pypi_deps = _classify_dependencies( + pypi_deps, + prefer_on_conda=prefer_on_conda, + channel=channel, + ) + found_conda_deps.update(python_deps) + else: + raise ValueError(f"Unknown backend {backend}") + + for name, specs in found_conda_deps.items(): + conda_deps[name].extend(specs) + + # deduplicate + conda_deps = {name: list(dict.fromkeys(specs)) for name, specs in conda_deps.items()} + pypi_deps = {name: list(dict.fromkeys(specs)) for name, specs in pypi_deps.items()} + return conda_deps, pypi_deps + + +def _classify_dependencies( + deps_from_pypi: dict[str, list[str]], + prefer_on_conda: bool = True, + channel: str = "conda-forge", +) -> tuple[dict[str, list[str]], dict[str, list[str]]]: + pypi_deps = defaultdict(list) + conda_deps = defaultdict(list) + for depname, deps in deps_from_pypi.items(): + if prefer_on_conda and _is_pkg_on_conda(depname, channel=channel): + conda_depname = _pypi_spec_to_conda_spec(depname, channel=channel).name + deps_mapped_to_conda = [_pypi_spec_to_conda_spec(dep, channel=channel) for dep in deps] + conda_deps[conda_depname].extend(deps_mapped_to_conda) + else: + pypi_deps[depname].extend(deps) + return conda_deps, pypi_deps + + +@lru_cache(maxsize=None) +def _is_pkg_on_conda(pypi_spec: str, channel: str = "conda-forge"): + """ + Given a PyPI spec (name, version), try to find it on conda-forge. + """ + conda_spec = _pypi_spec_to_conda_spec(pypi_spec) + index = Index(channels=[channel]) + records = index.search(conda_spec) + return bool(records) + + +@lru_cache(maxsize=None) +def _pypi_to_conda_mapping(source="grayskull"): + try: + url = NAME_MAPPINGS[source] + except KeyError as exc: + raise ValueError(f"Invalid source {source}. Allowed: {NAME_MAPPINGS.keys()}") from exc + r = requests.get(url) + try: + r.raise_for_status() + except requests.HTTPError as exc: + logger.debug("Could not fetch mapping %s", url, exc_info=exc) + return {} + stream = BytesIO(r.content) + stream.seek(0) + return yaml.load(stream) + + +@lru_cache(maxsize=None) +def _pypi_spec_to_conda_spec(spec: str, channel: str = "conda-forge"): + """ + Tries to find the conda equivalent of a PyPI name. 
For that it relies + on known mappings (see `_pypi_to_conda_mapping`). If the PyPI name is + not found in any of the mappings, we assume the name is the same. + + Note that we don't currently have a way to disambiguate two different + projects that have the same name in PyPI and conda-forge (e.g. quetz, pixi). + We could improve this with API calls to metadata servers and compare sources, + but this is not currently implemented or even feasible. + """ + assert channel == "conda-forge", "Only channel=conda-forge is supported for now" + match_spec = MatchSpec(spec) + conda_name = pypi_name = match_spec.name + for source in NAME_MAPPINGS: + mapping = _pypi_to_conda_mapping(source) + if not mapping: + continue + entry = mapping.get(pypi_name, {}) + conda_name = entry.get("conda_forge") or entry.get("conda_name") or pypi_name + if conda_name != pypi_name: # we found a match! + return str(MatchSpec(match_spec, name=conda_name)) + return spec diff --git a/conda_pip/dependencies.py b/conda_pip/dependencies/grayskull.py similarity index 78% rename from conda_pip/dependencies.py rename to conda_pip/dependencies/grayskull.py index 856463f..16725f2 100644 --- a/conda_pip/dependencies.py +++ b/conda_pip/dependencies/grayskull.py @@ -1,5 +1,5 @@ -""" -""" +from __future__ import annotations + import os from logging import getLogger, ERROR from collections import defaultdict @@ -21,19 +21,20 @@ keep_refs_alive = [] -def analyze_dependencies(*packages: str, prefer_on_conda=True, channel="conda-forge", backend="grayskull"): +def _analyze_with_grayskull( + *packages: str, + prefer_on_conda: bool = True, + channel: str = "conda-forge", +) -> tuple[dict[str, list[str]], dict[str, list[str]]]: conda_deps = defaultdict(list) pypi_deps = defaultdict(list) for package in packages: match_spec = MatchSpec(package) pkg_name = match_spec.name pkg_version = match_spec.version - if prefer_on_conda and is_pkg_available(pkg_name, channel=channel): - # TODO: check if version is available too - logger.info("Package %s is available on %s. 
Skipping analysis.", pkg_name, channel) - conda_deps[pkg_name].append(f"{channel}::{package}") - continue - conda_deps_map, pypi_deps_map, visited_pypi_map = _recursive_dependencies(pkg_name, pkg_version) + conda_deps_map, pypi_deps_map, visited_pypi_map = _recursive_grayskull( + pkg_name, pkg_version + ) for name, specs in conda_deps_map.items(): conda_deps[name].extend(specs) for name, specs in pypi_deps_map.items(): @@ -45,34 +46,30 @@ def analyze_dependencies(*packages: str, prefer_on_conda=True, channel="conda-fo spec += f"=={version}" pypi_deps[name].append(spec) - # deduplicate - conda_deps = {name: list(dict.fromkeys(specs)) for name, specs in conda_deps.items()} - pypi_deps = {name: list(dict.fromkeys(specs)) for name, specs in pypi_deps.items()} - return conda_deps, pypi_deps -def _recursive_dependencies( - pkg_name, - pkg_version="", - conda_deps_map=None, - pypi_deps_map=None, - visited_pypi_map=None, -): +def _recursive_grayskull( + pkg_name: str, + pkg_version: str = "", + conda_deps_map: dict[str, list[str]] | None = None, + pypi_deps_map: dict[str, list[str]] | None = None, + visited_pypi_map: dict[str, list[str]] | None = None, +) -> tuple[dict[str, list[str]], dict[str, list[str]], dict[str, list[str]]]: conda_deps_map = conda_deps_map or defaultdict(list) pypi_deps_map = pypi_deps_map or defaultdict(list) visited_pypi_map = visited_pypi_map or defaultdict(list) if (pkg_name, pkg_version) in visited_pypi_map: return conda_deps_map, pypi_deps_map, visited_pypi_map - conda_deps, pypi_deps, config = _analyze_with_grayskull(pkg_name, pkg_version) + conda_deps, pypi_deps, config = _analyze_one_with_grayskull(pkg_name, pkg_version) visited_pypi_map[(pkg_name, pkg_version)].append((config.name, config.version)) for name, dep in conda_deps.items(): conda_deps_map[name].append(dep) for name, dep in pypi_deps.items(): pypi_deps_map[name].append(dep) - _recursive_dependencies( + _recursive_grayskull( name, conda_deps_map=conda_deps_map, pypi_deps_map=pypi_deps_map, @@ -82,7 +79,10 @@ def _recursive_dependencies( return conda_deps_map, pypi_deps_map, visited_pypi_map -def _analyze_with_grayskull(package, version=""): +def _analyze_one_with_grayskull( + package: str, + version: str = "", +) -> tuple[dict[str, str], dict[str, str], GrayskullConfiguration]: config = GrayskullConfiguration(name=package, version=version, is_strict_cf=True) try: with redirect_stdout(os.devnull), redirect_stderr(os.devnull): diff --git a/conda_pip/dependencies/pip.py b/conda_pip/dependencies/pip.py new file mode 100644 index 0000000..8137463 --- /dev/null +++ b/conda_pip/dependencies/pip.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import json +import os +from logging import getLogger +from collections import defaultdict +from subprocess import run +from tempfile import NamedTemporaryFile + +from conda.exceptions import CondaError + +from ..utils import get_env_python + +logger = getLogger(f"conda.{__name__}") + + +def _analyze_with_pip( + *packages: str, + prefix: str | None = None, + force_reinstall: bool = False, +) -> tuple[dict[str, list[str]], dict[str, list[str]]]: + # pip can output to stdout via `--report -` (dash), but this + # creates issues on Windows due to undecodable characters on some + # project descriptions (e.g. charset-normalizer, amusingly), which + # makes pip crash internally. Probably a bug on their end. + # So we use a temporary file instead to work with bytes. 
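+    # The temporary file is read back below as pip's JSON installation
+    # report; only report["install"][*]["metadata"] is consumed ("name",
+    # "version" and, when present, "requires_python"), so the target
+    # environment needs a pip recent enough to support --dry-run/--report.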
+ json_output = NamedTemporaryFile(suffix=".json", delete=False) + json_output.close() # Prevent access errors on Windows + + cmd = [ + str(get_env_python(prefix)), + "-mpip", + "install", + "--dry-run", + "--ignore-installed", + *(("--force-reinstall",) if force_reinstall else ()), + "--report", + json_output.name, + *packages, + ] + process = run(cmd, capture_output=True, text=True) + if process.returncode != 0: + raise CondaError( + f"Failed to analyze dependencies with pip:\n" + f" command: {' '.join(cmd)}\n" + f" exit code: {process.returncode}\n" + f" stderr:\n{process.stderr}\n" + f" stdout:\n{process.stdout}\n" + ) + logger.debug("pip (%s) provided the following report:\n%s", " ".join(cmd), process.stdout) + + with open(json_output.name, "rb") as f: + # We need binary mode because the JSON output might + # contain weird unicode stuff (as part of the project + # description or README). + report = json.loads(f.read()) + os.unlink(json_output.name) + + deps_from_pip = defaultdict(list) + conda_deps = defaultdict(list) + for item in report["install"]: + metadata = item["metadata"] + logger.debug("Analyzing %s", metadata["name"]) + logger.debug(" metadata: %s", json.dumps(metadata, indent=2)) + deps_from_pip[metadata["name"]].append(f"{metadata['name']}=={metadata['version']}") + if python_version := metadata.get("requires_python"): + conda_deps["python"].append(f"python {python_version}") + + deps_from_pip = {name: list(dict.fromkeys(specs)) for name, specs in deps_from_pip.items()} + return conda_deps, deps_from_pip diff --git a/conda_pip/main.py b/conda_pip/main.py index 2ed351c..3d658a5 100644 --- a/conda_pip/main.py +++ b/conda_pip/main.py @@ -1,9 +1,9 @@ +from __future__ import annotations + import os -import sys -import sysconfig from logging import getLogger from pathlib import Path -from subprocess import run, check_output +from subprocess import run from typing import Iterable try: @@ -12,53 +12,18 @@ from importlib_resources import files as importlib_files from conda.history import History -from conda.base.context import context, locate_prefix_by_name +from conda.base.context import context from conda.core.prefix_data import PrefixData from conda.cli.python_api import run_command from conda.exceptions import CondaError, CondaSystemExit from conda.models.match_spec import MatchSpec +from .utils import get_env_python, get_externally_managed_path + logger = getLogger(f"conda.{__name__}") HERE = Path(__file__).parent.resolve() -def get_prefix(prefix: os.PathLike = None, name: str = None) -> Path: - if prefix: - return Path(prefix) - elif name: - return Path(locate_prefix_by_name(name)) - else: - return Path(context.target_prefix) - - -def get_env_python(prefix: os.PathLike = None) -> Path: - prefix = Path(prefix or sys.prefix) - if os.name == "nt": - return prefix / "python.exe" - return prefix / "bin" / "python" - - -def get_env_stdlib(prefix: os.PathLike = None) -> Path: - prefix = Path(prefix or sys.prefix) - if str(prefix) == sys.prefix: - return Path(sysconfig.get_path("stdlib")) - return Path(check_output([get_env_python(prefix), "-c", "import sysconfig; print(sysconfig.get_paths()['stdlib'])"], text=True).strip()) - - -def get_externally_managed_path(prefix: os.PathLike = None) -> Path: - prefix = Path(prefix or sys.prefix) - if os.name == "nt": - yield Path(prefix, "Lib", "EXTERNALLY-MANAGED") - else: - found = False - for python_dir in sorted(Path(prefix, "lib").glob("python*")): - if python_dir.is_dir(): - found = True - yield Path(python_dir, "EXTERNALLY-MANAGED") - if 
not found: - raise ValueError("Could not locate EXTERNALLY-MANAGED file") - - def validate_target_env(path: Path, packages: Iterable[str]) -> Iterable[str]: context.validate_configuration() pd = PrefixData(path, pip_interop_enabled=True) @@ -81,13 +46,13 @@ def validate_target_env(path: Path, packages: Iterable[str]) -> Iterable[str]: def run_conda_install( prefix: Path, specs: Iterable[MatchSpec], - dry_run=False, - quiet=False, - verbosity=0, - force_reinstall=False, - yes=False, - json=False, -): + dry_run: bool = False, + quiet: bool = False, + verbosity: int = 0, + force_reinstall: bool = False, + yes: bool = False, + json: bool = False, +) -> int: if not specs: return 0 @@ -117,14 +82,14 @@ def run_conda_install( def run_pip_install( prefix: Path, - specs, - upgrade=False, - dry_run=False, - quiet=False, - verbosity=0, - force_reinstall=False, - yes=False, -): + specs: Iterable[str], + upgrade: bool = False, + dry_run: bool = False, + quiet: bool = False, + verbosity: int = 0, + force_reinstall: bool = False, + yes: bool = False, +) -> int: if not specs: return 0 command = [ diff --git a/conda_pip/utils.py b/conda_pip/utils.py new file mode 100644 index 0000000..3931e56 --- /dev/null +++ b/conda_pip/utils.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +import os +import sys +import sysconfig +from logging import getLogger +from pathlib import Path +from subprocess import check_output +from typing import Iterator + +from conda.base.context import context, locate_prefix_by_name + + +logger = getLogger(f"conda.{__name__}") + + +def get_prefix(prefix: os.PathLike = None, name: str = None) -> Path: + if prefix: + return Path(prefix) + elif name: + return Path(locate_prefix_by_name(name)) + else: + return Path(context.target_prefix) + + +def get_env_python(prefix: os.PathLike = None) -> Path: + prefix = Path(prefix or sys.prefix) + if os.name == "nt": + return prefix / "python.exe" + return prefix / "bin" / "python" + + +def get_env_stdlib(prefix: os.PathLike = None) -> Path: + prefix = Path(prefix or sys.prefix) + if str(prefix) == sys.prefix: + return Path(sysconfig.get_path("stdlib")) + return Path( + check_output( + [ + get_env_python(prefix), + "-c", + "import sysconfig; print(sysconfig.get_paths()['stdlib'])", + ], + text=True, + ).strip() + ) + + +def get_externally_managed_path(prefix: os.PathLike = None) -> Iterator[Path]: + prefix = Path(prefix or sys.prefix) + if os.name == "nt": + yield Path(prefix, "Lib", "EXTERNALLY-MANAGED") + else: + found = False + for python_dir in sorted(Path(prefix, "lib").glob("python*")): + if python_dir.is_dir(): + found = True + yield Path(python_dir, "EXTERNALLY-MANAGED") + if not found: + raise ValueError("Could not locate EXTERNALLY-MANAGED file") diff --git a/recipe/meta.yaml b/recipe/meta.yaml index c9043d7..b5bcf34 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -33,6 +33,7 @@ requirements: - pip >=23.0.1 - grayskull - importlib_resources + - conda-libmamba-solver test: imports: @@ -40,7 +41,7 @@ test: - conda_pip.main commands: - conda pip --help - - python -c "from conda_pip.main import get_env_stdlib; assert (get_env_stdlib() / 'EXTERNALLY-MANAGED').exists()" + - python -c "from conda_pip.utils import get_env_stdlib; assert (get_env_stdlib() / 'EXTERNALLY-MANAGED').exists()" - pip install requests && exit 1 || exit 0 about: diff --git a/tests/test_install.py b/tests/test_install.py index f0bed66..c9f83b5 100644 --- a/tests/test_install.py +++ b/tests/test_install.py @@ -2,12 +2,37 @@ import pytest +from 
conda.core.prefix_data import PrefixData +from conda.models.match_spec import MatchSpec from conda.testing import CondaCLIFixture, TmpEnvFixture -from conda.testing.integration import package_is_installed +from conda_pip.dependencies import BACKENDS -@pytest.mark.parametrize("spec", ["numpy", "numpy=1.20"]) -def test_conda_pip_install_numpy(tmp_env: TmpEnvFixture, conda_cli: CondaCLIFixture, spec: str): + +@pytest.mark.parametrize("backend", BACKENDS) +@pytest.mark.parametrize( + "pypi_spec,conda_spec,channel", + [ + ("numpy", "", "conda-forge"), + ("numpy=1.20", "", "conda-forge"), + # build was originally published as build in conda-forge + # and later renamed to python-build; conda-forge::build is + # only available til 0.7, but conda-forge::python-build has 1.x + ("build>=1", "python-build>=1", "conda-forge"), + # these won't be ever published in conda-forge, I guess + ("aaargh", None, "pypi"), + ("5-exercise-upload-to-pypi", None, "pypi"), + ], +) +def test_conda_pip_install( + tmp_env: TmpEnvFixture, + conda_cli: CondaCLIFixture, + pypi_spec: str, + conda_spec: str, + channel: str, + backend: str, +): + conda_spec = conda_spec or pypi_spec with tmp_env("python=3.9", "pip") as prefix: out, err, rc = conda_cli( "pip", @@ -15,10 +40,29 @@ def test_conda_pip_install_numpy(tmp_env: TmpEnvFixture, conda_cli: CondaCLIFixt prefix, "--yes", "install", - spec, + "--backend", + backend, + pypi_spec, ) print(out) print(err, file=sys.stderr) assert rc == 0 - assert spec in out - assert package_is_installed(str(prefix), spec) + # One these package names will be mentioned: + assert any( + name in out + for name in ( + MatchSpec(pypi_spec).name, + MatchSpec(pypi_spec).name.replace("-", "_"), # pip normalizes this + MatchSpec(conda_spec).name + ) + ) + PrefixData._cache_.clear() + if channel == "pypi": + pd = PrefixData(str(prefix), pip_interop_enabled=True) + records = list(pd.query(pypi_spec)) + else: + pd = PrefixData(str(prefix), pip_interop_enabled=False) + records = list(pd.query(conda_spec)) + assert len(records) == 1 + assert records[0].channel.name == channel + diff --git a/tests/test_validate.py b/tests/test_validate.py index e65a083..4083b58 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -9,7 +9,7 @@ from conda.testing.integration import package_is_installed from pytest_mock import MockerFixture -from conda_pip.main import get_env_python, get_env_stdlib +from conda_pip.utils import get_env_python, get_env_stdlib def test_pip_required_in_target_env(tmp_env: TmpEnvFixture, conda_cli: CondaCLIFixture, monkeypatch):
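For reviewers, a minimal sketch of how the new backend plumbing can be exercised end to end. The environment path below is a placeholder and the package spec is borrowed from the PyPI-only cases in the test matrix; neither is mandated by this patch.

# Illustrative only: the prefix path is hypothetical; "aaargh" is one of the
# PyPI-only packages used in tests/test_install.py, so the analysis actually
# reaches the selected backend instead of short-circuiting to conda-forge.
from conda_pip.dependencies import analyze_dependencies

conda_deps, pypi_deps = analyze_dependencies(
    "aaargh",
    prefer_on_conda=True,
    channel="conda-forge",
    backend="pip",                 # or "grayskull"; see BACKENDS
    prefix="/path/to/target/env",  # the pip backend runs this env's python -m pip --dry-run
)
# Both mappings are {name: [spec, ...]} dicts with duplicates removed; with
# backend="pip", conda_deps may also gain a "python" entry derived from the
# requires_python metadata in pip's report.

The equivalent CLI path, as driven by tests/test_install.py, is: conda pip -p /path/to/target/env --yes install --backend pip aaargh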