diff --git a/grayskull/strategy/py_base.py b/grayskull/strategy/py_base.py index de128c80..61d09da7 100644 --- a/grayskull/strategy/py_base.py +++ b/grayskull/strategy/py_base.py @@ -696,18 +696,30 @@ def download_sdist_pkg(sdist_url: str, dest: str, name: str | None = None): def merge_deps_toml_setup(setup_deps: list, toml_deps: list) -> list: re_split = re.compile(r"\s+|>|=|<|~|!") - all_deps = defaultdict(list) - for dep in toml_deps + setup_deps: - if dep.strip(): - dep_name = re_split.split(dep)[0] - if dep_name not in all_deps: - if dep_name.replace("_", "-") in all_deps: - dep_name = dep_name.replace("_", "-") - elif dep_name.replace("-", "_") in all_deps: - dep_name = dep_name.replace("-", "_") - all_deps[dep_name].append(dep) - - return [deps[0] for deps in all_deps.values()] + # drop any empty deps + setup_deps = [dep for dep in setup_deps if dep.strip()] + toml_deps = [dep for dep in toml_deps if dep.strip()] + + # get dep names + toml_dep_names = [re_split.split(dep)[0] for dep in toml_deps] + setup_dep_names = [re_split.split(dep)[0] for dep in setup_deps] + + # prefer toml over setup; only add setup deps if not found in toml + merged_deps = toml_deps + for dep_name, dep in zip(setup_dep_names, setup_deps): + if not dep_name.strip(): + continue + alternatives = [ + dep_name, + dep_name.replace("_", "-"), + dep_name.replace("-", "_"), + ] + found = any([alternative in toml_dep_names for alternative in alternatives]) + # only add the setup dep if no alternative name was found + if not found: + merged_deps.append(dep) + + return merged_deps def merge_setup_toml_metadata(setup_metadata: dict, pyproject_metadata: dict) -> dict: diff --git a/grayskull/strategy/pypi.py b/grayskull/strategy/pypi.py index c1cddcc6..cd8ea780 100644 --- a/grayskull/strategy/pypi.py +++ b/grayskull/strategy/pypi.py @@ -185,18 +185,16 @@ def merge_requires_dist(pypi_metadata: dict, sdist_metadata: dict) -> list: :param sdist_metadata: sdist metadata :return: list with all 
requirements """ - all_deps = sdist_metadata.get("install_requires") or [] - all_deps += pypi_metadata.get("requires_dist") or [] - re_search = re.compile(r";\s*extra") - all_deps = [pkg for pkg in all_deps if not re_search.search(pkg)] - current_pkg = pypi_metadata.get("name", "") - - requires_dist = [] - pypi_deps_name = set() - with progressbar_with_status(len(all_deps)) as bar: - for pos, sdist_pkg in enumerate(all_deps, 1): - match_deps = RE_DEPS_NAME.match(sdist_pkg) + def _get_pkg_names(deps, current_pkg, bar, pos): + """ + Get pkg names for each dep, dropping deps where no pkg name is found + """ + new_deps = [] + pkg_names = [] + for dep in deps: + pos += 1 + match_deps = RE_DEPS_NAME.match(dep) if not match_deps: bar.update(pos) continue @@ -205,11 +203,32 @@ def merge_requires_dist(pypi_metadata: dict, sdist_metadata: dict) -> list: bar.update(pos, pkg_name=pkg_name) if current_pkg and current_pkg == pkg_name: continue - if pkg_name in pypi_deps_name: - continue + new_deps.append(dep.replace(match_deps, pkg_name)) + pkg_names.append(pkg_name) + return pkg_names, new_deps + + sdist_deps = sdist_metadata.get("install_requires") or [] + pypi_deps = pypi_metadata.get("requires_dist") or [] + + re_search = re.compile(r";\s*extra") + sdist_deps = [pkg for pkg in sdist_deps if not re_search.search(pkg)] + pypi_deps = [pkg for pkg in pypi_deps if not re_search.search(pkg)] + current_pkg = pypi_metadata.get("name", "") + + count = len(sdist_deps) + len(pypi_deps) + # progress bar here because calling normalize_pkg_name() on each package + # takes awhile + with progressbar_with_status(count) as bar: + pos = 0 + sdist_pkgs, sdist_deps = _get_pkg_names(sdist_deps, current_pkg, bar, pos) + pypi_pkgs, pypi_deps = _get_pkg_names(pypi_deps, current_pkg, bar, pos) + + # prefer sdist over pypi; only add pypi deps if not found in sdist + requires_dist = sdist_deps + for pkg_name, dep in zip(pypi_pkgs, pypi_deps): + if pkg_name not in sdist_pkgs: + requires_dist.append(dep) 
- pypi_deps_name.add(pkg_name) - requires_dist.append(sdist_pkg.replace(match_deps, pkg_name)) return requires_dist @@ -322,8 +341,13 @@ def get_pypi_metadata(config: Configuration) -> PypiMetadata: def get_run_req_from_requires_dist(requires_dist: list, config: Configuration) -> list: """Get the run requirements looking for the `requires_dist` key present in the metadata""" + re_selector = re.compile(r"\s+#\s+\[.*\]", re.DOTALL) run_req = [] for req in requires_dist: + if re_selector.search(req): + # if there's already a selector, make sure we're arch + config.is_arch = True + list_raw_requirements = req.split(";") selector = "" if len(list_raw_requirements) > 1: diff --git a/grayskull/utils.py b/grayskull/utils.py index 88242e47..dcd7f5d1 100644 --- a/grayskull/utils.py +++ b/grayskull/utils.py @@ -128,7 +128,7 @@ def rm_duplicated_deps(all_requirements: list | set | None) -> list | None: # and underscores converted to dashes. The value is the requirement itself, # as it should be added. # (This is order-preserving since dicts are ordered by first insertion.) 
- new_reqs: dict[str, str] = {} + new_reqs: dict[tuple[str, str], str] = {} re_split = re.compile(r"\s+(|>|=|<|~|!|#)+") for dep in all_requirements: if dep.strip().startswith(("{{", "<{")): @@ -136,6 +136,12 @@ def rm_duplicated_deps(all_requirements: list | set | None) -> list | None: continue dep_name, *constrains = re_split.split(dep.strip()) dep_name = dep_name.strip() + + if "#" in dep: + selector = dep.split("#")[-1] + else: + selector = "" + constrains = [ c.strip() for c in constrains @@ -143,17 +149,20 @@ def rm_duplicated_deps(all_requirements: list | set | None) -> list | None: ] canonicalized = dep_name.replace("_", "-").lower() constrains.insert(0, dep_name) - if canonicalized in new_reqs: + # a canonicalized dependency is only redundant if it also has the same + # selector as a previous dependency + key = (canonicalized, selector) + if key in new_reqs: # In order to break ties deterministically, we prioritize the requirement # which is alphanumerically lowest. This happens to prioritize the "-" # character over "_". # Example: given "importlib_metadata" and "importlib-metadata", we will # keep "importlib-metadata" because it is alphabetically lower. 
- previous_req = new_reqs[canonicalized] + previous_req = new_reqs[key] if len(dep) > len(previous_req) or "-" in dep_name: - new_reqs[canonicalized] = " ".join(constrains) + new_reqs[key] = " ".join(constrains) else: - new_reqs[canonicalized] = " ".join(constrains) + new_reqs[key] = " ".join(constrains) return [re.sub(r"\s+(#)", " \\1", v.strip()) for v in new_reqs.values()] @@ -168,6 +177,7 @@ def format_dependencies(all_dependencies: list, name: str) -> list: formatted_dependencies = [] re_deps = re.compile(r"^\s*([\.a-zA-Z0-9_-]+)\s*(.*)\s*$", re.MULTILINE | re.DOTALL) re_remove_space = re.compile(r"([<>!=]+)\s+") + re_selector = re.compile(r"\s+#\s+\[.*\]", re.DOTALL) re_remove_tags = re.compile(r"\s*(\[.*\])", re.DOTALL) re_remove_comments = re.compile(r"\s+#.*", re.DOTALL) for req in all_dependencies: @@ -184,6 +194,10 @@ def format_dependencies(all_dependencies: list, name: str) -> list: if len(match_req) > 1: deps_name = " ".join(match_req) deps_name = re_remove_space.sub(r"\1", deps_name.strip()) + if re_selector.search(deps_name): + # don't want to remove selectors + formatted_dependencies.append(deps_name) + continue deps_name = re_remove_tags.sub(r" ", deps_name.strip()) deps_name = re_remove_comments.sub("", deps_name) formatted_dependencies.append(deps_name.strip()) diff --git a/tests/test_pypi.py b/tests/test_pypi.py index b802e34d..94de9ffc 100644 --- a/tests/test_pypi.py +++ b/tests/test_pypi.py @@ -682,6 +682,119 @@ def test_merge_pypi_sdist_metadata(): ) +def test_pypi_metadata_constraints_for_python_versions(): + config = Configuration(name="apache-airflow-providers-trino", version="6.0.0") + pypi_metadata = get_pypi_metadata(config) + assert sorted(pypi_metadata["requires_dist"]) == sorted( + [ + "apache-airflow-providers-common-sql>=1.20.0", + "apache-airflow>=2.9.0", + 'pandas<2.2,>=1.5.3; python_version < "3.9"', + 'pandas<2.2,>=2.1.2; python_version >= "3.9"', + "trino>=0.318.0", + 'apache-airflow-providers-google; extra == "google"', + 
'apache-airflow-providers-openlineage; extra == "openlineage"', + ] + ) + + config = Configuration(name="databricks-sql-connector", version="3.7.0") + pypi_metadata = get_pypi_metadata(config) + assert sorted(pypi_metadata["requires_dist"]) == sorted( + [ + 'alembic<2.0.0,>=1.0.11; extra == "alembic"', + "lz4<5.0.0,>=4.0.2", + 'numpy>=1.16.6; python_version >= "3.8" and python_version < "3.11"', + 'numpy>=1.23.4; python_version >= "3.11"', + "oauthlib<4.0.0,>=3.1.0", + "openpyxl<4.0.0,>=3.0.10", + 'pandas<2.3.0,>=1.2.5; python_version >= "3.8"', + "pyarrow>=14.0.1", + "requests<3.0.0,>=2.18.1", + 'sqlalchemy>=2.0.21; extra == "sqlalchemy" or extra == "alembic"', + "thrift<0.21.0,>=0.16.0", + "urllib3>=1.26", + ] + ) + + +def test_sdist_metadata_from_toml_project_dependencies(): + config = Configuration(name="apache-airflow-providers-trino", version="6.0.0") + pypi_metadata = get_pypi_metadata(config) + sdist_metadata = get_sdist_metadata(pypi_metadata["sdist_url"], config) + assert sorted(sdist_metadata["install_requires"]) == sorted( + [ + "apache-airflow-providers-common-sql>=1.20.0", + "apache-airflow>=2.9.0", + 'pandas>=1.5.3,<2.2;python_version<"3.9"', + 'pandas>=2.1.2,<2.2;python_version>="3.9"', + "trino>=0.318.0", + "python ~=3.9", + ] + ) + + +def test_sdist_metadata_from_toml_poetry_dependencies(): + config = Configuration(name="databricks-sql-connector", version="3.7.0") + pypi_metadata = get_pypi_metadata(config) + sdist_metadata = get_sdist_metadata(pypi_metadata["sdist_url"], config) + assert sorted(sdist_metadata["install_requires"]) == sorted( + [ + "python >=3.8.0,<4.0.0", + "thrift >=0.16.0,<0.21.0", + "pandas >=1.2.5,<2.3.0 # [py>=38]", + "pyarrow >=14.0.1", + "lz4 >=4.0.2,<5.0.0", + "requests >=2.18.1,<3.0.0", + "oauthlib >=3.1.0,<4.0.0", + "numpy >=1.16.6 # [py>=38 and py<311]", + "numpy >=1.23.4 # [py>=311]", + "openpyxl >=3.0.10,<4.0.0", + "urllib3 >=1.26", + ] + ) + + +def test_merge_pypi_sdist_metadata_from_toml(): + # tests merging 
pyproject.toml dependencies from poetry with pypi data, + # including multiple numpy constraints with python version selectors + config = Configuration(name="databricks-sql-connector", version="3.7.0") + pypi_metadata = get_pypi_metadata(config) + sdist_metadata = get_sdist_metadata(pypi_metadata["sdist_url"], config) + merged_data = merge_pypi_sdist_metadata(pypi_metadata, sdist_metadata, config) + assert sorted(merged_data["requires_dist"]) == sorted( + [ + "python >=3.8.0,<4.0.0", + "thrift >=0.16.0,<0.21.0", + "pandas >=1.2.5,<2.3.0 # [py>=38]", + "pyarrow >=14.0.1", + "lz4 >=4.0.2,<5.0.0", + "requests >=2.18.1,<3.0.0", + "oauthlib >=3.1.0,<4.0.0", + "numpy >=1.16.6 # [py>=38 and py<311]", + "numpy >=1.23.4 # [py>=311]", + "openpyxl >=3.0.10,<4.0.0", + "urllib3 >=1.26", + ] + ) + + # tests merging pyproject.toml project dependencies with pypi data, + # including multiple pandas constraints with python version selectors + config = Configuration(name="apache-airflow-providers-trino", version="6.0.0") + pypi_metadata = get_pypi_metadata(config) + sdist_metadata = get_sdist_metadata(pypi_metadata["sdist_url"], config) + merged_data = merge_pypi_sdist_metadata(pypi_metadata, sdist_metadata, config) + assert sorted(merged_data["requires_dist"]) == sorted( + [ + "apache-airflow-providers-common-sql>=1.20.0", + "apache-airflow>=2.9.0", + 'pandas>=1.5.3,<2.2;python_version<"3.9"', + 'pandas>=2.1.2,<2.2;python_version>="3.9"', + "trino>=0.318.0", + "python ~=3.9", + ] + ) + + def test_update_requirements_with_pin(): req = { "build": ["<{ compiler('c') }}"], @@ -787,6 +900,55 @@ def test_run_requirements_sdist(): ] ) + # a more complex example with selectors + config = Configuration(name="apache-airflow-providers-trino", version="6.0.0") + recipe = GrayskullFactory.create_recipe("pypi", config) + assert "noarch" not in recipe["build"] + selectors = { + "python >=3.9,<4.dev0": None, + "apache-airflow-providers-common-sql >=1.20.0": None, + "apache-airflow >=2.9.0": None, + 
"pandas >=1.5.3,<2.2": "[py<39]", + "pandas >=2.1.2,<2.2": "[py>=39]", + "trino-python-client >=0.318.0": None, + } + + for dep in recipe["requirements"]["run"]: + dep_str = str(dep) + assert dep_str in selectors + selector = selectors[dep_str] + if dep.inline_comment is None: + assert selector is None + else: + assert str(dep.inline_comment) == selector + + # another example with selectors + config = Configuration(name="databricks-sql-connector", version="3.7.0") + recipe = GrayskullFactory.create_recipe("pypi", config) + assert "noarch" not in recipe["build"] + selectors = { + "python >=3.8.0,<4.0.0": None, + "thrift >=0.16.0,<0.21.0": None, + "pandas >=1.2.5,<2.3.0": "[py>=38]", + "pyarrow >=14.0.1": None, + "lz4 >=4.0.2,<5.0.0": None, + "requests >=2.18.1,<3.0.0": None, + "oauthlib >=3.1.0,<4.0.0": None, + "numpy >=1.16.6": "[py>=38 and py<311]", + "numpy >=1.23.4": "[py>=311]", + "openpyxl >=3.0.10,<4.0.0": None, + "urllib3 >=1.26": None, + } + + for dep in recipe["requirements"]["run"]: + dep_str = str(dep) + assert dep_str in selectors + selector = selectors[dep_str] + if dep.inline_comment is None: + assert selector is None + else: + assert str(dep.inline_comment) == selector + def test_format_host_requirements(): assert sorted(format_dependencies(["setuptools>=40.0", "pkg2"], "pkg1")) == sorted(