From 258709b1acefb32e9ad88d5839d65f2e59d92d29 Mon Sep 17 00:00:00 2001 From: Charles Coggins Date: Wed, 21 Aug 2024 12:35:00 -0500 Subject: [PATCH] feat: add option to exclude dependency files (#462) This PR adds a new feature to specify exclusions to the dependency files found/detected when not explicitly specified by argument. The new option accepts gitignore-style patterns, using the [`pathspec` third-party library](https://python-path-specification.readthedocs.io/en/stable/readme.html). Closes phylum-dev/roadmap#462 --- docs/integrations/azure_pipelines.md | 10 +++++++ docs/integrations/bitbucket_pipelines.md | 10 +++++++ docs/integrations/git_precommit.md | 9 ++++++ docs/integrations/gitlab_ci.md | 10 +++++++ docs/integrations/jenkins.md | 9 ++++++ poetry.lock | 13 +++++++- pyproject.toml | 1 + src/phylum/ci/ci_base.py | 38 ++++++++++++++++++++++-- src/phylum/ci/cli.py | 8 +++++ 9 files changed, 105 insertions(+), 3 deletions(-) diff --git a/docs/integrations/azure_pipelines.md b/docs/integrations/azure_pipelines.md index 595e1493..25444c4d 100644 --- a/docs/integrations/azure_pipelines.md +++ b/docs/integrations/azure_pipelines.md @@ -311,6 +311,16 @@ view the [script options output][script_options] for the latest release. # Specify multiple explicit dependency file paths. - script: phylum-ci --depfile requirements-prod.txt Cargo.toml path/to/dependency.file + # Exclude dependency files by gitignore-style pattern. + - script: phylum-ci --exclude "requirements-*.txt" + + # Specify multiple exclusion patterns. + - script: phylum-ci --exclude "build.gradle" "tests/fixtures/" + - script: | + phylum-ci \ + --exclude "/requirements-*.txt" \ + --exclude "build.gradle" "fixtures/" + # Force analysis for all dependencies in a manifest file. This is especially useful # for *workspace* manifest files where there is no companion lockfile (e.g., libraries). - script: phylum-ci --force-analysis --all-deps --depfile Cargo.toml diff --git a/docs/integrations/bitbucket_pipelines.md b/docs/integrations/bitbucket_pipelines.md index 6f54a9a2..15895a6a 100644 --- a/docs/integrations/bitbucket_pipelines.md +++ b/docs/integrations/bitbucket_pipelines.md @@ -309,6 +309,16 @@ view the [script options output][script_options] for the latest release. # Specify multiple explicit dependency file paths. - phylum-ci --depfile requirements-prod.txt Cargo.toml path/to/dependency.file + # Exclude dependency files by gitignore-style pattern. + - phylum-ci --exclude "requirements-*.txt" + + # Specify multiple exclusion patterns. + - phylum-ci --exclude "build.gradle" "tests/fixtures/" + - | + phylum-ci \ + --exclude "/requirements-*.txt" \ + --exclude "build.gradle" "fixtures/" + # Force analysis for all dependencies in a manifest file. This is especially useful # for *workspace* manifest files where there is no companion lockfile (e.g., libraries). - phylum-ci --force-analysis --all-deps --depfile Cargo.toml diff --git a/docs/integrations/git_precommit.md b/docs/integrations/git_precommit.md index 597113f7..4cf1cc4e 100644 --- a/docs/integrations/git_precommit.md +++ b/docs/integrations/git_precommit.md @@ -149,6 +149,15 @@ with `--help` output as specified in the [Usage section of the top-level README. - --depfile=Cargo.toml - --depfile=path/to/dependency.file + # Exclude dependency files by gitignore-style pattern. + args: [--exclude=requirements-*.txt] + + # Specify multiple exclusion patterns. + args: + - --exclude=/requirements-*.txt + - --exclude=build.gradle + - --exclude=fixtures/ + # Force analysis for all dependencies in a manifest file. This is especially useful # for *workspace* manifest files where there is no companion lockfile (e.g., libraries). args: [--force-analysis, --all-deps, --depfile=Cargo.toml] diff --git a/docs/integrations/gitlab_ci.md b/docs/integrations/gitlab_ci.md index 3503b30d..69729b99 100644 --- a/docs/integrations/gitlab_ci.md +++ b/docs/integrations/gitlab_ci.md @@ -297,6 +297,16 @@ view the [script options output][script_options] for the latest release. # Specify multiple explicit dependency file paths. - phylum-ci --depfile requirements-prod.txt Cargo.toml path/to/dependency.file + # Exclude dependency files by gitignore-style pattern. + - phylum-ci --exclude "requirements-*.txt" + + # Specify multiple exclusion patterns. + - phylum-ci --exclude "build.gradle" "tests/fixtures/" + - | + phylum-ci \ + --exclude "/requirements-*.txt" \ + --exclude "build.gradle" "fixtures/" + # Force analysis for all dependencies in a manifest file. This is especially useful # for *workspace* manifest files where there is no companion lockfile (e.g., libraries). - phylum-ci --force-analysis --all-deps --depfile Cargo.toml diff --git a/docs/integrations/jenkins.md b/docs/integrations/jenkins.md index 734da8ca..6dfceb6c 100644 --- a/docs/integrations/jenkins.md +++ b/docs/integrations/jenkins.md @@ -297,6 +297,15 @@ release. // Specify multiple explicit dependency file paths. sh 'phylum-ci --depfile requirements-prod.txt Cargo.toml path/to/dependency.file' + // Exclude dependency files by gitignore-style pattern. + sh 'phylum-ci --exclude "requirements-*.txt"' + + // Specify multiple exclusion patterns. + sh 'phylum-ci --exclude "build.gradle" "tests/fixtures/"' + sh 'phylum-ci \ + --exclude "/requirements-*.txt" \ + --exclude "build.gradle" "fixtures/"' + // Force analysis for all dependencies in a manifest file. This is especially useful // for *workspace* manifest files where there is no companion lockfile (e.g., libraries). sh 'phylum-ci --force-analysis --all-deps --depfile Cargo.toml' diff --git a/poetry.lock b/poetry.lock index c33983d6..c99a83fa 100644 --- a/poetry.lock +++ b/poetry.lock @@ -829,6 +829,17 @@ files = [ {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, ] +[[package]] +name = "pathspec" +version = "0.12.1" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, + {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, +] + [[package]] name = "platformdirs" version = "4.2.2" @@ -1795,4 +1806,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "39105accb86f3d007901cc48e1750e33e93c68b58bc11c66eb478f50d1f2110a" +content-hash = "cd84e92223f6493b14c56ca9f66a59f7c51984c1b7a4ae1f03000c0a131dfc2d" diff --git a/pyproject.toml b/pyproject.toml index 7108e0fe..2f170840 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ cryptography = "*" packaging = "*" "ruamel.yaml" = "*" rich = "*" +pathspec = "*" [tool.poetry.group.test] optional = true diff --git a/src/phylum/ci/ci_base.py b/src/phylum/ci/ci_base.py index f8aafd15..5bbdd786 100644 --- a/src/phylum/ci/ci_base.py +++ b/src/phylum/ci/ci_base.py @@ -11,7 +11,7 @@ from collections.abc import Mapping from functools import cached_property, lru_cache from inspect import cleandoc -from itertools import starmap +from itertools import chain, starmap import json import os from pathlib import Path @@ -22,6 +22,7 @@ from typing import Optional from packaging.version import Version +import pathspec from rich.markdown import Markdown from phylum.ci.common import ( @@ -143,11 +144,13 @@ def depfiles(self) -> Depfiles: Dependency files provided as an input option will be preferred over any entries in the `.phylum_project` file. When no valid dependency files are provided otherwise, an attempt will be made to automatically detect them. + + Detected dependency files can be modified with exclusion patterns provided as an argument. """ arg_depfiles: Optional[list[list[Path]]] = self.args.depfile provided_arg_depfiles: DepfileEntries = [] if arg_depfiles: - # flatten the list of lists + # Flatten the list of lists provided_arg_depfiles = [DepfileEntry(path) for sub_list in arg_depfiles for path in sub_list] LOG.debug("Dependency files provided as arguments: %s", provided_arg_depfiles) valid_depfiles = self._filter_depfiles(provided_arg_depfiles) @@ -162,6 +165,7 @@ def depfiles(self) -> Depfiles: LOG.debug("Dependency files provided in `.phylum_project` file: %s", detected_depfiles) else: LOG.debug("Detected dependency files: %s", detected_depfiles) + detected_depfiles = self._exclude_depfiles(detected_depfiles) if arg_depfiles: # Ensure any depfiles provided as arguments that were already filtered out are not included again here detected_depfiles = list(set(detected_depfiles).difference(set(provided_arg_depfiles))) @@ -181,6 +185,36 @@ def depfiles(self) -> Depfiles: self.returncode = ReturnCode.NO_DEPFILES_PROVIDED raise SystemExit(self.returncode) + def _exclude_depfiles(self, provided_depfiles: DepfileEntries) -> DepfileEntries: + """Apply exclusion patterns to provided dependency files and return the remaining ones.""" + arg_exclusions: Optional[list[list[str]]] = self.args.exclude + if not arg_exclusions: + LOG.debug("No dependency file exclusion patterns provided.") + return provided_depfiles + + # Flatten the list of lists + provided_arg_exclusions = list(chain.from_iterable(arg_exclusions)) + LOG.debug("Exclusion patterns provided as arguments: %s", provided_arg_exclusions) + + try: + spec = pathspec.GitIgnoreSpec.from_lines(provided_arg_exclusions) + except pathspec.patterns.gitwildmatch.GitWildMatchPatternError as err: + msg = f""" + Could not parse provided gitignore-style exclusion pattern! + {err} + For more info, see: https://git-scm.com/docs/gitignore#_pattern_format + Continuing without exclusions ...""" + LOG.warning(cleandoc(msg)) + return provided_depfiles + + excluded_depfiles = [pdf for pdf in provided_depfiles if spec.match_file(pdf.path.relative_to(Path.cwd()))] + LOG.info("Dependency files excluded by matching patterns: %s", excluded_depfiles) + + included_depfiles = list(set(provided_depfiles).difference(set(excluded_depfiles))) + LOG.debug("Dependency files after exclusions: %s", included_depfiles) + + return included_depfiles + @progress_spinner("Filtering dependency files") def _filter_depfiles(self, provided_depfiles: DepfileEntries) -> Depfiles: """Filter potential dependency files and return the valid ones in sorted order.""" diff --git a/src/phylum/ci/cli.py b/src/phylum/ci/cli.py index 34bd55ec..5eb1cebf 100644 --- a/src/phylum/ci/cli.py +++ b/src/phylum/ci/cli.py @@ -121,6 +121,14 @@ def get_args(args: Optional[Sequence[str]] = None) -> tuple[argparse.Namespace, may not contain strict dependencies. In these cases, it is best to specify an explicit dependency file path. """, ) + analysis_group.add_argument( + "-e", + "--exclude", + action="append", + nargs="*", + help="""Gitignore-style exclusion patterns. Ignored when dependency files are specified explicitly by argument. + Specify patterns in quotes to prevent shell globbing. Patterns are applied relative to working directory.""", + ) analysis_group.add_argument( "-a", "--all-deps",