# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/scancode.io for support and download.


class EcosystemConfig:
    """
    Base class holding the ecosystem specific settings.

    Each ecosystem declares a subclass and overrides only the settings it
    needs; every setting left untouched keeps the empty default declared here.
    """

    # Name of the pipeline option this configuration belongs to.
    # Must be set by each ecosystem subclass.
    ecosystem_option = None

    # Package file extensions of this ecosystem to match against PurlDB.
    purldb_package_extensions = []

    # Resource file extensions of this ecosystem to match against PurlDB.
    purldb_resource_extensions = []

    # Document file extensions which do not require review.
    doc_extensions = []

    # Path patterns on the deployed side (to/) which do not need review,
    # even when they are not matched to the source side.
    deployed_resource_path_exclusions = []

    # Path patterns on the development/source side (from/) which should not
    # be considered, even if unmapped to the deployed side, when assessing
    # what to review on the deployed side.
    devel_resource_path_exclusions = []

    # Symbols found in ecosystem specific standard libraries, which are
    # not so useful in mapping.
    standard_symbols_to_exclude = []


class DefaultEcosystemConfig(EcosystemConfig):
    """Settings which are common across multiple ecosystems."""

    ecosystem_option = "Default"
    purldb_package_extensions = [".zip", ".tar.gz", ".tar.xz"]
    devel_resource_path_exclusions = ["*/tests/*"]
    doc_extensions = [".pdf", ".doc", ".docx", ".ppt", ".pptx", ".tex", ".odt", ".odp"]


class JavaEcosystemConfig(EcosystemConfig):
    """Settings for the Java ecosystem."""

    ecosystem_option = "Java"
    purldb_package_extensions = [".jar", ".war"]
    purldb_resource_extensions = [".class"]


class JavaScriptEcosystemConfig(EcosystemConfig):
    """Settings for the JavaScript ecosystem."""

    ecosystem_option = "JavaScript"
    purldb_resource_extensions = [
        ".map", ".js", ".mjs", ".ts", ".d.ts", ".jsx",
        ".tsx", ".css", ".scss", ".less", ".sass", ".soy",
    ]


class GoEcosystemConfig(EcosystemConfig):
    """Settings for the Go ecosystem."""

    ecosystem_option = "Go"
    purldb_resource_extensions = [".go"]


class RustEcosystemConfig(EcosystemConfig):
    """Settings for the Rust ecosystem."""

    ecosystem_option = "Rust"
    purldb_resource_extensions = [".rs"]


class RubyEcosystemConfig(EcosystemConfig):
    """Settings for the Ruby ecosystem."""

    ecosystem_option = "Ruby"
    purldb_package_extensions = [".gem"]
    purldb_resource_extensions = [".rb"]
    deployed_resource_path_exclusions = ["*checksums.yaml.gz*", "*metadata.gz*"]
@@ -64,6 +65,8 @@ def steps(cls): cls.flag_empty_files, cls.flag_whitespace_files, cls.flag_ignored_resources, + cls.load_ecosystem_config, + cls.load_ecosystem_config_ruby, cls.map_about_files, cls.map_checksum, cls.match_archives_to_purldb, @@ -91,33 +94,6 @@ def steps(cls): cls.create_local_files_packages, ) - purldb_package_extensions = [".jar", ".war", ".zip"] - purldb_resource_extensions = [ - ".map", - ".js", - ".mjs", - ".ts", - ".d.ts", - ".jsx", - ".tsx", - ".css", - ".scss", - ".less", - ".sass", - ".soy", - ".class", - ] - doc_extensions = [ - ".pdf", - ".doc", - ".docx", - ".ppt", - ".pptx", - ".tex", - ".odt", - ".odp", - ] - def get_inputs(self): """Locate the ``from`` and ``to`` input files.""" self.from_files, self.to_files = d2d.get_inputs(self.project) @@ -152,6 +128,15 @@ def flag_whitespace_files(self): """Flag whitespace files with size less than or equal to 100 byte as ignored.""" d2d.flag_whitespace_files(project=self.project) + def load_ecosystem_config(self): + """Load ecosystem specific configurations for d2d steps for selected options.""" + d2d.load_ecosystem_config(pipeline=self, options=self.selected_groups) + + @optional_step("Ruby") + def load_ecosystem_config_ruby(self): + """Load Ruby specific configurations for d2d steps.""" + pass + def map_about_files(self): """Map ``from/`` .ABOUT files to their related ``to/`` resources.""" d2d.map_about_files(project=self.project, logger=self.log) @@ -268,6 +253,7 @@ def flag_mapped_resources_archives_and_ignored_directories(self): def perform_house_keeping_tasks(self): """ On deployed side + - Ignore specific files based on ecosystem based configurations. - PurlDB match files with ``no-java-source`` and empty status, if no match is found update status to ``requires-review``. - Update status for uninteresting files. 
def load_ecosystem_config(pipeline, options):
    """
    Add ecosystem specific configurations for each ecosystem selected
    as `options` to the `pipeline`.

    The "Default" configuration is always applied first, then the
    configuration of each entry of `options`, in order. Entries without a
    configuration registered in ``ECOSYSTEM_CONFIGS`` are skipped.
    A `options` value of ``None`` (or empty) only applies the defaults.
    """
    configs_by_ecosystem = {
        ecosystem.ecosystem_option: ecosystem for ecosystem in ECOSYSTEM_CONFIGS
    }

    # Default configurations are common across ecosystems and always come
    # first, followed by the selected ecosystems which have a configuration.
    options_to_load = ["Default"]
    options_to_load.extend(
        option for option in options or () if option in configs_by_ecosystem
    )

    for selected_option in options_to_load:
        add_ecosystem_config(
            pipeline=pipeline,
            configs_by_ecosystem=configs_by_ecosystem,
            selected_option=selected_option,
        )


def add_ecosystem_config(pipeline, configs_by_ecosystem, selected_option):
    """
    Merge the settings of the `selected_option` ecosystem into `pipeline`.

    `configs_by_ecosystem` maps an option name to its configuration class.
    For each supported setting, the values of the ecosystem are appended to
    the values already present on the `pipeline`, keeping the existing order
    and skipping values which are already there.
    Nothing is done when `selected_option` has no configuration.

    The merged result is always a new list set on `pipeline` itself: the
    lists declared on the configuration classes (and on the pipeline class)
    are never modified nor shared, so one pipeline run cannot leak its
    settings into another one.
    """
    d2d_pipeline_configs = (
        "purldb_package_extensions",
        "purldb_resource_extensions",
        "deployed_resource_path_exclusions",
    )

    ecosystem_config = configs_by_ecosystem.get(selected_option)
    if ecosystem_config is None:
        return

    for pipeline_config in d2d_pipeline_configs:
        config_value = getattr(ecosystem_config, pipeline_config)
        if not config_value:
            continue

        # Work on a copy: ``list.extend`` mutates in place and returns None,
        # so its result must not be stored, and the list found on the pipeline
        # may be a class-level list shared with other pipelines and configs.
        merged_values = list(getattr(pipeline, pipeline_config) or [])
        for value in config_value:
            if value not in merged_values:
                merged_values.append(value)

        setattr(pipeline, pipeline_config, merged_values)


def ignore_unmapped_resources_from_config(project, patterns_to_ignore, logger=None):
    """
    Ignore unmapped resources for a project using `patterns_to_ignore`.

    The patterns are applied, through ``flag.flag_ignored_patterns``, to
    ``project.codebaseresources.to_codebase().no_status()`` and the matching
    resources get the ``flag.IGNORED_FROM_CONFIG`` status.
    An empty or ``None`` `patterns_to_ignore` flags nothing.
    When provided, `logger` is called with a message including the count
    returned by the flagging.
    """
    ignored_resources_count = flag.flag_ignored_patterns(
        codebaseresources=project.codebaseresources.to_codebase().no_status(),
        # Tolerate a missing value so that a pipeline without any exclusion
        # configured does not crash this house keeping step.
        patterns=patterns_to_ignore or [],
        status=flag.IGNORED_FROM_CONFIG,
    )
    if logger:
        # NOTE(review): the "with from" wording of this message reads like a
        # typo; it is left untouched as it is emitted at runtime.
        logger(
            f"Ignoring {ignored_resources_count:,d} to/ resources with "
            "from ecosystem specific configurations."
        )
def flag_ignored_patterns(codebaseresources, patterns, status=IGNORED_PATTERN):
    """
    Flag the ``codebaseresources`` matching one of the ``patterns`` with the
    provided ``status`` and return the total of the update counts.

    ``patterns`` is either a list of patterns or a single string with one
    pattern per line. Each pattern is applied in turn with
    ``codebaseresources.path_pattern(pattern)``.
    """
    pattern_list = patterns.splitlines() if isinstance(patterns, str) else patterns

    return sum(
        codebaseresources.path_pattern(pattern).update(status=status)
        for pattern in pattern_list
    )
make_resource_file(self.project1, "path/other-policies.yml") updated = flag.flag_ignored_patterns( - self.project1, flag.DEFAULT_IGNORED_PATTERNS + self.project1.codebaseresources.no_status(), + flag.DEFAULT_IGNORED_PATTERNS, ) self.assertEqual(3, updated) diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index bca1ec48f..dd5b62546 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -423,7 +423,10 @@ def test_scanpipe_pipeline_class_flag_ignored_resources(self): mock_flag.return_value = None pipeline.flag_ignored_resources() patterns_args = ["*.ext", *flag.DEFAULT_IGNORED_PATTERNS] - mock_flag.assert_called_with(project1, patterns=patterns_args) + mock_flag.assert_called_with( + codebaseresources=project1.codebaseresources.no_status(), + patterns=patterns_args, + ) def test_scanpipe_pipeline_class_extract_archive(self): project1 = Project.objects.create(name="Analysis")