From f1323ba1c2e11d7fcea65bee1cc4a50e28b38c72 Mon Sep 17 00:00:00 2001 From: Artyom Semidolin <43622365+Artanias@users.noreply.github.com> Date: Fri, 3 Jan 2025 19:19:19 +0300 Subject: [PATCH] feat: exit with positive exit code in the end when found similarity. (#210) Refs: #210, #153. --- Makefile | 2 +- locales/codeplag.pot | 12 ++++- .../translations/en/LC_MESSAGES/codeplag.po | 10 +++- .../translations/ru/LC_MESSAGES/codeplag.po | 10 +++- src/codeplag/__init__.py | 8 +-- src/codeplag/codeplagcli.py | 7 +++ src/codeplag/handlers/check.py | 54 ++++++++++++------- src/codeplag/handlers/report.py | 19 +++---- src/codeplag/types.py | 11 +++- src/codeplag/utils.py | 10 ++-- test/auto/conftest.py | 11 ++++ test/auto/functional/test_check.py | 36 +++++++++---- test/auto/functional/test_report.py | 20 ++++--- test/auto/utils.py | 4 ++ 14 files changed, 155 insertions(+), 59 deletions(-) diff --git a/Makefile b/Makefile index 1a38291..110e0d4 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -UTIL_VERSION := 0.5.11 +UTIL_VERSION := 0.5.12 UTIL_NAME := codeplag PWD := $(shell pwd) diff --git a/locales/codeplag.pot b/locales/codeplag.pot index a3eeaec..9d787ee 100644 --- a/locales/codeplag.pot +++ b/locales/codeplag.pot @@ -5,8 +5,8 @@ #, fuzzy msgid "" msgstr "" -"Project-Id-Version: codeplag 0.5.11\n" -"POT-Creation-Date: 2025-01-02 18:42+0300\n" +"Project-Id-Version: codeplag 0.5.12\n" +"POT-Creation-Date: 2025-01-03 14:06+0300\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: Artyom Semidolin\n" "Language-Team: LANGUAGE \n" @@ -216,6 +216,14 @@ msgid "" "user', or 'github-project-folder' options." msgstr "" +#: src/codeplag/codeplagcli.py:421 src/codeplag/handlers/report.py:95 +msgid "All paths must be provided." +msgstr "" + +#: src/codeplag/handlers/report.py:92 +msgid "Invalid report type." 
+msgstr "" + #: src/templates/general.templ:5 src/templates/sources.templ:5 msgid "Comparative report" msgstr "" diff --git a/locales/translations/en/LC_MESSAGES/codeplag.po b/locales/translations/en/LC_MESSAGES/codeplag.po index ce96c6b..a244c48 100644 --- a/locales/translations/en/LC_MESSAGES/codeplag.po +++ b/locales/translations/en/LC_MESSAGES/codeplag.po @@ -4,7 +4,7 @@ # msgid "" msgstr "" -"Project-Id-Version: codeplag 0.5.11\n" +"Project-Id-Version: codeplag 0.5.12\n" "POT-Creation-Date: 2024-05-21 09:28+0300\n" "PO-Revision-Date: 2024-05-16 19:15+0300\n" "Last-Translator: Artyom Semidolin\n" @@ -242,6 +242,14 @@ msgstr "" "The'path-regexp' option requires the provided 'directories', 'github-" "user', or 'github-project-folder' options." +#: src/codeplag/codeplagcli.py:421 src/codeplag/handlers/report.py:95 +msgid "All paths must be provided." +msgstr "All or none of the root paths must be specified." + +#: src/codeplag/handlers/report.py:92 +msgid "Invalid report type." +msgstr "Invalid report type." + #: src/templates/general.templ:5 src/templates/sources.templ:5 msgid "Comparative report" msgstr "Comparative report" diff --git a/locales/translations/ru/LC_MESSAGES/codeplag.po b/locales/translations/ru/LC_MESSAGES/codeplag.po index ec19e7f..02b36dd 100644 --- a/locales/translations/ru/LC_MESSAGES/codeplag.po +++ b/locales/translations/ru/LC_MESSAGES/codeplag.po @@ -4,7 +4,7 @@ # msgid "" msgstr "" -"Project-Id-Version: codeplag 0.5.11\n" +"Project-Id-Version: codeplag 0.5.12\n" "POT-Creation-Date: 2024-05-21 09:28+0300\n" "PO-Revision-Date: 2024-05-11 12:05+0300\n" "Last-Translator: Artyom Semidolin\n" @@ -256,6 +256,14 @@ msgstr "" "Аргумент 'path-regexp' требует заданного параметра 'directories', " "'github-user' или 'github-project-folder'." +#: src/codeplag/codeplagcli.py:421 src/codeplag/handlers/report.py:95 +msgid "All paths must be provided." +msgstr "Необходимо указать все корневые пути или не указывать ни одного." 
+ +#: src/codeplag/handlers/report.py:92 +msgid "Invalid report type." +msgstr "Некорректный тип отчёта." + #: src/templates/general.templ:5 src/templates/sources.templ:5 msgid "Comparative report" msgstr "Сравнительный отчёт" diff --git a/src/codeplag/__init__.py b/src/codeplag/__init__.py index a6a22e8..04ac656 100644 --- a/src/codeplag/__init__.py +++ b/src/codeplag/__init__.py @@ -1,7 +1,7 @@ -from typing import Literal +from codeplag.types import ExitCode -def main() -> Literal[0, 1, 2]: +def main() -> ExitCode: import argcomplete import pandas as pd @@ -29,7 +29,7 @@ def main() -> Literal[0, 1, 2]: code = codeplag_util.run() except KeyboardInterrupt: logger.warning("The util stopped by keyboard interrupt.") - return 1 + return ExitCode.EXIT_KEYBOARD except Exception: logger.error( "An unexpected error occurred while running the utility. " @@ -37,6 +37,6 @@ def main() -> Literal[0, 1, 2]: LOG_PATH, ) logger.debug("Trace:", exc_info=True) - return 2 + return ExitCode.EXIT_UNKNOWN return code diff --git a/src/codeplag/codeplagcli.py b/src/codeplag/codeplagcli.py index 356e120..28763a6 100644 --- a/src/codeplag/codeplagcli.py +++ b/src/codeplag/codeplagcli.py @@ -412,6 +412,13 @@ def validate_args(self: Self, parsed_args: argparse.Namespace) -> None: "'github-user', or 'github-project-folder' options." 
) ) + elif ( + root == "report" + and command == "create" + and not all([parsed_args.first_root_path, parsed_args.second_root_path]) + and any([parsed_args.first_root_path, parsed_args.second_root_path]) + ): + self.error(_("All paths must be provided.")) def parse_args(self: Self, args: list[str] | None = None) -> argparse.Namespace: parsed_args = super(CodeplagCLI, self).parse_args(args) diff --git a/src/codeplag/handlers/check.py b/src/codeplag/handlers/check.py index 3e4f9aa..0132d92 100644 --- a/src/codeplag/handlers/check.py +++ b/src/codeplag/handlers/check.py @@ -38,6 +38,7 @@ from codeplag.types import ( ASTFeatures, CompareInfo, + ExitCode, Extension, Flag, MaxDepth, @@ -151,7 +152,7 @@ def check( github_files: list[str] | None = None, github_project_folders: list[str] | None = None, github_user: str = "", - ) -> None: + ) -> ExitCode: if files is None: files = [] if directories is None: @@ -167,8 +168,9 @@ def check( features_from_gh_files = self.features_getter.get_from_github_files(github_files) logger.info("Starting searching for plagiarism ...") + exit_code = ExitCode.EXIT_SUCCESS if self.mode == "many_to_many": - self.__many_to_many_check( + exit_code = self.__many_to_many_check( features_from_files, directories, features_from_gh_files, @@ -176,7 +178,7 @@ def check( github_user, ) elif self.mode == "one_to_one": - self.__one_to_one_check( + exit_code = self.__one_to_one_check( features_from_files, directories, features_from_gh_files, @@ -187,6 +189,7 @@ def check( logger.info("Ending searching for plagiarism ...") if isinstance(self.reporter, CSVReporter): self.reporter._write_df_to_fs() + return exit_code def __many_to_many_check( self: Self, @@ -195,7 +198,7 @@ def __many_to_many_check( features_from_gh_files: list[ASTFeatures], github_project_folders: list[str], github_user: str, - ) -> None: + ) -> ExitCode: works: list[ASTFeatures] = [] works.extend(features_from_files) works.extend(self.features_getter.get_from_dirs(directories)) @@ -212,6 
+215,7 @@ def __many_to_many_check( iterations, ) self.progress = Progress(iterations) + exit_code = ExitCode.EXIT_SUCCESS with ProcessPoolExecutor(max_workers=self.workers) as executor: processing: list[ProcessingWorks] = [] futures: set[Future] = set() @@ -219,8 +223,11 @@ def __many_to_many_check( for j, work2 in enumerate(works): if i <= j: continue - self._do_step(executor, processing, futures, work1, work2) - self._handle_completed_futures(processing, futures) + exit_code = ExitCode( + exit_code | self._do_step(executor, processing, futures, work1, work2) + ) + exit_code = ExitCode(exit_code | self._handle_completed_futures(processing, futures)) + return exit_code def __one_to_one_check( self: Self, @@ -229,7 +236,7 @@ def __one_to_one_check( features_from_gh_files: list[ASTFeatures], github_project_folders: list[str], github_user: str, - ) -> None: + ) -> ExitCode: combined_elements = filter( bool, ( @@ -253,6 +260,7 @@ def __one_to_one_check( ) self.progress = ComplexProgress(iterations) cases = combinations(combined_elements, r=2) + exit_code = ExitCode.EXIT_SUCCESS with ProcessPoolExecutor(max_workers=self.workers) as executor: processing: list[ProcessingWorks] = [] futures: set[Future] = set() @@ -269,8 +277,11 @@ def __one_to_one_check( self.progress.add_internal_progress(internal_iterations) for work1 in first_sequence: for work2 in second_sequence: - self._do_step(executor, processing, futures, work1, work2) - self._handle_completed_futures(processing, futures) + exit_code = ExitCode( + exit_code | self._do_step(executor, processing, futures, work1, work2) + ) + exit_code = ExitCode(exit_code | self._handle_completed_futures(processing, futures)) + return exit_code def _do_step( self: Self, @@ -279,10 +290,10 @@ def _do_step( futures: set[Future], work1: ASTFeatures, work2: ASTFeatures, - ) -> None: + ) -> ExitCode: if work1.filepath == work2.filepath: _print_pretty_progress_if_need_and_increase(self.progress, self.workers) - return + return 
ExitCode.EXIT_SUCCESS work1, work2 = sorted([work1, work2]) metrics = None @@ -293,9 +304,10 @@ def _do_step( future.id = len(processing) # type: ignore futures.add(future) processing.append(ProcessingWorks(work1, work2)) - return + return ExitCode.EXIT_SUCCESS self._handle_compare_result(work1, work2, metrics) _print_pretty_progress_if_need_and_increase(self.progress, self.workers) + return ExitCode.EXIT_FOUND_SIM def _handle_compare_result( self: Self, @@ -303,13 +315,13 @@ def _handle_compare_result( work2: ASTFeatures, metrics: CompareInfo, save: bool = False, - ) -> None: + ) -> ExitCode: if metrics.structure is None: - return + return ExitCode.EXIT_SUCCESS if self.reporter and save: self.reporter.save_result(work1, work2, metrics) if self.short_output: - return + return ExitCode.EXIT_FOUND_SIM if self.threshold and (metrics.structure.similarity * 100) <= self.threshold: print_compare_result(work1, work2, metrics) @@ -324,19 +336,25 @@ def _handle_compare_result( work2.head_nodes, ), ) + return ExitCode.EXIT_FOUND_SIM def _handle_completed_futures( self: Self, processing: list[ProcessingWorks], futures: set[Future], - ) -> None: + ) -> ExitCode: + exit_code = ExitCode.EXIT_SUCCESS for future in as_completed(futures): metrics: CompareInfo = future.result() proc_works_info = processing[future.id] # type: ignore - self._handle_compare_result( - proc_works_info.work1, proc_works_info.work2, metrics, save=True + exit_code = ExitCode( + exit_code + | self._handle_compare_result( + proc_works_info.work1, proc_works_info.work2, metrics, save=True + ) ) _print_pretty_progress_if_need_and_increase(self.progress, self.workers) + return exit_code def _create_future_compare( self: Self, diff --git a/src/codeplag/handlers/report.py b/src/codeplag/handlers/report.py index b5e4097..6810d3d 100644 --- a/src/codeplag/handlers/report.py +++ b/src/codeplag/handlers/report.py @@ -3,7 +3,7 @@ from collections import defaultdict from copy import deepcopy from pathlib import Path 
-from typing import Generator, Literal, TypedDict +from typing import Generator, TypedDict import jinja2 import numpy as np @@ -25,6 +25,7 @@ from codeplag.translate import get_translations from codeplag.types import ( CompareInfo, + ExitCode, Language, ReportType, SameFuncs, @@ -48,7 +49,7 @@ def html_report_create( report_type: ReportType, first_root_path: Path | str | None = None, second_root_path: Path | str | None = None, -) -> Literal[0, 1]: +) -> ExitCode: """Creates an HTML report based on the configuration settings. Args: @@ -60,7 +61,7 @@ def html_report_create( Returns: ------- - Literal[0, 1]: 0 if the report was successfully created, 1 otherwise. + ExitCode: EXIT_SUCCESS if the report was successfully created, EXIT_INVAL otherwise. Raises: ------- @@ -76,22 +77,22 @@ def html_report_create( reports_path = settings_config.get("reports") if not reports_path: logger.error("Can't create general report without provided in settings 'report' path.") - return 1 + return ExitCode.EXIT_INVAL if settings_config["reports_extension"] != "csv": logger.error("Can create report only when 'reports_extension' is csv.") - return 1 + return ExitCode.EXIT_INVAL if not (reports_path / CSV_REPORT_FILENAME).exists(): logger.error(f"There is nothing in '{reports_path}' to create a basic html report from.") - return 1 + return ExitCode.EXIT_INVAL if report_type == "general": create_report_function = _create_general_report elif report_type == "sources": create_report_function = _create_sources_report else: - raise ValueError("Invalid report type.") + raise ValueError(_("Invalid report type.")) all_paths_provided = all([first_root_path, second_root_path]) if not all_paths_provided and any([first_root_path, second_root_path]): - raise ValueError("All paths must be provided.") + raise ValueError(_("All paths must be provided.")) df = read_df(reports_path / CSV_REPORT_FILENAME) if all_paths_provided: @@ -110,7 +111,7 @@ def html_report_create( settings_config["language"], paths, # type: ignore ) - 
return 0 + return ExitCode.EXIT_SUCCESS def calculate_general_total_similarity( diff --git a/src/codeplag/types.py b/src/codeplag/types.py index 0397d79..c036889 100644 --- a/src/codeplag/types.py +++ b/src/codeplag/types.py @@ -2,6 +2,7 @@ from collections import defaultdict from dataclasses import dataclass, field from datetime import datetime +from enum import IntEnum from functools import total_ordering from pathlib import Path from typing import ( @@ -130,7 +131,7 @@ class CompareInfo(NamedTuple): structure: StructuresInfo | None = None -# Exceptions +# Exceptions and errors # ---------------------------------------------------------------------------- @@ -138,6 +139,14 @@ class CLIException(Exception): """A common exception occurred while using CLI.""" +class ExitCode(IntEnum): + EXIT_SUCCESS = 0 + EXIT_KEYBOARD = 1 + EXIT_INVAL = 3 + EXIT_UNKNOWN = 5 + EXIT_FOUND_SIM = 200 + + # Misc # ---------------------------------------------------------------------------- diff --git a/src/codeplag/utils.py b/src/codeplag/utils.py index 064d929..270c433 100644 --- a/src/codeplag/utils.py +++ b/src/codeplag/utils.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Any, Literal +from typing import Any from typing_extensions import Self @@ -12,7 +12,7 @@ ) from codeplag.handlers.settings import settings_modify, settings_show from codeplag.logger import codeplag_logger as logger -from codeplag.types import ReportType +from codeplag.types import ExitCode, ReportType class CodeplagEngine: @@ -54,7 +54,7 @@ def __init__(self: Self, parsed_args: dict[str, Any]) -> None: self.files: list[Path] = parsed_args.pop("files", []) self.directories: list[Path] = parsed_args.pop("directories", []) - def run(self: Self) -> Literal[0, 1]: + def run(self: Self) -> ExitCode: logger.debug("Starting codeplag util ...") if self.root == "settings": @@ -68,11 +68,11 @@ def run(self: Self) -> Literal[0, 1]: self.path, self.report_type, self.first_root_path, self.second_root_path ) 
else: - self.comparator.check( + return self.comparator.check( self.files, self.directories, self.github_files, self.github_project_folders, self.github_user, ) - return 0 + return ExitCode.EXIT_SUCCESS diff --git a/test/auto/conftest.py b/test/auto/conftest.py index 26a5904..98d5f80 100644 --- a/test/auto/conftest.py +++ b/test/auto/conftest.py @@ -18,6 +18,17 @@ def create_reports_folder(): shutil.rmtree(REPORTS_FOLDER) +@pytest.fixture(scope="module") +def create_reports_folder_module(): + with suppress(Exception): + os.mkdir(REPORTS_FOLDER) + assert os.path.exists(REPORTS_FOLDER) + + yield + + shutil.rmtree(REPORTS_FOLDER) + + @pytest.fixture(scope="session", autouse=True) def set_logging_level(): modify_settings(log_level="debug") diff --git a/test/auto/functional/test_check.py b/test/auto/functional/test_check.py index 16b5f14..80e80be 100644 --- a/test/auto/functional/test_check.py +++ b/test/auto/functional/test_check.py @@ -3,6 +3,7 @@ import os import pytest +from const import REPORTS_FOLDER from utils import modify_settings, run_check, run_util from codeplag.consts import CONFIG_PATH, UTIL_NAME, UTIL_VERSION @@ -29,8 +30,8 @@ @pytest.fixture(scope="module", autouse=True) -def setup_module(): - first_cond = not modify_settings(environment=".env").cmd_res.returncode +def setup_module(create_reports_folder_module: None): + first_cond = not modify_settings(environment=".env", reports=REPORTS_FOLDER).cmd_res.returncode second_cond = os.environ.get("ACCESS_TOKEN", "") != "" assert first_cond or second_cond @@ -48,64 +49,79 @@ def test_check_util_version(): @pytest.mark.parametrize( - "cmd, out", + "cmd, out, found_plag", [ - (["--files", *CPP_FILES], b"Getting works features from files"), + (["--files", *CPP_FILES], b"Getting works features from files", False), ( ["--directories", CPP_DIR], f"Getting works features from {CWD}/{CPP_DIR}".encode("utf-8"), + True, ), ( ["--github-files", *CPP_GITHUB_FILES], b"Getting works features from GitHub urls", + True, ), ( 
["--github-project-folders", CPP_GITHUB_DIR], f"Getting works features from {CPP_GITHUB_DIR}".encode("utf-8"), + True, ), ( ["--github-user", "OSLL", "--repo-regexp", "code-plag"], f"Getting works features from {REPO_URL}".encode("utf-8"), + True, ), ], ) -def test_compare_cpp_files(cmd: list[str], out: bytes): +def test_compare_cpp_files(cmd: list[str], out: bytes, found_plag: bool): result = run_check(cmd, extension="cpp") - result.assert_success() + if found_plag: + result.assert_found_plagiarism() + else: + result.assert_success() assert out in result.cmd_res.stdout @pytest.mark.parametrize( - "cmd, out", + "cmd, out, found_plag", [ - (["--files", *PY_FILES], b"Getting works features from files"), + (["--files", *PY_FILES], b"Getting works features from files", False), ( ["--directories", *PY_DIRS], f"Getting works features from {CWD}/{PY_DIRS[0]}".encode("utf-8"), + True, ), ( ["--github-files", *PY_GITHUB_FILES], b"Getting works features from GitHub urls", + False, ), ( ["--github-project-folders", PY_GITHUB_DIR], f"Getting works features from {PY_GITHUB_DIR}".encode("utf-8"), + False, ), ( ["--github-user", "OSLL", "--repo-regexp", "code-plag"], f"Getting works features from {REPO_URL}".encode("utf-8"), + True, ), ( ["--directories", *PY_DIRS, "--mode", "one_to_one"], f"Getting works features from {CWD}/{PY_DIRS[0]}".encode("utf-8"), + False, ), ], ) -def test_compare_py_files(cmd: list[str], out: bytes): +def test_compare_py_files(cmd: list[str], out: bytes, found_plag: bool): result = run_check(cmd) - result.assert_success() + if found_plag: + result.assert_found_plagiarism() + else: + result.assert_success() assert out in result.cmd_res.stdout diff --git a/test/auto/functional/test_report.py b/test/auto/functional/test_report.py index 2170fd5..62c200d 100644 --- a/test/auto/functional/test_report.py +++ b/test/auto/functional/test_report.py @@ -9,12 +9,12 @@ from codeplag.types import ReportType -@pytest.fixture(scope="function", autouse=True) -def 
setup(create_reports_folder: None): +@pytest.fixture(scope="module", autouse=True) +def setup(create_reports_folder_module: None): modify_settings( reports=REPORTS_FOLDER, reports_extension="csv", short_output=1 ).assert_success() - run_check(["--directories", "test/unit/codeplag/cplag", "src/"]).assert_success() + run_check(["--directories", "test/unit/codeplag/cplag", "src/"]).assert_found_plagiarism() yield @@ -78,15 +78,21 @@ def test_default_report_diff_with_provided_paths(self: Self, report_type: Report ["general", "sources"], ) def test_provided_only_first_path(self: Self, report_type: ReportType) -> None: - create_report( + result = create_report( REPORTS_FOLDER / "report.html", report_type, first_root_path="/usr/src" - ).assert_failed() + ) + + result.assert_failed() + assert result.cmd_res.returncode == 2 @pytest.mark.parametrize( "report_type", ["general", "sources"], ) def test_provided_only_second_path(self: Self, report_type: ReportType) -> None: - create_report( + result = create_report( REPORTS_FOLDER / "report.html", report_type, second_root_path="/usr/src" - ).assert_failed() + ) + + result.assert_failed() + assert result.cmd_res.returncode == 2 diff --git a/test/auto/utils.py b/test/auto/utils.py index 2e130ff..eee032c 100644 --- a/test/auto/utils.py +++ b/test/auto/utils.py @@ -6,6 +6,7 @@ from codeplag.consts import UTIL_NAME from codeplag.types import ( + ExitCode, Flag, Language, LogLevel, @@ -27,6 +28,9 @@ def assert_success(self: Self) -> None: def assert_failed(self: Self) -> None: assert self.cmd_res.returncode, str(self.cmd_res) + def assert_found_plagiarism(self: Self) -> None: + assert self.cmd_res.returncode == ExitCode.EXIT_FOUND_SIM, str(self.cmd_res) + def run_cmd(cmd: list[str]) -> CmdResult: return CmdResult(subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE))