diff --git a/terrarium/bi_ci/bi_ci/execute_mypy_multi.py b/terrarium/bi_ci/bi_ci/execute_mypy_multi.py index 53db415c8..1284fd710 100644 --- a/terrarium/bi_ci/bi_ci/execute_mypy_multi.py +++ b/terrarium/bi_ci/bi_ci/execute_mypy_multi.py @@ -3,9 +3,7 @@ from pathlib import Path import subprocess import sys -from typing import ( - Iterable, -) +from typing import Iterable import clize import tomlkit diff --git a/terrarium/dl_gitmanager/README.md b/terrarium/dl_gitmanager/README.md index 30e0a3d59..1249c964a 100644 --- a/terrarium/dl_gitmanager/README.md +++ b/terrarium/dl_gitmanager/README.md @@ -62,14 +62,3 @@ By default they are printed relative to the repository root. The `--only-added-commits` option makes the tool inspect only commits that have been added in the head version. - -### list-diff-paths - -List files that have changed in commits passed on as input - -``` -echo | dl-git list-diff-paths -echo | dl-git list-diff-paths --absolute -``` - -Option `--absolute` has the same meaning as in `range-diff-paths`. diff --git a/terrarium/dl_gitmanager/dl_gitmanager/git_manager.py b/terrarium/dl_gitmanager/dl_gitmanager/git_manager.py index cd04d6f1b..e6f44c055 100644 --- a/terrarium/dl_gitmanager/dl_gitmanager/git_manager.py +++ b/terrarium/dl_gitmanager/dl_gitmanager/git_manager.py @@ -2,13 +2,15 @@ from pathlib import Path from typing import ( - Collection, Generator, Iterable, + Optional, ) import attr from git.diff import Diff +from git.objects.commit import Commit +from git.objects.submodule.base import Submodule from git.repo.base import Repo as GitRepo @@ -18,18 +20,27 @@ @attr.s class GitManager: git_repo: GitRepo = attr.ib(kw_only=True) + path_prefix: Path = attr.ib(kw_only=True, default=Path(".")) def get_root_path(self) -> Path: return Path(self.git_repo.working_tree_dir) - def get_head_commit(self) -> str: - return self.git_repo.head.commit.hexsha + def _get_commit_obj(self, commit_specifier: str) -> Commit: + return self.git_repo.commit(commit_specifier) - def get_commit(self, commit_specifier: str) -> str: - return self.git_repo.commit(commit_specifier).hexsha + def _iter_commits(self, base: str, head: str, only_missing_commits: bool) -> Iterable[Commit]: + if only_missing_commits: + return self.git_repo.iter_commits(f"{base}..{head}") + else: + return self.git_repo.iter_commits(f"{base}...{head}") + + def _iter_diffs_from_commit(self, commit_obj: Commit) -> Iterable[Diff]: + for parent in commit_obj.parents: + yield from commit_obj.diff(parent) - def _get_sm_commit(self, submodule_name: str, commit: str) -> str: - return self.git_repo.commit(commit).tree[submodule_name].hexsha + def _get_submodule_commit(self, submodule: Submodule, commit_obj: Commit) -> str: + tree_item = commit_obj.tree[submodule.name] + return tree_item.hexsha def _iter_range_diffs( self, @@ -37,85 +48,36 @@ def _iter_range_diffs( head: str, absolute: bool = False, submodules: bool = True, + only_missing_commits: bool = False, ) -> Generator[tuple[Path, Diff], None, None]: - base_commit = self.git_repo.commit(base) - head_commit = self.git_repo.commit(head) - - base_path: Path - if absolute: - base_path = self.get_root_path() - else: - base_path = Path(".") - - # Iter own diffs - diff_index = head_commit.diff(base_commit) - for diff_item in diff_index: - yield base_path, diff_item - - # Iter submodules and get their internal diffs - for submodule in self.git_repo.submodules: - submodule_name = submodule.name - base_tree_item = base_commit.tree[submodule_name] - head_tree_item = head_commit.tree[submodule_name] - submodule_base = base_tree_item.hexsha - submodule_head = head_tree_item.hexsha - submodule_manager = self.get_submodule_manager(submodule_name=submodule_name) - - submodule_base_path: Path - if absolute: - submodule_base_path = submodule_manager.get_root_path() - else: - submodule_base_path = Path(submodule.path) - - # Iterate. Override the repo paths here with the submodule path - for _, diff_item in submodule_manager._iter_range_diffs(base=submodule_base, head=submodule_head): - yield submodule_base_path, diff_item - - def _iter_list_diffs( - self, commits: Collection[str], absolute: bool = False - ) -> Generator[tuple[Path, Diff], None, None]: - if not commits: - return - - base_path: Path - if absolute: - base_path = self.get_root_path() - else: - base_path = Path(".") - - # Iter own diffs - for commit_str in commits: - commit_obj = self.git_repo.commit(commit_str) - for parent in commit_obj.parents: - for diff_item in commit_obj.diff(parent): - yield base_path, diff_item - - # Iter submodules and get their internal diffs - for submodule in self.git_repo.submodules: - submodule_name = submodule.name - submodule_manager = self.get_submodule_manager(submodule_name=submodule_name) - sm_commits_for_all_commits: set[str] = set() - for commit_str in commits: - commit_obj = self.git_repo.commit(commit_str) - submodule_commit = self._get_sm_commit(submodule_name=submodule_name, commit=commit_str) - sm_ancestors = submodule_manager.get_all_ancestor_commits(submodule_commit) - parent_commit_objs = commit_obj.parents - for parent_commit_obj in parent_commit_objs: - sm_commit_of_parent = self._get_sm_commit( - submodule_name=submodule_name, commit=parent_commit_obj.hexsha - ) - sm_ancestors -= submodule_manager.get_all_ancestor_commits(sm_commit_of_parent) - - sm_commits_for_all_commits |= sm_ancestors - - submodule_base_path: Path - if absolute: - submodule_base_path = submodule_manager.get_root_path() - else: - submodule_base_path = Path(submodule.path) - - for _, diff_item in submodule_manager._iter_list_diffs(commits=sm_commits_for_all_commits): - yield submodule_base_path, diff_item + # Get commit objects + base_commit = self._get_commit_obj(base) + head_commit = self._get_commit_obj(head) + + base_path = self.get_root_path() if absolute else self.path_prefix + + # Iter commits: + for commit_obj in self._iter_commits(base=base, head=head, only_missing_commits=only_missing_commits): + for diff_item in self._iter_diffs_from_commit(commit_obj): + yield base_path, diff_item + + # Go to submodules if needed + if submodules: + # Iter submodules and get their internal diffs + for submodule in self.git_repo.submodules: + submodule_base = self._get_submodule_commit(submodule=submodule, commit_obj=base_commit) + submodule_head = self._get_submodule_commit(submodule=submodule, commit_obj=head_commit) + submodule_manager = self.get_submodule_manager( + submodule=submodule, + path_prefix=Path(submodule.path), + ) + yield from submodule_manager._iter_range_diffs( + base=submodule_base, + head=submodule_head, + absolute=absolute, + only_missing_commits=only_missing_commits, + submodules=submodules, + ) def _collect_paths_from_diffs(self, diff_iterable: Iterable[tuple[Path, Diff]]) -> list[str]: result: set[str] = set() @@ -127,23 +89,21 @@ def _collect_paths_from_diffs(self, diff_iterable: Iterable[tuple[Path, Diff]]) return sorted(result) - def get_range_diff_paths(self, base: str, head: str, absolute: bool = False) -> list[str]: + def get_range_diff_paths( + self, + base: str, + head: str, + absolute: bool = False, + only_missing_commits: bool = False, + ) -> list[str]: return self._collect_paths_from_diffs( - diff_iterable=self._iter_range_diffs(base=base, head=head, absolute=absolute) + diff_iterable=self._iter_range_diffs( + base=base, + head=head, + absolute=absolute, + only_missing_commits=only_missing_commits, + ) ) - def get_list_diff_paths(self, commits: Collection[str], absolute: bool = False) -> list[str]: - return self._collect_paths_from_diffs(diff_iterable=self._iter_list_diffs(commits=commits, absolute=absolute)) - - def get_all_ancestor_commits(self, commit: str) -> set[str]: - commits = {commit.hexsha for commit in self.git_repo.iter_commits(commit, max_count=MAX_HISTORY_DEPTH)} - return commits - - def get_missing_commits(self, base: str, head: str) -> set[str]: - commits = {commit.hexsha for commit in self.git_repo.iter_commits(f"{base}..{head}")} - return commits - - def get_submodule_manager(self, submodule_name: str) -> GitManager: - submodule_dict = {sm.name: sm for sm in self.git_repo.submodules} - submodule = submodule_dict[submodule_name] - return type(self)(git_repo=submodule.module()) + def get_submodule_manager(self, submodule: Submodule, path_prefix: Optional[Path] = None) -> GitManager: + return type(self)(git_repo=submodule.module(), path_prefix=path_prefix or Path(".")) diff --git a/terrarium/dl_gitmanager/dl_gitmanager/scripts/gitmanager_cli.py b/terrarium/dl_gitmanager/dl_gitmanager/scripts/gitmanager_cli.py index f9e635b7e..20c48fcc0 100644 --- a/terrarium/dl_gitmanager/dl_gitmanager/scripts/gitmanager_cli.py +++ b/terrarium/dl_gitmanager/dl_gitmanager/scripts/gitmanager_cli.py @@ -50,26 +50,15 @@ def get_parser(cls) -> argparse.ArgumentParser: "--only-added-commits", action="store_true", help="Inspect only commits that are added in head" ) - subparsers.add_parser( - "list-diff-paths", - parents=[base_head_parser, absolute_parser], - help="List file paths with changes given as commit list", - ) - return parser def range_diff_paths(self, base: str, head: Optional[str], absolute: bool, only_added_commits: bool) -> None: - diff_name_list: list[str] - if only_added_commits: - commits = self.git_manager.get_missing_commits(base=base, head=head) - diff_name_list = self.git_manager.get_list_diff_paths(commits=commits, absolute=absolute) - else: - diff_name_list = self.git_manager.get_range_diff_paths(base=base, head=head, absolute=absolute) - print("\n".join(diff_name_list)) - - def list_diff_paths(self, absolute: bool) -> None: - commits = [line.strip() for line in self.input_text_io if line.strip()] - diff_name_list = self.git_manager.get_list_diff_paths(commits=commits, absolute=absolute) + diff_name_list = self.git_manager.get_range_diff_paths( + base=base, + head=head, + absolute=absolute, + only_missing_commits=only_added_commits, + ) print("\n".join(diff_name_list)) @classmethod @@ -86,10 +75,11 @@ def run_parsed_args(cls, args: argparse.Namespace) -> None: match args.command: case "range-diff-paths": tool.range_diff_paths( - base=args.base, head=args.head, absolute=args.absolute, only_added_commits=args.only_added_commits + base=args.base, + head=args.head, + absolute=args.absolute, + only_added_commits=args.only_added_commits, ) - case "list-diff-paths": - tool.list_diff_paths(absolute=args.absolute) case _: raise RuntimeError(f"Got unknown command: {args.command}") diff --git a/terrarium/dl_gitmanager/dl_gitmanager_tests/unit/conftest.py b/terrarium/dl_gitmanager/dl_gitmanager_tests/unit/conftest.py index e69de29bb..4fb915089 100644 --- a/terrarium/dl_gitmanager/dl_gitmanager_tests/unit/conftest.py +++ b/terrarium/dl_gitmanager/dl_gitmanager_tests/unit/conftest.py @@ -0,0 +1,33 @@ +from pathlib import Path +import shutil +import tempfile +from typing import Generator + +from git.repo.base import Repo as GitRepo +import pytest + +from dl_gitmanager.git_manager import GitManager +from dl_gitmanager_tests.unit.git_tools import GitActionProcessor + + +@pytest.fixture(scope="function") +def base_repo_dir() -> Generator[Path, None, None]: + dir_path = Path(tempfile.mkdtemp()) + try: + yield dir_path + finally: + shutil.rmtree(dir_path) + + +@pytest.fixture(scope="function") +def git_action_proc(base_repo_dir: Path) -> GitActionProcessor: + git_action_proc = GitActionProcessor.initialize_repo(base_repo_dir) + git_action_proc.add_commit(message="Initial commit") + git_action_proc.checkout_new_branch("main") + return git_action_proc + + +@pytest.fixture(scope="function") +def git_manager(base_repo_dir: Path) -> GitManager: + git_manager = GitManager(git_repo=GitRepo(path=base_repo_dir)) + return git_manager diff --git a/terrarium/dl_gitmanager/dl_gitmanager_tests/unit/git_tools.py b/terrarium/dl_gitmanager/dl_gitmanager_tests/unit/git_tools.py new file mode 100644 index 000000000..47638b3d6 --- /dev/null +++ b/terrarium/dl_gitmanager/dl_gitmanager_tests/unit/git_tools.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +from functools import singledispatchmethod +import os +from pathlib import Path +import uuid + +import attr +from git.repo.base import Repo as GitRepo + + +@attr.s(frozen=True) +class GitAction: + pass + + def get_paths(self) -> frozenset[Path]: + return frozenset() + + +@attr.s(frozen=True) +class MultiGitAction(GitAction): + actions: tuple[GitAction, ...] = attr.ib(kw_only=True, default=()) + + def get_paths(self) -> frozenset[Path]: + return frozenset(path for sub_action in self.actions for path in sub_action.get_paths()) + + +@attr.s(frozen=True) +class GitFileAction(GitAction): + path: Path = attr.ib(kw_only=True) + + def get_paths(self) -> frozenset[Path]: + return frozenset((self.path,)) + + +@attr.s(frozen=True) +class AddGitFileAction(GitFileAction): + pass + + +@attr.s(frozen=True) +class RemoveGitFileAction(GitFileAction): + pass + + +@attr.s(frozen=True) +class UpdateGitFileAction(GitFileAction): + pass + + +@attr.s(frozen=True) +class SubmoduleSpec: + name: str = attr.ib(kw_only=True) + path: Path = attr.ib(kw_only=True) + url: str = attr.ib(kw_only=True) + + +@attr.s(frozen=True) +class GitSubmoduleAction(GitAction): + submodule: SubmoduleSpec = attr.ib(kw_only=True) + + def get_paths(self) -> frozenset[Path]: + return frozenset((self.submodule.path,)) + + +@attr.s(frozen=True) +class AddGitSubmoduleAction(GitSubmoduleAction): + pass + + +@attr.s(frozen=True) +class UpdateGitSubmoduleAction(GitSubmoduleAction): + new_commit: str = attr.ib(kw_only=True) + + +@attr.s +class GitActionProcessor: + _repo_path: Path = attr.ib(kw_only=True) + _git_repo: GitRepo = attr.ib(init=False) + + @_git_repo.default + def _make_git_repo(self) -> GitRepo: + return GitRepo(path=self._repo_path) + + @property + def git_repo(self) -> GitRepo: + return self._git_repo + + def generate_path(self) -> Path: + return self._repo_path / uuid.uuid4().hex + + @singledispatchmethod + def _process_action(self, action: GitAction) -> None: + raise TypeError(f"Unsupported action type: {type(action)}") + + @_process_action.register + def _process_multi_action(self, action: MultiGitAction) -> None: + for action in action.actions: + self._process_action(action) + + @_process_action.register + def _process_add_file_action(self, action: AddGitFileAction) -> None: + assert not action.path.exists() + with open(action.path, mode="w") as f: + f.write(str(os.urandom(1024))) + self._git_repo.index.add(str(action.path)) + + @_process_action.register + def _process_remove_file_action(self, action: RemoveGitFileAction) -> None: + assert action.path.exists() + self._git_repo.index.remove(str(action.path)) + os.remove(action.path) + + @_process_action.register + def _process_update_file_action(self, action: UpdateGitFileAction) -> None: + assert action.path.exists() + with open(action.path, mode="r+") as f: + f.write(str(os.urandom(1024))) + self._git_repo.index.add(str(action.path)) + + @_process_action.register + def _process_add_submodule_action(self, action: GitSubmoduleAction) -> None: + sm_path = action.submodule.path + assert not sm_path.exists() + sm_obj = self._git_repo.create_submodule(name=action.submodule.name, url=action.submodule.url, path=sm_path) + sm_obj.update(init=True) + self._git_repo.index.add(str(sm_path)) + + @_process_action.register + def _process_update_submodule_action(self, action: UpdateGitSubmoduleAction) -> None: + sm_path = action.submodule.path + assert sm_path.exists() + sm_obj = self._git_repo.submodule(action.submodule.name) + sm_obj.module().git.checkout(action.new_commit) + self._git_repo.index.add(str(sm_path)) + + def checkout_new_branch(self, branch_name: str) -> None: + new_branch = self._git_repo.create_head(branch_name) + new_branch.checkout() + + def checkout_existing_branch(self, branch_name: str) -> None: + branch = getattr(self._git_repo.heads, branch_name) + branch.checkout() + + def add_commit(self, message: str, action: GitAction = MultiGitAction()) -> str: + self._process_action(action) + self._git_repo.index.commit(message) + return self._git_repo.commit().hexsha + + @classmethod + def initialize_repo(cls, repo_path: Path) -> GitActionProcessor: + GitRepo.init(repo_path) + return cls(repo_path=repo_path) + + def get_submodule_proc(self, submodule: SubmoduleSpec) -> GitActionProcessor: + return GitActionProcessor(repo_path=submodule.path) + + def revert_submodules(self) -> None: + for sm_obj in self._git_repo.submodules: + sm_obj.update(init=True) + + def checkout_commit(self, commit: str) -> None: + self._git_repo.git.checkout(commit) + + def get_current_commit(self) -> str: + return self._git_repo.commit().hexsha diff --git a/terrarium/dl_gitmanager/dl_gitmanager_tests/unit/test_gitmanager.py b/terrarium/dl_gitmanager/dl_gitmanager_tests/unit/test_gitmanager.py new file mode 100644 index 000000000..a836418a7 --- /dev/null +++ b/terrarium/dl_gitmanager/dl_gitmanager_tests/unit/test_gitmanager.py @@ -0,0 +1,149 @@ +import pytest + +from dl_gitmanager.git_manager import GitManager +from dl_gitmanager_tests.unit.git_tools import ( + AddGitFileAction, + GitActionProcessor, + GitSubmoduleAction, + MultiGitAction, + SubmoduleSpec, + UpdateGitSubmoduleAction, +) + + +def _norm_paths(action: MultiGitAction) -> frozenset[str]: + return frozenset(str(path) for path in action.get_paths()) + + +def test_check_linear_range_diffs(git_action_proc: GitActionProcessor, git_manager: GitManager) -> None: + action = MultiGitAction( + actions=( + AddGitFileAction(path=git_action_proc.generate_path()), + AddGitFileAction(path=git_action_proc.generate_path()), + ) + ) + git_action_proc.add_commit(action=action, message="My new commit") + + expected_diff_paths = _norm_paths(action) + actual_diff_paths = frozenset(git_manager.get_range_diff_paths(base="HEAD~1", head="HEAD", absolute=True)) + assert actual_diff_paths == expected_diff_paths + + +def test_check_range_diffs_with_branches(git_action_proc: GitActionProcessor, git_manager: GitManager) -> None: + git_action_proc.checkout_new_branch("my_branch") + action_my_branch_1 = MultiGitAction( + actions=( + AddGitFileAction(path=git_action_proc.generate_path()), + AddGitFileAction(path=git_action_proc.generate_path()), + ) + ) + git_action_proc.add_commit(action=action_my_branch_1, message="My first commit") + action_my_branch_2 = MultiGitAction(actions=(AddGitFileAction(path=git_action_proc.generate_path()),)) + git_action_proc.add_commit(action=action_my_branch_2, message="My second commit") + + git_action_proc.checkout_existing_branch("main") + action_main_1 = MultiGitAction( + actions=( + AddGitFileAction(path=git_action_proc.generate_path()), + AddGitFileAction(path=git_action_proc.generate_path()), + ) + ) + git_action_proc.add_commit(action=action_main_1, message="My main commit") + + # With only_missing_commits=True + expected_diff_paths = _norm_paths(action_my_branch_1) | _norm_paths(action_my_branch_2) + actual_diff_paths = frozenset( + git_manager.get_range_diff_paths( + base="main", + head="my_branch", + absolute=True, + only_missing_commits=True, + ) + ) + assert actual_diff_paths == expected_diff_paths + + # With only_missing_commits=False + expected_diff_paths = _norm_paths(action_my_branch_1) | _norm_paths(action_my_branch_2) | _norm_paths(action_main_1) + actual_diff_paths = frozenset( + git_manager.get_range_diff_paths( + base="main", + head="my_branch", + absolute=True, + only_missing_commits=False, + ) + ) + assert actual_diff_paths == expected_diff_paths + + +@pytest.mark.skip("Some problems with managing submodule") +def test_check_range_diffs_with_branches_and_submodules( + git_action_proc: GitActionProcessor, + git_manager: GitManager, +) -> None: + submodule = SubmoduleSpec( + name="my_sub", path=git_action_proc.generate_path(), url="https://github.com/aio-libs/yarl.git" + ) + git_action_proc.add_commit(message="Add submodule", action=GitSubmoduleAction(submodule=submodule)) + sm_action_proc = git_action_proc.get_submodule_proc(submodule=submodule) + sm_action_proc.checkout_new_branch("main") + + sm_action_proc.checkout_new_branch("my_sm_branch") + action_sm_my_branch_1 = MultiGitAction(actions=(AddGitFileAction(path=sm_action_proc.generate_path()),)) + sm_commit_1 = sm_action_proc.add_commit(action=action_sm_my_branch_1, message="My first SM commit") + + git_action_proc.checkout_new_branch("my_branch") + action_my_branch_1 = MultiGitAction( + actions=( + AddGitFileAction(path=git_action_proc.generate_path()), + AddGitFileAction(path=git_action_proc.generate_path()), + UpdateGitSubmoduleAction(submodule=submodule, new_commit=sm_commit_1), + ) + ) + git_action_proc.add_commit(action=action_my_branch_1, message="My first commit") + + sm_action_proc.checkout_existing_branch("main") + action_sm_main_1 = MultiGitAction(actions=(AddGitFileAction(path=sm_action_proc.generate_path()),)) + sm_commit_2 = sm_action_proc.add_commit(action=action_sm_main_1, message="My main SM commit") + + sm_action_proc.checkout_commit(sm_commit_1) # To revert changes in main repo + git_action_proc.checkout_existing_branch("main") + action_main_1 = MultiGitAction( + actions=( + AddGitFileAction(path=git_action_proc.generate_path()), + AddGitFileAction(path=git_action_proc.generate_path()), + UpdateGitSubmoduleAction(submodule=submodule, new_commit=sm_commit_2), + ) + ) + git_action_proc.add_commit(action=action_main_1, message="My main commit") + + # With only_missing_commits=True + # Note that for SM all commits are included - added and removed + expected_diff_paths = ( + _norm_paths(action_my_branch_1) | _norm_paths(action_sm_my_branch_1) | _norm_paths(action_sm_main_1) + ) + actual_diff_paths = frozenset( + git_manager.get_range_diff_paths( + base="main", + head="my_branch", + absolute=True, + only_missing_commits=True, + ) + ) + assert actual_diff_paths == expected_diff_paths + + # With only_missing_commits=False + expected_diff_paths = ( + _norm_paths(action_my_branch_1) + | _norm_paths(action_main_1) + | _norm_paths(action_sm_my_branch_1) + | _norm_paths(action_sm_main_1) + ) + actual_diff_paths = frozenset( + git_manager.get_range_diff_paths( + base="main", + head="my_branch", + absolute=True, + only_missing_commits=False, + ) + ) + assert actual_diff_paths == expected_diff_paths diff --git a/terrarium/dl_gitmanager/pyproject.toml b/terrarium/dl_gitmanager/pyproject.toml index 8ed4c0e8c..bd85d2f91 100644 --- a/terrarium/dl_gitmanager/pyproject.toml +++ b/terrarium/dl_gitmanager/pyproject.toml @@ -31,9 +31,6 @@ minversion = "6.0" addopts = "-ra" testpaths = [] -[datalens_ci] -skip_test = true - [tool.mypy] warn_unused_configs = true disallow_untyped_defs = true