From e81f6a87a337501acf57003b4c990e86dc1903eb Mon Sep 17 00:00:00 2001
From: Grigory Statsenko
Date: Mon, 16 Oct 2023 12:32:36 +0200
Subject: [PATCH] Added the --only-added-commits option to dl-git

Add `range-diff-paths --only-added-commits` and a `list-diff-paths`
command that reads commit ids from stdin.

- Rename GitManager._iter_diffs to _iter_range_diffs, including the
  recursive call made for submodules, which still used the old name.
- Walk commit ancestry iteratively and collect hexshas only, so long
  histories do not exceed the recursion limit and raw ref names never
  end up in the result set.
- list-diff-paths takes only --absolute; it never read --base/--head.

---
 .../dl_gitmanager/git_manager.py              | 101 +++++++++++++++++-
 .../dl_gitmanager/scripts/gitmanager_cli.py   |  46 ++++++--
 2 files changed, 134 insertions(+), 13 deletions(-)

diff --git a/terrarium/dl_gitmanager/dl_gitmanager/git_manager.py b/terrarium/dl_gitmanager/dl_gitmanager/git_manager.py
index 458034c4b..369531ea8 100644
--- a/terrarium/dl_gitmanager/dl_gitmanager/git_manager.py
+++ b/terrarium/dl_gitmanager/dl_gitmanager/git_manager.py
@@ -1,7 +1,11 @@
 from __future__ import annotations
 
 from pathlib import Path
-from typing import Generator
+from typing import (
+    Collection,
+    Generator,
+    Iterable,
+)
 
 import attr
 from git.diff import Diff
@@ -21,7 +25,17 @@ def get_head_commit(self) -> str:
     def get_commit(self, commit_specifier: str) -> str:
         return self.git_repo.commit(commit_specifier).hexsha
 
-    def _iter_diffs(self, base: str, head: str, absolute: bool = False) -> Generator[tuple[Path, Diff], None, None]:
+    def _get_sm_commit(self, submodule_name: str, commit: str) -> str:
+        """Return the hexsha recorded at tree entry ``submodule_name`` of ``commit``."""
+        return self.git_repo.commit(commit).tree[submodule_name].hexsha
+
+    def _iter_range_diffs(
+        self,
+        base: str,
+        head: str,
+        absolute: bool = False,
+        submodules: bool = True,
+    ) -> Generator[tuple[Path, Diff], None, None]:
         base_commit = self.git_repo.commit(base)
         head_commit = self.git_repo.commit(head)
 
@@ -55,9 +69,57 @@ def _iter_diffs(self, base: str, head: str, absolute: bool = False) -> Generator
-            for _, diff_item in submodule_manager._iter_diffs(base=submodule_base, head=submodule_head):
+            for _, diff_item in submodule_manager._iter_range_diffs(base=submodule_base, head=submodule_head):
                 yield submodule_base_path, diff_item
 
-    def get_diff_paths(self, base: str, head: str, absolute: bool = False) -> list[str]:
+    def _iter_list_diffs(
+        self, commits: Collection[str], absolute: bool = False
+    ) -> Generator[tuple[Path, Diff], None, None]:
+        """Yield (base path, diff) pairs for the given commits, each diffed against its parents."""
+        if not commits:
+            return
+
+        base_path: Path
+        if absolute:
+            base_path = self.get_root_path()
+        else:
+            base_path = Path(".")
+
+        # Iter own diffs
+        for commit_str in commits:
+            commit_obj = self.git_repo.commit(commit_str)
+            for parent in commit_obj.parents:
+                for diff_item in commit_obj.diff(parent):
+                    yield base_path, diff_item
+
+        # Iter submodules and get their internal diffs
+        for submodule in self.git_repo.submodules:
+            submodule_name = submodule.name
+            submodule_manager = self.get_submodule_manager(submodule_name=submodule_name)
+            sm_commits_for_all_commits: set[str] = set()
+            for commit_str in commits:
+                commit_obj = self.git_repo.commit(commit_str)
+                submodule_commit = self._get_sm_commit(submodule_name=submodule_name, commit=commit_str)
+                sm_ancestors = submodule_manager.get_all_ancestor_commits(submodule_commit)
+                parent_commit_objs = commit_obj.parents
+                for parent_commit_obj in parent_commit_objs:
+                    sm_commit_of_parent = self._get_sm_commit(
+                        submodule_name=submodule_name, commit=parent_commit_obj.hexsha
+                    )
+                    sm_ancestors -= submodule_manager.get_all_ancestor_commits(sm_commit_of_parent)
+
+                sm_commits_for_all_commits |= sm_ancestors
+
+            submodule_base_path: Path
+            if absolute:
+                submodule_base_path = submodule_manager.get_root_path()
+            else:
+                submodule_base_path = Path(submodule.path)
+
+            for _, diff_item in submodule_manager._iter_list_diffs(commits=sm_commits_for_all_commits):
+                yield submodule_base_path, diff_item
+
+    def _collect_paths_from_diffs(self, diff_iterable: Iterable[tuple[Path, Diff]]) -> list[str]:
+        """Return the sorted, de-duplicated paths touched by the given diffs."""
         result: set[str] = set()
-        for base_path, diff_item in self._iter_diffs(base=base, head=head, absolute=absolute):
+        for base_path, diff_item in diff_iterable:
             if diff_item.a_path:
                 result.add(str(base_path / diff_item.a_path))
             if diff_item.b_path:
@@ -65,6 +127,35 @@ def get_diff_paths(self, base: str, head: str, absolute: bool = False) -> list[s
 
         return sorted(result)
 
+    def get_range_diff_paths(self, base: str, head: str, absolute: bool = False) -> list[str]:
+        """Return the sorted paths that differ between ``base`` and ``head``."""
+        return self._collect_paths_from_diffs(
+            diff_iterable=self._iter_range_diffs(base=base, head=head, absolute=absolute)
+        )
+
+    def get_list_diff_paths(self, commits: Collection[str], absolute: bool = False) -> list[str]:
+        """Return the sorted paths changed by the given commits relative to their parents."""
+        return self._collect_paths_from_diffs(diff_iterable=self._iter_list_diffs(commits=commits, absolute=absolute))
+
+    def get_all_ancestor_commits(self, commit: str) -> set[str]:
+        """Return the hexshas of ``commit`` and of every commit reachable from it."""
+        # Walk iteratively with a visited set: plain recursion hits the interpreter's
+        # recursion limit on long histories and re-walks shared ancestors of merges.
+        result: set[str] = set()
+        pending = [self.git_repo.commit(commit)]
+        while pending:
+            commit_obj = pending.pop()
+            if commit_obj.hexsha in result:
+                continue
+            result.add(commit_obj.hexsha)
+            pending.extend(commit_obj.parents)
+
+        return result
+
+    def get_missing_commits(self, base: str, head: str) -> set[str]:
+        """Return the hexshas of commits reachable from ``head`` but not from ``base``."""
+        return self.get_all_ancestor_commits(head) - self.get_all_ancestor_commits(base)
+
     def get_submodule_manager(self, submodule_name: str) -> GitManager:
         submodule_dict = {sm.name: sm for sm in self.git_repo.submodules}
         submodule = submodule_dict[submodule_name]
diff --git a/terrarium/dl_gitmanager/dl_gitmanager/scripts/gitmanager_cli.py b/terrarium/dl_gitmanager/dl_gitmanager/scripts/gitmanager_cli.py
index ed20ed653..f9e635b7e 100644
--- a/terrarium/dl_gitmanager/dl_gitmanager/scripts/gitmanager_cli.py
+++ b/terrarium/dl_gitmanager/dl_gitmanager/scripts/gitmanager_cli.py
@@ -3,10 +3,12 @@
 import argparse
 from pathlib import Path
 import sys
-from typing import Optional
+from typing import (
+    Optional,
+    TextIO,
+)
 
 import attr
-from git.repo.base import Repo as GitRepo
 
 from dl_cli_tools.cli_base import CliToolBase
 from dl_cli_tools.logging import setup_basic_logging
@@ -16,6 +18,7 @@
 
 @attr.s
 class GitManagerTool(CliToolBase):
+    input_text_io: TextIO = attr.ib(kw_only=True)
     git_manager: GitManager = attr.ib(kw_only=True)
 
     @classmethod
@@ -38,19 +41,42 @@ def get_parser(cls) -> argparse.ArgumentParser:
         # commands
         subparsers = parser.add_subparsers(title="command", dest="command")
 
+        range_diff_paths_parser = subparsers.add_parser(
+            "range-diff-paths",
+            parents=[base_head_parser, absolute_parser],
+            help="List file paths with changes given as commit range",
+        )
+        range_diff_paths_parser.add_argument(
+            "--only-added-commits", action="store_true", help="Inspect only commits that are added in head"
+        )
+
         subparsers.add_parser(
-            "diff-paths", parents=[base_head_parser, absolute_parser], help="List file paths with changes"
+            "list-diff-paths",
+            parents=[absolute_parser],
+            help="List file paths with changes given as commit list",
         )
 
         return parser
 
-    def diff_paths(self, base: str, head: Optional[str], absolute: bool) -> None:
-        diff_name_list = self.git_manager.get_diff_paths(base=base, head=head, absolute=absolute)
+    def range_diff_paths(self, base: str, head: Optional[str], absolute: bool, only_added_commits: bool) -> None:
+        """Print paths changed between base and head, optionally limited to commits added in head."""
+        diff_name_list: list[str]
+        if only_added_commits:
+            commits = self.git_manager.get_missing_commits(base=base, head=head)
+            diff_name_list = self.git_manager.get_list_diff_paths(commits=commits, absolute=absolute)
+        else:
+            diff_name_list = self.git_manager.get_range_diff_paths(base=base, head=head, absolute=absolute)
+        print("\n".join(diff_name_list))
+
+    def list_diff_paths(self, absolute: bool) -> None:
+        """Print paths changed by the commits listed one per line on the input stream."""
+        commits = [line.strip() for line in self.input_text_io if line.strip()]
+        diff_name_list = self.git_manager.get_list_diff_paths(commits=commits, absolute=absolute)
         print("\n".join(diff_name_list))
 
     @classmethod
     def initialize(cls, git_manager: GitManager) -> GitManagerTool:
-        tool = cls(git_manager=git_manager)
+        tool = cls(input_text_io=sys.stdin, git_manager=git_manager)
         return tool
 
     @classmethod
@@ -60,8 +86,12 @@ def run_parsed_args(cls, args: argparse.Namespace) -> None:
         tool = cls.initialize(git_manager=git_manager)
 
         match args.command:
-            case "diff-paths":
-                tool.diff_paths(base=args.base, head=args.head, absolute=args.absolute)
+            case "range-diff-paths":
+                tool.range_diff_paths(
+                    base=args.base, head=args.head, absolute=args.absolute, only_added_commits=args.only_added_commits
+                )
+            case "list-diff-paths":
+                tool.list_diff_paths(absolute=args.absolute)
             case _:
                 raise RuntimeError(f"Got unknown command: {args.command}")
 