Skip to content

Commit

Permalink
Added the --only-added-commits option to dl-git
Browse files: browse the repository at this point in the history
  • Loading branch information
altvod committed Oct 16, 2023
1 parent 79cb89c commit e81f6a8
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 12 deletions.
86 changes: 82 additions & 4 deletions terrarium/dl_gitmanager/dl_gitmanager/git_manager.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from __future__ import annotations

from pathlib import Path
from typing import Generator
from typing import (
Collection,
Generator,
Iterable,
)

import attr
from git.diff import Diff
Expand All @@ -21,7 +25,16 @@ def get_head_commit(self) -> str:
def get_commit(self, commit_specifier: str) -> str:
    """Resolve a commit specifier to the hex SHA of the commit it names.

    ``commit_specifier`` is handed unchanged to ``self.git_repo.commit``.
    """
    resolved_commit = self.git_repo.commit(commit_specifier)
    return resolved_commit.hexsha

def _iter_diffs(self, base: str, head: str, absolute: bool = False) -> Generator[tuple[Path, Diff], None, None]:
def _get_sm_commit(self, submodule_name: str, commit: str) -> str:
    """Return the hex SHA that a submodule is pinned to in a superproject commit.

    Args:
        submodule_name: Name of the submodule (``submodule.name``).
        commit: Specifier of the superproject commit to inspect.

    Raises:
        KeyError: If the commit's tree has no entry for the submodule.
    """
    # The commit tree is indexed by *path*, while callers identify the
    # submodule by *name*; the two may differ. Translate name -> path using
    # the repository's registered submodules, and fall back to the name
    # itself so the previous behavior is kept when no match is found.
    tree_path = submodule_name
    for submodule in self.git_repo.submodules:
        if submodule.name == submodule_name:
            tree_path = submodule.path
            break

    return self.git_repo.commit(commit).tree[tree_path].hexsha

def _iter_range_diffs(
self,
base: str,
head: str,
absolute: bool = False,
submodules: bool = True,
) -> Generator[tuple[Path, Diff], None, None]:
base_commit = self.git_repo.commit(base)
head_commit = self.git_repo.commit(head)

Expand Down Expand Up @@ -55,16 +68,81 @@ def _iter_diffs(self, base: str, head: str, absolute: bool = False) -> Generator
for _, diff_item in submodule_manager._iter_diffs(base=submodule_base, head=submodule_head):
yield submodule_base_path, diff_item

def get_diff_paths(self, base: str, head: str, absolute: bool = False) -> list[str]:
def _iter_list_diffs(
    self, commits: Collection[str], absolute: bool = False
) -> Generator[tuple[Path, Diff], None, None]:
    """Yield ``(base_path, diff)`` pairs for an explicit list of commits.

    Every listed commit is diffed against each of its parents. For each
    submodule, the submodule commits introduced by the listed commits are
    computed (ancestors of the pinned submodule commit minus ancestors of
    what each parent pinned) and their diffs are yielded as well.

    Args:
        commits: Commit specifiers of this repository to inspect.
        absolute: If true, base paths are repository root paths; otherwise
            they are relative (``.`` for this repository, the submodule
            path for submodules).
    """
    if not commits:
        return

    base_path: Path = self.get_root_path() if absolute else Path(".")

    # Iter own diffs
    for commit_str in commits:
        commit_obj = self.git_repo.commit(commit_str)
        for parent in commit_obj.parents:
            for diff_item in commit_obj.diff(parent):
                yield base_path, diff_item

    # Iter submodules and get their internal diffs
    for submodule in self.git_repo.submodules:
        submodule_name = submodule.name
        submodule_manager = self.get_submodule_manager(submodule_name=submodule_name)
        sm_commits_for_all_commits: set[str] = set()
        for commit_str in commits:
            commit_obj = self.git_repo.commit(commit_str)
            try:
                submodule_commit = self._get_sm_commit(submodule_name=submodule_name, commit=commit_str)
            except KeyError:
                # The submodule has no entry in this commit's tree, so the
                # commit cannot have introduced any submodule commits.
                continue

            sm_ancestors = submodule_manager.get_all_ancestor_commits(submodule_commit)
            for parent_commit_obj in commit_obj.parents:
                try:
                    sm_commit_of_parent = self._get_sm_commit(
                        submodule_name=submodule_name, commit=parent_commit_obj.hexsha
                    )
                except KeyError:
                    # The parent did not have the submodule yet (e.g. this
                    # commit adds it): nothing to subtract for this parent.
                    continue
                sm_ancestors -= submodule_manager.get_all_ancestor_commits(sm_commit_of_parent)

            sm_commits_for_all_commits |= sm_ancestors

        submodule_base_path: Path
        if absolute:
            submodule_base_path = submodule_manager.get_root_path()
        else:
            submodule_base_path = Path(submodule.path)

        # NOTE(review): the base paths produced by the recursive call are
        # discarded, so diffs of nested submodules are reported under this
        # submodule's base path.
        for _, diff_item in submodule_manager._iter_list_diffs(commits=sm_commits_for_all_commits):
            yield submodule_base_path, diff_item

def _collect_paths_from_diffs(self, diff_iterable: Iterable[tuple[Path, Diff]]) -> list[str]:
    """Return the sorted, de-duplicated file paths touched by the given diffs.

    Args:
        diff_iterable: ``(base_path, diff)`` pairs; the diff's ``a_path`` and
            ``b_path`` (when set) are each joined onto ``base_path``.

    Returns:
        Sorted list of unique path strings.
    """
    result: set[str] = set()
    # The diffs come solely from the caller-supplied iterable; the former
    # loop over ``self._iter_diffs(base=..., head=...)`` no longer belongs here.
    for base_path, diff_item in diff_iterable:
        for side_path in (diff_item.a_path, diff_item.b_path):
            # Either side may be empty/None (added or deleted file).
            if side_path:
                result.add(str(base_path / side_path))

    return sorted(result)

def get_range_diff_paths(self, base: str, head: str, absolute: bool = False) -> list[str]:
    """List the file paths of the diffs produced for the ``base``/``head`` range.

    The diffs come from ``_iter_range_diffs`` and are reduced to paths by
    ``_collect_paths_from_diffs``.
    """
    range_diffs = self._iter_range_diffs(base=base, head=head, absolute=absolute)
    return self._collect_paths_from_diffs(diff_iterable=range_diffs)

def get_list_diff_paths(self, commits: Collection[str], absolute: bool = False) -> list[str]:
    """List the file paths of the diffs produced for an explicit set of commits.

    The diffs come from ``_iter_list_diffs`` and are reduced to paths by
    ``_collect_paths_from_diffs``.
    """
    list_diffs = self._iter_list_diffs(commits=commits, absolute=absolute)
    return self._collect_paths_from_diffs(diff_iterable=list_diffs)

def get_all_ancestor_commits(self, commit: str) -> set[str]:
    """Return ``commit`` together with the hex SHAs of all of its ancestors.

    The starting ``commit`` is included exactly as it was passed in (it is
    not normalized to a hex SHA); every ancestor is included by its hex SHA.

    The history is walked iteratively with a visited set, so each commit is
    expanded once: merge-heavy histories do not cause repeated traversal and
    long histories do not exhaust the interpreter's recursion limit.
    """
    result: set[str] = {commit}
    pending = [self.git_repo.commit(commit)]
    while pending:
        commit_obj = pending.pop()
        for parent in commit_obj.parents:
            parent_sha = parent.hexsha
            # Already-seen commits have been (or will be) expanded elsewhere.
            if parent_sha not in result:
                result.add(parent_sha)
                pending.append(parent)

    return result

def get_missing_commits(self, base: str, head: str) -> set[str]:
    """Return the hex SHAs of commits reachable from ``head`` but not from ``base``.

    Args:
        base: Specifier of the commit whose history is excluded.
        head: Specifier of the commit whose history is inspected.
    """
    # ``get_all_ancestor_commits`` keeps its starting specifier verbatim, so
    # a symbolic ``base`` (branch or tag name) would never cancel out the
    # base commit's SHA found in head's history. Resolve both ends to hex
    # SHAs first so the set difference compares like with like.
    base_sha = self.git_repo.commit(base).hexsha
    head_sha = self.git_repo.commit(head).hexsha
    return self.get_all_ancestor_commits(head_sha) - self.get_all_ancestor_commits(base_sha)

def get_submodule_manager(self, submodule_name: str) -> GitManager:
submodule_dict = {sm.name: sm for sm in self.git_repo.submodules}
submodule = submodule_dict[submodule_name]
Expand Down
44 changes: 36 additions & 8 deletions terrarium/dl_gitmanager/dl_gitmanager/scripts/gitmanager_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
import argparse
from pathlib import Path
import sys
from typing import Optional
from typing import (
Optional,
TextIO,
)

import attr
from git.repo.base import Repo as GitRepo

from dl_cli_tools.cli_base import CliToolBase
from dl_cli_tools.logging import setup_basic_logging
Expand All @@ -16,6 +18,7 @@

@attr.s
class GitManagerTool(CliToolBase):
input_text_io: TextIO = attr.ib(kw_only=True)
git_manager: GitManager = attr.ib(kw_only=True)

@classmethod
Expand All @@ -38,19 +41,40 @@ def get_parser(cls) -> argparse.ArgumentParser:
# commands
subparsers = parser.add_subparsers(title="command", dest="command")

range_diff_paths_parser = subparsers.add_parser(
"range-diff-paths",
parents=[base_head_parser, absolute_parser],
help="List file paths with changes given as commit range",
)
range_diff_paths_parser.add_argument(
"--only-added-commits", action="store_true", help="Inspect only commits that are added in head"
)

subparsers.add_parser(
"diff-paths", parents=[base_head_parser, absolute_parser], help="List file paths with changes"
"list-diff-paths",
parents=[base_head_parser, absolute_parser],
help="List file paths with changes given as commit list",
)

return parser

def diff_paths(self, base: str, head: Optional[str], absolute: bool) -> None:
diff_name_list = self.git_manager.get_diff_paths(base=base, head=head, absolute=absolute)
def range_diff_paths(self, base: str, head: Optional[str], absolute: bool, only_added_commits: bool) -> None:
    """Print, one per line, the changed file paths for the ``base``/``head`` range.

    With ``only_added_commits`` the paths are collected from the commits
    returned by ``get_missing_commits``; otherwise from the range diff.
    """
    manager = self.git_manager
    if not only_added_commits:
        paths = manager.get_range_diff_paths(base=base, head=head, absolute=absolute)
    else:
        added_commits = manager.get_missing_commits(base=base, head=head)
        paths = manager.get_list_diff_paths(commits=added_commits, absolute=absolute)

    output = "\n".join(paths)
    print(output)

def list_diff_paths(self, absolute: bool) -> None:
    """Print, one per line, the changed file paths for commits read from the input stream.

    Each line of ``self.input_text_io`` is stripped of surrounding whitespace
    and taken as one commit; lines that are empty after stripping are skipped.
    """
    stripped_lines = (raw_line.strip() for raw_line in self.input_text_io)
    commits = [commit for commit in stripped_lines if commit]
    paths = self.git_manager.get_list_diff_paths(commits=commits, absolute=absolute)
    output = "\n".join(paths)
    print(output)

@classmethod
def initialize(cls, git_manager: GitManager) -> GitManagerTool:
    """Create a tool instance bound to ``git_manager`` that reads its input from ``sys.stdin``.

    Args:
        git_manager: Manager the tool delegates its git queries to.
    """
    # ``input_text_io`` is a required keyword-only attribute, so it has to be
    # supplied in the one and only constructor call.
    tool = cls(input_text_io=sys.stdin, git_manager=git_manager)
    return tool

@classmethod
Expand All @@ -60,8 +84,12 @@ def run_parsed_args(cls, args: argparse.Namespace) -> None:
tool = cls.initialize(git_manager=git_manager)

match args.command:
case "diff-paths":
tool.diff_paths(base=args.base, head=args.head, absolute=args.absolute)
case "range-diff-paths":
tool.range_diff_paths(
base=args.base, head=args.head, absolute=args.absolute, only_added_commits=args.only_added_commits
)
case "list-diff-paths":
tool.list_diff_paths(absolute=args.absolute)
case _:
raise RuntimeError(f"Got unknown command: {args.command}")

Expand Down

0 comments on commit e81f6a8

Please sign in to comment.