Skip to content

Commit

Permalink
Simplified gitmanager and added tests (#72)
Browse files Browse the repository at this point in the history
* Simplified gitmanager logic

* Added some simple tests to gitmanager

* Turned on tests in gitmanager
  • Loading branch information
altvod authored Nov 8, 2023
1 parent 66807a2 commit 31b3f58
Show file tree
Hide file tree
Showing 8 changed files with 421 additions and 139 deletions.
4 changes: 1 addition & 3 deletions terrarium/bi_ci/bi_ci/execute_mypy_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
from pathlib import Path
import subprocess
import sys
from typing import (
Iterable,
)
from typing import Iterable

import clize
import tomlkit
Expand Down
11 changes: 0 additions & 11 deletions terrarium/dl_gitmanager/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,3 @@ By default they are printed relative to the repository root.

The `--only-added-commits` option makes the tool inspect only commits
that have been added in the head version.

### list-diff-paths

List the files changed by the commits passed on standard input (one commit ID per line)

```
echo <commit-id> | dl-git list-diff-paths
echo <commit-id> | dl-git list-diff-paths --absolute
```

Option `--absolute` has the same meaning as in `range-diff-paths`.
164 changes: 62 additions & 102 deletions terrarium/dl_gitmanager/dl_gitmanager/git_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@

from pathlib import Path
from typing import (
Collection,
Generator,
Iterable,
Optional,
)

import attr
from git.diff import Diff
from git.objects.commit import Commit
from git.objects.submodule.base import Submodule
from git.repo.base import Repo as GitRepo


Expand All @@ -18,104 +20,64 @@
@attr.s
class GitManager:
git_repo: GitRepo = attr.ib(kw_only=True)
path_prefix: Path = attr.ib(kw_only=True, default=Path("."))

def get_root_path(self) -> Path:
    """Return the working tree directory of the wrapped repository as a ``Path``."""
    working_tree_dir = self.git_repo.working_tree_dir
    return Path(working_tree_dir)

def get_head_commit(self) -> str:
return self.git_repo.head.commit.hexsha
def _get_commit_obj(self, commit_specifier: str) -> Commit:
    """Resolve ``commit_specifier`` to a commit object using the wrapped repository."""
    repo = self.git_repo
    return repo.commit(commit_specifier)

def get_commit(self, commit_specifier: str) -> str:
return self.git_repo.commit(commit_specifier).hexsha
def _iter_commits(self, base: str, head: str, only_missing_commits: bool) -> Iterable[Commit]:
    """Iterate over the commits selected by a revision range built from ``base`` and ``head``.

    With ``only_missing_commits`` set, the two-dot range ``base..head`` is used
    (commits reachable from ``head`` but not from ``base``); otherwise the
    three-dot range ``base...head`` is used (commits reachable from either
    side but not from both).
    """
    revision_range = f"{base}..{head}" if only_missing_commits else f"{base}...{head}"
    return self.git_repo.iter_commits(revision_range)

def _iter_diffs_from_commit(self, commit_obj: Commit) -> Iterable[Diff]:
    """Yield every diff entry between ``commit_obj`` and each of its parents.

    A commit without parents yields nothing; a commit with several parents
    yields the entries for each parent in turn.
    """
    for parent_commit in commit_obj.parents:
        for diff_entry in commit_obj.diff(parent_commit):
            yield diff_entry

def _get_sm_commit(self, submodule_name: str, commit: str) -> str:
return self.git_repo.commit(commit).tree[submodule_name].hexsha
def _get_submodule_commit(self, submodule: Submodule, commit_obj: Commit) -> str:
    """Return the hex SHA that ``submodule`` is pinned to in ``commit_obj``.

    :param submodule: the submodule whose pinned revision is wanted
    :param commit_obj: the superproject commit whose tree is inspected
    :return: ``hexsha`` of the tree entry found at the submodule's path
    :raises KeyError: if the commit's tree has no entry at the submodule's path
    """
    # Tree entries are looked up by repository path. The submodule's
    # configured name usually equals its path but is allowed to differ,
    # so the path is the reliable key here.
    tree_item = commit_obj.tree[submodule.path]
    return tree_item.hexsha

def _iter_range_diffs(
self,
base: str,
head: str,
absolute: bool = False,
submodules: bool = True,
only_missing_commits: bool = False,
) -> Generator[tuple[Path, Diff], None, None]:
base_commit = self.git_repo.commit(base)
head_commit = self.git_repo.commit(head)

base_path: Path
if absolute:
base_path = self.get_root_path()
else:
base_path = Path(".")

# Iter own diffs
diff_index = head_commit.diff(base_commit)
for diff_item in diff_index:
yield base_path, diff_item

# Iter submodules and get their internal diffs
for submodule in self.git_repo.submodules:
submodule_name = submodule.name
base_tree_item = base_commit.tree[submodule_name]
head_tree_item = head_commit.tree[submodule_name]
submodule_base = base_tree_item.hexsha
submodule_head = head_tree_item.hexsha
submodule_manager = self.get_submodule_manager(submodule_name=submodule_name)

submodule_base_path: Path
if absolute:
submodule_base_path = submodule_manager.get_root_path()
else:
submodule_base_path = Path(submodule.path)

# Iterate. Override the repo paths here with the submodule path
for _, diff_item in submodule_manager._iter_range_diffs(base=submodule_base, head=submodule_head):
yield submodule_base_path, diff_item

def _iter_list_diffs(
self, commits: Collection[str], absolute: bool = False
) -> Generator[tuple[Path, Diff], None, None]:
if not commits:
return

base_path: Path
if absolute:
base_path = self.get_root_path()
else:
base_path = Path(".")

# Iter own diffs
for commit_str in commits:
commit_obj = self.git_repo.commit(commit_str)
for parent in commit_obj.parents:
for diff_item in commit_obj.diff(parent):
yield base_path, diff_item

# Iter submodules and get their internal diffs
for submodule in self.git_repo.submodules:
submodule_name = submodule.name
submodule_manager = self.get_submodule_manager(submodule_name=submodule_name)
sm_commits_for_all_commits: set[str] = set()
for commit_str in commits:
commit_obj = self.git_repo.commit(commit_str)
submodule_commit = self._get_sm_commit(submodule_name=submodule_name, commit=commit_str)
sm_ancestors = submodule_manager.get_all_ancestor_commits(submodule_commit)
parent_commit_objs = commit_obj.parents
for parent_commit_obj in parent_commit_objs:
sm_commit_of_parent = self._get_sm_commit(
submodule_name=submodule_name, commit=parent_commit_obj.hexsha
)
sm_ancestors -= submodule_manager.get_all_ancestor_commits(sm_commit_of_parent)

sm_commits_for_all_commits |= sm_ancestors

submodule_base_path: Path
if absolute:
submodule_base_path = submodule_manager.get_root_path()
else:
submodule_base_path = Path(submodule.path)

for _, diff_item in submodule_manager._iter_list_diffs(commits=sm_commits_for_all_commits):
yield submodule_base_path, diff_item
# Get commit objects
base_commit = self._get_commit_obj(base)
head_commit = self._get_commit_obj(head)

base_path = self.get_root_path() if absolute else self.path_prefix

# Iter commits:
for commit_obj in self._iter_commits(base=base, head=head, only_missing_commits=only_missing_commits):
for diff_item in self._iter_diffs_from_commit(commit_obj):
yield base_path, diff_item

# Go to submodules if needed
if submodules:
# Iter submodules and get their internal diffs
for submodule in self.git_repo.submodules:
submodule_base = self._get_submodule_commit(submodule=submodule, commit_obj=base_commit)
submodule_head = self._get_submodule_commit(submodule=submodule, commit_obj=head_commit)
submodule_manager = self.get_submodule_manager(
submodule=submodule,
path_prefix=Path(submodule.path),
)
yield from submodule_manager._iter_range_diffs(
base=submodule_base,
head=submodule_head,
absolute=absolute,
only_missing_commits=only_missing_commits,
submodules=submodules,
)

def _collect_paths_from_diffs(self, diff_iterable: Iterable[tuple[Path, Diff]]) -> list[str]:
result: set[str] = set()
Expand All @@ -127,23 +89,21 @@ def _collect_paths_from_diffs(self, diff_iterable: Iterable[tuple[Path, Diff]])

return sorted(result)

def get_range_diff_paths(self, base: str, head: str, absolute: bool = False) -> list[str]:
def get_range_diff_paths(
self,
base: str,
head: str,
absolute: bool = False,
only_missing_commits: bool = False,
) -> list[str]:
return self._collect_paths_from_diffs(
diff_iterable=self._iter_range_diffs(base=base, head=head, absolute=absolute)
diff_iterable=self._iter_range_diffs(
base=base,
head=head,
absolute=absolute,
only_missing_commits=only_missing_commits,
)
)

def get_list_diff_paths(self, commits: Collection[str], absolute: bool = False) -> list[str]:
return self._collect_paths_from_diffs(diff_iterable=self._iter_list_diffs(commits=commits, absolute=absolute))

def get_all_ancestor_commits(self, commit: str) -> set[str]:
commits = {commit.hexsha for commit in self.git_repo.iter_commits(commit, max_count=MAX_HISTORY_DEPTH)}
return commits

def get_missing_commits(self, base: str, head: str) -> set[str]:
commits = {commit.hexsha for commit in self.git_repo.iter_commits(f"{base}..{head}")}
return commits

def get_submodule_manager(self, submodule_name: str) -> GitManager:
submodule_dict = {sm.name: sm for sm in self.git_repo.submodules}
submodule = submodule_dict[submodule_name]
return type(self)(git_repo=submodule.module())
def get_submodule_manager(self, submodule: Submodule, path_prefix: Optional[Path] = None) -> GitManager:
    """Create a manager of the same class for the repository behind ``submodule``.

    :param submodule: submodule whose repository the new manager wraps
    :param path_prefix: prefix handed to the new manager; ``Path(".")`` when omitted
    :return: a new instance of this manager's own class
    """
    manager_cls = type(self)
    submodule_repo = submodule.module()
    effective_prefix = path_prefix or Path(".")
    return manager_cls(git_repo=submodule_repo, path_prefix=effective_prefix)
30 changes: 10 additions & 20 deletions terrarium/dl_gitmanager/dl_gitmanager/scripts/gitmanager_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,26 +50,15 @@ def get_parser(cls) -> argparse.ArgumentParser:
"--only-added-commits", action="store_true", help="Inspect only commits that are added in head"
)

subparsers.add_parser(
"list-diff-paths",
parents=[base_head_parser, absolute_parser],
help="List file paths with changes given as commit list",
)

return parser

def range_diff_paths(self, base: str, head: Optional[str], absolute: bool, only_added_commits: bool) -> None:
diff_name_list: list[str]
if only_added_commits:
commits = self.git_manager.get_missing_commits(base=base, head=head)
diff_name_list = self.git_manager.get_list_diff_paths(commits=commits, absolute=absolute)
else:
diff_name_list = self.git_manager.get_range_diff_paths(base=base, head=head, absolute=absolute)
print("\n".join(diff_name_list))

def list_diff_paths(self, absolute: bool) -> None:
commits = [line.strip() for line in self.input_text_io if line.strip()]
diff_name_list = self.git_manager.get_list_diff_paths(commits=commits, absolute=absolute)
diff_name_list = self.git_manager.get_range_diff_paths(
base=base,
head=head,
absolute=absolute,
only_missing_commits=only_added_commits,
)
print("\n".join(diff_name_list))

@classmethod
Expand All @@ -86,10 +75,11 @@ def run_parsed_args(cls, args: argparse.Namespace) -> None:
match args.command:
case "range-diff-paths":
tool.range_diff_paths(
base=args.base, head=args.head, absolute=args.absolute, only_added_commits=args.only_added_commits
base=args.base,
head=args.head,
absolute=args.absolute,
only_added_commits=args.only_added_commits,
)
case "list-diff-paths":
tool.list_diff_paths(absolute=args.absolute)
case _:
raise RuntimeError(f"Got unknown command: {args.command}")

Expand Down
33 changes: 33 additions & 0 deletions terrarium/dl_gitmanager/dl_gitmanager_tests/unit/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from pathlib import Path
import shutil
import tempfile
from typing import Generator

from git.repo.base import Repo as GitRepo
import pytest

from dl_gitmanager.git_manager import GitManager
from dl_gitmanager_tests.unit.git_tools import GitActionProcessor


@pytest.fixture(scope="function")
def base_repo_dir() -> Generator[Path, None, None]:
    """Provide a fresh temporary directory and remove it when the test finishes."""
    tmp_dir = Path(tempfile.mkdtemp())
    try:
        yield tmp_dir
    finally:
        # Clean up even when the test body raised.
        shutil.rmtree(tmp_dir)


@pytest.fixture(scope="function")
def git_action_proc(base_repo_dir: Path) -> GitActionProcessor:
    """Set up a repository in ``base_repo_dir`` with one commit and a new ``main`` branch checked out."""
    processor = GitActionProcessor.initialize_repo(base_repo_dir)
    processor.add_commit(message="Initial commit")
    processor.checkout_new_branch("main")
    return processor


@pytest.fixture(scope="function")
def git_manager(base_repo_dir: Path, git_action_proc: GitActionProcessor) -> GitManager:
    """Return a ``GitManager`` wrapping the repository located in ``base_repo_dir``.

    ``git_action_proc`` is requested only for its side effect: it is the
    fixture that prepares the repository in ``base_repo_dir`` (presumably
    creating it via ``initialize_repo`` - confirm in ``git_tools``). Declaring
    it here guarantees that the repository exists before ``GitRepo`` opens the
    directory, whatever order a test lists its fixtures in.
    """
    return GitManager(git_repo=GitRepo(path=base_repo_dir))
Loading

0 comments on commit 31b3f58

Please sign in to comment.