Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplified gitmanager and added tests #72

Merged
merged 3 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions terrarium/bi_ci/bi_ci/execute_mypy_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
from pathlib import Path
import subprocess
import sys
from typing import (
Iterable,
)
from typing import Iterable

import clize
import tomlkit
Expand Down
11 changes: 0 additions & 11 deletions terrarium/dl_gitmanager/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,3 @@ By default they are printed relative to the repository root.

The `--only-added-commits` option makes the tool inspect only commits
that have been added in the head version.

### list-diff-paths

List the files changed by the commits passed as input (one commit ID per line on standard input)

```
echo <commit-id> | dl-git list-diff-paths
echo <commit-id> | dl-git list-diff-paths --absolute
```

Option `--absolute` has the same meaning as in `range-diff-paths`.
164 changes: 62 additions & 102 deletions terrarium/dl_gitmanager/dl_gitmanager/git_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@

from pathlib import Path
from typing import (
Collection,
Generator,
Iterable,
Optional,
)

import attr
from git.diff import Diff
from git.objects.commit import Commit
from git.objects.submodule.base import Submodule
from git.repo.base import Repo as GitRepo


Expand All @@ -18,104 +20,64 @@
@attr.s
class GitManager:
git_repo: GitRepo = attr.ib(kw_only=True)
path_prefix: Path = attr.ib(kw_only=True, default=Path("."))

def get_root_path(self) -> Path:
    """Return the working-tree directory of the wrapped repository as a ``Path``."""
    # NOTE(review): GitPython documents ``working_tree_dir`` as ``None`` for
    # bare repositories, in which case ``Path(None)`` raises ``TypeError`` —
    # confirm that bare repositories are never passed in.
    working_tree_dir = self.git_repo.working_tree_dir
    return Path(working_tree_dir)

def get_head_commit(self) -> str:
    """Return the hex SHA of the commit that the repository ``head`` points to."""
    head_commit = self.git_repo.head.commit
    return head_commit.hexsha
def _get_commit_obj(self, commit_specifier: str) -> Commit:
    """Resolve ``commit_specifier`` to a commit object through the wrapped repository."""
    commit_obj = self.git_repo.commit(commit_specifier)
    return commit_obj

def get_commit(self, commit_specifier: str) -> str:
    """Resolve ``commit_specifier`` and return the hex SHA of the resulting commit."""
    resolved_commit = self.git_repo.commit(commit_specifier)
    return resolved_commit.hexsha
def _iter_commits(self, base: str, head: str, only_missing_commits: bool) -> Iterable[Commit]:
    """
    Iterate over the commits selected by a ``base``/``head`` revision range.

    When ``only_missing_commits`` is true the two-dot range ``base..head`` is
    passed to ``iter_commits``; otherwise the three-dot range ``base...head``
    is used.
    """
    range_separator = ".." if only_missing_commits else "..."
    return self.git_repo.iter_commits(f"{base}{range_separator}{head}")

def _iter_diffs_from_commit(self, commit_obj: Commit) -> Iterable[Diff]:
    """Yield every diff entry between ``commit_obj`` and each of its parents."""
    # A commit without parents contributes nothing: the outer loop never runs.
    for parent_commit in commit_obj.parents:
        for diff_entry in commit_obj.diff(parent_commit):
            yield diff_entry

def _get_sm_commit(self, submodule_name: str, commit: str) -> str:
    """Return the hex SHA stored under ``submodule_name`` in the tree of ``commit``."""
    commit_tree = self.git_repo.commit(commit).tree
    return commit_tree[submodule_name].hexsha
def _get_submodule_commit(self, submodule: Submodule, commit_obj: Commit) -> str:
    """
    Return the hex SHA that ``commit_obj`` records for ``submodule``.

    :param submodule: the submodule whose pinned commit is requested
    :param commit_obj: the superproject commit whose tree is inspected
    :return: hex SHA of the tree entry found at the submodule's path
    :raises KeyError: if the tree of ``commit_obj`` has no entry at that path
    """
    # Tree entries are keyed by repository-relative path. A submodule's
    # configured name may differ from its path (e.g. after it was moved or
    # added with an explicit name), so the lookup must use ``path``.
    tree_item = commit_obj.tree[submodule.path]
    return tree_item.hexsha

def _iter_range_diffs(
self,
base: str,
head: str,
absolute: bool = False,
submodules: bool = True,
only_missing_commits: bool = False,
) -> Generator[tuple[Path, Diff], None, None]:
base_commit = self.git_repo.commit(base)
head_commit = self.git_repo.commit(head)

base_path: Path
if absolute:
base_path = self.get_root_path()
else:
base_path = Path(".")

# Iter own diffs
diff_index = head_commit.diff(base_commit)
for diff_item in diff_index:
yield base_path, diff_item

# Iter submodules and get their internal diffs
for submodule in self.git_repo.submodules:
submodule_name = submodule.name
base_tree_item = base_commit.tree[submodule_name]
head_tree_item = head_commit.tree[submodule_name]
submodule_base = base_tree_item.hexsha
submodule_head = head_tree_item.hexsha
submodule_manager = self.get_submodule_manager(submodule_name=submodule_name)

submodule_base_path: Path
if absolute:
submodule_base_path = submodule_manager.get_root_path()
else:
submodule_base_path = Path(submodule.path)

# Iterate. Override the repo paths here with the submodule path
for _, diff_item in submodule_manager._iter_range_diffs(base=submodule_base, head=submodule_head):
yield submodule_base_path, diff_item

def _iter_list_diffs(
self, commits: Collection[str], absolute: bool = False
) -> Generator[tuple[Path, Diff], None, None]:
if not commits:
return

base_path: Path
if absolute:
base_path = self.get_root_path()
else:
base_path = Path(".")

# Iter own diffs
for commit_str in commits:
commit_obj = self.git_repo.commit(commit_str)
for parent in commit_obj.parents:
for diff_item in commit_obj.diff(parent):
yield base_path, diff_item

# Iter submodules and get their internal diffs
for submodule in self.git_repo.submodules:
submodule_name = submodule.name
submodule_manager = self.get_submodule_manager(submodule_name=submodule_name)
sm_commits_for_all_commits: set[str] = set()
for commit_str in commits:
commit_obj = self.git_repo.commit(commit_str)
submodule_commit = self._get_sm_commit(submodule_name=submodule_name, commit=commit_str)
sm_ancestors = submodule_manager.get_all_ancestor_commits(submodule_commit)
parent_commit_objs = commit_obj.parents
for parent_commit_obj in parent_commit_objs:
sm_commit_of_parent = self._get_sm_commit(
submodule_name=submodule_name, commit=parent_commit_obj.hexsha
)
sm_ancestors -= submodule_manager.get_all_ancestor_commits(sm_commit_of_parent)

sm_commits_for_all_commits |= sm_ancestors

submodule_base_path: Path
if absolute:
submodule_base_path = submodule_manager.get_root_path()
else:
submodule_base_path = Path(submodule.path)

for _, diff_item in submodule_manager._iter_list_diffs(commits=sm_commits_for_all_commits):
yield submodule_base_path, diff_item
# Get commit objects
base_commit = self._get_commit_obj(base)
head_commit = self._get_commit_obj(head)

base_path = self.get_root_path() if absolute else self.path_prefix

# Iter commits:
for commit_obj in self._iter_commits(base=base, head=head, only_missing_commits=only_missing_commits):
for diff_item in self._iter_diffs_from_commit(commit_obj):
yield base_path, diff_item

# Go to submodules if needed
if submodules:
# Iter submodules and get their internal diffs
for submodule in self.git_repo.submodules:
submodule_base = self._get_submodule_commit(submodule=submodule, commit_obj=base_commit)
submodule_head = self._get_submodule_commit(submodule=submodule, commit_obj=head_commit)
submodule_manager = self.get_submodule_manager(
submodule=submodule,
path_prefix=Path(submodule.path),
)
yield from submodule_manager._iter_range_diffs(
base=submodule_base,
head=submodule_head,
absolute=absolute,
only_missing_commits=only_missing_commits,
submodules=submodules,
)

def _collect_paths_from_diffs(self, diff_iterable: Iterable[tuple[Path, Diff]]) -> list[str]:
result: set[str] = set()
Expand All @@ -127,23 +89,21 @@ def _collect_paths_from_diffs(self, diff_iterable: Iterable[tuple[Path, Diff]])

return sorted(result)

def get_range_diff_paths(self, base: str, head: str, absolute: bool = False) -> list[str]:
def get_range_diff_paths(
self,
base: str,
head: str,
absolute: bool = False,
only_missing_commits: bool = False,
) -> list[str]:
return self._collect_paths_from_diffs(
diff_iterable=self._iter_range_diffs(base=base, head=head, absolute=absolute)
diff_iterable=self._iter_range_diffs(
base=base,
head=head,
absolute=absolute,
only_missing_commits=only_missing_commits,
)
)

def get_list_diff_paths(self, commits: Collection[str], absolute: bool = False) -> list[str]:
    """Collect the paths touched by the diffs of the given ``commits``."""
    diff_iterable = self._iter_list_diffs(commits=commits, absolute=absolute)
    return self._collect_paths_from_diffs(diff_iterable=diff_iterable)

def get_all_ancestor_commits(self, commit: str) -> set[str]:
    """
    Collect the hex SHAs of the commits that ``iter_commits`` yields for ``commit``.

    The traversal is capped at ``MAX_HISTORY_DEPTH`` commits.
    """
    history = self.git_repo.iter_commits(commit, max_count=MAX_HISTORY_DEPTH)
    return {ancestor.hexsha for ancestor in history}

def get_missing_commits(self, base: str, head: str) -> set[str]:
    """Return the hex SHAs of the commits in the two-dot range ``base..head``."""
    revision_range = f"{base}..{head}"
    return {commit_obj.hexsha for commit_obj in self.git_repo.iter_commits(revision_range)}

def get_submodule_manager(self, submodule_name: str) -> GitManager:
    """
    Create a manager of the same class for the submodule named ``submodule_name``.

    Raises ``KeyError`` when the repository has no submodule with that name.
    """
    submodules_by_name = {}
    for candidate in self.git_repo.submodules:
        submodules_by_name[candidate.name] = candidate
    target = submodules_by_name[submodule_name]
    manager_cls = type(self)
    return manager_cls(git_repo=target.module())
def get_submodule_manager(self, submodule: Submodule, path_prefix: Optional[Path] = None) -> GitManager:
    """
    Create a manager of the same class for the repository of ``submodule``.

    ``path_prefix`` is forwarded to the new manager; ``Path(".")`` is used
    when it is not given.
    """
    effective_prefix = Path(".") if path_prefix is None else path_prefix
    manager_cls = type(self)
    return manager_cls(git_repo=submodule.module(), path_prefix=effective_prefix)
30 changes: 10 additions & 20 deletions terrarium/dl_gitmanager/dl_gitmanager/scripts/gitmanager_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,26 +50,15 @@ def get_parser(cls) -> argparse.ArgumentParser:
"--only-added-commits", action="store_true", help="Inspect only commits that are added in head"
)

subparsers.add_parser(
"list-diff-paths",
parents=[base_head_parser, absolute_parser],
help="List file paths with changes given as commit list",
)

return parser

def range_diff_paths(self, base: str, head: Optional[str], absolute: bool, only_added_commits: bool) -> None:
diff_name_list: list[str]
if only_added_commits:
commits = self.git_manager.get_missing_commits(base=base, head=head)
diff_name_list = self.git_manager.get_list_diff_paths(commits=commits, absolute=absolute)
else:
diff_name_list = self.git_manager.get_range_diff_paths(base=base, head=head, absolute=absolute)
print("\n".join(diff_name_list))

def list_diff_paths(self, absolute: bool) -> None:
commits = [line.strip() for line in self.input_text_io if line.strip()]
diff_name_list = self.git_manager.get_list_diff_paths(commits=commits, absolute=absolute)
diff_name_list = self.git_manager.get_range_diff_paths(
base=base,
head=head,
absolute=absolute,
only_missing_commits=only_added_commits,
)
print("\n".join(diff_name_list))

@classmethod
Expand All @@ -86,10 +75,11 @@ def run_parsed_args(cls, args: argparse.Namespace) -> None:
match args.command:
case "range-diff-paths":
tool.range_diff_paths(
base=args.base, head=args.head, absolute=args.absolute, only_added_commits=args.only_added_commits
base=args.base,
head=args.head,
absolute=args.absolute,
only_added_commits=args.only_added_commits,
)
case "list-diff-paths":
tool.list_diff_paths(absolute=args.absolute)
case _:
raise RuntimeError(f"Got unknown command: {args.command}")

Expand Down
33 changes: 33 additions & 0 deletions terrarium/dl_gitmanager/dl_gitmanager_tests/unit/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from pathlib import Path
import shutil
import tempfile
from typing import Generator

from git.repo.base import Repo as GitRepo
import pytest

from dl_gitmanager.git_manager import GitManager
from dl_gitmanager_tests.unit.git_tools import GitActionProcessor


@pytest.fixture(scope="function")
def base_repo_dir() -> Generator[Path, None, None]:
    """
    Provide a fresh temporary directory for one test and remove it afterwards.

    ``tempfile.TemporaryDirectory`` owns both creation and cleanup, so the
    directory is removed on fixture teardown whether or not the test passed.
    """
    with tempfile.TemporaryDirectory() as dir_name:
        yield Path(dir_name)


@pytest.fixture(scope="function")
def git_action_proc(base_repo_dir: Path) -> GitActionProcessor:
    """
    Build a ``GitActionProcessor`` for ``base_repo_dir`` with a starting history.

    The processor is created through ``initialize_repo``, then a commit with
    the message "Initial commit" is added and ``checkout_new_branch("main")``
    is called.
    """
    processor = GitActionProcessor.initialize_repo(base_repo_dir)
    processor.add_commit(message="Initial commit")
    processor.checkout_new_branch("main")
    return processor


@pytest.fixture(scope="function")
def git_manager(base_repo_dir: Path) -> GitManager:
    """Wrap the repository located at ``base_repo_dir`` in a ``GitManager``."""
    # NOTE(review): this fixture does not depend on ``git_action_proc``;
    # presumably a test must also request that fixture so the repository
    # exists before ``GitRepo`` opens the directory — confirm.
    repo = GitRepo(path=base_repo_dir)
    return GitManager(git_repo=repo)
Loading
Loading