From e98c9bfe11eae136c067568aabae963b084d20cf Mon Sep 17 00:00:00 2001 From: Ivan Razumov Date: Thu, 14 Mar 2024 15:44:40 +0100 Subject: [PATCH 1/3] Limit how many files can be changed in a PR --- process_pr.py | 65 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/process_pr.py b/process_pr.py index c0a20bdfb3ee..667d680fca98 100644 --- a/process_pr.py +++ b/process_pr.py @@ -136,6 +136,7 @@ def format(s, **kwds): ) REGEX_COMMITS_CACHE = re.compile(r"", re.DOTALL) REGEX_IGNORE_COMMIT_COUNT = "\+commit-count" +REGEX_IGNORE_FILE_COUNT = "\+file-count" TEST_WAIT_GAP = 720 ALL_CHECK_FUNCTIONS = None EXTRA_RELVALS_TESTS = ["threading", "gpu", "high-stats", "nano"] @@ -189,6 +190,9 @@ def format(s, **kwds): BOT_CACHE_CHUNK_SIZE = 55000 TOO_MANY_COMMITS_WARN_THRESHOLD = 150 TOO_MANY_COMMITS_FAIL_THRESHOLD = 240 +TOO_MANY_FILES_WARN_THRESHOLD = 1500 +TOO_MANY_FILES_FAIL_THRESHOLD = 3001 +CHANGED_FILES_FROM_DIFF_THRESHOLD = 500 L2_DATA = {} @@ -738,7 +742,7 @@ def multiline_check_function(first_line, comment_lines, repository): def get_changed_files(repo, pr, use_gh_patch=False): - if (not use_gh_patch) and (pr.changed_files <= 300): + if (not use_gh_patch) and (pr.changed_files <= CHANGED_FILES_FROM_DIFF_THRESHOLD): pr_files = [] for f in pr.get_files(): pr_files.append(f.filename) @@ -750,13 +754,20 @@ def get_changed_files(repo, pr, use_gh_patch=False): print("PR Files: ", pr_files) return pr_files cmd = ( - "curl -s -L https://patch-diff.githubusercontent.com/raw/%s/pull/%s.patch | grep '^diff --git ' | sed 's|.* a/||;s| *b/| |' | tr ' ' '\n' | sort | uniq" + "curl -s -L https://patch-diff.githubusercontent.com/raw/%s/pull/%s.diff | grep '^diff --git ' | sed 's|.* a/||;s| *b/| |' | tr ' ' '\n' | sort | uniq" % (repo.full_name, pr.number) ) e, o = run_cmd(cmd) if e: return [] - return o.split("\n") + if o: + return o.split("\n") + else: + print( + "ERROR: Request to https://patch-diff.githubusercontent.com/raw/%s/pull/%s.diff did not return any changes, is the PR too big?" + % (repo.full_name, pr.number) + ) + exit(1) def get_backported_pr(msg): @@ -912,6 +923,8 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F all_commit_shas = set() ok_too_many_commits = False warned_too_many_commits = False + ok_too_many_files = False + warned_too_many_files = False if issue.pull_request: pr = repo.get_pull(prId) @@ -1298,6 +1311,11 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F if commenter in CMSSW_ISSUES_TRACKERS: ok_too_many_commits = pr.commits < TOO_MANY_COMMITS_FAIL_THRESHOLD comment_emoji = "+1" + elif re.match(r"^\s*" + REGEX_IGNORE_FILE_COUNT + r"\s*$", first_line): + comment_emoji = "-1" + if commenter in CMSSW_ISSUES_TRACKERS: + ok_too_many_files = pr.changed_files < TOO_MANY_FILES_FAIL_THRESHOLD + comment_emoji = "+1" if comment_emoji: set_comment_emoji_cache(dryRun, bot_cache, comment, repository, emoji=comment_emoji) @@ -1396,6 +1414,14 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F ) or "This PR contains too many commits" in first_line: warned_too_many_commits = True continue + + if ( + "This PR touches many files" in first_line + and pr.changed_files < TOO_MANY_FILES_FAIL_THRESHOLD + ) or "This PR touches too many files" in first_line: + warned_too_many_files = True + continue + sec_line = comment_lines[1:2] if not sec_line: sec_line = "" @@ -1549,6 +1575,39 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F print("Commit count reached and not overridden, quitting") return + if ( + pr.changed_files >= TOO_MANY_FILES_WARN_THRESHOLD + and (not warned_too_many_files) + and (not ok_too_many_files) + ): + if pr.commits < TOO_MANY_FILES_FAIL_THRESHOLD: + if not dryRun: + issue.create_comment( + "This PR touches many files ({0} >= {1}) and will not be processed. " + "Please ensure you have selected the correct target branch and consider splitting this PR into several.\n" + "{2}, to re-enable processing of this PR, you can write `+file-count` in a comment. Thanks.".format( + pr.changed_files, + TOO_MANY_FILES_WARN_THRESHOLD, + ", ".join([gh_user_char + name for name in CMSSW_ISSUES_TRACKERS]), + ) + ) + else: + if not dryRun: + issue.create_comment( + "This PR touches too many files ({0} >= {1}) and will not be processed.\n" + "Please ensure you have selected the correct target branch and consider splitting this PR into several.\n" + "The processing of this PR will resume once the number of changed files drops below the limit.".format( + pr.changed_files, + TOO_MANY_FILES_FAIL_THRESHOLD, + ) + ) + + return + + if pr.changed_files >= TOO_MANY_FILES_WARN_THRESHOLD and not ok_too_many_files: + print("Changed file count reached and not overridden, quitting") + return + print("Processing commits") # Make sure to mark squashed=False if a cached/squashed commit is added back From 8d32f682213e7a0c469ed51e729a9eb3226819f9 Mon Sep 17 00:00:00 2001 From: Ivan Razumov Date: Thu, 14 Mar 2024 15:54:42 +0100 Subject: [PATCH 2/3] fail on run_cmd error --- process_pr.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/process_pr.py b/process_pr.py index 667d680fca98..dd913ceacdab 100644 --- a/process_pr.py +++ b/process_pr.py @@ -759,7 +759,12 @@ def get_changed_files(repo, pr, use_gh_patch=False): ) e, o = run_cmd(cmd) if e: - return [] + print( + "ERROR: Request to https://patch-diff.githubusercontent.com/raw/%s/pull/%s.diff failed" + % (repo.full_name, pr.number) + ) + print(e) + exit(1) if o: return o.split("\n") else: From 0665b814f7ba1180958029ca390968102a70c90c Mon Sep 17 00:00:00 2001 From: Ivan Razumov Date: Mon, 8 Jul 2024 12:57:47 +0200 Subject: [PATCH 3/3] Fix copy-paste error --- process_pr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/process_pr.py b/process_pr.py index dd913ceacdab..dd58c9c5fb13 100644 --- a/process_pr.py +++ b/process_pr.py @@ -1585,7 +1585,7 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F and (not warned_too_many_files) and (not ok_too_many_files) ): - if pr.commits < TOO_MANY_FILES_FAIL_THRESHOLD: + if pr.changed_files < TOO_MANY_FILES_FAIL_THRESHOLD: if not dryRun: issue.create_comment( "This PR touches many files ({0} >= {1}) and will not be processed. "