From db51c5ec5eb8b1fd843b1868def5ba63ccbd92de Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski Date: Fri, 31 Mar 2023 17:09:32 +0200 Subject: [PATCH 01/16] CASSANDRA-18399: Add a script to prepare merge commands --- dev/prepare-merge-commands.sh | 205 ++++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) create mode 100644 dev/prepare-merge-commands.sh diff --git a/dev/prepare-merge-commands.sh b/dev/prepare-merge-commands.sh new file mode 100644 index 00000000..faf1cbab --- /dev/null +++ b/dev/prepare-merge-commands.sh @@ -0,0 +1,205 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +set -e + +supported_versions=("3.0" "3.11" "4.0" "4.1" "trunk") + +# Read the remote Apache Cassandra repository name +apache_repository="" +for r in $(git remote show); do + url="$(git remote get-url "$r")" + if [[ "$url" == *"apache/cassandra.git" ]]; then + apache_repository="$r" + break + elif [[ "$url" == *"asf/cassandra.git" ]]; then + apache_repository="$r" + fi +done +echo "Remote repositories: " +git remote -v show +echo "" +read -r -e -i "$apache_repository" -p "Enter Apache Cassandra remote repository name: " apache_repository +git fetch "$apache_repository" +echo "" + +# Read the feature repository and branch +branch="$(git status -b --porcelain=v2 | grep branch.upstream | cut -f 3 -d ' ')" + +if [[ "$branch" =~ ^([^/]*)/(CASSANDRA-[0-9]+).*$ ]]; then + repository="${BASH_REMATCH[1]}" + ticket="${BASH_REMATCH[2]}" +fi + +read -r -e -i "$repository" -p "Enter a feature repository name: " repository +read -r -e -i "$ticket" -p "Enter a ticket ID: " ticket +if [ -z "$repository" ] || [ -z "$ticket" ]; then + exit 0 +fi +echo "" + +git fetch "$repository" + +# Read all feature branches based on the ticket name +readarray -t branches < <(git ls-remote --refs -h -q "$repository" | grep "$ticket" | cut -d '/' -f 3 | sort) +if [[ "${#branches[@]}" == 0 ]]; then + echo "Found no feature branches that include a $ticket in name" + exit 0 +fi + +echo "The following feature branches were found:" +for branch in "${branches[@]}"; do + echo "$branch" +done +echo "" + +# Read the oldest Cassandra version where the feature should be applied +matched=0 +while [[ $matched == 0 ]]; do + read -r -e -p "What is the oldest target version you want to merge? 
" oldest_target_version + if [[ -z "$oldest_target_version" ]]; then + exit 0 + fi + + feature_versions=() + for v in "${supported_versions[@]}"; do + if [[ "$v" == "$oldest_target_version" ]]; then + matched=1 + fi + if [[ $matched == 1 ]]; then + feature_versions+=("$v") + fi + done +done + +echo "Will merge to the following Cassandra versions:" +for v in "${feature_versions[@]}"; do + echo "$v" +done +echo "" + +function find_matching_branch() { + local infix="$1" + for b in "${branches[@]}"; do + if [[ "$infix" == "" ]] && [[ "$b" == "$ticket" ]]; then + echo "$b" + return 0 + elif [[ "$b" == *"$infix"* ]]; then + echo "$b" + return 0 + fi + done + + return 1 +} + +# Confirm which feature branches are for which Cassandra versions +feature_branches=() +target_branches=() +for v in "${feature_versions[@]}"; do + branch="" + if [[ "$v" == "trunk" ]]; then + target_branches+=("trunk") + branch="$(find_matching_branch trunk || find_matching_branch "" || true)" + else + target_branches+=("cassandra-$v") + branch="$(find_matching_branch "$v" || true)" + fi + read -r -e -i "$branch" -p "Enter branch for version $v or leave empty if there nothing to merge for this version: " branch + feature_branches+=("$branch") +done + +# Generate a script + +echo "" +echo "" +echo "" +echo "git fetch $apache_repository" +echo "git fetch $repository" + +# Get a subject from the first commit which will serve as a title to be pasted into CHANGES.txt +first_commit="$(git log --pretty=format:%s --reverse "$apache_repository/${target_branches[0]}..$repository/${feature_branches[0]}" | head -n 1)" + +push_command="git push --atomic $apache_apache_repository" +skipped_branches_found=0 +for i in $(seq 0 $((${#target_branches[@]} - 1))); do + echo "" + echo "" + echo "" + echo "# $repository/${feature_branches[$i]} -> ${target_branches[$i]}" + echo "# --------------------------------------------------------------------------------------------------------" + + if [[ $i == 0 ]] && [[ 
"${feature_branches[$i]}" == "" ]]; then + # Although we can skip a feature for some versions, we cannot skip it for the oldest version (which is quite obvious) + exit 1 + fi + + # Read the list of commits between the remote head and the feature branch - we need to cherry pick them (or some of them) + + echo "git switch ${target_branches[$i]}" + echo "git reset --hard $apache_repository/${target_branches[$i]}" + if [[ "${feature_branches[$i]}" == "" ]]; then + skipped_branches_found=1 + # A script for the case where there is no fix for a version + echo "git merge -s ours --log --no-edit ${feature_branches[$((i - 1))]}" + else + readarray -t commits < <(git log --reverse --oneline "$apache_repository/${target_branches[$i]}..$repository/${feature_branches[$i]}") + + if [[ $i != 0 ]]; then + # When this isn't the oldest version (we want to have only the merge commit) + echo "git merge -s ours --log --no-edit ${feature_branches[$((i - 1))]}" + fi + + for c in $(seq 0 $((${#commits[@]} - 1))); do + commit_sha="$(echo "${commits[$c]}" | cut -f 1 -d ' ')" + if [[ $i == 0 ]] && [[ $c == 0 ]]; then + # we want to have only one feature commit (c == 0), which is in the oldest version (i == 0) + echo "git cherry-pick $commit_sha # ${commits[$c]}" + else + # otherwise we squash the commits to the previous one + echo "git cherry-pick -n $commit_sha && git commit -a --amend --no-edit # ${commits[$c]}" + fi + done + fi + + if [[ "$skipped_branches_found" == "0" ]]; then + if [[ $i == 0 ]]; then + echo "grep '$ticket' CHANGES.txt || sed -E -i '/^[0-9]+\.[0-9]+/{s/.*/&\n\ * $first_commit ($ticket)/;:a;n;ba}' CHANGES.txt" + else + echo "grep '$ticket' CHANGES.txt || sed -E -i '/^Merged from ${oldest_target_version}/{s/.*/&\n\ * $first_commit ($ticket)/;:a;n;ba}' CHANGES.txt" + fi + echo "git diff CHANGES.txt" + else + echo "Update CHANGES.txt by adding the following line:" + echo " * $first_commit ($ticket)" + fi + echo "git add CHANGES.txt" + echo "git commit --amend --no-edit" + + 
echo "(git diff $apache_repository/${target_branches[$i]}..HEAD -- .circleci/ | git apply -R --index) && git commit -a --amend --no-edit # Remove all changes in .circleci directory if you need to" + + echo "git diff --name-only $apache_repository/${target_branches[$i]}..HEAD # print a list of all changes files" + + push_command+=" ${target_branches[$i]}" +done + +echo "" +echo "" +echo "" +echo "$push_command -n" From a2936e50e9c8debfa4a151a7cf712c21fe325c1e Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski Date: Fri, 31 Mar 2023 17:26:04 +0200 Subject: [PATCH 02/16] fix --- dev/prepare-merge-commands.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dev/prepare-merge-commands.sh b/dev/prepare-merge-commands.sh index faf1cbab..fc06c2e5 100644 --- a/dev/prepare-merge-commands.sh +++ b/dev/prepare-merge-commands.sh @@ -136,7 +136,7 @@ echo "git fetch $repository" # Get a subject from the first commit which will serve as a title to be pasted into CHANGES.txt first_commit="$(git log --pretty=format:%s --reverse "$apache_repository/${target_branches[0]}..$repository/${feature_branches[0]}" | head -n 1)" -push_command="git push --atomic $apache_apache_repository" +push_command="git push --atomic $apache_repository" skipped_branches_found=0 for i in $(seq 0 $((${#target_branches[@]} - 1))); do echo "" @@ -157,13 +157,13 @@ for i in $(seq 0 $((${#target_branches[@]} - 1))); do if [[ "${feature_branches[$i]}" == "" ]]; then skipped_branches_found=1 # A script for the case where there is no fix for a version - echo "git merge -s ours --log --no-edit ${feature_branches[$((i - 1))]}" + echo "git merge -s ours --log --no-edit ${target_branches[$((i - 1))]}" else readarray -t commits < <(git log --reverse --oneline "$apache_repository/${target_branches[$i]}..$repository/${feature_branches[$i]}") if [[ $i != 0 ]]; then # When this isn't the oldest version (we want to have only the merge commit) - echo "git merge -s ours --log --no-edit 
${feature_branches[$((i - 1))]}" + echo "git merge -s ours --log --no-edit ${target_branches[$((i - 1))]}" fi for c in $(seq 0 $((${#commits[@]} - 1))); do @@ -203,3 +203,5 @@ echo "" echo "" echo "" echo "$push_command -n" + + From bbf45ac222ec16bf8d9616137ecfa188c21ed0b1 Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski Date: Mon, 3 Apr 2023 12:25:28 +0200 Subject: [PATCH 03/16] Add verification script (wip) - to be tested --- dev/prepare-merge-commands.sh | 7 ++- dev/verify_git_history.py | 108 ++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 2 deletions(-) mode change 100644 => 100755 dev/prepare-merge-commands.sh create mode 100644 dev/verify_git_history.py diff --git a/dev/prepare-merge-commands.sh b/dev/prepare-merge-commands.sh old mode 100644 new mode 100755 index fc06c2e5..f2b5783c --- a/dev/prepare-merge-commands.sh +++ b/dev/prepare-merge-commands.sh @@ -202,6 +202,9 @@ done echo "" echo "" echo "" -echo "$push_command -n" - +echo "python3 verify_git_history.py $apache_repository ${target_branches[0]}" +echo "" +echo "" +echo "" +echo "$push_command -n" diff --git a/dev/verify_git_history.py b/dev/verify_git_history.py new file mode 100644 index 00000000..79584d87 --- /dev/null +++ b/dev/verify_git_history.py @@ -0,0 +1,108 @@ +import re +import subprocess +import sys + +def get_apache_branches(repo): + """ + Get the list of main cassandra branches from the given repo, sorted by version ascending. 
+ :param repo: configured apache repository name + :return: list of branch names + """ + output = subprocess.check_output("git ls-remote --refs -h -q %s" % repo, shell=True) + branch_regex = re.compile(r".*refs/heads/(cassandra-(\d+)\.(\d+))$") + + branches_with_versions = [] + for line in output.decode("utf-8").split("\n"): + match = branch_regex.match(line) + if match: + branches_with_versions.append((int(match.group(2)), int(match.group(3)), match.group(1))) + + branches_with_versions.sort() + main_branches = [branch[2] for branch in branches_with_versions] + main_branches.append("trunk") + return main_branches + +def get_local_branch_history(repo, branch): + """ + Get the commit history between local branch and remote branch, sorted by commit date ascending. + :param repo: configured apache repository name + :param branch: branch name + :return: a list of tuples (commit_hash, commit_message) + """ + output = subprocess.check_output("git log --pretty=format:'%%H %%s' %s/%s..%s" % (repo, branch, branch), shell=True) + history = [] + line_regex = re.compile(r"([0-9a-f]+) (.*)") + for line in output.decode("utf-8").split("\n"): + match = line_regex.match(line) + if match: + history.append((match.group(1), match.group(2))) + history.reverse() + return history + +def parse_merge_commit_msg(msg): + """ + Parse a merge commit message and return the source and destination branches. 
+ :param msg: a commit message + :return: a tuple of (source_branch, destination_branch) or None if the message is not a merge commit + """ + msg_regex = re.compile(r"Merge branch '(cassandra-\d+\.\d+)' into (cassandra-((\d+\.\d+)|trunk))") + match = msg_regex.match(msg) + if match: + return (match.group(1), match.group(2)) + return None + +######################################################################################################################## + +# Read the command line arguments and validate them + +if len(sys.argv) != 3: + print("Usage: %s " % sys.argv[0]) + exit(1) + +repo = sys.argv[1] +start_branch = sys.argv[2] +main_branches = get_apache_branches(repo) + +# check if start_branch is a valid branch +if start_branch not in main_branches: + print("Invalid branch %s, must be one of %s" % (start_branch, str(main_branches))) + exit(1) + +# get items from main_branches starting from the item matching start_branch +main_branches = main_branches[main_branches.index(start_branch):] + +# get the patch commit message +history = get_local_branch_history(repo, main_branches[0]) + +# history for the first branch must contain onlu one commit +if len(history) != 1: + print("Invalid history for branch %s, must contain only one commit, but found %d: \n\n%s" % (main_branches[0], len(history), "\n".join(str(x) for x in history))) + exit(1) + +# check if the commit message is valid, that is, it must not be a merge commit +if parse_merge_commit_msg(history[0][1]): + print("Invalid commit message for branch %s, must not be a merge commit, but found: \n\n%s" % (main_branches[0], history[0])) + exit(1) + +# Check the history of the branches to confirm that each branch contains exactly one main commit +# and the rest are the merge commits from the previous branch in order +expected_merges = [] +prev_branch = main_branches[0] +prev_history = history +for branch in main_branches[1:]: + expected_merges.append((prev_branch, branch)) + history = 
get_local_branch_history(repo, branch) + + if history[:-1] != prev_history: + print("Invalid history for branch %s, must be the same as branch %s, but found: \n\n%s" % (branch, prev_branch, "\n".join(str(x) for x in history))) + exit(1) + +# expect that the rest of the commits are merge commits matching the expected merges in the same order +for i in range(1, len(history)): + merge = parse_merge_commit_msg(history[i][1]) + if not merge: + print("Invalid commit message for branch %s, must be a merge commit, but found: \n%s" % (branch, history[i])) + exit(1) + if merge != expected_merges[i-1]: + print("Invalid merge commit for branch %s, expected: %s, but found: %s" % (branch, expected_merges[i-1], merge)) + exit(1) From fcd54821af2a6c8ac307f6e8809eb17a5074fbf8 Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski Date: Mon, 3 Apr 2023 13:20:49 +0200 Subject: [PATCH 04/16] Added verification of push ranges --- dev/verify_git_history.py | 54 ++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/dev/verify_git_history.py b/dev/verify_git_history.py index 79584d87..54285f61 100644 --- a/dev/verify_git_history.py +++ b/dev/verify_git_history.py @@ -2,13 +2,14 @@ import subprocess import sys + def get_apache_branches(repo): """ Get the list of main cassandra branches from the given repo, sorted by version ascending. :param repo: configured apache repository name :return: list of branch names """ - output = subprocess.check_output("git ls-remote --refs -h -q %s" % repo, shell=True) + output = subprocess.check_output(["git", "ls-remote", "--refs", "-h", "-q", repo], shell=False) branch_regex = re.compile(r".*refs/heads/(cassandra-(\d+)\.(\d+))$") branches_with_versions = [] @@ -22,6 +23,7 @@ def get_apache_branches(repo): main_branches.append("trunk") return main_branches + def get_local_branch_history(repo, branch): """ Get the commit history between local branch and remote branch, sorted by commit date ascending. 
@@ -29,7 +31,8 @@ def get_local_branch_history(repo, branch): :param branch: branch name :return: a list of tuples (commit_hash, commit_message) """ - output = subprocess.check_output("git log --pretty=format:'%%H %%s' %s/%s..%s" % (repo, branch, branch), shell=True) + output = subprocess.check_output(["git", "log", "--pretty=format:%H %s", "%s/%s..%s" % (repo, branch, branch)], + shell=False) history = [] line_regex = re.compile(r"([0-9a-f]+) (.*)") for line in output.decode("utf-8").split("\n"): @@ -39,6 +42,7 @@ def get_local_branch_history(repo, branch): history.reverse() return history + def parse_merge_commit_msg(msg): """ Parse a merge commit message and return the source and destination branches. @@ -51,6 +55,24 @@ def parse_merge_commit_msg(msg): return (match.group(1), match.group(2)) return None + +def parse_push_ranges(repo, branches): + """ + Parse the output of git push --atomic -n and return a list of tuples (label, start_commit, end_commit) + :param repo: configured apache repository name + :param branches: list of branch names + :return: list of tuples (label, start_commit, end_commit) + """ + output = subprocess.check_output(["git", "push", "--atomic", "-n", "--porcelain", repo] + branches, shell=False) + range_regex = re.compile(r"^\s+refs/heads/\S+:refs/heads/(\S+)\s+([0-9a-f]+)\.\.([0-9a-f]+)$") + ranges = [] + for line in output.decode("utf-8").split("\n"): + match = range_regex.match(line) + if match: + ranges.append((match.group(1), match.group(2), match.group(3))) + return ranges + + ######################################################################################################################## # Read the command line arguments and validate them @@ -76,12 +98,14 @@ def parse_merge_commit_msg(msg): # history for the first branch must contain onlu one commit if len(history) != 1: - print("Invalid history for branch %s, must contain only one commit, but found %d: \n\n%s" % (main_branches[0], len(history), "\n".join(str(x) for x in 
history))) + print("Invalid history for branch %s, must contain only one commit, but found %d: \n\n%s" % ( + main_branches[0], len(history), "\n".join(str(x) for x in history))) exit(1) # check if the commit message is valid, that is, it must not be a merge commit if parse_merge_commit_msg(history[0][1]): - print("Invalid commit message for branch %s, must not be a merge commit, but found: \n\n%s" % (main_branches[0], history[0])) + print("Invalid commit message for branch %s, must not be a merge commit, but found: \n\n%s" % ( + main_branches[0], history[0])) exit(1) # Check the history of the branches to confirm that each branch contains exactly one main commit @@ -94,7 +118,8 @@ def parse_merge_commit_msg(msg): history = get_local_branch_history(repo, branch) if history[:-1] != prev_history: - print("Invalid history for branch %s, must be the same as branch %s, but found: \n\n%s" % (branch, prev_branch, "\n".join(str(x) for x in history))) + print("Invalid history for branch %s, must be the same as branch %s, but found: \n\n%s" % ( + branch, prev_branch, "\n".join(str(x) for x in history))) exit(1) # expect that the rest of the commits are merge commits matching the expected merges in the same order @@ -103,6 +128,21 @@ def parse_merge_commit_msg(msg): if not merge: print("Invalid commit message for branch %s, must be a merge commit, but found: \n%s" % (branch, history[i])) exit(1) - if merge != expected_merges[i-1]: - print("Invalid merge commit for branch %s, expected: %s, but found: %s" % (branch, expected_merges[i-1], merge)) + if merge != expected_merges[i - 1]: + print( + "Invalid merge commit for branch %s, expected: %s, but found: %s" % (branch, expected_merges[i - 1], merge)) exit(1) + +push_ranges = parse_push_ranges(repo, main_branches) +# number of push ranges must match the number of branches we want to merge +if len(push_ranges) != len(main_branches): + print("Invalid number of push ranges, expected %d, but found %d:\n%s" % ( + len(main_branches), 
len(push_ranges), "\n".join(str(x) for x in push_ranges))) + exit(1) + +for push_range in push_ranges: + print("-" * 80) + print("Push range for branch %s: %s..%s" % (push_range[0], push_range[1], push_range[2])) + print("git diff --name-only %s..%s" % (push_range[1], push_range[2])) + print("git show %s..%s" % (push_range[1], push_range[2])) + print("") From a7cb33f7699bafeab1e8c867cc759dc03a2cb17c Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski Date: Mon, 3 Apr 2023 17:04:41 +0200 Subject: [PATCH 05/16] Add CHANGES.txt updater --- dev/git_utils.py | 87 ++++++++++++++++++ dev/update_changes.py | 179 ++++++++++++++++++++++++++++++++++++++ dev/verify_git_history.py | 77 +--------------- 3 files changed, 269 insertions(+), 74 deletions(-) create mode 100644 dev/git_utils.py create mode 100644 dev/update_changes.py diff --git a/dev/git_utils.py b/dev/git_utils.py new file mode 100644 index 00000000..a9551d29 --- /dev/null +++ b/dev/git_utils.py @@ -0,0 +1,87 @@ +import re +import subprocess + + +def get_apache_branches(repo): + """ + Get the list of main cassandra branches from the given repo, sorted by version ascending. + :param repo: configured apache repository name + :return: list of branch names + """ + output = subprocess.check_output(["git", "ls-remote", "--refs", "-h", "-q", repo], shell=False) + branch_regex = re.compile(r".*refs/heads/(cassandra-(\d+)\.(\d+))$") + + branches_with_versions = [] + for line in output.decode("utf-8").split("\n"): + match = branch_regex.match(line) + if match: + branches_with_versions.append((int(match.group(2)), int(match.group(3)), match.group(1))) + + branches_with_versions.sort() + main_branches = [branch[2] for branch in branches_with_versions] + main_branches.append("trunk") + return main_branches + + +def get_local_branch_history(repo, branch): + """ + Get the commit history between local branch and remote branch, sorted by commit date ascending. 
+ :param repo: configured apache repository name + :param branch: branch name + :return: a list of tuples (commit_hash, commit_message) + """ + output = subprocess.check_output(["git", "log", "--pretty=format:%H %s", "%s/%s..%s" % (repo, branch, branch)], + shell=False) + history = [] + line_regex = re.compile(r"([0-9a-f]+) (.*)") + for line in output.decode("utf-8").split("\n"): + match = line_regex.match(line) + if match: + history.append((match.group(1), match.group(2))) + history.reverse() + return history + + +def parse_merge_commit_msg(msg): + """ + Parse a merge commit message and return the source and destination branches. + :param msg: a commit message + :return: a tuple of (source_branch, destination_branch) or None if the message is not a merge commit + """ + msg_regex = re.compile(r"Merge branch '(cassandra-\d+\.\d+)' into (cassandra-((\d+\.\d+)|trunk))") + match = msg_regex.match(msg) + if match: + return (match.group(1), match.group(2)) + return None + + +def parse_push_ranges(repo, branches): + """ + Parse the output of git push --atomic -n and return a list of tuples (label, start_commit, end_commit) + :param repo: configured apache repository name + :param branches: list of branch names + :return: list of tuples (label, start_commit, end_commit) + """ + output = subprocess.check_output(["git", "push", "--atomic", "-n", "--porcelain", repo] + branches, shell=False) + range_regex = re.compile(r"^\s+refs/heads/\S+:refs/heads/(\S+)\s+([0-9a-f]+)\.\.([0-9a-f]+)$") + ranges = [] + for line in output.decode("utf-8").split("\n"): + match = range_regex.match(line) + if match: + ranges.append((match.group(1), match.group(2), match.group(3))) + return ranges + +cassandra_branch_version_re = re.compile(r"cassandra-(\d+)\.(\d+)") +version_string_re = re.compile(r"(\d+)\.(\d+)") + +def version_from_re(re, string): + match = re.match(string) + if match: + return (int(match.group(1)), int(match.group(2))) + return None + +def version_from_branch(branch): + return 
version_from_re(cassandra_branch_version_re, branch) + +def version_from_string(version_string): + return version_from_re(version_string_re, version_string) diff --git a/dev/update_changes.py b/dev/update_changes.py new file mode 100644 index 00000000..ec0b6e3c --- /dev/null +++ b/dev/update_changes.py @@ -0,0 +1,179 @@ +import re +import subprocess +import sys +from typing import NamedTuple, Tuple + +from git_utils import get_apache_branches, version_from_branch, version_from_string, version_from_re + + +class MergeSection(NamedTuple): + version: Tuple[int, int] + messages: list[str] + + +class ReleaseSection(NamedTuple): + version: Tuple[int, int] + version_string: str + messages: list[str] + merge_sections: list[MergeSection] + + +def read_changes_file() -> list[ReleaseSection]: + """ + Read the changes file and return a list of release sections. + :return: a list of release sections + """ + merge_section_regex = re.compile(r"^Merged from (\d+)\.(\d+):") + release_sections = [] + with open("CHANGES.txt", "r") as f: + lines = f.readlines() + + messages = [] + merge_sections = [] + release_section = None + merge_section = None + + # go through each line and record its index if it matches the pattern \d+\.\d+.* + for i in range(len(lines)): + version = version_from_string(lines[i]) + merge_version = version_from_re(merge_section_regex, lines[i]) + + if version: + if merge_section: + merge_sections.append(merge_section) + + if release_section: + release_sections.append(release_section) + + messages = [] + merge_sections = [] + merge_section = None + release_section = ReleaseSection(version, lines[i], messages, merge_sections) + + elif merge_version: + if merge_section: + merge_sections.append(merge_section) + + messages = [] + merge_section = MergeSection(merge_version, messages) + + elif lines[i].strip(): + if (ticket in lines[i] or message in lines[i]): + print("Found duplicate message in line %d: %s" % (i + 1, lines[i])) + exit(1) + messages.append(lines[i]) + + 
if release_section: + release_sections.append(release_section) + + return release_sections + + +# write a text file with the changes +def write_changes_file(release_sections: list[ReleaseSection]): + """ + Write the changes file. + :param release_sections: the release sections to write + """ + with open("CHANGES.txt", "w") as f: + for version_section in release_sections: + f.write(version_section.version_string) + for message in version_section.messages: + f.write(message) + + for merge_section in version_section.merge_sections: + f.write("Merged from %d.%d:\n" % merge_section.version) + for message in merge_section.messages: + f.write(message) + + f.write("\n\n") + + +def get_or_insert_merge_section(target_section: ReleaseSection, target_version: Tuple[int, int]) -> MergeSection: + """ + Get the merge section for the given version in the given release section. If the merge section does not exist, it is + created and inserted in the correct position. + :param target_section: the release section to search for the merge section + :param target_version: the version of the merge section to search for + :return: found or created merge section + """ + target_merge_section = None + insertion_index = -1 + for idx in range(len(target_section.merge_sections)): + insertion_index = idx + 1 + if target_section.merge_sections[idx].version == target_version: + # merge section already exists, return it + target_merge_section = target_section.merge_sections[idx] + break + elif target_section.merge_sections[idx].version < target_version: + # merge section does not exist because we just reached the first merge section with a lower version + insertion_index = idx - 1 + break + + if not target_merge_section: + # merge section does not exist, create it and insert in the correct position + target_merge_section = MergeSection(target_version, []) + target_section.merge_sections.insert(insertion_index, target_merge_section) + + return target_merge_section + + +# check if the commond line 
args contain the message and a list of branches +if len(sys.argv) < 5: + print("Usage: %s " % sys.argv[0]) + exit(1) + +repo = sys.argv[1] +target_branch = sys.argv[2] +target_version = version_from_string(target_branch) +ticket = sys.argv[3] +message = sys.argv[4] + +release_sections = read_changes_file() + +merge_versions = [] +for branch in get_apache_branches(repo): + if branch == "trunk": + version = release_sections[0].version + else: + version = version_from_branch(branch) + if version: + merge_versions.append(version) + +merge_versions = merge_versions[merge_versions.index(target_version):] +current_branch = subprocess.check_output(["git", "branch", "--show-current"], shell=False).decode("utf-8").strip() + +target_section = None +target_merge_section = None +new_message = " * %s (%s)\n" % (message, ticket) + +if current_branch == "trunk": + current_version = release_sections[0].version + if current_version == target_version: + # if we are on trunk and the target version is also trunk, we prepend the message to the first encountered version + target_section = release_sections[0] + else: + # if we are on trunk, but the target version is older, we prepend the message to the appropriate merge section + # (which may be created if it does not exist) in the second encountered version + target_section = release_sections[1] + for merge_version in merge_versions[1:-1]: + get_or_insert_merge_section(target_section, merge_version) + target_merge_section = get_or_insert_merge_section(target_section, target_version) +else: + current_version = version_from_branch(current_branch) + merge_versions = merge_versions[:merge_versions.index(current_version)] + target_section = release_sections[0] + if current_version != target_version: + for merge_version in merge_versions[1:-1]: + get_or_insert_merge_section(target_section, merge_version) + target_merge_section = get_or_insert_merge_section(target_section, target_version) + +if target_merge_section: + 
target_merge_section.messages.insert(0, new_message) +elif target_section: + target_section.messages.insert(0, new_message) +else: + print("Could not find target section") + exit(1) + +write_changes_file(release_sections) diff --git a/dev/verify_git_history.py b/dev/verify_git_history.py index 54285f61..250ea259 100644 --- a/dev/verify_git_history.py +++ b/dev/verify_git_history.py @@ -1,79 +1,6 @@ -import re -import subprocess import sys - -def get_apache_branches(repo): - """ - Get the list of main cassandra branches from the given repo, sorted by version ascending. - :param repo: configured apache repository name - :return: list of branch names - """ - output = subprocess.check_output(["git", "ls-remote", "--refs", "-h", "-q", repo], shell=False) - branch_regex = re.compile(r".*refs/heads/(cassandra-(\d+)\.(\d+))$") - - branches_with_versions = [] - for line in output.decode("utf-8").split("\n"): - match = branch_regex.match(line) - if match: - branches_with_versions.append((int(match.group(2)), int(match.group(3)), match.group(1))) - - branches_with_versions.sort() - main_branches = [branch[2] for branch in branches_with_versions] - main_branches.append("trunk") - return main_branches - - -def get_local_branch_history(repo, branch): - """ - Get the commit history between local branch and remote branch, sorted by commit date ascending. - :param repo: configured apache repository name - :param branch: branch name - :return: a list of tuples (commit_hash, commit_message) - """ - output = subprocess.check_output(["git", "log", "--pretty=format:%H %s", "%s/%s..%s" % (repo, branch, branch)], - shell=False) - history = [] - line_regex = re.compile(r"([0-9a-f]+) (.*)") - for line in output.decode("utf-8").split("\n"): - match = line_regex.match(line) - if match: - history.append((match.group(1), match.group(2))) - history.reverse() - return history - - -def parse_merge_commit_msg(msg): - """ - Parse a merge commit message and return the source and destination branches. 
- :param msg: a commit message - :return: a tuple of (source_branch, destination_branch) or None if the message is not a merge commit - """ - msg_regex = re.compile(r"Merge branch '(cassandra-\d+\.\d+)' into (cassandra-((\d+\.\d+)|trunk))") - match = msg_regex.match(msg) - if match: - return (match.group(1), match.group(2)) - return None - - -def parse_push_ranges(repo, branches): - """ - Parse the output of git push --atomic -n and return a list of tuples (label, start_commit, end_commit) - :param repo: configured apache repository name - :param branches: list of branch names - :return: list of tuples (label, start_commit, end_commit) - """ - output = subprocess.check_output(["git", "push", "--atomic", "-n", "--porcelain", repo] + branches, shell=False) - range_regex = re.compile(r"^\s+refs/heads/\S+:refs/heads/(\S+)\s+([0-9a-f]+)\.\.([0-9a-f]+)$") - ranges = [] - for line in output.decode("utf-8").split("\n"): - match = range_regex.match(line) - if match: - ranges.append((match.group(1), match.group(2), match.group(3))) - return ranges - - -######################################################################################################################## +from git_utils import get_local_branch_history, get_apache_branches, parse_merge_commit_msg, parse_push_ranges # Read the command line arguments and validate them @@ -133,6 +60,8 @@ def parse_push_ranges(repo, branches): "Invalid merge commit for branch %s, expected: %s, but found: %s" % (branch, expected_merges[i - 1], merge)) exit(1) +# finally we print the commands to explore the changes in each push range + push_ranges = parse_push_ranges(repo, main_branches) # number of push ranges must match the number of branches we want to merge if len(push_ranges) != len(main_branches): From e425b3ca979f74b87b6f0012ccb776c03d2dd961 Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski Date: Thu, 6 Apr 2023 11:27:39 +0200 Subject: [PATCH 06/16] wip --- dev/git_utils.py | 92 ++++++++++++++++++++++++++-- 
dev/prepare_merge_commands.py | 111 ++++++++++++++++++++++++++++++++++ dev/update_changes.py | 8 +-- dev/verify_git_history.py | 4 +- 4 files changed, 203 insertions(+), 12 deletions(-) create mode 100644 dev/prepare_merge_commands.py diff --git a/dev/git_utils.py b/dev/git_utils.py index a9551d29..77914069 100644 --- a/dev/git_utils.py +++ b/dev/git_utils.py @@ -1,8 +1,75 @@ import re import subprocess +import sys +from typing import NamedTuple, Tuple +class FeatureBranch(NamedTuple): + version: Tuple[int, int] + version_string: str + name: str -def get_apache_branches(repo): +NO_VERSION = (-1, -1) +TRUNK = (255, 255) + +def get_feature_branches(repo, ticket): + """ + Get the list of branches from the given repository that contain the given ticket, sorted by version ascending. + :param repo: configured apache repository name + :param ticket: ticket number + :return: list of branch names + """ + output = subprocess.check_output(["git", "ls-remote", "--refs", "-h", "-q", repo], text=True) + branch_regex = re.compile(r".*refs/heads/(" + re.escape(ticket) + r"(-(\d+)\.(\d+))?.*)$", flags=re.IGNORECASE) + print(r".*refs/heads/(" + re.escape(ticket) + r"((\d+)\.(\d+))?.*)$") + matching_branches = [] + for line in output.split("\n"): + match = branch_regex.match(line) + if match: + branch = match.group(1) + if branch == ticket: + version = TRUNK + elif match.group(2): + version = (int(match.group(3)), int(match.group(4))) + else: + version = NO_VERSION + matching_branches.append(FeatureBranch(version, match.group(2), branch)) + + matching_branches.sort(key=lambda x: x.version) + return matching_branches + +def get_upstream_branch_and_repo(): + """ + Get the upstream branch and repository name for the current branch. 
+ :return: a tuple of (remote_name, branch_name, ticket_number) or None if the current branch is not tracking a remote branch + """ + output = subprocess.check_output(["git", "status", "-b", "--porcelain=v2"], shell=False).decode("utf-8") + regex = re.compile(r"# branch\.upstream ([^/]+)/([^ ]+)") + match = regex.search(output) + if match: + ticket_regex = re.compile(r"CASSANDRA-(\d+)", flags=re.IGNORECASE) + ticket_match = ticket_regex.search(match.group(2)) + if ticket_match: + return (match.group(1), match.group(2), ticket_match.group(1)) + return (match.group(1), match.group(2), None) + return None + + +def get_remote_cassandra_repository_name(): + """ + Get the name of the remote repository that points to the apache cassandra repository. Prefers "apache" over "asf". + :return: the remote name + """ + output = subprocess.check_output(["git", "remote", "show"], shell=False) + apache_remote_name = None + for remote_name in output.decode("utf-8").split("\n"): + url = subprocess.check_output(["git", "remote", "get-url", remote_name], shell=False).decode("utf-8").strip() + if "apache/cassandra.git" in url: + return remote_name + if "asf/cassandra/git" in url: + apache_remote_name = remote_name + return apache_remote_name + +def get_release_branches(repo): """ Get the list of main cassandra branches from the given repo, sorted by version ascending. 
:param repo: configured apache repository name @@ -15,12 +82,12 @@ def get_apache_branches(repo): for line in output.decode("utf-8").split("\n"): match = branch_regex.match(line) if match: - branches_with_versions.append((int(match.group(2)), int(match.group(3)), match.group(1))) + branches_with_versions.append(FeatureBranch((int(match.group(2)), int(match.group(3))), "-%s.%s" % (match.group(2), match.group(3)), match.group(1))) + + branches_with_versions.append(FeatureBranch(TRUNK, "", "trunk")) + branches_with_versions.sort(key=lambda x: x.version) - branches_with_versions.sort() - main_branches = [branch[2] for branch in branches_with_versions] - main_branches.append("trunk") - return main_branches + return branches_with_versions def get_local_branch_history(repo, branch): @@ -85,3 +152,16 @@ def version_from_branch(branch): def version_from_string(version_string): return version_from_re(version_string_re, version_string) + +def check_remote_exists(remote): + return subprocess.check_call(["git", "remote", "get-url", remote], stderr=sys.stderr, stdout=None) == 0 + +def check_remote_branch_exists(remote, branch): + return subprocess.check_call(["git", "ls-remote", "--exit-code", remote, branch], stderr=sys.stderr, stdout=None) == 0 + +def version_as_string(version): + if version is None: + return None + if version == TRUNK: + return "trunk" + return "%s.%s" % version diff --git a/dev/prepare_merge_commands.py b/dev/prepare_merge_commands.py new file mode 100644 index 00000000..6ff5a36a --- /dev/null +++ b/dev/prepare_merge_commands.py @@ -0,0 +1,111 @@ +import subprocess +import sys +import re +from git_utils import * + +def read_with_default(prompt, default): + value = input("%s [default: %s]: " % (prompt, default)) + if not value: + value = default + return value + +def read_remote_repository(prompt, default): + print("Remote repositories:") + subprocess.check_call(["git", "remote", "show"]) + print("") + repo = None + + while not repo: + repo = 
read_with_default(prompt, default) + if not check_remote_exists(repo): + print("Invalid remote repository name: %s" % repo) + repo = None + + return repo + + +# read upstream repository name from stdin +upstream_repo = read_with_default("Enter the name of the remote repository that points to the upstream Apache Cassandra", get_remote_cassandra_repository_name()) + +feature_repo, feature_branch, ticket_number = get_upstream_branch_and_repo() +feature_repo = read_remote_repository("Enter the name of the remote repository that points to the upstream feature branch", feature_repo) + +ticket_number = read_with_default("Enter the ticket number", ticket_number) +ticket = "CASSANDRA-%s" % ticket_number + +print("") +print("Fetching from %s and %s" % (upstream_repo, feature_repo)) +subprocess.check_output(["git", "fetch", upstream_repo]) +if feature_repo != upstream_repo: + subprocess.check_output(["git", "fetch", feature_repo]) + +release_branches = get_release_branches(upstream_repo) +if len(release_branches) == 0: + print("No release branches found in %s" % upstream_repo) + sys.exit(1) + +print("Found the following release branches:\n%s" % "\n".join([str(b) for b in release_branches])) +print("") + +feature_branches = get_feature_branches(feature_repo, ticket) +print("Found the following feature branches:\n%s" % "\n".join([str(x) for x in feature_branches])) +print("") + +default_oldest_feature_version = feature_branches[0].version if len(feature_branches) > 0 else None +oldest_release_version = None +while not oldest_release_version: + oldest_release_version = read_with_default("Enter the oldest release version to merge into", version_as_string(default_oldest_feature_version)) + if oldest_release_version: + oldest_release_version = version_from_string(oldest_release_version) + if oldest_release_version not in [b.version for b in release_branches]: + print("Invalid release version: %s" % str(oldest_release_version)) + oldest_release_version = None + 
+target_release_branches = [b for b in release_branches if b.version >= oldest_release_version] +merges = [] +for release_branch in target_release_branches: + # find first feature branch whose version is the same as the version of the release branch + default_matching_feature_branch = next((b for b in feature_branches if b.version == release_branch.version), None) + default_matching_feature_branch_name = default_matching_feature_branch.name if default_matching_feature_branch else "none" + merge = None + while merge is None: + matching_feature_branch_name = read_with_default("Enter the name of the feature branch to merge into %s or type 'none' if there is no feature branch for this release" % release_branch.name, default_matching_feature_branch_name) + if matching_feature_branch_name == "none": + if len(merges) == 0: + print("Feature branch for the oldest release must be provided") + continue + merge = (release_branch, None) + else: + if check_remote_branch_exists(feature_repo, matching_feature_branch_name): + merge = (release_branch, matching_feature_branch_name) + else: + print("Invalid feature branch name: %s" % matching_feature_branch_name) + merges.append(merge) + +print("") +print("Merge commands:") +for release_branch, feature_branch in merges: + if feature_branch: + print("%s -> %s" % (feature_branch, release_branch.name)) + else: + print("none -> %s" % release_branch.name) + +# list commits of the oldest branch and let the user decide whose commit message should be used as a title in CHANGES.txt +print("") +print("Commits:") +commits = subprocess.check_output(["git", "log", "--reverse", "--pretty=format:%s", "%s/%s..%s/%s" % (upstream_repo, merges[0][0].name, feature_repo, merges[0][1])], text=True).splitlines() +# zip commits with their index +commits = list(zip(range(1, len(commits) + 1), commits)) +for i, commit in commits: + print("%d: %s" % (i, commit)) +print("") +change_title = read_with_default("Enter the number of the commit whose message should be 
used as a title in CHANGES.txt or leave empty to enter a custom title", "") +if change_title: + change_title = commits[int(change_title) - 1][1] +else: + change_title = read_with_default("Enter the title", commits[0][1]) + +# then generate the script + + + diff --git a/dev/update_changes.py b/dev/update_changes.py index ec0b6e3c..522dc8d0 100644 --- a/dev/update_changes.py +++ b/dev/update_changes.py @@ -3,7 +3,7 @@ import sys from typing import NamedTuple, Tuple -from git_utils import get_apache_branches, version_from_branch, version_from_string, version_from_re +from git_utils import get_release_branches, version_from_branch, version_from_string, version_from_re class MergeSection(NamedTuple): @@ -132,11 +132,11 @@ def get_or_insert_merge_section(target_section: ReleaseSection, target_version: release_sections = read_changes_file() merge_versions = [] -for branch in get_apache_branches(repo): - if branch == "trunk": +for branch in get_release_branches(repo): + if branch.name == "trunk": version = release_sections[0].version else: - version = version_from_branch(branch) + version = branch.version if version: merge_versions.append(version) diff --git a/dev/verify_git_history.py b/dev/verify_git_history.py index 250ea259..50ba8e89 100644 --- a/dev/verify_git_history.py +++ b/dev/verify_git_history.py @@ -1,6 +1,6 @@ import sys -from git_utils import get_local_branch_history, get_apache_branches, parse_merge_commit_msg, parse_push_ranges +from git_utils import get_local_branch_history, get_release_branches, parse_merge_commit_msg, parse_push_ranges # Read the command line arguments and validate them @@ -10,7 +10,7 @@ repo = sys.argv[1] start_branch = sys.argv[2] -main_branches = get_apache_branches(repo) +main_branches = [b.name for b in get_release_branches(repo)] # check if start_branch is a valid branch if start_branch not in main_branches: From 798738ac347fb29bf4295b6e07546f9703675384 Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski Date: Tue, 11 Apr 2023 
01:40:07 +0200 Subject: [PATCH 07/16] Refactored everything to Python --- dev/prepare-merge-commands.sh | 210 ------------------ dev/prepare_merge_commands.py | 111 --------- dev/scripts/__init__.py | 5 + dev/{ => scripts/lib}/git_utils.py | 208 ++++++++++++----- dev/scripts/lib/script_generator.py | 108 +++++++++ dev/scripts/prepare_merge_commands.py | 109 +++++++++ dev/{ => scripts}/update_changes.py | 92 +++----- dev/scripts/verify_git_history.py | 98 ++++++++ dev/test/__init__.py | 5 + ...st_resolving_version_and_merge_sections.py | 141 ++++++++++++ dev/verify_git_history.py | 77 ------- 11 files changed, 651 insertions(+), 513 deletions(-) delete mode 100755 dev/prepare-merge-commands.sh delete mode 100644 dev/prepare_merge_commands.py create mode 100644 dev/scripts/__init__.py rename dev/{ => scripts/lib}/git_utils.py (50%) create mode 100644 dev/scripts/lib/script_generator.py create mode 100644 dev/scripts/prepare_merge_commands.py rename dev/{ => scripts}/update_changes.py (63%) create mode 100644 dev/scripts/verify_git_history.py create mode 100644 dev/test/__init__.py create mode 100644 dev/test/test_resolving_version_and_merge_sections.py delete mode 100644 dev/verify_git_history.py diff --git a/dev/prepare-merge-commands.sh b/dev/prepare-merge-commands.sh deleted file mode 100755 index f2b5783c..00000000 --- a/dev/prepare-merge-commands.sh +++ /dev/null @@ -1,210 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -set -e - -supported_versions=("3.0" "3.11" "4.0" "4.1" "trunk") - -# Read the remote Apache Cassandra repository name -apache_repository="" -for r in $(git remote show); do - url="$(git remote get-url "$r")" - if [[ "$url" == *"apache/cassandra.git" ]]; then - apache_repository="$r" - break - elif [[ "$url" == *"asf/cassandra.git" ]]; then - apache_repository="$r" - fi -done -echo "Remote repositories: " -git remote -v show -echo "" -read -r -e -i "$apache_repository" -p "Enter Apache Cassandra remote repository name: " apache_repository -git fetch "$apache_repository" -echo "" - -# Read the feature repository and branch -branch="$(git status -b --porcelain=v2 | grep branch.upstream | cut -f 3 -d ' ')" - -if [[ "$branch" =~ ^([^/]*)/(CASSANDRA-[0-9]+).*$ ]]; then - repository="${BASH_REMATCH[1]}" - ticket="${BASH_REMATCH[2]}" -fi - -read -r -e -i "$repository" -p "Enter a feature repository name: " repository -read -r -e -i "$ticket" -p "Enter a ticket ID: " ticket -if [ -z "$repository" ] || [ -z "$ticket" ]; then - exit 0 -fi -echo "" - -git fetch "$repository" - -# Read all feature branches based on the ticket name -readarray -t branches < <(git ls-remote --refs -h -q "$repository" | grep "$ticket" | cut -d '/' -f 3 | sort) -if [[ "${#branches[@]}" == 0 ]]; then - echo "Found no feature branches that include a $ticket in name" - exit 0 -fi - -echo "The following feature branches were found:" -for branch in "${branches[@]}"; do - echo "$branch" -done -echo "" - -# Read the oldest Cassandra version where the feature should be applied -matched=0 -while 
[[ $matched == 0 ]]; do - read -r -e -p "What is the oldest target version you want to merge? " oldest_target_version - if [[ -z "$oldest_target_version" ]]; then - exit 0 - fi - - feature_versions=() - for v in "${supported_versions[@]}"; do - if [[ "$v" == "$oldest_target_version" ]]; then - matched=1 - fi - if [[ $matched == 1 ]]; then - feature_versions+=("$v") - fi - done -done - -echo "Will merge to the following Cassandra versions:" -for v in "${feature_versions[@]}"; do - echo "$v" -done -echo "" - -function find_matching_branch() { - local infix="$1" - for b in "${branches[@]}"; do - if [[ "$infix" == "" ]] && [[ "$b" == "$ticket" ]]; then - echo "$b" - return 0 - elif [[ "$b" == *"$infix"* ]]; then - echo "$b" - return 0 - fi - done - - return 1 -} - -# Confirm which feature branches are for which Cassandra versions -feature_branches=() -target_branches=() -for v in "${feature_versions[@]}"; do - branch="" - if [[ "$v" == "trunk" ]]; then - target_branches+=("trunk") - branch="$(find_matching_branch trunk || find_matching_branch "" || true)" - else - target_branches+=("cassandra-$v") - branch="$(find_matching_branch "$v" || true)" - fi - read -r -e -i "$branch" -p "Enter branch for version $v or leave empty if there nothing to merge for this version: " branch - feature_branches+=("$branch") -done - -# Generate a script - -echo "" -echo "" -echo "" -echo "git fetch $apache_repository" -echo "git fetch $repository" - -# Get a subject from the first commit which will serve as a title to be pasted into CHANGES.txt -first_commit="$(git log --pretty=format:%s --reverse "$apache_repository/${target_branches[0]}..$repository/${feature_branches[0]}" | head -n 1)" - -push_command="git push --atomic $apache_repository" -skipped_branches_found=0 -for i in $(seq 0 $((${#target_branches[@]} - 1))); do - echo "" - echo "" - echo "" - echo "# $repository/${feature_branches[$i]} -> ${target_branches[$i]}" - echo "# 
--------------------------------------------------------------------------------------------------------" - - if [[ $i == 0 ]] && [[ "${feature_branches[$i]}" == "" ]]; then - # Although we can skip a feature for some versions, we cannot skip it for the oldest version (which is quite obvious) - exit 1 - fi - - # Read the list of commits between the remote head and the feature branch - we need to cherry pick them (or some of them) - - echo "git switch ${target_branches[$i]}" - echo "git reset --hard $apache_repository/${target_branches[$i]}" - if [[ "${feature_branches[$i]}" == "" ]]; then - skipped_branches_found=1 - # A script for the case where there is no fix for a version - echo "git merge -s ours --log --no-edit ${target_branches[$((i - 1))]}" - else - readarray -t commits < <(git log --reverse --oneline "$apache_repository/${target_branches[$i]}..$repository/${feature_branches[$i]}") - - if [[ $i != 0 ]]; then - # When this isn't the oldest version (we want to have only the merge commit) - echo "git merge -s ours --log --no-edit ${target_branches[$((i - 1))]}" - fi - - for c in $(seq 0 $((${#commits[@]} - 1))); do - commit_sha="$(echo "${commits[$c]}" | cut -f 1 -d ' ')" - if [[ $i == 0 ]] && [[ $c == 0 ]]; then - # we want to have only one feature commit (c == 0), which is in the oldest version (i == 0) - echo "git cherry-pick $commit_sha # ${commits[$c]}" - else - # otherwise we squash the commits to the previous one - echo "git cherry-pick -n $commit_sha && git commit -a --amend --no-edit # ${commits[$c]}" - fi - done - fi - - if [[ "$skipped_branches_found" == "0" ]]; then - if [[ $i == 0 ]]; then - echo "grep '$ticket' CHANGES.txt || sed -E -i '/^[0-9]+\.[0-9]+/{s/.*/&\n\ * $first_commit ($ticket)/;:a;n;ba}' CHANGES.txt" - else - echo "grep '$ticket' CHANGES.txt || sed -E -i '/^Merged from ${oldest_target_version}/{s/.*/&\n\ * $first_commit ($ticket)/;:a;n;ba}' CHANGES.txt" - fi - echo "git diff CHANGES.txt" - else - echo "Update CHANGES.txt by adding 
the following line:" - echo " * $first_commit ($ticket)" - fi - echo "git add CHANGES.txt" - echo "git commit --amend --no-edit" - - echo "(git diff $apache_repository/${target_branches[$i]}..HEAD -- .circleci/ | git apply -R --index) && git commit -a --amend --no-edit # Remove all changes in .circleci directory if you need to" - - echo "git diff --name-only $apache_repository/${target_branches[$i]}..HEAD # print a list of all changes files" - - push_command+=" ${target_branches[$i]}" -done - -echo "" -echo "" -echo "" -echo "python3 verify_git_history.py $apache_repository ${target_branches[0]}" - -echo "" -echo "" -echo "" -echo "$push_command -n" diff --git a/dev/prepare_merge_commands.py b/dev/prepare_merge_commands.py deleted file mode 100644 index 6ff5a36a..00000000 --- a/dev/prepare_merge_commands.py +++ /dev/null @@ -1,111 +0,0 @@ -import subprocess -import sys -import re -from git_utils import * - -def read_with_default(prompt, default): - value = input("%s [default: %s]: " % (prompt, default)) - if not value: - value = default - return value - -def read_remote_repository(prompt, default): - print("Remote repositories:") - subprocess.check_call(["git", "remote", "show"]) - print("") - repo = None - - while not repo: - repo = read_with_default(prompt, default) - if not check_remote_exists(repo): - print("Invalid remote repository name: %s" % repo) - repo = None - - return repo - - -# read upstream repository name from stdin -upstream_repo = read_with_default("Enter the name of the remote repository that points to the upstream Apache Cassandra", get_remote_cassandra_repository_name()) - -feature_repo, feature_branch, ticket_number = get_upstream_branch_and_repo() -feature_repo = read_remote_repository("Enter the name of the remote repository that points to the upstream feature branch", feature_repo) - -ticket_number = read_with_default("Enter the ticket number", ticket_number) -ticket = "CASSANDRA-%s" % ticket_number - -print("") -print("Fetching from %s and 
%s" % (upstream_repo, feature_repo)) -subprocess.check_output(["git", "fetch", upstream_repo]) -if feature_repo != upstream_repo: - subprocess.check_output(["git", "fetch", feature_repo]) - -release_branches = get_release_branches(upstream_repo) -if len(release_branches) == 0: - print("No release branches found in %s" % upstream_repo) - sys.exit(1) - -print("Found the following release branches:\n%s" % "\n".join([str(b) for b in release_branches])) -print("") - -feature_branches = get_feature_branches(feature_repo, ticket) -print("Found the following feature branches:\n%s" % "\n".join([str(x) for x in feature_branches])) -print("") - -default_oldest_feature_version = feature_branches[0].version if len(feature_branches) > 0 else None -oldest_release_version = None -while not oldest_release_version: - oldest_release_version = read_with_default("Enter the oldest release version to merge into", version_as_string(default_oldest_feature_version)) - if oldest_release_version: - oldest_release_version = version_from_string(oldest_release_version) - if oldest_release_version not in [b.version for b in release_branches]: - print("Invalid release version: %s" % str(oldest_release_version)) - oldest_release_version = None - -target_release_branches = [b for b in release_branches if b.version >= oldest_release_version] -merges = [] -for release_branch in target_release_branches: - # find first feature branch whose version is the same as the version of the release branch - default_matching_feature_branch = next((b for b in feature_branches if b.version == release_branch.version), None) - default_matching_feature_branch_name = default_matching_feature_branch.name if default_matching_feature_branch else "none" - merge = None - while merge is None: - matching_feature_branch_name = read_with_default("Enter the name of the feature branch to merge into %s or type 'none' if there is no feature branch for this release" % release_branch.name, default_matching_feature_branch_name) - if 
matching_feature_branch_name == "none": - if len(merges) == 0: - print("Feature branch for the oldest release must be provided") - continue - merge = (release_branch, None) - else: - if check_remote_branch_exists(feature_repo, matching_feature_branch_name): - merge = (release_branch, matching_feature_branch_name) - else: - print("Invalid feature branch name: %s" % matching_feature_branch_name) - merges.append(merge) - -print("") -print("Merge commands:") -for release_branch, feature_branch in merges: - if feature_branch: - print("%s -> %s" % (feature_branch, release_branch.name)) - else: - print("none -> %s" % release_branch.name) - -# list commits of the oldest branch and let the user decide whose commit message should be used as a title in CHANGES.txt -print("") -print("Commits:") -commits = subprocess.check_output(["git", "log", "--reverse", "--pretty=format:%s", "%s/%s..%s/%s" % (upstream_repo, merges[0][0].name, feature_repo, merges[0][1])], text=True).splitlines() -# zip commits with their index -commits = list(zip(range(1, len(commits) + 1), commits)) -for i, commit in commits: - print("%d: %s" % (i, commit)) -print("") -change_title = read_with_default("Enter the number of the commit whose message should be used as a title in CHANGES.txt or leave empty to enter a custom title", "") -if change_title: - change_title = commits[int(change_title) - 1][1] -else: - change_title = read_with_default("Enter the title", commits[0][1]) - -# then generate the script - - - diff --git a/dev/scripts/__init__.py b/dev/scripts/__init__.py new file mode 100644 index 00000000..a437d36c --- /dev/null +++ b/dev/scripts/__init__.py @@ -0,0 +1,5 @@ +import os +import sys + +PROJECT_PATH = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(PROJECT_PATH) diff --git a/dev/git_utils.py b/dev/scripts/lib/git_utils.py similarity index 50% rename from dev/git_utils.py rename to dev/scripts/lib/git_utils.py index 77914069..611313a3 100644 --- a/dev/git_utils.py +++ 
b/dev/scripts/lib/git_utils.py @@ -1,17 +1,66 @@ import re import subprocess import sys -from typing import NamedTuple, Tuple +from typing import NamedTuple, Tuple, Optional -class FeatureBranch(NamedTuple): + +class VersionedBranch(NamedTuple): version: Tuple[int, int] version_string: str name: str +class Commit(NamedTuple): + sha: str + author: str + title: str + +class BranchMergeInfo(NamedTuple): + release_branch: VersionedBranch + feature_branch: Optional[VersionedBranch] + commits: list[Commit] + +class TicketMergeInfo(NamedTuple): + ticket: str + title: str + upstream_repo: str + feature_repo: str + merges: list[BranchMergeInfo] + NO_VERSION = (-1, -1) -TRUNK = (255, 255) +TRUNK_VERSION = (255, 255) + +CASSANRA_BRANCH_VERSION_RE = re.compile(r"cassandra-(\d+)\.(\d+)") +VERSION_RE = re.compile(r"(\d+)\.(\d+)") + +def version_from_re(re, string): + match = re.match(string) + if match: + return (int(match.group(1)), int(match.group(2))) + return None + + +def version_from_branch(branch): + return version_from_re(CASSANRA_BRANCH_VERSION_RE, branch) + + +def version_from_string(version_string): + if version_string == "trunk": + return TRUNK_VERSION + return version_from_re(VERSION_RE, version_string) -def get_feature_branches(repo, ticket): + +def version_as_string(version): + if version is None: + return None + if version == TRUNK_VERSION: + return "trunk" + return "%s.%s" % version + + +### GIT functions ### + + +def guess_feature_branches(repo, ticket): """ Get the list of branches from the given repository that contain the given ticket, sorted by version ascending. 
:param repo: configured apache repository name @@ -27,20 +76,21 @@ def get_feature_branches(repo, ticket): if match: branch = match.group(1) if branch == ticket: - version = TRUNK + version = TRUNK_VERSION elif match.group(2): version = (int(match.group(3)), int(match.group(4))) else: version = NO_VERSION - matching_branches.append(FeatureBranch(version, match.group(2), branch)) + matching_branches.append(VersionedBranch(version, match.group(2), branch)) matching_branches.sort(key=lambda x: x.version) return matching_branches -def get_upstream_branch_and_repo(): + +def guess_feature_repo_and_ticket(): """ - Get the upstream branch and repository name for the current branch. - :return: a tuple of (remote_name, branch_name, ticket_number) or None if the current branch is not tracking a remote branch + Get the remote repository and ticket number from the current git branch. + :return: a tuple (remote_repository, ticket_number) or None if the current branch does not look like a feature branch """ output = subprocess.check_output(["git", "status", "-b", "--porcelain=v2"], shell=False).decode("utf-8") regex = re.compile(r"# branch\.upstream ([^/]+)/([^ ]+)") @@ -49,12 +99,12 @@ def get_upstream_branch_and_repo(): ticket_regex = re.compile(r"CASSANDRA-(\d+)", flags=re.IGNORECASE) ticket_match = ticket_regex.search(match.group(2)) if ticket_match: - return (match.group(1), match.group(2), ticket_match.group(1)) - return (match.group(1), match.group(2), None) - return None + return (match.group(1), ticket_match.group(1)) + return (match.group(1), None) + return (None, None) -def get_remote_cassandra_repository_name(): +def guess_upstream_repo(): """ Get the name of the remote repository that points to the apache cassandra repository. Prefers "apache" over "asf". 
:return: the remote name @@ -69,44 +119,50 @@ def get_remote_cassandra_repository_name(): apache_remote_name = remote_name return apache_remote_name + def get_release_branches(repo): """ Get the list of main cassandra branches from the given repo, sorted by version ascending. :param repo: configured apache repository name - :return: list of branch names + :return: list of VersionedBranch objects """ - output = subprocess.check_output(["git", "ls-remote", "--refs", "-h", "-q", repo], shell=False) - branch_regex = re.compile(r".*refs/heads/(cassandra-(\d+)\.(\d+))$") + output = subprocess.check_output(["git", "ls-remote", "--refs", "-h", "-q", repo], text=True) + branch_regex = re.compile(r".*refs/heads/(cassandra-((\d+)\.(\d+)))$") - branches_with_versions = [] - for line in output.decode("utf-8").split("\n"): + branches = [] + for line in output.split("\n"): match = branch_regex.match(line) if match: - branches_with_versions.append(FeatureBranch((int(match.group(2)), int(match.group(3))), "-%s.%s" % (match.group(2), match.group(3)), match.group(1))) + branches.append(VersionedBranch((int(match.group(3)), int(match.group(4))), match.group(2), match.group(1))) - branches_with_versions.append(FeatureBranch(TRUNK, "", "trunk")) - branches_with_versions.sort(key=lambda x: x.version) + branches.append(VersionedBranch(TRUNK_VERSION, "", "trunk")) + branches.sort(key=lambda x: x.version) - return branches_with_versions + return branches -def get_local_branch_history(repo, branch): +def get_commits(from_repo, from_branch, to_repo, to_branch): """ - Get the commit history between local branch and remote branch, sorted by commit date ascending. - :param repo: configured apache repository name - :param branch: branch name - :return: a list of tuples (commit_hash, commit_message) + Get the commit history between two branches, sorted by commit date ascending. 
+ :param from_repo: start repository name or None for local branch + :param from_branch: start branch name + :param to_repo: end repository name or None for local branch + :param to_branch: end branch name + :return: a list of Commit objects """ - output = subprocess.check_output(["git", "log", "--pretty=format:%H %s", "%s/%s..%s" % (repo, branch, branch)], - shell=False) - history = [] - line_regex = re.compile(r"([0-9a-f]+) (.*)") - for line in output.decode("utf-8").split("\n"): - match = line_regex.match(line) - if match: - history.append((match.group(1), match.group(2))) - history.reverse() - return history + def coordinates(repo, branch): + if repo: + return "%s/%s" % (repo, branch) + else: + return branch + output = subprocess.check_output(["git", "log", "--pretty=format:%h%x00%aN%x00%s", "--reverse", "%s..%s" % (coordinates(from_repo, from_branch), coordinates(to_repo, to_branch))], text=True) + commits = [] + for line in output.split("\n"): + if not line.strip(): + continue + match = line.split("\0") + commits.append(Commit(match[0], match[1], match[2])) + return commits def parse_merge_commit_msg(msg): @@ -115,14 +171,21 @@ def parse_merge_commit_msg(msg): :param msg: a commit message :return: a tuple of (source_branch, destination_branch) or None if the message is not a merge commit """ - msg_regex = re.compile(r"Merge branch '(cassandra-\d+\.\d+)' into (cassandra-((\d+\.\d+)|trunk))") + msg_regex = re.compile(r"Merge branch '(cassandra-\d+\.\d+)' into ((cassandra-(\d+\.\d+))|trunk)") match = msg_regex.match(msg) if match: return (match.group(1), match.group(2)) return None -def parse_push_ranges(repo, branches): +def ensure_clean_git_tree(): + output = subprocess.check_output(["git", "status", "--porcelain"], text=True) + if output.strip(): + print("Your git tree is not clean. 
Please commit or stash your changes before running this script.") + sys.exit(1) + + +def get_push_ranges(repo, branches): """ Parse the output of git push --atomic -n and return a list of tuples (label, start_commit, end_commit) :param repo: configured apache repository name @@ -138,30 +201,55 @@ def parse_push_ranges(repo, branches): ranges.append((match.group(1), match.group(2), match.group(3))) return ranges -cassandra_branch_version_re = re.compile(r"cassandra-(\d+)\.(\d+)") -version_string_re = re.compile(r"(\d+)\.(\d+)") - -def version_from_re(re, string): - match = re.match(string) - if match: - return (int(match.group(1)), int(match.group(2))) - return None - -def version_from_branch(branch): - return version_from_re(cassandra_branch_version_re, branch) - -def version_from_string(version_string): - return version_from_re(version_string_re, version_string) def check_remote_exists(remote): - return subprocess.check_call(["git", "remote", "get-url", remote], stderr=sys.stderr, stdout=None) == 0 + try: + return subprocess.check_call(["git", "remote", "get-url", remote], stderr=sys.stderr, stdout=None) == 0 + except subprocess.CalledProcessError: + return False + def check_remote_branch_exists(remote, branch): return subprocess.check_call(["git", "ls-remote", "--exit-code", remote, branch], stderr=sys.stderr, stdout=None) == 0 -def version_as_string(version): - if version is None: - return None - if version == TRUNK: - return "trunk" - return "%s.%s" % version + +### User input functions ### + + +def read_with_default(prompt, default): + if default: + value = input("%s [default: %s]: " % (prompt, default)) + else: + value = input("%s: " % prompt) + if not value: + value = default + return value + + +def read_remote_repository(prompt, default): + repo = None + + while not repo: + repo = read_with_default(prompt, default) + if not check_remote_exists(repo): + repo = None + + return repo + + +def read_positive_int(prompt, default): + value = None + while not value: 
+ try: + value = input(prompt) + if value: + v = int(value) + if v > 0: + return v + else: + return default + except ValueError: + print("Invalid integer value") + value = None + return value + diff --git a/dev/scripts/lib/script_generator.py b/dev/scripts/lib/script_generator.py new file mode 100644 index 00000000..75ab1666 --- /dev/null +++ b/dev/scripts/lib/script_generator.py @@ -0,0 +1,108 @@ +import os + +from lib.git_utils import * + +def resolve_version_and_merge_sections(idx: int, merges: list[Tuple[VersionedBranch, bool]]) -> Tuple[Optional[VersionedBranch], list[VersionedBranch]]: + """ + Compute the version and merge sections for a given index in the CHANGES.txt file. + See the unit tests for examples. + + :param idx: the index of the merge + :param merges: list of merges + :return: the version and merge sections + """ + + version_section = None + merge_sections = [] + release_branch, is_patch_defined = merges[idx] + + assert idx > 0 or is_patch_defined, "The first merge must be a patch" + + if idx == 0: # which means that we are in the oldest version + # in this case we just add the title for the version + version_section = release_branch + # no merge section in this case + + elif idx == (len(merges) - 1): # which means that this is a merge for trunk + # in this case version section is either len(merges) - 2 or None + before_last_release_branch, is_patch_defined_for_before_last = merges[idx - 1] + if is_patch_defined_for_before_last: + # version section is defined only if the before last release branch is a patch + version_section = before_last_release_branch + for i in range(idx - 2, -1, -1): + release_branch, _ = merges[i] + merge_sections.append(release_branch) + + elif is_patch_defined: + # otherwise, version section is defined only if there is a patch for the release branch + version_section = release_branch + for i in range(idx - 1, -1, -1): + release_branch, _ = merges[i] + merge_sections.append(release_branch) + + return version_section, 
merge_sections + +def generate_script(ticket_merge_info: TicketMergeInfo): + assert ticket_merge_info.merges[0].feature_branch is not None + + script_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + + script = ["#!/bin/bash", "", "set -xe", "", "[[ -z $(git status --porcelain) ]] # worktree must be clean"] + + merges = ticket_merge_info.merges + # index of first merge with undefined feature branch + for idx in range(0, len(merges)): + merge = merges[idx] + script.append("") + script.append("") + script.append("") + if merge.feature_branch is not None: + script.append("# Commands for merging %s -> %s" % (merge.feature_branch.name, merge.release_branch.name)) + else: + script.append("# Commands for skipping -> %s" % merge.release_branch.name) + script.append("#" * 80) + + if merge.feature_branch: + # ensure that there is at least one non-merge commit in the feature branch + assert len([c for c in merge.commits if parse_merge_commit_msg(c.title) is None]) > 0 + + script.append("git switch %s" % merge.release_branch.name) + script.append("git reset --hard %s/%s" % (ticket_merge_info.upstream_repo, merge.release_branch.name)) + commits = [] + if idx == 0: + # oldest version + script.append("git cherry-pick %s # %s - %s" % ( + merge.commits[0].sha, merge.commits[0].author, merge.commits[0].title)) + commits = merge.commits[1:] + else: + script.append("git merge -s ours --log --no-edit %s" % merges[idx - 1].release_branch.name) + commits = merge.commits + + for commit in commits: + merge_msg = parse_merge_commit_msg(commit.title) + if merge_msg: + script.append("# skipping merge commit %s %s - %s" % (commit.sha, commit.author, commit.title)) + else: + script.append("git cherry-pick -n %s # %s - %s" % (commit.sha, commit.author, commit.title)) + + version_section, merge_sections = resolve_version_and_merge_sections(idx, [(m.release_branch, m.feature_branch is not None) for m in merges]) + if version_section: + script.append("python3 %s/update_changes.py 
'%s' '%s' '%s' '%s'" % (script_dir, + ticket_merge_info.ticket, + version_as_string(version_section.version), + ",".join([version_as_string(m.version) for m in merge_sections]), + ticket_merge_info.title)) + + script.append("git add CHANGES.txt") + script.append("git commit --amend --no-edit") + + script.append("[[ -n \"$(git diff --name-only %s/%s..HEAD -- .circleci/)\" ]] && (git diff %s/%s..HEAD -- .circleci/ | git apply -R --index) && git commit -a --amend --no-edit # Remove all changes in .circleci directory if you need to" % (ticket_merge_info.upstream_repo, merge.release_branch.name, ticket_merge_info.upstream_repo, merge.release_branch.name)) + script.append("git diff --name-only %s/%s..HEAD # print a list of all changes files" % (ticket_merge_info.upstream_repo, merge.release_branch.name)) + + script.append("") + script.append("") + script.append("") + script.append("# After executing the above commands, please run the following verification, and manually inspect the results of the commands it generates") + script.append("python3 %s/verify_git_history.py '%s' '%s'" % (script_dir, ticket_merge_info.upstream_repo, ",".join([m.release_branch.name for m in merges]))) + + return script diff --git a/dev/scripts/prepare_merge_commands.py b/dev/scripts/prepare_merge_commands.py new file mode 100644 index 00000000..7ab9581d --- /dev/null +++ b/dev/scripts/prepare_merge_commands.py @@ -0,0 +1,109 @@ +from lib.script_generator import generate_script +from lib.git_utils import * + +ensure_clean_git_tree() + +### Read feature repo, upstream repo and ticket +print("Remote repositories:") +print("") +subprocess.check_call(["git", "remote", "show"]) +print("") + +upstream_repo = read_remote_repository("Enter the name of the remote repository that points to the upstream Apache Cassandra", guess_upstream_repo()) + +feature_repo, ticket_number = guess_feature_repo_and_ticket() +feature_repo = read_remote_repository("Enter the name of the remote repository that points to the 
upstream feature branch", feature_repo) + +ticket_number = read_positive_int("Enter the ticket number: ", ticket_number) +ticket = "CASSANDRA-%s" % ticket_number + +print("") +print("Fetching from %s" % upstream_repo) +subprocess.check_output(["git", "fetch", upstream_repo]) +if feature_repo != upstream_repo: + print("Fetching from %s" % feature_repo) + subprocess.check_output(["git", "fetch", feature_repo]) + + +### Get the list of release branches and feature branches ### + +release_branches = get_release_branches(upstream_repo) +if len(release_branches) == 0: + print("No release branches found in %s" % upstream_repo) + sys.exit(1) +print("Found the following release branches:\n%s" % "\n".join(["%s: %s" % (version_as_string(b.version), b.name) for b in release_branches])) +print("") + +feature_branches = guess_feature_branches(feature_repo, ticket) +print("Found the following feature branches:\n%s" % "\n".join(["%s: %s" % (version_as_string(b.version), b.name) for b in feature_branches])) +print("") + +### Read the oldest release version the feature applies to ### + +guessed_oldest_feature_version = feature_branches[0].version if len(feature_branches) > 0 else None +oldest_release_version = None +while not oldest_release_version: + oldest_release_version_str = read_with_default("Enter the oldest release version to merge into", version_as_string(guessed_oldest_feature_version)) + if oldest_release_version_str: + oldest_release_version = version_from_string(oldest_release_version_str) + if oldest_release_version not in [b.version for b in release_branches]: + print("Invalid release version: %s" % str(oldest_release_version)) + oldest_release_version = None + +### Read the feature branches corresponding to each release branch ### + +target_release_branches = [b for b in release_branches if b.version >= oldest_release_version] +merges = [] +for release_branch in target_release_branches: + # find first feature branch whose version is the same as the version of the 
release branch + guessed_matching_feature_branch = next((b for b in feature_branches if b.version == release_branch.version), None) + guessed_matching_feature_branch_name = guessed_matching_feature_branch.name if guessed_matching_feature_branch else "none" + merge = None + while merge is None: + matching_feature_branch_name = read_with_default("Enter the name of the feature branch to merge into %s or type 'none' if there is no feature branch for this release" % release_branch.name, guessed_matching_feature_branch_name) + if matching_feature_branch_name == "none": + if len(merges) == 0: + print("Feature branch for the oldest release must be provided") + continue + merge = BranchMergeInfo(release_branch, None, []) + else: + if matching_feature_branch_name in [b.name for b in feature_branches] or check_remote_branch_exists(feature_repo, matching_feature_branch_name): + merge = BranchMergeInfo(release_branch, VersionedBranch(release_branch.version, NO_VERSION, matching_feature_branch_name), get_commits(upstream_repo, release_branch.name, feature_repo, matching_feature_branch_name)) + else: + print("Invalid feature branch name: %s" % matching_feature_branch_name) + merges.append(merge) + + +### Read the change title ### + +print("") +print("Commits:") +# zip commits with their index +commits = list(zip(range(1, len(merges[0].commits) + 1), merges[0].commits)) +for i, commit in commits: + print("%d: %s" % (i, str(commit))) +print("") +commit_idx = read_positive_int("Enter the number of the commit whose message should be used as a title in CHANGES.txt or leave empty to enter a custom title: ", None) +change_title = None +if commit_idx and commit_idx <= len(commits): + change_title = commits[commit_idx - 1][1].title +else: + while not change_title: + change_title = read_with_default("Enter the title", commits[0][1].title).strip() + +### Generate the script ### + +ticket_merge_info = TicketMergeInfo(ticket, change_title, upstream_repo, feature_repo, merges) + +script = 
generate_script(ticket_merge_info) + +# Read the filename to save the script to from either the command line or from the user +if len(sys.argv) > 1: + filename = sys.argv[1] +else: + filename = read_with_default("Enter the filename to save the script to", "merge_%s.sh" % ticket) + +# Save the script to the file +with open(filename, "w") as f: + for s in script: + f.write(s + "\n") diff --git a/dev/update_changes.py b/dev/scripts/update_changes.py similarity index 63% rename from dev/update_changes.py rename to dev/scripts/update_changes.py index 522dc8d0..2b011159 100644 --- a/dev/update_changes.py +++ b/dev/scripts/update_changes.py @@ -3,7 +3,7 @@ import sys from typing import NamedTuple, Tuple -from git_utils import get_release_branches, version_from_branch, version_from_string, version_from_re +from lib.git_utils import * class MergeSection(NamedTuple): @@ -18,7 +18,7 @@ class ReleaseSection(NamedTuple): merge_sections: list[MergeSection] -def read_changes_file() -> list[ReleaseSection]: +def read_changes_file(ticket: str) -> list[ReleaseSection]: """ Read the changes file and return a list of release sections. 
:return: a list of release sections @@ -58,7 +58,7 @@ def read_changes_file() -> list[ReleaseSection]: merge_section = MergeSection(merge_version, messages) elif lines[i].strip(): - if (ticket in lines[i] or message in lines[i]): + if (ticket in lines[i]): print("Found duplicate message in line %d: %s" % (i + 1, lines[i])) exit(1) messages.append(lines[i]) @@ -82,7 +82,7 @@ def write_changes_file(release_sections: list[ReleaseSection]): f.write(message) for merge_section in version_section.merge_sections: - f.write("Merged from %d.%d:\n" % merge_section.version) + f.write("Merged from %s:\n" % version_as_string(merge_section.version)) for message in merge_section.messages: f.write(message) @@ -120,60 +120,42 @@ def get_or_insert_merge_section(target_section: ReleaseSection, target_version: # check if the commond line args contain the message and a list of branches if len(sys.argv) < 5: - print("Usage: %s " % sys.argv[0]) + print("Adds a change info to the CHANGES.txt file.") + print("Usage: %s " % sys.argv[0]) + print("") + print("Example: %s CASSANDRA-12345 '4.1' '3.11,4.0' 'Some awesome change'" % sys.argv[0]) + print("It adds a change info to the top of 'Merged from 3.11' section for the latest '4.1' section, ensuring that 'Merged from 4.0' is there as well.") exit(1) -repo = sys.argv[1] -target_branch = sys.argv[2] -target_version = version_from_string(target_branch) -ticket = sys.argv[3] -message = sys.argv[4] - -release_sections = read_changes_file() - -merge_versions = [] -for branch in get_release_branches(repo): - if branch.name == "trunk": - version = release_sections[0].version - else: - version = branch.version - if version: - merge_versions.append(version) - -merge_versions = merge_versions[merge_versions.index(target_version):] -current_branch = subprocess.check_output(["git", "branch", "--show-current"], shell=False).decode("utf-8").strip() - -target_section = None -target_merge_section = None -new_message = " * %s (%s)\n" % (message, ticket) - -if 
current_branch == "trunk": - current_version = release_sections[0].version - if current_version == target_version: - # if we are on trunk and the target version is also trunk, we prepend the message to the first encountered version - target_section = release_sections[0] - else: - # if we are on trunk, but the target version is older, we prepend the message to the appropriate merge section - # (which may be created if it does not exist) in the second encountered version - target_section = release_sections[1] - for merge_version in merge_versions[1:-1]: - get_or_insert_merge_section(target_section, merge_version) - target_merge_section = get_or_insert_merge_section(target_section, target_version) -else: - current_version = version_from_branch(current_branch) - merge_versions = merge_versions[:merge_versions.index(current_version)] +ticket = sys.argv[1] +target_version_section_str = sys.argv[2] +target_merge_sections_strs = [s.strip() for s in sys.argv[3].split(",") if s.strip()] +title = sys.argv[4] + +release_sections = read_changes_file(ticket) + +if target_version_section_str == version_as_string(TRUNK_VERSION): + # if the target version is trunk, we prepend the message to the first encountered version target_section = release_sections[0] - if current_version != target_version: - for merge_version in merge_versions[1:-1]: - get_or_insert_merge_section(target_section, merge_version) - target_merge_section = get_or_insert_merge_section(target_section, target_version) - -if target_merge_section: - target_merge_section.messages.insert(0, new_message) -elif target_section: - target_section.messages.insert(0, new_message) else: - print("Could not find target section") - exit(1) + target_section = None + for section in release_sections: + if version_as_string(section.version) == target_version_section_str: + target_section = section + break + +assert target_section, "Could not find target version section %s" % target_version_section_str + +merge_section = None +for 
merge_section_str in target_merge_sections_strs: + print("Looking for merge section %d" % len(target_merge_sections_strs)) + merge_section = get_or_insert_merge_section(target_section, version_from_string(merge_section_str)) + +new_message = " * %s (%s)\n" % (title, ticket) + +if merge_section: + merge_section.messages.insert(0, new_message) +else: + target_section.messages.insert(0, new_message) write_changes_file(release_sections) diff --git a/dev/scripts/verify_git_history.py b/dev/scripts/verify_git_history.py new file mode 100644 index 00000000..3c72f6b1 --- /dev/null +++ b/dev/scripts/verify_git_history.py @@ -0,0 +1,98 @@ +from lib.git_utils import * + +# The script does two things: +# 1. Check that the history of the main branches (trunk, 4.0, 4.1, etc) is valid. +# The history of the oldest branch must contain only one commit, and that commit must not be a merge commit. +# The history of each newer branch must contain the history of the previous branch and a merge commit from that +# previous branch. +# 2. Execute dry run of the push command and parse the results. Then, generate diff and show commands for the user +# to manually inspect the changes. + +# Example usage: +# python3 dev/scripts/verify_git_history.py apache cassandra-4.0,cassandra-4.1,trunk +# +# The script will check the history of local cassandra-4.0, cassandra-4.1 and trunk branches against their remote +# counterparts in the apache repository. 
+ +# Read the command line arguments and validate them +if len(sys.argv) != 3: + print("Usage: %s <upstream-repo-name> <comma-separated-branches-to-push>" % sys.argv[0]) + exit(1) + +repo = sys.argv[1] +main_branches = [s.strip() for s in sys.argv[2].split(",") if s.strip()] + +if len(main_branches) == 0: + print("No branches specified") + exit(1) + +# get the patch commit message +history = get_commits(repo, main_branches[0], None, main_branches[0]) + +print("") +print("Checking branch %s" % main_branches[0]) +print("Expected merges: []") +print("History: \n - -%s" % "\n - ".join(str(x) for x in history)) + +# history for the first branch must contain only one commit +if len(history) != 1: + print("Invalid history for branch %s, must contain only one commit, but found %d: \n\n%s" % ( + main_branches[0], len(history), "\n".join(str(x) for x in history))) + exit(1) + +# check if the commit message is valid, that is, it must not be a merge commit +if parse_merge_commit_msg(history[0].title): + print("Invalid commit message for branch %s, must not be a merge commit, but found: \n\n%s" % ( + main_branches[0], history[0].title)) + exit(1) + +# Check the history of the branches to confirm that each branch contains exactly one main commit +# and the rest are the merge commits from the previous branch in order +expected_merges = [] +prev_branch = main_branches[0] +prev_history = history +for branch in main_branches[1:]: + expected_merges.append((prev_branch, branch)) + history = get_commits(repo, branch, None, branch) + + print("") + print("Checking branch %s" % branch) + print("Expected merges: %s" % str(expected_merges)) + print("History: \n - %s" % "\n - ".join(str(x) for x in history)) + + if history[:-1] != prev_history: + print("Invalid history for branch %s, must include the history of branch %s:\n\n%s\n\n, but found: \n\n%s" % ( + branch, prev_branch, + "\n".join(str(x) for x in prev_history), + "\n".join(str(x) for x in history))) + exit(1) + + # expect that the 
rest of the commits are merge commits matching the expected merges in the same order + for i in range(1, len(history)): + merge = parse_merge_commit_msg(history[i].title) + if not merge: + print("Invalid commit message for branch %s, must be a merge commit, but found: \n%s" % (branch, history[i])) + exit(1) + if merge != expected_merges[i - 1]: + print( + "Invalid merge commit for branch %s, expected: %s, but found: %s" % (branch, expected_merges[i - 1], merge)) + exit(1) + + prev_branch = branch + prev_history = history + +# finally we print the commands to explore the changes in each push range + +push_ranges = get_push_ranges(repo, main_branches) +# number of push ranges must match the number of branches we want to merge +if len(push_ranges) != len(main_branches): + print("Invalid number of push ranges, expected %d, but found %d:\n%s" % ( + len(main_branches), len(push_ranges), "\n".join(str(x) for x in push_ranges))) + exit(1) + +for push_range in push_ranges: + print("-" * 80) + print("Push range for branch %s: %s..%s" % (push_range[0], push_range[1], push_range[2])) + print("git diff --name-only %s..%s" % (push_range[1], push_range[2])) + print("git show %s..%s" % (push_range[1], push_range[2])) + print("") diff --git a/dev/test/__init__.py b/dev/test/__init__.py new file mode 100644 index 00000000..5792318c --- /dev/null +++ b/dev/test/__init__.py @@ -0,0 +1,5 @@ +import sys +import os + +PROJECT_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "scripts") +sys.path.append(PROJECT_PATH) diff --git a/dev/test/test_resolving_version_and_merge_sections.py b/dev/test/test_resolving_version_and_merge_sections.py new file mode 100644 index 00000000..f7ec7e17 --- /dev/null +++ b/dev/test/test_resolving_version_and_merge_sections.py @@ -0,0 +1,141 @@ +import unittest + +from lib.script_generator import * + + +class MyTestCase(unittest.TestCase): + v_50 = VersionedBranch((5, 0), "5.0", "trunk") + v_41 = VersionedBranch((4, 1), "4.1", 
"cassandra-4.1") + v_40 = VersionedBranch((4, 0), "4.0", "cassandra-4.0") + v_311 = VersionedBranch((3, 11), "3.11", "cassandra-3.11") + v_30 = VersionedBranch((3, 0), "3.0", "cassandra-3.0") + + # If the change is only for trunk, then: + # - we add the entry in the trunk section (top section). + def test_trunk(self): + merges = [(self.v_50, True)] + version_section, merge_sections = resolve_version_and_merge_sections(0, merges) + self.assertEqual(version_section, self.v_50) + self.assertEqual(merge_sections, []) + + # If the change is for 4.1 and trunk, then: + # - in 4.1, we add the entry in the 4.1 section (top section) + # - in trunk, we add the entry in the 4.1 section (first encountered 4.1 section) + def test_41_trunk(self): + merges = [(self.v_41, True), (self.v_50, True)] + version_section, merge_sections = resolve_version_and_merge_sections(0, merges) + self.assertEqual(version_section, self.v_41) + self.assertEqual(merge_sections, []) + + version_section, merge_sections = resolve_version_and_merge_sections(1, merges) + self.assertEqual(version_section, self.v_41) + self.assertEqual(merge_sections, []) + + # If the change is for 4.0, 4.1 and trunk, then: + # - in 4.0, we add the entry in the 4.0 section (top section) + # - in 4.1, we add then entry in the 4.1 section (top section), under "Merged from 4.0" subsection + # - in trunk, we add the entry in the 4.1 section (first encountered 4.1 section), under "Merged from 4.0" subsection + def test_40_41_trunk(self): + merges = [(self.v_40, True), (self.v_41, True), (self.v_50, True)] + version_section, merge_sections = resolve_version_and_merge_sections(0, merges) + self.assertEqual(version_section, self.v_40) + self.assertEqual(merge_sections, []) + + version_section, merge_sections = resolve_version_and_merge_sections(1, merges) + self.assertEqual(version_section, self.v_41) + self.assertEqual(merge_sections, [self.v_40]) + + version_section, merge_sections = resolve_version_and_merge_sections(2, merges) + 
self.assertEqual(version_section, self.v_41) + self.assertEqual(merge_sections, [self.v_40]) + + # If the change is for 4.0 and not for 4.1 or trunk, then: + # - in 4.0, we add the entry in the 4.0 section (top section) + # - in 4.1, no changes + # - in trunk, no changes + def test_40(self): + merges = [(self.v_40, True), (self.v_41, False), (self.v_50, False)] + version_section, merge_sections = resolve_version_and_merge_sections(0, merges) + self.assertEqual(version_section, self.v_40) + self.assertEqual(merge_sections, []) + + version_section, merge_sections = resolve_version_and_merge_sections(1, merges) + self.assertEqual(version_section, None) + self.assertEqual(merge_sections, []) + + version_section, merge_sections = resolve_version_and_merge_sections(2, merges) + self.assertEqual(version_section, None) + self.assertEqual(merge_sections, []) + + # If the change is for 3.11 and 4.1 and not for 4.0 or trunk, then: + # - in 3.11, we add the entry in the 3.11 section (top section) + # - in 4.0, no changes + # - in 4.1, we add the entry in the 4.1 section (top section), under "Merged from 3.11" subsection + # - in trunk, we add the entry in the 4.1 section (first encountered 4.1 section), under "Merged from 3.11" subsection + def test_311_41(self): + merges = [(self.v_311, True), (self.v_40, False), (self.v_41, True), (self.v_50, False)] + version_section, merge_sections = resolve_version_and_merge_sections(0, merges) + self.assertEqual(version_section, self.v_311) + self.assertEqual(merge_sections, []) + + version_section, merge_sections = resolve_version_and_merge_sections(1, merges) + self.assertEqual(version_section, None) + self.assertEqual(merge_sections, []) + + version_section, merge_sections = resolve_version_and_merge_sections(2, merges) + self.assertEqual(version_section, self.v_41) + self.assertEqual(merge_sections, [self.v_40, self.v_311]) + + version_section, merge_sections = resolve_version_and_merge_sections(3, merges) + 
self.assertEqual(version_section, self.v_41) + self.assertEqual(merge_sections, [self.v_40, self.v_311]) + + # If the change is for 4.0 and trunk, and not for 4.1, then: + # - in 4.0, we add the entry in the 4.0 section (top section) + # - in 4.1, no changes + # - in trunk, no changes + def test_40_trunk(self): + merges = [(self.v_40, True), (self.v_41, False), (self.v_50, True)] + version_section, merge_sections = resolve_version_and_merge_sections(0, merges) + self.assertEqual(version_section, self.v_40) + self.assertEqual(merge_sections, []) + + version_section, merge_sections = resolve_version_and_merge_sections(1, merges) + self.assertEqual(version_section, None) + self.assertEqual(merge_sections, []) + + version_section, merge_sections = resolve_version_and_merge_sections(2, merges) + self.assertEqual(version_section, None) + self.assertEqual(merge_sections, []) + + # If the change is for 3.0, 3.11, 4.0, 4.1 and trunk, then: + # - in 3.0, we add the entry in the 3.0 section (top section) + # - in 3.11, we add the entry in the 3.11 section (top section), under "Merged from 3.0" subsection + # - in 4.0, we add the entry in the 4.0 section (top section), under "Merged from 3.0" subsection + # - in 4.1, we add the entry in the 4.1 section (top section), under "Merged from 3.0" subsection + # - in trunk, we add the entry in the 4.1 section (first encountered 4.1 section), under "Merged from 3.0" subsection + def test_30_311_40_41_trunk(self): + merges = [(self.v_30, True), (self.v_311, True), (self.v_40, True), (self.v_41, True), (self.v_50, True)] + version_section, merge_sections = resolve_version_and_merge_sections(0, merges) + self.assertEqual(version_section, self.v_30) + self.assertEqual(merge_sections, []) + + version_section, merge_sections = resolve_version_and_merge_sections(1, merges) + self.assertEqual(version_section, self.v_311) + self.assertEqual(merge_sections, [self.v_30]) + + version_section, merge_sections = resolve_version_and_merge_sections(2, 
merges) + self.assertEqual(version_section, self.v_40) + self.assertEqual(merge_sections, [self.v_311, self.v_30]) + + version_section, merge_sections = resolve_version_and_merge_sections(3, merges) + self.assertEqual(version_section, self.v_41) + self.assertEqual(merge_sections, [self.v_40, self.v_311, self.v_30]) + + version_section, merge_sections = resolve_version_and_merge_sections(4, merges) + self.assertEqual(version_section, self.v_41) + self.assertEqual(merge_sections, [self.v_40, self.v_311, self.v_30]) + + +if __name__ == '__main__': + unittest.main() diff --git a/dev/verify_git_history.py b/dev/verify_git_history.py deleted file mode 100644 index 50ba8e89..00000000 --- a/dev/verify_git_history.py +++ /dev/null @@ -1,77 +0,0 @@ -import sys - -from git_utils import get_local_branch_history, get_release_branches, parse_merge_commit_msg, parse_push_ranges - -# Read the command line arguments and validate them - -if len(sys.argv) != 3: - print("Usage: %s <git-repo> <start-branch>" % sys.argv[0]) - exit(1) - -repo = sys.argv[1] -start_branch = sys.argv[2] -main_branches = [b.name for b in get_release_branches(repo)] - -# check if start_branch is a valid branch -if start_branch not in main_branches: - print("Invalid branch %s, must be one of %s" % (start_branch, str(main_branches))) - exit(1) - -# get items from main_branches starting from the item matching start_branch -main_branches = main_branches[main_branches.index(start_branch):] - -# get the patch commit message -history = get_local_branch_history(repo, main_branches[0]) - -# history for the first branch must contain onlu one commit -if len(history) != 1: - print("Invalid history for branch %s, must contain only one commit, but found %d: \n\n%s" % ( - main_branches[0], len(history), "\n".join(str(x) for x in history))) - exit(1) - -# check if the commit message is valid, that is, it must not be a merge commit -if parse_merge_commit_msg(history[0][1]): - print("Invalid commit message for branch %s, must 
not be a merge commit, but found: \n\n%s" % ( - main_branches[0], history[0])) - exit(1) - -# Check the history of the branches to confirm that each branch contains exactly one main commit -# and the rest are the merge commits from the previous branch in order -expected_merges = [] -prev_branch = main_branches[0] -prev_history = history -for branch in main_branches[1:]: - expected_merges.append((prev_branch, branch)) - history = get_local_branch_history(repo, branch) - - if history[:-1] != prev_history: - print("Invalid history for branch %s, must be the same as branch %s, but found: \n\n%s" % ( - branch, prev_branch, "\n".join(str(x) for x in history))) - exit(1) - -# expect that the rest of the commits are merge commits matching the expected merges in the same order -for i in range(1, len(history)): - merge = parse_merge_commit_msg(history[i][1]) - if not merge: - print("Invalid commit message for branch %s, must be a merge commit, but found: \n%s" % (branch, history[i])) - exit(1) - if merge != expected_merges[i - 1]: - print( - "Invalid merge commit for branch %s, expected: %s, but found: %s" % (branch, expected_merges[i - 1], merge)) - exit(1) - -# finally we print the commands to explore the changes in each push range - -push_ranges = parse_push_ranges(repo, main_branches) -# number of push ranges must match the number of branches we want to merge -if len(push_ranges) != len(main_branches): - print("Invalid number of push ranges, expected %d, but found %d:\n%s" % ( - len(main_branches), len(push_ranges), "\n".join(str(x) for x in push_ranges))) - exit(1) - -for push_range in push_ranges: - print("-" * 80) - print("Push range for branch %s: %s..%s" % (push_range[0], push_range[1], push_range[2])) - print("git diff --name-only %s..%s" % (push_range[1], push_range[2])) - print("git show %s..%s" % (push_range[1], push_range[2])) - print("") From 5f6b28c16240a926108c59d285c6d242004578ab Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski <lewandowski.jacek@gmail.com> 
Date: Tue, 12 Sep 2023 10:14:33 +0200 Subject: [PATCH 08/16] wip --- dev/scripts/lib/git_utils.py | 8 +++- dev/scripts/lib/script_generator.py | 5 ++- dev/scripts/prepare_merge_commands.py | 53 +++++++++++++++++++-------- 3 files changed, 46 insertions(+), 20 deletions(-) diff --git a/dev/scripts/lib/git_utils.py b/dev/scripts/lib/git_utils.py index 611313a3..52a728c8 100644 --- a/dev/scripts/lib/git_utils.py +++ b/dev/scripts/lib/git_utils.py @@ -25,6 +25,7 @@ class TicketMergeInfo(NamedTuple): upstream_repo: str feature_repo: str merges: list[BranchMergeInfo] + keep_changes_in_circleci: bool NO_VERSION = (-1, -1) TRUNK_VERSION = (255, 255) @@ -99,7 +100,7 @@ def guess_feature_repo_and_ticket(): ticket_regex = re.compile(r"CASSANDRA-(\d+)", flags=re.IGNORECASE) ticket_match = ticket_regex.search(match.group(2)) if ticket_match: - return (match.group(1), ticket_match.group(1)) + return (match.group(1), int(ticket_match.group(1))) return (match.group(1), None) return (None, None) @@ -241,7 +242,10 @@ def read_positive_int(prompt, default): value = None while not value: try: - value = input(prompt) + if default: + value = input("%s [default: %s]: " % (prompt, default)) + else: + value = input(prompt) if value: v = int(value) if v > 0: diff --git a/dev/scripts/lib/script_generator.py b/dev/scripts/lib/script_generator.py index 75ab1666..a7d0f815 100644 --- a/dev/scripts/lib/script_generator.py +++ b/dev/scripts/lib/script_generator.py @@ -86,7 +86,7 @@ def generate_script(ticket_merge_info: TicketMergeInfo): script.append("git cherry-pick -n %s # %s - %s" % (commit.sha, commit.author, commit.title)) version_section, merge_sections = resolve_version_and_merge_sections(idx, [(m.release_branch, m.feature_branch is not None) for m in merges]) - if version_section: + if ticket_merge_info.title and version_section: script.append("python3 %s/update_changes.py '%s' '%s' '%s' '%s'" % (script_dir, ticket_merge_info.ticket, version_as_string(version_section.version), @@ -96,7 
+96,8 @@ def generate_script(ticket_merge_info: TicketMergeInfo): script.append("git add CHANGES.txt") script.append("git commit --amend --no-edit") - script.append("[[ -n \"$(git diff --name-only %s/%s..HEAD -- .circleci/)\" ]] && (git diff %s/%s..HEAD -- .circleci/ | git apply -R --index) && git commit -a --amend --no-edit # Remove all changes in .circleci directory if you need to" % (ticket_merge_info.upstream_repo, merge.release_branch.name, ticket_merge_info.upstream_repo, merge.release_branch.name)) + if not ticket_merge_info.keep_changes_in_circleci: + script.append("[[ -n \"$(git diff --name-only %s/%s..HEAD -- .circleci/)\" ]] && (git diff %s/%s..HEAD -- .circleci/ | git apply -R --index) && git commit -a --amend --no-edit # Remove all changes in .circleci directory if you need to" % (ticket_merge_info.upstream_repo, merge.release_branch.name, ticket_merge_info.upstream_repo, merge.release_branch.name)) script.append("git diff --name-only %s/%s..HEAD # print a list of all changes files" % (ticket_merge_info.upstream_repo, merge.release_branch.name)) script.append("") diff --git a/dev/scripts/prepare_merge_commands.py b/dev/scripts/prepare_merge_commands.py index 7ab9581d..02a2e389 100644 --- a/dev/scripts/prepare_merge_commands.py +++ b/dev/scripts/prepare_merge_commands.py @@ -1,3 +1,5 @@ +import os + from lib.script_generator import generate_script from lib.git_utils import * @@ -14,7 +16,7 @@ feature_repo, ticket_number = guess_feature_repo_and_ticket() feature_repo = read_remote_repository("Enter the name of the remote repository that points to the upstream feature branch", feature_repo) -ticket_number = read_positive_int("Enter the ticket number: ", ticket_number) +ticket_number = read_positive_int("Enter the ticket number (for example: '12345'): ", ticket_number) ticket = "CASSANDRA-%s" % ticket_number print("") @@ -76,24 +78,40 @@ ### Read the change title ### -print("") -print("Commits:") -# zip commits with their index -commits = list(zip(range(1, 
len(merges[0].commits) + 1), merges[0].commits)) -for i, commit in commits: - print("%d: %s" % (i, str(commit))) -print("") -commit_idx = read_positive_int("Enter the number of the commit whose message should be used as a title in CHANGES.txt or leave empty to enter a custom title: ", None) -change_title = None -if commit_idx and commit_idx <= len(commits): - change_title = commits[commit_idx - 1][1].title +need_changes_txt_entry = False +response = None +while response not in ["yes", "no"]: + response = read_with_default("Do you want the script to add a line to CHANGES.txt? (yes/no)", "yes") +if response == "yes": + need_changes_txt_entry = True + print("") + print("Commits:") + # zip commits with their index + commits = list(zip(range(1, len(merges[0].commits) + 1), merges[0].commits)) + for i, commit in commits: + print("%d: %s" % (i, str(commit))) + print("") + commit_idx = read_positive_int("Enter the number of the commit whose message should be used as a title in CHANGES.txt or leave empty to enter a custom title: ", None) + change_title = None + if commit_idx and commit_idx <= len(commits): + change_title = commits[commit_idx - 1][1].title + else: + while not change_title: + change_title = read_with_default("Enter the title", commits[0][1].title).strip() else: - while not change_title: - change_title = read_with_default("Enter the title", commits[0][1].title).strip() + change_title = None + +### Keep the circleci config changes? ### +keep_changes_in_circleci = False +response = None +while response not in ["yes", "no"]: + response = read_with_default("Do you want to keep changes in .circleci directory? 
(yes/no)", "no") +if response == "yes": + keep_changes_in_circleci = True ### Generate the script ### -ticket_merge_info = TicketMergeInfo(ticket, change_title, upstream_repo, feature_repo, merges) +ticket_merge_info = TicketMergeInfo(ticket, change_title, upstream_repo, feature_repo, merges, keep_changes_in_circleci) script = generate_script(ticket_merge_info) @@ -101,9 +119,12 @@ if len(sys.argv) > 1: filename = sys.argv[1] else: - filename = read_with_default("Enter the filename to save the script to", "merge_%s.sh" % ticket) + filename = read_with_default("Enter the filename to save the script to", "../merge_%s.sh" % ticket) # Save the script to the file with open(filename, "w") as f: for s in script: f.write(s + "\n") + +# make the script executable +os.chmod(filename, 0o755) From e1277b08ffe991deefc1b03ed772d549e34f6d34 Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski <lewandowski.jacek@gmail.com> Date: Tue, 12 Sep 2023 10:21:25 +0200 Subject: [PATCH 09/16] wip --- dev/scripts/lib/script_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/scripts/lib/script_generator.py b/dev/scripts/lib/script_generator.py index a7d0f815..75b9c329 100644 --- a/dev/scripts/lib/script_generator.py +++ b/dev/scripts/lib/script_generator.py @@ -94,7 +94,7 @@ def generate_script(ticket_merge_info: TicketMergeInfo): ticket_merge_info.title)) script.append("git add CHANGES.txt") - script.append("git commit --amend --no-edit") + script.append("git commit --amend --no-edit") if not ticket_merge_info.keep_changes_in_circleci: script.append("[[ -n \"$(git diff --name-only %s/%s..HEAD -- .circleci/)\" ]] && (git diff %s/%s..HEAD -- .circleci/ | git apply -R --index) && git commit -a --amend --no-edit # Remove all changes in .circleci directory if you need to" % (ticket_merge_info.upstream_repo, merge.release_branch.name, ticket_merge_info.upstream_repo, merge.release_branch.name)) From 619cc59331fafb2d2d17524912e7b0149f957708 Mon Sep 17 00:00:00 2001 From: 
Jacek Lewandowski <lewandowski.jacek@gmail.com> Date: Tue, 12 Sep 2023 12:24:16 +0200 Subject: [PATCH 10/16] colors --- dev/scripts/lib/git_utils.py | 27 ++++++++++++++++ dev/scripts/verify_git_history.py | 52 +++++++++++++++++++++---------- 2 files changed, 62 insertions(+), 17 deletions(-) diff --git a/dev/scripts/lib/git_utils.py b/dev/scripts/lib/git_utils.py index 52a728c8..2009fe0e 100644 --- a/dev/scripts/lib/git_utils.py +++ b/dev/scripts/lib/git_utils.py @@ -257,3 +257,30 @@ def read_positive_int(prompt, default): value = None return value +# from https://gist.github.com/rene-d/9e584a7dd2935d0f461904b9f2950007 +class Colors: + """ ANSI color codes """ + BLACK = "\033[0;30m" + RED = "\033[0;31m" + GREEN = "\033[0;32m" + BROWN = "\033[0;33m" + BLUE = "\033[0;34m" + PURPLE = "\033[0;35m" + CYAN = "\033[0;36m" + LIGHT_GRAY = "\033[0;37m" + DARK_GRAY = "\033[1;30m" + LIGHT_RED = "\033[1;31m" + LIGHT_GREEN = "\033[1;32m" + YELLOW = "\033[1;33m" + LIGHT_BLUE = "\033[1;34m" + LIGHT_PURPLE = "\033[1;35m" + LIGHT_CYAN = "\033[1;36m" + LIGHT_WHITE = "\033[1;37m" + BOLD = "\033[1m" + FAINT = "\033[2m" + ITALIC = "\033[3m" + UNDERLINE = "\033[4m" + BLINK = "\033[5m" + NEGATIVE = "\033[7m" + CROSSED = "\033[9m" + END = "\033[0m" diff --git a/dev/scripts/verify_git_history.py b/dev/scripts/verify_git_history.py index 3c72f6b1..586d697c 100644 --- a/dev/scripts/verify_git_history.py +++ b/dev/scripts/verify_git_history.py @@ -36,14 +36,18 @@ # history for the first branch must contain only one commit if len(history) != 1: - print("Invalid history for branch %s, must contain only one commit, but found %d: \n\n%s" % ( - main_branches[0], len(history), "\n".join(str(x) for x in history))) + print("%sInvalid history for branch %s, must contain only one commit, but found %d: \n\n%s%s\n" % ( + Colors.RED, + main_branches[0], len(history), "\n".join(str(x) for x in history), + Colors.END)) exit(1) # check if the commit message is valid, that is, it must not be a merge commit if 
parse_merge_commit_msg(history[0].title): - print("Invalid commit message for branch %s, must not be a merge commit, but found: \n\n%s" % ( - main_branches[0], history[0].title)) + print("%sInvalid commit message for branch %s, must not be a merge commit, but found: \n\n%s%s\n" % ( + Colors.RED, + main_branches[0], history[0].title, + Colors.END)) exit(1) # Check the history of the branches to confirm that each branch contains exactly one main commit @@ -52,47 +56,61 @@ prev_branch = main_branches[0] prev_history = history for branch in main_branches[1:]: + print("-" * 80) + expected_merges.append((prev_branch, branch)) history = get_commits(repo, branch, None, branch) print("") print("Checking branch %s" % branch) print("Expected merges: %s" % str(expected_merges)) - print("History: \n - %s" % "\n - ".join(str(x) for x in history)) + print("History between %s/%s..local %s: \n - %s" % (repo, branch, branch, "\n - ".join(str(x) for x in history))) if history[:-1] != prev_history: - print("Invalid history for branch %s, must include the history of branch %s:\n\n%s\n\n, but found: \n\n%s" % ( + print("%sInvalid history for branch %s, must include the history of branch %s:\n\n%s\n\n, but found: \n\n%s%s\n" % ( + Colors.RED, branch, prev_branch, "\n".join(str(x) for x in prev_history), - "\n".join(str(x) for x in history))) - exit(1) + "\n".join(str(x) for x in history), + Colors.END)) # expect that the rest of the commits are merge commits matching the expected merges in the same order for i in range(1, len(history)): merge = parse_merge_commit_msg(history[i].title) if not merge: - print("Invalid commit message for branch %s, must be a merge commit, but found: \n%s" % (branch, history[i])) - exit(1) + print("%sInvalid commit message for branch %s, must be a merge commit, but found: \n%s%s\n" % ( + Colors.RED, + branch, history[i], + Colors.END)) + break + if merge != expected_merges[i - 1]: print( - "Invalid merge commit for branch %s, expected: %s, but found: %s" % 
(branch, expected_merges[i - 1], merge)) - exit(1) + "%sInvalid merge commit for branch %s, expected: %s, but found: %s%s\n" % ( + Colors.RED, + branch, expected_merges[i - 1], merge, + Colors.END)) + break prev_branch = branch prev_history = history # finally we print the commands to explore the changes in each push range +print("=" * 80) push_ranges = get_push_ranges(repo, main_branches) # number of push ranges must match the number of branches we want to merge if len(push_ranges) != len(main_branches): - print("Invalid number of push ranges, expected %d, but found %d:\n%s" % ( - len(main_branches), len(push_ranges), "\n".join(str(x) for x in push_ranges))) + print("" + "%sInvalid number of push ranges, expected %d, but found %d:\n%s%s" % ( + Colors.RED, + len(main_branches), len(push_ranges), "\n".join(str(x) for x in push_ranges), + Colors.END)) exit(1) for push_range in push_ranges: - print("-" * 80) print("Push range for branch %s: %s..%s" % (push_range[0], push_range[1], push_range[2])) - print("git diff --name-only %s..%s" % (push_range[1], push_range[2])) - print("git show %s..%s" % (push_range[1], push_range[2])) + print("%sgit diff --name-only %s..%s%s" % (Colors.LIGHT_BLUE, push_range[1], push_range[2], Colors.END)) + print("%sgit show %s..%s%s" % (Colors.LIGHT_BLUE, push_range[1], push_range[2], Colors.END)) print("") + print("-" * 80) From e17fb66d96a9dcf4f62f95cf0050d2dd9c8d0965 Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski <lewandowski.jacek@gmail.com> Date: Fri, 15 Sep 2023 09:07:22 +0200 Subject: [PATCH 11/16] todo --- TODO | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 TODO diff --git a/TODO b/TODO new file mode 100644 index 00000000..40801e3c --- /dev/null +++ b/TODO @@ -0,0 +1,11 @@ +* do not include commented-out lines in the commit message (like #Conflict...) 
+* validate commit message - validate the expected format and whether people can be found in github +* let the user edit the CHANGES message, make sure it is one line message + +* prepare commit message +- list of commit messages +- * use GPT to summarize the changes +- list all users who interacted on any of the PRs or reviewers from Jira +- get the current user +- prepare the message +- open editor with tmp file containing the message From 480255910add62dfd4a05bd8ab50149f8483cfb0 Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski <lewandowski.jacek@gmail.com> Date: Mon, 25 Sep 2023 12:53:55 +0200 Subject: [PATCH 12/16] automatic commit message generation --- dev/scripts/lib/git_utils.py | 14 +++++---- dev/scripts/lib/jira_utils.py | 34 +++++++++++++++++++++ dev/scripts/lib/script_generator.py | 24 ++++++++++++--- dev/scripts/prepare_merge_commands.py | 43 ++++++++++++++++----------- 4 files changed, 88 insertions(+), 27 deletions(-) create mode 100644 dev/scripts/lib/jira_utils.py diff --git a/dev/scripts/lib/git_utils.py b/dev/scripts/lib/git_utils.py index 2009fe0e..32cabc24 100644 --- a/dev/scripts/lib/git_utils.py +++ b/dev/scripts/lib/git_utils.py @@ -13,6 +13,7 @@ class Commit(NamedTuple): sha: str author: str title: str + body: str class BranchMergeInfo(NamedTuple): release_branch: VersionedBranch @@ -21,11 +22,12 @@ class BranchMergeInfo(NamedTuple): class TicketMergeInfo(NamedTuple): ticket: str - title: str + update_changes: bool upstream_repo: str feature_repo: str merges: list[BranchMergeInfo] keep_changes_in_circleci: bool + commit_msg_file: str NO_VERSION = (-1, -1) TRUNK_VERSION = (255, 255) @@ -156,13 +158,13 @@ def coordinates(repo, branch): return "%s/%s" % (repo, branch) else: return branch - output = subprocess.check_output(["git", "log", "--pretty=format:%h%x00%aN%x00%s", "--reverse", "%s..%s" % (coordinates(from_repo, from_branch), coordinates(to_repo, to_branch))], text=True) + output = subprocess.check_output(["git", "log", 
"--pretty=format:%h%n%aN%n%s%n%b%n%x00", "--reverse", "%s..%s" % (coordinates(from_repo, from_branch), coordinates(to_repo, to_branch))], text=True) commits = [] - for line in output.split("\n"): - if not line.strip(): + for commit_block in output.split("\0"): + if not commit_block: continue - match = line.split("\0") - commits.append(Commit(match[0], match[1], match[2])) + match = commit_block.strip("\n").split(sep = "\n", maxsplit = 3) + commits.append(Commit(match[0], match[1], match[2], match[3] if len(match) > 3 else "")) return commits diff --git a/dev/scripts/lib/jira_utils.py b/dev/scripts/lib/jira_utils.py new file mode 100644 index 00000000..3038bc82 --- /dev/null +++ b/dev/scripts/lib/jira_utils.py @@ -0,0 +1,34 @@ +import json + +import urllib3 + + +def get_assignee_from_jira(ticket): + """ + Get the assignee for the given JIRA ticket. + :param ticket: + :return: + """ + http = urllib3.PoolManager() + r = http.request('GET', 'https://issues.apache.org/jira/rest/api/latest/issue/' + ticket) + if r.status == 200: + data = json.loads(r.data.decode('utf-8')) + if data['fields']['assignee']: + return data['fields']['assignee']['displayName'] + return None + + +def get_reviewers_from_jira(ticket): + """ + Get the reviewers for the given JIRA ticket. 
+ :param ticket: + :return: + """ + http = urllib3.PoolManager() + r = http.request('GET', 'https://issues.apache.org/jira/rest/api/latest/issue/' + ticket) + if r.status == 200: + data = json.loads(r.data.decode('utf-8')) + reviewers = data['fields']['customfield_12313420'] + if reviewers: + return [reviewer['displayName'] for reviewer in reviewers] + return None diff --git a/dev/scripts/lib/script_generator.py b/dev/scripts/lib/script_generator.py index 75b9c329..b7cdbd8e 100644 --- a/dev/scripts/lib/script_generator.py +++ b/dev/scripts/lib/script_generator.py @@ -49,6 +49,14 @@ def generate_script(ticket_merge_info: TicketMergeInfo): script = ["#!/bin/bash", "", "set -xe", "", "[[ -z $(git status --porcelain) ]] # worktree must be clean"] + script.append("") + if ticket_merge_info.update_changes: + script.append("# Edit the commit message, the first will be used as the change title to update CHNAGES.txt") + else: + script.append("# Edit the commit message") + script.append("$(git config --get core.editor) %s" % ticket_merge_info.commit_msg_file) + script.append("") + merges = ticket_merge_info.merges # index of first merge with undefined feature branch for idx in range(0, len(merges)): @@ -66,6 +74,7 @@ def generate_script(ticket_merge_info: TicketMergeInfo): # ensure that there is at least one non-merge commit in the feature branch assert len([c for c in merge.commits if parse_merge_commit_msg(c.title) is None]) > 0 + closed = True script.append("git switch %s" % merge.release_branch.name) script.append("git reset --hard %s/%s" % (ticket_merge_info.upstream_repo, merge.release_branch.name)) commits = [] @@ -84,17 +93,24 @@ def generate_script(ticket_merge_info: TicketMergeInfo): script.append("# skipping merge commit %s %s - %s" % (commit.sha, commit.author, commit.title)) else: script.append("git cherry-pick -n %s # %s - %s" % (commit.sha, commit.author, commit.title)) + closed = False version_section, merge_sections = resolve_version_and_merge_sections(idx, 
[(m.release_branch, m.feature_branch is not None) for m in merges]) - if ticket_merge_info.title and version_section: - script.append("python3 %s/update_changes.py '%s' '%s' '%s' '%s'" % (script_dir, + if ticket_merge_info.update_changes and version_section: + script.append("python3 %s/update_changes.py '%s' '%s' '%s' %s" % (script_dir, ticket_merge_info.ticket, version_as_string(version_section.version), ",".join([version_as_string(m.version) for m in merge_sections]), - ticket_merge_info.title)) + '"$(head -n 1 %s)"' % ticket_merge_info.commit_msg_file)) script.append("git add CHANGES.txt") - script.append("git commit --amend --no-edit") + closed = False + + if not closed: + script.append("git commit --amend --no-edit") + + if idx == 0: + script.append("git commit --allow-empty --amend --file %s" % ticket_merge_info.commit_msg_file) if not ticket_merge_info.keep_changes_in_circleci: script.append("[[ -n \"$(git diff --name-only %s/%s..HEAD -- .circleci/)\" ]] && (git diff %s/%s..HEAD -- .circleci/ | git apply -R --index) && git commit -a --amend --no-edit # Remove all changes in .circleci directory if you need to" % (ticket_merge_info.upstream_repo, merge.release_branch.name, ticket_merge_info.upstream_repo, merge.release_branch.name)) diff --git a/dev/scripts/prepare_merge_commands.py b/dev/scripts/prepare_merge_commands.py index 02a2e389..7c84ad3f 100644 --- a/dev/scripts/prepare_merge_commands.py +++ b/dev/scripts/prepare_merge_commands.py @@ -1,7 +1,9 @@ import os +import tempfile from lib.script_generator import generate_script from lib.git_utils import * +from lib.jira_utils import * ensure_clean_git_tree() @@ -83,23 +85,9 @@ while response not in ["yes", "no"]: response = read_with_default("Do you want the script to add a line to CHANGES.txt? 
(yes/no)", "yes") if response == "yes": - need_changes_txt_entry = True - print("") - print("Commits:") - # zip commits with their index - commits = list(zip(range(1, len(merges[0].commits) + 1), merges[0].commits)) - for i, commit in commits: - print("%d: %s" % (i, str(commit))) - print("") - commit_idx = read_positive_int("Enter the number of the commit whose message should be used as a title in CHANGES.txt or leave empty to enter a custom title: ", None) - change_title = None - if commit_idx and commit_idx <= len(commits): - change_title = commits[commit_idx - 1][1].title - else: - while not change_title: - change_title = read_with_default("Enter the title", commits[0][1].title).strip() + update_changes = True else: - change_title = None + update_changes = False ### Keep the circleci config changes? ### keep_changes_in_circleci = False @@ -109,9 +97,30 @@ if response == "yes": keep_changes_in_circleci = True +### Generate commit message ### +commit_msg = merges[0].commits[0].title + "\n\n" +commit_msg = commit_msg + merges[0].commits[0].body + "\n\n" +for commit in merges[0].commits[1:]: + commit_msg = commit_msg + " - " + commit.title + "\n" + commit.body + "\n\n" + +assignee = get_assignee_from_jira(ticket) +reviewers = get_reviewers_from_jira(ticket) +if assignee: + commit_msg = commit_msg + "Patch by %s" % assignee +if reviewers: + commit_msg = commit_msg + "; reviewed by %s" % ", ".join(reviewers) +commit_msg = commit_msg + " for %s" % ticket + +temp_dir = tempfile.gettempdir() +commit_msg_file = tempfile.NamedTemporaryFile(dir=temp_dir, delete=False) +commit_msg_file.write(commit_msg.encode('utf-8')) + +print("") +print("Commit message saved to %s - you will be asked to edit" % commit_msg_file.name) + ### Generate the script ### -ticket_merge_info = TicketMergeInfo(ticket, change_title, upstream_repo, feature_repo, merges, keep_changes_in_circleci) +ticket_merge_info = TicketMergeInfo(ticket, update_changes, upstream_repo, feature_repo, merges, 
keep_changes_in_circleci, commit_msg_file.name) script = generate_script(ticket_merge_info) From 4cee9f19e868f877bbff2cd8afabcd7261fdc1a4 Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski <lewandowski.jacek@gmail.com> Date: Wed, 29 Nov 2023 11:21:07 +0100 Subject: [PATCH 13/16] better recognition of the base branch --- dev/scripts/lib/git_utils.py | 46 +++++++++++++++++++++++---- dev/scripts/prepare_merge_commands.py | 2 +- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/dev/scripts/lib/git_utils.py b/dev/scripts/lib/git_utils.py index 32cabc24..53e129bd 100644 --- a/dev/scripts/lib/git_utils.py +++ b/dev/scripts/lib/git_utils.py @@ -55,15 +55,52 @@ def version_from_string(version_string): def version_as_string(version): if version is None: return None + if version == NO_VERSION: + return None if version == TRUNK_VERSION: return "trunk" return "%s.%s" % version ### GIT functions ### +def guess_base_version(repo, remote_repo, branch): + version = NO_VERSION + + merge_base = None + for l in subprocess.check_output(["git", "log", "--decorate", "--simplify-by-decoration", "--oneline", "%s/%s" % (repo, branch)], text=True).split("\n"): + if "(HEAD" not in l and "(%s/%s" % (repo, branch) not in l: + merge_base = l.split(" ")[0] + break + + matching_versions = [] + if merge_base: + branch_regex = re.compile(r"\s*" + re.escape(remote_repo) + r"/((cassandra-(\d+)\.(\d+))|(trunk))$") + for l in subprocess.check_output(["git", "branch", "-r", "--contains", merge_base], text=True).split("\n"): + match = branch_regex.match(l) + if match: + if match.group(5): + matching_versions.append(TRUNK_VERSION) + elif match.group(2): + matching_versions.append((int(match.group(3)), int(match.group(4)))) + matching_versions.sort() + + if len(matching_versions) == 1: + version = matching_versions[0] + else: + branch_regex = re.compile(r".*?(-((\d+)\.(\d+))|(trunk))?$", flags=re.IGNORECASE) + match = branch_regex.match(branch) + if match: + if match.group(5) == "trunk": + 
version = TRUNK_VERSION + elif match.group(2): + version = (int(match.group(3)), int(match.group(4))) + else: + print("No match for %s" % branch) + + return version -def guess_feature_branches(repo, ticket): +def guess_feature_branches(repo, remote_repo, ticket): """ Get the list of branches from the given repository that contain the given ticket, sorted by version ascending. :param repo: configured apache repository name @@ -78,12 +115,7 @@ def guess_feature_branches(repo, ticket): match = branch_regex.match(line) if match: branch = match.group(1) - if branch == ticket: - version = TRUNK_VERSION - elif match.group(2): - version = (int(match.group(3)), int(match.group(4))) - else: - version = NO_VERSION + version = guess_base_version(repo, remote_repo, branch) matching_branches.append(VersionedBranch(version, match.group(2), branch)) matching_branches.sort(key=lambda x: x.version) diff --git a/dev/scripts/prepare_merge_commands.py b/dev/scripts/prepare_merge_commands.py index 7c84ad3f..40b48594 100644 --- a/dev/scripts/prepare_merge_commands.py +++ b/dev/scripts/prepare_merge_commands.py @@ -38,7 +38,7 @@ print("Found the following release branches:\n%s" % "\n".join(["%s: %s" % (version_as_string(b.version), b.name) for b in release_branches])) print("") -feature_branches = guess_feature_branches(feature_repo, ticket) +feature_branches = guess_feature_branches(feature_repo, upstream_repo, ticket) print("Found the following feature branches:\n%s" % "\n".join(["%s: %s" % (version_as_string(b.version), b.name) for b in feature_branches])) print("") From ec3537c66acbb2413882724f3cf8c79f5f7b6f57 Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski <lewandowski.jacek@gmail.com> Date: Wed, 29 Nov 2023 11:31:54 +0100 Subject: [PATCH 14/16] collect authors from commits and add Co-authored-by to the commit message --- dev/scripts/lib/git_utils.py | 7 ++++--- dev/scripts/prepare_merge_commands.py | 6 ++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git 
a/dev/scripts/lib/git_utils.py b/dev/scripts/lib/git_utils.py index 53e129bd..2e636c6e 100644 --- a/dev/scripts/lib/git_utils.py +++ b/dev/scripts/lib/git_utils.py @@ -12,6 +12,7 @@ class VersionedBranch(NamedTuple): class Commit(NamedTuple): sha: str author: str + email: str title: str body: str @@ -190,13 +191,13 @@ def coordinates(repo, branch): return "%s/%s" % (repo, branch) else: return branch - output = subprocess.check_output(["git", "log", "--pretty=format:%h%n%aN%n%s%n%b%n%x00", "--reverse", "%s..%s" % (coordinates(from_repo, from_branch), coordinates(to_repo, to_branch))], text=True) + output = subprocess.check_output(["git", "log", "--pretty=format:%h%n%aN%n%ae%n%s%n%b%n%x00", "--reverse", "%s..%s" % (coordinates(from_repo, from_branch), coordinates(to_repo, to_branch))], text=True) commits = [] for commit_block in output.split("\0"): if not commit_block: continue - match = commit_block.strip("\n").split(sep = "\n", maxsplit = 3) - commits.append(Commit(match[0], match[1], match[2], match[3] if len(match) > 3 else "")) + match = commit_block.strip("\n").split(sep = "\n", maxsplit = 4) + commits.append(Commit(match[0], match[1], match[2], match[3], match[4] if len(match) > 4 else "")) return commits diff --git a/dev/scripts/prepare_merge_commands.py b/dev/scripts/prepare_merge_commands.py index 40b48594..752301e0 100644 --- a/dev/scripts/prepare_merge_commands.py +++ b/dev/scripts/prepare_merge_commands.py @@ -103,6 +103,9 @@ for commit in merges[0].commits[1:]: commit_msg = commit_msg + " - " + commit.title + "\n" + commit.body + "\n\n" +authors = ["%s <%s>" % (c.author, c.email) for c in merges[0].commits] +authors = list(set(authors)) +authors.sort() assignee = get_assignee_from_jira(ticket) reviewers = get_reviewers_from_jira(ticket) if assignee: @@ -110,6 +113,9 @@ if reviewers: commit_msg = commit_msg + "; reviewed by %s" % ", ".join(reviewers) commit_msg = commit_msg + " for %s" % ticket +commit_msg = commit_msg + "\n\n" +for author in authors: + 
commit_msg = commit_msg + "Co-authored-by: %s\n" % author temp_dir = tempfile.gettempdir() commit_msg_file = tempfile.NamedTemporaryFile(dir=temp_dir, delete=False) From 547a4bc80ce61fca41735e8c5daf23090b35d235 Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski <lewandowski.jacek@gmail.com> Date: Tue, 9 Jan 2024 11:11:21 +0100 Subject: [PATCH 15/16] fix --- dev/scripts/lib/git_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dev/scripts/lib/git_utils.py b/dev/scripts/lib/git_utils.py index 2e636c6e..0e324a24 100644 --- a/dev/scripts/lib/git_utils.py +++ b/dev/scripts/lib/git_utils.py @@ -88,7 +88,7 @@ def guess_base_version(repo, remote_repo, branch): if len(matching_versions) == 1: version = matching_versions[0] else: - branch_regex = re.compile(r".*?(-((\d+)\.(\d+))|(trunk))?$", flags=re.IGNORECASE) + branch_regex = re.compile(r".*?([-/]((\d+)\.(\d+))|(trunk))?$", flags=re.IGNORECASE) match = branch_regex.match(branch) if match: if match.group(5) == "trunk": @@ -97,6 +97,8 @@ def guess_base_version(repo, remote_repo, branch): version = (int(match.group(3)), int(match.group(4))) else: print("No match for %s" % branch) + if len(matching_versions) > 0: + version = matching_versions[0] return version From cc0045289cf015a376991c828dd73b1ca0ec84be Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski <lewandowski.jacek@gmail.com> Date: Tue, 9 Jan 2024 11:11:30 +0100 Subject: [PATCH 16/16] circleci tools --- dev/scripts/digest-circleci-workflow.py | 129 ++++++++++++++++++++++++ dev/scripts/lib/circleci_utils.py | 128 +++++++++++++++++++++++ 2 files changed, 257 insertions(+) create mode 100644 dev/scripts/digest-circleci-workflow.py create mode 100644 dev/scripts/lib/circleci_utils.py diff --git a/dev/scripts/digest-circleci-workflow.py b/dev/scripts/digest-circleci-workflow.py new file mode 100644 index 00000000..20f0ca70 --- /dev/null +++ b/dev/scripts/digest-circleci-workflow.py @@ -0,0 +1,129 @@ +# 
https://app.circleci.com/pipelines/github/jacek-lewandowski/cassandra/1252/workflows/b10132a7-1b4f-44d0-8808-f19a3b5fde69/jobs/63797 +# https://circleci.com/api/v2/project/gh/jacek-lewandowski/cassandra/63797/tests +# { +# "items": [ +# { +# "classname": "org.apache.cassandra.distributed.test.LegacyCASTest", +# "name": "testRepairIncompletePropose-_jdk17", +# "result": "success", +# "message": "", +# "run_time": 15.254, +# "source": "unknown" +# } +# ,{ +# "classname": "org.apache.cassandra.distributed.test.NativeTransportEncryptionOptionsTest", +# "name": "testEndpointVerificationEnabledIpNotInSAN-cassandra.testtag_IS_UNDEFINED", +# "result": "failure", +# "message": "junit.framework.AssertionFailedError: Forked Java VM exited abnormally. Please note the time in the report does not reflect the time until the VM exit.\n\tat jdk.internal.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.util.Vector.forEach(Vector.java:1365)\n\tat jdk.internal.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat jdk.internal.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.util.Vector.forEach(Vector.java:1365)\n\tat jdk.internal.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat 
java.base/java.util.Vector.forEach(Vector.java:1365)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat org.apache.cassandra.anttasks.TestHelper.execute(TestHelper.java:53)\n\tat jdk.internal.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.util.Vector.forEach(Vector.java:1365)\n\tat jdk.internal.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat jdk.internal.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)", +# "run_time": 0.001, +# "source": "unknown" +# } +# ] +# } +import csv + +# So here is the plan: +# I have a link to the pipeline: https://app.circleci.com/pipelines/github/jacek-lewandowski/cassandra/1252 +# The program goes through all the workflow jobs and lists the failed tests along with the workflow, job, etc. +# Then: +# - separate failures into 3 groups: +# 1. flaky - if a test was repeated in multiple jobs and failed in some of them +# 2. failure - if a test was repeated in multiple jobs and failed in all of them +# 3. suspected - if a test was not repeated + +# Then for each failure list Jira tickets that mention the test name. 
# Having that information, let the user decide what to do with each failure:
# - select a Jira ticket
# - create a new ticket
# - do not associate with any ticket
# - report on the PR

# Eventually, the user can create the script which can perform the planned operations

# `sys` and `NamedTuple` previously reached this script only as a side effect
# of `from lib.circleci_utils import *`; import them explicitly instead.
import sys
from typing import NamedTuple


class TestFailure(NamedTuple):
    """A test that failed in at least one job of a single workflow."""
    file: str
    classname: str
    name: str
    jobs_comp: str   # sorted job names joined with ","; CSV cell and primary sort key
    jobs_list: list  # the same job names as a sorted list


class TestFailureComparison(NamedTuple):
    """A test that failed in the feature workflow, the base workflow, or both."""
    file: str
    classname: str
    name: str
    feature_jobs: set  # names of feature-workflow jobs in which the test failed
    base_jobs: set     # names of base-workflow jobs in which the test failed
    jobs_comp: str     # sorted union of both sets joined with ","; primary sort key


def _sort_key(test):
    """Order failures by jobs, then file, classname and test name."""
    return (test.jobs_comp, test.file, test.classname, test.name)


def collect_failures(failed_tests_dict):
    """Flatten a ``file -> classname -> name -> {job names}`` mapping.

    Returns a list of TestFailure sorted by jobs, file, classname and name.
    The job names are sorted before being joined, so the result does not
    depend on set iteration order.
    """
    failures = []
    for file, classnames in failed_tests_dict.items():
        for classname, names in classnames.items():
            for name, job_names in names.items():
                jobs = sorted(job_names)
                failures.append(TestFailure(file, classname, name, ",".join(jobs), jobs))
    failures.sort(key=_sort_key)
    return failures


def compare_failures(feature_failed_tests_dict, base_failed_tests_dict):
    """Merge the failure mappings of a feature and a base workflow.

    Both arguments have the ``file -> classname -> name -> {job names}`` shape.
    Every test present in either mapping yields one TestFailureComparison; a
    side on which the test did not fail contributes an empty set. The result
    is sorted by jobs, file, classname and name.
    """
    comparisons = []
    for file in feature_failed_tests_dict.keys() | base_failed_tests_dict.keys():
        feature_classnames = feature_failed_tests_dict.get(file, {})
        base_classnames = base_failed_tests_dict.get(file, {})
        for classname in feature_classnames.keys() | base_classnames.keys():
            feature_names = feature_classnames.get(classname, {})
            base_names = base_classnames.get(classname, {})
            for name in feature_names.keys() | base_names.keys():
                feature_jobs = feature_names.get(name, set())
                base_jobs = base_names.get(name, set())
                jobs_comp = ",".join(sorted(feature_jobs.union(base_jobs)))
                comparisons.append(
                    TestFailureComparison(file, classname, name, feature_jobs, base_jobs, jobs_comp))
    comparisons.sort(key=_sort_key)
    return comparisons


def comparison_row(test):
    """Return the CSV row for one TestFailureComparison.

    The last three cells list the jobs that failed in the feature workflow
    only, in the base workflow only, and in both, each sorted and joined
    with ",".
    """
    feature_only_jobs = sorted(test.feature_jobs.difference(test.base_jobs))
    base_only_jobs = sorted(test.base_jobs.difference(test.feature_jobs))
    common_jobs = sorted(test.feature_jobs.intersection(test.base_jobs))
    return [test.file, test.classname, test.name,
            ",".join(feature_only_jobs), ",".join(base_only_jobs), ",".join(common_jobs)]


def write_csv(output_file, header, rows):
    """Write *header* followed by *rows* to *output_file* as CSV."""
    # The csv module asks for newline='' so that the writer's own "\r\n" line
    # terminator is not translated a second time by the text layer.
    with open(output_file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        writer.writerows(rows)


def main(argv):
    """Run the report.

    ``argv`` is ``[prog, repo, workflow_id, output.csv]`` to list the failed
    tests of one workflow, or ``[prog, feature_repo, feature_workflow_id,
    base_repo, base_workflow_id, output.csv]`` to compare two workflows.
    Any other length prints the usage and exits with status 1.
    """
    if len(argv) != 4 and len(argv) != 6:
        print("Usage: %s <repo> <workflow_id> <output.csv>" % argv[0])
        print("Usage: %s <feature repo> <feature workflow id > <base repo> <base workflow id> <output.csv>" % argv[0])
        sys.exit(1)

    # Imported here, and by name, so that the pure helpers above can be
    # imported without the CircleCI client module and its dependencies.
    from lib.circleci_utils import get_failed_tests

    if len(argv) == 4:
        repo, workflow_id, output_file = argv[1:4]
        failed_tests = collect_failures(get_failed_tests(repo, workflow_id))
        write_csv(output_file,
                  ['file', 'classname', 'name', 'jobs'],
                  ([test.file, test.classname, test.name, test.jobs_comp] for test in failed_tests))
    else:
        feature_repo, feature_workflow_id, base_repo, base_workflow_id, output_file = argv[1:6]
        feature_failed_tests_dict = get_failed_tests(feature_repo, feature_workflow_id)
        base_failed_tests_dict = get_failed_tests(base_repo, base_workflow_id)
        failed_tests = compare_failures(feature_failed_tests_dict, base_failed_tests_dict)
        write_csv(output_file,
                  ['file', 'classname', 'name', 'failed in feature only', 'failed in base only', 'failed in both'],
                  (comparison_row(test) for test in failed_tests))


if __name__ == "__main__":
    main(sys.argv)

# (patch header residue, kept verbatim)
# diff --git a/dev/scripts/lib/circleci_utils.py b/dev/scripts/lib/circleci_utils.py new file
# (patch header residue, kept verbatim)
# mode 100644 index 00000000..34c08687 --- /dev/null +++ b/dev/scripts/lib/circleci_utils.py @@ -0,0 +1,128 @@
"""Small read-only helpers around the CircleCI v2 REST API.

Every ``get_*`` fetcher performs one unauthenticated GET and returns the
parsed ``items`` of the response, or ``None`` when the HTTP status is not 200.
"""
import json
# `sys` is not used below; it and `NamedTuple` stay at module level because
# callers that star-import this module may rely on them.
import sys
from enum import Enum
from typing import NamedTuple


def fetch_items(url):
    """GET *url* and return the ``items`` list of its JSON body.

    Returns ``None`` when the response status is not 200.

    NOTE(review): only a single response is read. If these endpoints paginate
    (e.g. via a ``next_page_token`` field), long job or test lists would be
    truncated - confirm against the CircleCI API reference.
    """
    # Imported lazily so that the data types and parsers in this module remain
    # importable where the third-party urllib3 package is not installed; only
    # actual network calls need it.
    import urllib3

    http = urllib3.PoolManager()
    r = http.request('GET', url)
    if r.status != 200:
        return None
    return json.loads(r.data.decode('utf-8'))['items']


class PipelineInfo(NamedTuple):
    """The ``id`` and ``number`` of one pipeline item."""
    id: str
    number: int


def get_pipelines_from_circleci(repo, branch):
    """Return the pipelines of ``<repo>/cassandra`` on *branch*, or None on HTTP failure."""
    url = "https://circleci.com/api/v2/project/gh/%s/cassandra/pipeline?branch=%s" % (repo, branch)
    items = fetch_items(url)
    if items is None:
        return None
    return [PipelineInfo(id=item['id'], number=item['number']) for item in items]


class WorkflowInfo(NamedTuple):
    """The ``id``, ``name`` and ``status`` of one workflow item."""
    id: str
    name: str
    status: str


def get_pipeline_workflows(pipeline_id):
    """Return the workflows of pipeline *pipeline_id*, or None on HTTP failure."""
    url = "https://circleci.com/api/v2/pipeline/%s/workflow" % (pipeline_id)
    items = fetch_items(url)
    if items is None:
        return None
    return [WorkflowInfo(id=item['id'], name=item['name'], status=item['status']) for item in items]


class JobType(Enum):
    """Values accepted for the ``type`` field of a job item."""
    BUILD = "build"
    APPROVAL = "approval"


class JobStatus(Enum):
    """Values accepted for the ``status`` field of a job item."""
    SUCCESS = "success"
    RUNNING = "running"
    NOT_RUN = "not_run"
    FAILED = "failed"
    RETRIED = "retried"
    QUEUED = "queued"
    NOT_RUNNING = "not_running"
    INFRASTRUCTURE_FAIL = "infrastructure_fail"
    TIMEDOUT = "timedout"
    ON_HOLD = "on_hold"
    TERMINATED_UNKNOWN = "terminated-unknown"
    BLOCKED = "blocked"
    CANCELED = "canceled"
    UNAUTHORIZED = "unauthorized"


class JobInfo(NamedTuple):
    """One job of a workflow, as parsed by job_info_from_json."""
    id: str
    name: str
    status: JobStatus
    job_number: str  # None when the job item carries no 'job_number' key
    type: JobType


def job_info_from_json(json):
    """Build a JobInfo from one job item (a dict).

    Raises ValueError if ``status`` or ``type`` is not a known enum value and
    KeyError if ``id``, ``name``, ``status`` or ``type`` is missing.
    """
    # The parameter name shadows the json module inside this function only;
    # it is kept because it is part of the existing signature.
    return JobInfo(id=json['id'],
                   name=json['name'],
                   status=JobStatus(json['status']),
                   job_number=json.get('job_number'),
                   type=JobType(json['type']))


def get_workflow_jobs(workflow_id):
    """Return the jobs of workflow *workflow_id* as JobInfo, or None on HTTP failure."""
    url = "https://circleci.com/api/v2/workflow/%s/job" % (workflow_id)
    items = fetch_items(url)
    if items is None:
        return None
    print("Found %d jobs" % len(items))
    return [job_info_from_json(item) for item in items]


def select_failed_jobs(jobs):
    """Return the jobs that FAILED and have a job number; print the ones skipped."""
    failed_jobs = []
    for job in jobs:
        if job.status == JobStatus.FAILED and job.job_number is not None:
            failed_jobs.append(job)
        else:
            print("Skipping job %s" % str(job))
    return failed_jobs


def get_failed_jobs(workflow_id):
    """Return the failed jobs of workflow *workflow_id*.

    Raises RuntimeError when the job list cannot be fetched, instead of
    failing later with a TypeError from iterating None.
    """
    jobs = get_workflow_jobs(workflow_id)
    if jobs is None:
        raise RuntimeError("Could not fetch the jobs of workflow %s" % workflow_id)
    return select_failed_jobs(jobs)


class TestResult(Enum):
    """Values accepted for the ``result`` field of a test item."""
    SUCCESS = "success"
    FAILURE = "failure"
    SKIPPED = "skipped"
    ERROR = "error"
    UNKNOWN = "unknown"


class TestInfo(NamedTuple):
    """One test item of a job, as parsed by parse_test_info."""
    message: str
    source: str
    run_time: float
    file: str  # "" when the test item carries no 'file' key
    result: TestResult
    name: str
    classname: str


def parse_test_info(t):
    """Build a TestInfo from one test item (a dict); a missing 'file' becomes ""."""
    return TestInfo(t['message'], t['source'], t['run_time'], t.get('file', ""),
                    TestResult(t['result']), t['name'], t['classname'])


def get_job_tests(repo, job_number):
    """Return the tests of job *job_number* in ``<repo>/cassandra``, or None on HTTP failure."""
    url = "https://circleci.com/api/v2/project/gh/%s/cassandra/%s/tests" % (repo, job_number)
    items = fetch_items(url)
    if items is None:
        return None
    return [parse_test_info(t) for t in items]


def base_test_name(name):
    """Cut *name* at the first "-" and then at the first "[".

    Example: "testFoo[0]-_jdk17" -> "testFoo".
    """
    return name.split("-", 1)[0].split("[", 1)[0]


def add_failed_tests(failed_tests, job_name, tests):
    """Record in *failed_tests* every FAILURE among *tests* under *job_name*.

    *failed_tests* maps ``file -> classname -> base test name -> {job names}``
    and is updated in place; it is also returned for convenience.
    """
    for test in tests:
        if test.result != TestResult.FAILURE:
            continue
        by_class = failed_tests.setdefault(test.file, {})
        by_name = by_class.setdefault(test.classname, {})
        by_name.setdefault(base_test_name(test.name), set()).add(job_name)
    return failed_tests


def get_failed_tests(repo, workflow_id):
    """Collect the failed tests of every failed job of a workflow.

    Returns ``file -> classname -> base test name -> {job names}``.
    Raises RuntimeError when the job list or a job's tests cannot be fetched.
    """
    failed_jobs = get_failed_jobs(workflow_id)
    failed_tests = {}
    for job in failed_jobs:
        print("Getting tests for job %s" % str(job))
        tests = get_job_tests(repo, job.job_number)
        if tests is None:
            # Do not silently drop a job: a partial report would look like
            # fewer failures than there really are.
            raise RuntimeError("Could not fetch the tests of job %s (%s)" % (job.job_number, job.name))
        add_failed_tests(failed_tests, job.name, tests)

    return failed_tests