Skip to content

Commit

Permalink
Merge pull request #124 from redhat-performance/multi_run
Browse files Browse the repository at this point in the history
Multi run
  • Loading branch information
dvalinrh authored Mar 4, 2025
2 parents 635f7fc + 55a87bd commit a870050
Showing 1 changed file with 160 additions and 40 deletions.
200 changes: 160 additions & 40 deletions bin/burden
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,26 @@
# the test.
#

#
# Locking overview
#
# To allow the user to run multiple copies of Zathras in the same directory the following must
# happen.
# 1) Unique file names for files that are specific to the Zathras instance
# 2) Files that are common between instances must be locked if there is potential for
# them being updated by multiple Zathras instances.
#
# Unique file names are accomplished via mktemp
#
# For locking for blocks of shell code, we use
# Exclusive general lock
# To lock: gl_lock_exclusive <locking from, debug only>
# To unlock: gl_unlock_exclusive <unlocking from, debug only>
#
# For locking on a command, we use
# flock -x <file name> <command>
#

# Burdens version number.

cli=${0}
Expand Down Expand Up @@ -88,7 +108,9 @@ export ANSIBLE_ROLES_PATH=$HOME/.ansible/collections/ansible_collections/pbench/
# cloud_region: region to create the instance in.
# gl_cloud_region_zone: zone to create the instance in.
#
gl_run_info=`mktemp /tmp/zathras_run_info.XXXXX`
value_not_set="none"
gl_parse_file=""
gl_sys_index_rerun=1
gl_kit_upload_directory="none"
gl_retry_failed_tests=1
Expand Down Expand Up @@ -210,6 +232,11 @@ gl_cpu_type_request="none"
gl_run_file=""
gl_failed_test_rpt="failed_tests"

#
# Make sure the lock directory exists
#
lock_dir=${gl_top_dir}/lock_dir
mkdir -p $lock_dir
#
# We get the number of failed test reports at the start of the test. When we exit we will
# check the original number of reported failed tests to what we currently have. If they
Expand All @@ -231,24 +258,71 @@ else
UTILS_DIR=$UTILS_DIR/utils
fi

#
# Exclusive locking routines
#
# Given bash does not have a locking mechanism built in, for code blocks
# we need to have our own.
#
# To do this:
# Locking exclusive: Attempt to make the directory $gl_lock. If the mkdir
# fails, then sleep for 4 seconds, and repeat. If mkdir passes, then set
# $gl_have_locked to be 1, and break from the loop.
# Unlocking exclusive: Simply remove the dir $gl_lock and set $gl_have_locked
# to be 0.
#
# $gl_have_locked is to make sure we do not attempt to remove the lock if we do not
# own it. The cleanup_and_exit routine attempts to unlock on every exit; checking
# $gl_have_locked prevents that from happening if we do not own the lock.
#
gl_lock=${gl_top_dir}/gl_lock
gl_have_locked=0
gl_debug_lock=0

#
# Acquire the exclusive general lock.
#
# The lock is the directory $gl_lock. mkdir either creates it (we now hold
# the lock) or fails, in which case we sleep 4 seconds and try again.
# Once the directory has been created, $gl_have_locked is set to 1.
#
# When $gl_debug_lock is 1, the caller's function name and line number are
# echoed before attempting to lock.
#
# NOTE: every mkdir failure is retried, so this only returns once the
# directory can be created.
#
gl_lock_exclusive()
{
	if [[ $gl_debug_lock -eq 1 ]]; then
		echo ${FUNCNAME[1]} ${BASH_LINENO[0]}
	fi
	#
	# The path must be quoted. Unquoted, a directory name containing
	# whitespace is split into several mkdir arguments, mkdir never
	# succeeds, and we would loop here forever.
	#
	until mkdir "$gl_lock" 2> /dev/null
	do
		sleep 4
	done
	gl_have_locked=1
}

#
# Release the exclusive general lock.
#
# Removes the lock directory $gl_lock and sets $gl_have_locked to 0.
#
# When $gl_debug_lock is 1, the caller's function name and line number are
# echoed before unlocking.
#
gl_unlock_exclusive()
{
	if [[ $gl_debug_lock -eq 1 ]]; then
		echo ${FUNCNAME[1]} ${BASH_LINENO[0]}
	fi
	#
	# The path must be quoted. Unquoted, a directory name containing
	# whitespace is split into several rmdir arguments and the lock
	# directory is left behind, blocking every other instance.
	#
	rmdir "$gl_lock"
	gl_have_locked=0
}

#
# Generate report of test status.
#
process_results()
{
if [ $gl_first_invocation -eq 1 ]; then
sort -u run_info > run_info.sorted
sort -u $gl_run_info > ${gl_run_info}.sorted
while IFS= read -r run_data
do
pushd $gl_top_dir/$run_data >& /dev/null
$gl_top_dir/tools_bin/determine_test_status
grep failed initial_summary > $gl_top_dir/failures
$gl_top_dir/tools_bin/report_missing_failed_test
if [[ $? -ne 0 ]] && [[ $gl_retry_failed_tests -eq 1 ]]; then
handle_error_reruns
fi
popd >& /dev/null
done < "run_info.sorted"
done < "${gl_run_info}.sorted"
rm ${gl_run_info}.sorted $gl_run_info
fi
}

Expand Down Expand Up @@ -281,6 +355,13 @@ cleanup_and_exit()
rtc=1
fi
fi
rm -f $gl_cli_supplied_options $gl_run_info ${gl_run_info}.sorted
#
# In case we have a lock
#
if [[ $gl_have_locked -eq 1 ]]; then
gl_unlock_exclusive
fi
source $UTILS_DIR/cleanup_and_exit_out --fail_report $gl_failed_test_rpt --msg_string "$1" --rtc $rtc --pid $BASHPID $scenario_restore $sysname --top_dir $gl_top_dir
}

Expand Down Expand Up @@ -741,9 +822,17 @@ general_setup()
#
gl_test_repos=`echo $1 | cut -d' ' -f 2 | sed "s/test_defs.yml/full_test_defs.yml/g"`

tar cf bin.tar bin
tar cf tools_bin.tar tools_bin
tar cf sysctl_settings.tar sysctl_settings
#
# Only build the tar files once. Also make sure to only allow one process
# operating here.
#
gl_lock_exclusive
if [[ ! -f "bin.tar" ]]; then
tar cf bin.tar bin
tar cf tools_bin.tar tools_bin
tar cf sysctl_settings.tar sysctl_settings
fi
gl_unlock_exclusive
}

#
Expand Down Expand Up @@ -1594,6 +1683,7 @@ create_ansible_options()
#
# Azure, we have a limit on resource group name, need the test_index.
#
gl_lock_exclusive
if [[ $gl_sys_type == "azure" ]]; then
run_dir=${gl_run_prefix}/${gl_os_vendor}/${gl_system_type}/${test_index}_${host_or_cloud_inst}
else
Expand All @@ -1617,8 +1707,9 @@ create_ansible_options()
rc=$?
done

echo ${gl_run_prefix}/${gl_os_vendor}/${gl_system_type} >> ${gl_top_dir}/run_info
echo ${gl_run_prefix}/${gl_os_vendor}/${gl_system_type} >> $gl_run_info
make_dir_report_errors $run_dir
gl_unlock_exclusive
dir_list=$dir_list$run_dir" "
cp ${gl_test_def_dir}/test_defs.yml $run_dir
cp ${gl_test_def_dir}/full_test_defs.yml $run_dir
Expand Down Expand Up @@ -1904,7 +1995,20 @@ create_ansible_options()
echo $cli "${arguments[@]}" | sed "s/bin/./g" > ${run_dir}/exec_command
test_info_str=`grep test_to_run: $run_dir/ansible_vars_main.yml | sed "s/\[//g" | sed "s/\]//g" | cut -d: -f 2 | sed "s/ //g"`
echo "Starting ${test_info_str} on ${host_or_cloud_inst}"
kick_off.sh $base_string &
if [[ $gl_system_type == "local" ]]; then
#
# Do not allow simultaneous runs to the same local host.
#
touch $gl_top_dir/lock_dir/${host_or_cloud_inst}
flock -x $gl_top_dir/lock_dir/${host_or_cloud_inst} kick_off.sh $base_string | tee ${run_dir}/ansible_log &
else
#
# Cloud does not require exclusive locks on execution
# as we create a new cloud image.
#
kick_off.sh $base_string | tee ${run_dir}/ansible_log &
fi
pids[${pindex}]=$!
index=$!
pids[${pindex}]="${index}:${test_info_str} on ${host_or_cloud_inst}"
let "pindex=$pindex+1"
Expand Down Expand Up @@ -1992,7 +2096,7 @@ create_ansible_options()
if [[ $results_report != "" ]]; then
grep -q Failed $results_report
if [ $? -ne 1 ]; then
echo "${timestamp} Error: System: ${test_sys}, Test, $i, reported failure" >> $gl_top_dir/failed_runs
flock -x $gl_top_dir/failed_runs echo "${timestamp} Error: System: ${test_sys}, Test, $i, reported failure" >> $gl_top_dir/failed_runs
echo " Error: reported a failure" >> ../results_info
fi
fi
Expand Down Expand Up @@ -2149,16 +2253,20 @@ check_for_terraform()
#
package_check()
{
check_for_pip3
check_for_ansible
check_for_yq
check_for_jq
check_for_python
if [[ $gl_system_type == "aws" ]]; then
check_for_boto
check_for_aws
gl_lock_exclusive
if [[ ! -f utils_version ]]; then
check_for_pip3
check_for_ansible
check_for_yq
check_for_jq
check_for_python
if [[ $gl_system_type == "aws" ]]; then
check_for_boto
check_for_aws
fi
check_for_terraform
fi
check_for_terraform
gl_unlock_exclusive
}

#
Expand Down Expand Up @@ -2504,28 +2612,34 @@ convert_scenario_file()
#
# Now create the parse file to use.
#
echo "${sed_string}" > parse_reduce
chmod 755 parse_reduce
echo "${sed_string}" > parse_reduce_$$
chmod 755 parse_reduce_$$

tmp_run_file=$(mktemp /tmp/zath_temp_run.XXXXXX)
./parse_reduce | sed "s/+++/\//g" > $tmp_run_file
cat $tmp_run_file | yq . > parse_file.tmp
./parse_reduce_$$ | sed "s/+++/\//g" > $tmp_run_file
rm ./parse_reduce_$$
parse_file_tmp=$(mktemp /tmp/parse_file_tmp.XXXXXX)
gl_parse_file=$(mktemp /tmp/parse_file.XXXXXX)
cat $tmp_run_file | yq . > $parse_file_tmp
if [ $? -ne 0 ]; then
rm -f $gl_parse_file $parse_file_tmp $tmp_run_file
cleanup_and_exit "Creation of the parse_file via yq failed" 1
fi
#
# Verify the file just created is valid.
#
sed "s/----/;/g" parse_file.tmp > parse_file
python -c 'import yaml, sys; print(yaml.safe_load(sys.stdin))' < parse_file
sed "s/----/;/g" $parse_file_tmp > $gl_parse_file
python -c 'import yaml, sys; print(yaml.safe_load(sys.stdin))' < $gl_parse_file
if [ $? -eq 1 ]; then
rm -f $gl_parse_file $parse_file_tmp $tmp_run_file
cleanup_and_exit "The file, parse_file does not meet yaml requirements." 1
fi
#
# Check to make sure we have the proper number of host configs
#
sc_cnt=`grep host_config $tmp_run_file | wc -l`
ps_cnt=`grep host_config parse_file.tmp | wc -l`
ps_cnt=`grep host_config $parse_file_tmp | wc -l`
rm -f $parse_file_tmp
if [ $ps_cnt != $sc_cnt ]; then
cleanup_and_exit "yq did not find the appropriate number of hosts, look for duplicate names" 1
fi
Expand Down Expand Up @@ -2564,12 +2678,15 @@ run_scenario()
# Convert the scenario file. After this the scenario will point to the parsed
# file
convert_scenario_file $scenario
scenario=parse_file
#
# $gl_parse_file points to a temp file.
#
working_scenario=$gl_parse_file

#
# Verify all tests that are present are valid tests.
#
verify_gl_test_list=`grep \"tests\": parse_file | cut -d\" -f 4`
verify_gl_test_list=`grep \"tests\": $gl_parse_file | cut -d\" -f 4`
for i in $verify_test_list;
do
verify_test $i
Expand Down Expand Up @@ -2609,7 +2726,7 @@ run_scenario()
globals[${gindex}]=$field_separ\"--$field_value\"
let "gindex=$gindex+1"
field_separ=" "
done < "$scenario"
done < "$working_scenario"
if [[ $update_target_uploaded -eq 1 ]]; then
update_the_image $gl_update_target
fi
Expand Down Expand Up @@ -2745,15 +2862,16 @@ run_scenario()
pid=""
else
tmpfile=$(mktemp /tmp/zath_temp_test_cli.XXXXXX)
run_burden_file="run_burden_$$_${pindex}"
echo "#!/bin/bash" > $tmpfile
echo ./burden $test_cli --run_file run_burden_${pindex} > $tmpfile
echo ./burden $test_cli --run_file $run_burden_file > $tmpfile
#
# Remove duplicate entries.
#
run_burden_remove_dups $tmpfile
mv $tmpfile run_burden_${pindex}
chmod 755 run_burden_${pindex}
./run_burden_${pindex} &
mv $tmpfile ${run_burden_file}
chmod 755 ${run_burden_file}
./${run_burden_file} &
pids[${pindex}]=$!
let "pindex=$pindex+1"
fi
Expand All @@ -2776,7 +2894,7 @@ run_scenario()
cli_value=`echo $setting | sed "s/://" | sed 's/,$//'`
test_values[${test_index}]=" --$cli_value"
let "test_index=$test_index+1"
done < "$scenario"
done < "$working_scenario"
#
# Wait for everyone to finish up.
#
Expand All @@ -2786,6 +2904,7 @@ run_scenario()
if [[ $gl_update_target != $value_not_set ]]; then
$UTILS_DIR/cleanup_install_lock $BASHPID
fi
rm $working_scenario
}

#
Expand Down Expand Up @@ -3686,7 +3805,7 @@ grab_cli_data()

eval set --$opts

gl_cli_supplied_options=`mktemp /tmp/zathras.XXXXX`
gl_cli_supplied_options=`mktemp /tmp/zathras_cli.XXXXX`

#
# If no options provided, then usage message.
Expand Down Expand Up @@ -3915,8 +4034,6 @@ first_invocation()
mv $tfile $gl_scenario_to_run
rm $cfile
fi
rm test_info 2> /dev/null
rm java_info 2> /dev/null

if [[ $gl_scenario_to_run != "" ]]; then
test_def_info=`grep "^ test_def_dir:" ${gl_scenario_to_run}`
Expand Down Expand Up @@ -3957,9 +4074,13 @@ first_invocation()
fi
fi

integrate_templates ${gl_test_def_dir}/test_defs.yml
cat $gl_test_def_dir/full_test_defs.yml | yq . > test_info
cat $gl_test_def_dir/java_pkg_def.yml | yq . > java_info
gl_lock_exclusive
if [[ ! -f test_info ]] || [[ $gl_test_version_check -eq 1 ]] || [[ $gl_update_test_versions -eq 1 ]]; then
integrate_templates ${gl_test_def_dir}/test_defs.yml
cat $gl_test_def_dir/full_test_defs.yml | yq . > test_info
cat $gl_test_def_dir/java_pkg_def.yml | yq . > java_info
fi
gl_unlock_exclusive
}

cli_data="$@"
Expand Down Expand Up @@ -3996,7 +4117,6 @@ fi
# check to make sure the packages that are required are installed.
#
if [[ $gl_first_invocation -eq 1 ]]; then
rm utils_version run_info 2> /dev/null
package_check
fi

Expand Down

0 comments on commit a870050

Please sign in to comment.