Skip to content

Commit

Permalink
Fix Race Condition (#284)
Browse files Browse the repository at this point in the history
* fix race condition

* PARENT_PID

* fix PARENT_PID

---------

Co-authored-by: Thomas Applencourt <[email protected]>
Co-authored-by: Thomas Applencourt <[email protected]>
  • Loading branch information
3 people authored Sep 3, 2024
1 parent ceaabfc commit 1f37897
Showing 1 changed file with 22 additions and 18 deletions.
40 changes: 22 additions & 18 deletions integration_tests/light_iprof_only_sync.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#!/bin/bash
# Strict mode: abort on command failure, on use of an unset variable,
# and when any stage of a pipeline fails.
set -euo pipefail

# PID of this script. Used as a prefix in log lines and handed to the
# sync daemon so that it can send signals back to us.
PARENT_PID=$$

# Get base real-time signal number
SIGRTMIN=$(kill -l SIGRTMIN)

Expand All @@ -10,11 +13,9 @@ RT_SIGNAL_GLOBAL_BARRIER=$((SIGRTMIN + 1))
RT_SIGNAL_LOCAL_BARRIER=$((SIGRTMIN + 2))
RT_SIGNAL_FINISH=$((SIGRTMIN + 3))

# Flag polled by wait_for_signal. handle_signal switches it to "true" when
# it is invoked for RT_SIGNAL_READY; it starts out "false".
SIGNAL_RECEIVED="false"
# Signal handler for capturing signals
handle_signal() {
echo "$PARENT_PID | Received signal $1 from mpi_daemon"
echo "$PARENT_PID $(date) | Received signal $1 from sync_daemon"
if [ "$1" == "RT_SIGNAL_READY" ]; then
SIGNAL_RECEIVED="true"
fi
Expand All @@ -25,36 +26,39 @@ trap 'handle_signal RT_SIGNAL_READY' $RT_SIGNAL_READY

# Block until the RT_SIGNAL_READY handler has set SIGNAL_RECEIVED to "true".
#
# Globals: SIGNAL_RECEIVED (read)
#
# The flag is deliberately NOT reset here. The caller must set it to "false"
# *before* spawning or signaling the daemon; resetting it at this point would
# throw away a signal that was delivered between the caller's action and this
# call, and the loop below would then never terminate.
wait_for_signal() {
  while [[ "$SIGNAL_RECEIVED" == "false" ]]; do
    sleep 0.1 # Small sleep to prevent busy looping
  done
}

# Functions to spawn and signal the daemon, using the SIGRTMIN-based signal numbers that match the MPI signal daemon's defines.
# To avoid a race condition, `SIGNAL_RECEIVED` needs to be reset to "false"
# before spawning or signaling the daemon.
# Launch the sync daemon named by THAPI_SYNC_DAEMON (looked up in
# THAPI_BIN_DIR) as a background job, passing it our PID, store the job's
# PID in DAEMON_PID, and then block in wait_for_signal.
#
# Globals: THAPI_BIN_DIR, THAPI_SYNC_DAEMON, PARENT_PID (read);
#          SIGNAL_RECEIVED, DAEMON_PID (written)
spawn_daemon_blocking() {
  # Clear the flag while the daemon does not exist yet.
  SIGNAL_RECEIVED="false"

  "${THAPI_BIN_DIR}/sync_daemon_${THAPI_SYNC_DAEMON}" "${PARENT_PID}" &
  DAEMON_PID=$!

  wait_for_signal
}

# Send one signal to the sync daemon and block until wait_for_signal returns.
#
# Arguments: $1 - signal number (or name) handed to `kill`
# Globals:   DAEMON_PID (read), SIGNAL_RECEIVED (written)
send_signal_blocking() {
  # Reset the flag *before* signaling, so that a reply delivered right after
  # `kill` is not overwritten by the reset.
  SIGNAL_RECEIVED="false"
  # Exactly one signal per call.
  kill -"$1" "$DAEMON_PID"
  wait_for_signal
}

# Start the sync daemon in the background (exactly once) and block until
# spawn_daemon_blocking returns. PARENT_PID is already set at the top of
# the script, so it is not reassigned here.
echo "$PARENT_PID $(date) | Spawn Daemon"
spawn_daemon_blocking
echo "$PARENT_PID $(date) | Send Local Barrier signal"
send_signal_blocking "$RT_SIGNAL_LOCAL_BARRIER"

# Run test program
"$@"

# Final synchronization after the test program has finished
echo "$PARENT_PID $(date) | Send Local Barrier signal"
send_signal_blocking "$RT_SIGNAL_LOCAL_BARRIER"
echo "$PARENT_PID $(date) | Send Global Barrier signal"
send_signal_blocking "$RT_SIGNAL_GLOBAL_BARRIER"
echo "$PARENT_PID $(date) | Send Termination signal"
send_signal_blocking "$RT_SIGNAL_FINISH"
echo "$PARENT_PID $(date) | Wait for daemon to quit"
wait "$DAEMON_PID"

0 comments on commit 1f37897

Please sign in to comment.