Skip to content

Commit

Permalink
better error message when bin doesn't exist
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomas Applencourt committed Sep 30, 2024
1 parent 0b50b1a commit a84fb5c
Showing 1 changed file with 32 additions and 17 deletions.
49 changes: 32 additions & 17 deletions xprof/xprof.rb.in
Original file line number Diff line number Diff line change
Expand Up @@ -307,12 +307,12 @@ class Sync_daemon
# we always clean up the daemon
def self.open
yield f = new
rescue StandardError
raise
ensure
return unless f
f.global_barrier
f.finalize
# https://www.rubydoc.info/gems/rubocop/RuboCop/Cop/Lint/EnsureReturn
if f
f.global_barrier
f.finalize
end
end
end

Expand Down Expand Up @@ -346,8 +346,7 @@ def env_tracers
%w[ze ze libze_loader libTracerZE],
%w[cuda cuda libcuda libTracerCUDA],
%w[hip hip libamdhip64 libTracerHIP],
%w[mpi mpi libmpi libTracerMPI],
].each do |name, bt_name, lib, libtracer|
%w[mpi mpi libmpi libTracerMPI]].each do |name, bt_name, lib, libtracer|
# Backend requested, skip omp. It will be handled in a custom case below
next unless OPTIONS[:'backend-names'].include?(bt_name)

Expand Down Expand Up @@ -430,6 +429,9 @@ def launch_usr_bin(env, cmd)
LOGGER.warn { 'Application Exited' }
rescue Interrupt
LOGGER.warn { 'Application Received Interrupt Signal' }
rescue Errno::ENOENT
warn("#{__FILE__}: Can't find executable #{cmd.first}")
raise Errno::ENOENT
end
end

Expand Down Expand Up @@ -570,11 +572,13 @@ end

# Hands the `lttng destroy <session uuid>` command to `exec`.
# Reserved to the MPI local master: any other caller hits a bare `raise`.
def lm_lttng_teardown_session
  raise unless mpi_local_master?

  destroy_cmd = "lttng destroy #{lttng_session_uuid}"
  exec(destroy_cmd)
end

def lm_lttng_kill_sessiond
raise unless mpi_local_master?

# Need to kill the sessiond Daemon. It's safe because each job has their own
#
# In theory, opening the lttng-sessiond.pid file is racy.
Expand Down Expand Up @@ -650,7 +654,19 @@ end

# Start, Stop lttng, and do the on-node analysis
def trace_and_on_node_processing(usr_argv)
# Global barrier at exit
# Shut down the lttng session once the application has ended.
# Every caller goes through the 'waiting_for_application_ending' local
# barrier; only the MPI local master continues past it.
def teardown_lttng(syncd)
  # Every local rank must have finished before the local master
  # is allowed to stop the lttng session
  syncd.local_barrier('waiting_for_application_ending')

  if mpi_local_master?
    # Stop the Lttng session first
    lm_lttng_teardown_session
    # Now that the Lttng session is finished,
    # the session daemon can be killed
    lm_lttng_kill_sessiond
  end
end

Sync_daemon.open do |syncd|
# Load Tracers and APILoaders Lib
backends, h = env_tracers
Expand All @@ -661,19 +677,18 @@ def trace_and_on_node_processing(usr_argv)
# Only local master spawn LTTNG daemon and start session
lm_setup_lttng(backends) if mpi_local_master?
syncd.local_barrier('waiting_for_lttng_setup')

# Launch User Command
launch_usr_bin(h, usr_argv)
begin
launch_usr_bin(h, usr_argv)
rescue Errno::ENOENT
teardown_lttng(syncd)
exit(1)
end

# We need to be sure that all the local ranks are finished
# before the local master stops the lttng session
syncd.local_barrier('waiting_for_application_ending')
teardown_lttng(syncd)
return unless mpi_local_master?

# Stop Lttng session
lm_lttng_teardown_session
# Lttng session is finished,
# we can kill the session daemon
lm_lttng_kill_sessiond
# Preprocess trace
lm_babeltrace(backends)
lm_move_to_shared
Expand Down

0 comments on commit a84fb5c

Please sign in to comment.