Skip to content

Commit

Permalink
fix(syscalls): Enable syscall recording for process mode, fix various…
Browse files Browse the repository at this point in the history
… minor issues

This commit enables syscall recording in process monitoring mode too. It
was previously restricted to system monitoring mode only, for a reason
lost to history, but it works perfectly well with processes too, so there
is no reason to disable it.

Minor fixes:
- Attach the "syscall events for location XXX" writer to the location group
  of "location XXX". It was previously its own location group, but in that
  case it just floats around somewhere, which seems less useful than
  having it directly by the executing process/cpu.

- If 'all' syscalls are selected, initialize the size of the local ->
  global syscall calling context mapping to __NR_syscalls, which is one
  more than the highest defined syscall number. Previously it was initialized
  to the highest syscall number in syscall_filter, but in the 'all'
  case, that is empty.

- Disable the syscall perf_event before finalizing the measurement. Given
  how frequent syscalls are, it is otherwise very easy to end up in an
  endless loop, where lo2s never finishes reading the buffer because
  there are always new events.

 - Always write syscall calling_context_leave's at end of trace.
   Some syscalls do not return (exit_group), which leaves the
   enter dangling in the thread, so that it appears in Vampir as if the
   syscall is ongoing until the end of the trace.

- If we lost events and receive a syscall exit for a syscall we have not
  seen the enter for, while we are still inside another syscall, at least
  write the exit for that previous syscall.
  • Loading branch information
cvonelm committed Mar 4, 2025
1 parent e6cb0ce commit 8295263
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 43 deletions.
13 changes: 8 additions & 5 deletions include/lo2s/perf/syscall/reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class Reader : public EventReader<T>
uint64_t args[6];
};

Reader(Cpu cpu) : cpu_(cpu)
Reader(ExecutionScope scope) : scope_(scope)
{
tracepoint::TracepointEventAttr enter_event =
EventComposer::instance().create_tracepoint_event("raw_syscalls:sys_enter");
Expand All @@ -79,8 +79,8 @@ class Reader : public EventReader<T>
exit_event.set_sample_type(PERF_SAMPLE_IDENTIFIER);
try
{
enter_ev_ = enter_event.open(cpu_, config().cgroup_fd);
exit_ev_ = exit_event.open(cpu_, config().cgroup_fd);
enter_ev_ = enter_event.open(scope_, config().cgroup_fd);
exit_ev_ = exit_event.open(scope_, config().cgroup_fd);
}
catch (const std::system_error& e)
{
Expand All @@ -104,14 +104,17 @@ class Reader : public EventReader<T>
}

Reader(Reader&& other)
: EventReader<T>(std::forward<perf::EventReader<T>>(other)), cpu_(other.cpu_)
: EventReader<T>(std::forward<perf::EventReader<T>>(other)), scope_(other.scope_)
{
std::swap(enter_ev_, other.enter_ev_);
}

// Stops syscall recording for this reader: disables the sys_enter and sys_exit
// events, then calls read() one final time.
//
// The events are disabled *before* reading on purpose: syscalls are frequent
// enough that read() could otherwise keep finding new events and never finish.
//
// Both events are held in std::optional, so value() throws
// std::bad_optional_access if the corresponding event is not set.
void stop()
{
enter_ev_.value().disable();
// This should not be necessary because exit is attached to enter, but it can not hurt.
exit_ev_.value().disable();

this->read();
}

Expand All @@ -121,7 +124,7 @@ class Reader : public EventReader<T>
uint64_t sys_exit_id;

private:
Cpu cpu_;
ExecutionScope scope_;
std::optional<EventGuard> enter_ev_;
std::optional<EventGuard> exit_ev_;
};
Expand Down
7 changes: 5 additions & 2 deletions include/lo2s/perf/syscall/writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ namespace syscall
class Writer : public Reader<Writer>
{
public:
Writer(Cpu cpu, trace::Trace& trace);
Writer(ExecutionScope scope, trace::Trace& trace);

Writer(const Writer& other) = delete;

Expand All @@ -60,8 +60,11 @@ class Writer : public Reader<Writer>
trace::Trace& trace_;
const time::Converter& time_converter_;
otf2::writer::local& writer_;
int64_t last_syscall_nr_;

std::set<int64_t> used_syscalls_;

int64_t last_syscall_nr_;
otf2::chrono::time_point last_tp_;
};
} // namespace syscall
} // namespace perf
Expand Down
2 changes: 1 addition & 1 deletion include/lo2s/trace/trace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ class Trace
otf2::writer::local& sample_writer(const ExecutionScope& scope);
otf2::writer::local& cuda_writer(const Thread& thread);
otf2::writer::local& metric_writer(const MeasurementScope& scope);
otf2::writer::local& syscall_writer(const Cpu& cpu);
otf2::writer::local& syscall_writer(const ExecutionScope& scope);
otf2::writer::local& bio_writer(BlockDevice dev);
otf2::writer::local& create_metric_writer(const std::string& name);
otf2::writer::local& nec_writer(NecDevice device, const Thread& nec_thread);
Expand Down
2 changes: 0 additions & 2 deletions man/lo2s.1.pod
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,6 @@ Can be given multiple times to record multiple syscalls at once.
Argument may either be a syscall name, like "read", or a syscall number.
Note that due to the high event-rate of many syscalls it is advised to keep the number of recorded syscalls limited.

This is only available in system-wide measurement mode

=back

=head2 B<x86_adapt> and B<x86_energy> options
Expand Down
29 changes: 12 additions & 17 deletions src/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,18 @@ void parse_program_options(int argc, const char** argv)
}
}

if (arguments.provided("syscall"))
{
std::vector<std::string> requested_syscalls = arguments.get_all("syscall");
config.use_syscalls = true;

if (std::find(requested_syscalls.begin(), requested_syscalls.end(), "all") ==
requested_syscalls.end())
{
config.syscall_filter = parse_syscall_names(requested_syscalls);
}
}

if (arguments.given("all-cpus") || arguments.given("all-cpus-sampling"))
{
config.monitor_type = lo2s::MonitorType::CPU_SET;
Expand All @@ -656,18 +668,6 @@ void parse_program_options(int argc, const char** argv)
std::exit(EXIT_FAILURE);
}
}

if (arguments.provided("syscall"))
{
std::vector<std::string> requested_syscalls = arguments.get_all("syscall");
config.use_syscalls = true;

if (std::find(requested_syscalls.begin(), requested_syscalls.end(), "all") ==
requested_syscalls.end())
{
config.syscall_filter = parse_syscall_names(requested_syscalls);
}
}
}
else
{
Expand All @@ -677,11 +677,6 @@ void parse_program_options(int argc, const char** argv)
std::exit(EXIT_FAILURE);
}

if (arguments.provided("syscall"))
{
Log::fatal() << "Syscall recording is only available in system-wide monitoring mode";
std::exit(EXIT_FAILURE);
}
config.monitor_type = lo2s::MonitorType::PROCESS;
config.sampling = true;

Expand Down
9 changes: 7 additions & 2 deletions src/monitor/scope_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ ScopeMonitor::ScopeMonitor(ExecutionScope scope, MainMonitor& parent, bool enabl
add_fd(sample_writer_->fd());
}

if (scope.is_cpu() && config().use_syscalls)
if (config().use_syscalls)
{
syscall_writer_ = std::make_unique<perf::syscall::Writer>(scope.as_cpu(), parent.trace());
syscall_writer_ = std::make_unique<perf::syscall::Writer>(scope, parent.trace());
add_fd(syscall_writer_->fd());
}

Expand Down Expand Up @@ -95,6 +95,11 @@ void ScopeMonitor::finalize_thread()
{
sample_writer_->end();
}

if (syscall_writer_)
{
syscall_writer_->stop();
}
}

void ScopeMonitor::monitor(int fd)
Expand Down
25 changes: 20 additions & 5 deletions src/perf/syscall/writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ namespace perf
namespace syscall
{

Writer::Writer(Cpu cpu, trace::Trace& trace)
: Reader(cpu), trace_(trace), time_converter_(perf::time::Converter::instance()),
writer_(trace.syscall_writer(cpu)), last_syscall_nr_(-1)
Writer::Writer(ExecutionScope scope, trace::Trace& trace)
: Reader(scope), trace_(trace), time_converter_(perf::time::Converter::instance()),
writer_(trace.syscall_writer(scope)), last_syscall_nr_(-1)
{
}

Expand All @@ -26,6 +26,7 @@ bool Writer::handle(const Reader::RecordSampleType* sample)
{
writer_.write_calling_context_leave(tp, sample->syscall_nr);
}

last_syscall_nr_ = sample->syscall_nr;
writer_.write_calling_context_enter(tp, sample->syscall_nr, 2);
used_syscalls_.emplace(sample->syscall_nr);
Expand All @@ -36,15 +37,29 @@ bool Writer::handle(const Reader::RecordSampleType* sample)
{
writer_.write_calling_context_leave(tp, sample->syscall_nr);
}
else if (last_syscall_nr_ != -1)
{
writer_.write_calling_context_leave(tp, last_syscall_nr_);
}

last_syscall_nr_ = -1;
}
last_tp_ = tp;
return false;
}

Writer::~Writer()
{
const auto& mapping = trace_.merge_syscall_contexts(used_syscalls_);
writer_ << mapping;
if (last_syscall_nr_ != -1)
{
writer_.write_calling_context_leave(last_tp_, last_syscall_nr_);
}

if (!used_syscalls_.empty())
{
const auto& mapping = trace_.merge_syscall_contexts(used_syscalls_);
writer_ << mapping;
}
}
} // namespace syscall
} // namespace perf
Expand Down
30 changes: 21 additions & 9 deletions src/trace/trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@
#include <stdexcept>
#include <tuple>

extern "C"
{
#include <asm-generic/unistd.h>
}

namespace lo2s
{
namespace trace
Expand Down Expand Up @@ -458,17 +463,16 @@ otf2::writer::local& Trace::nec_writer(NecDevice device, const Thread& nec_threa
return archive_(intern_location);
}

otf2::writer::local& Trace::syscall_writer(const Cpu& cpu)
otf2::writer::local& Trace::syscall_writer(const ExecutionScope& scope)
{
MeasurementScope scope = MeasurementScope::syscall(cpu.as_scope());

const auto& syscall_location_group = registry_.emplace<otf2::definition::location_group>(
ByMeasurementScope(scope), intern(scope.name()), otf2::common::location_group_type::process,
registry_.get<otf2::definition::system_tree_node>(ByCpu(cpu)));
MeasurementScope meas_scope = MeasurementScope::syscall(scope);

const auto& intern_location = registry_.emplace<otf2::definition::location>(
ByMeasurementScope(scope), intern(scope.name()), syscall_location_group,
ByMeasurementScope(meas_scope), intern(meas_scope.name()),
registry_.get<otf2::definition::location_group>(
ByExecutionScope(groups_.get_parent(scope))),
otf2::definition::location::location_type::cpu_thread);

return archive_(intern_location);
}

Expand Down Expand Up @@ -804,8 +808,16 @@ Trace::merge_calling_contexts(const std::map<Thread, ThreadCctxRefs>& new_ips, s
otf2::definition::mapping_table
Trace::merge_syscall_contexts(const std::set<int64_t>& used_syscalls)
{
std::vector<uint32_t> mappings(
*std::max_element(config().syscall_filter.begin(), config().syscall_filter.end()) + 1);
std::vector<uint32_t> mappings;
if (!config().syscall_filter.empty())
{
mappings.resize(
*std::max_element(config().syscall_filter.begin(), config().syscall_filter.end()) + 1);
}
else
{
mappings.resize(__NR_syscalls);
}

for (const auto& syscall_nr : used_syscalls)
{
Expand Down

0 comments on commit 8295263

Please sign in to comment.