Skip to content

Commit

Permalink
feat(system_monitor): add a hostname for diagnostic information (auto…
Browse files Browse the repository at this point in the history
…warefoundation#1636)

* add a hostname for diagnostic information

Signed-off-by: Zhi Wang <[email protected]>

* style(pre-commit): autofix

---------

Signed-off-by: Zhi Wang <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
zhiwango and pre-commit-ci[bot] authored Dec 2, 2024
1 parent 3bd0bd7 commit d7cc786
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 32 deletions.
11 changes: 6 additions & 5 deletions system/system_monitor/src/cpu_monitor/cpu_monitor_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,12 @@ CPUMonitorBase::CPUMonitorBase(const std::string & node_name, const rclcpp::Node
mpstat_exists_ = (p.empty()) ? false : true;

updater_.setHardwareID(hostname_);
updater_.add("CPU Temperature", this, &CPUMonitorBase::checkTemp);
updater_.add("CPU Usage", this, &CPUMonitorBase::checkUsage);
updater_.add("CPU Load Average", this, &CPUMonitorBase::checkLoad);
updater_.add("CPU Thermal Throttling", this, &CPUMonitorBase::checkThrottling);
updater_.add("CPU Frequency", this, &CPUMonitorBase::checkFrequency);
updater_.add(std::string(hostname_) + ": CPU Temperature", this, &CPUMonitorBase::checkTemp);
updater_.add(std::string(hostname_) + ": CPU Usage", this, &CPUMonitorBase::checkUsage);
updater_.add(std::string(hostname_) + ": CPU Load Average", this, &CPUMonitorBase::checkLoad);
updater_.add(
std::string(hostname_) + ": CPU Thermal Throttling", this, &CPUMonitorBase::checkThrottling);
updater_.add(std::string(hostname_) + ": CPU Frequency", this, &CPUMonitorBase::checkFrequency);

// Publisher
rclcpp::QoS durable_qos{1};
Expand Down
12 changes: 7 additions & 5 deletions system/system_monitor/src/gpu_monitor/gpu_monitor_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,13 @@ GPUMonitorBase::GPUMonitorBase(const std::string & node_name, const rclcpp::Node
gethostname(hostname_, sizeof(hostname_));

updater_.setHardwareID(hostname_);
updater_.add("GPU Temperature", this, &GPUMonitorBase::checkTemp);
updater_.add("GPU Usage", this, &GPUMonitorBase::checkUsage);
updater_.add("GPU Memory Usage", this, &GPUMonitorBase::checkMemoryUsage);
updater_.add("GPU Thermal Throttling", this, &GPUMonitorBase::checkThrottling);
updater_.add("GPU Frequency", this, &GPUMonitorBase::checkFrequency);
updater_.add(std::string(hostname_) + ": GPU Temperature", this, &GPUMonitorBase::checkTemp);
updater_.add(std::string(hostname_) + ": GPU Usage", this, &GPUMonitorBase::checkUsage);
updater_.add(
std::string(hostname_) + ": GPU Memory Usage", this, &GPUMonitorBase::checkMemoryUsage);
updater_.add(
std::string(hostname_) + ": GPU Thermal Throttling", this, &GPUMonitorBase::checkThrottling);
updater_.add(std::string(hostname_) + ": GPU Frequency", this, &GPUMonitorBase::checkFrequency);
}

void GPUMonitorBase::update()
Expand Down
26 changes: 16 additions & 10 deletions system/system_monitor/src/hdd_monitor/hdd_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,22 @@ HddMonitor::HddMonitor(const rclcpp::NodeOptions & options)
getHddParams();

updater_.setHardwareID(hostname_);
updater_.add("HDD Temperature", this, &HddMonitor::checkSmartTemperature);
updater_.add("HDD PowerOnHours", this, &HddMonitor::checkSmartPowerOnHours);
updater_.add("HDD TotalDataWritten", this, &HddMonitor::checkSmartTotalDataWritten);
updater_.add("HDD RecoveredError", this, &HddMonitor::checkSmartRecoveredError);
updater_.add("HDD Usage", this, &HddMonitor::checkUsage);
updater_.add("HDD ReadDataRate", this, &HddMonitor::checkReadDataRate);
updater_.add("HDD WriteDataRate", this, &HddMonitor::checkWriteDataRate);
updater_.add("HDD ReadIOPS", this, &HddMonitor::checkReadIops);
updater_.add("HDD WriteIOPS", this, &HddMonitor::checkWriteIops);
updater_.add("HDD Connection", this, &HddMonitor::checkConnection);
updater_.add(
std::string(hostname_) + ": HDD Temperature", this, &HddMonitor::checkSmartTemperature);
updater_.add(
std::string(hostname_) + ": HDD PowerOnHours", this, &HddMonitor::checkSmartPowerOnHours);
updater_.add(
std::string(hostname_) + ": HDD TotalDataWritten", this,
&HddMonitor::checkSmartTotalDataWritten);
updater_.add(
std::string(hostname_) + ": HDD RecoveredError", this, &HddMonitor::checkSmartRecoveredError);
updater_.add(std::string(hostname_) + ": HDD Usage", this, &HddMonitor::checkUsage);
updater_.add(std::string(hostname_) + ": HDD ReadDataRate", this, &HddMonitor::checkReadDataRate);
updater_.add(
std::string(hostname_) + ": HDD WriteDataRate", this, &HddMonitor::checkWriteDataRate);
updater_.add(std::string(hostname_) + ": HDD ReadIOPS", this, &HddMonitor::checkReadIops);
updater_.add(std::string(hostname_) + ": HDD WriteIOPS", this, &HddMonitor::checkWriteIops);
updater_.add(std::string(hostname_) + ": HDD Connection", this, &HddMonitor::checkConnection);

// get HDD connection status
updateHddConnections();
Expand Down
4 changes: 2 additions & 2 deletions system/system_monitor/src/mem_monitor/mem_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ MemMonitor::MemMonitor(const rclcpp::NodeOptions & options)
{
gethostname(hostname_, sizeof(hostname_));
updater_.setHardwareID(hostname_);
updater_.add("Memory Usage", this, &MemMonitor::checkUsage);
updater_.add(std::string(hostname_) + ": Memory Usage", this, &MemMonitor::checkUsage);

// Enable ECC error detection if edac-utils package is installed
if (!bp::search_path("edac-util").empty()) {
updater_.add("Memory ECC", this, &MemMonitor::checkEcc);
updater_.add(std::string(hostname_) + ": Memory ECC", this, &MemMonitor::checkEcc);
}
}

Expand Down
13 changes: 8 additions & 5 deletions system/system_monitor/src/net_monitor/net_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,14 @@ NetMonitor::NetMonitor(const rclcpp::NodeOptions & options)

gethostname(hostname_, sizeof(hostname_));
updater_.setHardwareID(hostname_);
updater_.add("Network Connection", this, &NetMonitor::check_connection);
updater_.add("Network Usage", this, &NetMonitor::check_usage);
updater_.add("Network Traffic", this, &NetMonitor::monitor_traffic);
updater_.add("Network CRC Error", this, &NetMonitor::check_crc_error);
updater_.add("IP Packet Reassembles Failed", this, &NetMonitor::check_reassembles_failed);
updater_.add(
std::string(hostname_) + ": Network Connection", this, &NetMonitor::check_connection);
updater_.add(std::string(hostname_) + ": Network Usage", this, &NetMonitor::check_usage);
updater_.add(std::string(hostname_) + ": Network Traffic", this, &NetMonitor::monitor_traffic);
updater_.add(std::string(hostname_) + ": Network CRC Error", this, &NetMonitor::check_crc_error);
updater_.add(
std::string(hostname_) + ": IP Packet Reassembles Failed", this,
&NetMonitor::check_reassembles_failed);

nl80211_.init();

Expand Down
2 changes: 1 addition & 1 deletion system/system_monitor/src/ntp_monitor/ntp_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ NTPMonitor::NTPMonitor(const rclcpp::NodeOptions & options)
chronyc_exists_ = (p.empty()) ? false : true;

updater_.setHardwareID(hostname_);
updater_.add("NTP Offset", this, &NTPMonitor::checkOffset);
updater_.add(std::string(hostname_) + ": NTP Offset", this, &NTPMonitor::checkOffset);

// Start timer to execute top command
timer_callback_group_ = this->create_callback_group(rclcpp::CallbackGroupType::MutuallyExclusive);
Expand Down
8 changes: 5 additions & 3 deletions system/system_monitor/src/process_monitor/process_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,17 @@ ProcessMonitor::ProcessMonitor(const rclcpp::NodeOptions & options)
gethostname(hostname_, sizeof(hostname_));

updater_.setHardwareID(hostname_);
updater_.add("Tasks Summary", this, &ProcessMonitor::monitorProcesses);
updater_.add(std::string(hostname_) + ": Tasks Summary", this, &ProcessMonitor::monitorProcesses);

for (index = 0; index < num_of_procs_; ++index) {
auto task = std::make_shared<DiagTask>(fmt::format("High-load Proc[{}]", index));
auto task = std::make_shared<DiagTask>(
fmt::format(std::string(hostname_) + ": High-load Proc[{}]", index));
load_tasks_.push_back(task);
updater_.add(*task);
}
for (index = 0; index < num_of_procs_; ++index) {
auto task = std::make_shared<DiagTask>(fmt::format("High-mem Proc[{}]", index));
auto task = std::make_shared<DiagTask>(
fmt::format(std::string(hostname_) + ": High-mem Proc[{}]", index));
memory_tasks_.push_back(task);
updater_.add(*task);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ VoltageMonitor::VoltageMonitor(const rclcpp::NodeOptions & options)
}
callback = &VoltageMonitor::checkVoltage;
}
updater_.add("CMOS Battery Status", this, callback);
updater_.add(std::string(hostname_) + ": CMOS Battery Status", this, callback);
}

void VoltageMonitor::checkVoltage(diagnostic_updater::DiagnosticStatusWrapper & stat)
Expand Down

0 comments on commit d7cc786

Please sign in to comment.