Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(system_monitor): add diagnostic feature for monitoring swap usage #1661

Open
wants to merge 5 commits into
base: beta/v0.3.20.1
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions system/system_monitor/config/mem_monitor.param.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
/**:
ros__parameters:
available_size: 1024 # MB
usage_timeout: 5 # sec
ecc_timeout: 5 # sec
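swap_usage_warn: 0.25 # ratio of swap in use, 0.0-1.0 (presumably 0.25 = 25%; TODO confirm against checkSwapUsage)
swap_usage_error: 0.75 # ratio of swap in use, 0.0-1.0 (presumably 0.75 = 75%; TODO confirm against checkSwapUsage)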
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,14 @@

#include <diagnostic_updater/diagnostic_updater.hpp>

#include <boost/process.hpp>

#include <climits>
#include <map>
#include <string>
#include <unordered_map>

namespace bp = boost::process;

class MemMonitor : public rclcpp::Node
{
Expand Down Expand Up @@ -52,6 +57,41 @@ class MemMonitor : public rclcpp::Node
void checkUsage(
diagnostic_updater::DiagnosticStatusWrapper & stat); // NOLINT(runtime/references)

/**
* @brief check Swap usage
* @param [out] stat diagnostic message passed directly to diagnostic publish calls
* @note NOLINT syntax is needed since diagnostic_updater asks for a non-const reference
* to pass diagnostic message updated in this function to diagnostic publish calls.
*/
void checkSwapUsage(
diagnostic_updater::DiagnosticStatusWrapper & stat); // NOLINT(runtime/references)

/**
* @brief check Memory ECC
* @param [out] stat diagnostic message passed directly to diagnostic publish calls
*/
void checkEcc(diagnostic_updater::DiagnosticStatusWrapper & stat);

/**
* @brief read /proc/meminfo
*/
void readMemInfo(std::unordered_map<std::string, size_t> & memInfo);

/**
* @brief call readMemInfo and calculate memory usage
*/
std::string readUsage(std::map<std::string, size_t> & map);

/**
* @brief execute edac-util command
*/
std::string executeEdacUtil(std::string & output, std::string & pipe2_error_str);

/**
* @brief Timer callback that reads information about memory usage and ECC
*/
void onTimer();

/**
* @brief get human-readable output for memory size
* @param [in] str size in bytes
Expand All @@ -63,7 +103,30 @@ class MemMonitor : public rclcpp::Node

char hostname_[HOST_NAME_MAX + 1]; //!< @brief host name

size_t available_size_; //!< @brief Memory available size to generate error
size_t available_size_; //!< @brief Memory available size to generate error
int usage_timeout_; //!< @brief Timeout duration for executing readUsage
int ecc_timeout_; //!< @brief Timeout duration for executing edac-util command
float swap_usage_warn_; //!< @brief Swap usage(%) to generate warning
float swap_usage_error_; //!< @brief Swap usage(%) to generate error

rclcpp::TimerBase::SharedPtr
timer_; //!< @brief Timer to execute readUsage and edac-util command
rclcpp::CallbackGroup::SharedPtr timer_callback_group_; //!< @brief Callback Group

std::mutex usage_mutex_; //!< @brief Mutex for output from /proc/meminfo
std::string usage_error_str_; //!< @brief Error string
std::map<std::string, size_t> usage_map_; //!< @brief Output of /proc/meminfo
double usage_elapsed_ms_; //!< @brief Execution time of readUsage
std::mutex usage_timeout_mutex_; //!< @brief Mutex regarding timeout for executing readUsage

std::mutex ecc_mutex_; //!< @brief Mutex for output from edac-util command
std::string ecc_error_str_; //!< @brief Error string
std::string ecc_pipe2_error_str_; //!< @brief Error string regarding pipe2 function call
std::string ecc_output_; //!< @brief Output of edac-util command
double ecc_elapsed_ms_; //!< @brief Execution time of edac-util command
std::mutex
ecc_timeout_mutex_; //!< @brief Mutex regarding timeout for executing edac-util command
bool use_edac_util_; //!< @brief Available to use edac-util command or not

/**
* @brief Memory usage status messages
Expand Down
Loading
Loading