Skip to content

Commit

Permalink
Merge pull request #4 from nobleo/add/cpu_monitor
Browse files Browse the repository at this point in the history
Ported cpu_monitor to ROS2
  • Loading branch information
RichardvdK authored Jan 17, 2024
2 parents b169c12 + f22ab87 commit e4b13e0
Show file tree
Hide file tree
Showing 6 changed files with 268 additions and 13 deletions.
7 changes: 6 additions & 1 deletion diagnostic_common_diagnostics/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ find_package(ament_cmake_python REQUIRED)
ament_python_install_package(${PROJECT_NAME})

install(PROGRAMS
${PROJECT_NAME}/cpu_monitor.py
${PROJECT_NAME}/ntp_monitor.py
DESTINATION lib/${PROJECT_NAME}
)
Expand All @@ -21,6 +22,10 @@ if(BUILD_TESTING)
test_ntp_monitor
test/systemtest/test_ntp_monitor.py
TIMEOUT 10)
ament_add_pytest_test(
test_cpu_monitor
test/systemtest/test_cpu_monitor.py
TIMEOUT 10)
endif()

ament_package()
ament_package()
18 changes: 12 additions & 6 deletions diagnostic_common_diagnostics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,28 @@ Currently only the NTP monitor is ported to ROS2.
# Nodes

## ntp_monitor.py
Runs 'ntpdate' to check if the system clock is synchronized with the NTP server.
Runs 'ntpdate' to check if the system clock is synchronized with the NTP server.
* If the offset is smaller than `offset-tolerance`, an `OK` status will be published.
* If the offset is larger than the configured `offset-tolerance`, a `WARN` status will be published,
* if it is bigger than `error-offset-tolerance`, an `ERROR` status will be published.
* If there was an error running `ntpdate`, an `ERROR` status will be published.

## cpu_monitor.py
The `cpu_monitor` module allows users to monitor the CPU usage of their system in real-time.
It publishes the usage percentage in a diagnostic message.

* Name of the node is "cpu_monitor_" + hostname.
* Uses the following args:
* warning_percentage: If the averaged usage of at least one CPU is > warning_percentage, a WARN status will be published.
* window: the maximum length of the used collections.deque for queuing CPU readings.

### Published Topics
#### /diagnostics
diagnostic_msgs/DiagnosticArray
The diagnostics information.

### Parameters
#### ntp_hostname
#### ntp_hostname
(default: "pool.ntp.org")
Hostname of NTP server.

Expand All @@ -46,14 +55,11 @@ Disable self test.
## hd_monitor.py
**To be ported**

## cpu_monitor.py
**To be ported**

## ram_monitor.py
**To be ported**

## sensors_monitor.py
**To be ported**

## tf_monitor.py
**To be ported**
**To be ported**
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Software License Agreement (BSD License)
#
# Copyright (c) 2017, TNO IVS, Helmond, Netherlands
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
# * Neither the name of the TNO IVS nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

# \author Rein Appeldoorn

import collections
import socket
import traceback

from diagnostic_msgs.msg import DiagnosticStatus

from diagnostic_updater import DiagnosticTask, Updater

import psutil

import rclpy
from rclpy.node import Node


class CpuTask(DiagnosticTask):
    """
    Diagnostic task that reports per-CPU load averaged over a sliding window.

    Every call to :meth:`run` takes one ``psutil.cpu_percent(percpu=True)``
    sample, stores it in a bounded deque and reports, for each CPU, the mean
    of the stored samples together with the mean over all CPUs.
    """

    def __init__(self, warning_percentage=90, window=1):
        """
        Create the task.

        :param warning_percentage: Threshold in percent (converted with
            ``int()``). A WARN summary is produced when the averaged load of
            at least one CPU is strictly greater than this value.
        :param window: Maximum number of samples kept for averaging. Values
            below 1 are treated as 1; ``None`` keeps every sample.
        """
        DiagnosticTask.__init__(self, 'CPU Information')

        self._warning_percentage = int(warning_percentage)

        # deque(maxlen=0) silently discards every sample, which would leave
        # run() with nothing to average (ZeroDivisionError), and a negative
        # maxlen raises ValueError. Clamp so at least the latest sample is
        # always retained.
        maxlen = None if window is None else max(1, int(window))
        self._readings = collections.deque(maxlen=maxlen)

    @staticmethod
    def _mean(values):
        """Return the arithmetic mean of ``values``, or NaN when it is empty."""
        return float(sum(values)) / len(values) if values else float('nan')

    def _get_average_reading(self):
        """
        Return the per-CPU mean over all stored samples.

        :return: List with one float per CPU; empty when no sample is stored.
        """
        # zip(*readings) regroups the samples so that each tuple holds all
        # stored values belonging to a single CPU.
        return [self._mean(cpu_percentages)
                for cpu_percentages in zip(*self._readings)]

    def run(self, stat):
        """
        Take a new CPU sample and fill in the diagnostic status.

        :param stat: Status wrapper to populate; key/value pairs are added
            via ``stat.add`` and the level/message via ``stat.summary``.
        :return: The same ``stat`` object.
        """
        self._readings.append(psutil.cpu_percent(percpu=True))
        cpu_percentages = self._get_average_reading()
        # NaN-safe: does not divide by zero if no per-CPU value is available.
        cpu_average = self._mean(cpu_percentages)

        stat.add('CPU Load Average', '{:.2f}'.format(cpu_average))
        for idx, cpu_percentage in enumerate(cpu_percentages):
            stat.add('CPU {} Load'.format(idx), '{:.2f}'.format(cpu_percentage))

        # Strict comparison: a load exactly equal to the threshold is still OK.
        warn = any(cpu_percentage > self._warning_percentage
                   for cpu_percentage in cpu_percentages)

        if warn:
            stat.summary(DiagnosticStatus.WARN,
                         'At least one CPU exceeds {} percent'.format(
                             self._warning_percentage))
        else:
            stat.summary(DiagnosticStatus.OK,
                         'CPU Average {:.2f} percent'.format(cpu_average))

        return stat


def main(args=None):
    """
    Run the CPU monitor node until it is interrupted.

    Creates a node named ``cpu_monitor_<hostname>``, reads the
    ``warning_percentage`` and ``window`` parameters and registers a
    :class:`CpuTask` with a diagnostic updater.

    :param args: Command line arguments forwarded to ``rclpy.init``.
    """
    rclpy.init(args=args)

    # Create the node. Node names must not contain '-' or '.', both of which
    # are common in hostnames (the latter in fully qualified names).
    hostname = socket.gethostname()
    node_name = 'cpu_monitor_%s' % hostname.replace('-', '_').replace('.', '_')
    node = Node(node_name)

    try:
        # Declare and get parameters
        node.declare_parameter('warning_percentage', 90)
        node.declare_parameter('window', 1)

        warning_percentage = node.get_parameter(
            'warning_percentage').get_parameter_value().integer_value
        window = node.get_parameter('window').get_parameter_value().integer_value

        # Create diagnostic updater with default updater rate of 1 hz
        updater = Updater(node)
        updater.setHardwareID(hostname)
        updater.add(CpuTask(warning_percentage=warning_percentage, window=window))

        rclpy.spin(node)
    finally:
        # Release the node and the rclpy context even when spin() is
        # interrupted or raises.
        node.destroy_node()
        if rclpy.ok():
            rclpy.shutdown()


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C is the regular way to stop the node; exit quietly.
        pass
    except Exception:
        traceback.print_exc()
        # Report the failure through the exit status instead of exiting
        # with 0 after an unexpected error.
        raise SystemExit(1)
1 change: 1 addition & 0 deletions diagnostic_common_diagnostics/mainpage.dox
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
\b diagnostic_common_diagnostics contains a few common diagnostic nodes

- ntp_monitor publishes diagnostic messages for how well the NTP time sync is working.
- cpu_monitor publishes diagnostic messages with the CPU usage of the system.
- tf_monitor used to publish diagnostic messages reporting on the health of
the TF tree. It is based on tfwtf. It is not ported to ROS2.

Expand Down
13 changes: 7 additions & 6 deletions diagnostic_common_diagnostics/package.xml
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,22 @@
<buildtool_depend>ament_cmake</buildtool_depend>
<buildtool_depend>ament_cmake_python</buildtool_depend>

<exec_depend>rclpy</exec_depend>
<exec_depend>diagnostic_updater</exec_depend>
<exec_depend>python3-ntplib</exec_depend>
<exec_depend>python3-psutil</exec_depend>
<exec_depend>rclpy</exec_depend>

<test_depend>ament_cmake_lint_cmake</test_depend>
<test_depend>ament_cmake_pytest</test_depend>
<test_depend>ament_cmake_xmllint</test_depend>
<test_depend>ament_lint_auto</test_depend>
<!-- Usage of ament_lint_common is locked by https://github.com/ament/ament_lint/issues/423
For now, enable the linters that do support exclusions.
Once all files are migrated to ROS2, all linters can be
Once all files are migrated to ROS2, all linters can be
enabled via ament_lint_common as the files are cleaned up. -->
<!-- <test_depend>ament_cmake_flake8</test_depend> -->
<!-- <test_depend>ament_cmake_pep257</test_depend> -->
<test_depend>ament_cmake_xmllint</test_depend>
<test_depend>ament_cmake_lint_cmake</test_depend>

<test_depend>ament_cmake_pytest</test_depend>


<export>
<build_type>ament_cmake</build_type>
Expand Down
123 changes: 123 additions & 0 deletions diagnostic_common_diagnostics/test/systemtest/test_cpu_monitor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# -*- coding: utf-8 -*-
# Software License Agreement (BSD License)
#
# Copyright (c) 2023, Robert Bosch GmbH
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
# * Neither the name of the Willow Garage nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import time
import unittest

from diagnostic_common_diagnostics.cpu_monitor import CpuTask

from diagnostic_msgs.msg import DiagnosticStatus

from diagnostic_updater import DiagnosticArray, Updater
from diagnostic_updater import DiagnosticStatusWrapper

import rclpy
from rclpy.node import Node


class TestCPUMonitor(unittest.TestCase):
    """Tests for CpuTask, both stand-alone and registered with an Updater."""

    @classmethod
    def setUpClass(cls):
        rclpy.init(args=None)

    @classmethod
    def tearDownClass(cls):
        if rclpy.ok():
            rclpy.shutdown()

    def setUp(self):
        # Reset per test so the flag always exists before any callback fires.
        self.message_received = False

    def diagnostics_callback(self, msg):
        """Record that a DiagnosticArray arrived and check its status count."""
        self.message_received = True
        self.assertEqual(len(msg.status), 1)

    def test_ok(self):
        # Leave at least 0.1 seconds before psutil.cpu_percent() is called,
        # as recommended for the accuracy of its readings.
        time.sleep(0.1)

        # The task only warns when a load is strictly greater than the
        # threshold, so a threshold of 100 percent always yields OK.
        warning_percentage = 100
        task = CpuTask(warning_percentage)
        stat = DiagnosticStatusWrapper()
        task.run(stat)
        self.assertEqual(task.name, 'CPU Information')
        self.assertEqual(stat.level, DiagnosticStatus.OK)
        self.assertIn('CPU Average', stat.message)

        # Check for at least 1 CPU Load Average and 1 CPU Load
        self.assertGreaterEqual(len(stat.values), 2)

    def test_warn(self):
        # Leave at least 0.1 seconds before psutil.cpu_percent() is called,
        # as recommended for the accuracy of its readings.
        time.sleep(0.1)

        # Any non-negative load is greater than -1, so this always warns.
        warning_percentage = -1
        task = CpuTask(warning_percentage)
        stat = DiagnosticStatusWrapper()
        task.run(stat)
        print(f'Raw readings: {task._readings}')
        self.assertEqual(task.name, 'CPU Information')
        self.assertEqual(stat.level, DiagnosticStatus.WARN)
        self.assertIn('At least one CPU exceeds', stat.message)

        # Check for at least 1 CPU Load Average and 1 CPU Load
        self.assertGreaterEqual(len(stat.values), 2)

    def test_updater(self):
        # Leave at least 0.1 seconds before psutil.cpu_percent() is called,
        # as recommended for the accuracy of its readings.
        time.sleep(0.1)

        node = Node('cpu_monitor_test')
        try:
            updater = Updater(node)
            updater.setHardwareID('test_id')
            updater.add(CpuTask())

            node.create_subscription(
                DiagnosticArray, '/diagnostics', self.diagnostics_callback, 10)

            timeout = 5.0  # Timeout in seconds
            deadline = time.monotonic() + timeout

            while not self.message_received:
                if time.monotonic() >= deadline:
                    self.fail('No diagnostics received')
                # Bounded wait so the deadline is checked regularly even
                # when no callback becomes ready.
                rclpy.spin_once(node, timeout_sec=0.1)
        finally:
            # Do not leak the node into other tests.
            node.destroy_node()


# Run the test cases of this module when the file is executed directly.
if __name__ == '__main__':
unittest.main()

0 comments on commit e4b13e0

Please sign in to comment.