Skip to content

Commit

Permalink
refactor(components): improve component error handling (#138)
Browse files Browse the repository at this point in the history
  • Loading branch information
domire8 authored Oct 8, 2024
1 parent 8edddf3 commit 1b8d582
Show file tree
Hide file tree
Showing 15 changed files with 153 additions and 103 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ Release Versions:
- feat(utils): add binary reader and recorder for encoded states (#152)
- feat!: add support for custom inputs and outputs (#133)
- release: version v5.0.0-rc0009 (#155)
- refactor(components): improve component error handling (#138)

## 4.2.2

Expand Down
2 changes: 1 addition & 1 deletion aica-package.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#syntax=ghcr.io/aica-technology/package-builder:v1.1.1

[metadata]
version = "5.0.0-rc0009"
version = "5.0.0-rc0010"
description = "Modular ROS 2 extension library for dynamic composition of components and controllers with the AICA robotics framework"

[metadata.collection]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,5 @@
"parameter_type": "double",
"default_value": "10.0"
}
],
"predicates": [
{
"display_name": "In error state",
"description": "True if the component is in error state",
"predicate_name": "in_error_state"
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ class Component : public rclcpp::Node, public ComponentInterface {
const std::string& signal_name, const std::shared_ptr<DataT>& data, const std::string& default_topic = "",
bool fixed_topic = false, bool publish_on_step = true);

/**
* Set the in_error_state predicate to true and cancel the step timer.
*/
void raise_error() override;

private:
/**
* @brief Step function that is called periodically and publishes predicates, outputs, and evaluates daemon callbacks.
Expand All @@ -82,6 +87,7 @@ class Component : public rclcpp::Node, public ComponentInterface {
// TODO hide ROS methods
using ComponentInterface::create_output;
using ComponentInterface::evaluate_periodic_callbacks;
using ComponentInterface::finalize_interfaces;
using ComponentInterface::get_parameter;
using ComponentInterface::inputs_;
using ComponentInterface::outputs_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ class ComponentInterface {
void set_qos(const rclcpp::QoS& qos);

/**
* @brief Put the component in error state by setting the 'in_error_state' predicate to true.
* @brief Notify an error in the component.
*/
virtual void raise_error();

Expand All @@ -437,6 +437,11 @@ class ComponentInterface {
*/
void evaluate_periodic_callbacks();

/**
* @brief Finalize all interfaces.
*/
void finalize_interfaces();

std::map<std::string, std::shared_ptr<modulo_core::communication::SubscriptionInterface>> inputs_;///< Map of inputs
std::map<std::string, std::shared_ptr<modulo_core::communication::PublisherInterface>> outputs_; ///< Map of outputs
std::map<std::string, bool> periodic_outputs_;///< Map of outputs with periodic publishing flag
Expand Down Expand Up @@ -539,9 +544,9 @@ class ComponentInterface {
const std::string& frame, const std::string& reference_frame, const tf2::TimePoint& time_point,
const tf2::Duration& duration);

double rate_; ///< The component rate in Hz
double period_; ///< The componet period in s
std::mutex step_mutex_;///< Mutex for step callback
double rate_; ///< The component rate in Hz
double period_; ///< The componet period in s
std::mutex step_mutex_; ///< Mutex for step callback

std::map<std::string, modulo_core::Predicate> predicates_;///< Map of predicates
std::shared_ptr<rclcpp::Publisher<modulo_interfaces::msg::PredicateCollection>>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ class LifecycleComponent : public rclcpp_lifecycle::LifecycleNode, public Compon
*/
rclcpp_lifecycle::State get_lifecycle_state() const;

/**
* @brief Trigger the shutdown and error transitions.
*/
void raise_error() override;

private:
/**
* @brief Transition callback for state 'Configuring'.
Expand Down Expand Up @@ -258,14 +263,12 @@ class LifecycleComponent : public rclcpp_lifecycle::LifecycleNode, public Compon
*/
bool deactivate_outputs();

/**
* @brief Cleanup all inputs and outputs.
*/
bool clear_signals();
bool has_error_;

// TODO hide ROS methods
using ComponentInterface::create_output;
using ComponentInterface::evaluate_periodic_callbacks;
using ComponentInterface::finalize_interfaces;
using ComponentInterface::get_parameter;
using ComponentInterface::inputs_;
using ComponentInterface::outputs_;
Expand Down Expand Up @@ -314,7 +317,4 @@ inline void LifecycleComponent::add_output(
RCLCPP_ERROR_STREAM(this->get_logger(), "Failed to add output '" << signal_name << "': " << ex.what());
}
}

// TODO, if we raise error we need to manually call the lifecycle change state callback,
// call callback function that this service calls
}// namespace modulo_components
10 changes: 10 additions & 0 deletions source/modulo_components/modulo_components/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def __init__(self, node_name: str, *args, **kwargs):
self.__started = False
self.__execute_thread = None
self.add_predicate("is_finished", False)
self.add_predicate("in_error_state", False)

def _step(self):
"""
Expand All @@ -35,6 +36,7 @@ def _step(self):
self._publish_predicates()
except Exception as e:
self.get_logger().error(f"Failed to execute step function: {e}", throttle_duration_sec=1.0)
self.raise_error()

def execute(self):
"""
Expand Down Expand Up @@ -94,3 +96,11 @@ def add_output(self, signal_name: str, data: str, message_type: MsgT,
self._outputs[parsed_signal_name]["publisher"] = publisher
except Exception as e:
self.get_logger().error(f"Failed to add output '{signal_name}': {e}")

def raise_error(self):
"""
Set the in_error_state predicate to true and cancel the step timer.
"""
super().raise_error()
self.set_predicate("in_error_state", True)
self._finalize_interfaces()
23 changes: 19 additions & 4 deletions source/modulo_components/modulo_components/component_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,10 @@ def __init__(self, node_name: str, *args, **kwargs):
self.__predicate_message = PredicateCollection()
self.__predicate_message.node = self.get_fully_qualified_name()
self.__predicate_message.type = PredicateCollection.COMPONENT
self.add_predicate("in_error_state", False)

self._rate = self.get_parameter_value("rate")
self._period = 1.0 / self._rate
self.create_timer(self._period, self.__step_with_mutex)
self.__step_timer = self.create_timer(self._period, self.__step_with_mutex)

def __del__(self):
self.__step_lock.acquire()
Expand Down Expand Up @@ -920,6 +919,22 @@ def __publish_transforms(self, transforms: Iterable[sr.CartesianPose], static=Fa

def raise_error(self):
"""
Put the component in error state by setting the 'in_error_state' predicate to true.
Notify an error in the component.
"""
self.set_predicate("in_error_state", True)
self.get_logger().error("An error was raised in the component.")

def _finalize_interfaces(self):
"""
Finalize all interfaces.
"""
self._inputs = {}
self._outputs = {}
self._services_dict = {}
self.__tf_buffer = None
self.__tf_listener = None
self.__tf_broadcaster = None
self.__static_tf_broadcaster = None
self._predicate_publisher = None
if self.__step_timer:
self.__step_timer.cancel()
self.__step_timer = None
52 changes: 31 additions & 21 deletions source/modulo_components/modulo_components/lifecycle_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def __init__(self, node_name: str, *args, **kwargs):
lifecycle_node_kwargs = {key: value for key, value in kwargs.items() if key in LIFECYCLE_NODE_MIXIN_KWARGS}
ComponentInterface.__init__(self, node_name, *args, **kwargs)
LifecycleNodeMixin.__init__(self, *args, **lifecycle_node_kwargs)
self._has_error = False

def get_lifecycle_state(self) -> LifecycleState:
"""
Expand Down Expand Up @@ -226,21 +227,23 @@ def on_shutdown(self, previous_state: LifecycleState) -> TransitionCallbackRetur
TRANSITION_CALLBACK_FAILURE, TRANSITION_CALLBACK_ERROR or any uncaught exceptions to 'ErrorProcessing'
"""
self.get_logger().debug(f"on_shutdown called from previous state {previous_state.label}.")
if previous_state.state_id == State.PRIMARY_STATE_FINALIZED:
return TransitionCallbackReturn.SUCCESS
if previous_state.state_id == State.PRIMARY_STATE_ACTIVE:
if not self.__handle_deactivate():
self.get_logger().debug("Shutdown failed during intermediate deactivation!")
if previous_state.state_id == State.PRIMARY_STATE_INACTIVE:
if not self.__handle_cleanup():
self.get_logger().debug("Shutdown failed during intermediate cleanup!")
if previous_state.state_id == State.PRIMARY_STATE_UNCONFIGURED:
if not self.__handle_shutdown():
self.get_logger().error("Entering into the error processing transition state.")
return TransitionCallbackReturn.ERROR
# TODO reset and finalize all properties
return TransitionCallbackReturn.SUCCESS
self.get_logger().warn(f"Invalid transition 'shutdown' from state {previous_state.label}.")
if not self._has_error:
if previous_state.state_id == State.PRIMARY_STATE_FINALIZED:
return TransitionCallbackReturn.SUCCESS
if previous_state.state_id == State.PRIMARY_STATE_ACTIVE:
if not self.__handle_deactivate():
self.get_logger().debug("Shutdown failed during intermediate deactivation!")
if previous_state.state_id == State.PRIMARY_STATE_INACTIVE:
if not self.__handle_cleanup():
self.get_logger().debug("Shutdown failed during intermediate cleanup!")
if previous_state.state_id == State.PRIMARY_STATE_UNCONFIGURED:
if not self.__handle_shutdown():
self.get_logger().error("Entering into the error processing transition state.")
return TransitionCallbackReturn.ERROR
self._finalize_interfaces()
return TransitionCallbackReturn.SUCCESS
self.get_logger().warn(f"Invalid transition 'shutdown' from state {previous_state.label}.")
self.get_logger().error("Entering into the error processing transition state.")
return TransitionCallbackReturn.ERROR

def __handle_shutdown(self) -> bool:
Expand Down Expand Up @@ -277,7 +280,6 @@ def on_error(self, previous_state: LifecycleState) -> TransitionCallbackReturn:
TRANSITION_CALLBACK_ERROR should not be returned, and any exceptions should be caught and returned as a failure
"""
self.get_logger().debug(f"on_error called from previous state {previous_state.label}.")
self.set_predicate("in_error_state", True)
error_handled = False
try:
error_handled = self.__handle_error()
Expand All @@ -286,9 +288,9 @@ def on_error(self, previous_state: LifecycleState) -> TransitionCallbackReturn:
error_handled = False
if not error_handled:
self.get_logger().error("Error processing failed! Entering into the finalized state.")
# TODO reset and finalize all needed properties
self._finalize_interfaces()
return TransitionCallbackReturn.ERROR
self.set_predicate("in_error_state", False)
self._has_error = False
return TransitionCallbackReturn.SUCCESS

def __handle_error(self) -> bool:
Expand All @@ -306,7 +308,7 @@ def on_error_callback(self) -> bool:
:return: True if error handling is successful, false otherwise
"""
return True
return False

def _step(self):
"""
Expand All @@ -320,8 +322,8 @@ def _step(self):
self._publish_outputs()
self._publish_predicates()
except Exception as e:
self.get_logger().error(f"Failed to execute step function: {e}", throttle_duration_sec=1.0)
# TODO handle error in lifecycle component
self.get_logger().error(f"Failed to execute step function: {e}")
self.raise_error()

def __configure_outputs(self) -> bool:
"""
Expand Down Expand Up @@ -391,3 +393,11 @@ def __deactivate_outputs(self):
self.get_logger().error(f"Failed to deactivate output '{signal_name}': {e}")
self.get_logger().debug("All outputs deactivated.")
return success

def raise_error(self):
"""
Trigger the shutdown and error transitions.
"""
super().raise_error()
self._has_error = True
self.trigger_shutdown()
7 changes: 7 additions & 0 deletions source/modulo_components/src/Component.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Component::Component(const NodeOptions& node_options, const std::string& fallbac
Node::get_node_type_descriptions_interface(), Node::get_node_waitables_interface())),
started_(false) {
this->add_predicate("is_finished", false);
this->add_predicate("in_error_state", false);
}

void Component::step() {
Expand Down Expand Up @@ -60,4 +61,10 @@ bool Component::on_execute_callback() {
std::shared_ptr<state_representation::ParameterInterface> Component::get_parameter(const std::string& name) const {
return ComponentInterface::get_parameter(name);
}

void Component::raise_error() {
ComponentInterface::raise_error();
this->set_predicate("in_error_state", true);
this->finalize_interfaces();
}
}// namespace modulo_components
22 changes: 18 additions & 4 deletions source/modulo_components/src/ComponentInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ ComponentInterface::ComponentInterface(
this->predicate_message_.node = this->node_base_->get_fully_qualified_name();
this->predicate_message_.type = modulo_interfaces::msg::PredicateCollection::COMPONENT;

this->add_predicate("in_error_state", false);

this->rate_ = this->get_parameter_value<double>("rate");
this->period_ = 1.0 / this->rate_;
this->step_timer_ = rclcpp::create_wall_timer(
Expand Down Expand Up @@ -557,8 +555,7 @@ void ComponentInterface::set_qos(const rclcpp::QoS& qos) {
}

void ComponentInterface::raise_error() {
RCLCPP_DEBUG(this->node_logging_->get_logger(), "raise_error called: Setting predicate 'in_error_state' to true.");
this->set_predicate("in_error_state", true);
RCLCPP_ERROR(this->node_logging_->get_logger(), "An error was raised in the component.");
}

modulo_interfaces::msg::Predicate ComponentInterface::get_predicate_message(const std::string& name, bool value) const {
Expand Down Expand Up @@ -611,4 +608,21 @@ void ComponentInterface::evaluate_periodic_callbacks() {
}
}
}

void ComponentInterface::finalize_interfaces() {
RCLCPP_DEBUG(this->node_logging_->get_logger(), "Finalizing all interfaces.");
this->inputs_.clear();
this->outputs_.clear();
this->predicate_publisher_.reset();
this->empty_services_.clear();
this->string_services_.clear();
if (this->step_timer_ != nullptr) {
this->step_timer_->cancel();
}
this->step_timer_.reset();
this->tf_buffer_.reset();
this->tf_listener_.reset();
this->tf_broadcaster_.reset();
this->static_tf_broadcaster_.reset();
}
}// namespace modulo_components
Loading

0 comments on commit 1b8d582

Please sign in to comment.