Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(components): improve component error handling #138

Merged
merged 11 commits into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ Release Versions:
- feat(utils): add binary reader and recorder for encoded states (#152)
- feat!: add support for custom inputs and outputs (#133)
- release: version v5.0.0-rc0009 (#155)
- refactor(components): improve component error handling (#138)

## 4.2.2

Expand Down
2 changes: 1 addition & 1 deletion aica-package.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#syntax=ghcr.io/aica-technology/package-builder:v1.1.1

[metadata]
version = "5.0.0-rc0009"
version = "5.0.0-rc0010"
description = "Modular ROS 2 extension library for dynamic composition of components and controllers with the AICA robotics framework"

[metadata.collection]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,5 @@
"parameter_type": "double",
"default_value": "10.0"
}
],
"predicates": [
{
"display_name": "In error state",
"description": "True if the component is in error state",
"predicate_name": "in_error_state"
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ class Component : public rclcpp::Node, public ComponentInterface {
const std::string& signal_name, const std::shared_ptr<DataT>& data, const std::string& default_topic = "",
bool fixed_topic = false, bool publish_on_step = true);

/**
* Set the in_error_state predicate to true and cancel the step timer.
*/
void raise_error() override;

private:
/**
* @brief Step function that is called periodically and publishes predicates, outputs, and evaluates daemon callbacks.
Expand All @@ -80,8 +85,10 @@ class Component : public rclcpp::Node, public ComponentInterface {
void on_execute();

// TODO hide ROS methods
using ComponentInterface::cancel_step;
using ComponentInterface::create_output;
using ComponentInterface::evaluate_periodic_callbacks;
using ComponentInterface::finalize_interfaces;
using ComponentInterface::get_parameter;
using ComponentInterface::inputs_;
using ComponentInterface::outputs_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ class ComponentInterface {
*/
virtual void on_step_callback();

/**
* @brief Cancel the step timer.
*/
void cancel_step();

/**
* @brief Add a parameter.
* @details This method stores a pointer reference to an existing Parameter object in the local parameter map and
Expand Down Expand Up @@ -418,7 +423,7 @@ class ComponentInterface {
void set_qos(const rclcpp::QoS& qos);

/**
* @brief Put the component in error state by setting the 'in_error_state' predicate to true.
* @brief Notify an error in the component.
*/
virtual void raise_error();

Expand All @@ -437,6 +442,11 @@ class ComponentInterface {
*/
void evaluate_periodic_callbacks();

/**
* @brief Finalize all interfaces.
*/
void finalize_interfaces();

std::map<std::string, std::shared_ptr<modulo_core::communication::SubscriptionInterface>> inputs_;///< Map of inputs
std::map<std::string, std::shared_ptr<modulo_core::communication::PublisherInterface>> outputs_; ///< Map of outputs
std::map<std::string, bool> periodic_outputs_;///< Map of outputs with periodic publishing flag
Expand Down Expand Up @@ -539,9 +549,9 @@ class ComponentInterface {
const std::string& frame, const std::string& reference_frame, const tf2::TimePoint& time_point,
const tf2::Duration& duration);

double rate_; ///< The component rate in Hz
double period_; ///< The componet period in s
std::mutex step_mutex_;///< Mutex for step callback
double rate_; ///< The component rate in Hz
double period_; ///< The componet period in s
std::mutex step_mutex_; ///< Mutex for step callback

std::map<std::string, modulo_core::Predicate> predicates_;///< Map of predicates
std::shared_ptr<rclcpp::Publisher<modulo_interfaces::msg::PredicateCollection>>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ class LifecycleComponent : public rclcpp_lifecycle::LifecycleNode, public Compon
*/
rclcpp_lifecycle::State get_lifecycle_state() const;

/**
* @brief Trigger the shutdown and error transitions.
*/
void raise_error() override;

private:
/**
* @brief Transition callback for state 'Configuring'.
Expand Down Expand Up @@ -258,14 +263,13 @@ class LifecycleComponent : public rclcpp_lifecycle::LifecycleNode, public Compon
*/
bool deactivate_outputs();

/**
* @brief Cleanup all inputs and outputs.
*/
bool clear_signals();
bool has_error_;

// TODO hide ROS methods
using ComponentInterface::cancel_step;
using ComponentInterface::create_output;
using ComponentInterface::evaluate_periodic_callbacks;
using ComponentInterface::finalize_interfaces;
using ComponentInterface::get_parameter;
using ComponentInterface::inputs_;
using ComponentInterface::outputs_;
Expand Down Expand Up @@ -314,7 +318,4 @@ inline void LifecycleComponent::add_output(
RCLCPP_ERROR_STREAM(this->get_logger(), "Failed to add output '" << signal_name << "': " << ex.what());
}
}

// TODO, if we raise error we need to manually call the lifecycle change state callback,
// call callback function that this service calls
}// namespace modulo_components
10 changes: 10 additions & 0 deletions source/modulo_components/modulo_components/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def __init__(self, node_name: str, *args, **kwargs):
self.__started = False
self.__execute_thread = None
self.add_predicate("is_finished", False)
self.add_predicate("in_error_state", False)
domire8 marked this conversation as resolved.
Show resolved Hide resolved

def _step(self):
"""
Expand All @@ -35,6 +36,7 @@ def _step(self):
self._publish_predicates()
except Exception as e:
self.get_logger().error(f"Failed to execute step function: {e}", throttle_duration_sec=1.0)
self.raise_error()

def execute(self):
"""
Expand Down Expand Up @@ -94,3 +96,11 @@ def add_output(self, signal_name: str, data: str, message_type: MsgT,
self._outputs[parsed_signal_name]["publisher"] = publisher
except Exception as e:
self.get_logger().error(f"Failed to add output '{signal_name}': {e}")

def raise_error(self):
"""
Set the in_error_state predicate to true and cancel the step timer.
"""
super().raise_error()
self.set_predicate("in_error_state", True)
domire8 marked this conversation as resolved.
Show resolved Hide resolved
self._cancel_step()
28 changes: 24 additions & 4 deletions source/modulo_components/modulo_components/component_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,10 @@ def __init__(self, node_name: str, *args, **kwargs):
self.__predicate_message = PredicateCollection()
self.__predicate_message.node = self.get_fully_qualified_name()
self.__predicate_message.type = PredicateCollection.COMPONENT
self.add_predicate("in_error_state", False)

self._rate = self.get_parameter_value("rate")
self._period = 1.0 / self._rate
self.create_timer(self._period, self.__step_with_mutex)
self.__step_timer = self.create_timer(self._period, self.__step_with_mutex)

def __del__(self):
self.__step_lock.acquire()
Expand Down Expand Up @@ -114,6 +113,13 @@ def on_step_callback(self):
"""
pass

def _cancel_step(self):
"""
Cancel the step timer.
"""
if self.__step_timer:
self.__step_timer.cancel()

def add_parameter(self, parameter: Union[str, sr.Parameter], description: str, read_only=False) -> None:
"""
Add a parameter. This method stores either the name of the attribute corresponding to the parameter object or
Expand Down Expand Up @@ -920,6 +926,20 @@ def __publish_transforms(self, transforms: Iterable[sr.CartesianPose], static=Fa

def raise_error(self):
"""
Put the component in error state by setting the 'in_error_state' predicate to true.
Notify an error in the component.
"""
self.get_logger().error("An error was raised in the component.")

def _finalize_interfaces(self):
"""
Finalize all interfaces.
"""
self.set_predicate("in_error_state", True)
self._inputs = {}
self._outputs = {}
self._services_dict = {}
self.__tf_buffer = None
self.__tf_listener = None
self.__tf_broadcaster = None
self.__static_tf_broadcaster = None
self._predicate_publisher = None
self.__step_timer = None
52 changes: 31 additions & 21 deletions source/modulo_components/modulo_components/lifecycle_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def __init__(self, node_name: str, *args, **kwargs):
lifecycle_node_kwargs = {key: value for key, value in kwargs.items() if key in LIFECYCLE_NODE_MIXIN_KWARGS}
ComponentInterface.__init__(self, node_name, *args, **kwargs)
LifecycleNodeMixin.__init__(self, *args, **lifecycle_node_kwargs)
self._has_error = False

def get_lifecycle_state(self) -> LifecycleState:
"""
Expand Down Expand Up @@ -226,21 +227,23 @@ def on_shutdown(self, previous_state: LifecycleState) -> TransitionCallbackRetur
TRANSITION_CALLBACK_FAILURE, TRANSITION_CALLBACK_ERROR or any uncaught exceptions to 'ErrorProcessing'
"""
self.get_logger().debug(f"on_shutdown called from previous state {previous_state.label}.")
if previous_state.state_id == State.PRIMARY_STATE_FINALIZED:
return TransitionCallbackReturn.SUCCESS
if previous_state.state_id == State.PRIMARY_STATE_ACTIVE:
if not self.__handle_deactivate():
self.get_logger().debug("Shutdown failed during intermediate deactivation!")
if previous_state.state_id == State.PRIMARY_STATE_INACTIVE:
if not self.__handle_cleanup():
self.get_logger().debug("Shutdown failed during intermediate cleanup!")
if previous_state.state_id == State.PRIMARY_STATE_UNCONFIGURED:
if not self.__handle_shutdown():
self.get_logger().error("Entering into the error processing transition state.")
return TransitionCallbackReturn.ERROR
# TODO reset and finalize all properties
return TransitionCallbackReturn.SUCCESS
self.get_logger().warn(f"Invalid transition 'shutdown' from state {previous_state.label}.")
if not self._has_error:
if previous_state.state_id == State.PRIMARY_STATE_FINALIZED:
return TransitionCallbackReturn.SUCCESS
if previous_state.state_id == State.PRIMARY_STATE_ACTIVE:
if not self.__handle_deactivate():
self.get_logger().debug("Shutdown failed during intermediate deactivation!")
if previous_state.state_id == State.PRIMARY_STATE_INACTIVE:
if not self.__handle_cleanup():
self.get_logger().debug("Shutdown failed during intermediate cleanup!")
if previous_state.state_id == State.PRIMARY_STATE_UNCONFIGURED:
if not self.__handle_shutdown():
self.get_logger().error("Entering into the error processing transition state.")
return TransitionCallbackReturn.ERROR
self._finalize_interfaces()
return TransitionCallbackReturn.SUCCESS
self.get_logger().warn(f"Invalid transition 'shutdown' from state {previous_state.label}.")
self.get_logger().error("Entering into the error processing transition state.")
return TransitionCallbackReturn.ERROR

def __handle_shutdown(self) -> bool:
Expand Down Expand Up @@ -277,7 +280,6 @@ def on_error(self, previous_state: LifecycleState) -> TransitionCallbackReturn:
TRANSITION_CALLBACK_ERROR should not be returned, and any exceptions should be caught and returned as a failure
"""
self.get_logger().debug(f"on_error called from previous state {previous_state.label}.")
self.set_predicate("in_error_state", True)
error_handled = False
try:
error_handled = self.__handle_error()
Expand All @@ -286,9 +288,9 @@ def on_error(self, previous_state: LifecycleState) -> TransitionCallbackReturn:
error_handled = False
if not error_handled:
self.get_logger().error("Error processing failed! Entering into the finalized state.")
# TODO reset and finalize all needed properties
self._finalize_interfaces()
return TransitionCallbackReturn.ERROR
self.set_predicate("in_error_state", False)
domire8 marked this conversation as resolved.
Show resolved Hide resolved
self._has_error = False
return TransitionCallbackReturn.SUCCESS

def __handle_error(self) -> bool:
Expand All @@ -306,7 +308,7 @@ def on_error_callback(self) -> bool:

:return: True if error handling is successful, false otherwise
"""
return True
return False

def _step(self):
"""
Expand All @@ -320,8 +322,8 @@ def _step(self):
self._publish_outputs()
self._publish_predicates()
except Exception as e:
self.get_logger().error(f"Failed to execute step function: {e}", throttle_duration_sec=1.0)
# TODO handle error in lifecycle component
self.get_logger().error(f"Failed to execute step function: {e}")
self.raise_error()

def __configure_outputs(self) -> bool:
"""
Expand Down Expand Up @@ -391,3 +393,11 @@ def __deactivate_outputs(self):
self.get_logger().error(f"Failed to deactivate output '{signal_name}': {e}")
self.get_logger().debug("All outputs deactivated.")
return success

def raise_error(self):
"""
Trigger the shutdown and error transitions.
"""
super().raise_error()
self._has_error = True
self.trigger_shutdown()
7 changes: 7 additions & 0 deletions source/modulo_components/src/Component.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Component::Component(const NodeOptions& node_options, const std::string& fallbac
Node::get_node_type_descriptions_interface(), Node::get_node_waitables_interface())),
started_(false) {
this->add_predicate("is_finished", false);
this->add_predicate("in_error_state", false);
}

void Component::step() {
Expand Down Expand Up @@ -60,4 +61,10 @@ bool Component::on_execute_callback() {
std::shared_ptr<state_representation::ParameterInterface> Component::get_parameter(const std::string& name) const {
return ComponentInterface::get_parameter(name);
}

void Component::raise_error() {
ComponentInterface::raise_error();
this->set_predicate("in_error_state", true);
this->cancel_step();
domire8 marked this conversation as resolved.
Show resolved Hide resolved
}
}// namespace modulo_components
25 changes: 21 additions & 4 deletions source/modulo_components/src/ComponentInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ ComponentInterface::ComponentInterface(
this->predicate_message_.node = this->node_base_->get_fully_qualified_name();
this->predicate_message_.type = modulo_interfaces::msg::PredicateCollection::COMPONENT;

this->add_predicate("in_error_state", false);

this->rate_ = this->get_parameter_value<double>("rate");
this->period_ = 1.0 / this->rate_;
this->step_timer_ = rclcpp::create_wall_timer(
Expand Down Expand Up @@ -75,6 +73,12 @@ void ComponentInterface::step() {}

void ComponentInterface::on_step_callback() {}

void ComponentInterface::cancel_step() {
if (this->step_timer_ != nullptr) {
this->step_timer_->cancel();
}
}

void ComponentInterface::add_parameter(
const std::shared_ptr<state_representation::ParameterInterface>& parameter, const std::string& description,
bool read_only) {
Expand Down Expand Up @@ -557,8 +561,7 @@ void ComponentInterface::set_qos(const rclcpp::QoS& qos) {
}

void ComponentInterface::raise_error() {
RCLCPP_DEBUG(this->node_logging_->get_logger(), "raise_error called: Setting predicate 'in_error_state' to true.");
this->set_predicate("in_error_state", true);
RCLCPP_ERROR(this->node_logging_->get_logger(), "An error was raised in the component.");
}

modulo_interfaces::msg::Predicate ComponentInterface::get_predicate_message(const std::string& name, bool value) const {
Expand Down Expand Up @@ -611,4 +614,18 @@ void ComponentInterface::evaluate_periodic_callbacks() {
}
}
}

void ComponentInterface::finalize_interfaces() {
RCLCPP_DEBUG(this->node_logging_->get_logger(), "Finalizing all interfaces.");
this->inputs_.clear();
this->outputs_.clear();
this->predicate_publisher_.reset();
this->empty_services_.clear();
this->string_services_.clear();
this->step_timer_.reset();
domire8 marked this conversation as resolved.
Show resolved Hide resolved
this->tf_buffer_.reset();
this->tf_listener_.reset();
this->tf_broadcaster_.reset();
this->static_tf_broadcaster_.reset();
}
}// namespace modulo_components
Loading
Loading