Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(robot-server): start logic for dynamic error recovery policy #15707

Merged
merged 13 commits into from
Jul 24, 2024
6 changes: 2 additions & 4 deletions api/src/opentrons/protocol_engine/error_recovery_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,8 @@ class ErrorRecoveryType(enum.Enum):
WAIT_FOR_RECOVERY = enum.auto()
"""Stop and wait for the error to be recovered from manually."""

# TODO(mm, 2023-03-18): Add something like this for
# https://opentrons.atlassian.net/browse/EXEC-302.
# CONTINUE = enum.auto()
# """Continue with the run, as if the command never failed."""
IGNORE_AND_CONTINUE = enum.auto()
"""Continue with the run, as if the command never failed."""


class ErrorRecoveryPolicy(Protocol):
Expand Down
5 changes: 4 additions & 1 deletion api/src/opentrons/protocol_engine/state/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,10 @@ def _handle_fail_command_action(self, action: FailCommandAction) -> None:
other_command_ids_to_fail = list(
self._state.command_history.get_queue_ids()
)
elif action.type == ErrorRecoveryType.WAIT_FOR_RECOVERY:
elif (
action.type == ErrorRecoveryType.WAIT_FOR_RECOVERY
or action.type == ErrorRecoveryType.IGNORE_AND_CONTINUE
):
other_command_ids_to_fail = []
else:
assert_never(action.type)
Expand Down
46 changes: 46 additions & 0 deletions robot-server/robot_server/runs/error_recovery_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""Functions used for managing error recovery policy."""
from typing import Optional
from opentrons.protocol_engine.state.config import Config
from robot_server.runs.error_recovery_models import ErrorRecoveryRule, ReactionIfMatch
from opentrons.protocol_engine.commands.command_unions import (
Command,
CommandDefinedErrorData,
)
from opentrons.protocol_engine.error_recovery_policy import (
ErrorRecoveryPolicy,
ErrorRecoveryType,
standard_run_policy,
)


def create_error_recovery_policy_from_rules(
    rules: list[ErrorRecoveryRule],
) -> ErrorRecoveryPolicy:
    """Build an error recovery policy that applies `rules` in order.

    The returned policy walks every criteria of every rule, in order. The first
    criteria whose command type and error type both equal those of the failed
    command decides the outcome, using the reaction stored at the same index
    in that rule's `ifMatch` list. When nothing matches (which is always the
    case when `defined_error_data` is `None`), the decision is delegated to
    `standard_run_policy`.

    The returned policy raises `NotImplementedError` when the matching
    reaction is `IGNORE_AND_CONTINUE`.
    """

    def _policy(
        config: Config,
        failed_command: Command,
        defined_error_data: Optional[CommandDefinedErrorData],
    ) -> ErrorRecoveryType:
        for rule in rules:
            for index, criteria in enumerate(rule.matchCriteria):
                matcher = criteria.command

                # Skip this criteria unless both the command type and the
                # error type line up with the failure being handled.
                if failed_command.commandType != matcher.commandType:
                    continue
                if defined_error_data is None:
                    continue
                if defined_error_data.public.errorType != matcher.error.errorType:
                    continue

                # `ifMatch` is indexed in parallel with `matchCriteria`.
                reaction = rule.ifMatch[index]
                if reaction == ReactionIfMatch.IGNORE_AND_CONTINUE:
                    # No protocol engine support for this yet. It's in EXEC-302.
                    raise NotImplementedError
                if reaction == ReactionIfMatch.FAIL_RUN:
                    return ErrorRecoveryType.FAIL_RUN
                if reaction == ReactionIfMatch.WAIT_FOR_RECOVERY:
                    return ErrorRecoveryType.WAIT_FOR_RECOVERY

        # No rule produced a decision; use the default behavior.
        return standard_run_policy(config, failed_command, defined_error_data)

    return _policy
69 changes: 69 additions & 0 deletions robot-server/robot_server/runs/error_recovery_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Request and response models for dealing with error recovery policies."""
from enum import Enum
from pydantic import BaseModel, Field


class ReactionIfMatch(Enum):
    """The type of the error recovery setting.

    * `"ignoreAndContinue"`: Ignore this error and future errors of the same type.
    * `"failRun"`: Errors of this type should fail the run.
    * `"waitForRecovery"`: Instances of this error should initiate a recovery
      operation.

    """

    # Each member's value is the string form listed in the docstring above.
    IGNORE_AND_CONTINUE = "ignoreAndContinue"
    FAIL_RUN = "failRun"
    WAIT_FOR_RECOVERY = "waitForRecovery"


# There are a lot of nested classes here. This is the JSON shape this code models.
# (The sketch shows one criteria and one reaction; the models below hold lists of each.)
# "ErrorRecoveryRule": {
# "matchCriteria": {
# "command": {
# "commandType": "foo",
# "error": {
# "errorType": "bar"
# }
# }
# },
# "ifMatch": "baz"
# }


class ErrorMatcher(BaseModel):
    """The error type that this rule applies to.

    Innermost level of the nested match criteria; it holds only the error type
    string.
    """

    # Required field (no default).
    errorType: str = Field(..., description="The error type that this rule applies to.")


class CommandMatcher(BaseModel):
    """Command/error data used for matching rules.

    Pairs one command type with one error matcher. Both fields are required.
    """

    commandType: str = Field(
        ..., description="The command type that this rule applies to."
    )
    error: ErrorMatcher = Field(
        ..., description="The error details that this rule applies to."
    )


class MatchCriteria(BaseModel):
    """The criteria that this rule will attempt to match.

    A thin wrapper whose single required field, `command`, carries the command
    type and the nested error type.
    """

    command: CommandMatcher = Field(
        ..., description="The command and error types that this rule applies to."
    )


class ErrorRecoveryRule(BaseModel):
    """Request/Response model for new error recovery rule creation.

    `matchCriteria` and `ifMatch` are parallel lists: the rule-matching code
    looks up the reaction for `matchCriteria[i]` at `ifMatch[i]`. Both default
    to an empty list. Nothing in this model checks that the two lists have the
    same length.
    """

    matchCriteria: list[MatchCriteria] = Field(
        default_factory=list,
        description="The criteria that must be met for this rule to be applied.",
    )
    # Reaction for the criteria at the same index in `matchCriteria`.
    ifMatch: list[ReactionIfMatch] = Field(
        default_factory=list,
        description="The specific recovery setting that will be in use if the type parameters match.",
    )
Comment on lines +59 to +69
Copy link
Contributor

@SyntaxColoring SyntaxColoring Jul 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK so it looks like this is modeling a structure that's like this (in loose sketchy form):

rules
    matchCriteria
        criteria 1 (with one commandType and one errorType)
        criteria 2 (with one commandType and one errorType)
        criteria 3 (with one commandType and one errorType)
    ifMatch
        reaction 1
        reaction 2
        reaction 3

Where criteria 1 gets paired with reaction 1, criteria 2 gets paired with reaction 2, etc.

I think instead, we want:

rules
    rule 1
        matchCriteria (with one commandType and one errorType)
        ifMatch reaction
    rule 2
        matchCriteria (with one commandType and one errorType)
        ifMatch reaction

But I think we can merge this as-is and address it separately. On its own terms, this looks correctly-implemented, and this PR nicely unblocks both the HTTP API work and the internal opentrons.protocol_engine work.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1 change: 1 addition & 0 deletions robot-server/robot_server/service/errors.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# TODO(mc, 2021-05-10): delete this file; these models have been moved to
# robot_server/errors/error_responses.py and robot_server/errors/global_errors.py
# Note: (2024-07-18): this file does not actually seem to be safe to delete
from dataclasses import dataclass, asdict
from enum import Enum
from typing import Any, Dict, Optional, Sequence, Tuple
Expand Down
118 changes: 118 additions & 0 deletions robot-server/tests/runs/test_error_recovery_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Unit tests for `error_recovery_mapping`."""
import pytest
from decoy import Decoy


from opentrons.protocol_engine.commands.pipetting_common import (
LiquidNotFoundError,
LiquidNotFoundErrorInternalData,
)
from opentrons.protocol_engine.commands.command import (
DefinedErrorData,
)
from opentrons.protocol_engine.commands.command_unions import CommandDefinedErrorData
from opentrons.protocol_engine.commands.liquid_probe import LiquidProbe
from opentrons.protocol_engine.error_recovery_policy import ErrorRecoveryType
from opentrons.protocol_engine.state.config import Config
from opentrons.protocol_engine.types import DeckType
from robot_server.runs.error_recovery_mapping import (
create_error_recovery_policy_from_rules,
)
from robot_server.runs.error_recovery_models import (
ErrorRecoveryRule,
MatchCriteria,
CommandMatcher,
ErrorMatcher,
ReactionIfMatch,
)


@pytest.fixture
def mock_command(decoy: Decoy) -> LiquidProbe:
    """Get a mock LiquidProbe command whose commandType is "liquidProbe"."""
    liquid_probe = decoy.mock(cls=LiquidProbe)
    decoy.when(liquid_probe.commandType).then_return("liquidProbe")
    return liquid_probe


@pytest.fixture
def mock_error_data(decoy: Decoy) -> CommandDefinedErrorData:
    """Get mock defined-error data wrapping a mock LiquidNotFoundError.

    The public error reports "liquidNotFound" as its errorType.
    """
    public_error = decoy.mock(cls=LiquidNotFoundError)
    decoy.when(public_error.errorType).then_return("liquidNotFound")

    error_data = decoy.mock(
        cls=DefinedErrorData[LiquidNotFoundError, LiquidNotFoundErrorInternalData]
    )
    decoy.when(error_data.public).then_return(public_error)
    return error_data


@pytest.fixture
def mock_criteria(decoy: Decoy) -> MatchCriteria:
    """Get a mock MatchCriteria for "liquidProbe" commands failing with "liquidNotFound"."""
    error_matcher = decoy.mock(cls=ErrorMatcher)
    decoy.when(error_matcher.errorType).then_return("liquidNotFound")

    command_matcher = decoy.mock(cls=CommandMatcher)
    decoy.when(command_matcher.commandType).then_return("liquidProbe")
    decoy.when(command_matcher.error).then_return(error_matcher)

    criteria = decoy.mock(cls=MatchCriteria)
    decoy.when(criteria.command).then_return(command_matcher)
    return criteria


@pytest.fixture
def mock_rule(decoy: Decoy, mock_criteria: MatchCriteria) -> ErrorRecoveryRule:
    """Get a mock ErrorRecoveryRule pairing `mock_criteria` with IGNORE_AND_CONTINUE."""
    rule = decoy.mock(cls=ErrorRecoveryRule)
    decoy.when(rule.matchCriteria).then_return([mock_criteria])
    decoy.when(rule.ifMatch).then_return([ReactionIfMatch.IGNORE_AND_CONTINUE])
    return rule


def test_create_error_recovery_policy_with_rules(
    decoy: Decoy,
    mock_command: LiquidProbe,
    mock_error_data: CommandDefinedErrorData,
    mock_rule: ErrorRecoveryRule,
) -> None:
    """Should raise NotImplementedError when the matching rule is IGNORE_AND_CONTINUE."""
    example_config = Config(
        robot_type="OT-3 Standard",
        deck_type=DeckType.OT3_STANDARD,
    )
    subject = create_error_recovery_policy_from_rules([mock_rule])

    with pytest.raises(NotImplementedError):
        subject(example_config, mock_command, mock_error_data)


def test_create_error_recovery_policy_undefined_error(
    decoy: Decoy, mock_command: LiquidProbe
) -> None:
    """Should fall back to FAIL_RUN when no rule matches and the error is not defined."""
    example_config = Config(
        robot_type="OT-3 Standard",
        deck_type=DeckType.OT3_STANDARD,
    )
    # A rule with no criteria can never match, so the default policy decides.
    empty_rule = ErrorRecoveryRule(matchCriteria=[], ifMatch=[])
    subject = create_error_recovery_policy_from_rules([empty_rule])

    result = subject(example_config, mock_command, None)

    assert result == ErrorRecoveryType.FAIL_RUN


def test_create_error_recovery_policy_defined_error(
    decoy: Decoy, mock_command: LiquidProbe, mock_error_data: CommandDefinedErrorData
) -> None:
    """Should fall back to WAIT_FOR_RECOVERY when no rule matches and the error is defined."""
    example_config = Config(
        robot_type="OT-3 Standard",
        deck_type=DeckType.OT3_STANDARD,
    )
    # A rule with no criteria can never match, so the default policy decides.
    empty_rule = ErrorRecoveryRule(matchCriteria=[], ifMatch=[])
    subject = create_error_recovery_policy_from_rules([empty_rule])

    result = subject(example_config, mock_command, mock_error_data)

    assert result == ErrorRecoveryType.WAIT_FOR_RECOVERY
Loading