From b359bec6c849cb13838cb0544c97f7f9e30f7ac7 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Mon, 20 May 2024 15:16:27 +0100 Subject: [PATCH 01/43] factor out transmission protocol --- .../remote/{target.go => target_caller.go} | 76 +++++++----- core/capabilities/remote/target_receiver.go | 35 ++++++ .../capabilities/transmission/transmission.go | 115 ++++++++++++++++++ .../transmission/transmission_test.go | 102 ++++++++++++++++ core/services/workflows/execution_strategy.go | 108 ++++------------ 5 files changed, 318 insertions(+), 118 deletions(-) rename core/capabilities/remote/{target.go => target_caller.go} (54%) create mode 100644 core/capabilities/remote/target_receiver.go create mode 100644 core/capabilities/transmission/transmission.go create mode 100644 core/capabilities/transmission/transmission_test.go diff --git a/core/capabilities/remote/target.go b/core/capabilities/remote/target_caller.go similarity index 54% rename from core/capabilities/remote/target.go rename to core/capabilities/remote/target_caller.go index 655f4f84abb..452e6e2bd8d 100644 --- a/core/capabilities/remote/target.go +++ b/core/capabilities/remote/target_caller.go @@ -21,15 +21,6 @@ type remoteTargetCaller struct { var _ commoncap.TargetCapability = &remoteTargetCaller{} var _ types.Receiver = &remoteTargetCaller{} -type remoteTargetReceiver struct { - capInfo commoncap.CapabilityInfo - donInfo *capabilities.DON - dispatcher types.Dispatcher - lggr logger.Logger -} - -var _ types.Receiver = &remoteTargetReceiver{} - func NewRemoteTargetCaller(capInfo commoncap.CapabilityInfo, donInfo *capabilities.DON, dispatcher types.Dispatcher, lggr logger.Logger) *remoteTargetCaller { return &remoteTargetCaller{ capInfo: capInfo, @@ -51,20 +42,56 @@ func (c *remoteTargetCaller) UnregisterFromWorkflow(ctx context.Context, request return errors.New("not implemented") } -func (c *remoteTargetCaller) Execute(ctx context.Context, request commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { - c.lggr.Debugw("not implemented - executing fake remote target capability", "capabilityId", c.capInfo.ID, "nMembers", len(c.donInfo.Members)) - for _, peerID := range c.donInfo.Members { - m := &types.MessageBody{ - CapabilityId: c.capInfo.ID, - CapabilityDonId: c.donInfo.ID, - Payload: []byte{0x01, 0x02, 0x03}, +func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { + + /* + if c.capInfo.DON == nil { + return nil, errors.New("missing DON in capability info") } - err := c.dispatcher.Send(peerID, m) + + tc, err := workflows.ExtractTransmissionConfig(req.Config) if err != nil { return nil, err } - } + n := len(c.capInfo.DON.Members) + key := workflows.ScheduleSeed(c.donInfo.Config.SharedSecret, req.Metadata.WorkflowID, req.Metadata.WorkflowExecutionID) + sched, err := workflows.Schedule(tc.Schedule, n) + if err != nil { + return nil, err + } + + picked := permutation.Permutation(n, key) + delay := workflows.DelayFor(d.Position, sched, picked, tc.DeltaStage) + if delay == nil { + lggr.Debugw("skipping transmission: node is not included in schedule") + return nil, nil + } + + lggr.Debugf("execution delayed by %+v", *delay) + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-time.After(*delay): + lggr.Debugw("executing delayed execution") + return immediateExecution{}.Apply(ctx, lggr, cap, req) + } + + c.lggr.Debugw("not implemented - executing fake remote target capability", "capabilityId", c.capInfo.ID, "nMembers", len(c.donInfo.Members)) + for _, peerID := range c.donInfo.Members { + m := &types.MessageBody{ + CapabilityId: c.capInfo.ID, + CapabilityDonId: c.donInfo.ID, + Payload: []byte{0x01, 0x02, 0x03}, + } + err := c.dispatcher.Send(peerID, m) + if err != nil { + return nil, err + } + } + + + */ // TODO: return a channel that will be closed when all responses are received return nil, nil } @@ -72,16 +99,3 @@ func (c *remoteTargetCaller) Execute(ctx context.Context, request commoncap.Capa func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { c.lggr.Debugw("not implemented - received message", "capabilityId", c.capInfo.ID, "payload", msg.Payload) } - -func NewRemoteTargetReceiver(capInfo commoncap.CapabilityInfo, donInfo *capabilities.DON, dispatcher types.Dispatcher, lggr logger.Logger) *remoteTargetReceiver { - return &remoteTargetReceiver{ - capInfo: capInfo, - donInfo: donInfo, - dispatcher: dispatcher, - lggr: lggr, - } -} - -func (c *remoteTargetReceiver) Receive(msg *types.MessageBody) { - c.lggr.Debugw("not implemented - received message", "capabilityId", c.capInfo.ID, "payload", msg.Payload) -} diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go new file mode 100644 index 00000000000..e8c0776b8ea --- /dev/null +++ b/core/capabilities/remote/target_receiver.go @@ -0,0 +1,35 @@ +package remote + +// here the only executes when it recieves a report from f + 1 nodes, can use the message cache to collect up these reports + +// the chain write is waiting for f + 1 reports to be collected before it will execute the transmission + +import ( + commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" + + "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink/v2/core/logger" +) + +type remoteTargetReceiver struct { + capInfo commoncap.CapabilityInfo + donInfo *capabilities.DON + dispatcher types.Dispatcher + lggr logger.Logger +} + +var _ types.Receiver = &remoteTargetReceiver{} + +func NewRemoteTargetReceiver(capInfo commoncap.CapabilityInfo, donInfo *capabilities.DON, dispatcher types.Dispatcher, lggr logger.Logger) *remoteTargetReceiver { + return &remoteTargetReceiver{ + capInfo: capInfo, + donInfo: donInfo, + dispatcher: dispatcher, + lggr: lggr, + } +} + +func (c *remoteTargetReceiver) Receive(msg *types.MessageBody) { + c.lggr.Debugw("not implemented - received message", "capabilityId", c.capInfo.ID, "payload", msg.Payload) +} diff --git a/core/capabilities/transmission/transmission.go b/core/capabilities/transmission/transmission.go new file mode 100644 index 00000000000..6ee70ec3472 --- /dev/null +++ b/core/capabilities/transmission/transmission.go @@ -0,0 +1,115 @@ +package transmission + +import ( + "fmt" + "time" + + "golang.org/x/crypto/sha3" + + "github.com/smartcontractkit/chainlink-common/pkg/values" + p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" + "github.com/smartcontractkit/libocr/permutation" + ragep2ptypes "github.com/smartcontractkit/libocr/ragep2p/types" +) + +// TODO determine location for this code + +var ( + // S = [N] + Schedule_AllAtOnce = "allAtOnce" + // S = [1 * N] + Schedule_OneAtATime = "oneAtATime" +) + +type TransmissionConfig struct { + Schedule string + DeltaStage time.Duration +} + +func ExtractTransmissionConfig(config *values.Map) (TransmissionConfig, error) { + var tc struct { + DeltaStage string + Schedule string + } + err := config.UnwrapTo(&tc) + if err != nil { + return TransmissionConfig{}, err + } + + duration, err := time.ParseDuration(tc.DeltaStage) + if err != nil { + return TransmissionConfig{}, fmt.Errorf("failed to parse DeltaStage %s as duration: %w", tc.DeltaStage, err) + } + + return TransmissionConfig{ + Schedule: tc.Schedule, + DeltaStage: duration, + }, nil +} + +// GetPeerIDToTransmissionDelay returns a map of PeerID to the time.Duration that the node with that PeerID should wait +// before transmitting. If a node is not in the map, it should not transmit. +func GetPeerIDToTransmissionDelay(donPeerIDs []ragep2ptypes.PeerID, sharedSecret [16]byte, workflowID string, + workflowExecutionID string, tc TransmissionConfig) (map[p2ptypes.PeerID]*time.Duration, error) { + donMemberCount := len(donPeerIDs) + key := scheduleSeed(sharedSecret, workflowID, workflowExecutionID) + sched, err := schedule(tc.Schedule, donMemberCount) + if err != nil { + return nil, err + } + + picked := permutation.Permutation(donMemberCount, key) + + peerIDToTransmissionDelay := map[p2ptypes.PeerID]*time.Duration{} + for i, peerID := range donPeerIDs { + delay := delayFor(i, sched, picked, tc.DeltaStage) + if delay != nil { + peerIDToTransmissionDelay[peerID] = delay + } + } + return peerIDToTransmissionDelay, nil +} + +func delayFor(position int, schedule []int, permutation []int, deltaStage time.Duration) *time.Duration { + sum := 0 + for i, s := range schedule { + sum += s + if permutation[position] < sum { + result := time.Duration(i) * deltaStage + return &result + } + } + + return nil +} + +func schedule(sched string, N int) ([]int, error) { + switch sched { + case Schedule_AllAtOnce: + return []int{N}, nil + case Schedule_OneAtATime: + sch := []int{} + for i := 0; i < N; i++ { + sch = append(sch, 1) + } + return sch, nil + } + return nil, fmt.Errorf("unknown schedule %s", sched) +} + +// scheduleSeed uses a shared secret, combined with a workflowID and a workflowExecutionID to generate +// a secret that can later be used to pseudo-randomly determine a schedule for a set of nodes in a DON. +// The addition of the workflowExecutionID -- which nodes don't know ahead of time -- additionally guarantees +// that a malicious coalition of nodes can't "game" the schedule. +// IMPORTANT: changing this function should happen carefully to maintain the guarantee that all nodes +// arrive at the same secret. +func scheduleSeed(sharedSecret [16]byte, workflowID, workflowExecutionID string) [16]byte { + hash := sha3.NewLegacyKeccak256() + hash.Write(sharedSecret[:]) + hash.Write([]byte(workflowID)) + hash.Write([]byte(workflowExecutionID)) + + var key [16]byte + copy(key[:], hash.Sum(nil)) + return key +} diff --git a/core/capabilities/transmission/transmission_test.go b/core/capabilities/transmission/transmission_test.go new file mode 100644 index 00000000000..54ef4cba6b0 --- /dev/null +++ b/core/capabilities/transmission/transmission_test.go @@ -0,0 +1,102 @@ +package transmission + +import ( + "encoding/hex" + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/smartcontractkit/chainlink-common/pkg/values" + p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" +) + +func Test_GetPeerIDToTransmissionDelay(t *testing.T) { + + peer1 := [32]byte([]byte(fmt.Sprintf("%-32s", "one"))) + peer2 := [32]byte([]byte(fmt.Sprintf("%-32s", "two"))) + peer3 := [32]byte([]byte(fmt.Sprintf("%-32s", "three"))) + peer4 := [32]byte([]byte(fmt.Sprintf("%-32s", "four"))) + + ids := []p2ptypes.PeerID{ + peer1, peer2, peer3, peer4, + } + + testCases := []struct { + name string + peerName string + sharedSecret string + schedule string + deltaStage string + workflowExecutionID string + expectedDelays map[string]time.Duration + }{ + { + "TestOneAtATime", + "one", + "fb13ca015a9ec60089c7141e9522de79", + "oneAtATime", + "100ms", + "mock-execution-id", + map[string]time.Duration{ + "one": 300 * time.Millisecond, + "two": 200 * time.Millisecond, + "three": 0 * time.Millisecond, + "four": 100 * time.Millisecond, + }, + }, + { + "TestAllAtOnce", + "one", + "fb13ca015a9ec60089c7141e9522de79", + "allAtOnce", + "100ms", + "mock-execution-id", + map[string]time.Duration{ + "one": 0 * time.Millisecond, + "two": 0 * time.Millisecond, + "three": 0 * time.Millisecond, + "four": 0 * time.Millisecond, + }, + }, + { + "TestOneAtATimeWithDifferentExecutionID", + "one", + "fb13ca015a9ec60089c7141e9522de79", + "oneAtATime", + "100ms", + "mock-execution-id2", + map[string]time.Duration{ + "one": 0 * time.Millisecond, + "two": 300 * time.Millisecond, + "three": 100 * time.Millisecond, + "four": 200 * time.Millisecond, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + + sharedSecret, err := hex.DecodeString(tc.sharedSecret) + require.NoError(t, err) + + m, err := values.NewMap(map[string]any{ + "schedule": tc.schedule, + "deltaStage": tc.deltaStage, + }) + require.NoError(t, err) + transmissionCfg, err := ExtractTransmissionConfig(m) + + peerIdToDelay, err := GetPeerIDToTransmissionDelay(ids, [16]byte(sharedSecret), "mock-workflow-id", tc.workflowExecutionID, transmissionCfg) + require.NoError(t, err) + + assert.Equal(t, tc.expectedDelays["one"], *peerIdToDelay[peer1]) + assert.Equal(t, tc.expectedDelays["two"], *peerIdToDelay[peer2]) + assert.Equal(t, tc.expectedDelays["three"], *peerIdToDelay[peer3]) + assert.Equal(t, tc.expectedDelays["four"], *peerIdToDelay[peer4]) + }) + } +} diff --git a/core/services/workflows/execution_strategy.go b/core/services/workflows/execution_strategy.go index f5da8bca4be..4f3b8c5e7a3 100644 --- a/core/services/workflows/execution_strategy.go +++ b/core/services/workflows/execution_strategy.go @@ -7,12 +7,9 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/values" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" "github.com/smartcontractkit/chainlink/v2/core/logger" p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" - - "github.com/smartcontractkit/libocr/permutation" - - "golang.org/x/crypto/sha3" ) type executionStrategy interface { @@ -47,19 +44,12 @@ type scheduledExecution struct { Position int } -var ( - // S = [N] - Schedule_AllAtOnce = "allAtOnce" - // S = [1 * N] - Schedule_OneAtATime = "oneAtATime" -) - // scheduledExecution generates a pseudo-random transmission schedule, // and delays execution until a node is required to transmit. func (d scheduledExecution) Apply(ctx context.Context, lggr logger.Logger, cap capabilities.CallbackCapability, req capabilities.CapabilityRequest) (values.Value, error) { - tc, err := d.transmissionConfig(req.Config) + tc, err := transmission.ExtractTransmissionConfig(req.Config) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to extract transmission config from request config: %w", err) } info, err := cap.Info(ctx) @@ -70,15 +60,17 @@ func (d scheduledExecution) Apply(ctx context.Context, lggr logger.Logger, cap c switch { // Case 1: Local DON case info.DON == nil: - n := len(d.DON.Members) - key := d.key(d.DON.Config.SharedSecret, req.Metadata.WorkflowID, req.Metadata.WorkflowExecutionID) - sched, err := schedule(tc.Schedule, n) + donPeerIDs := d.DON.Members + sharedSecret := d.DON.Config.SharedSecret + workflowID := req.Metadata.WorkflowID + workflowExecutionID := req.Metadata.WorkflowExecutionID + + peerIDToTransmissionDelay, err := transmission.GetPeerIDToTransmissionDelay(donPeerIDs, sharedSecret, workflowID, workflowExecutionID, tc) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to get peer ID to transmission delay map: %w", err) } - picked := permutation.Permutation(n, key) - delay := d.delayFor(d.Position, sched, picked, tc.DeltaStage) + delay := peerIDToTransmissionDelay[*d.PeerID] if delay == nil { lggr.Debugw("skipping transmission: node is not included in schedule") return nil, nil @@ -94,78 +86,20 @@ func (d scheduledExecution) Apply(ctx context.Context, lggr logger.Logger, cap c } // Case 2: Remote DON default: - // TODO: fill in the remote DON case once consensus has been reach on what to do. - lggr.Debugw("remote DON transmission not implemented: using immediate execution") - return immediateExecution{}.Apply(ctx, lggr, cap, req) - } -} -// `key` uses a shared secret, combined with a workflowID and a workflowExecutionID to generate -// a secret that can later be used to pseudo-randomly determine a schedule for a set of nodes in a DON. -// The addition of the workflowExecutionID -- which nodes don't know ahead of time -- additionally guarantees -// that a malicious coalition of nodes can't "game" the schedule. -// IMPORTANT: changing this function should happen carefully to maintain the guarantee that all nodes -// arrive at the same secret. -func (d scheduledExecution) key(sharedSecret [16]byte, workflowID, workflowExecutionID string) [16]byte { - hash := sha3.NewLegacyKeccak256() - hash.Write(sharedSecret[:]) - hash.Write([]byte(workflowID)) - hash.Write([]byte(workflowExecutionID)) - - var key [16]byte - copy(key[:], hash.Sum(nil)) - return key -} + // In this case just execute immediately on the capability and the shims will handle the scheduling and f+1 aggregation -type transmissionConfig struct { - Schedule string - DeltaStage time.Duration -} + // so in this scenario, the local worflow nodes all have to tell the remote node that it should execute the transmission + // the remote node should only do this when it receives f + 1 requests with the same report. -func (d scheduledExecution) transmissionConfig(config *values.Map) (transmissionConfig, error) { - var tc struct { - DeltaStage string - Schedule string - } - err := config.UnwrapTo(&tc) - if err != nil { - return transmissionConfig{}, err - } - - duration, err := time.ParseDuration(tc.DeltaStage) - if err != nil { - return transmissionConfig{}, fmt.Errorf("failed to parse DeltaStage %s as duration: %w", tc.DeltaStage, err) - } - - return transmissionConfig{ - Schedule: tc.Schedule, - DeltaStage: duration, - }, nil -} - -func (d scheduledExecution) delayFor(position int, schedule []int, permutation []int, deltaStage time.Duration) *time.Duration { - sum := 0 - for i, s := range schedule { - sum += s - if permutation[position] < sum { - result := time.Duration(i) * deltaStage - return &result - } - } + // ok, so here we are given a capability - we would have to execute against dons and have f+1 to ensure at least one + // honest node will transmit the message. - return nil -} + // so the question becomes, do we execute against the DONS here or do we have the consensus publisher + // subscriber somehow embed the logic? -func schedule(sched string, N int) ([]int, error) { - switch sched { - case Schedule_AllAtOnce: - return []int{N}, nil - case Schedule_OneAtATime: - sch := []int{} - for i := 0; i < N; i++ { - sch = append(sch, 1) - } - return sch, nil + // TODO: fill in the remote DON case once consensus has been reach on what to do. + lggr.Debugw("remote DON transmission not implemented: using immediate execution") + return immediateExecution{}.Apply(ctx, lggr, cap, req) } - return nil, fmt.Errorf("unknown schedule %s", sched) } From bc87132a32c2ed285a228935bfb1d7c70e929ab8 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Mon, 20 May 2024 15:20:55 +0100 Subject: [PATCH 02/43] tidyup --- core/services/workflows/execution_strategy.go | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/core/services/workflows/execution_strategy.go b/core/services/workflows/execution_strategy.go index 4f3b8c5e7a3..6682f284220 100644 --- a/core/services/workflows/execution_strategy.go +++ b/core/services/workflows/execution_strategy.go @@ -60,12 +60,9 @@ func (d scheduledExecution) Apply(ctx context.Context, lggr logger.Logger, cap c switch { // Case 1: Local DON case info.DON == nil: - donPeerIDs := d.DON.Members - sharedSecret := d.DON.Config.SharedSecret - workflowID := req.Metadata.WorkflowID - workflowExecutionID := req.Metadata.WorkflowExecutionID - peerIDToTransmissionDelay, err := transmission.GetPeerIDToTransmissionDelay(donPeerIDs, sharedSecret, workflowID, workflowExecutionID, tc) + peerIDToTransmissionDelay, err := transmission.GetPeerIDToTransmissionDelay(d.DON.Members, d.DON.Config.SharedSecret, + req.Metadata.WorkflowID, req.Metadata.WorkflowExecutionID, tc) if err != nil { return nil, fmt.Errorf("failed to get peer ID to transmission delay map: %w", err) } @@ -89,15 +86,6 @@ func (d scheduledExecution) Apply(ctx context.Context, lggr logger.Logger, cap c // In this case just execute immediately on the capability and the shims will handle the scheduling and f+1 aggregation - // so in this scenario, the local worflow nodes all have to tell the remote node that it should execute the transmission - // the remote node should only do this when it receives f + 1 requests with the same report. - - // ok, so here we are given a capability - we would have to execute against dons and have f+1 to ensure at least one - // honest node will transmit the message. - - // so the question becomes, do we execute against the DONS here or do we have the consensus publisher - // subscriber somehow embed the logic? - // TODO: fill in the remote DON case once consensus has been reach on what to do. lggr.Debugw("remote DON transmission not implemented: using immediate execution") return immediateExecution{}.Apply(ctx, lggr, cap, req) From 990f28cd6b6d5b2b4259fa7c57bc1ff976394fe3 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Mon, 20 May 2024 18:03:24 +0100 Subject: [PATCH 03/43] wip --- core/capabilities/remote/target_caller.go | 138 ++++++++++++------ .../capabilities/remote/target_caller_test.go | 99 +++++++++++++ core/capabilities/remote/target_test.go | 30 ---- .../remote/trigger_publisher_test.go | 6 +- .../remote/trigger_subscriber_test.go | 14 +- core/capabilities/remote/types/types.go | 1 + .../capabilities/transmission/transmission.go | 6 +- .../transmission/transmission_test.go | 8 +- 8 files changed, 208 insertions(+), 94 deletions(-) create mode 100644 core/capabilities/remote/target_caller_test.go delete mode 100644 core/capabilities/remote/target_test.go diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target_caller.go index 452e6e2bd8d..102833dff15 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target_caller.go @@ -3,25 +3,35 @@ package remote import ( "context" "errors" + "fmt" + "sync" + "time" + + "github.com/google/uuid" "github.com/smartcontractkit/chainlink-common/pkg/capabilities" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" "github.com/smartcontractkit/chainlink/v2/core/logger" + ragep2ptypes "github.com/smartcontractkit/libocr/ragep2p/types" ) // remoteTargetCaller/Receiver are shims translating between capability API calls and network messages type remoteTargetCaller struct { - capInfo commoncap.CapabilityInfo - donInfo *capabilities.DON - dispatcher types.Dispatcher - lggr logger.Logger + capInfo commoncap.CapabilityInfo + donInfo capabilities.DON + dispatcher types.Dispatcher + lggr logger.Logger + messageIDToWaitgroup sync.Map + messageIDToResponse sync.Map } var _ commoncap.TargetCapability = &remoteTargetCaller{} var _ types.Receiver = &remoteTargetCaller{} -func NewRemoteTargetCaller(capInfo commoncap.CapabilityInfo, donInfo *capabilities.DON, dispatcher types.Dispatcher, lggr logger.Logger) *remoteTargetCaller { +func NewRemoteTargetCaller(lggr logger.Logger, capInfo commoncap.CapabilityInfo, donInfo capabilities.DON, dispatcher types.Dispatcher) *remoteTargetCaller { return &remoteTargetCaller{ capInfo: capInfo, donInfo: donInfo, @@ -44,58 +54,92 @@ func (c *remoteTargetCaller) UnregisterFromWorkflow(ctx context.Context, request func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { - /* - if c.capInfo.DON == nil { - return nil, errors.New("missing DON in capability info") - } + if c.capInfo.DON == nil { + return nil, errors.New("missing remote capability DON info") + } - tc, err := workflows.ExtractTransmissionConfig(req.Config) - if err != nil { - return nil, err - } + tc, err := transmission.ExtractTransmissionConfig(req.Config) + if err != nil { + return nil, err + } - n := len(c.capInfo.DON.Members) - key := workflows.ScheduleSeed(c.donInfo.Config.SharedSecret, req.Metadata.WorkflowID, req.Metadata.WorkflowExecutionID) - sched, err := workflows.Schedule(tc.Schedule, n) - if err != nil { - return nil, err - } + rawRequest, err := pb.MarshalCapabilityRequest(req) + if err != nil { + return nil, fmt.Errorf("failed to marshal capability request: %w", err) + } - picked := permutation.Permutation(n, key) - delay := workflows.DelayFor(d.Position, sched, picked, tc.DeltaStage) - if delay == nil { - lggr.Debugw("skipping transmission: node is not included in schedule") - return nil, nil - } + peerIDToDelay, err := transmission.GetPeerIDToTransmissionDelay(c.capInfo.DON.Members, c.donInfo.Config.SharedSecret, req.Metadata.WorkflowID, req.Metadata.WorkflowExecutionID, tc) + if err != nil { + return nil, fmt.Errorf("failed to get peer ID to transmission delay: %w", err) + } - lggr.Debugf("execution delayed by %+v", *delay) - select { - case <-ctx.Done(): - return nil, ctx.Err() - case <-time.After(*delay): - lggr.Debugw("executing delayed execution") - return immediateExecution{}.Apply(ctx, lggr, cap, req) + messageID := uuid.New().String() + + responseWaitGroup := &sync.WaitGroup{} + responseWaitGroup.Add(1) + c.messageIDToWaitgroup.Store(messageID, responseWaitGroup) + + responseReceived := make(chan struct{}) + go func() { + responseWaitGroup.Wait() + close(responseReceived) + }() + + for peerID, delay := range peerIDToDelay { + + go func(peerID ragep2ptypes.PeerID, delay time.Duration) { + select { + case <-ctx.Done(): + return + case <-time.After(delay): + c.lggr.Debugw("executing delayed execution for peer", "peerID", peerID) + m := &types.MessageBody{ + CapabilityId: c.capInfo.ID, + CapabilityDonId: c.capInfo.DON.ID, + CallerDonId: c.donInfo.ID, + Method: types.MethodExecute, + Payload: rawRequest, + MessageId: []byte(messageID), + } + err = c.dispatcher.Send(peerID, m) + if err != nil { + c.lggr.Errorw("failed to send message", "peerID", peerID, "err", err) + } + } + }(peerID, delay) + } + + select { + case <-responseReceived: + response, loaded := c.messageIDToResponse.LoadAndDelete(messageID) + if !loaded { + return nil, fmt.Errorf("no response found for message ID %s", messageID) } - c.lggr.Debugw("not implemented - executing fake remote target capability", "capabilityId", c.capInfo.ID, "nMembers", len(c.donInfo.Members)) - for _, peerID := range c.donInfo.Members { - m := &types.MessageBody{ - CapabilityId: c.capInfo.ID, - CapabilityDonId: c.donInfo.ID, - Payload: []byte{0x01, 0x02, 0x03}, - } - err := c.dispatcher.Send(peerID, m) - if err != nil { - return nil, err - } + capabilityResponse, ok := response.(commoncap.CapabilityResponse) + if !ok { + return nil, fmt.Errorf("failed to cast response to CapabilityResponse: %v", response) } + // TODO going to need to handle the case where the capability returns a stream of responses + resultCh := make(chan commoncap.CapabilityResponse, 1) + resultCh <- capabilityResponse + close(resultCh) + + return resultCh, nil + case <-ctx.Done(): + return nil, ctx.Err() + } - */ - // TODO: return a channel that will be closed when all responses are received - return nil, nil } func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { - c.lggr.Debugw("not implemented - received message", "capabilityId", c.capInfo.ID, "payload", msg.Payload) + + // TODO handle the case where the capability returns a stream of responses + wg, loaded := c.messageIDToWaitgroup.LoadAndDelete(msg.MessageId) + if loaded { + wg.(*sync.WaitGroup).Done() + c.messageIDToResponse.Store(msg.MessageId, msg.Payload) + return + } } diff --git a/core/capabilities/remote/target_caller_test.go b/core/capabilities/remote/target_caller_test.go new file mode 100644 index 00000000000..8f7f84b84c6 --- /dev/null +++ b/core/capabilities/remote/target_caller_test.go @@ -0,0 +1,99 @@ +package remote_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" + "github.com/smartcontractkit/chainlink-common/pkg/values" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" + remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" + remoteMocks "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types/mocks" + "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" + "github.com/smartcontractkit/chainlink/v2/core/logger" + p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" + + "github.com/stretchr/testify/mock" +) + +const ( + executeValue1 = "triggerEvent1" +) + +func Test_TargetCallerExecute(t *testing.T) { + + lggr := logger.TestLogger(t) + ctx := testutils.Context(t) + capInfo := commoncap.CapabilityInfo{ + ID: "cap_id", + CapabilityType: commoncap.CapabilityTypeTarget, + Description: "Remote Target", + Version: "0.0.1", + } + p1 := p2ptypes.PeerID{} + require.NoError(t, p1.UnmarshalText([]byte(PeerID1))) + p2 := p2ptypes.PeerID{} + require.NoError(t, p2.UnmarshalText([]byte(PeerID2))) + capDonInfo := commoncap.DON{ + ID: "capability-don", + Members: []p2ptypes.PeerID{p1}, + F: 0, + } + + /* + workflowDonInfo := commoncap.DON{ + ID: "workflow-don", + Members: []p2ptypes.PeerID{p2}, + F: 0, + }*/ + + dispatcher := remoteMocks.NewDispatcher(t) + + awaitExecuteCh := make(chan struct{}) + dispatcher.On("Send", mock.Anything, mock.Anything).Return(nil).Run(func(args mock.Arguments) { + select { + case awaitExecuteCh <- struct{}{}: + default: + } + }) + + caller := remote.NewRemoteTargetCaller(lggr, capInfo, capDonInfo, dispatcher) + + go func() { + <-awaitExecuteCh + + executeValue, err := values.Wrap(executeValue1) + require.NoError(t, err) + capResponse := commoncap.CapabilityResponse{ + Value: executeValue, + Err: nil, + } + marshaled, err := pb.MarshalCapabilityResponse(capResponse) + require.NoError(t, err) + executeResponse := &remotetypes.MessageBody{ + Sender: p1[:], + Method: remotetypes.MethodExecute, + Payload: marshaled, + } + + caller.Receive(executeResponse) + }() + + resultCh, err := caller.Execute(ctx, + commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{ + WorkflowID: WorkflowID1, + }, + }) + + require.NoError(t, err) + + response := <-resultCh + + responseValue, err := response.Value.Unwrap() + assert.Equal(t, executeValue1, responseValue.(string)) + +} diff --git a/core/capabilities/remote/target_test.go b/core/capabilities/remote/target_test.go deleted file mode 100644 index 0f9bad51f67..00000000000 --- a/core/capabilities/remote/target_test.go +++ /dev/null @@ -1,30 +0,0 @@ -package remote_test - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - - "github.com/smartcontractkit/chainlink-common/pkg/capabilities" - commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" - remoteMocks "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types/mocks" - "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" - "github.com/smartcontractkit/chainlink/v2/core/logger" - p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" -) - -func TestTarget_Placeholder(t *testing.T) { - lggr := logger.TestLogger(t) - ctx := testutils.Context(t) - donInfo := &capabilities.DON{ - Members: []p2ptypes.PeerID{{}}, - } - dispatcher := remoteMocks.NewDispatcher(t) - dispatcher.On("Send", mock.Anything, mock.Anything).Return(nil) - target := remote.NewRemoteTargetCaller(commoncap.CapabilityInfo{}, donInfo, dispatcher, lggr) - - _, err := target.Execute(ctx, commoncap.CapabilityRequest{}) - assert.NoError(t, err) -} diff --git a/core/capabilities/remote/trigger_publisher_test.go b/core/capabilities/remote/trigger_publisher_test.go index 71a5174c07f..81f3d737c69 100644 --- a/core/capabilities/remote/trigger_publisher_test.go +++ b/core/capabilities/remote/trigger_publisher_test.go @@ -26,9 +26,9 @@ func TestTriggerPublisher_Register(t *testing.T) { Version: "0.0.1", } p1 := p2ptypes.PeerID{} - require.NoError(t, p1.UnmarshalText([]byte(peerID1))) + require.NoError(t, p1.UnmarshalText([]byte(PeerID1))) p2 := p2ptypes.PeerID{} - require.NoError(t, p2.UnmarshalText([]byte(peerID2))) + require.NoError(t, p2.UnmarshalText([]byte(PeerID2))) capDonInfo := commoncap.DON{ ID: "capability-don", Members: []p2ptypes.PeerID{p1}, @@ -60,7 +60,7 @@ func TestTriggerPublisher_Register(t *testing.T) { // trigger registration event capRequest := commoncap.CapabilityRequest{ Metadata: commoncap.RequestMetadata{ - WorkflowID: workflowID1, + WorkflowID: WorkflowID1, }, } marshaled, err := pb.MarshalCapabilityRequest(capRequest) diff --git a/core/capabilities/remote/trigger_subscriber_test.go b/core/capabilities/remote/trigger_subscriber_test.go index 4d251d49dc8..1a58c9c4eb6 100644 --- a/core/capabilities/remote/trigger_subscriber_test.go +++ b/core/capabilities/remote/trigger_subscriber_test.go @@ -18,9 +18,9 @@ import ( ) const ( - peerID1 = "12D3KooWF3dVeJ6YoT5HFnYhmwQWWMoEwVFzJQ5kKCMX3ZityxMC" - peerID2 = "12D3KooWQsmok6aD8PZqt3RnJhQRrNzKHLficq7zYFRp7kZ1hHP8" - workflowID1 = "workflowID1" + PeerID1 = "12D3KooWF3dVeJ6YoT5HFnYhmwQWWMoEwVFzJQ5kKCMX3ZityxMC" + PeerID2 = "12D3KooWQsmok6aD8PZqt3RnJhQRrNzKHLficq7zYFRp7kZ1hHP8" + WorkflowID1 = "workflowID1" triggerEvent1 = "triggerEvent1" triggerEvent2 = "triggerEvent2" ) @@ -35,9 +35,9 @@ func TestTriggerSubscriber_RegisterAndReceive(t *testing.T) { Version: "0.0.1", } p1 := p2ptypes.PeerID{} - require.NoError(t, p1.UnmarshalText([]byte(peerID1))) + require.NoError(t, p1.UnmarshalText([]byte(PeerID1))) p2 := p2ptypes.PeerID{} - require.NoError(t, p2.UnmarshalText([]byte(peerID2))) + require.NoError(t, p2.UnmarshalText([]byte(PeerID2))) capDonInfo := commoncap.DON{ ID: "capability-don", Members: []p2ptypes.PeerID{p1}, @@ -70,7 +70,7 @@ func TestTriggerSubscriber_RegisterAndReceive(t *testing.T) { triggerEventCallbackCh, err := subscriber.RegisterTrigger(ctx, commoncap.CapabilityRequest{ Metadata: commoncap.RequestMetadata{ - WorkflowID: workflowID1, + WorkflowID: WorkflowID1, }, }) require.NoError(t, err) @@ -90,7 +90,7 @@ func TestTriggerSubscriber_RegisterAndReceive(t *testing.T) { Method: remotetypes.MethodTriggerEvent, Metadata: &remotetypes.MessageBody_TriggerEventMetadata{ TriggerEventMetadata: &remotetypes.TriggerEventMetadata{ - WorkflowIds: []string{workflowID1}, + WorkflowIds: []string{WorkflowID1}, }, }, Payload: marshaled, diff --git a/core/capabilities/remote/types/types.go b/core/capabilities/remote/types/types.go index d8307d09f80..a825c42be56 100644 --- a/core/capabilities/remote/types/types.go +++ b/core/capabilities/remote/types/types.go @@ -9,6 +9,7 @@ const ( MethodRegisterTrigger = "RegisterTrigger" MethodUnRegisterTrigger = "UnregisterTrigger" MethodTriggerEvent = "TriggerEvent" + MethodExecute = "Execute" ) //go:generate mockery --quiet --name Dispatcher --output ./mocks/ --case=underscore diff --git a/core/capabilities/transmission/transmission.go b/core/capabilities/transmission/transmission.go index 6ee70ec3472..06e5fefd871 100644 --- a/core/capabilities/transmission/transmission.go +++ b/core/capabilities/transmission/transmission.go @@ -50,7 +50,7 @@ func ExtractTransmissionConfig(config *values.Map) (TransmissionConfig, error) { // GetPeerIDToTransmissionDelay returns a map of PeerID to the time.Duration that the node with that PeerID should wait // before transmitting. If a node is not in the map, it should not transmit. func GetPeerIDToTransmissionDelay(donPeerIDs []ragep2ptypes.PeerID, sharedSecret [16]byte, workflowID string, - workflowExecutionID string, tc TransmissionConfig) (map[p2ptypes.PeerID]*time.Duration, error) { + workflowExecutionID string, tc TransmissionConfig) (map[p2ptypes.PeerID]time.Duration, error) { donMemberCount := len(donPeerIDs) key := scheduleSeed(sharedSecret, workflowID, workflowExecutionID) sched, err := schedule(tc.Schedule, donMemberCount) @@ -60,11 +60,11 @@ func GetPeerIDToTransmissionDelay(donPeerIDs []ragep2ptypes.PeerID, sharedSecret picked := permutation.Permutation(donMemberCount, key) - peerIDToTransmissionDelay := map[p2ptypes.PeerID]*time.Duration{} + peerIDToTransmissionDelay := map[p2ptypes.PeerID]time.Duration{} for i, peerID := range donPeerIDs { delay := delayFor(i, sched, picked, tc.DeltaStage) if delay != nil { - peerIDToTransmissionDelay[peerID] = delay + peerIDToTransmissionDelay[peerID] = *delay } } return peerIDToTransmissionDelay, nil diff --git a/core/capabilities/transmission/transmission_test.go b/core/capabilities/transmission/transmission_test.go index 54ef4cba6b0..ec4de71a1cd 100644 --- a/core/capabilities/transmission/transmission_test.go +++ b/core/capabilities/transmission/transmission_test.go @@ -93,10 +93,10 @@ func Test_GetPeerIDToTransmissionDelay(t *testing.T) { peerIdToDelay, err := GetPeerIDToTransmissionDelay(ids, [16]byte(sharedSecret), "mock-workflow-id", tc.workflowExecutionID, transmissionCfg) require.NoError(t, err) - assert.Equal(t, tc.expectedDelays["one"], *peerIdToDelay[peer1]) - assert.Equal(t, tc.expectedDelays["two"], *peerIdToDelay[peer2]) - assert.Equal(t, tc.expectedDelays["three"], *peerIdToDelay[peer3]) - assert.Equal(t, tc.expectedDelays["four"], *peerIdToDelay[peer4]) + assert.Equal(t, tc.expectedDelays["one"], peerIdToDelay[peer1]) + assert.Equal(t, tc.expectedDelays["two"], peerIdToDelay[peer2]) + assert.Equal(t, tc.expectedDelays["three"], peerIdToDelay[peer3]) + assert.Equal(t, tc.expectedDelays["four"], peerIdToDelay[peer4]) }) } } From a87175740ec05211f361afbea8e2de4a0ed70927 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Tue, 21 May 2024 12:13:37 +0100 Subject: [PATCH 04/43] add timeout handling --- core/capabilities/remote/target_caller.go | 60 ++++---- .../capabilities/remote/target_caller_test.go | 140 ++++++++++++++---- .../capabilities/transmission/transmission.go | 2 +- core/services/workflows/execution_strategy.go | 8 +- 4 files changed, 151 insertions(+), 59 deletions(-) diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target_caller.go index 102833dff15..3d2ba7b64b6 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target_caller.go @@ -20,8 +20,8 @@ import ( // remoteTargetCaller/Receiver are shims translating between capability API calls and network messages type remoteTargetCaller struct { - capInfo commoncap.CapabilityInfo - donInfo capabilities.DON + remoteCapabilityInfo commoncap.CapabilityInfo + localDONInfo capabilities.DON dispatcher types.Dispatcher lggr logger.Logger messageIDToWaitgroup sync.Map @@ -31,17 +31,22 @@ type remoteTargetCaller struct { var _ commoncap.TargetCapability = &remoteTargetCaller{} var _ types.Receiver = &remoteTargetCaller{} -func NewRemoteTargetCaller(lggr logger.Logger, capInfo commoncap.CapabilityInfo, donInfo capabilities.DON, dispatcher types.Dispatcher) *remoteTargetCaller { - return &remoteTargetCaller{ - capInfo: capInfo, - donInfo: donInfo, - dispatcher: dispatcher, - lggr: lggr, +func NewRemoteTargetCaller(lggr logger.Logger, remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher) (*remoteTargetCaller, error) { + + if remoteCapabilityInfo.DON == nil { + return nil, errors.New("missing remote capability DON info") } + + return &remoteTargetCaller{ + remoteCapabilityInfo: remoteCapabilityInfo, + localDONInfo: localDonInfo, + dispatcher: dispatcher, + lggr: lggr, + }, nil } func (c *remoteTargetCaller) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { - return c.capInfo, nil + return c.remoteCapabilityInfo, nil } func (c *remoteTargetCaller) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { @@ -52,15 +57,12 @@ func (c *remoteTargetCaller) UnregisterFromWorkflow(ctx context.Context, request return errors.New("not implemented") } -func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { - - if c.capInfo.DON == nil { - return nil, errors.New("missing remote capability DON info") - } +func (c *remoteTargetCaller) Execute(parentCtx context.Context, req commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { + // TODO should the transmission config be passed into the constructor rather than pulled from the request? tc, err := transmission.ExtractTransmissionConfig(req.Config) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to extract transmission config from request config: %w", err) } rawRequest, err := pb.MarshalCapabilityRequest(req) @@ -68,7 +70,7 @@ func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.Capabili return nil, fmt.Errorf("failed to marshal capability request: %w", err) } - peerIDToDelay, err := transmission.GetPeerIDToTransmissionDelay(c.capInfo.DON.Members, c.donInfo.Config.SharedSecret, req.Metadata.WorkflowID, req.Metadata.WorkflowExecutionID, tc) + peerIDToDelay, err := transmission.GetPeerIDToTransmissionDelay(c.remoteCapabilityInfo.DON.Members, c.localDONInfo.Config.SharedSecret, req.Metadata.WorkflowID, req.Metadata.WorkflowExecutionID, tc) if err != nil { return nil, fmt.Errorf("failed to get peer ID to transmission delay: %w", err) } @@ -85,8 +87,11 @@ func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.Capabili close(responseReceived) }() - for peerID, delay := range peerIDToDelay { + // Once a response is returned from a remote capability any pending scheduled calls can be cancelled + ctx, cancelFn := context.WithCancel(parentCtx) + defer cancelFn() + for peerID, delay := range peerIDToDelay { go func(peerID ragep2ptypes.PeerID, delay time.Duration) { select { case <-ctx.Done(): @@ -94,9 +99,9 @@ func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.Capabili case <-time.After(delay): c.lggr.Debugw("executing delayed execution for peer", "peerID", peerID) m := &types.MessageBody{ - CapabilityId: c.capInfo.ID, - CapabilityDonId: c.capInfo.DON.ID, - CallerDonId: c.donInfo.ID, + CapabilityId: c.remoteCapabilityInfo.ID, + CapabilityDonId: c.remoteCapabilityInfo.DON.ID, + CallerDonId: c.localDONInfo.ID, Method: types.MethodExecute, Payload: rawRequest, MessageId: []byte(messageID), @@ -116,12 +121,17 @@ func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.Capabili return nil, fmt.Errorf("no response found for message ID %s", messageID) } - capabilityResponse, ok := response.(commoncap.CapabilityResponse) + payload, ok := response.([]byte) if !ok { - return nil, fmt.Errorf("failed to cast response to CapabilityResponse: %v", response) + return nil, fmt.Errorf("unexpected response type %T for message ID %s", response, messageID) + } + + capabilityResponse, err := pb.UnmarshalCapabilityResponse(payload) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal capability response: %w", err) } - // TODO going to need to handle the case where the capability returns a stream of responses + // TODO handle the case where the capability returns a stream of responses resultCh := make(chan commoncap.CapabilityResponse, 1) resultCh <- capabilityResponse close(resultCh) @@ -136,10 +146,10 @@ func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.Capabili func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { // TODO handle the case where the capability returns a stream of responses - wg, loaded := c.messageIDToWaitgroup.LoadAndDelete(msg.MessageId) + wg, loaded := c.messageIDToWaitgroup.LoadAndDelete(string(msg.MessageId)) if loaded { wg.(*sync.WaitGroup).Done() - c.messageIDToResponse.Store(msg.MessageId, msg.Payload) + c.messageIDToResponse.Store(string(msg.MessageId), msg.Payload) return } } diff --git a/core/capabilities/remote/target_caller_test.go b/core/capabilities/remote/target_caller_test.go index 8f7f84b84c6..5325a36276b 100644 --- a/core/capabilities/remote/target_caller_test.go +++ b/core/capabilities/remote/target_caller_test.go @@ -1,7 +1,9 @@ package remote_test import ( + "context" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -11,59 +13,107 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/values" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" - remoteMocks "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types/mocks" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" "github.com/smartcontractkit/chainlink/v2/core/logger" p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" - - "github.com/stretchr/testify/mock" ) const ( executeValue1 = "triggerEvent1" ) -func Test_TargetCallerExecute(t *testing.T) { - +func Test_TargetCallerExecuteContextTimeout(t *testing.T) { lggr := logger.TestLogger(t) ctx := testutils.Context(t) + + p1 := p2ptypes.PeerID{} + require.NoError(t, p1.UnmarshalText([]byte(PeerID1))) + p2 := p2ptypes.PeerID{} + require.NoError(t, p2.UnmarshalText([]byte(PeerID2))) + capDonInfo := &commoncap.DON{ + ID: "capability-don", + Members: []p2ptypes.PeerID{p1}, + F: 0, + } + capInfo := commoncap.CapabilityInfo{ ID: "cap_id", CapabilityType: commoncap.CapabilityTypeTarget, Description: "Remote Target", Version: "0.0.1", + DON: capDonInfo, + } + + workflowDonInfo := commoncap.DON{ + ID: "workflow-don", + Members: []p2ptypes.PeerID{p2}, + F: 0, } + + dispatcher := NewTestDispatcher() + + caller, err := remote.NewRemoteTargetCaller(lggr, capInfo, workflowDonInfo, dispatcher) + require.NoError(t, err) + + err = dispatcher.SetReceiver("cap_id", "workflow-don", caller) + require.NoError(t, err) + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "100ms", + }) + require.NoError(t, err) + + ctxWithTimeout, _ := context.WithTimeout(ctx, 10*time.Millisecond) + + _, err = caller.Execute(ctxWithTimeout, + commoncap.CapabilityRequest{ + Config: transmissionSchedule, + }) + + assert.NotNil(t, err) +} + +func Test_TargetCallerExecute(t *testing.T) { + + lggr := logger.TestLogger(t) + ctx := testutils.Context(t) + p1 := p2ptypes.PeerID{} require.NoError(t, p1.UnmarshalText([]byte(PeerID1))) p2 := p2ptypes.PeerID{} require.NoError(t, p2.UnmarshalText([]byte(PeerID2))) - capDonInfo := commoncap.DON{ + capDonInfo := &commoncap.DON{ ID: "capability-don", Members: []p2ptypes.PeerID{p1}, F: 0, } - /* - workflowDonInfo := commoncap.DON{ - ID: "workflow-don", - Members: []p2ptypes.PeerID{p2}, - F: 0, - }*/ + capInfo := commoncap.CapabilityInfo{ + ID: "cap_id", + CapabilityType: commoncap.CapabilityTypeTarget, + Description: "Remote Target", + Version: "0.0.1", + DON: capDonInfo, + } + + workflowDonInfo := commoncap.DON{ + ID: "workflow-don", + Members: []p2ptypes.PeerID{p2}, + F: 0, + } - dispatcher := remoteMocks.NewDispatcher(t) + dispatcher := NewTestDispatcher() - awaitExecuteCh := make(chan struct{}) - dispatcher.On("Send", mock.Anything, mock.Anything).Return(nil).Run(func(args mock.Arguments) { - select { - case awaitExecuteCh <- struct{}{}: - default: - } - }) + caller, err := remote.NewRemoteTargetCaller(lggr, capInfo, workflowDonInfo, dispatcher) + require.NoError(t, err) - caller := remote.NewRemoteTargetCaller(lggr, capInfo, capDonInfo, dispatcher) + err = dispatcher.SetReceiver("cap_id", "workflow-don", caller) + require.NoError(t, err) go func() { - <-awaitExecuteCh + sentMessage := <-dispatcher.sentMessagesCh executeValue, err := values.Wrap(executeValue1) require.NoError(t, err) @@ -74,19 +124,24 @@ func Test_TargetCallerExecute(t *testing.T) { marshaled, err := pb.MarshalCapabilityResponse(capResponse) require.NoError(t, err) executeResponse := &remotetypes.MessageBody{ - Sender: p1[:], - Method: remotetypes.MethodExecute, - Payload: marshaled, + Sender: p1[:], + Method: remotetypes.MethodExecute, + Payload: marshaled, + MessageId: sentMessage.MessageId, } - caller.Receive(executeResponse) + dispatcher.SendToReceiver(executeResponse) }() + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "100ms", + }) + require.NoError(t, err) + resultCh, err := caller.Execute(ctx, commoncap.CapabilityRequest{ - Metadata: commoncap.RequestMetadata{ - WorkflowID: WorkflowID1, - }, + Config: transmissionSchedule, }) require.NoError(t, err) @@ -97,3 +152,30 @@ func Test_TargetCallerExecute(t *testing.T) { assert.Equal(t, executeValue1, responseValue.(string)) } + +type TestDispatcher struct { + sentMessagesCh chan *remotetypes.MessageBody + receiver remotetypes.Receiver +} + +func NewTestDispatcher() *TestDispatcher { + return &TestDispatcher{ + sentMessagesCh: make(chan *remotetypes.MessageBody, 1), + } +} + +func (t *TestDispatcher) SendToReceiver(msgBody *remotetypes.MessageBody) { + t.receiver.Receive(msgBody) +} + +func (t *TestDispatcher) SetReceiver(capabilityId string, donId string, receiver remotetypes.Receiver) error { + t.receiver = receiver + return nil +} + +func (t *TestDispatcher) RemoveReceiver(capabilityId string, donId string) {} + +func (t *TestDispatcher) Send(peerID p2ptypes.PeerID, msgBody *remotetypes.MessageBody) error { + t.sentMessagesCh <- msgBody + return nil +} diff --git a/core/capabilities/transmission/transmission.go b/core/capabilities/transmission/transmission.go index 06e5fefd871..d76ffe5a3d6 100644 --- a/core/capabilities/transmission/transmission.go +++ b/core/capabilities/transmission/transmission.go @@ -33,7 +33,7 @@ func ExtractTransmissionConfig(config *values.Map) (TransmissionConfig, error) { } err := config.UnwrapTo(&tc) if err != nil { - return TransmissionConfig{}, err + return TransmissionConfig{}, fmt.Errorf("failed to unwrap tranmission config from value map: %w", err) } duration, err := time.ParseDuration(tc.DeltaStage) diff --git a/core/services/workflows/execution_strategy.go b/core/services/workflows/execution_strategy.go index 6682f284220..bb36e8baf89 100644 --- a/core/services/workflows/execution_strategy.go +++ b/core/services/workflows/execution_strategy.go @@ -67,17 +67,17 @@ func (d scheduledExecution) Apply(ctx context.Context, lggr logger.Logger, cap c return nil, fmt.Errorf("failed to get peer ID to transmission delay map: %w", err) } - delay := peerIDToTransmissionDelay[*d.PeerID] - if delay == nil { + delay, existsForPeerID := peerIDToTransmissionDelay[*d.PeerID] + if !existsForPeerID { lggr.Debugw("skipping transmission: node is not included in schedule") return nil, nil } - lggr.Debugf("execution delayed by %+v", *delay) + lggr.Debugf("execution delayed by %+v", delay) select { case <-ctx.Done(): return nil, ctx.Err() - case <-time.After(*delay): + case <-time.After(delay): lggr.Debugw("executing delayed execution") return immediateExecution{}.Apply(ctx, lggr, cap, req) } From 5b1c2c97099dc164192dcc7a935a634182c64b5a Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Tue, 21 May 2024 13:16:37 +0100 Subject: [PATCH 05/43] move to using deterministically unique message ids --- core/capabilities/remote/target_caller.go | 94 +++++++++++++-------- core/capabilities/remote/target_receiver.go | 4 +- 2 files changed, 60 insertions(+), 38 deletions(-) diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target_caller.go index 3d2ba7b64b6..146b2689ae9 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target_caller.go @@ -2,13 +2,12 @@ package remote import ( "context" + "crypto/sha256" "errors" "fmt" "sync" "time" - "github.com/google/uuid" - "github.com/smartcontractkit/chainlink-common/pkg/capabilities" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" @@ -59,23 +58,15 @@ func (c *remoteTargetCaller) UnregisterFromWorkflow(ctx context.Context, request func (c *remoteTargetCaller) Execute(parentCtx context.Context, req commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { - // TODO should the transmission config be passed into the constructor rather than pulled from the request? - tc, err := transmission.ExtractTransmissionConfig(req.Config) - if err != nil { - return nil, fmt.Errorf("failed to extract transmission config from request config: %w", err) - } + // TODO Assuming here that the capability request is deterministically unique across the nodes, need to confirm this is reasonable assumption + // TODO also check pb marshalliing is by default deterministic in the version being used rawRequest, err := pb.MarshalCapabilityRequest(req) if err != nil { return nil, fmt.Errorf("failed to marshal capability request: %w", err) } - peerIDToDelay, err := transmission.GetPeerIDToTransmissionDelay(c.remoteCapabilityInfo.DON.Members, c.localDONInfo.Config.SharedSecret, req.Metadata.WorkflowID, req.Metadata.WorkflowExecutionID, tc) - if err != nil { - return nil, fmt.Errorf("failed to get peer ID to transmission delay: %w", err) - } - - messageID := uuid.New().String() + messageID := sha256.Sum256(rawRequest) responseWaitGroup := &sync.WaitGroup{} responseWaitGroup.Add(1) @@ -87,31 +78,12 @@ func (c *remoteTargetCaller) Execute(parentCtx context.Context, req commoncap.Ca close(responseReceived) }() - // Once a response is returned from a remote capability any pending scheduled calls can be cancelled + // Once a response is received from a remote capability further transmission should be cancelled ctx, cancelFn := context.WithCancel(parentCtx) defer cancelFn() - for peerID, delay := range peerIDToDelay { - go func(peerID ragep2ptypes.PeerID, delay time.Duration) { - select { - case <-ctx.Done(): - return - case <-time.After(delay): - c.lggr.Debugw("executing delayed execution for peer", "peerID", peerID) - m := &types.MessageBody{ - CapabilityId: c.remoteCapabilityInfo.ID, - CapabilityDonId: c.remoteCapabilityInfo.DON.ID, - CallerDonId: c.localDONInfo.ID, - Method: types.MethodExecute, - Payload: rawRequest, - MessageId: []byte(messageID), - } - err = c.dispatcher.Send(peerID, m) - if err != nil { - c.lggr.Errorw("failed to send message", "peerID", peerID, "err", err) - } - } - }(peerID, delay) + if err := c.transmitRequestWithMessageID(ctx, req, messageID); err != nil { + return nil, fmt.Errorf("failed to transmit request: %w", err) } select { @@ -143,13 +115,61 @@ func (c *remoteTargetCaller) Execute(parentCtx context.Context, req commoncap.Ca } +// transmitRequestWithMessageID transmits a capability request to remote capabilities according to the transmission configuration +func (c *remoteTargetCaller) transmitRequestWithMessageID(ctx context.Context, req commoncap.CapabilityRequest, messageID [32]byte) error { + rawRequest, err := pb.MarshalCapabilityRequest(req) + if err != nil { + return fmt.Errorf("failed to marshal capability request: %w", err) + } + + // TODO should the transmission config be passed into the constructor rather than pulled from the request? + tc, err := transmission.ExtractTransmissionConfig(req.Config) + if err != nil { + return fmt.Errorf("failed to extract transmission config from request config: %w", err) + } + + message := &types.MessageBody{ + CapabilityId: c.remoteCapabilityInfo.ID, + CapabilityDonId: c.remoteCapabilityInfo.DON.ID, + CallerDonId: c.localDONInfo.ID, + Method: types.MethodExecute, + Payload: rawRequest, + MessageId: messageID[:], + } + + peerIDToDelay, err := transmission.GetPeerIDToTransmissionDelay(c.remoteCapabilityInfo.DON.Members, c.localDONInfo.Config.SharedSecret, req.Metadata.WorkflowID, req.Metadata.WorkflowExecutionID, tc) + if err != nil { + return fmt.Errorf("failed to get peer ID to transmission delay: %w", err) + } + + for peerID, delay := range peerIDToDelay { + go func(peerID ragep2ptypes.PeerID, delay time.Duration) { + select { + case <-ctx.Done(): + return + case <-time.After(delay): + c.lggr.Debugw("executing delayed execution for peer", "peerID", peerID) + err = c.dispatcher.Send(peerID, message) + if err != nil { + c.lggr.Errorw("failed to send message", "peerID", peerID, "err", err) + } + } + }(peerID, delay) + } + + return nil +} + func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { // TODO handle the case where the capability returns a stream of responses - wg, loaded := c.messageIDToWaitgroup.LoadAndDelete(string(msg.MessageId)) + var messageId [32]byte + copy(messageId[:], msg.MessageId) + + wg, loaded := c.messageIDToWaitgroup.LoadAndDelete(messageId) if loaded { wg.(*sync.WaitGroup).Done() - c.messageIDToResponse.Store(string(msg.MessageId), msg.Payload) + c.messageIDToResponse.Store(messageId, msg.Payload) return } } diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index e8c0776b8ea..d71ca2f2b65 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -13,6 +13,7 @@ import ( ) type remoteTargetReceiver struct { + underlying commoncap.TargetCapability capInfo commoncap.CapabilityInfo donInfo *capabilities.DON dispatcher types.Dispatcher @@ -21,8 +22,9 @@ type remoteTargetReceiver struct { var _ types.Receiver = &remoteTargetReceiver{} -func NewRemoteTargetReceiver(capInfo commoncap.CapabilityInfo, donInfo *capabilities.DON, dispatcher types.Dispatcher, lggr logger.Logger) *remoteTargetReceiver { +func NewRemoteTargetReceiver(underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, donInfo *capabilities.DON, dispatcher types.Dispatcher, lggr logger.Logger) *remoteTargetReceiver { return &remoteTargetReceiver{ + underlying: underlying, capInfo: capInfo, donInfo: donInfo, dispatcher: dispatcher, From 13a861d26f659246dbc1c106a93f7cb00a60a8a3 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Tue, 21 May 2024 16:22:48 +0100 Subject: [PATCH 06/43] wip --- core/capabilities/remote/target_receiver.go | 120 +++++++++++++++++-- core/capabilities/remote/types/message.proto | 1 + 2 files changed, 108 insertions(+), 13 deletions(-) diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index d71ca2f2b65..dea02054c8b 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -5,33 +5,127 @@ package remote // the chain write is waiting for f + 1 reports to be collected before it will execute the transmission import ( + "time" + commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" + p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink/v2/core/logger" ) type remoteTargetReceiver struct { - underlying commoncap.TargetCapability - capInfo commoncap.CapabilityInfo - donInfo *capabilities.DON - dispatcher types.Dispatcher - lggr logger.Logger + underlying commoncap.TargetCapability + capInfo commoncap.CapabilityInfo + localDonInfo *capabilities.DON + workflowDONs map[string]commoncap.DON + dispatcher types.Dispatcher + lggr logger.Logger + + executeRequests map[[32]byte]requestCache } var _ types.Receiver = &remoteTargetReceiver{} -func NewRemoteTargetReceiver(underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, donInfo *capabilities.DON, dispatcher types.Dispatcher, lggr logger.Logger) *remoteTargetReceiver { +func NewRemoteTargetReceiver(underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo *capabilities.DON, + workflowDONs map[string]commoncap.DON, dispatcher types.Dispatcher, lggr logger.Logger) *remoteTargetReceiver { return &remoteTargetReceiver{ - underlying: underlying, - capInfo: capInfo, - donInfo: donInfo, - dispatcher: dispatcher, - lggr: lggr, + underlying: underlying, + capInfo: capInfo, + localDonInfo: localDonInfo, + workflowDONs: workflowDONs, + dispatcher: dispatcher, + + executeRequests: map[[32]byte]requestCache{}, + + lggr: lggr, } } -func (c *remoteTargetReceiver) Receive(msg *types.MessageBody) { - c.lggr.Debugw("not implemented - received message", "capabilityId", c.capInfo.ID, "payload", msg.Payload) +type requestCache struct { + fromPeers map[p2ptypes.PeerID]bool + response *types.MessageBody + callingDonID string + firstRequestTime time.Time +} + +func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { + if msg.Method != types.MethodExecute { + r.lggr.Errorw("received request for unsupported method type", "method", msg.Method) + return + } + + callerDon, ok := r.workflowDONs[msg.CallerDonId] + if !ok { + r.lggr.Errorw("received a message from unsupported workflow DON", "capabilityId", r.capInfo.ID, "callerDonId", msg.CallerDonId) + return + } + + sender := ToPeerID(msg.Sender) + + var messageId [32]byte + copy(messageId[:], msg.MessageId) + + rc, ok := r.executeRequests[messageId] + if !ok { + rc = requestCache{ + fromPeers: map[p2ptypes.PeerID]bool{}, + callingDonID: msg.CallerDonId, + firstRequestTime: time.Now(), + } + r.executeRequests[messageId] = rc + } + + if rc.callingDonID != msg.CallerDonId { + r.lggr.Warnw("received duplicate execute request from different don, ignoring", "peer", sender) + return + } + + if rc.fromPeers[sender] { + r.lggr.Warnw("received duplicate execute request from peer, ignoring", "peer", sender) + return + } + + rc.fromPeers[sender] = true + minRequiredRequests := int(callerDon.F + 1) + if len(rc.fromPeers) >= minRequiredRequests { + if rc.response == nil { + + + + responseMsg := &types.MessageBody{ + CapabilityId: r.capInfo.ID, + CapabilityDonId: r.localDonInfo.ID, + CallerDonId: msg.CallerDonId, + Method: types.MethodExecute, + } + + capabilityRequest, err := pb.UnmarshalCapabilityRequest(msg.Payload) + if err == nil { + + + r.lggr.Errorw("failed to unmarshal capability request", "err", err) + return + } else { + responseMsg.Error = types.Error_CAPABILITY_NOT_FOUND + + } + } + + + r.underlying.Execute(msg.Payload, func(response []byte) { + + + + r.lggr.Warnw("received enough execute requests, but no response was provided") + return + } else { + if err := r.dispatcher.Send(sender, rc.response); err != nil { + r.lggr.Errorw("failed to send response", "peer", sender, "err", err) + } + } + } + } diff --git a/core/capabilities/remote/types/message.proto b/core/capabilities/remote/types/message.proto index 072accedbc0..05e6cff512e 100644 --- a/core/capabilities/remote/types/message.proto +++ b/core/capabilities/remote/types/message.proto @@ -8,6 +8,7 @@ enum Error { OK = 0; VALIDATION_FAILED = 1; CAPABILITY_NOT_FOUND = 2; + INVALID_REQUEST = 3; } message Message { From 43eb46bfaa7635ac332a7be50adddc982d6f7274 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Tue, 21 May 2024 17:30:41 +0100 Subject: [PATCH 07/43] error handling --- core/capabilities/remote/target_caller.go | 11 +++- .../capabilities/remote/target_caller_test.go | 65 +++++++++++++++++++ core/capabilities/remote/target_receiver.go | 64 ++++++++++++------ .../evm/mercury/wsrpc/pb/mercury_wsrpc.pb.go | 1 - 4 files changed, 116 insertions(+), 25 deletions(-) diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target_caller.go index 146b2689ae9..41a4a4e275d 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target_caller.go @@ -88,17 +88,22 @@ func (c *remoteTargetCaller) Execute(parentCtx context.Context, req commoncap.Ca select { case <-responseReceived: + response, loaded := c.messageIDToResponse.LoadAndDelete(messageID) if !loaded { return nil, fmt.Errorf("no response found for message ID %s", messageID) } - payload, ok := response.([]byte) + msg, ok := response.(*types.MessageBody) if !ok { return nil, fmt.Errorf("unexpected response type %T for message ID %s", response, messageID) } - capabilityResponse, err := pb.UnmarshalCapabilityResponse(payload) + if msg.Error != types.Error_OK { + return nil, fmt.Errorf("remote capability returned error: %s", msg.Error) + } + + capabilityResponse, err := pb.UnmarshalCapabilityResponse(msg.Payload) if err != nil { return nil, fmt.Errorf("failed to unmarshal capability response: %w", err) } @@ -169,7 +174,7 @@ func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { wg, loaded := c.messageIDToWaitgroup.LoadAndDelete(messageId) if loaded { wg.(*sync.WaitGroup).Done() - c.messageIDToResponse.Store(messageId, msg.Payload) + c.messageIDToResponse.Store(messageId, msg) return } } diff --git a/core/capabilities/remote/target_caller_test.go b/core/capabilities/remote/target_caller_test.go index 5325a36276b..9a652429c7e 100644 --- a/core/capabilities/remote/target_caller_test.go +++ b/core/capabilities/remote/target_caller_test.go @@ -153,6 +153,71 @@ func Test_TargetCallerExecute(t *testing.T) { } +func Test_TargetCallerExecuteWithError(t *testing.T) { + + lggr := logger.TestLogger(t) + ctx := testutils.Context(t) + + p1 := p2ptypes.PeerID{} + require.NoError(t, p1.UnmarshalText([]byte(PeerID1))) + p2 := p2ptypes.PeerID{} + require.NoError(t, p2.UnmarshalText([]byte(PeerID2))) + capDonInfo := &commoncap.DON{ + ID: "capability-don", + Members: []p2ptypes.PeerID{p1}, + F: 0, + } + + capInfo := commoncap.CapabilityInfo{ + ID: "cap_id", + CapabilityType: commoncap.CapabilityTypeTarget, + Description: "Remote Target", + Version: "0.0.1", + DON: capDonInfo, + } + + workflowDonInfo := commoncap.DON{ + ID: "workflow-don", + Members: []p2ptypes.PeerID{p2}, + F: 0, + } + + dispatcher := NewTestDispatcher() + + caller, err := remote.NewRemoteTargetCaller(lggr, capInfo, workflowDonInfo, dispatcher) + require.NoError(t, err) + + err = dispatcher.SetReceiver("cap_id", "workflow-don", caller) + require.NoError(t, err) + + go func() { + sentMessage := <-dispatcher.sentMessagesCh + + require.NoError(t, err) + executeResponse := &remotetypes.MessageBody{ + Sender: p1[:], + Method: remotetypes.MethodExecute, + MessageId: sentMessage.MessageId, + Error: remotetypes.Error_CAPABILITY_NOT_FOUND, + } + + dispatcher.SendToReceiver(executeResponse) + }() + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "100ms", + }) + require.NoError(t, err) + + _, err = caller.Execute(ctx, + commoncap.CapabilityRequest{ + Config: transmissionSchedule, + }) + + require.NotNil(t, err) +} + type TestDispatcher struct { sentMessagesCh chan *remotetypes.MessageBody receiver remotetypes.Receiver diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index dea02054c8b..b24b491b533 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -5,6 +5,7 @@ package remote // the chain write is waiting for f + 1 reports to be collected before it will execute the transmission import ( + "context" "time" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" @@ -29,8 +30,22 @@ type remoteTargetReceiver struct { var _ types.Receiver = &remoteTargetReceiver{} -func NewRemoteTargetReceiver(underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo *capabilities.DON, +func NewRemoteTargetReceiver(ctx context.Context, underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo *capabilities.DON, workflowDONs map[string]commoncap.DON, dispatcher types.Dispatcher, lggr logger.Logger) *remoteTargetReceiver { + + go func() { + timer := time.NewTimer(60 * time.Second) + defer timer.Stop() + for { + select { + case <-ctx.Done(): + return + case <-timer.C: + // TODO Implement timeout handling and cleanup logic on the request cache + } + } + }() + return &remoteTargetReceiver{ underlying: underlying, capInfo: capInfo, @@ -45,13 +60,16 @@ func NewRemoteTargetReceiver(underlying commoncap.TargetCapability, capInfo comm } type requestCache struct { - fromPeers map[p2ptypes.PeerID]bool - response *types.MessageBody - callingDonID string + fromPeers map[p2ptypes.PeerID]bool + response *types.MessageBody + callingDonID string firstRequestTime time.Time } func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { + // TODO should the dispatcher be passing in a context? + ctx := context.Background() + if msg.Method != types.MethodExecute { r.lggr.Errorw("received request for unsupported method type", "method", msg.Method) return @@ -71,8 +89,8 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { rc, ok := r.executeRequests[messageId] if !ok { rc = requestCache{ - fromPeers: map[p2ptypes.PeerID]bool{}, - callingDonID: msg.CallerDonId, + fromPeers: map[p2ptypes.PeerID]bool{}, + callingDonID: msg.CallerDonId, firstRequestTime: time.Now(), } r.executeRequests[messageId] = rc @@ -93,8 +111,6 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { if len(rc.fromPeers) >= minRequiredRequests { if rc.response == nil { - - responseMsg := &types.MessageBody{ CapabilityId: r.capInfo.ID, CapabilityDonId: r.localDonInfo.ID, @@ -104,23 +120,29 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { capabilityRequest, err := pb.UnmarshalCapabilityRequest(msg.Payload) if err == nil { - - - r.lggr.Errorw("failed to unmarshal capability request", "err", err) - return + responseCh, err := r.underlying.Execute(ctx, capabilityRequest) + if err == nil { + // TODO handle the case where the capability returns a stream of responses + response := <-responseCh + responseMsg.Payload, err = pb.MarshalCapabilityResponse(response) + } else { + r.lggr.Errorw("failed to execute capability", "capabilityId", r.capInfo.ID, "err", err) + // TODO set correct error code + responseMsg.Error = types.Error_CAPABILITY_NOT_FOUND + } } else { + r.lggr.Errorw("failed to unmarshal capability request", "capabilityId", r.capInfo.ID, "err", err) + // TODO set correct error code responseMsg.Error = types.Error_CAPABILITY_NOT_FOUND + } + + rc.response = responseMsg + for peerID := range rc.fromPeers { + if err = r.dispatcher.Send(peerID, responseMsg); err != nil { + r.lggr.Errorw("failed to send response", "peer", peerID, "err", err) + } } - } - - - r.underlying.Execute(msg.Payload, func(response []byte) { - - - - r.lggr.Warnw("received enough execute requests, but no response was provided") - return } else { if err := r.dispatcher.Send(sender, rc.response); err != nil { r.lggr.Errorw("failed to send response", "peer", sender, "err", err) diff --git a/core/services/relay/evm/mercury/wsrpc/pb/mercury_wsrpc.pb.go b/core/services/relay/evm/mercury/wsrpc/pb/mercury_wsrpc.pb.go index 0c31a1d7ac9..4d05db4380f 100644 --- a/core/services/relay/evm/mercury/wsrpc/pb/mercury_wsrpc.pb.go +++ b/core/services/relay/evm/mercury/wsrpc/pb/mercury_wsrpc.pb.go @@ -11,7 +11,6 @@ import ( ) // MercuryClient is the client API for Mercury service. -// type MercuryClient interface { Transmit(ctx context.Context, in *TransmitRequest) (*TransmitResponse, error) LatestReport(ctx context.Context, in *LatestReportRequest) (*LatestReportResponse, error) From 04b820b929929da3691869a8b9e03157db18bea7 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Tue, 21 May 2024 17:45:25 +0100 Subject: [PATCH 08/43] wip --- core/capabilities/remote/target_caller.go | 7 ++- core/capabilities/remote/target_receiver.go | 51 ++++++++++++------- .../remote/target_receiver_test.go | 17 +++++++ 3 files changed, 52 insertions(+), 23 deletions(-) create mode 100644 core/capabilities/remote/target_receiver_test.go diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target_caller.go index 41a4a4e275d..f55b79e444d 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target_caller.go @@ -168,13 +168,12 @@ func (c *remoteTargetCaller) transmitRequestWithMessageID(ctx context.Context, r func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { // TODO handle the case where the capability returns a stream of responses - var messageId [32]byte - copy(messageId[:], msg.MessageId) + messageID := getMessageID(msg) - wg, loaded := c.messageIDToWaitgroup.LoadAndDelete(messageId) + wg, loaded := c.messageIDToWaitgroup.LoadAndDelete(messageID) if loaded { wg.(*sync.WaitGroup).Done() - c.messageIDToResponse.Store(messageId, msg) + c.messageIDToResponse.Store(messageID, msg) return } } diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index b24b491b533..94af851afc4 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -6,6 +6,7 @@ package remote import ( "context" + "sync" "time" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" @@ -25,23 +26,25 @@ type remoteTargetReceiver struct { dispatcher types.Dispatcher lggr logger.Logger - executeRequests map[[32]byte]requestCache + msgIDToExecuteRequest map[[32]byte]executeRequest + + receiveLock sync.Mutex } var _ types.Receiver = &remoteTargetReceiver{} func NewRemoteTargetReceiver(ctx context.Context, underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo *capabilities.DON, - workflowDONs map[string]commoncap.DON, dispatcher types.Dispatcher, lggr logger.Logger) *remoteTargetReceiver { + workflowDONs map[string]commoncap.DON, dispatcher types.Dispatcher, requestTimeout time.Duration, lggr logger.Logger) *remoteTargetReceiver { go func() { - timer := time.NewTimer(60 * time.Second) + timer := time.NewTimer(requestTimeout) defer timer.Stop() for { select { case <-ctx.Done(): return case <-timer.C: - // TODO Implement timeout handling and cleanup logic on the request cache + // TODO Implement timeout handling and cleanup logic of the execute requests cache } } }() @@ -53,13 +56,13 @@ func NewRemoteTargetReceiver(ctx context.Context, underlying commoncap.TargetCap workflowDONs: workflowDONs, dispatcher: dispatcher, - executeRequests: map[[32]byte]requestCache{}, + msgIDToExecuteRequest: map[[32]byte]executeRequest{}, lggr: lggr, } } -type requestCache struct { +type executeRequest struct { fromPeers map[p2ptypes.PeerID]bool response *types.MessageBody callingDonID string @@ -70,6 +73,11 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { // TODO should the dispatcher be passing in a context? ctx := context.Background() + // TODO Confirm threading semantices of dispatcher receive + // TODO May want to have executor per message id to improve liveness + r.receiveLock.Lock() + defer r.receiveLock.Unlock() + if msg.Method != types.MethodExecute { r.lggr.Errorw("received request for unsupported method type", "method", msg.Method) return @@ -83,33 +91,32 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { sender := ToPeerID(msg.Sender) - var messageId [32]byte - copy(messageId[:], msg.MessageId) + messageId := getMessageID(msg) - rc, ok := r.executeRequests[messageId] + executeReq, ok := r.msgIDToExecuteRequest[messageId] if !ok { - rc = requestCache{ + executeReq = executeRequest{ fromPeers: map[p2ptypes.PeerID]bool{}, callingDonID: msg.CallerDonId, firstRequestTime: time.Now(), } - r.executeRequests[messageId] = rc + r.msgIDToExecuteRequest[messageId] = executeReq } - if rc.callingDonID != msg.CallerDonId { + if executeReq.callingDonID != msg.CallerDonId { r.lggr.Warnw("received duplicate execute request from different don, ignoring", "peer", sender) return } - if rc.fromPeers[sender] { + if executeReq.fromPeers[sender] { r.lggr.Warnw("received duplicate execute request from peer, ignoring", "peer", sender) return } - rc.fromPeers[sender] = true + executeReq.fromPeers[sender] = true minRequiredRequests := int(callerDon.F + 1) - if len(rc.fromPeers) >= minRequiredRequests { - if rc.response == nil { + if len(executeReq.fromPeers) >= minRequiredRequests { + if executeReq.response == nil { responseMsg := &types.MessageBody{ CapabilityId: r.capInfo.ID, @@ -136,18 +143,24 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { responseMsg.Error = types.Error_CAPABILITY_NOT_FOUND } - rc.response = responseMsg + executeReq.response = responseMsg - for peerID := range rc.fromPeers { + for peerID := range executeReq.fromPeers { if err = r.dispatcher.Send(peerID, responseMsg); err != nil { r.lggr.Errorw("failed to send response", "peer", peerID, "err", err) } } } else { - if err := r.dispatcher.Send(sender, rc.response); err != nil { + if err := r.dispatcher.Send(sender, executeReq.response); err != nil { r.lggr.Errorw("failed to send response", "peer", sender, "err", err) } } } } + +func getMessageID(msg *types.MessageBody) [32]byte { + var messageId [32]byte + copy(messageId[:], msg.MessageId) + return messageId +} diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go new file mode 100644 index 00000000000..5ffdf5fb4d7 --- /dev/null +++ b/core/capabilities/remote/target_receiver_test.go @@ -0,0 +1,17 @@ +package remote + +import "testing" + +func Test_TargetReceiverConsensus(t *testing.T) { + + // Confirm that the target receiver return a response only when sufficient requests have been received + + // Also confirm that any request received after the first response is replied to + + // Check request times out if insufficient requests are received in a timely manner + + // Check request errors as expected and all error responses are received + + // Check that requests from an incorrect don are ignored? + +} From bd79c12d0c460fd38552ccfb7bfdb7a9f7f24fe8 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Tue, 21 May 2024 17:47:54 +0100 Subject: [PATCH 09/43] wip --- core/capabilities/remote/target_receiver.go | 2 +- core/capabilities/remote/target_receiver_test.go | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index 94af851afc4..128a2f83f8d 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -73,7 +73,7 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { // TODO should the dispatcher be passing in a context? ctx := context.Background() - // TODO Confirm threading semantices of dispatcher receive + // TODO Confirm threading semantics of dispatcher receive // TODO May want to have executor per message id to improve liveness r.receiveLock.Lock() defer r.receiveLock.Unlock() diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index 5ffdf5fb4d7..c302d4aff63 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -13,5 +13,7 @@ func Test_TargetReceiverConsensus(t *testing.T) { // Check request errors as expected and all error responses are received // Check that requests from an incorrect don are ignored? + + // Check that multiple requests from the same sender are ignored } From e80bea6c7165e252674772d74941d2e269e9fffa Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Wed, 22 May 2024 13:34:57 +0100 Subject: [PATCH 10/43] remote target receiver base tests --- core/capabilities/remote/target_caller.go | 32 ++- .../capabilities/remote/target_caller_test.go | 15 +- core/capabilities/remote/target_receiver.go | 5 +- .../remote/target_receiver_test.go | 213 +++++++++++++++++- 4 files changed, 228 insertions(+), 37 deletions(-) diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target_caller.go index f55b79e444d..483b14df758 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target_caller.go @@ -19,28 +19,26 @@ import ( // remoteTargetCaller/Receiver are shims translating between capability API calls and network messages type remoteTargetCaller struct { - remoteCapabilityInfo commoncap.CapabilityInfo - localDONInfo capabilities.DON - dispatcher types.Dispatcher - lggr logger.Logger - messageIDToWaitgroup sync.Map - messageIDToResponse sync.Map + remoteCapabilityInfo commoncap.CapabilityInfo + remoteCapabilityDonInfo capabilities.DON + localDONInfo capabilities.DON + dispatcher types.Dispatcher + lggr logger.Logger + messageIDToWaitgroup sync.Map + messageIDToResponse sync.Map } var _ commoncap.TargetCapability = &remoteTargetCaller{} var _ types.Receiver = &remoteTargetCaller{} -func NewRemoteTargetCaller(lggr logger.Logger, remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher) (*remoteTargetCaller, error) { - - if remoteCapabilityInfo.DON == nil { - return nil, errors.New("missing remote capability DON info") - } +func NewRemoteTargetCaller(lggr logger.Logger, remoteCapabilityInfo commoncap.CapabilityInfo, remoteCapabilityDonInfo capabilities.DON, localDonInfo capabilities.DON, dispatcher types.Dispatcher) (*remoteTargetCaller, error) { return &remoteTargetCaller{ - remoteCapabilityInfo: remoteCapabilityInfo, - localDONInfo: localDonInfo, - dispatcher: dispatcher, - lggr: lggr, + remoteCapabilityInfo: remoteCapabilityInfo, + remoteCapabilityDonInfo: remoteCapabilityDonInfo, + localDONInfo: localDonInfo, + dispatcher: dispatcher, + lggr: lggr, }, nil } @@ -135,14 +133,14 @@ func (c *remoteTargetCaller) transmitRequestWithMessageID(ctx context.Context, r message := &types.MessageBody{ CapabilityId: c.remoteCapabilityInfo.ID, - CapabilityDonId: c.remoteCapabilityInfo.DON.ID, + CapabilityDonId: c.remoteCapabilityDonInfo.ID, CallerDonId: c.localDONInfo.ID, Method: types.MethodExecute, Payload: rawRequest, MessageId: messageID[:], } - peerIDToDelay, err := transmission.GetPeerIDToTransmissionDelay(c.remoteCapabilityInfo.DON.Members, c.localDONInfo.Config.SharedSecret, req.Metadata.WorkflowID, req.Metadata.WorkflowExecutionID, tc) + peerIDToDelay, err := transmission.GetPeerIDToTransmissionDelay(c.remoteCapabilityDonInfo.Members, c.localDONInfo.Config.SharedSecret, req.Metadata.WorkflowID, req.Metadata.WorkflowExecutionID, tc) if err != nil { return fmt.Errorf("failed to get peer ID to transmission delay: %w", err) } diff --git a/core/capabilities/remote/target_caller_test.go b/core/capabilities/remote/target_caller_test.go index 9a652429c7e..42437b79a63 100644 --- a/core/capabilities/remote/target_caller_test.go +++ b/core/capabilities/remote/target_caller_test.go @@ -31,7 +31,7 @@ func Test_TargetCallerExecuteContextTimeout(t *testing.T) { require.NoError(t, p1.UnmarshalText([]byte(PeerID1))) p2 := p2ptypes.PeerID{} require.NoError(t, p2.UnmarshalText([]byte(PeerID2))) - capDonInfo := &commoncap.DON{ + capDonInfo := commoncap.DON{ ID: "capability-don", Members: []p2ptypes.PeerID{p1}, F: 0, @@ -42,7 +42,6 @@ func Test_TargetCallerExecuteContextTimeout(t *testing.T) { CapabilityType: commoncap.CapabilityTypeTarget, Description: "Remote Target", Version: "0.0.1", - DON: capDonInfo, } workflowDonInfo := commoncap.DON{ @@ -53,7 +52,7 @@ func Test_TargetCallerExecuteContextTimeout(t *testing.T) { dispatcher := NewTestDispatcher() - caller, err := remote.NewRemoteTargetCaller(lggr, capInfo, workflowDonInfo, dispatcher) + caller, err := remote.NewRemoteTargetCaller(lggr, capInfo, capDonInfo, workflowDonInfo, dispatcher) require.NoError(t, err) err = dispatcher.SetReceiver("cap_id", "workflow-don", caller) @@ -84,7 +83,7 @@ func Test_TargetCallerExecute(t *testing.T) { require.NoError(t, p1.UnmarshalText([]byte(PeerID1))) p2 := p2ptypes.PeerID{} require.NoError(t, p2.UnmarshalText([]byte(PeerID2))) - capDonInfo := &commoncap.DON{ + capDonInfo := commoncap.DON{ ID: "capability-don", Members: []p2ptypes.PeerID{p1}, F: 0, @@ -95,7 +94,6 @@ func Test_TargetCallerExecute(t *testing.T) { CapabilityType: commoncap.CapabilityTypeTarget, Description: "Remote Target", Version: "0.0.1", - DON: capDonInfo, } workflowDonInfo := commoncap.DON{ @@ -106,7 +104,7 @@ func Test_TargetCallerExecute(t *testing.T) { dispatcher := NewTestDispatcher() - caller, err := remote.NewRemoteTargetCaller(lggr, capInfo, workflowDonInfo, dispatcher) + caller, err := remote.NewRemoteTargetCaller(lggr, capInfo, capDonInfo, workflowDonInfo, dispatcher) require.NoError(t, err) err = dispatcher.SetReceiver("cap_id", "workflow-don", caller) @@ -162,7 +160,7 @@ func Test_TargetCallerExecuteWithError(t *testing.T) { require.NoError(t, p1.UnmarshalText([]byte(PeerID1))) p2 := p2ptypes.PeerID{} require.NoError(t, p2.UnmarshalText([]byte(PeerID2))) - capDonInfo := &commoncap.DON{ + capDonInfo := commoncap.DON{ ID: "capability-don", Members: []p2ptypes.PeerID{p1}, F: 0, @@ -173,7 +171,6 @@ func Test_TargetCallerExecuteWithError(t *testing.T) { CapabilityType: commoncap.CapabilityTypeTarget, Description: "Remote Target", Version: "0.0.1", - DON: capDonInfo, } workflowDonInfo := commoncap.DON{ @@ -184,7 +181,7 @@ func Test_TargetCallerExecuteWithError(t *testing.T) { dispatcher := NewTestDispatcher() - caller, err := remote.NewRemoteTargetCaller(lggr, capInfo, workflowDonInfo, dispatcher) + caller, err := remote.NewRemoteTargetCaller(lggr, capInfo, capDonInfo, workflowDonInfo, dispatcher) require.NoError(t, err) err = dispatcher.SetReceiver("cap_id", "workflow-don", caller) diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index 128a2f83f8d..2f157fceacb 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -33,8 +33,8 @@ type remoteTargetReceiver struct { var _ types.Receiver = &remoteTargetReceiver{} -func NewRemoteTargetReceiver(ctx context.Context, underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo *capabilities.DON, - workflowDONs map[string]commoncap.DON, dispatcher types.Dispatcher, requestTimeout time.Duration, lggr logger.Logger) *remoteTargetReceiver { +func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo *capabilities.DON, + workflowDONs map[string]commoncap.DON, dispatcher types.Dispatcher, requestTimeout time.Duration) *remoteTargetReceiver { go func() { timer := time.NewTimer(requestTimeout) @@ -123,6 +123,7 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { CapabilityDonId: r.localDonInfo.ID, CallerDonId: msg.CallerDonId, Method: types.MethodExecute, + MessageId: messageId[:], } capabilityRequest, err := pb.UnmarshalCapabilityRequest(msg.Payload) diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index c302d4aff63..a9191bcabf3 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -1,19 +1,214 @@ -package remote +package remote_test -import "testing" +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink-common/pkg/values" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" + remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" + "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" + "github.com/smartcontractkit/chainlink/v2/core/logger" + p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" +) func Test_TargetReceiverConsensus(t *testing.T) { + lggr := logger.TestLogger(t) + ctx := testutils.Context(t) + capInfo := commoncap.CapabilityInfo{ + ID: "cap_id", + CapabilityType: commoncap.CapabilityTypeTarget, + Description: "Remote Target", + Version: "0.0.1", + } + capabilityPeerID := p2ptypes.PeerID{} + require.NoError(t, capabilityPeerID.UnmarshalText([]byte(PeerID1))) + workflowPeer1ID := p2ptypes.PeerID{} + require.NoError(t, workflowPeer1ID.UnmarshalText([]byte(PeerID2))) + + capDonInfo := commoncap.DON{ + ID: "capability-don", + Members: []p2ptypes.PeerID{capabilityPeerID}, + F: 0, + } + + workflowDonInfo := commoncap.DON{ + Members: []p2ptypes.PeerID{workflowPeer1ID}, + ID: "workflow-don", + F: 0, + } + + dispatcher := newTestTargetReceiverDispatcher(capabilityPeerID) + + workflowDONs := map[string]commoncap.DON{ + workflowDonInfo.ID: workflowDonInfo, + } + underlying := &testTargetReceiver{} + + receiver := remote.NewRemoteTargetReceiver(ctx, lggr, underlying, capInfo, &capDonInfo, workflowDONs, dispatcher, 100) + dispatcher.RegisterReceiver(receiver) + + workflowPeerDispatcher := dispatcher.GetDispatcherForCaller(workflowPeer1ID) + + caller, err := remote.NewRemoteTargetCaller(lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher) + require.NoError(t, err) + dispatcher.RegisterCaller(workflowPeer1ID, caller) + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "100ms", + }) + require.NoError(t, err) + + executeInputs, err := values.NewMap( + map[string]any{ + "executeValue1": "aValue1", + }, + ) + + responseCh, err := caller.Execute(ctx, + commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{}, + Config: transmissionSchedule, + Inputs: executeInputs, + }) + + require.NoError(t, err) + + response := <-responseCh + responseValue, err := response.Value.Unwrap() + require.NoError(t, err) + assert.Equal(t, "aValue1", responseValue.(string)) +} + +// Confirm that the target receiver return a response only when sufficient requests have been received + +// Also confirm that any request received after the first response is replied to + +// Check request times out if insufficient requests are received in a timely manner + +// Check request errors as expected and all error responses are received + +// Check that requests from an incorrect don are ignored? + +// Check that multiple requests from the same sender are ignored + +type testTargetReceiverDispatcher struct { + abstractDispatcher + receiver remotetypes.Receiver + callers map[p2ptypes.PeerID]remotetypes.Receiver + receiverPeerID p2ptypes.PeerID +} + +func newTestTargetReceiverDispatcher(receiverPeerID p2ptypes.PeerID) *testTargetReceiverDispatcher { + return &testTargetReceiverDispatcher{ + receiverPeerID: receiverPeerID, + callers: make(map[p2ptypes.PeerID]remotetypes.Receiver), + } +} + +func (r *testTargetReceiverDispatcher) RegisterReceiver(receiver remotetypes.Receiver) { + if r.receiver != nil { + panic("receiver already registered") + } + + r.receiver = receiver +} - // Confirm that the target receiver return a response only when sufficient requests have been received +func (r *testTargetReceiverDispatcher) GetDispatcherForCaller(callerPeerID p2ptypes.PeerID) remotetypes.Dispatcher { + dispatcher := &callerDispatcher{ + callerPeerID: callerPeerID, + broker: r, + } + return dispatcher +} + +func (r *testTargetReceiverDispatcher) RegisterCaller(callerPeerID p2ptypes.PeerID, caller remotetypes.Receiver) { + if _, ok := r.callers[callerPeerID]; ok { + panic("caller already registered") + } + + r.callers[callerPeerID] = caller +} + +func (r *testTargetReceiverDispatcher) SendToReceiver(peerID p2ptypes.PeerID, msg *remotetypes.MessageBody) { + if peerID != r.receiverPeerID { + panic("receiver peer id mismatch") + } + + msg.Receiver = r.receiverPeerID[:] + + r.receiver.Receive(msg) +} + +func (r *testTargetReceiverDispatcher) Send(callerPeerID p2ptypes.PeerID, msgBody *remotetypes.MessageBody) error { + + msgBody.Version = 1 + msgBody.Sender = r.receiverPeerID[:] + msgBody.Receiver = callerPeerID[:] + msgBody.Timestamp = time.Now().UnixMilli() + + if caller, ok := r.callers[callerPeerID]; ok { + caller.Receive(msgBody) + } else { + return fmt.Errorf("caller not found for caller peer id %s", callerPeerID.String()) + } + + return nil +} + +type callerDispatcher struct { + abstractDispatcher + callerPeerID p2ptypes.PeerID + broker *testTargetReceiverDispatcher +} + +func (t *callerDispatcher) Send(peerID p2ptypes.PeerID, msgBody *remotetypes.MessageBody) error { + msgBody.Version = 1 + msgBody.Sender = t.callerPeerID[:] + msgBody.Timestamp = time.Now().UnixMilli() + t.broker.SendToReceiver(peerID, msgBody) + return nil +} + +type abstractDispatcher struct { +} + +func (t *abstractDispatcher) SetReceiver(capabilityId string, donId string, receiver remotetypes.Receiver) error { + return nil +} +func (t *abstractDispatcher) RemoveReceiver(capabilityId string, donId string) {} - // Also confirm that any request received after the first response is replied to +type testTargetReceiver struct { +} + +func (t testTargetReceiver) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { + return commoncap.CapabilityInfo{}, nil +} + +func (t testTargetReceiver) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { + return nil +} + +func (t testTargetReceiver) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { + return nil +} - // Check request times out if insufficient requests are received in a timely manner +func (t testTargetReceiver) Execute(ctx context.Context, request commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { + ch := make(chan commoncap.CapabilityResponse, 1) - // Check request errors as expected and all error responses are received + value := request.Inputs.Underlying["executeValue1"] - // Check that requests from an incorrect don are ignored? - - // Check that multiple requests from the same sender are ignored + ch <- commoncap.CapabilityResponse{ + Value: value, + } + return ch, nil } From fbf15cf44501f17a97c274ec0479b357ae0c1acd Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Wed, 22 May 2024 14:56:25 +0100 Subject: [PATCH 11/43] wip --- core/capabilities/remote/target_caller.go | 12 +- .../remote/target_receiver_test.go | 236 ++++++++++++++++++ 2 files changed, 242 insertions(+), 6 deletions(-) diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target_caller.go index 483b14df758..acd2608143a 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target_caller.go @@ -64,11 +64,11 @@ func (c *remoteTargetCaller) Execute(parentCtx context.Context, req commoncap.Ca return nil, fmt.Errorf("failed to marshal capability request: %w", err) } - messageID := sha256.Sum256(rawRequest) + deterministicMessageID := sha256.Sum256(rawRequest) responseWaitGroup := &sync.WaitGroup{} responseWaitGroup.Add(1) - c.messageIDToWaitgroup.Store(messageID, responseWaitGroup) + c.messageIDToWaitgroup.Store(deterministicMessageID, responseWaitGroup) responseReceived := make(chan struct{}) go func() { @@ -80,21 +80,21 @@ func (c *remoteTargetCaller) Execute(parentCtx context.Context, req commoncap.Ca ctx, cancelFn := context.WithCancel(parentCtx) defer cancelFn() - if err := c.transmitRequestWithMessageID(ctx, req, messageID); err != nil { + if err := c.transmitRequestWithMessageID(ctx, req, deterministicMessageID); err != nil { return nil, fmt.Errorf("failed to transmit request: %w", err) } select { case <-responseReceived: - response, loaded := c.messageIDToResponse.LoadAndDelete(messageID) + response, loaded := c.messageIDToResponse.LoadAndDelete(deterministicMessageID) if !loaded { - return nil, fmt.Errorf("no response found for message ID %s", messageID) + return nil, fmt.Errorf("no response found for message ID %s", deterministicMessageID) } msg, ok := response.(*types.MessageBody) if !ok { - return nil, fmt.Errorf("unexpected response type %T for message ID %s", response, messageID) + return nil, fmt.Errorf("unexpected response type %T for message ID %s", response, deterministicMessageID) } if msg.Error != types.Error_OK { diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index a9191bcabf3..923eab9a460 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -2,10 +2,13 @@ package remote_test import ( "context" + "crypto/rand" "fmt" + "sync" "testing" "time" + "github.com/mr-tron/base58" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -19,6 +22,223 @@ import ( p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) +func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { + + responseTest := func(t *testing.T, response commoncap.CapabilityResponse) { + responseValue, err := response.Value.Unwrap() + require.NoError(t, err) + assert.Equal(t, "aValue1", responseValue.(string)) + } + + // Test scenarios where the number of submissions is greater than or equal to F + 1 + testRemoteTargetConsensus(t, 4, 3, 10*time.Minute, responseTest) + testRemoteTargetConsensus(t, 10, 3, 10*time.Minute, responseTest) + + errResponseTest := func(t *testing.T, response commoncap.CapabilityResponse) { + _, err := response.Value.Unwrap() + assert.NotNil(t, err) + //require.NoError(t, err) + //assert.Equal(t, "aValue1", responseValue.(string)) + } + + // Test scenario where number of submissions is less than F + 1 + testRemoteTargetConsensus(t, 4, 6, 1*time.Second, errResponseTest) + +} + +func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF uint8, + consensusTimeout time.Duration, responseTest func(t *testing.T, response commoncap.CapabilityResponse)) { + lggr := logger.TestLogger(t) + ctx := testutils.Context(t) + capInfo := commoncap.CapabilityInfo{ + ID: "cap_id", + CapabilityType: commoncap.CapabilityTypeTarget, + Description: "Remote Target", + Version: "0.0.1", + } + capabilityPeerID := p2ptypes.PeerID{} + require.NoError(t, capabilityPeerID.UnmarshalText([]byte(newPeerID()))) + + capDonInfo := commoncap.DON{ + ID: "capability-don", + Members: []p2ptypes.PeerID{capabilityPeerID}, + F: 0, + } + + // Define the number of workflow peers + + workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) + for i := 0; i < numWorkflowPeers; i++ { + workflowPeerID := p2ptypes.PeerID{} + require.NoError(t, workflowPeerID.UnmarshalText([]byte(newPeerID()))) + workflowPeers[i] = workflowPeerID + } + + workflowDonInfo := commoncap.DON{ + Members: workflowPeers, + ID: "workflow-don", + F: workflowDonF, + } + + dispatcher := newTestTargetReceiverDispatcher(capabilityPeerID) + + workflowDONs := map[string]commoncap.DON{ + workflowDonInfo.ID: workflowDonInfo, + } + underlying := &testTargetReceiver{} + + receiver := remote.NewRemoteTargetReceiver(ctx, lggr, underlying, capInfo, &capDonInfo, workflowDONs, dispatcher, consensusTimeout) + dispatcher.RegisterReceiver(receiver) + + callers := make([]commoncap.TargetCapability, numWorkflowPeers) + for i := 0; i < numWorkflowPeers; i++ { + workflowPeerDispatcher := dispatcher.GetDispatcherForCaller(workflowPeers[i]) + caller, err := remote.NewRemoteTargetCaller(lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher) + require.NoError(t, err) + dispatcher.RegisterCaller(workflowPeers[i], caller) + callers[i] = caller + } + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "100ms", + }) + require.NoError(t, err) + + executeInputs, err := values.NewMap( + map[string]any{ + "executeValue1": "aValue1", + }, + ) + + wg := &sync.WaitGroup{} + wg.Add(len(callers)) + + // Fire off all the requests + for _, caller := range callers { + go func(caller commoncap.TargetCapability) { + responseCh, err := caller.Execute(ctx, + commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{}, + Config: transmissionSchedule, + Inputs: executeInputs, + }) + + require.NoError(t, err) + + response := <-responseCh + responseTest(t, response) + wg.Done() + }(caller) + } + + wg.Wait() +} + +func Test_TargetReceiverConsensusWithMultipleCallers2(t *testing.T) { + lggr := logger.TestLogger(t) + ctx := testutils.Context(t) + capInfo := commoncap.CapabilityInfo{ + ID: "cap_id", + CapabilityType: commoncap.CapabilityTypeTarget, + Description: "Remote Target", + Version: "0.0.1", + } + capabilityPeerID := p2ptypes.PeerID{} + require.NoError(t, capabilityPeerID.UnmarshalText([]byte(newPeerID()))) + workflowPeer1ID := p2ptypes.PeerID{} + require.NoError(t, workflowPeer1ID.UnmarshalText([]byte(newPeerID()))) + workflowPeer2ID := p2ptypes.PeerID{} + require.NoError(t, workflowPeer1ID.UnmarshalText([]byte(newPeerID()))) + + capDonInfo := commoncap.DON{ + ID: "capability-don", + Members: []p2ptypes.PeerID{capabilityPeerID}, + F: 0, + } + + workflowPeers := []p2ptypes.PeerID{workflowPeer1ID, workflowPeer2ID} + workflowDonInfo := commoncap.DON{ + Members: workflowPeers, + ID: "workflow-don", + F: 1, + } + + dispatcher := newTestTargetReceiverDispatcher(capabilityPeerID) + + workflowDONs := map[string]commoncap.DON{ + workflowDonInfo.ID: workflowDonInfo, + } + underlying := &testTargetReceiver{} + + receiver := remote.NewRemoteTargetReceiver(ctx, lggr, underlying, capInfo, &capDonInfo, workflowDONs, dispatcher, 100) + dispatcher.RegisterReceiver(receiver) + + workflowPeerDispatcher1 := dispatcher.GetDispatcherForCaller(workflowPeer1ID) + + caller1, err := remote.NewRemoteTargetCaller(lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher1) + require.NoError(t, err) + dispatcher.RegisterCaller(workflowPeer1ID, caller1) + + workflowPeerDispatcher2 := dispatcher.GetDispatcherForCaller(workflowPeer2ID) + + caller2, err := remote.NewRemoteTargetCaller(lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher2) + require.NoError(t, err) + dispatcher.RegisterCaller(workflowPeer2ID, caller2) + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "100ms", + }) + require.NoError(t, err) + + executeInputs, err := values.NewMap( + map[string]any{ + "executeValue1": "aValue1", + }, + ) + + wg := &sync.WaitGroup{} + wg.Add(len(workflowPeers)) + + go func() { + responseCh1, err := caller1.Execute(ctx, + commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{}, + Config: transmissionSchedule, + Inputs: executeInputs, + }) + + require.NoError(t, err) + + response1 := <-responseCh1 + responseValue, err := response1.Value.Unwrap() + require.NoError(t, err) + assert.Equal(t, "aValue1", responseValue.(string)) + wg.Done() + }() + + go func() { + responseCh2, err := caller2.Execute(ctx, + commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{}, + Config: transmissionSchedule, + Inputs: executeInputs, + }) + + require.NoError(t, err) + + response2 := <-responseCh2 + responseValue, err := response2.Value.Unwrap() + require.NoError(t, err) + assert.Equal(t, "aValue1", responseValue.(string)) + wg.Done() + }() + + wg.Wait() + +} + func Test_TargetReceiverConsensus(t *testing.T) { lggr := logger.TestLogger(t) ctx := testutils.Context(t) @@ -212,3 +432,19 @@ func (t testTargetReceiver) Execute(ctx context.Context, request commoncap.Capab return ch, nil } + +func libp2pMagic() []byte { + return []byte{0x00, 0x24, 0x08, 0x01, 0x12, 0x20} +} + +func newPeerID() string { + var privKey [32]byte + _, err := rand.Read(privKey[:]) + if err != nil { + panic(err) + } + + peerID := append(libp2pMagic(), privKey[:]...) + + return base58.Encode(peerID[:]) +} From 8ae4e7344318410b57e1e2d6a6bfb6148cc80696 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Wed, 22 May 2024 15:59:40 +0100 Subject: [PATCH 12/43] wip wip --- core/capabilities/remote/target_receiver.go | 48 +++-- .../remote/target_receiver_test.go | 193 ++---------------- 2 files changed, 48 insertions(+), 193 deletions(-) diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index 2f157fceacb..b83c337a82e 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -27,6 +27,7 @@ type remoteTargetReceiver struct { lggr logger.Logger msgIDToExecuteRequest map[[32]byte]executeRequest + requestTimeout time.Duration receiveLock sync.Mutex } @@ -36,6 +37,19 @@ var _ types.Receiver = &remoteTargetReceiver{} func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo *capabilities.DON, workflowDONs map[string]commoncap.DON, dispatcher types.Dispatcher, requestTimeout time.Duration) *remoteTargetReceiver { + receiver := &remoteTargetReceiver{ + underlying: underlying, + capInfo: capInfo, + localDonInfo: localDonInfo, + workflowDONs: workflowDONs, + dispatcher: dispatcher, + + msgIDToExecuteRequest: map[[32]byte]executeRequest{}, + requestTimeout: requestTimeout, + + lggr: lggr, + } + go func() { timer := time.NewTimer(requestTimeout) defer timer.Stop() @@ -44,22 +58,12 @@ func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, underlying case <-ctx.Done(): return case <-timer.C: - // TODO Implement timeout handling and cleanup logic of the execute requests cache + receiver.ExpireRequests(ctx) } } }() - return &remoteTargetReceiver{ - underlying: underlying, - capInfo: capInfo, - localDonInfo: localDonInfo, - workflowDONs: workflowDONs, - dispatcher: dispatcher, - - msgIDToExecuteRequest: map[[32]byte]executeRequest{}, - - lggr: lggr, - } + return receiver } type executeRequest struct { @@ -67,6 +71,26 @@ type executeRequest struct { response *types.MessageBody callingDonID string firstRequestTime time.Time + cancelContext func() +} + +func (r *remoteTargetReceiver) ExpireRequests(ctx context.Context) { + /*r.receiveLock.Lock() + defer r.receiveLock.Unlock() + + for messageId, executeReq := range r.msgIDToExecuteRequest { + if time.Since(executeReq.firstRequestTime) > r.requestTimeout { + if executeReq.response == nil { + // + + } else { + delete(r.msgIDToExecuteRequest, messageId) + } + + } + + } + */ } func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index 923eab9a460..0ac0d503a5a 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -31,19 +31,23 @@ func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { } // Test scenarios where the number of submissions is greater than or equal to F + 1 + testRemoteTargetConsensus(t, 1, 0, 10*time.Minute, responseTest) testRemoteTargetConsensus(t, 4, 3, 10*time.Minute, responseTest) testRemoteTargetConsensus(t, 10, 3, 10*time.Minute, responseTest) - errResponseTest := func(t *testing.T, response commoncap.CapabilityResponse) { - _, err := response.Value.Unwrap() - assert.NotNil(t, err) - //require.NoError(t, err) - //assert.Equal(t, "aValue1", responseValue.(string)) - } + /* + errResponseTest := func(t *testing.T, response commoncap.CapabilityResponse) { + _, err := response.Value.Unwrap() + assert.NotNil(t, err) + //require.NoError(t, err) + //assert.Equal(t, "aValue1", responseValue.(string)) + } - // Test scenario where number of submissions is less than F + 1 - testRemoteTargetConsensus(t, 4, 6, 1*time.Second, errResponseTest) + // Test scenario where number of submissions is less than F + 1 + // TODO implement the timeout handling and cleanup logic of the execute requests cache + testRemoteTargetConsensus(t, 4, 6, 1*time.Second, errResponseTest) + */ } func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF uint8, @@ -135,179 +139,6 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF wg.Wait() } -func Test_TargetReceiverConsensusWithMultipleCallers2(t *testing.T) { - lggr := logger.TestLogger(t) - ctx := testutils.Context(t) - capInfo := commoncap.CapabilityInfo{ - ID: "cap_id", - CapabilityType: commoncap.CapabilityTypeTarget, - Description: "Remote Target", - Version: "0.0.1", - } - capabilityPeerID := p2ptypes.PeerID{} - require.NoError(t, capabilityPeerID.UnmarshalText([]byte(newPeerID()))) - workflowPeer1ID := p2ptypes.PeerID{} - require.NoError(t, workflowPeer1ID.UnmarshalText([]byte(newPeerID()))) - workflowPeer2ID := p2ptypes.PeerID{} - require.NoError(t, workflowPeer1ID.UnmarshalText([]byte(newPeerID()))) - - capDonInfo := commoncap.DON{ - ID: "capability-don", - Members: []p2ptypes.PeerID{capabilityPeerID}, - F: 0, - } - - workflowPeers := []p2ptypes.PeerID{workflowPeer1ID, workflowPeer2ID} - workflowDonInfo := commoncap.DON{ - Members: workflowPeers, - ID: "workflow-don", - F: 1, - } - - dispatcher := newTestTargetReceiverDispatcher(capabilityPeerID) - - workflowDONs := map[string]commoncap.DON{ - workflowDonInfo.ID: workflowDonInfo, - } - underlying := &testTargetReceiver{} - - receiver := remote.NewRemoteTargetReceiver(ctx, lggr, underlying, capInfo, &capDonInfo, workflowDONs, dispatcher, 100) - dispatcher.RegisterReceiver(receiver) - - workflowPeerDispatcher1 := dispatcher.GetDispatcherForCaller(workflowPeer1ID) - - caller1, err := remote.NewRemoteTargetCaller(lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher1) - require.NoError(t, err) - dispatcher.RegisterCaller(workflowPeer1ID, caller1) - - workflowPeerDispatcher2 := dispatcher.GetDispatcherForCaller(workflowPeer2ID) - - caller2, err := remote.NewRemoteTargetCaller(lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher2) - require.NoError(t, err) - dispatcher.RegisterCaller(workflowPeer2ID, caller2) - - transmissionSchedule, err := values.NewMap(map[string]any{ - "schedule": transmission.Schedule_AllAtOnce, - "deltaStage": "100ms", - }) - require.NoError(t, err) - - executeInputs, err := values.NewMap( - map[string]any{ - "executeValue1": "aValue1", - }, - ) - - wg := &sync.WaitGroup{} - wg.Add(len(workflowPeers)) - - go func() { - responseCh1, err := caller1.Execute(ctx, - commoncap.CapabilityRequest{ - Metadata: commoncap.RequestMetadata{}, - Config: transmissionSchedule, - Inputs: executeInputs, - }) - - require.NoError(t, err) - - response1 := <-responseCh1 - responseValue, err := response1.Value.Unwrap() - require.NoError(t, err) - assert.Equal(t, "aValue1", responseValue.(string)) - wg.Done() - }() - - go func() { - responseCh2, err := caller2.Execute(ctx, - commoncap.CapabilityRequest{ - Metadata: commoncap.RequestMetadata{}, - Config: transmissionSchedule, - Inputs: executeInputs, - }) - - require.NoError(t, err) - - response2 := <-responseCh2 - responseValue, err := response2.Value.Unwrap() - require.NoError(t, err) - assert.Equal(t, "aValue1", responseValue.(string)) - wg.Done() - }() - - wg.Wait() - -} - -func Test_TargetReceiverConsensus(t *testing.T) { - lggr := logger.TestLogger(t) - ctx := testutils.Context(t) - capInfo := commoncap.CapabilityInfo{ - ID: "cap_id", - CapabilityType: commoncap.CapabilityTypeTarget, - Description: "Remote Target", - Version: "0.0.1", - } - capabilityPeerID := p2ptypes.PeerID{} - require.NoError(t, capabilityPeerID.UnmarshalText([]byte(PeerID1))) - workflowPeer1ID := p2ptypes.PeerID{} - require.NoError(t, workflowPeer1ID.UnmarshalText([]byte(PeerID2))) - - capDonInfo := commoncap.DON{ - ID: "capability-don", - Members: []p2ptypes.PeerID{capabilityPeerID}, - F: 0, - } - - workflowDonInfo := commoncap.DON{ - Members: []p2ptypes.PeerID{workflowPeer1ID}, - ID: "workflow-don", - F: 0, - } - - dispatcher := newTestTargetReceiverDispatcher(capabilityPeerID) - - workflowDONs := map[string]commoncap.DON{ - workflowDonInfo.ID: workflowDonInfo, - } - underlying := &testTargetReceiver{} - - receiver := remote.NewRemoteTargetReceiver(ctx, lggr, underlying, capInfo, &capDonInfo, workflowDONs, dispatcher, 100) - dispatcher.RegisterReceiver(receiver) - - workflowPeerDispatcher := dispatcher.GetDispatcherForCaller(workflowPeer1ID) - - caller, err := remote.NewRemoteTargetCaller(lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher) - require.NoError(t, err) - dispatcher.RegisterCaller(workflowPeer1ID, caller) - - transmissionSchedule, err := values.NewMap(map[string]any{ - "schedule": transmission.Schedule_AllAtOnce, - "deltaStage": "100ms", - }) - require.NoError(t, err) - - executeInputs, err := values.NewMap( - map[string]any{ - "executeValue1": "aValue1", - }, - ) - - responseCh, err := caller.Execute(ctx, - commoncap.CapabilityRequest{ - Metadata: commoncap.RequestMetadata{}, - Config: transmissionSchedule, - Inputs: executeInputs, - }) - - require.NoError(t, err) - - response := <-responseCh - responseValue, err := response.Value.Unwrap() - require.NoError(t, err) - assert.Equal(t, "aValue1", responseValue.(string)) -} - // Confirm that the target receiver return a response only when sufficient requests have been received // Also confirm that any request received after the first response is replied to From 393cdabeafd5be903274e15a06b28b7108a6cb81 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Wed, 22 May 2024 17:37:10 +0100 Subject: [PATCH 13/43] request timeout handling --- core/capabilities/remote/target_receiver.go | 22 +++++++++++---- .../remote/target_receiver_test.go | 28 ++++++++----------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index b83c337a82e..0edf9a67929 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -75,22 +75,34 @@ type executeRequest struct { } func (r *remoteTargetReceiver) ExpireRequests(ctx context.Context) { - /*r.receiveLock.Lock() + r.receiveLock.Lock() defer r.receiveLock.Unlock() for messageId, executeReq := range r.msgIDToExecuteRequest { if time.Since(executeReq.firstRequestTime) > r.requestTimeout { if executeReq.response == nil { - // + responseMsg := &types.MessageBody{ + CapabilityId: r.capInfo.ID, + CapabilityDonId: r.localDonInfo.ID, + CallerDonId: executeReq.callingDonID, + Method: types.MethodExecute, + MessageId: messageId[:], + // TODO sort out error codes - this should be a timeout error + Error: types.Error_CAPABILITY_NOT_FOUND, + } - } else { - delete(r.msgIDToExecuteRequest, messageId) + for peerID := range executeReq.fromPeers { + if err := r.dispatcher.Send(peerID, responseMsg); err != nil { + r.lggr.Errorw("failed to send time out response", "peer", peerID, "err", err) + } + } } + delete(r.msgIDToExecuteRequest, messageId) } } - */ + } func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index 0ac0d503a5a..ebcdbdd9721 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -24,7 +24,10 @@ import ( func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { - responseTest := func(t *testing.T, response commoncap.CapabilityResponse) { + responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + + require.NoError(t, responseError) + response := <-responseCh responseValue, err := response.Value.Unwrap() require.NoError(t, err) assert.Equal(t, "aValue1", responseValue.(string)) @@ -35,23 +38,17 @@ func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { testRemoteTargetConsensus(t, 4, 3, 10*time.Minute, responseTest) testRemoteTargetConsensus(t, 10, 3, 10*time.Minute, responseTest) - /* - errResponseTest := func(t *testing.T, response commoncap.CapabilityResponse) { - _, err := response.Value.Unwrap() - assert.NotNil(t, err) - //require.NoError(t, err) - //assert.Equal(t, "aValue1", responseValue.(string)) - } + errResponseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + assert.NotNil(t, responseError) + } - // Test scenario where number of submissions is less than F + 1 - // TODO implement the timeout handling and cleanup logic of the execute requests cache - testRemoteTargetConsensus(t, 4, 6, 1*time.Second, errResponseTest) + // Test scenario where number of submissions is less than F + 1 + testRemoteTargetConsensus(t, 4, 6, 1*time.Second, errResponseTest) - */ } func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF uint8, - consensusTimeout time.Duration, responseTest func(t *testing.T, response commoncap.CapabilityResponse)) { + consensusTimeout time.Duration, responseTest func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error)) { lggr := logger.TestLogger(t) ctx := testutils.Context(t) capInfo := commoncap.CapabilityInfo{ @@ -128,10 +125,7 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF Inputs: executeInputs, }) - require.NoError(t, err) - - response := <-responseCh - responseTest(t, response) + responseTest(t, responseCh, err) wg.Done() }(caller) } From 79d98b50b1be66698c3e509c46797b1c71d2087d Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Wed, 22 May 2024 17:38:09 +0100 Subject: [PATCH 14/43] request timeout handling --- core/capabilities/remote/target_receiver_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index ebcdbdd9721..30f731b74d5 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -44,6 +44,7 @@ func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { // Test scenario where number of submissions is less than F + 1 testRemoteTargetConsensus(t, 4, 6, 1*time.Second, errResponseTest) + testRemoteTargetConsensus(t, 10, 10, 1*time.Second, errResponseTest) } From f2587c6830f5d74945717823e910fd6cab26942f Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Wed, 22 May 2024 17:49:14 +0100 Subject: [PATCH 15/43] context cancellation --- core/capabilities/remote/target_receiver.go | 6 +++-- .../remote/target_receiver_test.go | 22 +++++++------------ 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index 0edf9a67929..420975e1e81 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -71,7 +71,6 @@ type executeRequest struct { response *types.MessageBody callingDonID string firstRequestTime time.Time - cancelContext func() } func (r *remoteTargetReceiver) ExpireRequests(ctx context.Context) { @@ -80,6 +79,7 @@ func (r *remoteTargetReceiver) ExpireRequests(ctx context.Context) { for messageId, executeReq := range r.msgIDToExecuteRequest { if time.Since(executeReq.firstRequestTime) > r.requestTimeout { + if executeReq.response == nil { responseMsg := &types.MessageBody{ CapabilityId: r.capInfo.ID, @@ -164,7 +164,9 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { capabilityRequest, err := pb.UnmarshalCapabilityRequest(msg.Payload) if err == nil { - responseCh, err := r.underlying.Execute(ctx, capabilityRequest) + ctxWithTimeout, cancel := context.WithTimeout(ctx, r.requestTimeout) + defer cancel() + responseCh, err := r.underlying.Execute(ctxWithTimeout, capabilityRequest) if err == nil { // TODO handle the case where the capability returns a stream of responses response := <-responseCh diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index 30f731b74d5..5122ddd40f3 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -46,6 +46,14 @@ func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { testRemoteTargetConsensus(t, 4, 6, 1*time.Second, errResponseTest) testRemoteTargetConsensus(t, 10, 10, 1*time.Second, errResponseTest) + // Check request errors as expected and all error responses are received + + // Check that requests from an incorrect don are ignored? + + // Check that multiple requests from the same sender are ignored + + // Test with different transmission schedules ? + } func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF uint8, @@ -67,8 +75,6 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF F: 0, } - // Define the number of workflow peers - workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerID := p2ptypes.PeerID{} @@ -134,18 +140,6 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF wg.Wait() } -// Confirm that the target receiver return a response only when sufficient requests have been received - -// Also confirm that any request received after the first response is replied to - -// Check request times out if insufficient requests are received in a timely manner - -// Check request errors as expected and all error responses are received - -// Check that requests from an incorrect don are ignored? - -// Check that multiple requests from the same sender are ignored - type testTargetReceiverDispatcher struct { abstractDispatcher receiver remotetypes.Receiver From 0ce39437a733f10589cdfb81576e6572bcd0c0a7 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Thu, 23 May 2024 11:07:55 +0100 Subject: [PATCH 16/43] update message id --- core/capabilities/remote/target_caller.go | 20 ++++++++++++------- .../capabilities/remote/target_caller_test.go | 12 +++++++++++ core/capabilities/remote/target_receiver.go | 20 ++++++++----------- .../remote/target_receiver_test.go | 11 +++++++--- 4 files changed, 41 insertions(+), 22 deletions(-) diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target_caller.go index acd2608143a..ca376f54e60 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target_caller.go @@ -2,7 +2,6 @@ package remote import ( "context" - "crypto/sha256" "errors" "fmt" "sync" @@ -59,12 +58,11 @@ func (c *remoteTargetCaller) Execute(parentCtx context.Context, req commoncap.Ca // TODO Assuming here that the capability request is deterministically unique across the nodes, need to confirm this is reasonable assumption // TODO also check pb marshalliing is by default deterministic in the version being used - rawRequest, err := pb.MarshalCapabilityRequest(req) - if err != nil { - return nil, fmt.Errorf("failed to marshal capability request: %w", err) + if req.Metadata.WorkflowID == "" || req.Metadata.WorkflowExecutionID == "" { + return nil, errors.New("workflow ID and execution ID must be set in request metadata") } - deterministicMessageID := sha256.Sum256(rawRequest) + deterministicMessageID := req.Metadata.WorkflowID + req.Metadata.WorkflowExecutionID responseWaitGroup := &sync.WaitGroup{} responseWaitGroup.Add(1) @@ -119,7 +117,7 @@ func (c *remoteTargetCaller) Execute(parentCtx context.Context, req commoncap.Ca } // transmitRequestWithMessageID transmits a capability request to remote capabilities according to the transmission configuration -func (c *remoteTargetCaller) transmitRequestWithMessageID(ctx context.Context, req commoncap.CapabilityRequest, messageID [32]byte) error { +func (c *remoteTargetCaller) transmitRequestWithMessageID(ctx context.Context, req commoncap.CapabilityRequest, messageID string) error { rawRequest, err := pb.MarshalCapabilityRequest(req) if err != nil { return fmt.Errorf("failed to marshal capability request: %w", err) @@ -137,7 +135,7 @@ func (c *remoteTargetCaller) transmitRequestWithMessageID(ctx context.Context, r CallerDonId: c.localDONInfo.ID, Method: types.MethodExecute, Payload: rawRequest, - MessageId: messageID[:], + MessageId: []byte(messageID), } peerIDToDelay, err := transmission.GetPeerIDToTransmissionDelay(c.remoteCapabilityDonInfo.Members, c.localDONInfo.Config.SharedSecret, req.Metadata.WorkflowID, req.Metadata.WorkflowExecutionID, tc) @@ -168,6 +166,14 @@ func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { // TODO handle the case where the capability returns a stream of responses messageID := getMessageID(msg) + /* + + what aggregation of result f + 1 ? hash it ? + + failure - 2f + 1 and a timeout + + */ + wg, loaded := c.messageIDToWaitgroup.LoadAndDelete(messageID) if loaded { wg.(*sync.WaitGroup).Done() diff --git a/core/capabilities/remote/target_caller_test.go b/core/capabilities/remote/target_caller_test.go index 42437b79a63..bab0c0e13b8 100644 --- a/core/capabilities/remote/target_caller_test.go +++ b/core/capabilities/remote/target_caller_test.go @@ -68,6 +68,10 @@ func Test_TargetCallerExecuteContextTimeout(t *testing.T) { _, err = caller.Execute(ctxWithTimeout, commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{ + WorkflowID: "workflowID", + WorkflowExecutionID: "workflowExecutionID", + }, Config: transmissionSchedule, }) @@ -139,6 +143,10 @@ func Test_TargetCallerExecute(t *testing.T) { resultCh, err := caller.Execute(ctx, commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{ + WorkflowID: "workflowID", + WorkflowExecutionID: "workflowExecutionID", + }, Config: transmissionSchedule, }) @@ -209,6 +217,10 @@ func Test_TargetCallerExecuteWithError(t *testing.T) { _, err = caller.Execute(ctx, commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{ + WorkflowID: "workflowID", + WorkflowExecutionID: "workflowExecutionID", + }, Config: transmissionSchedule, }) diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index 420975e1e81..9bdc48c22b5 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -1,9 +1,5 @@ package remote -// here the only executes when it recieves a report from f + 1 nodes, can use the message cache to collect up these reports - -// the chain write is waiting for f + 1 reports to be collected before it will execute the transmission - import ( "context" "sync" @@ -26,7 +22,7 @@ type remoteTargetReceiver struct { dispatcher types.Dispatcher lggr logger.Logger - msgIDToExecuteRequest map[[32]byte]executeRequest + msgIDToExecuteRequest map[string]executeRequest requestTimeout time.Duration receiveLock sync.Mutex @@ -44,7 +40,7 @@ func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, underlying workflowDONs: workflowDONs, dispatcher: dispatcher, - msgIDToExecuteRequest: map[[32]byte]executeRequest{}, + msgIDToExecuteRequest: map[string]executeRequest{}, requestTimeout: requestTimeout, lggr: lggr, @@ -86,7 +82,7 @@ func (r *remoteTargetReceiver) ExpireRequests(ctx context.Context) { CapabilityDonId: r.localDonInfo.ID, CallerDonId: executeReq.callingDonID, Method: types.MethodExecute, - MessageId: messageId[:], + MessageId: []byte(messageId), // TODO sort out error codes - this should be a timeout error Error: types.Error_CAPABILITY_NOT_FOUND, } @@ -114,6 +110,8 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { r.receiveLock.Lock() defer r.receiveLock.Unlock() + // TODO multithread this + if msg.Method != types.MethodExecute { r.lggr.Errorw("received request for unsupported method type", "method", msg.Method) return @@ -159,7 +157,7 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { CapabilityDonId: r.localDonInfo.ID, CallerDonId: msg.CallerDonId, Method: types.MethodExecute, - MessageId: messageId[:], + MessageId: []byte(messageId), } capabilityRequest, err := pb.UnmarshalCapabilityRequest(msg.Payload) @@ -198,8 +196,6 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { } -func getMessageID(msg *types.MessageBody) [32]byte { - var messageId [32]byte - copy(messageId[:], msg.MessageId) - return messageId +func getMessageID(msg *types.MessageBody) string { + return string(msg.MessageId) } diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index 5122ddd40f3..587b5aa2f6f 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -46,6 +46,8 @@ func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { testRemoteTargetConsensus(t, 4, 6, 1*time.Second, errResponseTest) testRemoteTargetConsensus(t, 10, 10, 1*time.Second, errResponseTest) + // Context cancellation test - use an underlying capability that blocks until the context is cancelled + // Check request errors as expected and all error responses are received // Check that requests from an incorrect don are ignored? @@ -127,9 +129,12 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF go func(caller commoncap.TargetCapability) { responseCh, err := caller.Execute(ctx, commoncap.CapabilityRequest{ - Metadata: commoncap.RequestMetadata{}, - Config: transmissionSchedule, - Inputs: executeInputs, + Metadata: commoncap.RequestMetadata{ + WorkflowID: "workflowID", + WorkflowExecutionID: "workflowExecutionID", + }, + Config: transmissionSchedule, + Inputs: executeInputs, }) responseTest(t, responseCh, err) From 7b2be1422521d849ea0cc7a99851cbd005c21d47 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Thu, 23 May 2024 12:49:52 +0100 Subject: [PATCH 17/43] refactored caller to return on f + 1 responses --- core/capabilities/remote/target_caller.go | 143 ++++++------ .../capabilities/remote/target_caller_test.go | 3 +- core/capabilities/remote/types/generate.go | 4 + core/capabilities/remote/types/message.pb.go | 221 +++++++++--------- 4 files changed, 194 insertions(+), 177 deletions(-) create mode 100644 core/capabilities/remote/types/generate.go diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target_caller.go index ca376f54e60..489fdb040f4 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target_caller.go @@ -2,6 +2,7 @@ package remote import ( "context" + "crypto/sha256" "errors" "fmt" "sync" @@ -18,13 +19,14 @@ import ( // remoteTargetCaller/Receiver are shims translating between capability API calls and network messages type remoteTargetCaller struct { + lggr logger.Logger remoteCapabilityInfo commoncap.CapabilityInfo remoteCapabilityDonInfo capabilities.DON localDONInfo capabilities.DON dispatcher types.Dispatcher - lggr logger.Logger - messageIDToWaitgroup sync.Map - messageIDToResponse sync.Map + + messageIDToExecuteRequest map[string]*callerExecuteRequest + mutex sync.Mutex } var _ commoncap.TargetCapability = &remoteTargetCaller{} @@ -33,11 +35,12 @@ var _ types.Receiver = &remoteTargetCaller{} func NewRemoteTargetCaller(lggr logger.Logger, remoteCapabilityInfo commoncap.CapabilityInfo, remoteCapabilityDonInfo capabilities.DON, localDonInfo capabilities.DON, dispatcher types.Dispatcher) (*remoteTargetCaller, error) { return &remoteTargetCaller{ - remoteCapabilityInfo: remoteCapabilityInfo, - remoteCapabilityDonInfo: remoteCapabilityDonInfo, - localDONInfo: localDonInfo, - dispatcher: dispatcher, - lggr: lggr, + lggr: lggr, + remoteCapabilityInfo: remoteCapabilityInfo, + remoteCapabilityDonInfo: remoteCapabilityDonInfo, + localDONInfo: localDonInfo, + dispatcher: dispatcher, + messageIDToExecuteRequest: make(map[string]*callerExecuteRequest), }, nil } @@ -54,66 +57,38 @@ func (c *remoteTargetCaller) UnregisterFromWorkflow(ctx context.Context, request } func (c *remoteTargetCaller) Execute(parentCtx context.Context, req commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { + // TODO To keep the initial implementation simple make it single threaded - will this need to be concurrent? + c.mutex.Lock() + defer c.mutex.Unlock() - // TODO Assuming here that the capability request is deterministically unique across the nodes, need to confirm this is reasonable assumption - // TODO also check pb marshalliing is by default deterministic in the version being used - - if req.Metadata.WorkflowID == "" || req.Metadata.WorkflowExecutionID == "" { - return nil, errors.New("workflow ID and execution ID must be set in request metadata") + deterministicMessageID, err := getDeterministicMessageID(req) + if err != nil { + return nil, fmt.Errorf("failed to get deterministic message ID from request: %w", err) } - deterministicMessageID := req.Metadata.WorkflowID + req.Metadata.WorkflowExecutionID - - responseWaitGroup := &sync.WaitGroup{} - responseWaitGroup.Add(1) - c.messageIDToWaitgroup.Store(deterministicMessageID, responseWaitGroup) + if _, ok := c.messageIDToExecuteRequest[deterministicMessageID]; ok { + return nil, fmt.Errorf("request with message ID %s already exists", deterministicMessageID) + } - responseReceived := make(chan struct{}) - go func() { - responseWaitGroup.Wait() - close(responseReceived) - }() + transmissionCtx, transmissionCancelFn := context.WithCancel(parentCtx) + execRequest := newCallerExecuteRequest(transmissionCancelFn, int(c.remoteCapabilityDonInfo.F+1)) - // Once a response is received from a remote capability further transmission should be cancelled - ctx, cancelFn := context.WithCancel(parentCtx) - defer cancelFn() + c.messageIDToExecuteRequest[deterministicMessageID] = execRequest - if err := c.transmitRequestWithMessageID(ctx, req, deterministicMessageID); err != nil { + if err = c.transmitRequestWithMessageID(transmissionCtx, req, deterministicMessageID); err != nil { return nil, fmt.Errorf("failed to transmit request: %w", err) } - select { - case <-responseReceived: - - response, loaded := c.messageIDToResponse.LoadAndDelete(deterministicMessageID) - if !loaded { - return nil, fmt.Errorf("no response found for message ID %s", deterministicMessageID) - } - - msg, ok := response.(*types.MessageBody) - if !ok { - return nil, fmt.Errorf("unexpected response type %T for message ID %s", response, deterministicMessageID) - } - - if msg.Error != types.Error_OK { - return nil, fmt.Errorf("remote capability returned error: %s", msg.Error) - } - - capabilityResponse, err := pb.UnmarshalCapabilityResponse(msg.Payload) - if err != nil { - return nil, fmt.Errorf("failed to unmarshal capability response: %w", err) - } - - // TODO handle the case where the capability returns a stream of responses - resultCh := make(chan commoncap.CapabilityResponse, 1) - resultCh <- capabilityResponse - close(resultCh) + return execRequest.responseCh, nil +} - return resultCh, nil - case <-ctx.Done(): - return nil, ctx.Err() +func getDeterministicMessageID(req commoncap.CapabilityRequest) (string, error) { + if req.Metadata.WorkflowID == "" || req.Metadata.WorkflowExecutionID == "" { + return "", errors.New("workflow ID and workflow execution ID must be set in request metadata") } + deterministicMessageID := req.Metadata.WorkflowID + req.Metadata.WorkflowExecutionID + return deterministicMessageID, nil } // transmitRequestWithMessageID transmits a capability request to remote capabilities according to the transmission configuration @@ -123,7 +98,6 @@ func (c *remoteTargetCaller) transmitRequestWithMessageID(ctx context.Context, r return fmt.Errorf("failed to marshal capability request: %w", err) } - // TODO should the transmission config be passed into the constructor rather than pulled from the request? tc, err := transmission.ExtractTransmissionConfig(req.Config) if err != nil { return fmt.Errorf("failed to extract transmission config from request config: %w", err) @@ -162,22 +136,57 @@ func (c *remoteTargetCaller) transmitRequestWithMessageID(ctx context.Context, r } func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { + c.mutex.Lock() + defer c.mutex.Unlock() - // TODO handle the case where the capability returns a stream of responses messageID := getMessageID(msg) - /* + req := c.messageIDToExecuteRequest[messageID] + if req == nil { + c.lggr.Warnw("received response for unknown message ID", "messageID", messageID, "sender", msg.Sender) + return + } - what aggregation of result f + 1 ? hash it ? + req.addResponse(msg.Payload) +} - failure - 2f + 1 and a timeout +type callerExecuteRequest struct { + responseCh chan commoncap.CapabilityResponse + transmissionCancelFn context.CancelFunc + creationTime time.Time + responseIDCount map[[32]byte]int - */ + requiredIdenticalResponses int +} - wg, loaded := c.messageIDToWaitgroup.LoadAndDelete(messageID) - if loaded { - wg.(*sync.WaitGroup).Done() - c.messageIDToResponse.Store(messageID, msg) - return +func newCallerExecuteRequest(transmissionCancelFn context.CancelFunc, requiredIdenticalResponses int) *callerExecuteRequest { + return &callerExecuteRequest{ + responseCh: make(chan commoncap.CapabilityResponse, 1), + transmissionCancelFn: transmissionCancelFn, + responseIDCount: make(map[[32]byte]int), + creationTime: time.Now(), + requiredIdenticalResponses: requiredIdenticalResponses, + } +} + +func (c *callerExecuteRequest) complete() bool { + return len(c.responseIDCount) >= c.requiredIdenticalResponses +} + +// TODO addResponse assumes that only one response is received from each peer, if streaming responses need to be supported this will need to be updated +func (c *callerExecuteRequest) addResponse(response []byte) { + payloadId := sha256.Sum256(response) + c.responseIDCount[payloadId]++ + + if c.responseIDCount[payloadId] == c.requiredIdenticalResponses { + defer close(c.responseCh) + c.transmissionCancelFn() + + capabilityResponse, err := pb.UnmarshalCapabilityResponse(response) + if err != nil { + c.responseCh <- commoncap.CapabilityResponse{Err: fmt.Errorf("failed to unmarshal capability response: %w", err)} + } else { + c.responseCh <- commoncap.CapabilityResponse{Value: capabilityResponse.Value} + } } } diff --git a/core/capabilities/remote/target_caller_test.go b/core/capabilities/remote/target_caller_test.go index bab0c0e13b8..eb11abf1ef3 100644 --- a/core/capabilities/remote/target_caller_test.go +++ b/core/capabilities/remote/target_caller_test.go @@ -64,7 +64,8 @@ func Test_TargetCallerExecuteContextTimeout(t *testing.T) { }) require.NoError(t, err) - ctxWithTimeout, _ := context.WithTimeout(ctx, 10*time.Millisecond) + ctxWithTimeout, cancel := context.WithTimeout(ctx, 10*time.Millisecond) + defer cancel() _, err = caller.Execute(ctxWithTimeout, commoncap.CapabilityRequest{ diff --git a/core/capabilities/remote/types/generate.go b/core/capabilities/remote/types/generate.go new file mode 100644 index 00000000000..845c53b2f43 --- /dev/null +++ b/core/capabilities/remote/types/generate.go @@ -0,0 +1,4 @@ +//go:generate protoc --proto_path=.:.. --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative message.proto + + +package types diff --git a/core/capabilities/remote/types/message.pb.go b/core/capabilities/remote/types/message.pb.go index d8e9579e96c..e9f82fde52b 100644 --- a/core/capabilities/remote/types/message.pb.go +++ b/core/capabilities/remote/types/message.pb.go @@ -1,8 +1,8 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.28.1 -// protoc v3.21.8 -// source: core/capabilities/remote/types/message.proto +// protoc-gen-go v1.33.0 +// protoc v4.25.1 +// source: message.proto package types @@ -26,6 +26,7 @@ const ( Error_OK Error = 0 Error_VALIDATION_FAILED Error = 1 Error_CAPABILITY_NOT_FOUND Error = 2 + Error_INVALID_REQUEST Error = 3 ) // Enum value maps for Error. @@ -34,11 +35,13 @@ var ( 0: "OK", 1: "VALIDATION_FAILED", 2: "CAPABILITY_NOT_FOUND", + 3: "INVALID_REQUEST", } Error_value = map[string]int32{ "OK": 0, "VALIDATION_FAILED": 1, "CAPABILITY_NOT_FOUND": 2, + "INVALID_REQUEST": 3, } ) @@ -53,11 +56,11 @@ func (x Error) String() string { } func (Error) Descriptor() protoreflect.EnumDescriptor { - return file_core_capabilities_remote_types_message_proto_enumTypes[0].Descriptor() + return file_message_proto_enumTypes[0].Descriptor() } func (Error) Type() protoreflect.EnumType { - return &file_core_capabilities_remote_types_message_proto_enumTypes[0] + return &file_message_proto_enumTypes[0] } func (x Error) Number() protoreflect.EnumNumber { @@ -66,7 +69,7 @@ func (x Error) Number() protoreflect.EnumNumber { // Deprecated: Use Error.Descriptor instead. func (Error) EnumDescriptor() ([]byte, []int) { - return file_core_capabilities_remote_types_message_proto_rawDescGZIP(), []int{0} + return file_message_proto_rawDescGZIP(), []int{0} } type Message struct { @@ -81,7 +84,7 @@ type Message struct { func (x *Message) Reset() { *x = Message{} if protoimpl.UnsafeEnabled { - mi := &file_core_capabilities_remote_types_message_proto_msgTypes[0] + mi := &file_message_proto_msgTypes[0] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -94,7 +97,7 @@ func (x *Message) String() string { func (*Message) ProtoMessage() {} func (x *Message) ProtoReflect() protoreflect.Message { - mi := &file_core_capabilities_remote_types_message_proto_msgTypes[0] + mi := &file_message_proto_msgTypes[0] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -107,7 +110,7 @@ func (x *Message) ProtoReflect() protoreflect.Message { // Deprecated: Use Message.ProtoReflect.Descriptor instead. func (*Message) Descriptor() ([]byte, []int) { - return file_core_capabilities_remote_types_message_proto_rawDescGZIP(), []int{0} + return file_message_proto_rawDescGZIP(), []int{0} } func (x *Message) GetSignature() []byte { @@ -129,7 +132,6 @@ type MessageBody struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // header fields set and validated by the Dispatcher Version uint32 `protobuf:"varint,1,opt,name=version,proto3" json:"version,omitempty"` Sender []byte `protobuf:"bytes,2,opt,name=sender,proto3" json:"sender,omitempty"` Receiver []byte `protobuf:"bytes,3,opt,name=receiver,proto3" json:"receiver,omitempty"` @@ -143,6 +145,7 @@ type MessageBody struct { // payload contains a CapabilityRequest or CapabilityResponse Payload []byte `protobuf:"bytes,11,opt,name=payload,proto3" json:"payload,omitempty"` // Types that are assignable to Metadata: + // // *MessageBody_TriggerRegistrationMetadata // *MessageBody_TriggerEventMetadata Metadata isMessageBody_Metadata `protobuf_oneof:"metadata"` @@ -151,7 +154,7 @@ type MessageBody struct { func (x *MessageBody) Reset() { *x = MessageBody{} if protoimpl.UnsafeEnabled { - mi := &file_core_capabilities_remote_types_message_proto_msgTypes[1] + mi := &file_message_proto_msgTypes[1] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -164,7 +167,7 @@ func (x *MessageBody) String() string { func (*MessageBody) ProtoMessage() {} func (x *MessageBody) ProtoReflect() protoreflect.Message { - mi := &file_core_capabilities_remote_types_message_proto_msgTypes[1] + mi := &file_message_proto_msgTypes[1] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -177,7 +180,7 @@ func (x *MessageBody) ProtoReflect() protoreflect.Message { // Deprecated: Use MessageBody.ProtoReflect.Descriptor instead. func (*MessageBody) Descriptor() ([]byte, []int) { - return file_core_capabilities_remote_types_message_proto_rawDescGZIP(), []int{1} + return file_message_proto_rawDescGZIP(), []int{1} } func (x *MessageBody) GetVersion() uint32 { @@ -305,7 +308,7 @@ type TriggerRegistrationMetadata struct { func (x *TriggerRegistrationMetadata) Reset() { *x = TriggerRegistrationMetadata{} if protoimpl.UnsafeEnabled { - mi := &file_core_capabilities_remote_types_message_proto_msgTypes[2] + mi := &file_message_proto_msgTypes[2] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -318,7 +321,7 @@ func (x *TriggerRegistrationMetadata) String() string { func (*TriggerRegistrationMetadata) ProtoMessage() {} func (x *TriggerRegistrationMetadata) ProtoReflect() protoreflect.Message { - mi := &file_core_capabilities_remote_types_message_proto_msgTypes[2] + mi := &file_message_proto_msgTypes[2] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -331,7 +334,7 @@ func (x *TriggerRegistrationMetadata) ProtoReflect() protoreflect.Message { // Deprecated: Use TriggerRegistrationMetadata.ProtoReflect.Descriptor instead. func (*TriggerRegistrationMetadata) Descriptor() ([]byte, []int) { - return file_core_capabilities_remote_types_message_proto_rawDescGZIP(), []int{2} + return file_message_proto_rawDescGZIP(), []int{2} } func (x *TriggerRegistrationMetadata) GetLastReceivedEventId() string { @@ -353,7 +356,7 @@ type TriggerEventMetadata struct { func (x *TriggerEventMetadata) Reset() { *x = TriggerEventMetadata{} if protoimpl.UnsafeEnabled { - mi := &file_core_capabilities_remote_types_message_proto_msgTypes[3] + mi := &file_message_proto_msgTypes[3] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -366,7 +369,7 @@ func (x *TriggerEventMetadata) String() string { func (*TriggerEventMetadata) ProtoMessage() {} func (x *TriggerEventMetadata) ProtoReflect() protoreflect.Message { - mi := &file_core_capabilities_remote_types_message_proto_msgTypes[3] + mi := &file_message_proto_msgTypes[3] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -379,7 +382,7 @@ func (x *TriggerEventMetadata) ProtoReflect() protoreflect.Message { // Deprecated: Use TriggerEventMetadata.ProtoReflect.Descriptor instead. func (*TriggerEventMetadata) Descriptor() ([]byte, []int) { - return file_core_capabilities_remote_types_message_proto_rawDescGZIP(), []int{3} + return file_message_proto_rawDescGZIP(), []int{3} } func (x *TriggerEventMetadata) GetTriggerEventId() string { @@ -396,94 +399,94 @@ func (x *TriggerEventMetadata) GetWorkflowIds() []string { return nil } -var File_core_capabilities_remote_types_message_proto protoreflect.FileDescriptor - -var file_core_capabilities_remote_types_message_proto_rawDesc = []byte{ - 0x0a, 0x2c, 0x63, 0x6f, 0x72, 0x65, 0x2f, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, - 0x69, 0x65, 0x73, 0x2f, 0x72, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, - 0x2f, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x06, - 0x72, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x22, 0x3b, 0x0a, 0x07, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, - 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x0c, 0x52, 0x09, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x12, - 0x12, 0x0a, 0x04, 0x62, 0x6f, 0x64, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x62, - 0x6f, 0x64, 0x79, 0x22, 0xb1, 0x04, 0x0a, 0x0b, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x42, - 0x6f, 0x64, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, - 0x06, 0x73, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x73, - 0x65, 0x6e, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x63, 0x65, 0x69, 0x76, 0x65, - 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x08, 0x72, 0x65, 0x63, 0x65, 0x69, 0x76, 0x65, - 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x18, 0x04, - 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x12, - 0x1d, 0x0a, 0x0a, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x05, 0x20, - 0x01, 0x28, 0x0c, 0x52, 0x09, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x49, 0x64, 0x12, 0x23, - 0x0a, 0x0d, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x5f, 0x69, 0x64, 0x18, - 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, - 0x79, 0x49, 0x64, 0x12, 0x2a, 0x0a, 0x11, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, - 0x79, 0x5f, 0x64, 0x6f, 0x6e, 0x5f, 0x69, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, - 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x44, 0x6f, 0x6e, 0x49, 0x64, 0x12, - 0x22, 0x0a, 0x0d, 0x63, 0x61, 0x6c, 0x6c, 0x65, 0x72, 0x5f, 0x64, 0x6f, 0x6e, 0x5f, 0x69, 0x64, - 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x63, 0x61, 0x6c, 0x6c, 0x65, 0x72, 0x44, 0x6f, - 0x6e, 0x49, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x18, 0x09, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x06, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x12, 0x23, 0x0a, 0x05, 0x65, - 0x72, 0x72, 0x6f, 0x72, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0d, 0x2e, 0x72, 0x65, 0x6d, - 0x6f, 0x74, 0x65, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, - 0x12, 0x18, 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x18, 0x0b, 0x20, 0x01, 0x28, - 0x0c, 0x52, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x12, 0x69, 0x0a, 0x1d, 0x74, 0x72, - 0x69, 0x67, 0x67, 0x65, 0x72, 0x5f, 0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x0c, 0x20, 0x01, 0x28, - 0x0b, 0x32, 0x23, 0x2e, 0x72, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x2e, 0x54, 0x72, 0x69, 0x67, 0x67, +var File_message_proto protoreflect.FileDescriptor + +var file_message_proto_rawDesc = []byte{ + 0x0a, 0x0d, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, + 0x06, 0x72, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x22, 0x3b, 0x0a, 0x07, 0x4d, 0x65, 0x73, 0x73, 0x61, + 0x67, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x09, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, + 0x12, 0x12, 0x0a, 0x04, 0x62, 0x6f, 0x64, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, + 0x62, 0x6f, 0x64, 0x79, 0x22, 0xb1, 0x04, 0x0a, 0x0b, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, + 0x42, 0x6f, 0x64, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x16, + 0x0a, 0x06, 0x73, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, + 0x73, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x63, 0x65, 0x69, 0x76, + 0x65, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x08, 0x72, 0x65, 0x63, 0x65, 0x69, 0x76, + 0x65, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x18, + 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, + 0x12, 0x1d, 0x0a, 0x0a, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x0c, 0x52, 0x09, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x49, 0x64, 0x12, + 0x23, 0x0a, 0x0d, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x5f, 0x69, 0x64, + 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, + 0x74, 0x79, 0x49, 0x64, 0x12, 0x2a, 0x0a, 0x11, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, + 0x74, 0x79, 0x5f, 0x64, 0x6f, 0x6e, 0x5f, 0x69, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0f, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x44, 0x6f, 0x6e, 0x49, 0x64, + 0x12, 0x22, 0x0a, 0x0d, 0x63, 0x61, 0x6c, 0x6c, 0x65, 0x72, 0x5f, 0x64, 0x6f, 0x6e, 0x5f, 0x69, + 0x64, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x63, 0x61, 0x6c, 0x6c, 0x65, 0x72, 0x44, + 0x6f, 0x6e, 0x49, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x18, 0x09, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x12, 0x23, 0x0a, 0x05, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0d, 0x2e, 0x72, 0x65, + 0x6d, 0x6f, 0x74, 0x65, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, + 0x72, 0x12, 0x18, 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x18, 0x0b, 0x20, 0x01, + 0x28, 0x0c, 0x52, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x12, 0x69, 0x0a, 0x1d, 0x74, + 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x5f, 0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x0c, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x72, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x2e, 0x54, 0x72, 0x69, 0x67, + 0x67, 0x65, 0x72, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x48, 0x00, 0x52, 0x1b, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x48, 0x00, 0x52, 0x1b, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, - 0x72, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x54, 0x0a, 0x16, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, - 0x5f, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, - 0x0d, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x72, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x2e, 0x54, - 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x4d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x48, 0x00, 0x52, 0x14, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x45, 0x76, - 0x65, 0x6e, 0x74, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x42, 0x0a, 0x0a, 0x08, 0x6d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x52, 0x0a, 0x1b, 0x54, 0x72, 0x69, 0x67, 0x67, - 0x65, 0x72, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x33, 0x0a, 0x16, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x72, - 0x65, 0x63, 0x65, 0x69, 0x76, 0x65, 0x64, 0x5f, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x13, 0x6c, 0x61, 0x73, 0x74, 0x52, 0x65, 0x63, 0x65, - 0x69, 0x76, 0x65, 0x64, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x22, 0x63, 0x0a, 0x14, 0x54, - 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x4d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x12, 0x28, 0x0a, 0x10, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x5f, 0x65, - 0x76, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x74, - 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x12, 0x21, 0x0a, - 0x0c, 0x77, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x02, 0x20, - 0x03, 0x28, 0x09, 0x52, 0x0b, 0x77, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x49, 0x64, 0x73, - 0x2a, 0x40, 0x0a, 0x05, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x06, 0x0a, 0x02, 0x4f, 0x4b, 0x10, - 0x00, 0x12, 0x15, 0x0a, 0x11, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x5f, - 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x01, 0x12, 0x18, 0x0a, 0x14, 0x43, 0x41, 0x50, 0x41, - 0x42, 0x49, 0x4c, 0x49, 0x54, 0x59, 0x5f, 0x4e, 0x4f, 0x54, 0x5f, 0x46, 0x4f, 0x55, 0x4e, 0x44, - 0x10, 0x02, 0x42, 0x20, 0x5a, 0x1e, 0x63, 0x6f, 0x72, 0x65, 0x2f, 0x63, 0x61, 0x70, 0x61, 0x62, - 0x69, 0x6c, 0x69, 0x74, 0x69, 0x65, 0x73, 0x2f, 0x72, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x2f, 0x74, - 0x79, 0x70, 0x65, 0x73, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x54, 0x0a, 0x16, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, + 0x72, 0x5f, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x18, 0x0d, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x72, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x2e, + 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x4d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x48, 0x00, 0x52, 0x14, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x45, + 0x76, 0x65, 0x6e, 0x74, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x42, 0x0a, 0x0a, 0x08, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x52, 0x0a, 0x1b, 0x54, 0x72, 0x69, 0x67, + 0x67, 0x65, 0x72, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x33, 0x0a, 0x16, 0x6c, 0x61, 0x73, 0x74, 0x5f, + 0x72, 0x65, 0x63, 0x65, 0x69, 0x76, 0x65, 0x64, 0x5f, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x5f, 0x69, + 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x13, 0x6c, 0x61, 0x73, 0x74, 0x52, 0x65, 0x63, + 0x65, 0x69, 0x76, 0x65, 0x64, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x22, 0x63, 0x0a, 0x14, + 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x4d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x12, 0x28, 0x0a, 0x10, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x5f, + 0x65, 0x76, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, + 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x12, 0x21, + 0x0a, 0x0c, 0x77, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x02, + 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x77, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x49, 0x64, + 0x73, 0x2a, 0x55, 0x0a, 0x05, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x06, 0x0a, 0x02, 0x4f, 0x4b, + 0x10, 0x00, 0x12, 0x15, 0x0a, 0x11, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x41, 0x54, 0x49, 0x4f, 0x4e, + 0x5f, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x01, 0x12, 0x18, 0x0a, 0x14, 0x43, 0x41, 0x50, + 0x41, 0x42, 0x49, 0x4c, 0x49, 0x54, 0x59, 0x5f, 0x4e, 0x4f, 0x54, 0x5f, 0x46, 0x4f, 0x55, 0x4e, + 0x44, 0x10, 0x02, 0x12, 0x13, 0x0a, 0x0f, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x5f, 0x52, + 0x45, 0x51, 0x55, 0x45, 0x53, 0x54, 0x10, 0x03, 0x42, 0x20, 0x5a, 0x1e, 0x63, 0x6f, 0x72, 0x65, + 0x2f, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x69, 0x65, 0x73, 0x2f, 0x72, 0x65, + 0x6d, 0x6f, 0x74, 0x65, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x33, } var ( - file_core_capabilities_remote_types_message_proto_rawDescOnce sync.Once - file_core_capabilities_remote_types_message_proto_rawDescData = file_core_capabilities_remote_types_message_proto_rawDesc + file_message_proto_rawDescOnce sync.Once + file_message_proto_rawDescData = file_message_proto_rawDesc ) -func file_core_capabilities_remote_types_message_proto_rawDescGZIP() []byte { - file_core_capabilities_remote_types_message_proto_rawDescOnce.Do(func() { - file_core_capabilities_remote_types_message_proto_rawDescData = protoimpl.X.CompressGZIP(file_core_capabilities_remote_types_message_proto_rawDescData) +func file_message_proto_rawDescGZIP() []byte { + file_message_proto_rawDescOnce.Do(func() { + file_message_proto_rawDescData = protoimpl.X.CompressGZIP(file_message_proto_rawDescData) }) - return file_core_capabilities_remote_types_message_proto_rawDescData + return file_message_proto_rawDescData } -var file_core_capabilities_remote_types_message_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_core_capabilities_remote_types_message_proto_msgTypes = make([]protoimpl.MessageInfo, 4) -var file_core_capabilities_remote_types_message_proto_goTypes = []interface{}{ +var file_message_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_message_proto_msgTypes = make([]protoimpl.MessageInfo, 4) +var file_message_proto_goTypes = []interface{}{ (Error)(0), // 0: remote.Error (*Message)(nil), // 1: remote.Message (*MessageBody)(nil), // 2: remote.MessageBody (*TriggerRegistrationMetadata)(nil), // 3: remote.TriggerRegistrationMetadata (*TriggerEventMetadata)(nil), // 4: remote.TriggerEventMetadata } -var file_core_capabilities_remote_types_message_proto_depIdxs = []int32{ +var file_message_proto_depIdxs = []int32{ 0, // 0: remote.MessageBody.error:type_name -> remote.Error 3, // 1: remote.MessageBody.trigger_registration_metadata:type_name -> remote.TriggerRegistrationMetadata 4, // 2: remote.MessageBody.trigger_event_metadata:type_name -> remote.TriggerEventMetadata @@ -494,13 +497,13 @@ var file_core_capabilities_remote_types_message_proto_depIdxs = []int32{ 0, // [0:3] is the sub-list for field type_name } -func init() { file_core_capabilities_remote_types_message_proto_init() } -func file_core_capabilities_remote_types_message_proto_init() { - if File_core_capabilities_remote_types_message_proto != nil { +func init() { file_message_proto_init() } +func file_message_proto_init() { + if File_message_proto != nil { return } if !protoimpl.UnsafeEnabled { - file_core_capabilities_remote_types_message_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + file_message_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*Message); i { case 0: return &v.state @@ -512,7 +515,7 @@ func file_core_capabilities_remote_types_message_proto_init() { return nil } } - file_core_capabilities_remote_types_message_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + file_message_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*MessageBody); i { case 0: return &v.state @@ -524,7 +527,7 @@ func file_core_capabilities_remote_types_message_proto_init() { return nil } } - file_core_capabilities_remote_types_message_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + file_message_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*TriggerRegistrationMetadata); i { case 0: return &v.state @@ -536,7 +539,7 @@ func file_core_capabilities_remote_types_message_proto_init() { return nil } } - file_core_capabilities_remote_types_message_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + file_message_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*TriggerEventMetadata); i { case 0: return &v.state @@ -549,7 +552,7 @@ func file_core_capabilities_remote_types_message_proto_init() { } } } - file_core_capabilities_remote_types_message_proto_msgTypes[1].OneofWrappers = []interface{}{ + file_message_proto_msgTypes[1].OneofWrappers = []interface{}{ (*MessageBody_TriggerRegistrationMetadata)(nil), (*MessageBody_TriggerEventMetadata)(nil), } @@ -557,19 +560,19 @@ func file_core_capabilities_remote_types_message_proto_init() { out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), - RawDescriptor: file_core_capabilities_remote_types_message_proto_rawDesc, + RawDescriptor: file_message_proto_rawDesc, NumEnums: 1, NumMessages: 4, NumExtensions: 0, NumServices: 0, }, - GoTypes: file_core_capabilities_remote_types_message_proto_goTypes, - DependencyIndexes: file_core_capabilities_remote_types_message_proto_depIdxs, - EnumInfos: file_core_capabilities_remote_types_message_proto_enumTypes, - MessageInfos: file_core_capabilities_remote_types_message_proto_msgTypes, + GoTypes: file_message_proto_goTypes, + DependencyIndexes: file_message_proto_depIdxs, + EnumInfos: file_message_proto_enumTypes, + MessageInfos: file_message_proto_msgTypes, }.Build() - File_core_capabilities_remote_types_message_proto = out.File - file_core_capabilities_remote_types_message_proto_rawDesc = nil - file_core_capabilities_remote_types_message_proto_goTypes = nil - file_core_capabilities_remote_types_message_proto_depIdxs = nil + File_message_proto = out.File + file_message_proto_rawDesc = nil + file_message_proto_goTypes = nil + file_message_proto_depIdxs = nil } From f2ce1953a0fc68928084686a0a15d132a8cba4c0 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Thu, 23 May 2024 14:56:59 +0100 Subject: [PATCH 18/43] wip --- core/capabilities/remote/target_caller.go | 42 +++++++++++++++++-- .../capabilities/remote/target_caller_test.go | 35 +++++++--------- .../remote/target_receiver_test.go | 23 +++++----- 3 files changed, 66 insertions(+), 34 deletions(-) diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target_caller.go index 489fdb040f4..0d5e10c8bcc 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target_caller.go @@ -24,6 +24,7 @@ type remoteTargetCaller struct { remoteCapabilityDonInfo capabilities.DON localDONInfo capabilities.DON dispatcher types.Dispatcher + requestTimeout time.Duration messageIDToExecuteRequest map[string]*callerExecuteRequest mutex sync.Mutex @@ -32,16 +33,46 @@ type remoteTargetCaller struct { var _ commoncap.TargetCapability = &remoteTargetCaller{} var _ types.Receiver = &remoteTargetCaller{} -func NewRemoteTargetCaller(lggr logger.Logger, remoteCapabilityInfo commoncap.CapabilityInfo, remoteCapabilityDonInfo capabilities.DON, localDonInfo capabilities.DON, dispatcher types.Dispatcher) (*remoteTargetCaller, error) { +func NewRemoteTargetCaller(ctx context.Context, lggr logger.Logger, remoteCapabilityInfo commoncap.CapabilityInfo, remoteCapabilityDonInfo capabilities.DON, localDonInfo capabilities.DON, dispatcher types.Dispatcher, + requestTimeout time.Duration) *remoteTargetCaller { - return &remoteTargetCaller{ + caller := &remoteTargetCaller{ lggr: lggr, remoteCapabilityInfo: remoteCapabilityInfo, remoteCapabilityDonInfo: remoteCapabilityDonInfo, localDONInfo: localDonInfo, dispatcher: dispatcher, + requestTimeout: requestTimeout, messageIDToExecuteRequest: make(map[string]*callerExecuteRequest), - }, nil + } + + go func() { + timer := time.NewTimer(requestTimeout) + defer timer.Stop() + for { + select { + case <-ctx.Done(): + return + case <-timer.C: + caller.ExpireRequests(ctx) + } + } + }() + + return caller +} + +func (c *remoteTargetCaller) ExpireRequests(ctx context.Context) { + c.mutex.Lock() + defer c.mutex.Unlock() + + for messageID, req := range c.messageIDToExecuteRequest { + if time.Since(req.creationTime) > c.requestTimeout { + delete(c.messageIDToExecuteRequest, messageID) + req.responseCh <- commoncap.CapabilityResponse{Err: errors.New("request timed out")} + close(req.responseCh) + } + } } func (c *remoteTargetCaller) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { @@ -141,6 +172,11 @@ func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { messageID := getMessageID(msg) + if msg.Error != types.Error_OK { + c.lggr.Warnw("received error response", "messageID", messageID, "error", msg.Error) + return + } + req := c.messageIDToExecuteRequest[messageID] if req == nil { c.lggr.Warnw("received response for unknown message ID", "messageID", messageID, "sender", msg.Sender) diff --git a/core/capabilities/remote/target_caller_test.go b/core/capabilities/remote/target_caller_test.go index eb11abf1ef3..c45c27a8219 100644 --- a/core/capabilities/remote/target_caller_test.go +++ b/core/capabilities/remote/target_caller_test.go @@ -1,7 +1,6 @@ package remote_test import ( - "context" "testing" "time" @@ -23,7 +22,7 @@ const ( executeValue1 = "triggerEvent1" ) -func Test_TargetCallerExecuteContextTimeout(t *testing.T) { +func Test_TargetCallerExecuteTimeout(t *testing.T) { lggr := logger.TestLogger(t) ctx := testutils.Context(t) @@ -52,10 +51,9 @@ func Test_TargetCallerExecuteContextTimeout(t *testing.T) { dispatcher := NewTestDispatcher() - caller, err := remote.NewRemoteTargetCaller(lggr, capInfo, capDonInfo, workflowDonInfo, dispatcher) - require.NoError(t, err) + caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, capDonInfo, workflowDonInfo, dispatcher, 1*time.Second) - err = dispatcher.SetReceiver("cap_id", "workflow-don", caller) + err := dispatcher.SetReceiver("cap_id", "workflow-don", caller) require.NoError(t, err) transmissionSchedule, err := values.NewMap(map[string]any{ @@ -64,10 +62,7 @@ func Test_TargetCallerExecuteContextTimeout(t *testing.T) { }) require.NoError(t, err) - ctxWithTimeout, cancel := context.WithTimeout(ctx, 10*time.Millisecond) - defer cancel() - - _, err = caller.Execute(ctxWithTimeout, + responseCh, err := caller.Execute(ctx, commoncap.CapabilityRequest{ Metadata: commoncap.RequestMetadata{ WorkflowID: "workflowID", @@ -76,7 +71,9 @@ func Test_TargetCallerExecuteContextTimeout(t *testing.T) { Config: transmissionSchedule, }) - assert.NotNil(t, err) + response := <-responseCh + assert.NotNil(t, response.Err) + } func Test_TargetCallerExecute(t *testing.T) { @@ -109,10 +106,9 @@ func Test_TargetCallerExecute(t *testing.T) { dispatcher := NewTestDispatcher() - caller, err := remote.NewRemoteTargetCaller(lggr, capInfo, capDonInfo, workflowDonInfo, dispatcher) - require.NoError(t, err) + caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, capDonInfo, workflowDonInfo, dispatcher, 1*time.Minute) - err = dispatcher.SetReceiver("cap_id", "workflow-don", caller) + err := dispatcher.SetReceiver("cap_id", "workflow-don", caller) require.NoError(t, err) go func() { @@ -160,7 +156,7 @@ func Test_TargetCallerExecute(t *testing.T) { } -func Test_TargetCallerExecuteWithError(t *testing.T) { +func Test_TargetCallerExecuteWithErrorTimesOut(t *testing.T) { lggr := logger.TestLogger(t) ctx := testutils.Context(t) @@ -190,10 +186,9 @@ func Test_TargetCallerExecuteWithError(t *testing.T) { dispatcher := NewTestDispatcher() - caller, err := remote.NewRemoteTargetCaller(lggr, capInfo, capDonInfo, workflowDonInfo, dispatcher) - require.NoError(t, err) + caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, capDonInfo, workflowDonInfo, dispatcher, 1*time.Second) - err = dispatcher.SetReceiver("cap_id", "workflow-don", caller) + err := dispatcher.SetReceiver("cap_id", "workflow-don", caller) require.NoError(t, err) go func() { @@ -216,7 +211,7 @@ func Test_TargetCallerExecuteWithError(t *testing.T) { }) require.NoError(t, err) - _, err = caller.Execute(ctx, + responseCh, err := caller.Execute(ctx, commoncap.CapabilityRequest{ Metadata: commoncap.RequestMetadata{ WorkflowID: "workflowID", @@ -225,7 +220,9 @@ func Test_TargetCallerExecuteWithError(t *testing.T) { Config: transmissionSchedule, }) - require.NotNil(t, err) + response := <-responseCh + + require.NotNil(t, response.Err) } type TestDispatcher struct { diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index 587b5aa2f6f..5bf4b449e38 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -90,7 +90,7 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF F: workflowDonF, } - dispatcher := newTestTargetReceiverDispatcher(capabilityPeerID) + dispatcher := newTestRemoteTargetDispatcher(capabilityPeerID) workflowDONs := map[string]commoncap.DON{ workflowDonInfo.ID: workflowDonInfo, @@ -103,8 +103,7 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF callers := make([]commoncap.TargetCapability, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerDispatcher := dispatcher.GetDispatcherForCaller(workflowPeers[i]) - caller, err := remote.NewRemoteTargetCaller(lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher) - require.NoError(t, err) + caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher, 1*time.Minute) dispatcher.RegisterCaller(workflowPeers[i], caller) callers[i] = caller } @@ -145,21 +144,21 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF wg.Wait() } -type testTargetReceiverDispatcher struct { +type testRemoteTargetDispatcher struct { abstractDispatcher receiver remotetypes.Receiver callers map[p2ptypes.PeerID]remotetypes.Receiver receiverPeerID p2ptypes.PeerID } -func newTestTargetReceiverDispatcher(receiverPeerID p2ptypes.PeerID) *testTargetReceiverDispatcher { - return &testTargetReceiverDispatcher{ +func newTestRemoteTargetDispatcher(receiverPeerID p2ptypes.PeerID) *testRemoteTargetDispatcher { + return &testRemoteTargetDispatcher{ receiverPeerID: receiverPeerID, callers: make(map[p2ptypes.PeerID]remotetypes.Receiver), } } -func (r *testTargetReceiverDispatcher) RegisterReceiver(receiver remotetypes.Receiver) { +func (r *testRemoteTargetDispatcher) GetDispatcherReceiver(receiverPeerID p2ptypes.PeerID, receiver remotetypes.Receiver) { if r.receiver != nil { panic("receiver already registered") } @@ -167,7 +166,7 @@ func (r *testTargetReceiverDispatcher) RegisterReceiver(receiver remotetypes.Rec r.receiver = receiver } -func (r *testTargetReceiverDispatcher) GetDispatcherForCaller(callerPeerID p2ptypes.PeerID) remotetypes.Dispatcher { +func (r *testRemoteTargetDispatcher) GetDispatcherForCaller(callerPeerID p2ptypes.PeerID) remotetypes.Dispatcher { dispatcher := &callerDispatcher{ callerPeerID: callerPeerID, broker: r, @@ -175,7 +174,7 @@ func (r *testTargetReceiverDispatcher) GetDispatcherForCaller(callerPeerID p2pty return dispatcher } -func (r *testTargetReceiverDispatcher) RegisterCaller(callerPeerID p2ptypes.PeerID, caller remotetypes.Receiver) { +func (r *testRemoteTargetDispatcher) RegisterCaller(callerPeerID p2ptypes.PeerID, caller remotetypes.Receiver) { if _, ok := r.callers[callerPeerID]; ok { panic("caller already registered") } @@ -183,7 +182,7 @@ func (r *testTargetReceiverDispatcher) RegisterCaller(callerPeerID p2ptypes.Peer r.callers[callerPeerID] = caller } -func (r *testTargetReceiverDispatcher) SendToReceiver(peerID p2ptypes.PeerID, msg *remotetypes.MessageBody) { +func (r *testRemoteTargetDispatcher) SendToReceiver(peerID p2ptypes.PeerID, msg *remotetypes.MessageBody) { if peerID != r.receiverPeerID { panic("receiver peer id mismatch") } @@ -193,7 +192,7 @@ func (r *testTargetReceiverDispatcher) SendToReceiver(peerID p2ptypes.PeerID, ms r.receiver.Receive(msg) } -func (r *testTargetReceiverDispatcher) Send(callerPeerID p2ptypes.PeerID, msgBody *remotetypes.MessageBody) error { +func (r *testRemoteTargetDispatcher) Send(callerPeerID p2ptypes.PeerID, msgBody *remotetypes.MessageBody) error { msgBody.Version = 1 msgBody.Sender = r.receiverPeerID[:] @@ -212,7 +211,7 @@ func (r *testTargetReceiverDispatcher) Send(callerPeerID p2ptypes.PeerID, msgBod type callerDispatcher struct { abstractDispatcher callerPeerID p2ptypes.PeerID - broker *testTargetReceiverDispatcher + broker *testRemoteTargetDispatcher } func (t *callerDispatcher) Send(peerID p2ptypes.PeerID, msgBody *remotetypes.MessageBody) error { From 460f6088c96dbed8beeb0ef1ec50ff7190442c7c Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Thu, 23 May 2024 15:34:48 +0100 Subject: [PATCH 19/43] refactor tests and test broker --- .../remote/target_receiver_test.go | 109 +++++++----------- 1 file changed, 42 insertions(+), 67 deletions(-) diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index 5bf4b449e38..02e84040be4 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -3,7 +3,6 @@ package remote_test import ( "context" "crypto/rand" - "fmt" "sync" "testing" "time" @@ -38,14 +37,16 @@ func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { testRemoteTargetConsensus(t, 4, 3, 10*time.Minute, responseTest) testRemoteTargetConsensus(t, 10, 3, 10*time.Minute, responseTest) - errResponseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { - assert.NotNil(t, responseError) - } + /* - // Test scenario where number of submissions is less than F + 1 - testRemoteTargetConsensus(t, 4, 6, 1*time.Second, errResponseTest) - testRemoteTargetConsensus(t, 10, 10, 1*time.Second, errResponseTest) + errResponseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + assert.NotNil(t, responseError) + } + // Test scenario where number of submissions is less than F + 1 + testRemoteTargetConsensus(t, 4, 6, 1*time.Second, errResponseTest) + testRemoteTargetConsensus(t, 10, 10, 1*time.Second, errResponseTest) + */ // Context cancellation test - use an underlying capability that blocks until the context is cancelled // Check request errors as expected and all error responses are received @@ -90,21 +91,22 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF F: workflowDonF, } - dispatcher := newTestRemoteTargetDispatcher(capabilityPeerID) + broker := newTestMessageBroker() workflowDONs := map[string]commoncap.DON{ workflowDonInfo.ID: workflowDonInfo, } underlying := &testTargetReceiver{} - receiver := remote.NewRemoteTargetReceiver(ctx, lggr, underlying, capInfo, &capDonInfo, workflowDONs, dispatcher, consensusTimeout) - dispatcher.RegisterReceiver(receiver) + capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeerID) + receiver := remote.NewRemoteTargetReceiver(ctx, lggr, underlying, capInfo, &capDonInfo, workflowDONs, capabilityDispatcher, consensusTimeout) + broker.RegisterReceiverNode(capabilityPeerID, receiver) callers := make([]commoncap.TargetCapability, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { - workflowPeerDispatcher := dispatcher.GetDispatcherForCaller(workflowPeers[i]) + workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher, 1*time.Minute) - dispatcher.RegisterCaller(workflowPeers[i], caller) + broker.RegisterReceiverNode(workflowPeers[i], caller) callers[i] = caller } @@ -144,91 +146,64 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF wg.Wait() } -type testRemoteTargetDispatcher struct { - abstractDispatcher - receiver remotetypes.Receiver - callers map[p2ptypes.PeerID]remotetypes.Receiver - receiverPeerID p2ptypes.PeerID +type testMessageBroker struct { + receivers map[p2ptypes.PeerID]remotetypes.Receiver } -func newTestRemoteTargetDispatcher(receiverPeerID p2ptypes.PeerID) *testRemoteTargetDispatcher { - return &testRemoteTargetDispatcher{ - receiverPeerID: receiverPeerID, - callers: make(map[p2ptypes.PeerID]remotetypes.Receiver), +func newTestMessageBroker() *testMessageBroker { + return &testMessageBroker{ + receivers: make(map[p2ptypes.PeerID]remotetypes.Receiver), } } -func (r *testRemoteTargetDispatcher) GetDispatcherReceiver(receiverPeerID p2ptypes.PeerID, receiver remotetypes.Receiver) { - if r.receiver != nil { - panic("receiver already registered") - } - - r.receiver = receiver -} - -func (r *testRemoteTargetDispatcher) GetDispatcherForCaller(callerPeerID p2ptypes.PeerID) remotetypes.Dispatcher { - dispatcher := &callerDispatcher{ - callerPeerID: callerPeerID, +func (r *testMessageBroker) NewDispatcherForNode(nodePeerID p2ptypes.PeerID) remotetypes.Dispatcher { + return &nodeDispatcher{ + callerPeerID: nodePeerID, broker: r, } - return dispatcher -} - -func (r *testRemoteTargetDispatcher) RegisterCaller(callerPeerID p2ptypes.PeerID, caller remotetypes.Receiver) { - if _, ok := r.callers[callerPeerID]; ok { - panic("caller already registered") - } - - r.callers[callerPeerID] = caller } -func (r *testRemoteTargetDispatcher) SendToReceiver(peerID p2ptypes.PeerID, msg *remotetypes.MessageBody) { - if peerID != r.receiverPeerID { - panic("receiver peer id mismatch") +func (r *testMessageBroker) RegisterReceiverNode(nodePeerID p2ptypes.PeerID, node remotetypes.Receiver) { + if _, ok := r.receivers[nodePeerID]; ok { + panic("node already registered") } - msg.Receiver = r.receiverPeerID[:] - - r.receiver.Receive(msg) + r.receivers[nodePeerID] = node } -func (r *testRemoteTargetDispatcher) Send(callerPeerID p2ptypes.PeerID, msgBody *remotetypes.MessageBody) error { +func (r *testMessageBroker) Send(msg *remotetypes.MessageBody) { + receiverId := toPeerID(msg.Receiver) - msgBody.Version = 1 - msgBody.Sender = r.receiverPeerID[:] - msgBody.Receiver = callerPeerID[:] - msgBody.Timestamp = time.Now().UnixMilli() - - if caller, ok := r.callers[callerPeerID]; ok { - caller.Receive(msgBody) + if receiver, ok := r.receivers[receiverId]; ok { + receiver.Receive(msg) } else { - return fmt.Errorf("caller not found for caller peer id %s", callerPeerID.String()) + panic("receiver not found for peer id") } - return nil } -type callerDispatcher struct { - abstractDispatcher +func toPeerID(id []byte) p2ptypes.PeerID { + return [32]byte(id) +} + +type nodeDispatcher struct { callerPeerID p2ptypes.PeerID - broker *testRemoteTargetDispatcher + broker *testMessageBroker } -func (t *callerDispatcher) Send(peerID p2ptypes.PeerID, msgBody *remotetypes.MessageBody) error { +func (t *nodeDispatcher) Send(peerID p2ptypes.PeerID, msgBody *remotetypes.MessageBody) error { msgBody.Version = 1 msgBody.Sender = t.callerPeerID[:] + msgBody.Receiver = peerID[:] msgBody.Timestamp = time.Now().UnixMilli() - t.broker.SendToReceiver(peerID, msgBody) + t.broker.Send(msgBody) return nil } -type abstractDispatcher struct { -} - -func (t *abstractDispatcher) SetReceiver(capabilityId string, donId string, receiver remotetypes.Receiver) error { +func (t *nodeDispatcher) SetReceiver(capabilityId string, donId string, receiver remotetypes.Receiver) error { return nil } -func (t *abstractDispatcher) RemoveReceiver(capabilityId string, donId string) {} +func (t *nodeDispatcher) RemoveReceiver(capabilityId string, donId string) {} type testTargetReceiver struct { } From 62dfe30198f78684191e8dabfdfb4e76de4cc22a Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Thu, 23 May 2024 16:07:23 +0100 Subject: [PATCH 20/43] wip --- .../remote/target_receiver_test.go | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index 02e84040be4..c2985eedaeb 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -33,20 +33,20 @@ func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { } // Test scenarios where the number of submissions is greater than or equal to F + 1 - testRemoteTargetConsensus(t, 1, 0, 10*time.Minute, responseTest) - testRemoteTargetConsensus(t, 4, 3, 10*time.Minute, responseTest) - testRemoteTargetConsensus(t, 10, 3, 10*time.Minute, responseTest) + testRemoteTargetConsensus(t, 1, 0, 10*time.Minute, 10*time.Minute, responseTest) + testRemoteTargetConsensus(t, 4, 3, 10*time.Minute, 10*time.Minute, responseTest) + testRemoteTargetConsensus(t, 10, 3, 10*time.Minute, 10*time.Minute, responseTest) - /* + errResponseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + require.NoError(t, responseError) + response := <-responseCh + assert.NotNil(t, response.Err) + } - errResponseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { - assert.NotNil(t, responseError) - } + // Test scenario where number of submissions is less than F + 1 + testRemoteTargetConsensus(t, 4, 6, 1*time.Second, 1*time.Second, errResponseTest) + testRemoteTargetConsensus(t, 10, 10, 1*time.Second, 1*time.Second, errResponseTest) - // Test scenario where number of submissions is less than F + 1 - testRemoteTargetConsensus(t, 4, 6, 1*time.Second, errResponseTest) - testRemoteTargetConsensus(t, 10, 10, 1*time.Second, errResponseTest) - */ // Context cancellation test - use an underlying capability that blocks until the context is cancelled // Check request errors as expected and all error responses are received @@ -60,9 +60,11 @@ func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { } func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF uint8, - consensusTimeout time.Duration, responseTest func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error)) { + receiverRequestTimeout time.Duration, callerRequestTimeout time.Duration, responseTest func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error)) { lggr := logger.TestLogger(t) - ctx := testutils.Context(t) + ctx, cancel := context.WithCancel(testutils.Context(t)) + defer cancel() + capInfo := commoncap.CapabilityInfo{ ID: "cap_id", CapabilityType: commoncap.CapabilityTypeTarget, @@ -99,13 +101,13 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF underlying := &testTargetReceiver{} capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeerID) - receiver := remote.NewRemoteTargetReceiver(ctx, lggr, underlying, capInfo, &capDonInfo, workflowDONs, capabilityDispatcher, consensusTimeout) + receiver := remote.NewRemoteTargetReceiver(ctx, lggr, underlying, capInfo, &capDonInfo, workflowDONs, capabilityDispatcher, receiverRequestTimeout) broker.RegisterReceiverNode(capabilityPeerID, receiver) callers := make([]commoncap.TargetCapability, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) - caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher, 1*time.Minute) + caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher, callerRequestTimeout) broker.RegisterReceiverNode(workflowPeers[i], caller) callers[i] = caller } From 166524a9e9f15f186ed99095586b6db5f16d1116 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Thu, 23 May 2024 16:20:49 +0100 Subject: [PATCH 21/43] error codes --- core/capabilities/remote/target_receiver.go | 9 +++----- .../remote/target_receiver_test.go | 21 ++++++++++--------- core/capabilities/remote/types/message.pb.go | 18 +++++++++++----- core/capabilities/remote/types/message.proto | 2 ++ 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index 9bdc48c22b5..6653151f2c5 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -83,8 +83,7 @@ func (r *remoteTargetReceiver) ExpireRequests(ctx context.Context) { CallerDonId: executeReq.callingDonID, Method: types.MethodExecute, MessageId: []byte(messageId), - // TODO sort out error codes - this should be a timeout error - Error: types.Error_CAPABILITY_NOT_FOUND, + Error: types.Error_TIMEOUT, } for peerID := range executeReq.fromPeers { @@ -171,13 +170,11 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { responseMsg.Payload, err = pb.MarshalCapabilityResponse(response) } else { r.lggr.Errorw("failed to execute capability", "capabilityId", r.capInfo.ID, "err", err) - // TODO set correct error code - responseMsg.Error = types.Error_CAPABILITY_NOT_FOUND + responseMsg.Error = types.Error_INTERNAL_ERROR } } else { r.lggr.Errorw("failed to unmarshal capability request", "capabilityId", r.capInfo.ID, "err", err) - // TODO set correct error code - responseMsg.Error = types.Error_CAPABILITY_NOT_FOUND + responseMsg.Error = types.Error_INVALID_REQUEST } executeReq.response = responseMsg diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index c2985eedaeb..6cd0ed27128 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -33,9 +33,9 @@ func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { } // Test scenarios where the number of submissions is greater than or equal to F + 1 - testRemoteTargetConsensus(t, 1, 0, 10*time.Minute, 10*time.Minute, responseTest) - testRemoteTargetConsensus(t, 4, 3, 10*time.Minute, 10*time.Minute, responseTest) - testRemoteTargetConsensus(t, 10, 3, 10*time.Minute, 10*time.Minute, responseTest) + testRemoteTargetConsensus(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) + testRemoteTargetConsensus(t, 4, 3, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) + testRemoteTargetConsensus(t, 10, 3, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) errResponseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { require.NoError(t, responseError) @@ -44,8 +44,8 @@ func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { } // Test scenario where number of submissions is less than F + 1 - testRemoteTargetConsensus(t, 4, 6, 1*time.Second, 1*time.Second, errResponseTest) - testRemoteTargetConsensus(t, 10, 10, 1*time.Second, 1*time.Second, errResponseTest) + testRemoteTargetConsensus(t, 4, 6, 1*time.Second, 1, 0, 1*time.Second, errResponseTest) + testRemoteTargetConsensus(t, 10, 10, 1*time.Second, 1, 0, 1*time.Second, errResponseTest) // Context cancellation test - use an underlying capability that blocks until the context is cancelled @@ -59,8 +59,9 @@ func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { } -func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF uint8, - receiverRequestTimeout time.Duration, callerRequestTimeout time.Duration, responseTest func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error)) { +func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF uint8, workflowNodeTimeout time.Duration, + capabilityNodePeers int, capabilityDonF uint8, capabilityNodeTimeout time.Duration, + responseTest func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error)) { lggr := logger.TestLogger(t) ctx, cancel := context.WithCancel(testutils.Context(t)) defer cancel() @@ -77,7 +78,7 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF capDonInfo := commoncap.DON{ ID: "capability-don", Members: []p2ptypes.PeerID{capabilityPeerID}, - F: 0, + F: capabilityDonF, } workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) @@ -101,13 +102,13 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF underlying := &testTargetReceiver{} capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeerID) - receiver := remote.NewRemoteTargetReceiver(ctx, lggr, underlying, capInfo, &capDonInfo, workflowDONs, capabilityDispatcher, receiverRequestTimeout) + receiver := remote.NewRemoteTargetReceiver(ctx, lggr, underlying, capInfo, &capDonInfo, workflowDONs, capabilityDispatcher, capabilityNodeTimeout) broker.RegisterReceiverNode(capabilityPeerID, receiver) callers := make([]commoncap.TargetCapability, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) - caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher, callerRequestTimeout) + caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeTimeout) broker.RegisterReceiverNode(workflowPeers[i], caller) callers[i] = caller } diff --git a/core/capabilities/remote/types/message.pb.go b/core/capabilities/remote/types/message.pb.go index e9f82fde52b..1a9c0891251 100644 --- a/core/capabilities/remote/types/message.pb.go +++ b/core/capabilities/remote/types/message.pb.go @@ -27,6 +27,8 @@ const ( Error_VALIDATION_FAILED Error = 1 Error_CAPABILITY_NOT_FOUND Error = 2 Error_INVALID_REQUEST Error = 3 + Error_TIMEOUT Error = 4 + Error_INTERNAL_ERROR Error = 5 ) // Enum value maps for Error. @@ -36,12 +38,16 @@ var ( 1: "VALIDATION_FAILED", 2: "CAPABILITY_NOT_FOUND", 3: "INVALID_REQUEST", + 4: "TIMEOUT", + 5: "INTERNAL_ERROR", } Error_value = map[string]int32{ "OK": 0, "VALIDATION_FAILED": 1, "CAPABILITY_NOT_FOUND": 2, "INVALID_REQUEST": 3, + "TIMEOUT": 4, + "INTERNAL_ERROR": 5, } ) @@ -454,15 +460,17 @@ var file_message_proto_rawDesc = []byte{ 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x12, 0x21, 0x0a, 0x0c, 0x77, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x77, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x49, 0x64, - 0x73, 0x2a, 0x55, 0x0a, 0x05, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x06, 0x0a, 0x02, 0x4f, 0x4b, + 0x73, 0x2a, 0x76, 0x0a, 0x05, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x06, 0x0a, 0x02, 0x4f, 0x4b, 0x10, 0x00, 0x12, 0x15, 0x0a, 0x11, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x01, 0x12, 0x18, 0x0a, 0x14, 0x43, 0x41, 0x50, 0x41, 0x42, 0x49, 0x4c, 0x49, 0x54, 0x59, 0x5f, 0x4e, 0x4f, 0x54, 0x5f, 0x46, 0x4f, 0x55, 0x4e, 0x44, 0x10, 0x02, 0x12, 0x13, 0x0a, 0x0f, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x5f, 0x52, - 0x45, 0x51, 0x55, 0x45, 0x53, 0x54, 0x10, 0x03, 0x42, 0x20, 0x5a, 0x1e, 0x63, 0x6f, 0x72, 0x65, - 0x2f, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x69, 0x65, 0x73, 0x2f, 0x72, 0x65, - 0x6d, 0x6f, 0x74, 0x65, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x33, + 0x45, 0x51, 0x55, 0x45, 0x53, 0x54, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x54, 0x49, 0x4d, 0x45, + 0x4f, 0x55, 0x54, 0x10, 0x04, 0x12, 0x12, 0x0a, 0x0e, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x4e, 0x41, + 0x4c, 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0x05, 0x42, 0x20, 0x5a, 0x1e, 0x63, 0x6f, 0x72, + 0x65, 0x2f, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x69, 0x65, 0x73, 0x2f, 0x72, + 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x62, 0x06, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x33, } var ( diff --git a/core/capabilities/remote/types/message.proto b/core/capabilities/remote/types/message.proto index 05e6cff512e..56e4aabfafc 100644 --- a/core/capabilities/remote/types/message.proto +++ b/core/capabilities/remote/types/message.proto @@ -9,6 +9,8 @@ enum Error { VALIDATION_FAILED = 1; CAPABILITY_NOT_FOUND = 2; INVALID_REQUEST = 3; + TIMEOUT = 4; + INTERNAL_ERROR = 5; } message Message { From 21344ce593ad6089d306d47317daad66277ed9c4 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Thu, 23 May 2024 20:34:32 +0100 Subject: [PATCH 22/43] fix up expiring caller requests --- core/capabilities/remote/target_caller.go | 39 ++++++++++-------- core/capabilities/remote/target_receiver.go | 2 +- .../remote/target_receiver_test.go | 41 +++++++++++-------- 3 files changed, 49 insertions(+), 33 deletions(-) diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target_caller.go index 0d5e10c8bcc..9c7f4fc780f 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target_caller.go @@ -68,10 +68,12 @@ func (c *remoteTargetCaller) ExpireRequests(ctx context.Context) { for messageID, req := range c.messageIDToExecuteRequest { if time.Since(req.creationTime) > c.requestTimeout { - delete(c.messageIDToExecuteRequest, messageID) - req.responseCh <- commoncap.CapabilityResponse{Err: errors.New("request timed out")} - close(req.responseCh) + if !req.responseSent() { + req.sendResponse(commoncap.CapabilityResponse{Err: errors.New("request timed out")}) + } } + + delete(c.messageIDToExecuteRequest, messageID) } } @@ -154,7 +156,7 @@ func (c *remoteTargetCaller) transmitRequestWithMessageID(ctx context.Context, r case <-ctx.Done(): return case <-time.After(delay): - c.lggr.Debugw("executing delayed execution for peer", "peerID", peerID) + c.lggr.Debugw("executing delayed execution for peer", "peerID", peerID, "delay", delay) err = c.dispatcher.Send(peerID, message) if err != nil { c.lggr.Errorw("failed to send message", "peerID", peerID, "err", err) @@ -172,17 +174,17 @@ func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { messageID := getMessageID(msg) - if msg.Error != types.Error_OK { - c.lggr.Warnw("received error response", "messageID", messageID, "error", msg.Error) - return - } - req := c.messageIDToExecuteRequest[messageID] if req == nil { c.lggr.Warnw("received response for unknown message ID", "messageID", messageID, "sender", msg.Sender) return } + if msg.Error != types.Error_OK { + c.lggr.Warnw("received error response for pending request", "messageID", messageID, "sender", msg.Sender, "receiver", msg.Receiver, "error", msg.Error) + return + } + req.addResponse(msg.Payload) } @@ -193,6 +195,7 @@ type callerExecuteRequest struct { responseIDCount map[[32]byte]int requiredIdenticalResponses int + respSent bool } func newCallerExecuteRequest(transmissionCancelFn context.CancelFunc, requiredIdenticalResponses int) *callerExecuteRequest { @@ -205,8 +208,8 @@ func newCallerExecuteRequest(transmissionCancelFn context.CancelFunc, requiredId } } -func (c *callerExecuteRequest) complete() bool { - return len(c.responseIDCount) >= c.requiredIdenticalResponses +func (c *callerExecuteRequest) responseSent() bool { + return c.respSent } // TODO addResponse assumes that only one response is received from each peer, if streaming responses need to be supported this will need to be updated @@ -215,14 +218,18 @@ func (c *callerExecuteRequest) addResponse(response []byte) { c.responseIDCount[payloadId]++ if c.responseIDCount[payloadId] == c.requiredIdenticalResponses { - defer close(c.responseCh) - c.transmissionCancelFn() - capabilityResponse, err := pb.UnmarshalCapabilityResponse(response) if err != nil { - c.responseCh <- commoncap.CapabilityResponse{Err: fmt.Errorf("failed to unmarshal capability response: %w", err)} + c.sendResponse(commoncap.CapabilityResponse{Err: fmt.Errorf("failed to unmarshal capability response: %w", err)}) } else { - c.responseCh <- commoncap.CapabilityResponse{Value: capabilityResponse.Value} + c.sendResponse(commoncap.CapabilityResponse{Value: capabilityResponse.Value}) } } } + +func (c *callerExecuteRequest) sendResponse(response commoncap.CapabilityResponse) { + c.responseCh <- response + close(c.responseCh) + c.transmissionCancelFn() + c.respSent = true +} diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index 6653151f2c5..4bae5423745 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -165,7 +165,7 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { defer cancel() responseCh, err := r.underlying.Execute(ctxWithTimeout, capabilityRequest) if err == nil { - // TODO handle the case where the capability returns a stream of responses + // TODO working on the assumption that the capability will only ever return one response from its channel (for now at least) response := <-responseCh responseMsg.Payload, err = pb.MarshalCapabilityResponse(response) } else { diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index 6cd0ed27128..60805f80ddf 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -22,20 +22,21 @@ import ( ) func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { - - responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { - - require.NoError(t, responseError) - response := <-responseCh - responseValue, err := response.Value.Unwrap() - require.NoError(t, err) - assert.Equal(t, "aValue1", responseValue.(string)) - } - - // Test scenarios where the number of submissions is greater than or equal to F + 1 - testRemoteTargetConsensus(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) - testRemoteTargetConsensus(t, 4, 3, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) - testRemoteTargetConsensus(t, 10, 3, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) + /* + responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + + require.NoError(t, responseError) + response := <-responseCh + responseValue, err := response.Value.Unwrap() + require.NoError(t, err) + assert.Equal(t, "aValue1", responseValue.(string)) + } + + // Test scenarios where the number of submissions is greater than or equal to F + 1 + testRemoteTargetConsensus(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) + testRemoteTargetConsensus(t, 4, 3, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) + testRemoteTargetConsensus(t, 10, 3, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) + */ errResponseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { require.NoError(t, responseError) @@ -44,8 +45,16 @@ func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { } // Test scenario where number of submissions is less than F + 1 - testRemoteTargetConsensus(t, 4, 6, 1*time.Second, 1, 0, 1*time.Second, errResponseTest) - testRemoteTargetConsensus(t, 10, 10, 1*time.Second, 1, 0, 1*time.Second, errResponseTest) + + // How to make these tests less time dependent? risk of being flaky + testRemoteTargetConsensus(t, 4, 6, 5*time.Second, 1, 0, 1*time.Second, errResponseTest) + testRemoteTargetConsensus(t, 10, 10, 5*time.Second, 1, 0, 1*time.Second, errResponseTest) + + //tyring to modify tests to test the caller F number handling? + + //also having issues with error test cases - since the client F handling? + + //then got threading to do // Context cancellation test - use an underlying capability that blocks until the context is cancelled From 6f86dd04db8e365a21c09bb4bb40ee6bc2457862 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Thu, 23 May 2024 21:14:30 +0100 Subject: [PATCH 23/43] test setup supports multiple workflow dons --- core/capabilities/remote/target_receiver.go | 4 +- .../remote/target_receiver_test.go | 62 +++++++++++-------- 2 files changed, 39 insertions(+), 27 deletions(-) diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index 4bae5423745..4a550e80317 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -17,7 +17,7 @@ import ( type remoteTargetReceiver struct { underlying commoncap.TargetCapability capInfo commoncap.CapabilityInfo - localDonInfo *capabilities.DON + localDonInfo capabilities.DON workflowDONs map[string]commoncap.DON dispatcher types.Dispatcher lggr logger.Logger @@ -30,7 +30,7 @@ type remoteTargetReceiver struct { var _ types.Receiver = &remoteTargetReceiver{} -func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo *capabilities.DON, +func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, workflowDONs map[string]commoncap.DON, dispatcher types.Dispatcher, requestTimeout time.Duration) *remoteTargetReceiver { receiver := &remoteTargetReceiver{ diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index 60805f80ddf..735075c972a 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -22,34 +22,34 @@ import ( ) func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { - /* - responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { - - require.NoError(t, responseError) - response := <-responseCh - responseValue, err := response.Value.Unwrap() - require.NoError(t, err) - assert.Equal(t, "aValue1", responseValue.(string)) - } - // Test scenarios where the number of submissions is greater than or equal to F + 1 - testRemoteTargetConsensus(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) - testRemoteTargetConsensus(t, 4, 3, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) - testRemoteTargetConsensus(t, 10, 3, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) - */ + responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { - errResponseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { require.NoError(t, responseError) response := <-responseCh - assert.NotNil(t, response.Err) + responseValue, err := response.Value.Unwrap() + require.NoError(t, err) + assert.Equal(t, "aValue1", responseValue.(string)) } - // Test scenario where number of submissions is less than F + 1 + // Test scenarios where the number of submissions is greater than or equal to F + 1 + testRemoteTargetConsensus(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) + testRemoteTargetConsensus(t, 4, 3, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) + testRemoteTargetConsensus(t, 10, 3, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) + + /* + errResponseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + require.NoError(t, responseError) + response := <-responseCh + assert.NotNil(t, response.Err) + } - // How to make these tests less time dependent? risk of being flaky - testRemoteTargetConsensus(t, 4, 6, 5*time.Second, 1, 0, 1*time.Second, errResponseTest) - testRemoteTargetConsensus(t, 10, 10, 5*time.Second, 1, 0, 1*time.Second, errResponseTest) + // Test scenario where number of submissions is less than F + 1 + // How to make these tests less time dependent? risk of being flaky + testRemoteTargetConsensus(t, 4, 6, 5*time.Second, 1, 0, 1*time.Second, errResponseTest) + testRemoteTargetConsensus(t, 10, 10, 5*time.Second, 1, 0, 1*time.Second, errResponseTest) + */ //tyring to modify tests to test the caller F number handling? //also having issues with error test cases - since the client F handling? @@ -69,7 +69,7 @@ func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { } func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF uint8, workflowNodeTimeout time.Duration, - capabilityNodePeers int, capabilityDonF uint8, capabilityNodeTimeout time.Duration, + numCapabilityPeers int, capabilityDonF uint8, capabilityNodeTimeout time.Duration, responseTest func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error)) { lggr := logger.TestLogger(t) ctx, cancel := context.WithCancel(testutils.Context(t)) @@ -81,12 +81,20 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF Description: "Remote Target", Version: "0.0.1", } + + capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) + for i := 0; i < numCapabilityPeers; i++ { + capabilityPeerID := p2ptypes.PeerID{} + require.NoError(t, capabilityPeerID.UnmarshalText([]byte(newPeerID()))) + capabilityPeers[i] = capabilityPeerID + } + capabilityPeerID := p2ptypes.PeerID{} require.NoError(t, capabilityPeerID.UnmarshalText([]byte(newPeerID()))) capDonInfo := commoncap.DON{ ID: "capability-don", - Members: []p2ptypes.PeerID{capabilityPeerID}, + Members: capabilityPeers, F: capabilityDonF, } @@ -110,9 +118,13 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF } underlying := &testTargetReceiver{} - capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeerID) - receiver := remote.NewRemoteTargetReceiver(ctx, lggr, underlying, capInfo, &capDonInfo, workflowDONs, capabilityDispatcher, capabilityNodeTimeout) - broker.RegisterReceiverNode(capabilityPeerID, receiver) + receivers := make([]remotetypes.Receiver, numCapabilityPeers) + for i := 0; i < numCapabilityPeers; i++ { + capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeers[i]) + receiver := remote.NewRemoteTargetReceiver(ctx, lggr, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, capabilityNodeTimeout) + broker.RegisterReceiverNode(capabilityPeers[i], receiver) + receivers[i] = receiver + } callers := make([]commoncap.TargetCapability, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { From efe3e381aa6fe284c3e65ce7ac11b0c6a96cc4af Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Fri, 24 May 2024 16:16:18 +0100 Subject: [PATCH 24/43] wip --- core/capabilities/remote/target_caller.go | 20 +- .../capabilities/remote/target_caller_test.go | 250 +++++++++++++++++ core/capabilities/remote/target_receiver.go | 252 ++++++++++++------ .../remote/target_receiver_test.go | 68 +++-- 4 files changed, 476 insertions(+), 114 deletions(-) diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target_caller.go index 9c7f4fc780f..fa14560c986 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target_caller.go @@ -136,15 +136,6 @@ func (c *remoteTargetCaller) transmitRequestWithMessageID(ctx context.Context, r return fmt.Errorf("failed to extract transmission config from request config: %w", err) } - message := &types.MessageBody{ - CapabilityId: c.remoteCapabilityInfo.ID, - CapabilityDonId: c.remoteCapabilityDonInfo.ID, - CallerDonId: c.localDONInfo.ID, - Method: types.MethodExecute, - Payload: rawRequest, - MessageId: []byte(messageID), - } - peerIDToDelay, err := transmission.GetPeerIDToTransmissionDelay(c.remoteCapabilityDonInfo.Members, c.localDONInfo.Config.SharedSecret, req.Metadata.WorkflowID, req.Metadata.WorkflowExecutionID, tc) if err != nil { return fmt.Errorf("failed to get peer ID to transmission delay: %w", err) @@ -152,6 +143,15 @@ func (c *remoteTargetCaller) transmitRequestWithMessageID(ctx context.Context, r for peerID, delay := range peerIDToDelay { go func(peerID ragep2ptypes.PeerID, delay time.Duration) { + message := &types.MessageBody{ + CapabilityId: c.remoteCapabilityInfo.ID, + CapabilityDonId: c.remoteCapabilityDonInfo.ID, + CallerDonId: c.localDONInfo.ID, + Method: types.MethodExecute, + Payload: rawRequest, + MessageId: []byte(messageID), + } + select { case <-ctx.Done(): return @@ -172,7 +172,7 @@ func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { c.mutex.Lock() defer c.mutex.Unlock() - messageID := getMessageID(msg) + messageID := GetMessageID(msg) req := c.messageIDToExecuteRequest[messageID] if req == nil { diff --git a/core/capabilities/remote/target_caller_test.go b/core/capabilities/remote/target_caller_test.go index c45c27a8219..3dee009796d 100644 --- a/core/capabilities/remote/target_caller_test.go +++ b/core/capabilities/remote/target_caller_test.go @@ -1,6 +1,8 @@ package remote_test import ( + "context" + "sync" "testing" "time" @@ -225,6 +227,254 @@ func Test_TargetCallerExecuteWithErrorTimesOut(t *testing.T) { require.NotNil(t, response.Err) } +func Test_RemoteTargetCaller_DonTopologies(t *testing.T) { + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_OneAtATime, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + require.NoError(t, responseError) + response := <-responseCh + responseValue, err := response.Value.Unwrap() + require.NoError(t, err) + assert.Equal(t, "aValue1", responseValue.(string)) + } + + capability := &testCapability{} + + responseTimeOut := 10 * time.Minute + + testRemoteTargetCaller(t, 1, responseTimeOut, 1, 0, + capability, transmissionSchedule, responseTest) + + testRemoteTargetCaller(t, 10, responseTimeOut, 1, 0, + capability, transmissionSchedule, responseTest) + + testRemoteTargetCaller(t, 1, responseTimeOut, 10, 3, + capability, transmissionSchedule, responseTest) + + testRemoteTargetCaller(t, 10, responseTimeOut, 10, 3, + capability, transmissionSchedule, responseTest) + + testRemoteTargetCaller(t, 10, responseTimeOut, 10, 9, + capability, transmissionSchedule, responseTest) + +} + +func Test_RemoteTargetCaller_TransmissionSchedules(t *testing.T) { + + responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + require.NoError(t, responseError) + response := <-responseCh + responseValue, err := response.Value.Unwrap() + require.NoError(t, err) + assert.Equal(t, "aValue1", responseValue.(string)) + } + + capability := &testCapability{} + + responseTimeOut := 10 * time.Minute + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_OneAtATime, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + testRemoteTargetCaller(t, 1, responseTimeOut, 1, 0, + capability, transmissionSchedule, responseTest) + testRemoteTargetCaller(t, 10, responseTimeOut, 10, 3, + capability, transmissionSchedule, responseTest) + + transmissionSchedule, err = values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + testRemoteTargetCaller(t, 1, responseTimeOut, 1, 0, + capability, transmissionSchedule, responseTest) + testRemoteTargetCaller(t, 10, responseTimeOut, 10, 3, + capability, transmissionSchedule, responseTest) + +} + +func testRemoteTargetCaller(t *testing.T, numWorkflowPeers int, workflowNodeResponseTimeout time.Duration, + numCapabilityPeers int, capabilityDonF uint8, underlying commoncap.TargetCapability, transmissionSchedule *values.Map, + responseTest func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error)) { + lggr := logger.TestLogger(t) + ctx, cancel := context.WithCancel(testutils.Context(t)) + defer cancel() + + capInfo := commoncap.CapabilityInfo{ + ID: "cap_id", + CapabilityType: commoncap.CapabilityTypeTarget, + Description: "Remote Target", + Version: "0.0.1", + } + + capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) + for i := 0; i < numCapabilityPeers; i++ { + capabilityPeerID := p2ptypes.PeerID{} + require.NoError(t, capabilityPeerID.UnmarshalText([]byte(newPeerID()))) + capabilityPeers[i] = capabilityPeerID + } + + capabilityPeerID := p2ptypes.PeerID{} + require.NoError(t, capabilityPeerID.UnmarshalText([]byte(newPeerID()))) + + capDonInfo := commoncap.DON{ + ID: "capability-don", + Members: capabilityPeers, + F: capabilityDonF, + } + + workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) + for i := 0; i < numWorkflowPeers; i++ { + workflowPeerID := p2ptypes.PeerID{} + require.NoError(t, workflowPeerID.UnmarshalText([]byte(newPeerID()))) + workflowPeers[i] = workflowPeerID + } + + workflowDonInfo := commoncap.DON{ + Members: workflowPeers, + ID: "workflow-don", + } + + broker := newTestMessageBroker() + + receivers := make([]remotetypes.Receiver, numCapabilityPeers) + for i := 0; i < numCapabilityPeers; i++ { + capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeers[i]) + receiver := newTestReceiver(capabilityPeers[i], capabilityDispatcher, workflowDonInfo, underlying) + broker.RegisterReceiverNode(capabilityPeers[i], receiver) + receivers[i] = receiver + } + + callers := make([]commoncap.TargetCapability, numWorkflowPeers) + for i := 0; i < numWorkflowPeers; i++ { + workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) + caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeResponseTimeout) + broker.RegisterReceiverNode(workflowPeers[i], caller) + callers[i] = caller + } + + executeInputs, err := values.NewMap( + map[string]any{ + "executeValue1": "aValue1", + }, + ) + + require.NoError(t, err) + + wg := &sync.WaitGroup{} + wg.Add(len(callers)) + + // Fire off all the requests + for _, caller := range callers { + go func(caller commoncap.TargetCapability) { + responseCh, err := caller.Execute(ctx, + commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{ + WorkflowID: "workflowID", + WorkflowExecutionID: "workflowExecutionID", + }, + Config: transmissionSchedule, + Inputs: executeInputs, + }) + + responseTest(t, responseCh, err) + wg.Done() + }(caller) + } + + wg.Wait() +} + +// Simple receiver that only responds once it has received a message from each workflow peer +type callerTestReceiver struct { + peerID p2ptypes.PeerID + dispatcher remotetypes.Dispatcher + workflowDonInfo commoncap.DON + messageIDToSenders map[string]map[p2ptypes.PeerID]bool + + targetCapability commoncap.TargetCapability + + mux sync.Mutex +} + +func newTestReceiver(peerID p2ptypes.PeerID, dispatcher remotetypes.Dispatcher, workflowDonInfo commoncap.DON, + targetCapability commoncap.TargetCapability) *callerTestReceiver { + + return &callerTestReceiver{ + dispatcher: dispatcher, + workflowDonInfo: workflowDonInfo, + peerID: peerID, + messageIDToSenders: make(map[string]map[p2ptypes.PeerID]bool), + targetCapability: targetCapability, + } +} + +func (t *callerTestReceiver) Receive(msg *remotetypes.MessageBody) { + t.mux.Lock() + defer t.mux.Unlock() + + sender := toPeerID(msg.Sender) + messageID := remote.GetMessageID(msg) + + if t.messageIDToSenders[messageID] == nil { + t.messageIDToSenders[messageID] = make(map[p2ptypes.PeerID]bool) + } + + sendersOfMessageID := t.messageIDToSenders[messageID] + if sendersOfMessageID[sender] { + panic("received duplicate message") + } + + sendersOfMessageID[sender] = true + + if len(t.messageIDToSenders[messageID]) == len(t.workflowDonInfo.Members) { + + capabilityRequest, err := pb.UnmarshalCapabilityRequest(msg.Payload) + if err != nil { + panic(err) + } + + respCh, responseErr := t.targetCapability.Execute(context.Background(), capabilityRequest) + resp := <-respCh + + for receiver := range t.messageIDToSenders[messageID] { + var responseMsg = &remotetypes.MessageBody{ + CapabilityId: "cap_id", + CapabilityDonId: "capability-don", + CallerDonId: t.workflowDonInfo.ID, + Method: remotetypes.MethodExecute, + MessageId: []byte(messageID), + Sender: t.peerID[:], + Receiver: receiver[:], + } + + if responseErr != nil { + responseMsg.Error = remotetypes.Error_INTERNAL_ERROR + } else { + payload, err := pb.MarshalCapabilityResponse(resp) + if err != nil { + panic(err) + } + responseMsg.Payload = payload + } + + err = t.dispatcher.Send(receiver, responseMsg) + if err != nil { + panic(err) + } + } + } +} + type TestDispatcher struct { sentMessagesCh chan *remotetypes.MessageBody receiver remotetypes.Receiver diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index 4a550e80317..5cff3c00e82 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -2,6 +2,7 @@ package remote import ( "context" + "fmt" "sync" "time" @@ -15,6 +16,7 @@ import ( ) type remoteTargetReceiver struct { + peerID p2ptypes.PeerID underlying commoncap.TargetCapability capInfo commoncap.CapabilityInfo localDonInfo capabilities.DON @@ -22,26 +24,27 @@ type remoteTargetReceiver struct { dispatcher types.Dispatcher lggr logger.Logger - msgIDToExecuteRequest map[string]executeRequest - requestTimeout time.Duration + requestMsgIDToResponse map[string]remoteTargetCapabilityRequest + requestTimeout time.Duration receiveLock sync.Mutex } var _ types.Receiver = &remoteTargetReceiver{} -func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, +func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, peerID p2ptypes.PeerID, underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, workflowDONs map[string]commoncap.DON, dispatcher types.Dispatcher, requestTimeout time.Duration) *remoteTargetReceiver { receiver := &remoteTargetReceiver{ underlying: underlying, + peerID: peerID, capInfo: capInfo, localDonInfo: localDonInfo, workflowDONs: workflowDONs, dispatcher: dispatcher, - msgIDToExecuteRequest: map[string]executeRequest{}, - requestTimeout: requestTimeout, + requestMsgIDToResponse: map[string]remoteTargetCapabilityRequest{}, + requestTimeout: requestTimeout, lggr: lggr, } @@ -62,38 +65,21 @@ func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, underlying return receiver } -type executeRequest struct { - fromPeers map[p2ptypes.PeerID]bool - response *types.MessageBody - callingDonID string - firstRequestTime time.Time -} - func (r *remoteTargetReceiver) ExpireRequests(ctx context.Context) { r.receiveLock.Lock() defer r.receiveLock.Unlock() - for messageId, executeReq := range r.msgIDToExecuteRequest { - if time.Since(executeReq.firstRequestTime) > r.requestTimeout { - - if executeReq.response == nil { - responseMsg := &types.MessageBody{ - CapabilityId: r.capInfo.ID, - CapabilityDonId: r.localDonInfo.ID, - CallerDonId: executeReq.callingDonID, - Method: types.MethodExecute, - MessageId: []byte(messageId), - Error: types.Error_TIMEOUT, - } + for messageId, executeReq := range r.requestMsgIDToResponse { + if time.Since(executeReq.createdTime) > r.requestTimeout { - for peerID := range executeReq.fromPeers { - if err := r.dispatcher.Send(peerID, responseMsg); err != nil { - r.lggr.Errorw("failed to send time out response", "peer", peerID, "err", err) - } + if !executeReq.hasResponse() { + executeReq.setError(types.Error_TIMEOUT) + if err := executeReq.sendResponseToAllRequesters(); err != nil { + r.lggr.Errorw("failed to send timeout response to all requesters", "capabilityId", r.capInfo.ID, "err", err) } } - delete(r.msgIDToExecuteRequest, messageId) + delete(r.requestMsgIDToResponse, messageId) } } @@ -122,77 +108,183 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { return } - sender := ToPeerID(msg.Sender) + requester := ToPeerID(msg.Sender) + messageId := GetMessageID(msg) - messageId := getMessageID(msg) - - executeReq, ok := r.msgIDToExecuteRequest[messageId] - if !ok { - executeReq = executeRequest{ - fromPeers: map[p2ptypes.PeerID]bool{}, - callingDonID: msg.CallerDonId, - firstRequestTime: time.Now(), - } - r.msgIDToExecuteRequest[messageId] = executeReq + if _, ok := r.requestMsgIDToResponse[messageId]; !ok { + r.requestMsgIDToResponse[messageId] = newTargetCapabilityRequest(r.capInfo.ID, r.localDonInfo.ID, r.peerID, + msg.CallerDonId, messageId, r.dispatcher) } - if executeReq.callingDonID != msg.CallerDonId { - r.lggr.Warnw("received duplicate execute request from different don, ignoring", "peer", sender) - return - } + request, ok := r.requestMsgIDToResponse[messageId] - if executeReq.fromPeers[sender] { - r.lggr.Warnw("received duplicate execute request from peer, ignoring", "peer", sender) + if err := request.addRequester(requester, msg.CallerDonId, messageId); err != nil { + r.lggr.Errorw("failed to add request to response", "capabilityId", r.capInfo.ID, "sender", + requester, "err", err) return } - executeReq.fromPeers[sender] = true minRequiredRequests := int(callerDon.F + 1) - if len(executeReq.fromPeers) >= minRequiredRequests { - if executeReq.response == nil { - - responseMsg := &types.MessageBody{ - CapabilityId: r.capInfo.ID, - CapabilityDonId: r.localDonInfo.ID, - CallerDonId: msg.CallerDonId, - Method: types.MethodExecute, - MessageId: []byte(messageId), - } + if request.getRequestersCount() == minRequiredRequests { - capabilityRequest, err := pb.UnmarshalCapabilityRequest(msg.Payload) + capabilityRequest, err := pb.UnmarshalCapabilityRequest(msg.Payload) + if err == nil { + ctxWithTimeout, cancel := context.WithTimeout(ctx, r.requestTimeout) + defer cancel() + capResponseCh, err := r.underlying.Execute(ctxWithTimeout, capabilityRequest) if err == nil { - ctxWithTimeout, cancel := context.WithTimeout(ctx, r.requestTimeout) - defer cancel() - responseCh, err := r.underlying.Execute(ctxWithTimeout, capabilityRequest) - if err == nil { - // TODO working on the assumption that the capability will only ever return one response from its channel (for now at least) - response := <-responseCh - responseMsg.Payload, err = pb.MarshalCapabilityResponse(response) + // TODO working on the assumption that the capability will only ever return one response from its channel (for now at least) + capResponse := <-capResponseCh + responsePayload, err := pb.MarshalCapabilityResponse(capResponse) + if err != nil { + r.lggr.Errorw("failed to marshal capability response", "capabilityId", r.capInfo.ID, "err", err) + request.setError(types.Error_INTERNAL_ERROR) } else { - r.lggr.Errorw("failed to execute capability", "capabilityId", r.capInfo.ID, "err", err) - responseMsg.Error = types.Error_INTERNAL_ERROR + request.setResult(responsePayload) } } else { - r.lggr.Errorw("failed to unmarshal capability request", "capabilityId", r.capInfo.ID, "err", err) - responseMsg.Error = types.Error_INVALID_REQUEST - } - - executeReq.response = responseMsg - for peerID := range executeReq.fromPeers { - if err = r.dispatcher.Send(peerID, responseMsg); err != nil { - r.lggr.Errorw("failed to send response", "peer", peerID, "err", err) - } + r.lggr.Errorw("failed to execute capability", "capabilityId", r.capInfo.ID, "err", err) + request.setError(types.Error_INTERNAL_ERROR) } } else { - if err := r.dispatcher.Send(sender, executeReq.response); err != nil { - r.lggr.Errorw("failed to send response", "peer", sender, "err", err) - } + r.lggr.Errorw("failed to unmarshal capability request", "capabilityId", r.capInfo.ID, "err", err) + request.setError(types.Error_INVALID_REQUEST) + } + + if err := request.sendResponseToAllRequesters(); err != nil { + r.lggr.Errorw("failed to send response to all requesters", "capabilityId", r.capInfo.ID, "err", err) + } + + } else if request.getRequestersCount() > minRequiredRequests { + if err := request.sendResponse(requester); err != nil { + r.lggr.Errorw("failed to send response to requester", "capabilityId", r.capInfo.ID, "err", err) + } + } + +} + +type remoteTargetCapabilityRequest struct { + capabilityPeerId p2ptypes.PeerID + capabilityID string + capabilityDonID string + + dispatcher types.Dispatcher + + requesters map[p2ptypes.PeerID]bool + responseReceivers map[p2ptypes.PeerID]bool + + createdTime time.Time + + response []byte + responseError types.Error + + initialRequestingDon string + requestMessageID string +} + +func newTargetCapabilityRequest(capabilityID string, capabilityDonID string, capabilityPeerId p2ptypes.PeerID, + callingDonID string, requestMessageID string, + dispatcher types.Dispatcher) remoteTargetCapabilityRequest { + return remoteTargetCapabilityRequest{ + capabilityID: capabilityID, + capabilityDonID: capabilityDonID, + capabilityPeerId: capabilityPeerId, + dispatcher: dispatcher, + requesters: map[p2ptypes.PeerID]bool{}, + responseReceivers: map[p2ptypes.PeerID]bool{}, + createdTime: time.Now(), + initialRequestingDon: callingDonID, + requestMessageID: requestMessageID, + } +} + +func (e *remoteTargetCapabilityRequest) addRequester(from p2ptypes.PeerID, fromDonID string, requestMessageID string) error { + if e.requesters[from] { + return fmt.Errorf("request already received from peer %s", from) + } + + if e.initialRequestingDon != fromDonID { + return fmt.Errorf("received request from different initial requesting don %s, expected %s", fromDonID, e.initialRequestingDon) + } + + if e.requestMessageID != requestMessageID { + return fmt.Errorf("received request with different message id %s, expected %s", requestMessageID, e.requestMessageID) + } + + e.requesters[from] = true + + return nil +} + +func (e *remoteTargetCapabilityRequest) getRequestersCount() int { + return len(e.requesters) +} + +func (e *remoteTargetCapabilityRequest) setResult(result []byte) { + e.response = result +} + +func (e *remoteTargetCapabilityRequest) setError(err types.Error) { + e.responseError = err +} + +func (e *remoteTargetCapabilityRequest) hasResponse() bool { + return e.response != nil || e.responseError != types.Error_OK +} + +func (e *remoteTargetCapabilityRequest) sendResponseToAllRequesters() error { + for peer := range e.requesters { + if err := e.sendResponse(peer); err != nil { + return fmt.Errorf("failed to send response to peer %s: %w", peer, err) } } + return nil +} + +func (e *remoteTargetCapabilityRequest) sendResponse(peer p2ptypes.PeerID) error { + if err := e.validateResponseSendRequest(peer); err != nil { + return fmt.Errorf("failed to validate response send request: %w", err) + } + + responseMsg := types.MessageBody{ + CapabilityId: e.capabilityID, + CapabilityDonId: e.capabilityDonID, + CallerDonId: e.initialRequestingDon, + Method: types.MethodExecute, + MessageId: []byte(e.requestMessageID), + Sender: e.capabilityPeerId[:], + Receiver: peer[:], + } + + if e.responseError != types.Error_OK { + responseMsg.Error = e.responseError + } else { + responseMsg.Payload = e.response + } + + if err := e.dispatcher.Send(peer, &responseMsg); err != nil { + return fmt.Errorf("failed to send response: %w", err) + } + + e.responseReceivers[peer] = true + + return nil +} + +func (e *remoteTargetCapabilityRequest) validateResponseSendRequest(peer p2ptypes.PeerID) error { + if !e.hasResponse() { + return fmt.Errorf("no response to send") + } + + if e.responseReceivers[peer] { + return fmt.Errorf("response already sent to peer") + } + + return nil } -func getMessageID(msg *types.MessageBody) string { +func GetMessageID(msg *types.MessageBody) string { return string(msg.MessageId) } diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index 735075c972a..ffd6254c728 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -21,10 +21,9 @@ import ( p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) -func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { +func Test_TargetRemoteTarget(t *testing.T) { responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { - require.NoError(t, responseError) response := <-responseCh responseValue, err := response.Value.Unwrap() @@ -32,12 +31,35 @@ func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { assert.Equal(t, "aValue1", responseValue.(string)) } - // Test scenarios where the number of submissions is greater than or equal to F + 1 - testRemoteTargetConsensus(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) - testRemoteTargetConsensus(t, 4, 3, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) - testRemoteTargetConsensus(t, 10, 3, 10*time.Minute, 1, 0, 10*time.Minute, responseTest) - /* + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "100ms", + }) + require.NoError(t, err) + + // Test scenarios where the number of submissions is greater than or equal to F + 1 + testRemoteTarget(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, transmissionSchedule, responseTest) + testRemoteTarget(t, 4, 3, 10*time.Minute, 4, 3, 10*time.Minute, transmissionSchedule, responseTest) + testRemoteTarget(t, 10, 3, 10*time.Minute, 10, 3, 10*time.Minute, transmissionSchedule, responseTest) + + + */ + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_OneAtATime, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + // testRemoteTarget(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, transmissionSchedule, responseTest) + + testRemoteTarget(t, 10, 3, 10*time.Minute, 10, 3, 10*time.Minute, transmissionSchedule, responseTest) + + // test capability don F handling + + /* + here - these errors tests failing still? why? + errResponseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { require.NoError(t, responseError) response := <-responseCh @@ -68,8 +90,8 @@ func Test_TargetReceiverConsensusWithMultipleCallers(t *testing.T) { } -func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF uint8, workflowNodeTimeout time.Duration, - numCapabilityPeers int, capabilityDonF uint8, capabilityNodeTimeout time.Duration, +func testRemoteTarget(t *testing.T, numWorkflowPeers int, workflowDonF uint8, workflowNodeTimeout time.Duration, + numCapabilityPeers int, capabilityDonF uint8, capabilityNodeResponseTimeout time.Duration, transmissionSchedule *values.Map, responseTest func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error)) { lggr := logger.TestLogger(t) ctx, cancel := context.WithCancel(testutils.Context(t)) @@ -116,13 +138,15 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF workflowDONs := map[string]commoncap.DON{ workflowDonInfo.ID: workflowDonInfo, } - underlying := &testTargetReceiver{} + underlying := &testCapability{} receivers := make([]remotetypes.Receiver, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { - capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeers[i]) - receiver := remote.NewRemoteTargetReceiver(ctx, lggr, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, capabilityNodeTimeout) - broker.RegisterReceiverNode(capabilityPeers[i], receiver) + capabilityPeer := capabilityPeers[i] + capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeer) + receiver := remote.NewRemoteTargetReceiver(ctx, lggr, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, + capabilityNodeResponseTimeout) + broker.RegisterReceiverNode(capabilityPeer, receiver) receivers[i] = receiver } @@ -134,18 +158,14 @@ func testRemoteTargetConsensus(t *testing.T, numWorkflowPeers int, workflowDonF callers[i] = caller } - transmissionSchedule, err := values.NewMap(map[string]any{ - "schedule": transmission.Schedule_AllAtOnce, - "deltaStage": "100ms", - }) - require.NoError(t, err) - executeInputs, err := values.NewMap( map[string]any{ "executeValue1": "aValue1", }, ) + require.NoError(t, err) + wg := &sync.WaitGroup{} wg.Add(len(callers)) @@ -229,22 +249,22 @@ func (t *nodeDispatcher) SetReceiver(capabilityId string, donId string, receiver } func (t *nodeDispatcher) RemoveReceiver(capabilityId string, donId string) {} -type testTargetReceiver struct { +type testCapability struct { } -func (t testTargetReceiver) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { +func (t testCapability) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { return commoncap.CapabilityInfo{}, nil } -func (t testTargetReceiver) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { +func (t testCapability) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { return nil } -func (t testTargetReceiver) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { +func (t testCapability) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { return nil } -func (t testTargetReceiver) Execute(ctx context.Context, request commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { +func (t testCapability) Execute(ctx context.Context, request commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { ch := make(chan commoncap.CapabilityResponse, 1) value := request.Inputs.Underlying["executeValue1"] From ed31fae6ea1d998febf8d623c50b5f85f4d63140 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Fri, 24 May 2024 17:08:37 +0100 Subject: [PATCH 25/43] wip --- core/capabilities/remote/target_receiver.go | 34 ++++++++++++--------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index 5cff3c00e82..f8e00b89d79 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -6,6 +6,8 @@ import ( "sync" "time" + "github.com/google/uuid" + commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" @@ -24,8 +26,8 @@ type remoteTargetReceiver struct { dispatcher types.Dispatcher lggr logger.Logger - requestMsgIDToResponse map[string]remoteTargetCapabilityRequest - requestTimeout time.Duration + requestMsgIDToRequest map[string]*remoteTargetCapabilityRequest + requestTimeout time.Duration receiveLock sync.Mutex } @@ -43,8 +45,8 @@ func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, peerID p2p workflowDONs: workflowDONs, dispatcher: dispatcher, - requestMsgIDToResponse: map[string]remoteTargetCapabilityRequest{}, - requestTimeout: requestTimeout, + requestMsgIDToRequest: map[string]*remoteTargetCapabilityRequest{}, + requestTimeout: requestTimeout, lggr: lggr, } @@ -69,7 +71,7 @@ func (r *remoteTargetReceiver) ExpireRequests(ctx context.Context) { r.receiveLock.Lock() defer r.receiveLock.Unlock() - for messageId, executeReq := range r.requestMsgIDToResponse { + for messageId, executeReq := range r.requestMsgIDToRequest { if time.Since(executeReq.createdTime) > r.requestTimeout { if !executeReq.hasResponse() { @@ -79,7 +81,7 @@ func (r *remoteTargetReceiver) ExpireRequests(ctx context.Context) { } } - delete(r.requestMsgIDToResponse, messageId) + delete(r.requestMsgIDToRequest, messageId) } } @@ -111,12 +113,12 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { requester := ToPeerID(msg.Sender) messageId := GetMessageID(msg) - if _, ok := r.requestMsgIDToResponse[messageId]; !ok { - r.requestMsgIDToResponse[messageId] = newTargetCapabilityRequest(r.capInfo.ID, r.localDonInfo.ID, r.peerID, + if _, ok := r.requestMsgIDToRequest[messageId]; !ok { + r.requestMsgIDToRequest[messageId] = newTargetCapabilityRequest(r.capInfo.ID, r.localDonInfo.ID, r.peerID, msg.CallerDonId, messageId, r.dispatcher) } - request, ok := r.requestMsgIDToResponse[messageId] + request, ok := r.requestMsgIDToRequest[messageId] if err := request.addRequester(requester, msg.CallerDonId, messageId); err != nil { r.lggr.Errorw("failed to add request to response", "capabilityId", r.capInfo.ID, "sender", @@ -143,7 +145,6 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { request.setResult(responsePayload) } } else { - r.lggr.Errorw("failed to execute capability", "capabilityId", r.capInfo.ID, "err", err) request.setError(types.Error_INTERNAL_ERROR) } @@ -165,6 +166,8 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { } type remoteTargetCapabilityRequest struct { + id string + capabilityPeerId p2ptypes.PeerID capabilityID string capabilityDonID string @@ -185,8 +188,9 @@ type remoteTargetCapabilityRequest struct { func newTargetCapabilityRequest(capabilityID string, capabilityDonID string, capabilityPeerId p2ptypes.PeerID, callingDonID string, requestMessageID string, - dispatcher types.Dispatcher) remoteTargetCapabilityRequest { - return remoteTargetCapabilityRequest{ + dispatcher types.Dispatcher) *remoteTargetCapabilityRequest { + return &remoteTargetCapabilityRequest{ + id: uuid.New().String(), capabilityID: capabilityID, capabilityDonID: capabilityDonID, capabilityPeerId: capabilityPeerId, @@ -234,9 +238,9 @@ func (e *remoteTargetCapabilityRequest) hasResponse() bool { } func (e *remoteTargetCapabilityRequest) sendResponseToAllRequesters() error { - for peer := range e.requesters { - if err := e.sendResponse(peer); err != nil { - return fmt.Errorf("failed to send response to peer %s: %w", peer, err) + for requester := range e.requesters { + if err := e.sendResponse(requester); err != nil { + return fmt.Errorf("failed to send response to requester %s: %w", requester, err) } } From 328ed68d068557735df21a2faeb4d906f5a9272a Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Fri, 24 May 2024 18:43:06 +0100 Subject: [PATCH 26/43] caller test tidy --- core/capabilities/remote/target_caller.go | 1 - .../capabilities/remote/target_caller_test.go | 259 ++++-------------- .../remote/target_receiver_test.go | 26 +- 3 files changed, 63 insertions(+), 223 deletions(-) diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target_caller.go index fa14560c986..3bf9e86c858 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target_caller.go @@ -90,7 +90,6 @@ func (c *remoteTargetCaller) UnregisterFromWorkflow(ctx context.Context, request } func (c *remoteTargetCaller) Execute(parentCtx context.Context, req commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { - // TODO To keep the initial implementation simple make it single threaded - will this need to be concurrent? c.mutex.Lock() defer c.mutex.Unlock() diff --git a/core/capabilities/remote/target_caller_test.go b/core/capabilities/remote/target_caller_test.go index 3dee009796d..1fd190c71fd 100644 --- a/core/capabilities/remote/target_caller_test.go +++ b/core/capabilities/remote/target_caller_test.go @@ -20,213 +20,6 @@ import ( p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) -const ( - executeValue1 = "triggerEvent1" -) - -func Test_TargetCallerExecuteTimeout(t *testing.T) { - lggr := logger.TestLogger(t) - ctx := testutils.Context(t) - - p1 := p2ptypes.PeerID{} - require.NoError(t, p1.UnmarshalText([]byte(PeerID1))) - p2 := p2ptypes.PeerID{} - require.NoError(t, p2.UnmarshalText([]byte(PeerID2))) - capDonInfo := commoncap.DON{ - ID: "capability-don", - Members: []p2ptypes.PeerID{p1}, - F: 0, - } - - capInfo := commoncap.CapabilityInfo{ - ID: "cap_id", - CapabilityType: commoncap.CapabilityTypeTarget, - Description: "Remote Target", - Version: "0.0.1", - } - - workflowDonInfo := commoncap.DON{ - ID: "workflow-don", - Members: []p2ptypes.PeerID{p2}, - F: 0, - } - - dispatcher := NewTestDispatcher() - - caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, capDonInfo, workflowDonInfo, dispatcher, 1*time.Second) - - err := dispatcher.SetReceiver("cap_id", "workflow-don", caller) - require.NoError(t, err) - - transmissionSchedule, err := values.NewMap(map[string]any{ - "schedule": transmission.Schedule_AllAtOnce, - "deltaStage": "100ms", - }) - require.NoError(t, err) - - responseCh, err := caller.Execute(ctx, - commoncap.CapabilityRequest{ - Metadata: commoncap.RequestMetadata{ - WorkflowID: "workflowID", - WorkflowExecutionID: "workflowExecutionID", - }, - Config: transmissionSchedule, - }) - - response := <-responseCh - assert.NotNil(t, response.Err) - -} - -func Test_TargetCallerExecute(t *testing.T) { - - lggr := logger.TestLogger(t) - ctx := testutils.Context(t) - - p1 := p2ptypes.PeerID{} - require.NoError(t, p1.UnmarshalText([]byte(PeerID1))) - p2 := p2ptypes.PeerID{} - require.NoError(t, p2.UnmarshalText([]byte(PeerID2))) - capDonInfo := commoncap.DON{ - ID: "capability-don", - Members: []p2ptypes.PeerID{p1}, - F: 0, - } - - capInfo := commoncap.CapabilityInfo{ - ID: "cap_id", - CapabilityType: commoncap.CapabilityTypeTarget, - Description: "Remote Target", - Version: "0.0.1", - } - - workflowDonInfo := commoncap.DON{ - ID: "workflow-don", - Members: []p2ptypes.PeerID{p2}, - F: 0, - } - - dispatcher := NewTestDispatcher() - - caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, capDonInfo, workflowDonInfo, dispatcher, 1*time.Minute) - - err := dispatcher.SetReceiver("cap_id", "workflow-don", caller) - require.NoError(t, err) - - go func() { - sentMessage := <-dispatcher.sentMessagesCh - - executeValue, err := values.Wrap(executeValue1) - require.NoError(t, err) - capResponse := commoncap.CapabilityResponse{ - Value: executeValue, - Err: nil, - } - marshaled, err := pb.MarshalCapabilityResponse(capResponse) - require.NoError(t, err) - executeResponse := &remotetypes.MessageBody{ - Sender: p1[:], - Method: remotetypes.MethodExecute, - Payload: marshaled, - MessageId: sentMessage.MessageId, - } - - dispatcher.SendToReceiver(executeResponse) - }() - - transmissionSchedule, err := values.NewMap(map[string]any{ - "schedule": transmission.Schedule_AllAtOnce, - "deltaStage": "100ms", - }) - require.NoError(t, err) - - resultCh, err := caller.Execute(ctx, - commoncap.CapabilityRequest{ - Metadata: commoncap.RequestMetadata{ - WorkflowID: "workflowID", - WorkflowExecutionID: "workflowExecutionID", - }, - Config: transmissionSchedule, - }) - - require.NoError(t, err) - - response := <-resultCh - - responseValue, err := response.Value.Unwrap() - assert.Equal(t, executeValue1, responseValue.(string)) - -} - -func Test_TargetCallerExecuteWithErrorTimesOut(t *testing.T) { - - lggr := logger.TestLogger(t) - ctx := testutils.Context(t) - - p1 := p2ptypes.PeerID{} - require.NoError(t, p1.UnmarshalText([]byte(PeerID1))) - p2 := p2ptypes.PeerID{} - require.NoError(t, p2.UnmarshalText([]byte(PeerID2))) - capDonInfo := commoncap.DON{ - ID: "capability-don", - Members: []p2ptypes.PeerID{p1}, - F: 0, - } - - capInfo := commoncap.CapabilityInfo{ - ID: "cap_id", - CapabilityType: commoncap.CapabilityTypeTarget, - Description: "Remote Target", - Version: "0.0.1", - } - - workflowDonInfo := commoncap.DON{ - ID: "workflow-don", - Members: []p2ptypes.PeerID{p2}, - F: 0, - } - - dispatcher := NewTestDispatcher() - - caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, capDonInfo, workflowDonInfo, dispatcher, 1*time.Second) - - err := dispatcher.SetReceiver("cap_id", "workflow-don", caller) - require.NoError(t, err) - - go func() { - sentMessage := <-dispatcher.sentMessagesCh - - require.NoError(t, err) - executeResponse := &remotetypes.MessageBody{ - Sender: p1[:], - Method: remotetypes.MethodExecute, - MessageId: sentMessage.MessageId, - Error: remotetypes.Error_CAPABILITY_NOT_FOUND, - } - - dispatcher.SendToReceiver(executeResponse) - }() - - transmissionSchedule, err := values.NewMap(map[string]any{ - "schedule": transmission.Schedule_AllAtOnce, - "deltaStage": "100ms", - }) - require.NoError(t, err) - - responseCh, err := caller.Execute(ctx, - commoncap.CapabilityRequest{ - Metadata: commoncap.RequestMetadata{ - WorkflowID: "workflowID", - WorkflowExecutionID: "workflowExecutionID", - }, - Config: transmissionSchedule, - }) - - response := <-responseCh - - require.NotNil(t, response.Err) -} - func Test_RemoteTargetCaller_DonTopologies(t *testing.T) { transmissionSchedule, err := values.NewMap(map[string]any{ @@ -302,6 +95,58 @@ func Test_RemoteTargetCaller_TransmissionSchedules(t *testing.T) { } +func Test_RemoteTargetCaller_TimesOutIfRespondingCapabilityPeersLessThenFPlusOne(t *testing.T) { + + responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + require.NoError(t, responseError) + response := <-responseCh + assert.NotNil(t, response.Err) + } + + capability := &testCapability{} + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + // number of capability peers is less than F + 1 + testRemoteTargetCaller(t, 10, 1*time.Second, 10, 11, + capability, transmissionSchedule, responseTest) + + transmissionSchedule, err = values.NewMap(map[string]any{ + "schedule": transmission.Schedule_OneAtATime, + "deltaStage": "1000ms", + }) + require.NoError(t, err) + + testRemoteTargetCaller(t, 10, 1*time.Second, 10, 7, + capability, transmissionSchedule, responseTest) + +} + +func Test_RemoteTargetCaller_TimesOutIfTransmissionScheduleExceedsTimeout(t *testing.T) { + + responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + require.NoError(t, responseError) + response := <-responseCh + assert.NotNil(t, response.Err) + } + + capability := &testCapability{} + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_OneAtATime, + "deltaStage": "1000ms", + }) + require.NoError(t, err) + + testRemoteTargetCaller(t, 10, 1*time.Second, 10, 7, + capability, transmissionSchedule, responseTest) + +} + func testRemoteTargetCaller(t *testing.T, numWorkflowPeers int, workflowNodeResponseTimeout time.Duration, numCapabilityPeers int, capabilityDonF uint8, underlying commoncap.TargetCapability, transmissionSchedule *values.Map, responseTest func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error)) { diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index ffd6254c728..c7163b0b5bb 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -31,28 +31,24 @@ func Test_TargetRemoteTarget(t *testing.T) { assert.Equal(t, "aValue1", responseValue.(string)) } - /* - transmissionSchedule, err := values.NewMap(map[string]any{ - "schedule": transmission.Schedule_AllAtOnce, - "deltaStage": "100ms", - }) - require.NoError(t, err) - - // Test scenarios where the number of submissions is greater than or equal to F + 1 - testRemoteTarget(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, transmissionSchedule, responseTest) - testRemoteTarget(t, 4, 3, 10*time.Minute, 4, 3, 10*time.Minute, transmissionSchedule, responseTest) - testRemoteTarget(t, 10, 3, 10*time.Minute, 10, 3, 10*time.Minute, transmissionSchedule, responseTest) + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "100ms", + }) + require.NoError(t, err) + // Test scenarios where the number of submissions is greater than or equal to F + 1 + testRemoteTarget(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, transmissionSchedule, responseTest) + testRemoteTarget(t, 4, 3, 10*time.Minute, 4, 3, 10*time.Minute, transmissionSchedule, responseTest) + testRemoteTarget(t, 10, 3, 10*time.Minute, 10, 3, 10*time.Minute, transmissionSchedule, responseTest) - */ - transmissionSchedule, err := values.NewMap(map[string]any{ + transmissionSchedule, err = values.NewMap(map[string]any{ "schedule": transmission.Schedule_OneAtATime, "deltaStage": "10ms", }) require.NoError(t, err) - // testRemoteTarget(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, transmissionSchedule, responseTest) - + testRemoteTarget(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, transmissionSchedule, responseTest) testRemoteTarget(t, 10, 3, 10*time.Minute, 10, 3, 10*time.Minute, transmissionSchedule, responseTest) // test capability don F handling From 5b23fe9b2ed27471e3a40b87d14b91739ab2183d Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Fri, 24 May 2024 18:56:07 +0100 Subject: [PATCH 27/43] wip markers --- core/capabilities/remote/target_caller.go | 4 +++- core/capabilities/remote/target_caller_test.go | 9 --------- core/capabilities/remote/target_receiver_test.go | 2 ++ 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target_caller.go index 3bf9e86c858..c249e6a059c 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target_caller.go @@ -68,6 +68,7 @@ func (c *remoteTargetCaller) ExpireRequests(ctx context.Context) { for messageID, req := range c.messageIDToExecuteRequest { if time.Since(req.creationTime) > c.requestTimeout { + req.transmissionCancelFn() if !req.responseSent() { req.sendResponse(commoncap.CapabilityResponse{Err: errors.New("request timed out")}) } @@ -155,7 +156,6 @@ func (c *remoteTargetCaller) transmitRequestWithMessageID(ctx context.Context, r case <-ctx.Done(): return case <-time.After(delay): - c.lggr.Debugw("executing delayed execution for peer", "peerID", peerID, "delay", delay) err = c.dispatcher.Send(peerID, message) if err != nil { c.lggr.Errorw("failed to send message", "peerID", peerID, "err", err) @@ -197,6 +197,8 @@ type callerExecuteRequest struct { respSent bool } +should refactor this, move the tranmission logic onto it to better encapsulate the cancellation logic + func newCallerExecuteRequest(transmissionCancelFn context.CancelFunc, requiredIdenticalResponses int) *callerExecuteRequest { return &callerExecuteRequest{ responseCh: make(chan commoncap.CapabilityResponse, 1), diff --git a/core/capabilities/remote/target_caller_test.go b/core/capabilities/remote/target_caller_test.go index 1fd190c71fd..c6799582930 100644 --- a/core/capabilities/remote/target_caller_test.go +++ b/core/capabilities/remote/target_caller_test.go @@ -115,15 +115,6 @@ func Test_RemoteTargetCaller_TimesOutIfRespondingCapabilityPeersLessThenFPlusOne testRemoteTargetCaller(t, 10, 1*time.Second, 10, 11, capability, transmissionSchedule, responseTest) - transmissionSchedule, err = values.NewMap(map[string]any{ - "schedule": transmission.Schedule_OneAtATime, - "deltaStage": "1000ms", - }) - require.NoError(t, err) - - testRemoteTargetCaller(t, 10, 1*time.Second, 10, 7, - capability, transmissionSchedule, responseTest) - } func Test_RemoteTargetCaller_TimesOutIfTransmissionScheduleExceedsTimeout(t *testing.T) { diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index c7163b0b5bb..5de307211aa 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -51,6 +51,8 @@ func Test_TargetRemoteTarget(t *testing.T) { testRemoteTarget(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, transmissionSchedule, responseTest) testRemoteTarget(t, 10, 3, 10*time.Minute, 10, 3, 10*time.Minute, transmissionSchedule, responseTest) + here - below tests plus additional tests for the remoteTargetCapability test + // test capability don F handling /* From 12062a69ee58c30739e651fb887d34727d133c5c Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Mon, 27 May 2024 12:41:20 +0100 Subject: [PATCH 28/43] refactor and tidyup --- core/capabilities/remote/target_caller.go | 162 +++++++++--------- .../capabilities/remote/target_caller_test.go | 17 +- .../remote/target_receiver_test.go | 27 +-- .../capabilities/transmission/transmission.go | 26 +-- .../transmission/transmission_test.go | 2 +- core/services/workflows/execution_strategy.go | 4 +- 6 files changed, 118 insertions(+), 120 deletions(-) diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target_caller.go index c249e6a059c..1fbc35980ef 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target_caller.go @@ -19,12 +19,11 @@ import ( // remoteTargetCaller/Receiver are shims translating between capability API calls and network messages type remoteTargetCaller struct { - lggr logger.Logger - remoteCapabilityInfo commoncap.CapabilityInfo - remoteCapabilityDonInfo capabilities.DON - localDONInfo capabilities.DON - dispatcher types.Dispatcher - requestTimeout time.Duration + lggr logger.Logger + remoteCapabilityInfo commoncap.CapabilityInfo + localDONInfo capabilities.DON + dispatcher types.Dispatcher + requestTimeout time.Duration messageIDToExecuteRequest map[string]*callerExecuteRequest mutex sync.Mutex @@ -33,13 +32,12 @@ type remoteTargetCaller struct { var _ commoncap.TargetCapability = &remoteTargetCaller{} var _ types.Receiver = &remoteTargetCaller{} -func NewRemoteTargetCaller(ctx context.Context, lggr logger.Logger, remoteCapabilityInfo commoncap.CapabilityInfo, remoteCapabilityDonInfo capabilities.DON, localDonInfo capabilities.DON, dispatcher types.Dispatcher, +func NewRemoteTargetCaller(ctx context.Context, lggr logger.Logger, remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher, requestTimeout time.Duration) *remoteTargetCaller { caller := &remoteTargetCaller{ lggr: lggr, remoteCapabilityInfo: remoteCapabilityInfo, - remoteCapabilityDonInfo: remoteCapabilityDonInfo, localDONInfo: localDonInfo, dispatcher: dispatcher, requestTimeout: requestTimeout, @@ -54,7 +52,7 @@ func NewRemoteTargetCaller(ctx context.Context, lggr logger.Logger, remoteCapabi case <-ctx.Done(): return case <-timer.C: - caller.ExpireRequests(ctx) + caller.ExpireRequests() } } }() @@ -62,16 +60,13 @@ func NewRemoteTargetCaller(ctx context.Context, lggr logger.Logger, remoteCapabi return caller } -func (c *remoteTargetCaller) ExpireRequests(ctx context.Context) { +func (c *remoteTargetCaller) ExpireRequests() { c.mutex.Lock() defer c.mutex.Unlock() for messageID, req := range c.messageIDToExecuteRequest { if time.Since(req.creationTime) > c.requestTimeout { - req.transmissionCancelFn() - if !req.responseSent() { - req.sendResponse(commoncap.CapabilityResponse{Err: errors.New("request timed out")}) - } + req.cancelRequest("request timed out") } delete(c.messageIDToExecuteRequest, messageID) @@ -90,123 +85,123 @@ func (c *remoteTargetCaller) UnregisterFromWorkflow(ctx context.Context, request return errors.New("not implemented") } -func (c *remoteTargetCaller) Execute(parentCtx context.Context, req commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { +func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { c.mutex.Lock() defer c.mutex.Unlock() - deterministicMessageID, err := getDeterministicMessageID(req) + messageID, err := getMessageIDForRequest(req) if err != nil { - return nil, fmt.Errorf("failed to get deterministic message ID from request: %w", err) + return nil, fmt.Errorf("failed to create message ID from request: %w", err) } - if _, ok := c.messageIDToExecuteRequest[deterministicMessageID]; ok { - return nil, fmt.Errorf("request with message ID %s already exists", deterministicMessageID) + if _, ok := c.messageIDToExecuteRequest[messageID]; ok { + return nil, fmt.Errorf("request with message ID %s already exists", messageID) } - transmissionCtx, transmissionCancelFn := context.WithCancel(parentCtx) - execRequest := newCallerExecuteRequest(transmissionCancelFn, int(c.remoteCapabilityDonInfo.F+1)) + execRequest, err := newCallerExecuteRequest(ctx, c.lggr, req, messageID, c.remoteCapabilityInfo, c.localDONInfo, c.dispatcher) + + c.messageIDToExecuteRequest[messageID] = execRequest + + return execRequest.responseCh, nil +} + +func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { + c.mutex.Lock() + defer c.mutex.Unlock() - c.messageIDToExecuteRequest[deterministicMessageID] = execRequest + messageID := GetMessageID(msg) - if err = c.transmitRequestWithMessageID(transmissionCtx, req, deterministicMessageID); err != nil { - return nil, fmt.Errorf("failed to transmit request: %w", err) + req := c.messageIDToExecuteRequest[messageID] + if req == nil { + c.lggr.Warnw("received response for unknown message ID", "messageID", messageID, "sender", msg.Sender) + return } - return execRequest.responseCh, nil + if msg.Error != types.Error_OK { + c.lggr.Warnw("received error response for pending request", "messageID", messageID, "sender", msg.Sender, "receiver", msg.Receiver, "error", msg.Error) + return + } + + req.addResponse(msg.Payload) } -func getDeterministicMessageID(req commoncap.CapabilityRequest) (string, error) { +// getMessageIDForRequest uses the workflow ID and workflow execution ID from the request metadata to create a +// deterministically unique message ID for the request. +func getMessageIDForRequest(req commoncap.CapabilityRequest) (string, error) { if req.Metadata.WorkflowID == "" || req.Metadata.WorkflowExecutionID == "" { return "", errors.New("workflow ID and workflow execution ID must be set in request metadata") } - deterministicMessageID := req.Metadata.WorkflowID + req.Metadata.WorkflowExecutionID - return deterministicMessageID, nil + return req.Metadata.WorkflowID + req.Metadata.WorkflowExecutionID, nil } -// transmitRequestWithMessageID transmits a capability request to remote capabilities according to the transmission configuration -func (c *remoteTargetCaller) transmitRequestWithMessageID(ctx context.Context, req commoncap.CapabilityRequest, messageID string) error { +type callerExecuteRequest struct { + transmissionCtx context.Context + responseCh chan commoncap.CapabilityResponse + transmissionCancelFn context.CancelFunc + creationTime time.Time + responseIDCount map[[32]byte]int + + requiredIdenticalResponses int + + respSent bool +} + +func newCallerExecuteRequest(ctx context.Context, lggr logger.Logger, req commoncap.CapabilityRequest, messageID string, + remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher) (*callerExecuteRequest, error) { + + remoteCapabilityDonInfo := remoteCapabilityInfo.DON + if remoteCapabilityDonInfo == nil { + return nil, errors.New("remote capability info missing DON") + } + rawRequest, err := pb.MarshalCapabilityRequest(req) if err != nil { - return fmt.Errorf("failed to marshal capability request: %w", err) + return nil, fmt.Errorf("failed to marshal capability request: %w", err) } tc, err := transmission.ExtractTransmissionConfig(req.Config) if err != nil { - return fmt.Errorf("failed to extract transmission config from request config: %w", err) + return nil, fmt.Errorf("failed to extract transmission config from request config: %w", err) } - peerIDToDelay, err := transmission.GetPeerIDToTransmissionDelay(c.remoteCapabilityDonInfo.Members, c.localDONInfo.Config.SharedSecret, req.Metadata.WorkflowID, req.Metadata.WorkflowExecutionID, tc) + peerIDToDelay, err := transmission.GetPeerIDToTransmissionDelay(remoteCapabilityDonInfo.Members, localDonInfo.Config.SharedSecret, + messageID, tc) if err != nil { - return fmt.Errorf("failed to get peer ID to transmission delay: %w", err) + return nil, fmt.Errorf("failed to get peer ID to transmission delay: %w", err) } + transmissionCtx, transmissionCancelFn := context.WithCancel(ctx) for peerID, delay := range peerIDToDelay { go func(peerID ragep2ptypes.PeerID, delay time.Duration) { message := &types.MessageBody{ - CapabilityId: c.remoteCapabilityInfo.ID, - CapabilityDonId: c.remoteCapabilityDonInfo.ID, - CallerDonId: c.localDONInfo.ID, + CapabilityId: remoteCapabilityInfo.ID, + CapabilityDonId: remoteCapabilityDonInfo.ID, + CallerDonId: localDonInfo.ID, Method: types.MethodExecute, Payload: rawRequest, MessageId: []byte(messageID), } select { - case <-ctx.Done(): + case <-transmissionCtx.Done(): return case <-time.After(delay): - err = c.dispatcher.Send(peerID, message) + err = dispatcher.Send(peerID, message) if err != nil { - c.lggr.Errorw("failed to send message", "peerID", peerID, "err", err) + lggr.Errorw("failed to send message", "peerID", peerID, "err", err) } } }(peerID, delay) } - return nil -} - -func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { - c.mutex.Lock() - defer c.mutex.Unlock() - - messageID := GetMessageID(msg) - - req := c.messageIDToExecuteRequest[messageID] - if req == nil { - c.lggr.Warnw("received response for unknown message ID", "messageID", messageID, "sender", msg.Sender) - return - } - - if msg.Error != types.Error_OK { - c.lggr.Warnw("received error response for pending request", "messageID", messageID, "sender", msg.Sender, "receiver", msg.Receiver, "error", msg.Error) - return - } - - req.addResponse(msg.Payload) -} - -type callerExecuteRequest struct { - responseCh chan commoncap.CapabilityResponse - transmissionCancelFn context.CancelFunc - creationTime time.Time - responseIDCount map[[32]byte]int - - requiredIdenticalResponses int - respSent bool -} - -should refactor this, move the tranmission logic onto it to better encapsulate the cancellation logic - -func newCallerExecuteRequest(transmissionCancelFn context.CancelFunc, requiredIdenticalResponses int) *callerExecuteRequest { return &callerExecuteRequest{ - responseCh: make(chan commoncap.CapabilityResponse, 1), + creationTime: time.Now(), transmissionCancelFn: transmissionCancelFn, + requiredIdenticalResponses: int(remoteCapabilityDonInfo.F + 1), responseIDCount: make(map[[32]byte]int), - creationTime: time.Now(), - requiredIdenticalResponses: requiredIdenticalResponses, - } + responseCh: make(chan commoncap.CapabilityResponse, 1), + }, nil } func (c *callerExecuteRequest) responseSent() bool { @@ -234,3 +229,10 @@ func (c *callerExecuteRequest) sendResponse(response commoncap.CapabilityRespons c.transmissionCancelFn() c.respSent = true } + +func (c *callerExecuteRequest) cancelRequest(reason string) { + c.transmissionCancelFn() + if !c.responseSent() { + c.sendResponse(commoncap.CapabilityResponse{Err: errors.New(reason)}) + } +} diff --git a/core/capabilities/remote/target_caller_test.go b/core/capabilities/remote/target_caller_test.go index c6799582930..5f259025ecd 100644 --- a/core/capabilities/remote/target_caller_test.go +++ b/core/capabilities/remote/target_caller_test.go @@ -145,13 +145,6 @@ func testRemoteTargetCaller(t *testing.T, numWorkflowPeers int, workflowNodeResp ctx, cancel := context.WithCancel(testutils.Context(t)) defer cancel() - capInfo := commoncap.CapabilityInfo{ - ID: "cap_id", - CapabilityType: commoncap.CapabilityTypeTarget, - Description: "Remote Target", - Version: "0.0.1", - } - capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { capabilityPeerID := p2ptypes.PeerID{} @@ -168,6 +161,14 @@ func testRemoteTargetCaller(t *testing.T, numWorkflowPeers int, workflowNodeResp F: capabilityDonF, } + capInfo := commoncap.CapabilityInfo{ + ID: "cap_id", + CapabilityType: commoncap.CapabilityTypeTarget, + Description: "Remote Target", + Version: "0.0.1", + DON: &capDonInfo, + } + workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerID := p2ptypes.PeerID{} @@ -193,7 +194,7 @@ func testRemoteTargetCaller(t *testing.T, numWorkflowPeers int, workflowNodeResp callers := make([]commoncap.TargetCapability, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) - caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeResponseTimeout) + caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeResponseTimeout) broker.RegisterReceiverNode(workflowPeers[i], caller) callers[i] = caller } diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index 5de307211aa..6050406533d 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -51,7 +51,7 @@ func Test_TargetRemoteTarget(t *testing.T) { testRemoteTarget(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, transmissionSchedule, responseTest) testRemoteTarget(t, 10, 3, 10*time.Minute, 10, 3, 10*time.Minute, transmissionSchedule, responseTest) - here - below tests plus additional tests for the remoteTargetCapability test + //here - below tests plus additional tests for the remoteTargetCapability test // test capability don F handling @@ -95,13 +95,6 @@ func testRemoteTarget(t *testing.T, numWorkflowPeers int, workflowDonF uint8, wo ctx, cancel := context.WithCancel(testutils.Context(t)) defer cancel() - capInfo := commoncap.CapabilityInfo{ - ID: "cap_id", - CapabilityType: commoncap.CapabilityTypeTarget, - Description: "Remote Target", - Version: "0.0.1", - } - capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { capabilityPeerID := p2ptypes.PeerID{} @@ -118,6 +111,14 @@ func testRemoteTarget(t *testing.T, numWorkflowPeers int, workflowDonF uint8, wo F: capabilityDonF, } + capInfo := commoncap.CapabilityInfo{ + ID: "cap_id", + CapabilityType: commoncap.CapabilityTypeTarget, + Description: "Remote Target", + Version: "0.0.1", + DON: &capDonInfo, + } + workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerID := p2ptypes.PeerID{} @@ -151,7 +152,7 @@ func testRemoteTarget(t *testing.T, numWorkflowPeers int, workflowDonF uint8, wo callers := make([]commoncap.TargetCapability, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) - caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeTimeout) + caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeTimeout) broker.RegisterReceiverNode(workflowPeers[i], caller) callers[i] = caller } @@ -274,10 +275,6 @@ func (t testCapability) Execute(ctx context.Context, request commoncap.Capabilit return ch, nil } -func libp2pMagic() []byte { - return []byte{0x00, 0x24, 0x08, 0x01, 0x12, 0x20} -} - func newPeerID() string { var privKey [32]byte _, err := rand.Read(privKey[:]) @@ -289,3 +286,7 @@ func newPeerID() string { return base58.Encode(peerID[:]) } + +func libp2pMagic() []byte { + return []byte{0x00, 0x24, 0x08, 0x01, 0x12, 0x20} +} diff --git a/core/capabilities/transmission/transmission.go b/core/capabilities/transmission/transmission.go index d76ffe5a3d6..dba0aa1f746 100644 --- a/core/capabilities/transmission/transmission.go +++ b/core/capabilities/transmission/transmission.go @@ -49,11 +49,10 @@ func ExtractTransmissionConfig(config *values.Map) (TransmissionConfig, error) { // GetPeerIDToTransmissionDelay returns a map of PeerID to the time.Duration that the node with that PeerID should wait // before transmitting. If a node is not in the map, it should not transmit. -func GetPeerIDToTransmissionDelay(donPeerIDs []ragep2ptypes.PeerID, sharedSecret [16]byte, workflowID string, - workflowExecutionID string, tc TransmissionConfig) (map[p2ptypes.PeerID]time.Duration, error) { +func GetPeerIDToTransmissionDelay(donPeerIDs []ragep2ptypes.PeerID, sharedSecret [16]byte, transmissionID string, tc TransmissionConfig) (map[p2ptypes.PeerID]time.Duration, error) { donMemberCount := len(donPeerIDs) - key := scheduleSeed(sharedSecret, workflowID, workflowExecutionID) - sched, err := schedule(tc.Schedule, donMemberCount) + key := transmissionScheduleSeed(sharedSecret, transmissionID) + schedule, err := createTransmissionSchedule(tc.Schedule, donMemberCount) if err != nil { return nil, err } @@ -62,7 +61,7 @@ func GetPeerIDToTransmissionDelay(donPeerIDs []ragep2ptypes.PeerID, sharedSecret peerIDToTransmissionDelay := map[p2ptypes.PeerID]time.Duration{} for i, peerID := range donPeerIDs { - delay := delayFor(i, sched, picked, tc.DeltaStage) + delay := delayFor(i, schedule, picked, tc.DeltaStage) if delay != nil { peerIDToTransmissionDelay[peerID] = *delay } @@ -83,8 +82,8 @@ func delayFor(position int, schedule []int, permutation []int, deltaStage time.D return nil } -func schedule(sched string, N int) ([]int, error) { - switch sched { +func createTransmissionSchedule(scheduleType string, N int) ([]int, error) { + switch scheduleType { case Schedule_AllAtOnce: return []int{N}, nil case Schedule_OneAtATime: @@ -94,20 +93,13 @@ func schedule(sched string, N int) ([]int, error) { } return sch, nil } - return nil, fmt.Errorf("unknown schedule %s", sched) + return nil, fmt.Errorf("unknown schedule type %s", scheduleType) } -// scheduleSeed uses a shared secret, combined with a workflowID and a workflowExecutionID to generate -// a secret that can later be used to pseudo-randomly determine a schedule for a set of nodes in a DON. -// The addition of the workflowExecutionID -- which nodes don't know ahead of time -- additionally guarantees -// that a malicious coalition of nodes can't "game" the schedule. -// IMPORTANT: changing this function should happen carefully to maintain the guarantee that all nodes -// arrive at the same secret. -func scheduleSeed(sharedSecret [16]byte, workflowID, workflowExecutionID string) [16]byte { +func transmissionScheduleSeed(sharedSecret [16]byte, transmissionID string) [16]byte { hash := sha3.NewLegacyKeccak256() hash.Write(sharedSecret[:]) - hash.Write([]byte(workflowID)) - hash.Write([]byte(workflowExecutionID)) + hash.Write([]byte(transmissionID)) var key [16]byte copy(key[:], hash.Sum(nil)) diff --git a/core/capabilities/transmission/transmission_test.go b/core/capabilities/transmission/transmission_test.go index ec4de71a1cd..6c4494c407a 100644 --- a/core/capabilities/transmission/transmission_test.go +++ b/core/capabilities/transmission/transmission_test.go @@ -90,7 +90,7 @@ func Test_GetPeerIDToTransmissionDelay(t *testing.T) { require.NoError(t, err) transmissionCfg, err := ExtractTransmissionConfig(m) - peerIdToDelay, err := GetPeerIDToTransmissionDelay(ids, [16]byte(sharedSecret), "mock-workflow-id", tc.workflowExecutionID, transmissionCfg) + peerIdToDelay, err := GetPeerIDToTransmissionDelay(ids, [16]byte(sharedSecret), "mock-workflow-id"+tc.workflowExecutionID, transmissionCfg) require.NoError(t, err) assert.Equal(t, tc.expectedDelays["one"], peerIdToDelay[peer1]) diff --git a/core/services/workflows/execution_strategy.go b/core/services/workflows/execution_strategy.go index bb36e8baf89..5cc8164c4f7 100644 --- a/core/services/workflows/execution_strategy.go +++ b/core/services/workflows/execution_strategy.go @@ -61,8 +61,10 @@ func (d scheduledExecution) Apply(ctx context.Context, lggr logger.Logger, cap c // Case 1: Local DON case info.DON == nil: + // The transmission ID is created using the workflow ID and the workflow execution ID which nodes don't know + // ahead of time and ensures a malicious node cannot game the schedule. peerIDToTransmissionDelay, err := transmission.GetPeerIDToTransmissionDelay(d.DON.Members, d.DON.Config.SharedSecret, - req.Metadata.WorkflowID, req.Metadata.WorkflowExecutionID, tc) + req.Metadata.WorkflowID+req.Metadata.WorkflowExecutionID, tc) if err != nil { return nil, fmt.Errorf("failed to get peer ID to transmission delay map: %w", err) } From 573ea65b3c8d2355fff83ce44cc278d159951d3a Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Mon, 27 May 2024 18:26:39 +0100 Subject: [PATCH 29/43] refactor and prep to multi thread the receiver --- core/capabilities/remote/target_caller.go | 67 ++-- core/capabilities/remote/target_receiver.go | 225 ++++++------ .../remote/target_receiver_test.go | 280 +++++---------- core/capabilities/remote/target_test.go | 331 ++++++++++++++++++ 4 files changed, 588 insertions(+), 315 deletions(-) create mode 100644 core/capabilities/remote/target_test.go diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target_caller.go index 1fbc35980ef..38a96545cfc 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target_caller.go @@ -14,6 +14,7 @@ import ( "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" "github.com/smartcontractkit/chainlink/v2/core/logger" + p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ragep2ptypes "github.com/smartcontractkit/libocr/ragep2p/types" ) @@ -25,7 +26,7 @@ type remoteTargetCaller struct { dispatcher types.Dispatcher requestTimeout time.Duration - messageIDToExecuteRequest map[string]*callerExecuteRequest + requestIDToExecuteRequest map[string]*callerExecuteRequest mutex sync.Mutex } @@ -41,7 +42,7 @@ func NewRemoteTargetCaller(ctx context.Context, lggr logger.Logger, remoteCapabi localDONInfo: localDonInfo, dispatcher: dispatcher, requestTimeout: requestTimeout, - messageIDToExecuteRequest: make(map[string]*callerExecuteRequest), + requestIDToExecuteRequest: make(map[string]*callerExecuteRequest), } go func() { @@ -64,12 +65,12 @@ func (c *remoteTargetCaller) ExpireRequests() { c.mutex.Lock() defer c.mutex.Unlock() - for messageID, req := range c.messageIDToExecuteRequest { - if time.Since(req.creationTime) > c.requestTimeout { + for messageID, req := range c.requestIDToExecuteRequest { + if time.Since(req.createdAt) > c.requestTimeout { req.cancelRequest("request timed out") } - delete(c.messageIDToExecuteRequest, messageID) + delete(c.requestIDToExecuteRequest, messageID) } } @@ -89,18 +90,18 @@ func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.Capabili c.mutex.Lock() defer c.mutex.Unlock() - messageID, err := getMessageIDForRequest(req) + requestID, err := GetRequestID(req) if err != nil { - return nil, fmt.Errorf("failed to create message ID from request: %w", err) + return nil, fmt.Errorf("failed to get request ID: %w", err) } - if _, ok := c.messageIDToExecuteRequest[messageID]; ok { - return nil, fmt.Errorf("request with message ID %s already exists", messageID) + if _, ok := c.requestIDToExecuteRequest[requestID]; ok { + return nil, fmt.Errorf("request with ID %s already exists", requestID) } - execRequest, err := newCallerExecuteRequest(ctx, c.lggr, req, messageID, c.remoteCapabilityInfo, c.localDONInfo, c.dispatcher) + execRequest, err := newCallerExecuteRequest(ctx, c.lggr, req, requestID, c.remoteCapabilityInfo, c.localDONInfo, c.dispatcher) - c.messageIDToExecuteRequest[messageID] = execRequest + c.requestIDToExecuteRequest[requestID] = execRequest return execRequest.responseCh, nil } @@ -109,25 +110,27 @@ func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { c.mutex.Lock() defer c.mutex.Unlock() - messageID := GetMessageID(msg) + requestID := GetMessageID(msg) + sender := ToPeerID(msg.Sender) - req := c.messageIDToExecuteRequest[messageID] + req := c.requestIDToExecuteRequest[requestID] if req == nil { - c.lggr.Warnw("received response for unknown message ID", "messageID", messageID, "sender", msg.Sender) + c.lggr.Warnw("received response for unknown request ID", "requestID", requestID, "sender", sender) return } if msg.Error != types.Error_OK { - c.lggr.Warnw("received error response for pending request", "messageID", messageID, "sender", msg.Sender, "receiver", msg.Receiver, "error", msg.Error) + c.lggr.Warnw("received error response for pending request", "requestID", requestID, "sender", sender, "receiver", msg.Receiver, "error", msg.Error) return } - req.addResponse(msg.Payload) + if err := req.addResponse(sender, msg.Payload); err != nil { + c.lggr.Errorw("failed to add response to request", "requestID", requestID, "sender", sender, "err", err) + } } -// getMessageIDForRequest uses the workflow ID and workflow execution ID from the request metadata to create a -// deterministically unique message ID for the request. -func getMessageIDForRequest(req commoncap.CapabilityRequest) (string, error) { +// Move this into common? +func GetRequestID(req commoncap.CapabilityRequest) (string, error) { if req.Metadata.WorkflowID == "" || req.Metadata.WorkflowExecutionID == "" { return "", errors.New("workflow ID and workflow execution ID must be set in request metadata") } @@ -139,8 +142,9 @@ type callerExecuteRequest struct { transmissionCtx context.Context responseCh chan commoncap.CapabilityResponse transmissionCancelFn context.CancelFunc - creationTime time.Time + createdAt time.Time responseIDCount map[[32]byte]int + responseReceived map[p2ptypes.PeerID]bool requiredIdenticalResponses int @@ -165,14 +169,16 @@ func newCallerExecuteRequest(ctx context.Context, lggr logger.Logger, req common return nil, fmt.Errorf("failed to extract transmission config from request config: %w", err) } - peerIDToDelay, err := transmission.GetPeerIDToTransmissionDelay(remoteCapabilityDonInfo.Members, localDonInfo.Config.SharedSecret, + peerIDToTransmissionDelay, err := transmission.GetPeerIDToTransmissionDelay(remoteCapabilityDonInfo.Members, localDonInfo.Config.SharedSecret, messageID, tc) if err != nil { return nil, fmt.Errorf("failed to get peer ID to transmission delay: %w", err) } transmissionCtx, transmissionCancelFn := context.WithCancel(ctx) - for peerID, delay := range peerIDToDelay { + responseReceived := make(map[p2ptypes.PeerID]bool) + for peerID, delay := range peerIDToTransmissionDelay { + responseReceived[peerID] = false go func(peerID ragep2ptypes.PeerID, delay time.Duration) { message := &types.MessageBody{ CapabilityId: remoteCapabilityInfo.ID, @@ -196,10 +202,11 @@ func newCallerExecuteRequest(ctx context.Context, lggr logger.Logger, req common } return &callerExecuteRequest{ - creationTime: time.Now(), + createdAt: time.Now(), transmissionCancelFn: transmissionCancelFn, requiredIdenticalResponses: int(remoteCapabilityDonInfo.F + 1), responseIDCount: make(map[[32]byte]int), + responseReceived: responseReceived, responseCh: make(chan commoncap.CapabilityResponse, 1), }, nil } @@ -209,7 +216,17 @@ func (c *callerExecuteRequest) responseSent() bool { } // TODO addResponse assumes that only one response is received from each peer, if streaming responses need to be supported this will need to be updated -func (c *callerExecuteRequest) addResponse(response []byte) { +func (c *callerExecuteRequest) addResponse(sender p2ptypes.PeerID, response []byte) error { + if _, ok := c.responseReceived[sender]; !ok { + return fmt.Errorf("response from peer %s not expected", sender) + } + + if c.responseReceived[sender] { + return fmt.Errorf("response from peer %s already received", sender) + } + + c.responseReceived[sender] = true + payloadId := sha256.Sum256(response) c.responseIDCount[payloadId]++ @@ -221,6 +238,8 @@ func (c *callerExecuteRequest) addResponse(response []byte) { c.sendResponse(commoncap.CapabilityResponse{Value: capabilityResponse.Value}) } } + + return nil } func (c *callerExecuteRequest) sendResponse(response commoncap.CapabilityResponse) { diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index f8e00b89d79..fe08e7eccb6 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -2,12 +2,12 @@ package remote import ( "context" + "crypto/sha256" + "encoding/hex" "fmt" "sync" "time" - "github.com/google/uuid" - commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" @@ -26,8 +26,8 @@ type remoteTargetReceiver struct { dispatcher types.Dispatcher lggr logger.Logger - requestMsgIDToRequest map[string]*remoteTargetCapabilityRequest - requestTimeout time.Duration + messageIDToRequest map[string]*remoteTargetCapabilityRequest + requestTimeout time.Duration receiveLock sync.Mutex } @@ -45,8 +45,8 @@ func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, peerID p2p workflowDONs: workflowDONs, dispatcher: dispatcher, - requestMsgIDToRequest: map[string]*remoteTargetCapabilityRequest{}, - requestTimeout: requestTimeout, + messageIDToRequest: map[string]*remoteTargetCapabilityRequest{}, + requestTimeout: requestTimeout, lggr: lggr, } @@ -71,17 +71,17 @@ func (r *remoteTargetReceiver) ExpireRequests(ctx context.Context) { r.receiveLock.Lock() defer r.receiveLock.Unlock() - for messageId, executeReq := range r.requestMsgIDToRequest { + for messageId, executeReq := range r.messageIDToRequest { if time.Since(executeReq.createdTime) > r.requestTimeout { if !executeReq.hasResponse() { executeReq.setError(types.Error_TIMEOUT) - if err := executeReq.sendResponseToAllRequesters(); err != nil { + if err := executeReq.sendResponses(); err != nil { r.lggr.Errorw("failed to send timeout response to all requesters", "capabilityId", r.capInfo.ID, "err", err) } } - delete(r.requestMsgIDToRequest, messageId) + delete(r.messageIDToRequest, messageId) } } @@ -104,69 +104,34 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { return } - callerDon, ok := r.workflowDONs[msg.CallerDonId] - if !ok { - r.lggr.Errorw("received a message from unsupported workflow DON", "capabilityId", r.capInfo.ID, "callerDonId", msg.CallerDonId) - return - } - - requester := ToPeerID(msg.Sender) + // A request is uniquely identified by the message id and the hash of the payload messageId := GetMessageID(msg) + hash := sha256.Sum256(msg.Payload) + requestID := messageId + hex.EncodeToString(hash[:]) - if _, ok := r.requestMsgIDToRequest[messageId]; !ok { - r.requestMsgIDToRequest[messageId] = newTargetCapabilityRequest(r.capInfo.ID, r.localDonInfo.ID, r.peerID, - msg.CallerDonId, messageId, r.dispatcher) - } - - request, ok := r.requestMsgIDToRequest[messageId] - - if err := request.addRequester(requester, msg.CallerDonId, messageId); err != nil { - r.lggr.Errorw("failed to add request to response", "capabilityId", r.capInfo.ID, "sender", - requester, "err", err) - return - } - - minRequiredRequests := int(callerDon.F + 1) - if request.getRequestersCount() == minRequiredRequests { - - capabilityRequest, err := pb.UnmarshalCapabilityRequest(msg.Payload) - if err == nil { - ctxWithTimeout, cancel := context.WithTimeout(ctx, r.requestTimeout) - defer cancel() - capResponseCh, err := r.underlying.Execute(ctxWithTimeout, capabilityRequest) - if err == nil { - // TODO working on the assumption that the capability will only ever return one response from its channel (for now at least) - capResponse := <-capResponseCh - responsePayload, err := pb.MarshalCapabilityResponse(capResponse) - if err != nil { - r.lggr.Errorw("failed to marshal capability response", "capabilityId", r.capInfo.ID, "err", err) - request.setError(types.Error_INTERNAL_ERROR) - } else { - request.setResult(responsePayload) - } - } else { - r.lggr.Errorw("failed to execute capability", "capabilityId", r.capInfo.ID, "err", err) - request.setError(types.Error_INTERNAL_ERROR) - } + if _, ok := r.messageIDToRequest[requestID]; !ok { + if callingDon, ok := r.workflowDONs[msg.CallerDonId]; ok { + r.messageIDToRequest[requestID] = newTargetCapabilityRequest(r.lggr, r.underlying, r.capInfo.ID, r.localDonInfo.ID, r.peerID, + callingDon, messageId, r.dispatcher, r.requestTimeout) } else { - r.lggr.Errorw("failed to unmarshal capability request", "capabilityId", r.capInfo.ID, "err", err) - request.setError(types.Error_INVALID_REQUEST) + r.lggr.Errorw("received request from unregistered workflow don", "donId", msg.CallerDonId) + return } + } - if err := request.sendResponseToAllRequesters(); err != nil { - r.lggr.Errorw("failed to send response to all requesters", "capabilityId", r.capInfo.ID, "err", err) - } + request := r.messageIDToRequest[requestID] - } else if request.getRequestersCount() > minRequiredRequests { - if err := request.sendResponse(requester); err != nil { - r.lggr.Errorw("failed to send response to requester", "capabilityId", r.capInfo.ID, "err", err) - } + err := request.receive(ctx, msg) + if err != nil { + r.lggr.Errorw("request failed to receive new message", "request", request, "err", err) } } type remoteTargetCapabilityRequest struct { - id string + lggr logger.Logger + + capability capabilities.TargetCapability capabilityPeerId p2ptypes.PeerID capabilityID string @@ -174,46 +139,100 @@ type remoteTargetCapabilityRequest struct { dispatcher types.Dispatcher - requesters map[p2ptypes.PeerID]bool - responseReceivers map[p2ptypes.PeerID]bool + requesters map[p2ptypes.PeerID]bool + responseSentToRequester map[p2ptypes.PeerID]bool createdTime time.Time response []byte responseError types.Error - initialRequestingDon string - requestMessageID string + callingDon commoncap.DON + requestMessageID string + + requestTimeout time.Duration } -func newTargetCapabilityRequest(capabilityID string, capabilityDonID string, capabilityPeerId p2ptypes.PeerID, - callingDonID string, requestMessageID string, - dispatcher types.Dispatcher) *remoteTargetCapabilityRequest { +func newTargetCapabilityRequest(lggr logger.Logger, capability capabilities.TargetCapability, capabilityID string, capabilityDonID string, capabilityPeerId p2ptypes.PeerID, + callingDon commoncap.DON, requestMessageID string, + dispatcher types.Dispatcher, requestTimeout time.Duration) *remoteTargetCapabilityRequest { return &remoteTargetCapabilityRequest{ - id: uuid.New().String(), - capabilityID: capabilityID, - capabilityDonID: capabilityDonID, - capabilityPeerId: capabilityPeerId, - dispatcher: dispatcher, - requesters: map[p2ptypes.PeerID]bool{}, - responseReceivers: map[p2ptypes.PeerID]bool{}, - createdTime: time.Now(), - initialRequestingDon: callingDonID, - requestMessageID: requestMessageID, + lggr: lggr, + capability: capability, + createdTime: time.Now(), + capabilityID: capabilityID, + capabilityDonID: capabilityDonID, + capabilityPeerId: capabilityPeerId, + dispatcher: dispatcher, + requesters: map[p2ptypes.PeerID]bool{}, + responseSentToRequester: map[p2ptypes.PeerID]bool{}, + callingDon: callingDon, + requestMessageID: requestMessageID, + requestTimeout: requestTimeout, } } -func (e *remoteTargetCapabilityRequest) addRequester(from p2ptypes.PeerID, fromDonID string, requestMessageID string) error { - if e.requesters[from] { - return fmt.Errorf("request already received from peer %s", from) +func (e *remoteTargetCapabilityRequest) receive(ctx context.Context, msg *types.MessageBody) error { + requester := ToPeerID(msg.Sender) + if err := e.addRequester(requester); err != nil { + return fmt.Errorf("failed to add requester to request: %w", err) } - if e.initialRequestingDon != fromDonID { - return fmt.Errorf("received request from different initial requesting don %s, expected %s", fromDonID, e.initialRequestingDon) + if e.minimumRequiredRequestsReceived() && !e.hasResponse() { + e.executeRequest(ctx, msg.Payload) } - if e.requestMessageID != requestMessageID { - return fmt.Errorf("received request with different message id %s, expected %s", requestMessageID, e.requestMessageID) + if err := e.sendResponses(); err != nil { + return fmt.Errorf("failed to send response to requesters: %w", err) + } + + return nil +} + +func (e *remoteTargetCapabilityRequest) executeRequest(ctx context.Context, payload []byte) { + ctxWithTimeout, cancel := context.WithTimeout(ctx, e.requestTimeout) + defer cancel() + + capabilityRequest, err := pb.UnmarshalCapabilityRequest(payload) + if err != nil { + e.setError(types.Error_INVALID_REQUEST) + e.lggr.Errorw("failed to unmarshal capability request", "err", err) + } + + capResponseCh, err := e.capability.Execute(ctxWithTimeout, capabilityRequest) + + if err != nil { + e.setError(types.Error_INTERNAL_ERROR) + e.lggr.Errorw("failed to execute capability", "err", err) + } + + // TODO working on the assumption that the capability will only ever return one response from its channel (for now at least) + capResponse := <-capResponseCh + responsePayload, err := pb.MarshalCapabilityResponse(capResponse) + if err != nil { + e.setError(types.Error_INTERNAL_ERROR) + e.lggr.Errorw("failed to marshal capability response", "err", err) + } + + e.setResult(responsePayload) +} + +func (e *remoteTargetCapabilityRequest) addRequester(from p2ptypes.PeerID) error { + + fromPeerInDon := false + for _, member := range e.callingDon.Members { + if member == from { + fromPeerInDon = true + break + } + } + + if !fromPeerInDon { + return fmt.Errorf("request received from peer %s not in calling don", from) + } + + if e.requesters[from] { + return fmt.Errorf("request already received from peer %s", from) } e.requesters[from] = true @@ -221,8 +240,8 @@ func (e *remoteTargetCapabilityRequest) addRequester(from p2ptypes.PeerID, fromD return nil } -func (e *remoteTargetCapabilityRequest) getRequestersCount() int { - return len(e.requesters) +func (e *remoteTargetCapabilityRequest) minimumRequiredRequestsReceived() bool { + return len(e.requesters) >= int(e.callingDon.F+1) } func (e *remoteTargetCapabilityRequest) setResult(result []byte) { @@ -237,10 +256,17 @@ func (e *remoteTargetCapabilityRequest) hasResponse() bool { return e.response != nil || e.responseError != types.Error_OK } -func (e *remoteTargetCapabilityRequest) sendResponseToAllRequesters() error { +func (e *remoteTargetCapabilityRequest) sendResponses() error { + if !e.minimumRequiredRequestsReceived() { + return nil + } + for requester := range e.requesters { - if err := e.sendResponse(requester); err != nil { - return fmt.Errorf("failed to send response to requester %s: %w", requester, err) + if !e.responseSentToRequester[requester] { + e.responseSentToRequester[requester] = true + if err := e.sendResponse(requester); err != nil { + return fmt.Errorf("failed to send response to requester %s: %w", requester, err) + } } } @@ -248,14 +274,11 @@ func (e *remoteTargetCapabilityRequest) sendResponseToAllRequesters() error { } func (e *remoteTargetCapabilityRequest) sendResponse(peer p2ptypes.PeerID) error { - if err := e.validateResponseSendRequest(peer); err != nil { - return fmt.Errorf("failed to validate response send request: %w", err) - } responseMsg := types.MessageBody{ CapabilityId: e.capabilityID, CapabilityDonId: e.capabilityDonID, - CallerDonId: e.initialRequestingDon, + CallerDonId: e.callingDon.ID, Method: types.MethodExecute, MessageId: []byte(e.requestMessageID), Sender: e.capabilityPeerId[:], @@ -272,19 +295,7 @@ func (e *remoteTargetCapabilityRequest) sendResponse(peer p2ptypes.PeerID) error return fmt.Errorf("failed to send response: %w", err) } - e.responseReceivers[peer] = true - - return nil -} - -func (e *remoteTargetCapabilityRequest) validateResponseSendRequest(peer p2ptypes.PeerID) error { - if !e.hasResponse() { - return fmt.Errorf("no response to send") - } - - if e.responseReceivers[peer] { - return fmt.Errorf("response already sent to peer") - } + e.responseSentToRequester[peer] = true return nil } diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target_receiver_test.go index 6050406533d..d16ca873ed6 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target_receiver_test.go @@ -2,98 +2,79 @@ package remote_test import ( "context" - "crypto/rand" - "sync" "testing" "time" - "github.com/mr-tron/base58" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" - "github.com/smartcontractkit/chainlink-common/pkg/values" + "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" "github.com/smartcontractkit/chainlink/v2/core/logger" p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) -func Test_TargetRemoteTarget(t *testing.T) { - - responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { - require.NoError(t, responseError) - response := <-responseCh - responseValue, err := response.Value.Unwrap() - require.NoError(t, err) - assert.Equal(t, "aValue1", responseValue.(string)) - } - - transmissionSchedule, err := values.NewMap(map[string]any{ - "schedule": transmission.Schedule_AllAtOnce, - "deltaStage": "100ms", - }) - require.NoError(t, err) - - // Test scenarios where the number of submissions is greater than or equal to F + 1 - testRemoteTarget(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, transmissionSchedule, responseTest) - testRemoteTarget(t, 4, 3, 10*time.Minute, 4, 3, 10*time.Minute, transmissionSchedule, responseTest) - testRemoteTarget(t, 10, 3, 10*time.Minute, 10, 3, 10*time.Minute, transmissionSchedule, responseTest) - - transmissionSchedule, err = values.NewMap(map[string]any{ - "schedule": transmission.Schedule_OneAtATime, - "deltaStage": "10ms", - }) - require.NoError(t, err) - - testRemoteTarget(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, transmissionSchedule, responseTest) - testRemoteTarget(t, 10, 3, 10*time.Minute, 10, 3, 10*time.Minute, transmissionSchedule, responseTest) +func Test_RemoteTargetCapability_InsufficientWorkflowCallers(t *testing.T) { + ctx, cancel := context.WithCancel(testutils.Context(t)) + defer cancel() - //here - below tests plus additional tests for the remoteTargetCapability test + numCapabilityPeers := 4 - // test capability don F handling + callers := testRemoteTargetReceiver(t, ctx, 10, 10, numCapabilityPeers, 3, 100*time.Millisecond) - /* - here - these errors tests failing still? why? + for _, caller := range callers { + caller.Execute(context.Background(), + commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{ + WorkflowID: "workflowID", + WorkflowExecutionID: "workflowExecutionID", + }, + }) + } - errResponseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { - require.NoError(t, responseError) - response := <-responseCh - assert.NotNil(t, response.Err) + for _, caller := range callers { + for i := 0; i < numCapabilityPeers; i++ { + msg := <-caller.receivedMessages + assert.Equal(t, remotetypes.Error_TIMEOUT, msg.Error) } + } +} - // Test scenario where number of submissions is less than F + 1 - - // How to make these tests less time dependent? risk of being flaky - testRemoteTargetConsensus(t, 4, 6, 5*time.Second, 1, 0, 1*time.Second, errResponseTest) - testRemoteTargetConsensus(t, 10, 10, 5*time.Second, 1, 0, 1*time.Second, errResponseTest) - */ - //tyring to modify tests to test the caller F number handling? - - //also having issues with error test cases - since the client F handling? - - //then got threading to do - - // Context cancellation test - use an underlying capability that blocks until the context is cancelled +// Test stuck capability times out - // Check request errors as expected and all error responses are received +func Test_RemoteTargetCapability_IgnoresRequestFromIncorrectPeer(t *testing.T) { + ctx, cancel := context.WithCancel(testutils.Context(t)) + defer cancel() - // Check that requests from an incorrect don are ignored? + numCapabilityPeers := 4 - // Check that multiple requests from the same sender are ignored + callers := testRemoteTargetReceiver(t, ctx, 10, 9, numCapabilityPeers, 3, 100*time.Millisecond) - // Test with different transmission schedules ? + for _, caller := range callers { + caller.Execute(context.Background(), + commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{ + WorkflowID: "workflowID", + WorkflowExecutionID: "workflowExecutionID", + }, + }) + } + for _, caller := range callers { + for i := 0; i < numCapabilityPeers; i++ { + msg := <-caller.receivedMessages + assert.Equal(t, remotetypes.Error_TIMEOUT, msg.Error) + } + } } -func testRemoteTarget(t *testing.T, numWorkflowPeers int, workflowDonF uint8, workflowNodeTimeout time.Duration, - numCapabilityPeers int, capabilityDonF uint8, capabilityNodeResponseTimeout time.Duration, transmissionSchedule *values.Map, - responseTest func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error)) { +func testRemoteTargetReceiver(t *testing.T, ctx context.Context, numWorkflowPeers int, workflowDonF uint8, + numCapabilityPeers int, capabilityDonF uint8, capabilityNodeResponseTimeout time.Duration) []*receiverTestCaller { + lggr := logger.TestLogger(t) - ctx, cancel := context.WithCancel(testutils.Context(t)) - defer cancel() capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { @@ -139,154 +120,85 @@ func testRemoteTarget(t *testing.T, numWorkflowPeers int, workflowDonF uint8, wo } underlying := &testCapability{} - receivers := make([]remotetypes.Receiver, numCapabilityPeers) + capabilityNodes := make([]remotetypes.Receiver, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { capabilityPeer := capabilityPeers[i] capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeer) - receiver := remote.NewRemoteTargetReceiver(ctx, lggr, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, + capabilityNode := remote.NewRemoteTargetReceiver(ctx, lggr, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, capabilityNodeResponseTimeout) - broker.RegisterReceiverNode(capabilityPeer, receiver) - receivers[i] = receiver + broker.RegisterReceiverNode(capabilityPeer, capabilityNode) + capabilityNodes[i] = capabilityNode } - callers := make([]commoncap.TargetCapability, numWorkflowPeers) + workflowNodes := make([]*receiverTestCaller, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) - caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeTimeout) - broker.RegisterReceiverNode(workflowPeers[i], caller) - callers[i] = caller - } - - executeInputs, err := values.NewMap( - map[string]any{ - "executeValue1": "aValue1", - }, - ) - - require.NoError(t, err) - - wg := &sync.WaitGroup{} - wg.Add(len(callers)) - - // Fire off all the requests - for _, caller := range callers { - go func(caller commoncap.TargetCapability) { - responseCh, err := caller.Execute(ctx, - commoncap.CapabilityRequest{ - Metadata: commoncap.RequestMetadata{ - WorkflowID: "workflowID", - WorkflowExecutionID: "workflowExecutionID", - }, - Config: transmissionSchedule, - Inputs: executeInputs, - }) - - responseTest(t, responseCh, err) - wg.Done() - }(caller) - } - - wg.Wait() -} - -type testMessageBroker struct { - receivers map[p2ptypes.PeerID]remotetypes.Receiver -} - -func newTestMessageBroker() *testMessageBroker { - return &testMessageBroker{ - receivers: make(map[p2ptypes.PeerID]remotetypes.Receiver), - } -} - -func (r *testMessageBroker) NewDispatcherForNode(nodePeerID p2ptypes.PeerID) remotetypes.Dispatcher { - return &nodeDispatcher{ - callerPeerID: nodePeerID, - broker: r, - } -} - -func (r *testMessageBroker) RegisterReceiverNode(nodePeerID p2ptypes.PeerID, node remotetypes.Receiver) { - if _, ok := r.receivers[nodePeerID]; ok { - panic("node already registered") - } - - r.receivers[nodePeerID] = node -} - -func (r *testMessageBroker) Send(msg *remotetypes.MessageBody) { - receiverId := toPeerID(msg.Receiver) - - if receiver, ok := r.receivers[receiverId]; ok { - receiver.Receive(msg) - } else { - panic("receiver not found for peer id") + workflowNode := newReceiverTestCaller(workflowPeers[i], capDonInfo, workflowPeerDispatcher) + broker.RegisterReceiverNode(workflowPeers[i], workflowNode) + workflowNodes[i] = workflowNode } + return workflowNodes } -func toPeerID(id []byte) p2ptypes.PeerID { - return [32]byte(id) -} - -type nodeDispatcher struct { - callerPeerID p2ptypes.PeerID - broker *testMessageBroker +type receiverTestCaller struct { + peerID p2ptypes.PeerID + dispatcher remotetypes.Dispatcher + capabilityDonInfo commoncap.DON + receivedMessages chan *remotetypes.MessageBody + callerDonID string } -func (t *nodeDispatcher) Send(peerID p2ptypes.PeerID, msgBody *remotetypes.MessageBody) error { - msgBody.Version = 1 - msgBody.Sender = t.callerPeerID[:] - msgBody.Receiver = peerID[:] - msgBody.Timestamp = time.Now().UnixMilli() - t.broker.Send(msgBody) - return nil +func (r *receiverTestCaller) Receive(msg *remotetypes.MessageBody) { + r.receivedMessages <- msg } -func (t *nodeDispatcher) SetReceiver(capabilityId string, donId string, receiver remotetypes.Receiver) error { - return nil +func newReceiverTestCaller(peerID p2ptypes.PeerID, capabilityDonInfo commoncap.DON, + dispatcher remotetypes.Dispatcher) *receiverTestCaller { + return &receiverTestCaller{peerID: peerID, dispatcher: dispatcher, capabilityDonInfo: capabilityDonInfo, + receivedMessages: make(chan *remotetypes.MessageBody, 100), callerDonID: "workflow-don"} } -func (t *nodeDispatcher) RemoveReceiver(capabilityId string, donId string) {} -type testCapability struct { +func (r *receiverTestCaller) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { + panic("not implemented") } -func (t testCapability) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { - return commoncap.CapabilityInfo{}, nil +func (r *receiverTestCaller) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { + panic("not implemented") } -func (t testCapability) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { - return nil +func (r *receiverTestCaller) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { + panic("not implemented") } -func (t testCapability) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { - return nil -} - -func (t testCapability) Execute(ctx context.Context, request commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { - ch := make(chan commoncap.CapabilityResponse, 1) - - value := request.Inputs.Underlying["executeValue1"] - - ch <- commoncap.CapabilityResponse{ - Value: value, - } - - return ch, nil -} +func (r *receiverTestCaller) Execute(ctx context.Context, req commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { -func newPeerID() string { - var privKey [32]byte - _, err := rand.Read(privKey[:]) + rawRequest, err := pb.MarshalCapabilityRequest(req) if err != nil { - panic(err) + return nil, err } - peerID := append(libp2pMagic(), privKey[:]...) + messageID, err := remote.GetRequestID(req) + if err != nil { + return nil, err + } + + for _, node := range r.capabilityDonInfo.Members { + message := &remotetypes.MessageBody{ + CapabilityId: "capability-id", + CapabilityDonId: "capability-don", + CallerDonId: "workflow-don", + Method: remotetypes.MethodExecute, + Payload: rawRequest, + MessageId: []byte(messageID), + Sender: r.peerID[:], + Receiver: node[:], + } - return base58.Encode(peerID[:]) -} + if err = r.dispatcher.Send(node, message); err != nil { + return nil, err + } + } -func libp2pMagic() []byte { - return []byte{0x00, 0x24, 0x08, 0x01, 0x12, 0x20} + return nil, nil } diff --git a/core/capabilities/remote/target_test.go b/core/capabilities/remote/target_test.go new file mode 100644 index 00000000000..cbce2d17a63 --- /dev/null +++ b/core/capabilities/remote/target_test.go @@ -0,0 +1,331 @@ +package remote_test + +import ( + "context" + "crypto/rand" + "sync" + "testing" + "time" + + "github.com/mr-tron/base58" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink-common/pkg/values" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" + remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" + "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" + "github.com/smartcontractkit/chainlink/v2/core/logger" + p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" +) + +func Test_RemoteTargetCapability_TransmissionSchedules(t *testing.T) { + + responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + require.NoError(t, responseError) + response := <-responseCh + responseValue, err := response.Value.Unwrap() + require.NoError(t, err) + assert.Equal(t, "aValue1", responseValue.(string)) + } + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_OneAtATime, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + timeOut := 10 * time.Minute + + testRemoteTarget(t, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) + + transmissionSchedule, err = values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + testRemoteTarget(t, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) + +} + +func Test_RemoteTargetCapability_DonTopologies(t *testing.T) { + + responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + require.NoError(t, responseError) + response := <-responseCh + responseValue, err := response.Value.Unwrap() + require.NoError(t, err) + assert.Equal(t, "aValue1", responseValue.(string)) + } + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_OneAtATime, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + timeOut := 10 * time.Minute + + // Test scenarios where the number of submissions is greater than or equal to F + 1 + testRemoteTarget(t, 1, 0, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, 4, 3, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, 10, 3, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) + + testRemoteTarget(t, 1, 0, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, 1, 0, timeOut, 4, 3, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, 1, 0, timeOut, 10, 3, timeOut, transmissionSchedule, responseTest) + + testRemoteTarget(t, 4, 3, timeOut, 4, 3, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, 10, 3, timeOut, 10, 3, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) + + /* + transmissionSchedule, err = values.NewMap(map[string]any{ + "schedule": transmission.Schedule_OneAtATime, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + testRemoteTarget(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, transmissionSchedule, responseTest) + testRemoteTarget(t, 10, 3, 10*time.Minute, 10, 3, 10*time.Minute, transmissionSchedule, responseTest) + */ + //here - below tests plus additional tests for the remoteTargetCapability test + + // test capability don F handling + + /* + here - these errors tests failing still? why? + + errResponseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + require.NoError(t, responseError) + response := <-responseCh + assert.NotNil(t, response.Err) + } + + // Test scenario where number of submissions is less than F + 1 + + // How to make these tests less time dependent? risk of being flaky + testRemoteTargetConsensus(t, 4, 6, 5*time.Second, 1, 0, 1*time.Second, errResponseTest) + testRemoteTargetConsensus(t, 10, 10, 5*time.Second, 1, 0, 1*time.Second, errResponseTest) + */ + //tyring to modify tests to test the caller F number handling? + + //also having issues with error test cases - since the client F handling? + + //then got threading to do + + // Context cancellation test - use an underlying capability that blocks until the context is cancelled + + // Check request errors as expected and all error responses are received + + // Check that requests from an incorrect don are ignored? + + // Check that multiple requests from the same sender are ignored + +} + +func testRemoteTarget(t *testing.T, numWorkflowPeers int, workflowDonF uint8, workflowNodeTimeout time.Duration, + numCapabilityPeers int, capabilityDonF uint8, capabilityNodeResponseTimeout time.Duration, transmissionSchedule *values.Map, + responseTest func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error)) { + lggr := logger.TestLogger(t) + ctx, cancel := context.WithCancel(testutils.Context(t)) + defer cancel() + + capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) + for i := 0; i < numCapabilityPeers; i++ { + capabilityPeerID := p2ptypes.PeerID{} + require.NoError(t, capabilityPeerID.UnmarshalText([]byte(newPeerID()))) + capabilityPeers[i] = capabilityPeerID + } + + capabilityPeerID := p2ptypes.PeerID{} + require.NoError(t, capabilityPeerID.UnmarshalText([]byte(newPeerID()))) + + capDonInfo := commoncap.DON{ + ID: "capability-don", + Members: capabilityPeers, + F: capabilityDonF, + } + + capInfo := commoncap.CapabilityInfo{ + ID: "cap_id", + CapabilityType: commoncap.CapabilityTypeTarget, + Description: "Remote Target", + Version: "0.0.1", + DON: &capDonInfo, + } + + workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) + for i := 0; i < numWorkflowPeers; i++ { + workflowPeerID := p2ptypes.PeerID{} + require.NoError(t, workflowPeerID.UnmarshalText([]byte(newPeerID()))) + workflowPeers[i] = workflowPeerID + } + + workflowDonInfo := commoncap.DON{ + Members: workflowPeers, + ID: "workflow-don", + F: workflowDonF, + } + + broker := newTestMessageBroker() + + workflowDONs := map[string]commoncap.DON{ + workflowDonInfo.ID: workflowDonInfo, + } + underlying := &testCapability{} + + capabilityNodes := make([]remotetypes.Receiver, numCapabilityPeers) + for i := 0; i < numCapabilityPeers; i++ { + capabilityPeer := capabilityPeers[i] + capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeer) + capabilityNode := remote.NewRemoteTargetReceiver(ctx, lggr, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, + capabilityNodeResponseTimeout) + broker.RegisterReceiverNode(capabilityPeer, capabilityNode) + capabilityNodes[i] = capabilityNode + } + + workflowNodes := make([]commoncap.TargetCapability, numWorkflowPeers) + for i := 0; i < numWorkflowPeers; i++ { + workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) + workflowNode := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeTimeout) + broker.RegisterReceiverNode(workflowPeers[i], workflowNode) + workflowNodes[i] = workflowNode + } + + executeInputs, err := values.NewMap( + map[string]any{ + "executeValue1": "aValue1", + }, + ) + + require.NoError(t, err) + + wg := &sync.WaitGroup{} + wg.Add(len(workflowNodes)) + + // Fire off all the requests + for _, caller := range workflowNodes { + go func(caller commoncap.TargetCapability) { + responseCh, err := caller.Execute(ctx, + commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{ + WorkflowID: "workflowID", + WorkflowExecutionID: "workflowExecutionID", + }, + Config: transmissionSchedule, + Inputs: executeInputs, + }) + + responseTest(t, responseCh, err) + wg.Done() + }(caller) + } + + wg.Wait() +} + +type testMessageBroker struct { + nodes map[p2ptypes.PeerID]remotetypes.Receiver +} + +func newTestMessageBroker() *testMessageBroker { + return &testMessageBroker{ + nodes: make(map[p2ptypes.PeerID]remotetypes.Receiver), + } +} + +func (r *testMessageBroker) NewDispatcherForNode(nodePeerID p2ptypes.PeerID) remotetypes.Dispatcher { + return &nodeDispatcher{ + callerPeerID: nodePeerID, + broker: r, + } +} + +func (r *testMessageBroker) RegisterReceiverNode(nodePeerID p2ptypes.PeerID, node remotetypes.Receiver) { + if _, ok := r.nodes[nodePeerID]; ok { + panic("node already registered") + } + + r.nodes[nodePeerID] = node +} + +func (r *testMessageBroker) Send(msg *remotetypes.MessageBody) { + receiverId := toPeerID(msg.Receiver) + + if receiver, ok := r.nodes[receiverId]; ok { + receiver.Receive(msg) + } else { + panic("receiver not found for peer id") + } + +} + +func toPeerID(id []byte) p2ptypes.PeerID { + return [32]byte(id) +} + +type nodeDispatcher struct { + callerPeerID p2ptypes.PeerID + broker *testMessageBroker +} + +func (t *nodeDispatcher) Send(peerID p2ptypes.PeerID, msgBody *remotetypes.MessageBody) error { + msgBody.Version = 1 + msgBody.Sender = t.callerPeerID[:] + msgBody.Receiver = peerID[:] + msgBody.Timestamp = time.Now().UnixMilli() + t.broker.Send(msgBody) + return nil +} + +func (t *nodeDispatcher) SetReceiver(capabilityId string, donId string, receiver remotetypes.Receiver) error { + return nil +} +func (t *nodeDispatcher) RemoveReceiver(capabilityId string, donId string) {} + +type testCapability struct { +} + +func (t testCapability) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { + return commoncap.CapabilityInfo{}, nil +} + +func (t testCapability) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { + return nil +} + +func (t testCapability) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { + return nil +} + +func (t testCapability) Execute(ctx context.Context, request commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { + ch := make(chan commoncap.CapabilityResponse, 1) + + value := request.Inputs.Underlying["executeValue1"] + + ch <- commoncap.CapabilityResponse{ + Value: value, + } + + return ch, nil +} + +func newPeerID() string { + var privKey [32]byte + _, err := rand.Read(privKey[:]) + if err != nil { + panic(err) + } + + peerID := append(libp2pMagic(), privKey[:]...) + + return base58.Encode(peerID[:]) +} + +func libp2pMagic() []byte { + return []byte{0x00, 0x24, 0x08, 0x01, 0x12, 0x20} +} From e03d7a78233e600387f2e441cebd954caca9218c Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Mon, 27 May 2024 18:33:33 +0100 Subject: [PATCH 30/43] wip --- core/capabilities/remote/target_receiver.go | 48 ++++++++++----------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target_receiver.go index fe08e7eccb6..c8fa08376ae 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target_receiver.go @@ -18,15 +18,15 @@ import ( ) type remoteTargetReceiver struct { + lggr logger.Logger peerID p2ptypes.PeerID underlying commoncap.TargetCapability capInfo commoncap.CapabilityInfo localDonInfo capabilities.DON workflowDONs map[string]commoncap.DON dispatcher types.Dispatcher - lggr logger.Logger - messageIDToRequest map[string]*remoteTargetCapabilityRequest + requestIDToRequest map[string]*remoteTargetCapabilityRequest requestTimeout time.Duration receiveLock sync.Mutex @@ -45,7 +45,7 @@ func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, peerID p2p workflowDONs: workflowDONs, dispatcher: dispatcher, - messageIDToRequest: map[string]*remoteTargetCapabilityRequest{}, + requestIDToRequest: map[string]*remoteTargetCapabilityRequest{}, requestTimeout: requestTimeout, lggr: lggr, @@ -71,7 +71,7 @@ func (r *remoteTargetReceiver) ExpireRequests(ctx context.Context) { r.receiveLock.Lock() defer r.receiveLock.Unlock() - for messageId, executeReq := range r.messageIDToRequest { + for messageId, executeReq := range r.requestIDToRequest { if time.Since(executeReq.createdTime) > r.requestTimeout { if !executeReq.hasResponse() { @@ -81,7 +81,7 @@ func (r *remoteTargetReceiver) ExpireRequests(ctx context.Context) { } } - delete(r.messageIDToRequest, messageId) + delete(r.requestIDToRequest, messageId) } } @@ -109,9 +109,9 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { hash := sha256.Sum256(msg.Payload) requestID := messageId + hex.EncodeToString(hash[:]) - if _, ok := r.messageIDToRequest[requestID]; !ok { + if _, ok := r.requestIDToRequest[requestID]; !ok { if callingDon, ok := r.workflowDONs[msg.CallerDonId]; ok { - r.messageIDToRequest[requestID] = newTargetCapabilityRequest(r.lggr, r.underlying, r.capInfo.ID, r.localDonInfo.ID, r.peerID, + r.requestIDToRequest[requestID] = newTargetCapabilityRequest(r.lggr, r.underlying, r.capInfo.ID, r.localDonInfo.ID, r.peerID, callingDon, messageId, r.dispatcher, r.requestTimeout) } else { r.lggr.Errorw("received request from unregistered workflow don", "donId", msg.CallerDonId) @@ -119,7 +119,7 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { } } - request := r.messageIDToRequest[requestID] + request := r.requestIDToRequest[requestID] err := request.receive(ctx, msg) if err != nil { @@ -219,15 +219,15 @@ func (e *remoteTargetCapabilityRequest) executeRequest(ctx context.Context, payl func (e *remoteTargetCapabilityRequest) addRequester(from p2ptypes.PeerID) error { - fromPeerInDon := false + fromPeerInCallingDon := false for _, member := range e.callingDon.Members { if member == from { - fromPeerInDon = true + fromPeerInCallingDon = true break } } - if !fromPeerInDon { + if !fromPeerInCallingDon { return fmt.Errorf("request received from peer %s not in calling don", from) } @@ -257,15 +257,13 @@ func (e *remoteTargetCapabilityRequest) hasResponse() bool { } func (e *remoteTargetCapabilityRequest) sendResponses() error { - if !e.minimumRequiredRequestsReceived() { - return nil - } - - for requester := range e.requesters { - if !e.responseSentToRequester[requester] { - e.responseSentToRequester[requester] = true - if err := e.sendResponse(requester); err != nil { - return fmt.Errorf("failed to send response to requester %s: %w", requester, err) + if e.minimumRequiredRequestsReceived() && e.hasResponse() { + for requester := range e.requesters { + if !e.responseSentToRequester[requester] { + e.responseSentToRequester[requester] = true + if err := e.sendResponse(requester); err != nil { + return fmt.Errorf("failed to send response to requester %s: %w", requester, err) + } } } } @@ -273,7 +271,7 @@ func (e *remoteTargetCapabilityRequest) sendResponses() error { return nil } -func (e *remoteTargetCapabilityRequest) sendResponse(peer p2ptypes.PeerID) error { +func (e *remoteTargetCapabilityRequest) sendResponse(receiver p2ptypes.PeerID) error { responseMsg := types.MessageBody{ CapabilityId: e.capabilityID, @@ -282,7 +280,7 @@ func (e *remoteTargetCapabilityRequest) sendResponse(peer p2ptypes.PeerID) error Method: types.MethodExecute, MessageId: []byte(e.requestMessageID), Sender: e.capabilityPeerId[:], - Receiver: peer[:], + Receiver: receiver[:], } if e.responseError != types.Error_OK { @@ -291,11 +289,11 @@ func (e *remoteTargetCapabilityRequest) sendResponse(peer p2ptypes.PeerID) error responseMsg.Payload = e.response } - if err := e.dispatcher.Send(peer, &responseMsg); err != nil { - return fmt.Errorf("failed to send response: %w", err) + if err := e.dispatcher.Send(receiver, &responseMsg); err != nil { + return fmt.Errorf("failed to send response to dispatcher: %w", err) } - e.responseSentToRequester[peer] = true + e.responseSentToRequester[receiver] = true return nil } From 29948671a454de5ba95fe3109fd695e486ce2a29 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Mon, 27 May 2024 18:49:24 +0100 Subject: [PATCH 31/43] move to subpackage --- core/capabilities/remote/target/caller.go | 135 ++++++++++++++++++ .../caller_request.go} | 123 +--------------- .../caller_test.go} | 8 +- .../endtoend_test.go} | 8 +- core/capabilities/remote/target/receiver.go | 131 +++++++++++++++++ .../receiverRequest.go} | 123 +--------------- .../receiver_test.go} | 8 +- 7 files changed, 282 insertions(+), 254 deletions(-) create mode 100644 core/capabilities/remote/target/caller.go rename core/capabilities/remote/{target_caller.go => target/caller_request.go} (54%) rename core/capabilities/remote/{target_caller_test.go => target/caller_test.go} (98%) rename core/capabilities/remote/{target_test.go => target/endtoend_test.go} (98%) create mode 100644 core/capabilities/remote/target/receiver.go rename core/capabilities/remote/{target_receiver.go => target/receiverRequest.go} (61%) rename core/capabilities/remote/{target_receiver_test.go => target/receiver_test.go} (97%) diff --git a/core/capabilities/remote/target/caller.go b/core/capabilities/remote/target/caller.go new file mode 100644 index 00000000000..4e177e6d26c --- /dev/null +++ b/core/capabilities/remote/target/caller.go @@ -0,0 +1,135 @@ +package target + +import ( + "context" + "errors" + "fmt" + "sync" + "time" + + "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" + "github.com/smartcontractkit/chainlink/v2/core/logger" +) + +// remoteTargetCaller/Receiver are shims translating between capability API calls and network messages +type remoteTargetCaller struct { + lggr logger.Logger + remoteCapabilityInfo commoncap.CapabilityInfo + localDONInfo capabilities.DON + dispatcher types.Dispatcher + requestTimeout time.Duration + + requestIDToExecuteRequest map[string]*callerExecuteRequest + mutex sync.Mutex +} + +var _ commoncap.TargetCapability = &remoteTargetCaller{} +var _ types.Receiver = &remoteTargetCaller{} + +func NewRemoteTargetCaller(ctx context.Context, lggr logger.Logger, remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher, + requestTimeout time.Duration) *remoteTargetCaller { + + caller := &remoteTargetCaller{ + lggr: lggr, + remoteCapabilityInfo: remoteCapabilityInfo, + localDONInfo: localDonInfo, + dispatcher: dispatcher, + requestTimeout: requestTimeout, + requestIDToExecuteRequest: make(map[string]*callerExecuteRequest), + } + + go func() { + timer := time.NewTimer(requestTimeout) + defer timer.Stop() + for { + select { + case <-ctx.Done(): + return + case <-timer.C: + caller.ExpireRequests() + } + } + }() + + return caller +} + +func (c *remoteTargetCaller) ExpireRequests() { + c.mutex.Lock() + defer c.mutex.Unlock() + + for messageID, req := range c.requestIDToExecuteRequest { + if time.Since(req.createdAt) > c.requestTimeout { + req.cancelRequest("request timed out") + } + + delete(c.requestIDToExecuteRequest, messageID) + } +} + +func (c *remoteTargetCaller) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { + return c.remoteCapabilityInfo, nil +} + +func (c *remoteTargetCaller) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { + return errors.New("not implemented") +} + +func (c *remoteTargetCaller) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { + return errors.New("not implemented") +} + +func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { + c.mutex.Lock() + defer c.mutex.Unlock() + + requestID, err := GetRequestID(req) + if err != nil { + return nil, fmt.Errorf("failed to get request ID: %w", err) + } + + if _, ok := c.requestIDToExecuteRequest[requestID]; ok { + return nil, fmt.Errorf("request with ID %s already exists", requestID) + } + + execRequest, err := newCallerExecuteRequest(ctx, c.lggr, req, requestID, c.remoteCapabilityInfo, c.localDONInfo, c.dispatcher) + + c.requestIDToExecuteRequest[requestID] = execRequest + + return execRequest.responseCh, nil +} + +func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { + c.mutex.Lock() + defer c.mutex.Unlock() + + requestID := GetMessageID(msg) + sender := remote.ToPeerID(msg.Sender) + + req := c.requestIDToExecuteRequest[requestID] + if req == nil { + c.lggr.Warnw("received response for unknown request ID", "requestID", requestID, "sender", sender) + return + } + + if msg.Error != types.Error_OK { + c.lggr.Warnw("received error response for pending request", "requestID", requestID, "sender", sender, "receiver", msg.Receiver, "error", msg.Error) + return + } + + if err := req.addResponse(sender, msg.Payload); err != nil { + c.lggr.Errorw("failed to add response to request", "requestID", requestID, "sender", sender, "err", err) + } +} + +// Move this into common? +func GetRequestID(req commoncap.CapabilityRequest) (string, error) { + if req.Metadata.WorkflowID == "" || req.Metadata.WorkflowExecutionID == "" { + return "", errors.New("workflow ID and workflow execution ID must be set in request metadata") + } + + return req.Metadata.WorkflowID + req.Metadata.WorkflowExecutionID, nil +} diff --git a/core/capabilities/remote/target_caller.go b/core/capabilities/remote/target/caller_request.go similarity index 54% rename from core/capabilities/remote/target_caller.go rename to core/capabilities/remote/target/caller_request.go index 38a96545cfc..11a481c396a 100644 --- a/core/capabilities/remote/target_caller.go +++ b/core/capabilities/remote/target/caller_request.go @@ -1,11 +1,10 @@ -package remote +package target import ( "context" "crypto/sha256" "errors" "fmt" - "sync" "time" "github.com/smartcontractkit/chainlink-common/pkg/capabilities" @@ -18,126 +17,6 @@ import ( ragep2ptypes "github.com/smartcontractkit/libocr/ragep2p/types" ) -// remoteTargetCaller/Receiver are shims translating between capability API calls and network messages -type remoteTargetCaller struct { - lggr logger.Logger - remoteCapabilityInfo commoncap.CapabilityInfo - localDONInfo capabilities.DON - dispatcher types.Dispatcher - requestTimeout time.Duration - - requestIDToExecuteRequest map[string]*callerExecuteRequest - mutex sync.Mutex -} - -var _ commoncap.TargetCapability = &remoteTargetCaller{} -var _ types.Receiver = &remoteTargetCaller{} - -func NewRemoteTargetCaller(ctx context.Context, lggr logger.Logger, remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher, - requestTimeout time.Duration) *remoteTargetCaller { - - caller := &remoteTargetCaller{ - lggr: lggr, - remoteCapabilityInfo: remoteCapabilityInfo, - localDONInfo: localDonInfo, - dispatcher: dispatcher, - requestTimeout: requestTimeout, - requestIDToExecuteRequest: make(map[string]*callerExecuteRequest), - } - - go func() { - timer := time.NewTimer(requestTimeout) - defer timer.Stop() - for { - select { - case <-ctx.Done(): - return - case <-timer.C: - caller.ExpireRequests() - } - } - }() - - return caller -} - -func (c *remoteTargetCaller) ExpireRequests() { - c.mutex.Lock() - defer c.mutex.Unlock() - - for messageID, req := range c.requestIDToExecuteRequest { - if time.Since(req.createdAt) > c.requestTimeout { - req.cancelRequest("request timed out") - } - - delete(c.requestIDToExecuteRequest, messageID) - } -} - -func (c *remoteTargetCaller) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { - return c.remoteCapabilityInfo, nil -} - -func (c *remoteTargetCaller) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { - return errors.New("not implemented") -} - -func (c *remoteTargetCaller) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { - return errors.New("not implemented") -} - -func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { - c.mutex.Lock() - defer c.mutex.Unlock() - - requestID, err := GetRequestID(req) - if err != nil { - return nil, fmt.Errorf("failed to get request ID: %w", err) - } - - if _, ok := c.requestIDToExecuteRequest[requestID]; ok { - return nil, fmt.Errorf("request with ID %s already exists", requestID) - } - - execRequest, err := newCallerExecuteRequest(ctx, c.lggr, req, requestID, c.remoteCapabilityInfo, c.localDONInfo, c.dispatcher) - - c.requestIDToExecuteRequest[requestID] = execRequest - - return execRequest.responseCh, nil -} - -func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { - c.mutex.Lock() - defer c.mutex.Unlock() - - requestID := GetMessageID(msg) - sender := ToPeerID(msg.Sender) - - req := c.requestIDToExecuteRequest[requestID] - if req == nil { - c.lggr.Warnw("received response for unknown request ID", "requestID", requestID, "sender", sender) - return - } - - if msg.Error != types.Error_OK { - c.lggr.Warnw("received error response for pending request", "requestID", requestID, "sender", sender, "receiver", msg.Receiver, "error", msg.Error) - return - } - - if err := req.addResponse(sender, msg.Payload); err != nil { - c.lggr.Errorw("failed to add response to request", "requestID", requestID, "sender", sender, "err", err) - } -} - -// Move this into common? -func GetRequestID(req commoncap.CapabilityRequest) (string, error) { - if req.Metadata.WorkflowID == "" || req.Metadata.WorkflowExecutionID == "" { - return "", errors.New("workflow ID and workflow execution ID must be set in request metadata") - } - - return req.Metadata.WorkflowID + req.Metadata.WorkflowExecutionID, nil -} - type callerExecuteRequest struct { transmissionCtx context.Context responseCh chan commoncap.CapabilityResponse diff --git a/core/capabilities/remote/target_caller_test.go b/core/capabilities/remote/target/caller_test.go similarity index 98% rename from core/capabilities/remote/target_caller_test.go rename to core/capabilities/remote/target/caller_test.go index 5f259025ecd..3dca68fd123 100644 --- a/core/capabilities/remote/target_caller_test.go +++ b/core/capabilities/remote/target/caller_test.go @@ -1,4 +1,4 @@ -package remote_test +package target_test import ( "context" @@ -12,7 +12,7 @@ import ( commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink-common/pkg/values" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target" remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" @@ -194,7 +194,7 @@ func testRemoteTargetCaller(t *testing.T, numWorkflowPeers int, workflowNodeResp callers := make([]commoncap.TargetCapability, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) - caller := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeResponseTimeout) + caller := target.NewRemoteTargetCaller(ctx, lggr, capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeResponseTimeout) broker.RegisterReceiverNode(workflowPeers[i], caller) callers[i] = caller } @@ -260,7 +260,7 @@ func (t *callerTestReceiver) Receive(msg *remotetypes.MessageBody) { defer t.mux.Unlock() sender := toPeerID(msg.Sender) - messageID := remote.GetMessageID(msg) + messageID := target.GetMessageID(msg) if t.messageIDToSenders[messageID] == nil { t.messageIDToSenders[messageID] = make(map[p2ptypes.PeerID]bool) diff --git a/core/capabilities/remote/target_test.go b/core/capabilities/remote/target/endtoend_test.go similarity index 98% rename from core/capabilities/remote/target_test.go rename to core/capabilities/remote/target/endtoend_test.go index cbce2d17a63..3cc1b0b7014 100644 --- a/core/capabilities/remote/target_test.go +++ b/core/capabilities/remote/target/endtoend_test.go @@ -1,4 +1,4 @@ -package remote_test +package target_test import ( "context" @@ -13,7 +13,7 @@ import ( commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/values" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target" remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" @@ -182,7 +182,7 @@ func testRemoteTarget(t *testing.T, numWorkflowPeers int, workflowDonF uint8, wo for i := 0; i < numCapabilityPeers; i++ { capabilityPeer := capabilityPeers[i] capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeer) - capabilityNode := remote.NewRemoteTargetReceiver(ctx, lggr, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, + capabilityNode := target.NewRemoteTargetReceiver(ctx, lggr, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, capabilityNodeResponseTimeout) broker.RegisterReceiverNode(capabilityPeer, capabilityNode) capabilityNodes[i] = capabilityNode @@ -191,7 +191,7 @@ func testRemoteTarget(t *testing.T, numWorkflowPeers int, workflowDonF uint8, wo workflowNodes := make([]commoncap.TargetCapability, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) - workflowNode := remote.NewRemoteTargetCaller(ctx, lggr, capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeTimeout) + workflowNode := target.NewRemoteTargetCaller(ctx, lggr, capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeTimeout) broker.RegisterReceiverNode(workflowPeers[i], workflowNode) workflowNodes[i] = workflowNode } diff --git a/core/capabilities/remote/target/receiver.go b/core/capabilities/remote/target/receiver.go new file mode 100644 index 00000000000..be3c2217125 --- /dev/null +++ b/core/capabilities/remote/target/receiver.go @@ -0,0 +1,131 @@ +package target + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "sync" + "time" + + commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" + p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" + + "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink/v2/core/logger" +) + +type remoteTargetReceiver struct { + lggr logger.Logger + peerID p2ptypes.PeerID + underlying commoncap.TargetCapability + capInfo commoncap.CapabilityInfo + localDonInfo capabilities.DON + workflowDONs map[string]commoncap.DON + dispatcher types.Dispatcher + + requestIDToRequest map[string]*remoteTargetCapabilityRequest + requestTimeout time.Duration + + receiveLock sync.Mutex +} + +var _ types.Receiver = &remoteTargetReceiver{} + +func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, peerID p2ptypes.PeerID, underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, + workflowDONs map[string]commoncap.DON, dispatcher types.Dispatcher, requestTimeout time.Duration) *remoteTargetReceiver { + + receiver := &remoteTargetReceiver{ + underlying: underlying, + peerID: peerID, + capInfo: capInfo, + localDonInfo: localDonInfo, + workflowDONs: workflowDONs, + dispatcher: dispatcher, + + requestIDToRequest: map[string]*remoteTargetCapabilityRequest{}, + requestTimeout: requestTimeout, + + lggr: lggr, + } + + go func() { + timer := time.NewTimer(requestTimeout) + defer timer.Stop() + for { + select { + case <-ctx.Done(): + return + case <-timer.C: + receiver.ExpireRequests(ctx) + } + } + }() + + return receiver +} + +func (r *remoteTargetReceiver) ExpireRequests(ctx context.Context) { + r.receiveLock.Lock() + defer r.receiveLock.Unlock() + + for messageId, executeReq := range r.requestIDToRequest { + if time.Since(executeReq.createdTime) > r.requestTimeout { + + if !executeReq.hasResponse() { + executeReq.setError(types.Error_TIMEOUT) + if err := executeReq.sendResponses(); err != nil { + r.lggr.Errorw("failed to send timeout response to all requesters", "capabilityId", r.capInfo.ID, "err", err) + } + } + + delete(r.requestIDToRequest, messageId) + } + + } + +} + +func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { + // TODO should the dispatcher be passing in a context? + ctx := context.Background() + + // TODO Confirm threading semantics of dispatcher receive + // TODO May want to have executor per message id to improve liveness + r.receiveLock.Lock() + defer r.receiveLock.Unlock() + + // TODO multithread this + + if msg.Method != types.MethodExecute { + r.lggr.Errorw("received request for unsupported method type", "method", msg.Method) + return + } + + // A request is uniquely identified by the message id and the hash of the payload + messageId := GetMessageID(msg) + hash := sha256.Sum256(msg.Payload) + requestID := messageId + hex.EncodeToString(hash[:]) + + if _, ok := r.requestIDToRequest[requestID]; !ok { + if callingDon, ok := r.workflowDONs[msg.CallerDonId]; ok { + r.requestIDToRequest[requestID] = newTargetCapabilityRequest(r.lggr, r.underlying, r.capInfo.ID, r.localDonInfo.ID, r.peerID, + callingDon, messageId, r.dispatcher, r.requestTimeout) + } else { + r.lggr.Errorw("received request from unregistered workflow don", "donId", msg.CallerDonId) + return + } + } + + request := r.requestIDToRequest[requestID] + + err := request.receive(ctx, msg) + if err != nil { + r.lggr.Errorw("request failed to receive new message", "request", request, "err", err) + } + +} + +func GetMessageID(msg *types.MessageBody) string { + return string(msg.MessageId) +} diff --git a/core/capabilities/remote/target_receiver.go b/core/capabilities/remote/target/receiverRequest.go similarity index 61% rename from core/capabilities/remote/target_receiver.go rename to core/capabilities/remote/target/receiverRequest.go index c8fa08376ae..d8fe72b9066 100644 --- a/core/capabilities/remote/target_receiver.go +++ b/core/capabilities/remote/target/receiverRequest.go @@ -1,15 +1,13 @@ -package remote +package target import ( "context" - "crypto/sha256" - "encoding/hex" "fmt" - "sync" "time" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" @@ -17,117 +15,6 @@ import ( "github.com/smartcontractkit/chainlink/v2/core/logger" ) -type remoteTargetReceiver struct { - lggr logger.Logger - peerID p2ptypes.PeerID - underlying commoncap.TargetCapability - capInfo commoncap.CapabilityInfo - localDonInfo capabilities.DON - workflowDONs map[string]commoncap.DON - dispatcher types.Dispatcher - - requestIDToRequest map[string]*remoteTargetCapabilityRequest - requestTimeout time.Duration - - receiveLock sync.Mutex -} - -var _ types.Receiver = &remoteTargetReceiver{} - -func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, peerID p2ptypes.PeerID, underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, - workflowDONs map[string]commoncap.DON, dispatcher types.Dispatcher, requestTimeout time.Duration) *remoteTargetReceiver { - - receiver := &remoteTargetReceiver{ - underlying: underlying, - peerID: peerID, - capInfo: capInfo, - localDonInfo: localDonInfo, - workflowDONs: workflowDONs, - dispatcher: dispatcher, - - requestIDToRequest: map[string]*remoteTargetCapabilityRequest{}, - requestTimeout: requestTimeout, - - lggr: lggr, - } - - go func() { - timer := time.NewTimer(requestTimeout) - defer timer.Stop() - for { - select { - case <-ctx.Done(): - return - case <-timer.C: - receiver.ExpireRequests(ctx) - } - } - }() - - return receiver -} - -func (r *remoteTargetReceiver) ExpireRequests(ctx context.Context) { - r.receiveLock.Lock() - defer r.receiveLock.Unlock() - - for messageId, executeReq := range r.requestIDToRequest { - if time.Since(executeReq.createdTime) > r.requestTimeout { - - if !executeReq.hasResponse() { - executeReq.setError(types.Error_TIMEOUT) - if err := executeReq.sendResponses(); err != nil { - r.lggr.Errorw("failed to send timeout response to all requesters", "capabilityId", r.capInfo.ID, "err", err) - } - } - - delete(r.requestIDToRequest, messageId) - } - - } - -} - -func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { - // TODO should the dispatcher be passing in a context? - ctx := context.Background() - - // TODO Confirm threading semantics of dispatcher receive - // TODO May want to have executor per message id to improve liveness - r.receiveLock.Lock() - defer r.receiveLock.Unlock() - - // TODO multithread this - - if msg.Method != types.MethodExecute { - r.lggr.Errorw("received request for unsupported method type", "method", msg.Method) - return - } - - // A request is uniquely identified by the message id and the hash of the payload - messageId := GetMessageID(msg) - hash := sha256.Sum256(msg.Payload) - requestID := messageId + hex.EncodeToString(hash[:]) - - if _, ok := r.requestIDToRequest[requestID]; !ok { - if callingDon, ok := r.workflowDONs[msg.CallerDonId]; ok { - r.requestIDToRequest[requestID] = newTargetCapabilityRequest(r.lggr, r.underlying, r.capInfo.ID, r.localDonInfo.ID, r.peerID, - callingDon, messageId, r.dispatcher, r.requestTimeout) - } else { - r.lggr.Errorw("received request from unregistered workflow don", "donId", msg.CallerDonId) - return - } - } - - request := r.requestIDToRequest[requestID] - - err := request.receive(ctx, msg) - if err != nil { - r.lggr.Errorw("request failed to receive new message", "request", request, "err", err) - } - -} - type remoteTargetCapabilityRequest struct { lggr logger.Logger @@ -173,7 +60,7 @@ func newTargetCapabilityRequest(lggr logger.Logger, capability capabilities.Targ } func (e *remoteTargetCapabilityRequest) receive(ctx context.Context, msg *types.MessageBody) error { - requester := ToPeerID(msg.Sender) + requester := remote.ToPeerID(msg.Sender) if err := e.addRequester(requester); err != nil { return fmt.Errorf("failed to add requester to request: %w", err) } @@ -297,7 +184,3 @@ func (e *remoteTargetCapabilityRequest) sendResponse(receiver p2ptypes.PeerID) e return nil } - -func GetMessageID(msg *types.MessageBody) string { - return string(msg.MessageId) -} diff --git a/core/capabilities/remote/target_receiver_test.go b/core/capabilities/remote/target/receiver_test.go similarity index 97% rename from core/capabilities/remote/target_receiver_test.go rename to core/capabilities/remote/target/receiver_test.go index d16ca873ed6..6b5a20a167e 100644 --- a/core/capabilities/remote/target_receiver_test.go +++ b/core/capabilities/remote/target/receiver_test.go @@ -1,4 +1,4 @@ -package remote_test +package target_test import ( "context" @@ -10,7 +10,7 @@ import ( commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target" remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" "github.com/smartcontractkit/chainlink/v2/core/logger" @@ -124,7 +124,7 @@ func testRemoteTargetReceiver(t *testing.T, ctx context.Context, numWorkflowPeer for i := 0; i < numCapabilityPeers; i++ { capabilityPeer := capabilityPeers[i] capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeer) - capabilityNode := remote.NewRemoteTargetReceiver(ctx, lggr, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, + capabilityNode := target.NewRemoteTargetReceiver(ctx, lggr, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, capabilityNodeResponseTimeout) broker.RegisterReceiverNode(capabilityPeer, capabilityNode) capabilityNodes[i] = capabilityNode @@ -178,7 +178,7 @@ func (r *receiverTestCaller) Execute(ctx context.Context, req commoncap.Capabili return nil, err } - messageID, err := remote.GetRequestID(req) + messageID, err := target.GetRequestID(req) if err != nil { return nil, err } From b7980c2091d4e2016de0c4ec27c4b5bb81a1ddf7 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Tue, 28 May 2024 10:24:48 +0100 Subject: [PATCH 32/43] wip --- core/capabilities/remote/target/caller.go | 6 ++-- .../remote/target/caller_request.go | 16 ++++----- .../remote/target/endtoend_test.go | 22 ------------ core/capabilities/remote/target/receiver.go | 8 ++--- ...receiverRequest.go => receiver_request.go} | 34 +++++++++---------- .../remote/target/receiver_request_test.go | 9 +++++ .../remote/target/receiver_test.go | 2 -- 7 files changed, 41 insertions(+), 56 deletions(-) rename core/capabilities/remote/target/{receiverRequest.go => receiver_request.go} (77%) create mode 100644 core/capabilities/remote/target/receiver_request_test.go diff --git a/core/capabilities/remote/target/caller.go b/core/capabilities/remote/target/caller.go index 4e177e6d26c..27da469dbf4 100644 --- a/core/capabilities/remote/target/caller.go +++ b/core/capabilities/remote/target/caller.go @@ -22,7 +22,7 @@ type remoteTargetCaller struct { dispatcher types.Dispatcher requestTimeout time.Duration - requestIDToExecuteRequest map[string]*callerExecuteRequest + requestIDToExecuteRequest map[string]*callerRequest mutex sync.Mutex } @@ -38,7 +38,7 @@ func NewRemoteTargetCaller(ctx context.Context, lggr logger.Logger, remoteCapabi localDONInfo: localDonInfo, dispatcher: dispatcher, requestTimeout: requestTimeout, - requestIDToExecuteRequest: make(map[string]*callerExecuteRequest), + requestIDToExecuteRequest: make(map[string]*callerRequest), } go func() { @@ -95,7 +95,7 @@ func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.Capabili return nil, fmt.Errorf("request with ID %s already exists", requestID) } - execRequest, err := newCallerExecuteRequest(ctx, c.lggr, req, requestID, c.remoteCapabilityInfo, c.localDONInfo, c.dispatcher) + execRequest, err := newCallerRequest(ctx, c.lggr, req, requestID, c.remoteCapabilityInfo, c.localDONInfo, c.dispatcher) c.requestIDToExecuteRequest[requestID] = execRequest diff --git a/core/capabilities/remote/target/caller_request.go b/core/capabilities/remote/target/caller_request.go index 11a481c396a..a3f2b9a9bc4 100644 --- a/core/capabilities/remote/target/caller_request.go +++ b/core/capabilities/remote/target/caller_request.go @@ -17,7 +17,7 @@ import ( ragep2ptypes "github.com/smartcontractkit/libocr/ragep2p/types" ) -type callerExecuteRequest struct { +type callerRequest struct { transmissionCtx context.Context responseCh chan commoncap.CapabilityResponse transmissionCancelFn context.CancelFunc @@ -30,8 +30,8 @@ type callerExecuteRequest struct { respSent bool } -func newCallerExecuteRequest(ctx context.Context, lggr logger.Logger, req commoncap.CapabilityRequest, messageID string, - remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher) (*callerExecuteRequest, error) { +func newCallerRequest(ctx context.Context, lggr logger.Logger, req commoncap.CapabilityRequest, messageID string, + remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher) (*callerRequest, error) { remoteCapabilityDonInfo := remoteCapabilityInfo.DON if remoteCapabilityDonInfo == nil { @@ -80,7 +80,7 @@ func newCallerExecuteRequest(ctx context.Context, lggr logger.Logger, req common }(peerID, delay) } - return &callerExecuteRequest{ + return &callerRequest{ createdAt: time.Now(), transmissionCancelFn: transmissionCancelFn, requiredIdenticalResponses: int(remoteCapabilityDonInfo.F + 1), @@ -90,12 +90,12 @@ func newCallerExecuteRequest(ctx context.Context, lggr logger.Logger, req common }, nil } -func (c *callerExecuteRequest) responseSent() bool { +func (c *callerRequest) responseSent() bool { return c.respSent } // TODO addResponse assumes that only one response is received from each peer, if streaming responses need to be supported this will need to be updated -func (c *callerExecuteRequest) addResponse(sender p2ptypes.PeerID, response []byte) error { +func (c *callerRequest) addResponse(sender p2ptypes.PeerID, response []byte) error { if _, ok := c.responseReceived[sender]; !ok { return fmt.Errorf("response from peer %s not expected", sender) } @@ -121,14 +121,14 @@ func (c *callerExecuteRequest) addResponse(sender p2ptypes.PeerID, response []by return nil } -func (c *callerExecuteRequest) sendResponse(response commoncap.CapabilityResponse) { +func (c *callerRequest) sendResponse(response commoncap.CapabilityResponse) { c.responseCh <- response close(c.responseCh) c.transmissionCancelFn() c.respSent = true } -func (c *callerExecuteRequest) cancelRequest(reason string) { +func (c *callerRequest) cancelRequest(reason string) { c.transmissionCancelFn() if !c.responseSent() { c.sendResponse(commoncap.CapabilityResponse{Err: errors.New(reason)}) diff --git a/core/capabilities/remote/target/endtoend_test.go b/core/capabilities/remote/target/endtoend_test.go index 3cc1b0b7014..7beb2eae3ef 100644 --- a/core/capabilities/remote/target/endtoend_test.go +++ b/core/capabilities/remote/target/endtoend_test.go @@ -94,27 +94,6 @@ func Test_RemoteTargetCapability_DonTopologies(t *testing.T) { */ //here - below tests plus additional tests for the remoteTargetCapability test - // test capability don F handling - - /* - here - these errors tests failing still? why? - - errResponseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { - require.NoError(t, responseError) - response := <-responseCh - assert.NotNil(t, response.Err) - } - - // Test scenario where number of submissions is less than F + 1 - - // How to make these tests less time dependent? risk of being flaky - testRemoteTargetConsensus(t, 4, 6, 5*time.Second, 1, 0, 1*time.Second, errResponseTest) - testRemoteTargetConsensus(t, 10, 10, 5*time.Second, 1, 0, 1*time.Second, errResponseTest) - */ - //tyring to modify tests to test the caller F number handling? - - //also having issues with error test cases - since the client F handling? - //then got threading to do // Context cancellation test - use an underlying capability that blocks until the context is cancelled @@ -207,7 +186,6 @@ func testRemoteTarget(t *testing.T, numWorkflowPeers int, workflowDonF uint8, wo wg := &sync.WaitGroup{} wg.Add(len(workflowNodes)) - // Fire off all the requests for _, caller := range workflowNodes { go func(caller commoncap.TargetCapability) { responseCh, err := caller.Execute(ctx, diff --git a/core/capabilities/remote/target/receiver.go b/core/capabilities/remote/target/receiver.go index be3c2217125..35f11ae5224 100644 --- a/core/capabilities/remote/target/receiver.go +++ b/core/capabilities/remote/target/receiver.go @@ -24,7 +24,7 @@ type remoteTargetReceiver struct { workflowDONs map[string]commoncap.DON dispatcher types.Dispatcher - requestIDToRequest map[string]*remoteTargetCapabilityRequest + requestIDToRequest map[string]*receiverRequest requestTimeout time.Duration receiveLock sync.Mutex @@ -43,7 +43,7 @@ func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, peerID p2p workflowDONs: workflowDONs, dispatcher: dispatcher, - requestIDToRequest: map[string]*remoteTargetCapabilityRequest{}, + requestIDToRequest: map[string]*receiverRequest{}, requestTimeout: requestTimeout, lggr: lggr, @@ -57,7 +57,7 @@ func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, peerID p2p case <-ctx.Done(): return case <-timer.C: - receiver.ExpireRequests(ctx) + receiver.ExpireRequests() } } }() @@ -65,7 +65,7 @@ func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, peerID p2p return receiver } -func (r *remoteTargetReceiver) ExpireRequests(ctx context.Context) { +func (r *remoteTargetReceiver) ExpireRequests() { r.receiveLock.Lock() defer r.receiveLock.Unlock() diff --git a/core/capabilities/remote/target/receiverRequest.go b/core/capabilities/remote/target/receiver_request.go similarity index 77% rename from core/capabilities/remote/target/receiverRequest.go rename to core/capabilities/remote/target/receiver_request.go index d8fe72b9066..746b1bbb180 100644 --- a/core/capabilities/remote/target/receiverRequest.go +++ b/core/capabilities/remote/target/receiver_request.go @@ -15,7 +15,7 @@ import ( "github.com/smartcontractkit/chainlink/v2/core/logger" ) -type remoteTargetCapabilityRequest struct { +type receiverRequest struct { lggr logger.Logger capability capabilities.TargetCapability @@ -40,10 +40,10 @@ type remoteTargetCapabilityRequest struct { requestTimeout time.Duration } -func newTargetCapabilityRequest(lggr logger.Logger, capability capabilities.TargetCapability, capabilityID string, capabilityDonID string, capabilityPeerId p2ptypes.PeerID, +func NewReceiverRequest(lggr logger.Logger, capability capabilities.TargetCapability, capabilityID string, capabilityDonID string, capabilityPeerId p2ptypes.PeerID, callingDon commoncap.DON, requestMessageID string, - dispatcher types.Dispatcher, requestTimeout time.Duration) *remoteTargetCapabilityRequest { - return &remoteTargetCapabilityRequest{ + dispatcher types.Dispatcher, requestTimeout time.Duration) *receiverRequest { + return &receiverRequest{ lggr: lggr, capability: capability, createdTime: time.Now(), @@ -59,7 +59,7 @@ func newTargetCapabilityRequest(lggr logger.Logger, capability capabilities.Targ } } -func (e *remoteTargetCapabilityRequest) receive(ctx context.Context, msg *types.MessageBody) error { +func (e *receiverRequest) receive(ctx context.Context, msg *types.MessageBody) error { requester := remote.ToPeerID(msg.Sender) if err := e.addRequester(requester); err != nil { return fmt.Errorf("failed to add requester to request: %w", err) @@ -76,7 +76,7 @@ func (e *remoteTargetCapabilityRequest) receive(ctx context.Context, msg *types. return nil } -func (e *remoteTargetCapabilityRequest) executeRequest(ctx context.Context, payload []byte) { +func (e *receiverRequest) executeRequest(ctx context.Context, payload []byte) { ctxWithTimeout, cancel := context.WithTimeout(ctx, e.requestTimeout) defer cancel() @@ -104,7 +104,7 @@ func (e *remoteTargetCapabilityRequest) executeRequest(ctx context.Context, payl e.setResult(responsePayload) } -func (e *remoteTargetCapabilityRequest) addRequester(from p2ptypes.PeerID) error { +func (e *receiverRequest) addRequester(from p2ptypes.PeerID) error { fromPeerInCallingDon := false for _, member := range e.callingDon.Members { @@ -127,24 +127,24 @@ func (e *remoteTargetCapabilityRequest) addRequester(from p2ptypes.PeerID) error return nil } -func (e *remoteTargetCapabilityRequest) minimumRequiredRequestsReceived() bool { +func (e *receiverRequest) minimumRequiredRequestsReceived() bool { return len(e.requesters) >= int(e.callingDon.F+1) } -func (e *remoteTargetCapabilityRequest) setResult(result []byte) { +func (e *receiverRequest) setResult(result []byte) { e.response = result } -func (e *remoteTargetCapabilityRequest) setError(err types.Error) { +func (e *receiverRequest) setError(err types.Error) { e.responseError = err } -func (e *remoteTargetCapabilityRequest) hasResponse() bool { +func (e *receiverRequest) hasResponse() bool { return e.response != nil || e.responseError != types.Error_OK } -func (e *remoteTargetCapabilityRequest) sendResponses() error { - if e.minimumRequiredRequestsReceived() && e.hasResponse() { +func (e *receiverRequest) sendResponses() error { + if e.hasResponse() { for requester := range e.requesters { if !e.responseSentToRequester[requester] { e.responseSentToRequester[requester] = true @@ -158,7 +158,7 @@ func (e *remoteTargetCapabilityRequest) sendResponses() error { return nil } -func (e *remoteTargetCapabilityRequest) sendResponse(receiver p2ptypes.PeerID) error { +func (e *receiverRequest) sendResponse(requester p2ptypes.PeerID) error { responseMsg := types.MessageBody{ CapabilityId: e.capabilityID, @@ -167,7 +167,7 @@ func (e *remoteTargetCapabilityRequest) sendResponse(receiver p2ptypes.PeerID) e Method: types.MethodExecute, MessageId: []byte(e.requestMessageID), Sender: e.capabilityPeerId[:], - Receiver: receiver[:], + Receiver: requester[:], } if e.responseError != types.Error_OK { @@ -176,11 +176,11 @@ func (e *remoteTargetCapabilityRequest) sendResponse(receiver p2ptypes.PeerID) e responseMsg.Payload = e.response } - if err := e.dispatcher.Send(receiver, &responseMsg); err != nil { + if err := e.dispatcher.Send(requester, &responseMsg); err != nil { return fmt.Errorf("failed to send response to dispatcher: %w", err) } - e.responseSentToRequester[receiver] = true + e.responseSentToRequester[requester] = true return nil } diff --git a/core/capabilities/remote/target/receiver_request_test.go b/core/capabilities/remote/target/receiver_request_test.go new file mode 100644 index 00000000000..e3036d4a364 --- /dev/null +++ b/core/capabilities/remote/target/receiver_request_test.go @@ -0,0 +1,9 @@ +package target_test + +import "testing" + +func Test_ReceiverRequest_RequestsFromNonDonPeerAreIgnored(t *testing.T) { + + request := newTarget + +} diff --git a/core/capabilities/remote/target/receiver_test.go b/core/capabilities/remote/target/receiver_test.go index 6b5a20a167e..77c5352a7bd 100644 --- a/core/capabilities/remote/target/receiver_test.go +++ b/core/capabilities/remote/target/receiver_test.go @@ -43,8 +43,6 @@ func Test_RemoteTargetCapability_InsufficientWorkflowCallers(t *testing.T) { } } -// Test stuck capability times out - func Test_RemoteTargetCapability_IgnoresRequestFromIncorrectPeer(t *testing.T) { ctx, cancel := context.WithCancel(testutils.Context(t)) defer cancel() From fa117f1dae6c22c1043f4bc33312ce848bd73c05 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Tue, 28 May 2024 13:44:20 +0100 Subject: [PATCH 33/43] wip --- core/capabilities/remote/target/caller.go | 32 +-- .../remote/target/caller_request.go | 6 +- .../capabilities/remote/target/caller_test.go | 11 +- .../remote/target/endtoend_test.go | 73 +++++-- core/capabilities/remote/target/receiver.go | 8 +- .../remote/target/receiver_request.go | 20 +- .../remote/target/receiver_request_test.go | 198 +++++++++++++++++- .../remote/target/receiver_test.go | 33 +-- 8 files changed, 304 insertions(+), 77 deletions(-) diff --git a/core/capabilities/remote/target/caller.go b/core/capabilities/remote/target/caller.go index 27da469dbf4..12aa965f5e1 100644 --- a/core/capabilities/remote/target/caller.go +++ b/core/capabilities/remote/target/caller.go @@ -22,7 +22,7 @@ type remoteTargetCaller struct { dispatcher types.Dispatcher requestTimeout time.Duration - requestIDToExecuteRequest map[string]*callerRequest + messageIDToExecuteRequest map[string]*callerRequest mutex sync.Mutex } @@ -38,7 +38,7 @@ func NewRemoteTargetCaller(ctx context.Context, lggr logger.Logger, remoteCapabi localDONInfo: localDonInfo, dispatcher: dispatcher, requestTimeout: requestTimeout, - requestIDToExecuteRequest: make(map[string]*callerRequest), + messageIDToExecuteRequest: make(map[string]*callerRequest), } go func() { @@ -61,12 +61,12 @@ func (c *remoteTargetCaller) ExpireRequests() { c.mutex.Lock() defer c.mutex.Unlock() - for messageID, req := range c.requestIDToExecuteRequest { + for messageID, req := range c.messageIDToExecuteRequest { if time.Since(req.createdAt) > c.requestTimeout { req.cancelRequest("request timed out") } - delete(c.requestIDToExecuteRequest, messageID) + delete(c.messageIDToExecuteRequest, messageID) } } @@ -86,18 +86,18 @@ func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.Capabili c.mutex.Lock() defer c.mutex.Unlock() - requestID, err := GetRequestID(req) + messageID, err := GetMessageIDForRequest(req) if err != nil { - return nil, fmt.Errorf("failed to get request ID: %w", err) + return nil, fmt.Errorf("failed to get message ID for request: %w", err) } - if _, ok := c.requestIDToExecuteRequest[requestID]; ok { - return nil, fmt.Errorf("request with ID %s already exists", requestID) + if _, ok := c.messageIDToExecuteRequest[messageID]; ok { + return nil, fmt.Errorf("request for message ID %s already exists", messageID) } - execRequest, err := newCallerRequest(ctx, c.lggr, req, requestID, c.remoteCapabilityInfo, c.localDONInfo, c.dispatcher) + execRequest, err := newCallerRequest(ctx, c.lggr, req, messageID, c.remoteCapabilityInfo, c.localDONInfo, c.dispatcher) - c.requestIDToExecuteRequest[requestID] = execRequest + c.messageIDToExecuteRequest[messageID] = execRequest return execRequest.responseCh, nil } @@ -106,27 +106,27 @@ func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { c.mutex.Lock() defer c.mutex.Unlock() - requestID := GetMessageID(msg) + messageID := GetMessageID(msg) sender := remote.ToPeerID(msg.Sender) - req := c.requestIDToExecuteRequest[requestID] + req := c.messageIDToExecuteRequest[messageID] if req == nil { - c.lggr.Warnw("received response for unknown request ID", "requestID", requestID, "sender", sender) + c.lggr.Warnw("received response for unknown message ID ", "messageID", messageID, "sender", sender) return } if msg.Error != types.Error_OK { - c.lggr.Warnw("received error response for pending request", "requestID", requestID, "sender", sender, "receiver", msg.Receiver, "error", msg.Error) + c.lggr.Warnw("received error response for pending request", "messageID", messageID, "sender", sender, "receiver", msg.Receiver, "error", msg.Error) return } if err := req.addResponse(sender, msg.Payload); err != nil { - c.lggr.Errorw("failed to add response to request", "requestID", requestID, "sender", sender, "err", err) + c.lggr.Errorw("failed to add response to request", "messageID", messageID, "sender", sender, "err", err) } } // Move this into common? -func GetRequestID(req commoncap.CapabilityRequest) (string, error) { +func GetMessageIDForRequest(req commoncap.CapabilityRequest) (string, error) { if req.Metadata.WorkflowID == "" || req.Metadata.WorkflowExecutionID == "" { return "", errors.New("workflow ID and workflow execution ID must be set in request metadata") } diff --git a/core/capabilities/remote/target/caller_request.go b/core/capabilities/remote/target/caller_request.go index a3f2b9a9bc4..f9dbee7544c 100644 --- a/core/capabilities/remote/target/caller_request.go +++ b/core/capabilities/remote/target/caller_request.go @@ -106,10 +106,10 @@ func (c *callerRequest) addResponse(sender p2ptypes.PeerID, response []byte) err c.responseReceived[sender] = true - payloadId := sha256.Sum256(response) - c.responseIDCount[payloadId]++ + responseID := sha256.Sum256(response) + c.responseIDCount[responseID]++ - if c.responseIDCount[payloadId] == c.requiredIdenticalResponses { + if c.responseIDCount[responseID] == c.requiredIdenticalResponses { capabilityResponse, err := pb.UnmarshalCapabilityResponse(response) if err != nil { c.sendResponse(commoncap.CapabilityResponse{Err: fmt.Errorf("failed to unmarshal capability response: %w", err)}) diff --git a/core/capabilities/remote/target/caller_test.go b/core/capabilities/remote/target/caller_test.go index 3dca68fd123..54fbe3c0f68 100644 --- a/core/capabilities/remote/target/caller_test.go +++ b/core/capabilities/remote/target/caller_test.go @@ -147,14 +147,9 @@ func testRemoteTargetCaller(t *testing.T, numWorkflowPeers int, workflowNodeResp capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { - capabilityPeerID := p2ptypes.PeerID{} - require.NoError(t, capabilityPeerID.UnmarshalText([]byte(newPeerID()))) - capabilityPeers[i] = capabilityPeerID + capabilityPeers[i] = newP2PPeerID(t) } - capabilityPeerID := p2ptypes.PeerID{} - require.NoError(t, capabilityPeerID.UnmarshalText([]byte(newPeerID()))) - capDonInfo := commoncap.DON{ ID: "capability-don", Members: capabilityPeers, @@ -171,9 +166,7 @@ func testRemoteTargetCaller(t *testing.T, numWorkflowPeers int, workflowNodeResp workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { - workflowPeerID := p2ptypes.PeerID{} - require.NoError(t, workflowPeerID.UnmarshalText([]byte(newPeerID()))) - workflowPeers[i] = workflowPeerID + workflowPeers[i] = newP2PPeerID(t) } workflowDonInfo := commoncap.DON{ diff --git a/core/capabilities/remote/target/endtoend_test.go b/core/capabilities/remote/target/endtoend_test.go index 7beb2eae3ef..bf6d4cd77af 100644 --- a/core/capabilities/remote/target/endtoend_test.go +++ b/core/capabilities/remote/target/endtoend_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/mr-tron/base58" + "github.com/pkg/errors" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -39,7 +40,9 @@ func Test_RemoteTargetCapability_TransmissionSchedules(t *testing.T) { timeOut := 10 * time.Minute - testRemoteTarget(t, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) + capability := &testCapability{} + + testRemoteTarget(t, capability, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) transmissionSchedule, err = values.NewMap(map[string]any{ "schedule": transmission.Schedule_AllAtOnce, @@ -47,7 +50,7 @@ func Test_RemoteTargetCapability_TransmissionSchedules(t *testing.T) { }) require.NoError(t, err) - testRemoteTarget(t, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, capability, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) } @@ -69,18 +72,20 @@ func Test_RemoteTargetCapability_DonTopologies(t *testing.T) { timeOut := 10 * time.Minute + capability := &testCapability{} + // Test scenarios where the number of submissions is greater than or equal to F + 1 - testRemoteTarget(t, 1, 0, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(t, 4, 3, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(t, 10, 3, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, capability, 1, 0, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, capability, 4, 3, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, capability, 10, 3, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(t, 1, 0, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(t, 1, 0, timeOut, 4, 3, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(t, 1, 0, timeOut, 10, 3, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, capability, 1, 0, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, capability, 1, 0, timeOut, 4, 3, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, capability, 1, 0, timeOut, 10, 3, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(t, 4, 3, timeOut, 4, 3, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(t, 10, 3, timeOut, 10, 3, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(t, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, capability, 4, 3, timeOut, 4, 3, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, capability, 10, 3, timeOut, 10, 3, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, capability, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) /* transmissionSchedule, err = values.NewMap(map[string]any{ @@ -106,7 +111,30 @@ func Test_RemoteTargetCapability_DonTopologies(t *testing.T) { } -func testRemoteTarget(t *testing.T, numWorkflowPeers int, workflowDonF uint8, workflowNodeTimeout time.Duration, +func Test_RemoteTargetCapability_CapabilityError(t *testing.T) { + responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + require.NoError(t, responseError) + response := <-responseCh + responseValue, err := response.Value.Unwrap() + require.NoError(t, err) + assert.Equal(t, "aValue1", responseValue.(string)) + } + + timeOut := 10 * time.Minute + + capability := &testErrorCapability{} + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + testRemoteTarget(t, capability, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) + +} + +func testRemoteTarget(t *testing.T, underlying commoncap.TargetCapability, numWorkflowPeers int, workflowDonF uint8, workflowNodeTimeout time.Duration, numCapabilityPeers int, capabilityDonF uint8, capabilityNodeResponseTimeout time.Duration, transmissionSchedule *values.Map, responseTest func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error)) { lggr := logger.TestLogger(t) @@ -155,7 +183,6 @@ func testRemoteTarget(t *testing.T, numWorkflowPeers int, workflowDonF uint8, wo workflowDONs := map[string]commoncap.DON{ workflowDonInfo.ID: workflowDonInfo, } - underlying := &testCapability{} capabilityNodes := make([]remotetypes.Receiver, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { @@ -265,21 +292,25 @@ func (t *nodeDispatcher) SetReceiver(capabilityId string, donId string, receiver } func (t *nodeDispatcher) RemoveReceiver(capabilityId string, donId string) {} -type testCapability struct { +type abstractTestCapability struct { } -func (t testCapability) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { +func (t abstractTestCapability) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { return commoncap.CapabilityInfo{}, nil } -func (t testCapability) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { +func (t abstractTestCapability) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { return nil } -func (t testCapability) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { +func (t abstractTestCapability) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { return nil } +type testCapability struct { + abstractTestCapability +} + func (t testCapability) Execute(ctx context.Context, request commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { ch := make(chan commoncap.CapabilityResponse, 1) @@ -292,6 +323,14 @@ func (t testCapability) Execute(ctx context.Context, request commoncap.Capabilit return ch, nil } +type testErrorCapability struct { + abstractTestCapability +} + +func (t testErrorCapability) Execute(ctx context.Context, request commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { + return nil, errors.New("an error") +} + func newPeerID() string { var privKey [32]byte _, err := rand.Read(privKey[:]) diff --git a/core/capabilities/remote/target/receiver.go b/core/capabilities/remote/target/receiver.go index 35f11ae5224..5e048d42f4e 100644 --- a/core/capabilities/remote/target/receiver.go +++ b/core/capabilities/remote/target/receiver.go @@ -90,7 +90,7 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { // TODO should the dispatcher be passing in a context? ctx := context.Background() - // TODO Confirm threading semantics of dispatcher receive + // TODO Confirm threading semantics of dispatcher Receive // TODO May want to have executor per message id to improve liveness r.receiveLock.Lock() defer r.receiveLock.Unlock() @@ -109,7 +109,7 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { if _, ok := r.requestIDToRequest[requestID]; !ok { if callingDon, ok := r.workflowDONs[msg.CallerDonId]; ok { - r.requestIDToRequest[requestID] = newTargetCapabilityRequest(r.lggr, r.underlying, r.capInfo.ID, r.localDonInfo.ID, r.peerID, + r.requestIDToRequest[requestID] = NewReceiverRequest(r.lggr, r.underlying, r.capInfo.ID, r.localDonInfo.ID, r.peerID, callingDon, messageId, r.dispatcher, r.requestTimeout) } else { r.lggr.Errorw("received request from unregistered workflow don", "donId", msg.CallerDonId) @@ -119,9 +119,9 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { request := r.requestIDToRequest[requestID] - err := request.receive(ctx, msg) + err := request.Receive(ctx, msg) if err != nil { - r.lggr.Errorw("request failed to receive new message", "request", request, "err", err) + r.lggr.Errorw("request failed to Receive new message", "request", request, "err", err) } } diff --git a/core/capabilities/remote/target/receiver_request.go b/core/capabilities/remote/target/receiver_request.go index 746b1bbb180..1029a203e0d 100644 --- a/core/capabilities/remote/target/receiver_request.go +++ b/core/capabilities/remote/target/receiver_request.go @@ -59,14 +59,17 @@ func NewReceiverRequest(lggr logger.Logger, capability capabilities.TargetCapabi } } -func (e *receiverRequest) receive(ctx context.Context, msg *types.MessageBody) error { +func (e *receiverRequest) Receive(ctx context.Context, msg *types.MessageBody) error { requester := remote.ToPeerID(msg.Sender) if err := e.addRequester(requester); err != nil { return fmt.Errorf("failed to add requester to request: %w", err) } if e.minimumRequiredRequestsReceived() && !e.hasResponse() { - e.executeRequest(ctx, msg.Payload) + if err := e.executeRequest(ctx, msg.Payload); err != nil { + e.setError(types.Error_INTERNAL_ERROR) + e.lggr.Errorw("failed to execute request", "error", err) + } } if err := e.sendResponses(); err != nil { @@ -76,32 +79,31 @@ func (e *receiverRequest) receive(ctx context.Context, msg *types.MessageBody) e return nil } -func (e *receiverRequest) executeRequest(ctx context.Context, payload []byte) { +func (e *receiverRequest) executeRequest(ctx context.Context, payload []byte) error { ctxWithTimeout, cancel := context.WithTimeout(ctx, e.requestTimeout) defer cancel() capabilityRequest, err := pb.UnmarshalCapabilityRequest(payload) if err != nil { - e.setError(types.Error_INVALID_REQUEST) - e.lggr.Errorw("failed to unmarshal capability request", "err", err) + return fmt.Errorf("failed to unmarshal capability request: %w", err) } capResponseCh, err := e.capability.Execute(ctxWithTimeout, capabilityRequest) if err != nil { - e.setError(types.Error_INTERNAL_ERROR) - e.lggr.Errorw("failed to execute capability", "err", err) + return fmt.Errorf("failed to execute capability: %w", err) } // TODO working on the assumption that the capability will only ever return one response from its channel (for now at least) capResponse := <-capResponseCh responsePayload, err := pb.MarshalCapabilityResponse(capResponse) if err != nil { - e.setError(types.Error_INTERNAL_ERROR) - e.lggr.Errorw("failed to marshal capability response", "err", err) + return fmt.Errorf("failed to marshal capability response: %w", err) } e.setResult(responsePayload) + + return nil } func (e *receiverRequest) addRequester(from p2ptypes.PeerID) error { diff --git a/core/capabilities/remote/target/receiver_request_test.go b/core/capabilities/remote/target/receiver_request_test.go index e3036d4a364..1a8d45c10ce 100644 --- a/core/capabilities/remote/target/receiver_request_test.go +++ b/core/capabilities/remote/target/receiver_request_test.go @@ -1,9 +1,201 @@ package target_test -import "testing" +import ( + "context" + "testing" + "time" -func Test_ReceiverRequest_RequestsFromNonDonPeerAreIgnored(t *testing.T) { + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" - request := newTarget + commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" + "github.com/smartcontractkit/chainlink-common/pkg/values" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" + "github.com/smartcontractkit/chainlink/v2/core/logger" + p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" +) +func Test_ReceiverRequest_MessageValidation(t *testing.T) { + lggr := logger.TestLogger(t) + capability := testCapability{} + capabilityPeerID := newP2PPeerID(t) + + numWorkflowPeers := 2 + workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) + for i := 0; i < numWorkflowPeers; i++ { + workflowPeers[i] = newP2PPeerID(t) + } + + callingDon := commoncap.DON{ + Members: workflowPeers, + ID: "workflow-don", + F: 1, + } + + dispatcher := &testDispatcher{} + + executeInputs, err := values.NewMap( + map[string]any{ + "executeValue1": "aValue1", + }, + ) + require.NoError(t, err) + + capabilityRequest := commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{ + WorkflowID: "workflowID", + WorkflowExecutionID: "workflowExecutionID", + }, + Inputs: executeInputs, + } + + rawRequest, err := pb.MarshalCapabilityRequest(capabilityRequest) + require.NoError(t, err) + + t.Run("Send duplicate message", func(t *testing.T) { + request := target.NewReceiverRequest(lggr, capability, "capabilityID", "capabilityDonID", + capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute) + + err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) + require.NoError(t, err) + err = sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) + assert.NotNil(t, err) + }) + + t.Run("Send message with non calling don peer", func(t *testing.T) { + request := target.NewReceiverRequest(lggr, capability, "capabilityID", "capabilityDonID", + capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute) + + err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) + require.NoError(t, err) + + nonDonPeer := newP2PPeerID(t) + err = request.Receive(context.Background(), &types.MessageBody{ + Version: 0, + Sender: nonDonPeer[:], + Receiver: capabilityPeerID[:], + MessageId: []byte("workflowID" + "workflowExecutionID"), + CapabilityId: "capabilityID", + CapabilityDonId: "capabilityDonID", + CallerDonId: "workflow-don", + Method: types.MethodExecute, + Payload: rawRequest, + }) + + assert.NotNil(t, err) + }) + + t.Run("Send message invalid payload", func(t *testing.T) { + request := target.NewReceiverRequest(lggr, capability, "capabilityID", "capabilityDonID", + capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute) + + err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) + require.NoError(t, err) + + err = request.Receive(context.Background(), &types.MessageBody{ + Version: 0, + Sender: workflowPeers[1][:], + Receiver: capabilityPeerID[:], + MessageId: []byte("workflowID" + "workflowExecutionID"), + CapabilityId: "capabilityID", + CapabilityDonId: "capabilityDonID", + CallerDonId: "workflow-don", + Method: types.MethodExecute, + Payload: append(rawRequest, []byte("asdf")...), + }) + assert.NoError(t, err) + assert.Equal(t, 2, len(dispatcher.msgs)) + assert.Equal(t, dispatcher.msgs[0].Error, types.Error_INTERNAL_ERROR) + assert.Equal(t, dispatcher.msgs[1].Error, types.Error_INTERNAL_ERROR) + + }) + + t.Run("Send second valid request when capability errors", func(t *testing.T) { + + dispatcher := &testDispatcher{} + request := target.NewReceiverRequest(lggr, testErrorCapability{}, "capabilityID", "capabilityDonID", + capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute) + + err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) + require.NoError(t, err) + + err = request.Receive(context.Background(), &types.MessageBody{ + Version: 0, + Sender: workflowPeers[1][:], + Receiver: capabilityPeerID[:], + MessageId: []byte("workflowID" + "workflowExecutionID"), + CapabilityId: "capabilityID", + CapabilityDonId: "capabilityDonID", + CallerDonId: "workflow-don", + Method: types.MethodExecute, + Payload: rawRequest, + }) + assert.NoError(t, err) + assert.Equal(t, 2, len(dispatcher.msgs)) + assert.Equal(t, dispatcher.msgs[0].Error, types.Error_INTERNAL_ERROR) + assert.Equal(t, dispatcher.msgs[1].Error, types.Error_INTERNAL_ERROR) + + }) + + t.Run("Send second valid request", func(t *testing.T) { + dispatcher := &testDispatcher{} + request := target.NewReceiverRequest(lggr, capability, "capabilityID", "capabilityDonID", + capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute) + + err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) + require.NoError(t, err) + + err = request.Receive(context.Background(), &types.MessageBody{ + Version: 0, + Sender: workflowPeers[1][:], + Receiver: capabilityPeerID[:], + MessageId: []byte("workflowID" + "workflowExecutionID"), + CapabilityId: "capabilityID", + CapabilityDonId: "capabilityDonID", + CallerDonId: "workflow-don", + Method: types.MethodExecute, + Payload: rawRequest, + }) + assert.NoError(t, err) + assert.Equal(t, 2, len(dispatcher.msgs)) + assert.Equal(t, dispatcher.msgs[0].Error, types.Error_OK) + assert.Equal(t, dispatcher.msgs[1].Error, types.Error_OK) + }) +} + +type receiverRequest interface { + Receive(ctx context.Context, msg *types.MessageBody) error +} + +func sendValidRequest(request receiverRequest, workflowPeers []p2ptypes.PeerID, capabilityPeerID p2ptypes.PeerID, + rawRequest []byte) error { + return request.Receive(context.Background(), &types.MessageBody{ + Version: 0, + Sender: workflowPeers[0][:], + Receiver: capabilityPeerID[:], + MessageId: []byte("workflowID" + "workflowExecutionID"), + CapabilityId: "capabilityID", + CapabilityDonId: "capabilityDonID", + CallerDonId: "workflow-don", + Method: types.MethodExecute, + Payload: rawRequest, + }) + +} + +type testDispatcher struct { + msgs []*types.MessageBody +} + +func (t *testDispatcher) SetReceiver(capabilityId string, donId string, receiver types.Receiver) error { + return nil +} + +func (t *testDispatcher) RemoveReceiver(capabilityId string, donId string) {} + +func (t *testDispatcher) Send(peerID p2ptypes.PeerID, msgBody *types.MessageBody) error { + t.msgs = append(t.msgs, msgBody) + return nil } diff --git a/core/capabilities/remote/target/receiver_test.go b/core/capabilities/remote/target/receiver_test.go index 77c5352a7bd..6f90e1479df 100644 --- a/core/capabilities/remote/target/receiver_test.go +++ b/core/capabilities/remote/target/receiver_test.go @@ -17,13 +17,13 @@ import ( p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) -func Test_RemoteTargetCapability_InsufficientWorkflowCallers(t *testing.T) { +func Test_Receiver_InsufficientWorkflowCallers(t *testing.T) { ctx, cancel := context.WithCancel(testutils.Context(t)) defer cancel() numCapabilityPeers := 4 - callers := testRemoteTargetReceiver(t, ctx, 10, 10, numCapabilityPeers, 3, 100*time.Millisecond) + callers := testRemoteTargetReceiver(t, ctx, &testCapability{}, 10, 10, numCapabilityPeers, 3, 100*time.Millisecond) for _, caller := range callers { caller.Execute(context.Background(), @@ -43,13 +43,13 @@ func Test_RemoteTargetCapability_InsufficientWorkflowCallers(t *testing.T) { } } -func Test_RemoteTargetCapability_IgnoresRequestFromIncorrectPeer(t *testing.T) { +func Test_Receiver_CapabilityError(t *testing.T) { ctx, cancel := context.WithCancel(testutils.Context(t)) defer cancel() numCapabilityPeers := 4 - callers := testRemoteTargetReceiver(t, ctx, 10, 9, numCapabilityPeers, 3, 100*time.Millisecond) + callers := testRemoteTargetReceiver(t, ctx, &testErrorCapability{}, 10, 9, numCapabilityPeers, 3, 100*time.Millisecond) for _, caller := range callers { caller.Execute(context.Background(), @@ -64,26 +64,24 @@ func Test_RemoteTargetCapability_IgnoresRequestFromIncorrectPeer(t *testing.T) { for _, caller := range callers { for i := 0; i < numCapabilityPeers; i++ { msg := <-caller.receivedMessages - assert.Equal(t, remotetypes.Error_TIMEOUT, msg.Error) + assert.Equal(t, remotetypes.Error_INTERNAL_ERROR, msg.Error) } } } -func testRemoteTargetReceiver(t *testing.T, ctx context.Context, numWorkflowPeers int, workflowDonF uint8, +func testRemoteTargetReceiver(t *testing.T, ctx context.Context, + underlying commoncap.TargetCapability, + numWorkflowPeers int, workflowDonF uint8, numCapabilityPeers int, capabilityDonF uint8, capabilityNodeResponseTimeout time.Duration) []*receiverTestCaller { lggr := logger.TestLogger(t) capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { - capabilityPeerID := p2ptypes.PeerID{} - require.NoError(t, capabilityPeerID.UnmarshalText([]byte(newPeerID()))) + capabilityPeerID := newP2PPeerID(t) capabilityPeers[i] = capabilityPeerID } - capabilityPeerID := p2ptypes.PeerID{} - require.NoError(t, capabilityPeerID.UnmarshalText([]byte(newPeerID()))) - capDonInfo := commoncap.DON{ ID: "capability-don", Members: capabilityPeers, @@ -100,9 +98,7 @@ func testRemoteTargetReceiver(t *testing.T, ctx context.Context, numWorkflowPeer workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { - workflowPeerID := p2ptypes.PeerID{} - require.NoError(t, workflowPeerID.UnmarshalText([]byte(newPeerID()))) - workflowPeers[i] = workflowPeerID + workflowPeers[i] = newP2PPeerID(t) } workflowDonInfo := commoncap.DON{ @@ -116,7 +112,6 @@ func testRemoteTargetReceiver(t *testing.T, ctx context.Context, numWorkflowPeer workflowDONs := map[string]commoncap.DON{ workflowDonInfo.ID: workflowDonInfo, } - underlying := &testCapability{} capabilityNodes := make([]remotetypes.Receiver, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { @@ -139,6 +134,12 @@ func testRemoteTargetReceiver(t *testing.T, ctx context.Context, numWorkflowPeer return workflowNodes } +func newP2PPeerID(t *testing.T) p2ptypes.PeerID { + id := p2ptypes.PeerID{} + require.NoError(t, id.UnmarshalText([]byte(newPeerID()))) + return id +} + type receiverTestCaller struct { peerID p2ptypes.PeerID dispatcher remotetypes.Dispatcher @@ -176,7 +177,7 @@ func (r *receiverTestCaller) Execute(ctx context.Context, req commoncap.Capabili return nil, err } - messageID, err := target.GetRequestID(req) + messageID, err := target.GetMessageIDForRequest(req) if err != nil { return nil, err } From 18fe585cf92b6679b86d692f42872f99b5c08299 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Tue, 28 May 2024 15:50:25 +0100 Subject: [PATCH 34/43] more tests --- core/capabilities/remote/target/caller.go | 7 +- .../remote/target/caller_request.go | 10 +- .../remote/target/caller_request_test.go | 190 ++++++++++++++++++ .../remote/target/endtoend_test.go | 32 +-- .../remote/target/receiver_test.go | 2 +- 5 files changed, 201 insertions(+), 40 deletions(-) create mode 100644 core/capabilities/remote/target/caller_request_test.go diff --git a/core/capabilities/remote/target/caller.go b/core/capabilities/remote/target/caller.go index 12aa965f5e1..ebe6cfcaf74 100644 --- a/core/capabilities/remote/target/caller.go +++ b/core/capabilities/remote/target/caller.go @@ -95,11 +95,11 @@ func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.Capabili return nil, fmt.Errorf("request for message ID %s already exists", messageID) } - execRequest, err := newCallerRequest(ctx, c.lggr, req, messageID, c.remoteCapabilityInfo, c.localDONInfo, c.dispatcher) + execRequest, err := NewCallerRequest(ctx, c.lggr, req, messageID, c.remoteCapabilityInfo, c.localDONInfo, c.dispatcher) c.messageIDToExecuteRequest[messageID] = execRequest - return execRequest.responseCh, nil + return execRequest.ResponseChan(), nil } func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { @@ -120,12 +120,11 @@ func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { return } - if err := req.addResponse(sender, msg.Payload); err != nil { + if err := req.AddResponse(sender, msg.Payload); err != nil { c.lggr.Errorw("failed to add response to request", "messageID", messageID, "sender", sender, "err", err) } } -// Move this into common? func GetMessageIDForRequest(req commoncap.CapabilityRequest) (string, error) { if req.Metadata.WorkflowID == "" || req.Metadata.WorkflowExecutionID == "" { return "", errors.New("workflow ID and workflow execution ID must be set in request metadata") diff --git a/core/capabilities/remote/target/caller_request.go b/core/capabilities/remote/target/caller_request.go index f9dbee7544c..bc74bd64de9 100644 --- a/core/capabilities/remote/target/caller_request.go +++ b/core/capabilities/remote/target/caller_request.go @@ -30,7 +30,7 @@ type callerRequest struct { respSent bool } -func newCallerRequest(ctx context.Context, lggr logger.Logger, req commoncap.CapabilityRequest, messageID string, +func NewCallerRequest(ctx context.Context, lggr logger.Logger, req commoncap.CapabilityRequest, messageID string, remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher) (*callerRequest, error) { remoteCapabilityDonInfo := remoteCapabilityInfo.DON @@ -90,12 +90,12 @@ func newCallerRequest(ctx context.Context, lggr logger.Logger, req commoncap.Cap }, nil } -func (c *callerRequest) responseSent() bool { - return c.respSent +func (c *callerRequest) ResponseChan() <-chan commoncap.CapabilityResponse { + return c.responseCh } // TODO addResponse assumes that only one response is received from each peer, if streaming responses need to be supported this will need to be updated -func (c *callerRequest) addResponse(sender p2ptypes.PeerID, response []byte) error { +func (c *callerRequest) AddResponse(sender p2ptypes.PeerID, response []byte) error { if _, ok := c.responseReceived[sender]; !ok { return fmt.Errorf("response from peer %s not expected", sender) } @@ -130,7 +130,7 @@ func (c *callerRequest) sendResponse(response commoncap.CapabilityResponse) { func (c *callerRequest) cancelRequest(reason string) { c.transmissionCancelFn() - if !c.responseSent() { + if !c.respSent { c.sendResponse(commoncap.CapabilityResponse{Err: errors.New(reason)}) } } diff --git a/core/capabilities/remote/target/caller_request_test.go b/core/capabilities/remote/target/caller_request_test.go new file mode 100644 index 00000000000..175b9012fac --- /dev/null +++ b/core/capabilities/remote/target/caller_request_test.go @@ -0,0 +1,190 @@ +package target_test + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" + "github.com/smartcontractkit/chainlink-common/pkg/values" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" + "github.com/smartcontractkit/chainlink/v2/core/logger" + p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" +) + +func Test_CallerRequest_MessageValidation(t *testing.T) { + lggr := logger.TestLogger(t) + + numCapabilityPeers := 2 + capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) + for i := 0; i < numCapabilityPeers; i++ { + capabilityPeers[i] = newP2PPeerID(t) + } + + capDonInfo := commoncap.DON{ + ID: "capability-don", + Members: capabilityPeers, + F: 1, + } + + capInfo := commoncap.CapabilityInfo{ + ID: "cap_id", + CapabilityType: commoncap.CapabilityTypeTarget, + Description: "Remote Target", + Version: "0.0.1", + DON: &capDonInfo, + } + + numWorkflowPeers := 2 + workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) + for i := 0; i < numWorkflowPeers; i++ { + workflowPeers[i] = newP2PPeerID(t) + } + + workflowDonInfo := commoncap.DON{ + Members: workflowPeers, + ID: "workflow-don", + } + + executeInputs, err := values.NewMap( + map[string]any{ + "executeValue1": "aValue1", + }, + ) + require.NoError(t, err) + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_OneAtATime, + "deltaStage": "1000ms", + }) + capabilityRequest := commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{ + WorkflowID: "workflowID", + WorkflowExecutionID: "workflowExecutionID", + }, + Inputs: executeInputs, + Config: transmissionSchedule, + } + + capabilityResponse := commoncap.CapabilityResponse{ + Value: values.NewString("response1"), + Err: nil, + } + + rawResponse, err := pb.MarshalCapabilityResponse(capabilityResponse) + require.NoError(t, err) + + t.Run("Send second message with different response", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + messageID, err := target.GetMessageIDForRequest(capabilityRequest) + require.NoError(t, err) + + dispatcher := &callerRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} + request, err := target.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, + workflowDonInfo, dispatcher) + require.NoError(t, err) + + capabilityResponse2 := commoncap.CapabilityResponse{ + Value: values.NewString("response2"), + Err: nil, + } + + rawResponse2, err := pb.MarshalCapabilityResponse(capabilityResponse2) + require.NoError(t, err) + + err = request.AddResponse(capabilityPeers[0], rawResponse) + require.NoError(t, err) + err = request.AddResponse(capabilityPeers[1], rawResponse2) + require.NoError(t, err) + + select { + case <-request.ResponseChan(): + t.Fatal("expected no response") + default: + } + }) + + t.Run("Send second message from non calling Don peer", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + messageID, err := target.GetMessageIDForRequest(capabilityRequest) + require.NoError(t, err) + + dispatcher := &callerRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} + request, err := target.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, + workflowDonInfo, dispatcher) + require.NoError(t, err) + + err = request.AddResponse(capabilityPeers[0], rawResponse) + require.NoError(t, err) + err = request.AddResponse(newP2PPeerID(t), rawResponse) + require.NotNil(t, err) + }) + + t.Run("Send second message from same peer as first message", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + messageID, err := target.GetMessageIDForRequest(capabilityRequest) + require.NoError(t, err) + + dispatcher := &callerRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} + request, err := target.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, + workflowDonInfo, dispatcher) + require.NoError(t, err) + + err = request.AddResponse(capabilityPeers[0], rawResponse) + require.NoError(t, err) + err = request.AddResponse(capabilityPeers[0], rawResponse) + require.NotNil(t, err) + }) + + t.Run("Send second valid message", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + messageID, err := target.GetMessageIDForRequest(capabilityRequest) + require.NoError(t, err) + + dispatcher := &callerRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} + request, err := target.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, + workflowDonInfo, dispatcher) + require.NoError(t, err) + + <-dispatcher.msgs + <-dispatcher.msgs + assert.Equal(t, 0, len(dispatcher.msgs)) + + err = request.AddResponse(capabilityPeers[0], rawResponse) + require.NoError(t, err) + err = request.AddResponse(capabilityPeers[1], rawResponse) + require.NoError(t, err) + + response := <-request.ResponseChan() + + assert.Equal(t, response.Value, values.NewString("response1")) + }) +} + +type callerRequestTestDispatcher struct { + msgs chan *types.MessageBody +} + +func (t *callerRequestTestDispatcher) SetReceiver(capabilityId string, donId string, receiver types.Receiver) error { + return nil +} + +func (t *callerRequestTestDispatcher) RemoveReceiver(capabilityId string, donId string) {} + +func (t *callerRequestTestDispatcher) Send(peerID p2ptypes.PeerID, msgBody *types.MessageBody) error { + t.msgs <- msgBody + return nil +} diff --git a/core/capabilities/remote/target/endtoend_test.go b/core/capabilities/remote/target/endtoend_test.go index bf6d4cd77af..696adffb347 100644 --- a/core/capabilities/remote/target/endtoend_test.go +++ b/core/capabilities/remote/target/endtoend_test.go @@ -86,42 +86,15 @@ func Test_RemoteTargetCapability_DonTopologies(t *testing.T) { testRemoteTarget(t, capability, 4, 3, timeOut, 4, 3, timeOut, transmissionSchedule, responseTest) testRemoteTarget(t, capability, 10, 3, timeOut, 10, 3, timeOut, transmissionSchedule, responseTest) testRemoteTarget(t, capability, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) - - /* - transmissionSchedule, err = values.NewMap(map[string]any{ - "schedule": transmission.Schedule_OneAtATime, - "deltaStage": "10ms", - }) - require.NoError(t, err) - - testRemoteTarget(t, 1, 0, 10*time.Minute, 1, 0, 10*time.Minute, transmissionSchedule, responseTest) - testRemoteTarget(t, 10, 3, 10*time.Minute, 10, 3, 10*time.Minute, transmissionSchedule, responseTest) - */ - //here - below tests plus additional tests for the remoteTargetCapability test - - //then got threading to do - - // Context cancellation test - use an underlying capability that blocks until the context is cancelled - - // Check request errors as expected and all error responses are received - - // Check that requests from an incorrect don are ignored? - - // Check that multiple requests from the same sender are ignored - } func Test_RemoteTargetCapability_CapabilityError(t *testing.T) { responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { require.NoError(t, responseError) response := <-responseCh - responseValue, err := response.Value.Unwrap() - require.NoError(t, err) - assert.Equal(t, "aValue1", responseValue.(string)) + assert.NotNil(t, response.Err) } - timeOut := 10 * time.Minute - capability := &testErrorCapability{} transmissionSchedule, err := values.NewMap(map[string]any{ @@ -130,8 +103,7 @@ func Test_RemoteTargetCapability_CapabilityError(t *testing.T) { }) require.NoError(t, err) - testRemoteTarget(t, capability, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) - + testRemoteTarget(t, capability, 10, 9, 10*time.Millisecond, 10, 9, 10*time.Minute, transmissionSchedule, responseTest) } func testRemoteTarget(t *testing.T, underlying commoncap.TargetCapability, numWorkflowPeers int, workflowDonF uint8, workflowNodeTimeout time.Duration, diff --git a/core/capabilities/remote/target/receiver_test.go b/core/capabilities/remote/target/receiver_test.go index 6f90e1479df..d0bae58b35e 100644 --- a/core/capabilities/remote/target/receiver_test.go +++ b/core/capabilities/remote/target/receiver_test.go @@ -17,7 +17,7 @@ import ( p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) -func Test_Receiver_InsufficientWorkflowCallers(t *testing.T) { +func Test_Receiver_InsufficientCallers(t *testing.T) { ctx, cancel := context.WithCancel(testutils.Context(t)) defer cancel() From e1427a0d2a019acdc05a0e64a6f2cda47960b89f Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Tue, 28 May 2024 16:08:42 +0100 Subject: [PATCH 35/43] wip --- .../capabilities/remote/target/caller_test.go | 56 +++++++------------ 1 file changed, 20 insertions(+), 36 deletions(-) diff --git a/core/capabilities/remote/target/caller_test.go b/core/capabilities/remote/target/caller_test.go index 54fbe3c0f68..cfe0d953877 100644 --- a/core/capabilities/remote/target/caller_test.go +++ b/core/capabilities/remote/target/caller_test.go @@ -20,7 +20,9 @@ import ( p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) -func Test_RemoteTargetCaller_DonTopologies(t *testing.T) { +func Test_Caller_DonTopologies(t *testing.T) { + ctx, cancel := context.WithCancel(testutils.Context(t)) + defer cancel() transmissionSchedule, err := values.NewMap(map[string]any{ "schedule": transmission.Schedule_OneAtATime, @@ -40,24 +42,26 @@ func Test_RemoteTargetCaller_DonTopologies(t *testing.T) { responseTimeOut := 10 * time.Minute - testRemoteTargetCaller(t, 1, responseTimeOut, 1, 0, + testCaller(t, ctx, 1, responseTimeOut, 1, 0, capability, transmissionSchedule, responseTest) - testRemoteTargetCaller(t, 10, responseTimeOut, 1, 0, + testCaller(t, ctx, 10, responseTimeOut, 1, 0, capability, transmissionSchedule, responseTest) - testRemoteTargetCaller(t, 1, responseTimeOut, 10, 3, + testCaller(t, ctx, 1, responseTimeOut, 10, 3, capability, transmissionSchedule, responseTest) - testRemoteTargetCaller(t, 10, responseTimeOut, 10, 3, + testCaller(t, ctx, 10, responseTimeOut, 10, 3, capability, transmissionSchedule, responseTest) - testRemoteTargetCaller(t, 10, responseTimeOut, 10, 9, + testCaller(t, ctx, 10, responseTimeOut, 10, 9, capability, transmissionSchedule, responseTest) } -func Test_RemoteTargetCaller_TransmissionSchedules(t *testing.T) { +func Test_Caller_TransmissionSchedules(t *testing.T) { + ctx, cancel := context.WithCancel(testutils.Context(t)) + defer cancel() responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { require.NoError(t, responseError) @@ -77,9 +81,9 @@ func Test_RemoteTargetCaller_TransmissionSchedules(t *testing.T) { }) require.NoError(t, err) - testRemoteTargetCaller(t, 1, responseTimeOut, 1, 0, + testCaller(t, ctx, 1, responseTimeOut, 1, 0, capability, transmissionSchedule, responseTest) - testRemoteTargetCaller(t, 10, responseTimeOut, 10, 3, + testCaller(t, ctx, 10, responseTimeOut, 10, 3, capability, transmissionSchedule, responseTest) transmissionSchedule, err = values.NewMap(map[string]any{ @@ -88,14 +92,16 @@ func Test_RemoteTargetCaller_TransmissionSchedules(t *testing.T) { }) require.NoError(t, err) - testRemoteTargetCaller(t, 1, responseTimeOut, 1, 0, + testCaller(t, ctx, 1, responseTimeOut, 1, 0, capability, transmissionSchedule, responseTest) - testRemoteTargetCaller(t, 10, responseTimeOut, 10, 3, + testCaller(t, ctx, 10, responseTimeOut, 10, 3, capability, transmissionSchedule, responseTest) } -func Test_RemoteTargetCaller_TimesOutIfRespondingCapabilityPeersLessThenFPlusOne(t *testing.T) { +func Test_Caller_TimesOutIfRespondingCapabilityPeersLessThenFPlusOne(t *testing.T) { + ctx, cancel := context.WithCancel(testutils.Context(t)) + defer cancel() responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { require.NoError(t, responseError) @@ -112,38 +118,16 @@ func Test_RemoteTargetCaller_TimesOutIfRespondingCapabilityPeersLessThenFPlusOne require.NoError(t, err) // number of capability peers is less than F + 1 - testRemoteTargetCaller(t, 10, 1*time.Second, 10, 11, - capability, transmissionSchedule, responseTest) -} - -func Test_RemoteTargetCaller_TimesOutIfTransmissionScheduleExceedsTimeout(t *testing.T) { - - responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { - require.NoError(t, responseError) - response := <-responseCh - assert.NotNil(t, response.Err) - } - - capability := &testCapability{} - - transmissionSchedule, err := values.NewMap(map[string]any{ - "schedule": transmission.Schedule_OneAtATime, - "deltaStage": "1000ms", - }) - require.NoError(t, err) - - testRemoteTargetCaller(t, 10, 1*time.Second, 10, 7, + testCaller(t, ctx, 10, 1*time.Second, 10, 11, capability, transmissionSchedule, responseTest) } -func testRemoteTargetCaller(t *testing.T, numWorkflowPeers int, workflowNodeResponseTimeout time.Duration, +func testCaller(t *testing.T, ctx context.Context, numWorkflowPeers int, workflowNodeResponseTimeout time.Duration, numCapabilityPeers int, capabilityDonF uint8, underlying commoncap.TargetCapability, transmissionSchedule *values.Map, responseTest func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error)) { lggr := logger.TestLogger(t) - ctx, cancel := context.WithCancel(testutils.Context(t)) - defer cancel() capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { From a0fc1a71adc953f9bc862a67e385b0ec9d2e59c0 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Tue, 28 May 2024 17:54:33 +0100 Subject: [PATCH 36/43] error case tests --- core/capabilities/remote/target/caller.go | 8 ++--- .../remote/target/caller_request.go | 18 ++++------ .../remote/target/endtoend_test.go | 35 +++++++++++-------- core/capabilities/remote/target/receiver.go | 6 ++-- 4 files changed, 33 insertions(+), 34 deletions(-) diff --git a/core/capabilities/remote/target/caller.go b/core/capabilities/remote/target/caller.go index ebe6cfcaf74..f98a9aa1770 100644 --- a/core/capabilities/remote/target/caller.go +++ b/core/capabilities/remote/target/caller.go @@ -42,13 +42,13 @@ func NewRemoteTargetCaller(ctx context.Context, lggr logger.Logger, remoteCapabi } go func() { - timer := time.NewTimer(requestTimeout) - defer timer.Stop() + ticker := time.NewTicker(requestTimeout) + defer ticker.Stop() for { select { case <-ctx.Done(): return - case <-timer.C: + case <-ticker.C: caller.ExpireRequests() } } @@ -64,9 +64,9 @@ func (c *remoteTargetCaller) ExpireRequests() { for messageID, req := range c.messageIDToExecuteRequest { if time.Since(req.createdAt) > c.requestTimeout { req.cancelRequest("request timed out") + delete(c.messageIDToExecuteRequest, messageID) } - delete(c.messageIDToExecuteRequest, messageID) } } diff --git a/core/capabilities/remote/target/caller_request.go b/core/capabilities/remote/target/caller_request.go index bc74bd64de9..c1c57b67fad 100644 --- a/core/capabilities/remote/target/caller_request.go +++ b/core/capabilities/remote/target/caller_request.go @@ -18,12 +18,10 @@ import ( ) type callerRequest struct { - transmissionCtx context.Context - responseCh chan commoncap.CapabilityResponse - transmissionCancelFn context.CancelFunc - createdAt time.Time - responseIDCount map[[32]byte]int - responseReceived map[p2ptypes.PeerID]bool + responseCh chan commoncap.CapabilityResponse + createdAt time.Time + responseIDCount map[[32]byte]int + responseReceived map[p2ptypes.PeerID]bool requiredIdenticalResponses int @@ -54,7 +52,6 @@ func NewCallerRequest(ctx context.Context, lggr logger.Logger, req commoncap.Cap return nil, fmt.Errorf("failed to get peer ID to transmission delay: %w", err) } - transmissionCtx, transmissionCancelFn := context.WithCancel(ctx) responseReceived := make(map[p2ptypes.PeerID]bool) for peerID, delay := range peerIDToTransmissionDelay { responseReceived[peerID] = false @@ -69,10 +66,10 @@ func NewCallerRequest(ctx context.Context, lggr logger.Logger, req commoncap.Cap } select { - case <-transmissionCtx.Done(): + case <-ctx.Done(): return case <-time.After(delay): - err = dispatcher.Send(peerID, message) + err := dispatcher.Send(peerID, message) if err != nil { lggr.Errorw("failed to send message", "peerID", peerID, "err", err) } @@ -82,7 +79,6 @@ func NewCallerRequest(ctx context.Context, lggr logger.Logger, req commoncap.Cap return &callerRequest{ createdAt: time.Now(), - transmissionCancelFn: transmissionCancelFn, requiredIdenticalResponses: int(remoteCapabilityDonInfo.F + 1), responseIDCount: make(map[[32]byte]int), responseReceived: responseReceived, @@ -124,12 +120,10 @@ func (c *callerRequest) AddResponse(sender p2ptypes.PeerID, response []byte) err func (c *callerRequest) sendResponse(response commoncap.CapabilityResponse) { c.responseCh <- response close(c.responseCh) - c.transmissionCancelFn() c.respSent = true } func (c *callerRequest) cancelRequest(reason string) { - c.transmissionCancelFn() if !c.respSent { c.sendResponse(commoncap.CapabilityResponse{Err: errors.New(reason)}) } diff --git a/core/capabilities/remote/target/endtoend_test.go b/core/capabilities/remote/target/endtoend_test.go index 696adffb347..e6027778875 100644 --- a/core/capabilities/remote/target/endtoend_test.go +++ b/core/capabilities/remote/target/endtoend_test.go @@ -23,6 +23,8 @@ import ( ) func Test_RemoteTargetCapability_TransmissionSchedules(t *testing.T) { + ctx, cancel := context.WithCancel(testutils.Context(t)) + defer cancel() responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { require.NoError(t, responseError) @@ -42,7 +44,7 @@ func Test_RemoteTargetCapability_TransmissionSchedules(t *testing.T) { capability := &testCapability{} - testRemoteTarget(t, capability, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, ctx, capability, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) transmissionSchedule, err = values.NewMap(map[string]any{ "schedule": transmission.Schedule_AllAtOnce, @@ -50,11 +52,13 @@ func Test_RemoteTargetCapability_TransmissionSchedules(t *testing.T) { }) require.NoError(t, err) - testRemoteTarget(t, capability, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, ctx, capability, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) } func Test_RemoteTargetCapability_DonTopologies(t *testing.T) { + ctx, cancel := context.WithCancel(testutils.Context(t)) + defer cancel() responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { require.NoError(t, responseError) @@ -75,20 +79,23 @@ func Test_RemoteTargetCapability_DonTopologies(t *testing.T) { capability := &testCapability{} // Test scenarios where the number of submissions is greater than or equal to F + 1 - testRemoteTarget(t, capability, 1, 0, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(t, capability, 4, 3, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(t, capability, 10, 3, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, ctx, capability, 1, 0, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, ctx, capability, 4, 3, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, ctx, capability, 10, 3, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(t, capability, 1, 0, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(t, capability, 1, 0, timeOut, 4, 3, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(t, capability, 1, 0, timeOut, 10, 3, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, ctx, capability, 1, 0, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, ctx, capability, 1, 0, timeOut, 4, 3, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, ctx, capability, 1, 0, timeOut, 10, 3, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(t, capability, 4, 3, timeOut, 4, 3, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(t, capability, 10, 3, timeOut, 10, 3, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(t, capability, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, ctx, capability, 4, 3, timeOut, 4, 3, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, ctx, capability, 10, 3, timeOut, 10, 3, timeOut, transmissionSchedule, responseTest) + testRemoteTarget(t, ctx, capability, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) } func Test_RemoteTargetCapability_CapabilityError(t *testing.T) { + ctx, cancel := context.WithCancel(testutils.Context(t)) + defer cancel() + responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { require.NoError(t, responseError) response := <-responseCh @@ -103,15 +110,13 @@ func Test_RemoteTargetCapability_CapabilityError(t *testing.T) { }) require.NoError(t, err) - testRemoteTarget(t, capability, 10, 9, 10*time.Millisecond, 10, 9, 10*time.Minute, transmissionSchedule, responseTest) + testRemoteTarget(t, ctx, capability, 10, 9, 10*time.Millisecond, 10, 9, 10*time.Minute, transmissionSchedule, responseTest) } -func testRemoteTarget(t *testing.T, underlying commoncap.TargetCapability, numWorkflowPeers int, workflowDonF uint8, workflowNodeTimeout time.Duration, +func testRemoteTarget(t *testing.T, ctx context.Context, underlying commoncap.TargetCapability, numWorkflowPeers int, workflowDonF uint8, workflowNodeTimeout time.Duration, numCapabilityPeers int, capabilityDonF uint8, capabilityNodeResponseTimeout time.Duration, transmissionSchedule *values.Map, responseTest func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error)) { lggr := logger.TestLogger(t) - ctx, cancel := context.WithCancel(testutils.Context(t)) - defer cancel() capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { diff --git a/core/capabilities/remote/target/receiver.go b/core/capabilities/remote/target/receiver.go index 5e048d42f4e..a11d921d1c7 100644 --- a/core/capabilities/remote/target/receiver.go +++ b/core/capabilities/remote/target/receiver.go @@ -50,13 +50,13 @@ func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, peerID p2p } go func() { - timer := time.NewTimer(requestTimeout) - defer timer.Stop() + ticker := time.NewTicker(requestTimeout) + defer ticker.Stop() for { select { case <-ctx.Done(): return - case <-timer.C: + case <-ticker.C: receiver.ExpireRequests() } } From f80dcdc3d6cf616d87690522c7f610948c8b77d2 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Tue, 28 May 2024 18:04:58 +0100 Subject: [PATCH 37/43] more tests --- .../capabilities/remote/target/caller_test.go | 2 +- .../remote/target/endtoend_test.go | 44 +++++++++++++++++++ .../remote/target/receiver_test.go | 26 +++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) diff --git a/core/capabilities/remote/target/caller_test.go b/core/capabilities/remote/target/caller_test.go index cfe0d953877..9b3a315586d 100644 --- a/core/capabilities/remote/target/caller_test.go +++ b/core/capabilities/remote/target/caller_test.go @@ -99,7 +99,7 @@ func Test_Caller_TransmissionSchedules(t *testing.T) { } -func Test_Caller_TimesOutIfRespondingCapabilityPeersLessThenFPlusOne(t *testing.T) { +func Test_Caller_TimesOutIfInsufficientCapabilityPeerResponses(t *testing.T) { ctx, cancel := context.WithCancel(testutils.Context(t)) defer cancel() diff --git a/core/capabilities/remote/target/endtoend_test.go b/core/capabilities/remote/target/endtoend_test.go index e6027778875..285ceeda364 100644 --- a/core/capabilities/remote/target/endtoend_test.go +++ b/core/capabilities/remote/target/endtoend_test.go @@ -22,6 +22,50 @@ import ( p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) +func Test_RemoteTargetCapability_InsufficientCapabilityResponses(t *testing.T) { + ctx, cancel := context.WithCancel(testutils.Context(t)) + defer cancel() + + responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + require.NoError(t, responseError) + response := <-responseCh + assert.NotNil(t, response.Err) + } + + capability := &testCapability{} + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + testRemoteTarget(t, ctx, capability, 10, 9, 10*time.Millisecond, 10, 10, 10*time.Minute, transmissionSchedule, responseTest) +} + +func Test_RemoteTargetCapability_InsufficientWorkflowRequests(t *testing.T) { + ctx, cancel := context.WithCancel(testutils.Context(t)) + defer cancel() + + responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + require.NoError(t, responseError) + response := <-responseCh + assert.NotNil(t, response.Err) + } + + timeOut := 10 * time.Minute + + capability := &testCapability{} + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + testRemoteTarget(t, ctx, capability, 10, 10, 10*time.Millisecond, 10, 9, timeOut, transmissionSchedule, responseTest) +} + func Test_RemoteTargetCapability_TransmissionSchedules(t *testing.T) { ctx, cancel := context.WithCancel(testutils.Context(t)) defer cancel() diff --git a/core/capabilities/remote/target/receiver_test.go b/core/capabilities/remote/target/receiver_test.go index d0bae58b35e..8a4befc60f9 100644 --- a/core/capabilities/remote/target/receiver_test.go +++ b/core/capabilities/remote/target/receiver_test.go @@ -17,6 +17,32 @@ import ( p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) +func Test_Receiver_RespondsAfterSufficientRequests(t *testing.T) { + ctx, cancel := context.WithCancel(testutils.Context(t)) + defer cancel() + + numCapabilityPeers := 4 + + callers := testRemoteTargetReceiver(t, ctx, &testCapability{}, 10, 9, numCapabilityPeers, 3, 10*time.Minute) + + for _, caller := range callers { + caller.Execute(context.Background(), + commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{ + WorkflowID: "workflowID", + WorkflowExecutionID: "workflowExecutionID", + }, + }) + } + + for _, caller := range callers { + for i := 0; i < numCapabilityPeers; i++ { + msg := <-caller.receivedMessages + assert.Equal(t, remotetypes.Error_OK, msg.Error) + } + } +} + func Test_Receiver_InsufficientCallers(t *testing.T) { ctx, cancel := context.WithCancel(testutils.Context(t)) defer cancel() From 8fc297232d7cdd90820c7bd3db290a649481c68d Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Tue, 28 May 2024 18:17:16 +0100 Subject: [PATCH 38/43] wip --- core/capabilities/remote/target/receiver.go | 31 +++++++++---------- .../target/{ => request}/receiver_request.go | 26 +++++++++++++++- .../{ => request}/receiver_request_test.go | 21 +++++++------ 3 files changed, 50 insertions(+), 28 deletions(-) rename core/capabilities/remote/target/{ => request}/receiver_request.go (91%) rename core/capabilities/remote/target/{ => request}/receiver_request_test.go (89%) diff --git a/core/capabilities/remote/target/receiver.go b/core/capabilities/remote/target/receiver.go index a11d921d1c7..3258e748c48 100644 --- a/core/capabilities/remote/target/receiver.go +++ b/core/capabilities/remote/target/receiver.go @@ -8,6 +8,7 @@ import ( "time" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target/request" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" @@ -15,6 +16,11 @@ import ( "github.com/smartcontractkit/chainlink/v2/core/logger" ) +type receiverRequest interface { + Receive(ctx context.Context, msg *types.MessageBody) error + Expired() bool +} + type remoteTargetReceiver struct { lggr logger.Logger peerID p2ptypes.PeerID @@ -24,7 +30,7 @@ type remoteTargetReceiver struct { workflowDONs map[string]commoncap.DON dispatcher types.Dispatcher - requestIDToRequest map[string]*receiverRequest + requestIDToRequest map[string]receiverRequest requestTimeout time.Duration receiveLock sync.Mutex @@ -43,7 +49,7 @@ func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, peerID p2p workflowDONs: workflowDONs, dispatcher: dispatcher, - requestIDToRequest: map[string]*receiverRequest{}, + requestIDToRequest: map[string]receiverRequest{}, requestTimeout: requestTimeout, lggr: lggr, @@ -69,31 +75,22 @@ func (r *remoteTargetReceiver) ExpireRequests() { r.receiveLock.Lock() defer r.receiveLock.Unlock() - for messageId, executeReq := range r.requestIDToRequest { - if time.Since(executeReq.createdTime) > r.requestTimeout { - - if !executeReq.hasResponse() { - executeReq.setError(types.Error_TIMEOUT) - if err := executeReq.sendResponses(); err != nil { - r.lggr.Errorw("failed to send timeout response to all requesters", "capabilityId", r.capInfo.ID, "err", err) - } - } - - delete(r.requestIDToRequest, messageId) + for requestID, executeReq := range r.requestIDToRequest { + if executeReq.Expired() { + delete(r.requestIDToRequest, requestID) } - } } func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { + r.receiveLock.Lock() + defer r.receiveLock.Unlock() // TODO should the dispatcher be passing in a context? ctx := context.Background() // TODO Confirm threading semantics of dispatcher Receive // TODO May want to have executor per message id to improve liveness - r.receiveLock.Lock() - defer r.receiveLock.Unlock() // TODO multithread this @@ -109,7 +106,7 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { if _, ok := r.requestIDToRequest[requestID]; !ok { if callingDon, ok := r.workflowDONs[msg.CallerDonId]; ok { - r.requestIDToRequest[requestID] = NewReceiverRequest(r.lggr, r.underlying, r.capInfo.ID, r.localDonInfo.ID, r.peerID, + r.requestIDToRequest[requestID] = request.NewReceiverRequest(r.lggr, r.underlying, r.capInfo.ID, r.localDonInfo.ID, r.peerID, callingDon, messageId, r.dispatcher, r.requestTimeout) } else { r.lggr.Errorw("received request from unregistered workflow don", "donId", msg.CallerDonId) diff --git a/core/capabilities/remote/target/receiver_request.go b/core/capabilities/remote/target/request/receiver_request.go similarity index 91% rename from core/capabilities/remote/target/receiver_request.go rename to core/capabilities/remote/target/request/receiver_request.go index 1029a203e0d..1a0038a2511 100644 --- a/core/capabilities/remote/target/receiver_request.go +++ b/core/capabilities/remote/target/request/receiver_request.go @@ -1,8 +1,9 @@ -package target +package request import ( "context" "fmt" + "sync" "time" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" @@ -38,6 +39,8 @@ type receiverRequest struct { requestMessageID string requestTimeout time.Duration + + mux sync.Mutex } func NewReceiverRequest(lggr logger.Logger, capability capabilities.TargetCapability, capabilityID string, capabilityDonID string, capabilityPeerId p2ptypes.PeerID, @@ -60,6 +63,9 @@ func NewReceiverRequest(lggr logger.Logger, capability capabilities.TargetCapabi } func (e *receiverRequest) Receive(ctx context.Context, msg *types.MessageBody) error { + e.mux.Lock() + defer e.mux.Unlock() + requester := remote.ToPeerID(msg.Sender) if err := e.addRequester(requester); err != nil { return fmt.Errorf("failed to add requester to request: %w", err) @@ -79,6 +85,24 @@ func (e *receiverRequest) Receive(ctx context.Context, msg *types.MessageBody) e return nil } +func (e *receiverRequest) Expired() bool { + e.mux.Lock() + defer e.mux.Unlock() + + if time.Since(e.createdTime) > e.requestTimeout { + if !e.hasResponse() { + e.setError(types.Error_TIMEOUT) + if err := e.sendResponses(); err != nil { + e.lggr.Errorw("failed to send timeout response to all requesters", "capabilityId", e.capabilityID, "err", err) + } + } + + return true + } + + return false +} + func (e *receiverRequest) executeRequest(ctx context.Context, payload []byte) error { ctxWithTimeout, cancel := context.WithTimeout(ctx, e.requestTimeout) defer cancel() diff --git a/core/capabilities/remote/target/receiver_request_test.go b/core/capabilities/remote/target/request/receiver_request_test.go similarity index 89% rename from core/capabilities/remote/target/receiver_request_test.go rename to core/capabilities/remote/target/request/receiver_request_test.go index 1a8d45c10ce..32bf9315b49 100644 --- a/core/capabilities/remote/target/receiver_request_test.go +++ b/core/capabilities/remote/target/request/receiver_request_test.go @@ -1,4 +1,4 @@ -package target_test +package request_test import ( "context" @@ -12,6 +12,7 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink-common/pkg/values" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target/request" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/logger" p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" @@ -19,13 +20,13 @@ import ( func Test_ReceiverRequest_MessageValidation(t *testing.T) { lggr := logger.TestLogger(t) - capability := testCapability{} - capabilityPeerID := newP2PPeerID(t) + capability := target.testCapability{} + capabilityPeerID := target.newP2PPeerID(t) numWorkflowPeers := 2 workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { - workflowPeers[i] = newP2PPeerID(t) + workflowPeers[i] = target.newP2PPeerID(t) } callingDon := commoncap.DON{ @@ -55,7 +56,7 @@ func Test_ReceiverRequest_MessageValidation(t *testing.T) { require.NoError(t, err) t.Run("Send duplicate message", func(t *testing.T) { - request := target.NewReceiverRequest(lggr, capability, "capabilityID", "capabilityDonID", + request := request.NewReceiverRequest(lggr, capability, "capabilityID", "capabilityDonID", capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute) err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) @@ -65,13 +66,13 @@ func Test_ReceiverRequest_MessageValidation(t *testing.T) { }) t.Run("Send message with non calling don peer", func(t *testing.T) { - request := target.NewReceiverRequest(lggr, capability, "capabilityID", "capabilityDonID", + request := request.NewReceiverRequest(lggr, capability, "capabilityID", "capabilityDonID", capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute) err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) require.NoError(t, err) - nonDonPeer := newP2PPeerID(t) + nonDonPeer := target.newP2PPeerID(t) err = request.Receive(context.Background(), &types.MessageBody{ Version: 0, Sender: nonDonPeer[:], @@ -88,7 +89,7 @@ func Test_ReceiverRequest_MessageValidation(t *testing.T) { }) t.Run("Send message invalid payload", func(t *testing.T) { - request := target.NewReceiverRequest(lggr, capability, "capabilityID", "capabilityDonID", + request := request.NewReceiverRequest(lggr, capability, "capabilityID", "capabilityDonID", capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute) err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) @@ -115,7 +116,7 @@ func Test_ReceiverRequest_MessageValidation(t *testing.T) { t.Run("Send second valid request when capability errors", func(t *testing.T) { dispatcher := &testDispatcher{} - request := target.NewReceiverRequest(lggr, testErrorCapability{}, "capabilityID", "capabilityDonID", + request := request.NewReceiverRequest(lggr, target.testErrorCapability{}, "capabilityID", "capabilityDonID", capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute) err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) @@ -141,7 +142,7 @@ func Test_ReceiverRequest_MessageValidation(t *testing.T) { t.Run("Send second valid request", func(t *testing.T) { dispatcher := &testDispatcher{} - request := target.NewReceiverRequest(lggr, capability, "capabilityID", "capabilityDonID", + request := request.NewReceiverRequest(lggr, capability, "capabilityID", "capabilityDonID", capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute) err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) From eba06697634c87fe77aedb5b377de54ed11a58c7 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Tue, 28 May 2024 18:41:21 +0100 Subject: [PATCH 39/43] wip --- core/capabilities/remote/target/caller.go | 43 ++++++----- .../capabilities/remote/target/caller_test.go | 10 +-- .../remote/target/endtoend_test.go | 34 +++++---- core/capabilities/remote/target/receiver.go | 18 ++--- .../remote/target/receiver_test.go | 17 ++--- .../target/{ => request}/caller_request.go | 17 ++++- .../{ => request}/caller_request_test.go | 26 ++++--- .../target/request/receiver_request_test.go | 75 +++++++++++++++++-- 8 files changed, 162 insertions(+), 78 deletions(-) rename core/capabilities/remote/target/{ => request}/caller_request.go (92%) rename core/capabilities/remote/target/{ => request}/caller_request_test.go (86%) diff --git a/core/capabilities/remote/target/caller.go b/core/capabilities/remote/target/caller.go index f98a9aa1770..a4e2bc2f1c6 100644 --- a/core/capabilities/remote/target/caller.go +++ b/core/capabilities/remote/target/caller.go @@ -10,35 +10,43 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/capabilities" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target/request" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/logger" + p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) -// remoteTargetCaller/Receiver are shims translating between capability API calls and network messages -type remoteTargetCaller struct { +type callerRequest interface { + AddResponse(sender p2ptypes.PeerID, response []byte) error + ResponseChan() <-chan commoncap.CapabilityResponse + Expired() bool +} + +// caller/Receiver are shims translating between capability API calls and network messages +type caller struct { lggr logger.Logger remoteCapabilityInfo commoncap.CapabilityInfo localDONInfo capabilities.DON dispatcher types.Dispatcher requestTimeout time.Duration - messageIDToExecuteRequest map[string]*callerRequest + messageIDToExecuteRequest map[string]callerRequest mutex sync.Mutex } -var _ commoncap.TargetCapability = &remoteTargetCaller{} -var _ types.Receiver = &remoteTargetCaller{} +var _ commoncap.TargetCapability = &caller{} +var _ types.Receiver = &caller{} func NewRemoteTargetCaller(ctx context.Context, lggr logger.Logger, remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher, - requestTimeout time.Duration) *remoteTargetCaller { + requestTimeout time.Duration) *caller { - caller := &remoteTargetCaller{ + caller := &caller{ lggr: lggr, remoteCapabilityInfo: remoteCapabilityInfo, localDONInfo: localDonInfo, dispatcher: dispatcher, requestTimeout: requestTimeout, - messageIDToExecuteRequest: make(map[string]*callerRequest), + messageIDToExecuteRequest: make(map[string]callerRequest), } go func() { @@ -57,32 +65,30 @@ func NewRemoteTargetCaller(ctx context.Context, lggr logger.Logger, remoteCapabi return caller } -func (c *remoteTargetCaller) ExpireRequests() { +func (c *caller) ExpireRequests() { c.mutex.Lock() defer c.mutex.Unlock() for messageID, req := range c.messageIDToExecuteRequest { - if time.Since(req.createdAt) > c.requestTimeout { - req.cancelRequest("request timed out") + if req.Expired() { delete(c.messageIDToExecuteRequest, messageID) } - } } -func (c *remoteTargetCaller) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { +func (c *caller) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { return c.remoteCapabilityInfo, nil } -func (c *remoteTargetCaller) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { +func (c *caller) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { return errors.New("not implemented") } -func (c *remoteTargetCaller) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { +func (c *caller) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { return errors.New("not implemented") } -func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { +func (c *caller) Execute(ctx context.Context, req commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { c.mutex.Lock() defer c.mutex.Unlock() @@ -95,14 +101,15 @@ func (c *remoteTargetCaller) Execute(ctx context.Context, req commoncap.Capabili return nil, fmt.Errorf("request for message ID %s already exists", messageID) } - execRequest, err := NewCallerRequest(ctx, c.lggr, req, messageID, c.remoteCapabilityInfo, c.localDONInfo, c.dispatcher) + execRequest, err := request.NewCallerRequest(ctx, c.lggr, req, messageID, c.remoteCapabilityInfo, c.localDONInfo, c.dispatcher, + c.requestTimeout) c.messageIDToExecuteRequest[messageID] = execRequest return execRequest.ResponseChan(), nil } -func (c *remoteTargetCaller) Receive(msg *types.MessageBody) { +func (c *caller) Receive(msg *types.MessageBody) { c.mutex.Lock() defer c.mutex.Unlock() diff --git a/core/capabilities/remote/target/caller_test.go b/core/capabilities/remote/target/caller_test.go index 9b3a315586d..e9880de0362 100644 --- a/core/capabilities/remote/target/caller_test.go +++ b/core/capabilities/remote/target/caller_test.go @@ -38,7 +38,7 @@ func Test_Caller_DonTopologies(t *testing.T) { assert.Equal(t, "aValue1", responseValue.(string)) } - capability := &testCapability{} + capability := &TestCapability{} responseTimeOut := 10 * time.Minute @@ -71,7 +71,7 @@ func Test_Caller_TransmissionSchedules(t *testing.T) { assert.Equal(t, "aValue1", responseValue.(string)) } - capability := &testCapability{} + capability := &TestCapability{} responseTimeOut := 10 * time.Minute @@ -109,7 +109,7 @@ func Test_Caller_TimesOutIfInsufficientCapabilityPeerResponses(t *testing.T) { assert.NotNil(t, response.Err) } - capability := &testCapability{} + capability := &TestCapability{} transmissionSchedule, err := values.NewMap(map[string]any{ "schedule": transmission.Schedule_AllAtOnce, @@ -131,7 +131,7 @@ func testCaller(t *testing.T, ctx context.Context, numWorkflowPeers int, workflo capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { - capabilityPeers[i] = newP2PPeerID(t) + capabilityPeers[i] = NewP2PPeerID(t) } capDonInfo := commoncap.DON{ @@ -150,7 +150,7 @@ func testCaller(t *testing.T, ctx context.Context, numWorkflowPeers int, workflo workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { - workflowPeers[i] = newP2PPeerID(t) + workflowPeers[i] = NewP2PPeerID(t) } workflowDonInfo := commoncap.DON{ diff --git a/core/capabilities/remote/target/endtoend_test.go b/core/capabilities/remote/target/endtoend_test.go index 285ceeda364..33a1a91bb6e 100644 --- a/core/capabilities/remote/target/endtoend_test.go +++ b/core/capabilities/remote/target/endtoend_test.go @@ -3,12 +3,12 @@ package target_test import ( "context" "crypto/rand" + "errors" "sync" "testing" "time" "github.com/mr-tron/base58" - "github.com/pkg/errors" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -32,7 +32,7 @@ func Test_RemoteTargetCapability_InsufficientCapabilityResponses(t *testing.T) { assert.NotNil(t, response.Err) } - capability := &testCapability{} + capability := &TestCapability{} transmissionSchedule, err := values.NewMap(map[string]any{ "schedule": transmission.Schedule_AllAtOnce, @@ -55,7 +55,7 @@ func Test_RemoteTargetCapability_InsufficientWorkflowRequests(t *testing.T) { timeOut := 10 * time.Minute - capability := &testCapability{} + capability := &TestCapability{} transmissionSchedule, err := values.NewMap(map[string]any{ "schedule": transmission.Schedule_AllAtOnce, @@ -86,7 +86,7 @@ func Test_RemoteTargetCapability_TransmissionSchedules(t *testing.T) { timeOut := 10 * time.Minute - capability := &testCapability{} + capability := &TestCapability{} testRemoteTarget(t, ctx, capability, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) @@ -120,7 +120,7 @@ func Test_RemoteTargetCapability_DonTopologies(t *testing.T) { timeOut := 10 * time.Minute - capability := &testCapability{} + capability := &TestCapability{} // Test scenarios where the number of submissions is greater than or equal to F + 1 testRemoteTarget(t, ctx, capability, 1, 0, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) @@ -146,7 +146,7 @@ func Test_RemoteTargetCapability_CapabilityError(t *testing.T) { assert.NotNil(t, response.Err) } - capability := &testErrorCapability{} + capability := &TestErrorCapability{} transmissionSchedule, err := values.NewMap(map[string]any{ "schedule": transmission.Schedule_AllAtOnce, @@ -165,12 +165,12 @@ func testRemoteTarget(t *testing.T, ctx context.Context, underlying commoncap.Ta capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { capabilityPeerID := p2ptypes.PeerID{} - require.NoError(t, capabilityPeerID.UnmarshalText([]byte(newPeerID()))) + require.NoError(t, capabilityPeerID.UnmarshalText([]byte(NewPeerID()))) capabilityPeers[i] = capabilityPeerID } capabilityPeerID := p2ptypes.PeerID{} - require.NoError(t, capabilityPeerID.UnmarshalText([]byte(newPeerID()))) + require.NoError(t, capabilityPeerID.UnmarshalText([]byte(NewPeerID()))) capDonInfo := commoncap.DON{ ID: "capability-don", @@ -189,7 +189,7 @@ func testRemoteTarget(t *testing.T, ctx context.Context, underlying commoncap.Ta workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerID := p2ptypes.PeerID{} - require.NoError(t, workflowPeerID.UnmarshalText([]byte(newPeerID()))) + require.NoError(t, workflowPeerID.UnmarshalText([]byte(NewPeerID()))) workflowPeers[i] = workflowPeerID } @@ -328,11 +328,11 @@ func (t abstractTestCapability) UnregisterFromWorkflow(ctx context.Context, requ return nil } -type testCapability struct { +type TestCapability struct { abstractTestCapability } -func (t testCapability) Execute(ctx context.Context, request commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { +func (t TestCapability) Execute(ctx context.Context, request commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { ch := make(chan commoncap.CapabilityResponse, 1) value := request.Inputs.Underlying["executeValue1"] @@ -344,15 +344,21 @@ func (t testCapability) Execute(ctx context.Context, request commoncap.Capabilit return ch, nil } -type testErrorCapability struct { +type TestErrorCapability struct { abstractTestCapability } -func (t testErrorCapability) Execute(ctx context.Context, request commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { +func (t TestErrorCapability) Execute(ctx context.Context, request commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { return nil, errors.New("an error") } -func newPeerID() string { +func NewP2PPeerID(t *testing.T) p2ptypes.PeerID { + id := p2ptypes.PeerID{} + require.NoError(t, id.UnmarshalText([]byte(NewPeerID()))) + return id +} + +func NewPeerID() string { var privKey [32]byte _, err := rand.Read(privKey[:]) if err != nil { diff --git a/core/capabilities/remote/target/receiver.go b/core/capabilities/remote/target/receiver.go index 3258e748c48..3d3512e45b7 100644 --- a/core/capabilities/remote/target/receiver.go +++ b/core/capabilities/remote/target/receiver.go @@ -92,14 +92,13 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { // TODO Confirm threading semantics of dispatcher Receive // TODO May want to have executor per message id to improve liveness - // TODO multithread this - if msg.Method != types.MethodExecute { r.lggr.Errorw("received request for unsupported method type", "method", msg.Method) return } - // A request is uniquely identified by the message id and the hash of the payload + // A request is uniquely identified by the message id and the hash of the payload to prevent a malicious + // actor from sending a different payload with the same message id messageId := GetMessageID(msg) hash := sha256.Sum256(msg.Payload) requestID := messageId + hex.EncodeToString(hash[:]) @@ -114,13 +113,14 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { } } - request := r.requestIDToRequest[requestID] - - err := request.Receive(ctx, msg) - if err != nil { - r.lggr.Errorw("request failed to Receive new message", "request", request, "err", err) - } + req := r.requestIDToRequest[requestID] + go func() { + err := req.Receive(ctx, msg) + if err != nil { + r.lggr.Errorw("request failed to Receive new message", "request", req, "err", err) + } + }() } func GetMessageID(msg *types.MessageBody) string { diff --git a/core/capabilities/remote/target/receiver_test.go b/core/capabilities/remote/target/receiver_test.go index 8a4befc60f9..6e9f7a0d621 100644 --- a/core/capabilities/remote/target/receiver_test.go +++ b/core/capabilities/remote/target/receiver_test.go @@ -6,7 +6,6 @@ import ( "time" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" @@ -23,7 +22,7 @@ func Test_Receiver_RespondsAfterSufficientRequests(t *testing.T) { numCapabilityPeers := 4 - callers := testRemoteTargetReceiver(t, ctx, &testCapability{}, 10, 9, numCapabilityPeers, 3, 10*time.Minute) + callers := testRemoteTargetReceiver(t, ctx, &TestCapability{}, 10, 9, numCapabilityPeers, 3, 10*time.Minute) for _, caller := range callers { caller.Execute(context.Background(), @@ -49,7 +48,7 @@ func Test_Receiver_InsufficientCallers(t *testing.T) { numCapabilityPeers := 4 - callers := testRemoteTargetReceiver(t, ctx, &testCapability{}, 10, 10, numCapabilityPeers, 3, 100*time.Millisecond) + callers := testRemoteTargetReceiver(t, ctx, &TestCapability{}, 10, 10, numCapabilityPeers, 3, 100*time.Millisecond) for _, caller := range callers { caller.Execute(context.Background(), @@ -75,7 +74,7 @@ func Test_Receiver_CapabilityError(t *testing.T) { numCapabilityPeers := 4 - callers := testRemoteTargetReceiver(t, ctx, &testErrorCapability{}, 10, 9, numCapabilityPeers, 3, 100*time.Millisecond) + callers := testRemoteTargetReceiver(t, ctx, &TestErrorCapability{}, 10, 9, numCapabilityPeers, 3, 100*time.Millisecond) for _, caller := range callers { caller.Execute(context.Background(), @@ -104,7 +103,7 @@ func testRemoteTargetReceiver(t *testing.T, ctx context.Context, capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { - capabilityPeerID := newP2PPeerID(t) + capabilityPeerID := NewP2PPeerID(t) capabilityPeers[i] = capabilityPeerID } @@ -124,7 +123,7 @@ func testRemoteTargetReceiver(t *testing.T, ctx context.Context, workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { - workflowPeers[i] = newP2PPeerID(t) + workflowPeers[i] = NewP2PPeerID(t) } workflowDonInfo := commoncap.DON{ @@ -160,12 +159,6 @@ func testRemoteTargetReceiver(t *testing.T, ctx context.Context, return workflowNodes } -func newP2PPeerID(t *testing.T) p2ptypes.PeerID { - id := p2ptypes.PeerID{} - require.NoError(t, id.UnmarshalText([]byte(newPeerID()))) - return id -} - type receiverTestCaller struct { peerID p2ptypes.PeerID dispatcher remotetypes.Dispatcher diff --git a/core/capabilities/remote/target/caller_request.go b/core/capabilities/remote/target/request/caller_request.go similarity index 92% rename from core/capabilities/remote/target/caller_request.go rename to core/capabilities/remote/target/request/caller_request.go index c1c57b67fad..13b4d73c451 100644 --- a/core/capabilities/remote/target/caller_request.go +++ b/core/capabilities/remote/target/request/caller_request.go @@ -1,4 +1,4 @@ -package target +package request import ( "context" @@ -25,11 +25,14 @@ type callerRequest struct { requiredIdenticalResponses int + requestTimeout time.Duration + respSent bool } func NewCallerRequest(ctx context.Context, lggr logger.Logger, req commoncap.CapabilityRequest, messageID string, - remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher) (*callerRequest, error) { + remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher, + requestTimeout time.Duration) (*callerRequest, error) { remoteCapabilityDonInfo := remoteCapabilityInfo.DON if remoteCapabilityDonInfo == nil { @@ -79,6 +82,7 @@ func NewCallerRequest(ctx context.Context, lggr logger.Logger, req commoncap.Cap return &callerRequest{ createdAt: time.Now(), + requestTimeout: requestTimeout, requiredIdenticalResponses: int(remoteCapabilityDonInfo.F + 1), responseIDCount: make(map[[32]byte]int), responseReceived: responseReceived, @@ -90,6 +94,15 @@ func (c *callerRequest) ResponseChan() <-chan commoncap.CapabilityResponse { return c.responseCh } +func (c *callerRequest) Expired() bool { + if time.Since(c.createdAt) > c.requestTimeout { + c.cancelRequest("request timed out") + return true + } + + return false +} + // TODO addResponse assumes that only one response is received from each peer, if streaming responses need to be supported this will need to be updated func (c *callerRequest) AddResponse(sender p2ptypes.PeerID, response []byte) error { if _, ok := c.responseReceived[sender]; !ok { diff --git a/core/capabilities/remote/target/caller_request_test.go b/core/capabilities/remote/target/request/caller_request_test.go similarity index 86% rename from core/capabilities/remote/target/caller_request_test.go rename to core/capabilities/remote/target/request/caller_request_test.go index 175b9012fac..ec1a5f91e00 100644 --- a/core/capabilities/remote/target/caller_request_test.go +++ b/core/capabilities/remote/target/request/caller_request_test.go @@ -1,8 +1,9 @@ -package target_test +package request_test import ( "context" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -11,6 +12,7 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink-common/pkg/values" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target/request" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" "github.com/smartcontractkit/chainlink/v2/core/logger" @@ -23,7 +25,7 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { numCapabilityPeers := 2 capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { - capabilityPeers[i] = newP2PPeerID(t) + capabilityPeers[i] = NewP2PPeerID(t) } capDonInfo := commoncap.DON{ @@ -43,7 +45,7 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { numWorkflowPeers := 2 workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { - workflowPeers[i] = newP2PPeerID(t) + workflowPeers[i] = NewP2PPeerID(t) } workflowDonInfo := commoncap.DON{ @@ -87,8 +89,8 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { require.NoError(t, err) dispatcher := &callerRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} - request, err := target.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, - workflowDonInfo, dispatcher) + request, err := request.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, + workflowDonInfo, dispatcher, 10*time.Minute) require.NoError(t, err) capabilityResponse2 := commoncap.CapabilityResponse{ @@ -119,13 +121,13 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { require.NoError(t, err) dispatcher := &callerRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} - request, err := target.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, - workflowDonInfo, dispatcher) + request, err := request.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, + workflowDonInfo, dispatcher, 10*time.Minute) require.NoError(t, err) err = request.AddResponse(capabilityPeers[0], rawResponse) require.NoError(t, err) - err = request.AddResponse(newP2PPeerID(t), rawResponse) + err = request.AddResponse(NewP2PPeerID(t), rawResponse) require.NotNil(t, err) }) @@ -137,8 +139,8 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { require.NoError(t, err) dispatcher := &callerRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} - request, err := target.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, - workflowDonInfo, dispatcher) + request, err := request.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, + workflowDonInfo, dispatcher, 10*time.Minute) require.NoError(t, err) err = request.AddResponse(capabilityPeers[0], rawResponse) @@ -155,8 +157,8 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { require.NoError(t, err) dispatcher := &callerRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} - request, err := target.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, - workflowDonInfo, dispatcher) + request, err := request.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, + workflowDonInfo, dispatcher, 10*time.Minute) require.NoError(t, err) <-dispatcher.msgs diff --git a/core/capabilities/remote/target/request/receiver_request_test.go b/core/capabilities/remote/target/request/receiver_request_test.go index 32bf9315b49..c4a49af8600 100644 --- a/core/capabilities/remote/target/request/receiver_request_test.go +++ b/core/capabilities/remote/target/request/receiver_request_test.go @@ -2,16 +2,18 @@ package request_test import ( "context" + "crypto/rand" + "errors" "testing" "time" + "github.com/mr-tron/base58" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink-common/pkg/values" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target/request" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/logger" @@ -20,13 +22,13 @@ import ( func Test_ReceiverRequest_MessageValidation(t *testing.T) { lggr := logger.TestLogger(t) - capability := target.testCapability{} - capabilityPeerID := target.newP2PPeerID(t) + capability := TestCapability{} + capabilityPeerID := NewP2PPeerID(t) numWorkflowPeers := 2 workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { - workflowPeers[i] = target.newP2PPeerID(t) + workflowPeers[i] = NewP2PPeerID(t) } callingDon := commoncap.DON{ @@ -72,7 +74,7 @@ func Test_ReceiverRequest_MessageValidation(t *testing.T) { err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) require.NoError(t, err) - nonDonPeer := target.newP2PPeerID(t) + nonDonPeer := NewP2PPeerID(t) err = request.Receive(context.Background(), &types.MessageBody{ Version: 0, Sender: nonDonPeer[:], @@ -116,7 +118,7 @@ func Test_ReceiverRequest_MessageValidation(t *testing.T) { t.Run("Send second valid request when capability errors", func(t *testing.T) { dispatcher := &testDispatcher{} - request := request.NewReceiverRequest(lggr, target.testErrorCapability{}, "capabilityID", "capabilityDonID", + request := request.NewReceiverRequest(lggr, TestErrorCapability{}, "capabilityID", "capabilityDonID", capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute) err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) @@ -200,3 +202,64 @@ func (t *testDispatcher) Send(peerID p2ptypes.PeerID, msgBody *types.MessageBody t.msgs = append(t.msgs, msgBody) return nil } + +type abstractTestCapability struct { +} + +func (t abstractTestCapability) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { + return commoncap.CapabilityInfo{}, nil +} + +func (t abstractTestCapability) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { + return nil +} + +func (t abstractTestCapability) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { + return nil +} + +type TestCapability struct { + abstractTestCapability +} + +func (t TestCapability) Execute(ctx context.Context, request commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { + ch := make(chan commoncap.CapabilityResponse, 1) + + value := request.Inputs.Underlying["executeValue1"] + + ch <- commoncap.CapabilityResponse{ + Value: value, + } + + return ch, nil +} + +type TestErrorCapability struct { + abstractTestCapability +} + +func (t TestErrorCapability) Execute(ctx context.Context, request commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { + return nil, errors.New("an error") +} + +func NewP2PPeerID(t *testing.T) p2ptypes.PeerID { + id := p2ptypes.PeerID{} + require.NoError(t, id.UnmarshalText([]byte(NewPeerID()))) + return id +} + +func NewPeerID() string { + var privKey [32]byte + _, err := rand.Read(privKey[:]) + if err != nil { + panic(err) + } + + peerID := append(libp2pMagic(), privKey[:]...) + + return base58.Encode(peerID[:]) +} + +func libp2pMagic() []byte { + return []byte{0x00, 0x24, 0x08, 0x01, 0x12, 0x20} +} From 4de7ff9e3b39794c2daa27d93189e534f8aea670 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Tue, 28 May 2024 18:45:05 +0100 Subject: [PATCH 40/43] make caller and reciver multithreaded to prevent slow executor blocking --- core/capabilities/remote/target/request/caller_request.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/core/capabilities/remote/target/request/caller_request.go b/core/capabilities/remote/target/request/caller_request.go index 13b4d73c451..c1e4a99bb48 100644 --- a/core/capabilities/remote/target/request/caller_request.go +++ b/core/capabilities/remote/target/request/caller_request.go @@ -5,6 +5,7 @@ import ( "crypto/sha256" "errors" "fmt" + "sync" "time" "github.com/smartcontractkit/chainlink-common/pkg/capabilities" @@ -28,6 +29,7 @@ type callerRequest struct { requestTimeout time.Duration respSent bool + mux sync.Mutex } func NewCallerRequest(ctx context.Context, lggr logger.Logger, req commoncap.CapabilityRequest, messageID string, @@ -95,6 +97,9 @@ func (c *callerRequest) ResponseChan() <-chan commoncap.CapabilityResponse { } func (c *callerRequest) Expired() bool { + c.mux.Lock() + defer c.mux.Unlock() + if time.Since(c.createdAt) > c.requestTimeout { c.cancelRequest("request timed out") return true @@ -105,6 +110,9 @@ func (c *callerRequest) Expired() bool { // TODO addResponse assumes that only one response is received from each peer, if streaming responses need to be supported this will need to be updated func (c *callerRequest) AddResponse(sender p2ptypes.PeerID, response []byte) error { + c.mux.Lock() + defer c.mux.Unlock() + if _, ok := c.responseReceived[sender]; !ok { return fmt.Errorf("response from peer %s not expected", sender) } From ac07133382eb672e8843d379e4191128a6939f2a Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Wed, 29 May 2024 11:21:13 +0100 Subject: [PATCH 41/43] update error handling --- core/capabilities/remote/target/caller.go | 16 ++-- .../remote/target/endtoend_test.go | 4 +- core/capabilities/remote/target/receiver.go | 12 ++- .../remote/target/request/caller_request.go | 46 +++++----- .../target/request/caller_request_test.go | 70 ++++++++++++-- .../remote/target/request/receiver_request.go | 66 ++++++++------ .../target/request/receiver_request_test.go | 30 +++--- core/capabilities/remote/types/message.pb.go | 91 ++++++++++--------- core/capabilities/remote/types/message.proto | 8 +- 9 files changed, 210 insertions(+), 133 deletions(-) diff --git a/core/capabilities/remote/target/caller.go b/core/capabilities/remote/target/caller.go index a4e2bc2f1c6..d8f3b834400 100644 --- a/core/capabilities/remote/target/caller.go +++ b/core/capabilities/remote/target/caller.go @@ -17,9 +17,10 @@ import ( ) type callerRequest interface { - AddResponse(sender p2ptypes.PeerID, response []byte) error + AddResponse(sender p2ptypes.PeerID, msg *types.MessageBody) error ResponseChan() <-chan commoncap.CapabilityResponse Expired() bool + Cancel(reason string) } // caller/Receiver are shims translating between capability API calls and network messages @@ -71,6 +72,7 @@ func (c *caller) ExpireRequests() { for messageID, req := range c.messageIDToExecuteRequest { if req.Expired() { + req.Cancel("request expired") delete(c.messageIDToExecuteRequest, messageID) } } @@ -122,14 +124,12 @@ func (c *caller) Receive(msg *types.MessageBody) { return } - if msg.Error != types.Error_OK { - c.lggr.Warnw("received error response for pending request", "messageID", messageID, "sender", sender, "receiver", msg.Receiver, "error", msg.Error) - return - } + go func() { + if err := req.AddResponse(sender, msg); err != nil { + c.lggr.Errorw("failed to add response to request", "messageID", messageID, "sender", sender, "err", err) + } + }() - if err := req.AddResponse(sender, msg.Payload); err != nil { - c.lggr.Errorw("failed to add response to request", "messageID", messageID, "sender", sender, "err", err) - } } func GetMessageIDForRequest(req commoncap.CapabilityRequest) (string, error) { diff --git a/core/capabilities/remote/target/endtoend_test.go b/core/capabilities/remote/target/endtoend_test.go index 33a1a91bb6e..e1d81710e7d 100644 --- a/core/capabilities/remote/target/endtoend_test.go +++ b/core/capabilities/remote/target/endtoend_test.go @@ -143,7 +143,7 @@ func Test_RemoteTargetCapability_CapabilityError(t *testing.T) { responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { require.NoError(t, responseError) response := <-responseCh - assert.NotNil(t, response.Err) + assert.Equal(t, "failed to execute capability: an error", response.Err.Error()) } capability := &TestErrorCapability{} @@ -154,7 +154,7 @@ func Test_RemoteTargetCapability_CapabilityError(t *testing.T) { }) require.NoError(t, err) - testRemoteTarget(t, ctx, capability, 10, 9, 10*time.Millisecond, 10, 9, 10*time.Minute, transmissionSchedule, responseTest) + testRemoteTarget(t, ctx, capability, 10, 9, 10*time.Minute, 10, 9, 10*time.Minute, transmissionSchedule, responseTest) } func testRemoteTarget(t *testing.T, ctx context.Context, underlying commoncap.TargetCapability, numWorkflowPeers int, workflowDonF uint8, workflowNodeTimeout time.Duration, diff --git a/core/capabilities/remote/target/receiver.go b/core/capabilities/remote/target/receiver.go index 3d3512e45b7..461162ffacd 100644 --- a/core/capabilities/remote/target/receiver.go +++ b/core/capabilities/remote/target/receiver.go @@ -19,6 +19,7 @@ import ( type receiverRequest interface { Receive(ctx context.Context, msg *types.MessageBody) error Expired() bool + Cancel(err types.Error, msg string) error } type remoteTargetReceiver struct { @@ -77,6 +78,10 @@ func (r *remoteTargetReceiver) ExpireRequests() { for requestID, executeReq := range r.requestIDToRequest { if executeReq.Expired() { + err := executeReq.Cancel(types.Error_TIMEOUT, "request expired") + if err != nil { + r.lggr.Errorw("failed to cancel request", "request", executeReq, "err", err) + } delete(r.requestIDToRequest, requestID) } } @@ -89,9 +94,6 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { // TODO should the dispatcher be passing in a context? ctx := context.Background() - // TODO Confirm threading semantics of dispatcher Receive - // TODO May want to have executor per message id to improve liveness - if msg.Method != types.MethodExecute { r.lggr.Errorw("received request for unsupported method type", "method", msg.Method) return @@ -105,10 +107,10 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { if _, ok := r.requestIDToRequest[requestID]; !ok { if callingDon, ok := r.workflowDONs[msg.CallerDonId]; ok { - r.requestIDToRequest[requestID] = request.NewReceiverRequest(r.lggr, r.underlying, r.capInfo.ID, r.localDonInfo.ID, r.peerID, + r.requestIDToRequest[requestID] = request.NewReceiverRequest(r.underlying, r.capInfo.ID, r.localDonInfo.ID, r.peerID, callingDon, messageId, r.dispatcher, r.requestTimeout) } else { - r.lggr.Errorw("received request from unregistered workflow don", "donId", msg.CallerDonId) + r.lggr.Errorw("received request from unregistered don", "donId", msg.CallerDonId) return } } diff --git a/core/capabilities/remote/target/request/caller_request.go b/core/capabilities/remote/target/request/caller_request.go index c1e4a99bb48..765eec44bb4 100644 --- a/core/capabilities/remote/target/request/caller_request.go +++ b/core/capabilities/remote/target/request/caller_request.go @@ -22,6 +22,7 @@ type callerRequest struct { responseCh chan commoncap.CapabilityResponse createdAt time.Time responseIDCount map[[32]byte]int + errorCount map[string]int responseReceived map[p2ptypes.PeerID]bool requiredIdenticalResponses int @@ -87,6 +88,7 @@ func NewCallerRequest(ctx context.Context, lggr logger.Logger, req commoncap.Cap requestTimeout: requestTimeout, requiredIdenticalResponses: int(remoteCapabilityDonInfo.F + 1), responseIDCount: make(map[[32]byte]int), + errorCount: make(map[string]int), responseReceived: responseReceived, responseCh: make(chan commoncap.CapabilityResponse, 1), }, nil @@ -97,19 +99,19 @@ func (c *callerRequest) ResponseChan() <-chan commoncap.CapabilityResponse { } func (c *callerRequest) Expired() bool { + return time.Since(c.createdAt) > c.requestTimeout +} + +func (c *callerRequest) Cancel(reason string) { c.mux.Lock() defer c.mux.Unlock() - - if time.Since(c.createdAt) > c.requestTimeout { - c.cancelRequest("request timed out") - return true + if !c.respSent { + c.sendResponse(commoncap.CapabilityResponse{Err: errors.New(reason)}) } - - return false } // TODO addResponse assumes that only one response is received from each peer, if streaming responses need to be supported this will need to be updated -func (c *callerRequest) AddResponse(sender p2ptypes.PeerID, response []byte) error { +func (c *callerRequest) AddResponse(sender p2ptypes.PeerID, msg *types.MessageBody) error { c.mux.Lock() defer c.mux.Unlock() @@ -123,18 +125,24 @@ func (c *callerRequest) AddResponse(sender p2ptypes.PeerID, response []byte) err c.responseReceived[sender] = true - responseID := sha256.Sum256(response) - c.responseIDCount[responseID]++ + if msg.Error == types.Error_OK { + responseID := sha256.Sum256(msg.Payload) + c.responseIDCount[responseID]++ - if c.responseIDCount[responseID] == c.requiredIdenticalResponses { - capabilityResponse, err := pb.UnmarshalCapabilityResponse(response) - if err != nil { - c.sendResponse(commoncap.CapabilityResponse{Err: fmt.Errorf("failed to unmarshal capability response: %w", err)}) - } else { - c.sendResponse(commoncap.CapabilityResponse{Value: capabilityResponse.Value}) + if c.responseIDCount[responseID] == c.requiredIdenticalResponses { + capabilityResponse, err := pb.UnmarshalCapabilityResponse(msg.Payload) + if err != nil { + c.sendResponse(commoncap.CapabilityResponse{Err: fmt.Errorf("failed to unmarshal capability response: %w", err)}) + } else { + c.sendResponse(commoncap.CapabilityResponse{Value: capabilityResponse.Value}) + } + } + } else { + c.errorCount[msg.ErrorMsg]++ + if c.errorCount[msg.ErrorMsg] == c.requiredIdenticalResponses { + c.sendResponse(commoncap.CapabilityResponse{Err: errors.New(msg.ErrorMsg)}) } } - return nil } @@ -143,9 +151,3 @@ func (c *callerRequest) sendResponse(response commoncap.CapabilityResponse) { close(c.responseCh) c.respSent = true } - -func (c *callerRequest) cancelRequest(reason string) { - if !c.respSent { - c.sendResponse(commoncap.CapabilityResponse{Err: errors.New(reason)}) - } -} diff --git a/core/capabilities/remote/target/request/caller_request_test.go b/core/capabilities/remote/target/request/caller_request_test.go index ec1a5f91e00..845bf8adfce 100644 --- a/core/capabilities/remote/target/request/caller_request_test.go +++ b/core/capabilities/remote/target/request/caller_request_test.go @@ -81,6 +81,15 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { rawResponse, err := pb.MarshalCapabilityResponse(capabilityResponse) require.NoError(t, err) + msg := &types.MessageBody{ + CapabilityId: capInfo.ID, + CapabilityDonId: capDonInfo.ID, + CallerDonId: workflowDonInfo.ID, + Method: types.MethodExecute, + Payload: rawResponse, + MessageId: []byte("messageID"), + } + t.Run("Send second message with different response", func(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -100,10 +109,18 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { rawResponse2, err := pb.MarshalCapabilityResponse(capabilityResponse2) require.NoError(t, err) + msg2 := &types.MessageBody{ + CapabilityId: capInfo.ID, + CapabilityDonId: capDonInfo.ID, + CallerDonId: workflowDonInfo.ID, + Method: types.MethodExecute, + Payload: rawResponse2, + MessageId: []byte("messageID"), + } - err = request.AddResponse(capabilityPeers[0], rawResponse) + err = request.AddResponse(capabilityPeers[0], msg) require.NoError(t, err) - err = request.AddResponse(capabilityPeers[1], rawResponse2) + err = request.AddResponse(capabilityPeers[1], msg2) require.NoError(t, err) select { @@ -125,9 +142,9 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { workflowDonInfo, dispatcher, 10*time.Minute) require.NoError(t, err) - err = request.AddResponse(capabilityPeers[0], rawResponse) + err = request.AddResponse(capabilityPeers[0], msg) require.NoError(t, err) - err = request.AddResponse(NewP2PPeerID(t), rawResponse) + err = request.AddResponse(NewP2PPeerID(t), msg) require.NotNil(t, err) }) @@ -143,12 +160,49 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { workflowDonInfo, dispatcher, 10*time.Minute) require.NoError(t, err) - err = request.AddResponse(capabilityPeers[0], rawResponse) + err = request.AddResponse(capabilityPeers[0], msg) require.NoError(t, err) - err = request.AddResponse(capabilityPeers[0], rawResponse) + err = request.AddResponse(capabilityPeers[0], msg) require.NotNil(t, err) }) + t.Run("Send second message with same error as first", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + messageID, err := target.GetMessageIDForRequest(capabilityRequest) + require.NoError(t, err) + + dispatcher := &callerRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} + request, err := request.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, + workflowDonInfo, dispatcher, 10*time.Minute) + require.NoError(t, err) + + <-dispatcher.msgs + <-dispatcher.msgs + assert.Equal(t, 0, len(dispatcher.msgs)) + + msgWithError := &types.MessageBody{ + CapabilityId: capInfo.ID, + CapabilityDonId: capDonInfo.ID, + CallerDonId: workflowDonInfo.ID, + Method: types.MethodExecute, + Payload: rawResponse, + MessageId: []byte("messageID"), + Error: types.Error_INTERNAL_ERROR, + ErrorMsg: "an error", + } + + err = request.AddResponse(capabilityPeers[0], msgWithError) + require.NoError(t, err) + err = request.AddResponse(capabilityPeers[1], msgWithError) + require.NoError(t, err) + + response := <-request.ResponseChan() + + assert.Equal(t, "an error", response.Err.Error()) + }) + t.Run("Send second valid message", func(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -165,9 +219,9 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { <-dispatcher.msgs assert.Equal(t, 0, len(dispatcher.msgs)) - err = request.AddResponse(capabilityPeers[0], rawResponse) + err = request.AddResponse(capabilityPeers[0], msg) require.NoError(t, err) - err = request.AddResponse(capabilityPeers[1], rawResponse) + err = request.AddResponse(capabilityPeers[1], msg) require.NoError(t, err) response := <-request.ResponseChan() diff --git a/core/capabilities/remote/target/request/receiver_request.go b/core/capabilities/remote/target/request/receiver_request.go index 1a0038a2511..6100d57e29c 100644 --- a/core/capabilities/remote/target/request/receiver_request.go +++ b/core/capabilities/remote/target/request/receiver_request.go @@ -13,12 +13,15 @@ import ( p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" "github.com/smartcontractkit/chainlink-common/pkg/capabilities" - "github.com/smartcontractkit/chainlink/v2/core/logger" ) -type receiverRequest struct { - lggr logger.Logger +type response struct { + response []byte + error types.Error + errorMsg string +} +type receiverRequest struct { capability capabilities.TargetCapability capabilityPeerId p2ptypes.PeerID @@ -32,22 +35,20 @@ type receiverRequest struct { createdTime time.Time - response []byte - responseError types.Error + response *response - callingDon commoncap.DON - requestMessageID string + callingDon commoncap.DON - requestTimeout time.Duration + requestMessageID string + requestTimeout time.Duration mux sync.Mutex } -func NewReceiverRequest(lggr logger.Logger, capability capabilities.TargetCapability, capabilityID string, capabilityDonID string, capabilityPeerId p2ptypes.PeerID, +func NewReceiverRequest(capability capabilities.TargetCapability, capabilityID string, capabilityDonID string, capabilityPeerId p2ptypes.PeerID, callingDon commoncap.DON, requestMessageID string, dispatcher types.Dispatcher, requestTimeout time.Duration) *receiverRequest { return &receiverRequest{ - lggr: lggr, capability: capability, createdTime: time.Now(), capabilityID: capabilityID, @@ -73,34 +74,35 @@ func (e *receiverRequest) Receive(ctx context.Context, msg *types.MessageBody) e if e.minimumRequiredRequestsReceived() && !e.hasResponse() { if err := e.executeRequest(ctx, msg.Payload); err != nil { - e.setError(types.Error_INTERNAL_ERROR) - e.lggr.Errorw("failed to execute request", "error", err) + e.setError(types.Error_INTERNAL_ERROR, err.Error()) } } if err := e.sendResponses(); err != nil { - return fmt.Errorf("failed to send response to requesters: %w", err) + return fmt.Errorf("failed to send responses: %w", err) } return nil } func (e *receiverRequest) Expired() bool { + return time.Since(e.createdTime) > e.requestTimeout +} + +func (e *receiverRequest) Cancel(err types.Error, msg string) error { e.mux.Lock() defer e.mux.Unlock() - if time.Since(e.createdTime) > e.requestTimeout { - if !e.hasResponse() { - e.setError(types.Error_TIMEOUT) - if err := e.sendResponses(); err != nil { - e.lggr.Errorw("failed to send timeout response to all requesters", "capabilityId", e.capabilityID, "err", err) - } - } + if e.hasResponse() { + return fmt.Errorf("request already has response") + } - return true + e.setError(err, msg) + if err := e.sendResponses(); err != nil { + return fmt.Errorf("failed to send responses: %w", err) } - return false + return nil } func (e *receiverRequest) executeRequest(ctx context.Context, payload []byte) error { @@ -158,15 +160,20 @@ func (e *receiverRequest) minimumRequiredRequestsReceived() bool { } func (e *receiverRequest) setResult(result []byte) { - e.response = result + e.response = &response{ + response: result, + } } -func (e *receiverRequest) setError(err types.Error) { - e.responseError = err +func (e *receiverRequest) setError(err types.Error, errMsg string) { + e.response = &response{ + error: err, + errorMsg: errMsg, + } } func (e *receiverRequest) hasResponse() bool { - return e.response != nil || e.responseError != types.Error_OK + return e.response != nil } func (e *receiverRequest) sendResponses() error { @@ -196,10 +203,11 @@ func (e *receiverRequest) sendResponse(requester p2ptypes.PeerID) error { Receiver: requester[:], } - if e.responseError != types.Error_OK { - responseMsg.Error = e.responseError + if e.response.error != types.Error_OK { + responseMsg.Error = e.response.error + responseMsg.ErrorMsg = e.response.errorMsg } else { - responseMsg.Payload = e.response + responseMsg.Payload = e.response.response } if err := e.dispatcher.Send(requester, &responseMsg); err != nil { diff --git a/core/capabilities/remote/target/request/receiver_request_test.go b/core/capabilities/remote/target/request/receiver_request_test.go index c4a49af8600..54be175dd37 100644 --- a/core/capabilities/remote/target/request/receiver_request_test.go +++ b/core/capabilities/remote/target/request/receiver_request_test.go @@ -16,12 +16,10 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/values" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target/request" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" - "github.com/smartcontractkit/chainlink/v2/core/logger" p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) func Test_ReceiverRequest_MessageValidation(t *testing.T) { - lggr := logger.TestLogger(t) capability := TestCapability{} capabilityPeerID := NewP2PPeerID(t) @@ -58,24 +56,24 @@ func Test_ReceiverRequest_MessageValidation(t *testing.T) { require.NoError(t, err) t.Run("Send duplicate message", func(t *testing.T) { - request := request.NewReceiverRequest(lggr, capability, "capabilityID", "capabilityDonID", + req := request.NewReceiverRequest(capability, "capabilityID", "capabilityDonID", capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute) - err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) + err := sendValidRequest(req, workflowPeers, capabilityPeerID, rawRequest) require.NoError(t, err) - err = sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) + err = sendValidRequest(req, workflowPeers, capabilityPeerID, rawRequest) assert.NotNil(t, err) }) t.Run("Send message with non calling don peer", func(t *testing.T) { - request := request.NewReceiverRequest(lggr, capability, "capabilityID", "capabilityDonID", + req := request.NewReceiverRequest(capability, "capabilityID", "capabilityDonID", capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute) - err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) + err := sendValidRequest(req, workflowPeers, capabilityPeerID, rawRequest) require.NoError(t, err) nonDonPeer := NewP2PPeerID(t) - err = request.Receive(context.Background(), &types.MessageBody{ + err = req.Receive(context.Background(), &types.MessageBody{ Version: 0, Sender: nonDonPeer[:], Receiver: capabilityPeerID[:], @@ -91,13 +89,13 @@ func Test_ReceiverRequest_MessageValidation(t *testing.T) { }) t.Run("Send message invalid payload", func(t *testing.T) { - request := request.NewReceiverRequest(lggr, capability, "capabilityID", "capabilityDonID", + req := request.NewReceiverRequest(capability, "capabilityID", "capabilityDonID", capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute) - err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) + err := sendValidRequest(req, workflowPeers, capabilityPeerID, rawRequest) require.NoError(t, err) - err = request.Receive(context.Background(), &types.MessageBody{ + err = req.Receive(context.Background(), &types.MessageBody{ Version: 0, Sender: workflowPeers[1][:], Receiver: capabilityPeerID[:], @@ -118,13 +116,13 @@ func Test_ReceiverRequest_MessageValidation(t *testing.T) { t.Run("Send second valid request when capability errors", func(t *testing.T) { dispatcher := &testDispatcher{} - request := request.NewReceiverRequest(lggr, TestErrorCapability{}, "capabilityID", "capabilityDonID", + req := request.NewReceiverRequest(TestErrorCapability{}, "capabilityID", "capabilityDonID", capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute) - err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) + err := sendValidRequest(req, workflowPeers, capabilityPeerID, rawRequest) require.NoError(t, err) - err = request.Receive(context.Background(), &types.MessageBody{ + err = req.Receive(context.Background(), &types.MessageBody{ Version: 0, Sender: workflowPeers[1][:], Receiver: capabilityPeerID[:], @@ -138,13 +136,15 @@ func Test_ReceiverRequest_MessageValidation(t *testing.T) { assert.NoError(t, err) assert.Equal(t, 2, len(dispatcher.msgs)) assert.Equal(t, dispatcher.msgs[0].Error, types.Error_INTERNAL_ERROR) + assert.Equal(t, dispatcher.msgs[0].ErrorMsg, "failed to execute capability: an error") assert.Equal(t, dispatcher.msgs[1].Error, types.Error_INTERNAL_ERROR) + assert.Equal(t, dispatcher.msgs[1].ErrorMsg, "failed to execute capability: an error") }) t.Run("Send second valid request", func(t *testing.T) { dispatcher := &testDispatcher{} - request := request.NewReceiverRequest(lggr, capability, "capabilityID", "capabilityDonID", + request := request.NewReceiverRequest(capability, "capabilityID", "capabilityDonID", capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute) err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) diff --git a/core/capabilities/remote/types/message.pb.go b/core/capabilities/remote/types/message.pb.go index 1a9c0891251..78356b864d7 100644 --- a/core/capabilities/remote/types/message.pb.go +++ b/core/capabilities/remote/types/message.pb.go @@ -148,8 +148,9 @@ type MessageBody struct { CallerDonId string `protobuf:"bytes,8,opt,name=caller_don_id,json=callerDonId,proto3" json:"caller_don_id,omitempty"` Method string `protobuf:"bytes,9,opt,name=method,proto3" json:"method,omitempty"` Error Error `protobuf:"varint,10,opt,name=error,proto3,enum=remote.Error" json:"error,omitempty"` + ErrorMsg string `protobuf:"bytes,11,opt,name=errorMsg,proto3" json:"errorMsg,omitempty"` // payload contains a CapabilityRequest or CapabilityResponse - Payload []byte `protobuf:"bytes,11,opt,name=payload,proto3" json:"payload,omitempty"` + Payload []byte `protobuf:"bytes,12,opt,name=payload,proto3" json:"payload,omitempty"` // Types that are assignable to Metadata: // // *MessageBody_TriggerRegistrationMetadata @@ -259,6 +260,13 @@ func (x *MessageBody) GetError() Error { return Error_OK } +func (x *MessageBody) GetErrorMsg() string { + if x != nil { + return x.ErrorMsg + } + return "" +} + func (x *MessageBody) GetPayload() []byte { if x != nil { return x.Payload @@ -292,11 +300,11 @@ type isMessageBody_Metadata interface { } type MessageBody_TriggerRegistrationMetadata struct { - TriggerRegistrationMetadata *TriggerRegistrationMetadata `protobuf:"bytes,12,opt,name=trigger_registration_metadata,json=triggerRegistrationMetadata,proto3,oneof"` + TriggerRegistrationMetadata *TriggerRegistrationMetadata `protobuf:"bytes,13,opt,name=trigger_registration_metadata,json=triggerRegistrationMetadata,proto3,oneof"` } type MessageBody_TriggerEventMetadata struct { - TriggerEventMetadata *TriggerEventMetadata `protobuf:"bytes,13,opt,name=trigger_event_metadata,json=triggerEventMetadata,proto3,oneof"` + TriggerEventMetadata *TriggerEventMetadata `protobuf:"bytes,14,opt,name=trigger_event_metadata,json=triggerEventMetadata,proto3,oneof"` } func (*MessageBody_TriggerRegistrationMetadata) isMessageBody_Metadata() {} @@ -413,7 +421,7 @@ var file_message_proto_rawDesc = []byte{ 0x67, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x09, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x62, 0x6f, 0x64, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, - 0x62, 0x6f, 0x64, 0x79, 0x22, 0xb1, 0x04, 0x0a, 0x0b, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, + 0x62, 0x6f, 0x64, 0x79, 0x22, 0xcd, 0x04, 0x0a, 0x0b, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x42, 0x6f, 0x64, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, @@ -434,43 +442,44 @@ var file_message_proto_rawDesc = []byte{ 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x12, 0x23, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0d, 0x2e, 0x72, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x2e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, - 0x72, 0x12, 0x18, 0x0a, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x18, 0x0b, 0x20, 0x01, - 0x28, 0x0c, 0x52, 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x12, 0x69, 0x0a, 0x1d, 0x74, - 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x5f, 0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, - 0x69, 0x6f, 0x6e, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x0c, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x72, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x2e, 0x54, 0x72, 0x69, 0x67, - 0x67, 0x65, 0x72, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x48, 0x00, 0x52, 0x1b, 0x74, 0x72, 0x69, 0x67, 0x67, - 0x65, 0x72, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x54, 0x0a, 0x16, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, - 0x72, 0x5f, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x18, 0x0d, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x72, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x2e, - 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x4d, 0x65, 0x74, 0x61, - 0x64, 0x61, 0x74, 0x61, 0x48, 0x00, 0x52, 0x14, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x45, - 0x76, 0x65, 0x6e, 0x74, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x42, 0x0a, 0x0a, 0x08, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x52, 0x0a, 0x1b, 0x54, 0x72, 0x69, 0x67, - 0x67, 0x65, 0x72, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x33, 0x0a, 0x16, 0x6c, 0x61, 0x73, 0x74, 0x5f, - 0x72, 0x65, 0x63, 0x65, 0x69, 0x76, 0x65, 0x64, 0x5f, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x5f, 0x69, - 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x13, 0x6c, 0x61, 0x73, 0x74, 0x52, 0x65, 0x63, - 0x65, 0x69, 0x76, 0x65, 0x64, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x22, 0x63, 0x0a, 0x14, - 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x4d, 0x65, 0x74, 0x61, - 0x64, 0x61, 0x74, 0x61, 0x12, 0x28, 0x0a, 0x10, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x5f, - 0x65, 0x76, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, - 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x12, 0x21, - 0x0a, 0x0c, 0x77, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x02, - 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x77, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x49, 0x64, - 0x73, 0x2a, 0x76, 0x0a, 0x05, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x06, 0x0a, 0x02, 0x4f, 0x4b, - 0x10, 0x00, 0x12, 0x15, 0x0a, 0x11, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x41, 0x54, 0x49, 0x4f, 0x4e, - 0x5f, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x01, 0x12, 0x18, 0x0a, 0x14, 0x43, 0x41, 0x50, - 0x41, 0x42, 0x49, 0x4c, 0x49, 0x54, 0x59, 0x5f, 0x4e, 0x4f, 0x54, 0x5f, 0x46, 0x4f, 0x55, 0x4e, - 0x44, 0x10, 0x02, 0x12, 0x13, 0x0a, 0x0f, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x5f, 0x52, - 0x45, 0x51, 0x55, 0x45, 0x53, 0x54, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x54, 0x49, 0x4d, 0x45, - 0x4f, 0x55, 0x54, 0x10, 0x04, 0x12, 0x12, 0x0a, 0x0e, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x4e, 0x41, - 0x4c, 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0x05, 0x42, 0x20, 0x5a, 0x1e, 0x63, 0x6f, 0x72, - 0x65, 0x2f, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x69, 0x65, 0x73, 0x2f, 0x72, - 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x62, 0x06, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x33, + 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x4d, 0x73, 0x67, 0x18, 0x0b, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x08, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x4d, 0x73, 0x67, 0x12, 0x18, 0x0a, + 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, + 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x12, 0x69, 0x0a, 0x1d, 0x74, 0x72, 0x69, 0x67, 0x67, + 0x65, 0x72, 0x5f, 0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x23, + 0x2e, 0x72, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x2e, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x52, + 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x48, 0x00, 0x52, 0x1b, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x52, 0x65, + 0x67, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x12, 0x54, 0x0a, 0x16, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x5f, 0x65, 0x76, + 0x65, 0x6e, 0x74, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x0e, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x72, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x2e, 0x54, 0x72, 0x69, 0x67, + 0x67, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x48, 0x00, 0x52, 0x14, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, + 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x42, 0x0a, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x22, 0x52, 0x0a, 0x1b, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x52, + 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x12, 0x33, 0x0a, 0x16, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x72, 0x65, 0x63, 0x65, + 0x69, 0x76, 0x65, 0x64, 0x5f, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x13, 0x6c, 0x61, 0x73, 0x74, 0x52, 0x65, 0x63, 0x65, 0x69, 0x76, 0x65, + 0x64, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x22, 0x63, 0x0a, 0x14, 0x54, 0x72, 0x69, 0x67, + 0x67, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x12, 0x28, 0x0a, 0x10, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x5f, 0x65, 0x76, 0x65, 0x6e, + 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x74, 0x72, 0x69, 0x67, + 0x67, 0x65, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x12, 0x21, 0x0a, 0x0c, 0x77, 0x6f, + 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, + 0x52, 0x0b, 0x77, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x49, 0x64, 0x73, 0x2a, 0x76, 0x0a, + 0x05, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x06, 0x0a, 0x02, 0x4f, 0x4b, 0x10, 0x00, 0x12, 0x15, + 0x0a, 0x11, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x46, 0x41, 0x49, + 0x4c, 0x45, 0x44, 0x10, 0x01, 0x12, 0x18, 0x0a, 0x14, 0x43, 0x41, 0x50, 0x41, 0x42, 0x49, 0x4c, + 0x49, 0x54, 0x59, 0x5f, 0x4e, 0x4f, 0x54, 0x5f, 0x46, 0x4f, 0x55, 0x4e, 0x44, 0x10, 0x02, 0x12, + 0x13, 0x0a, 0x0f, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x5f, 0x52, 0x45, 0x51, 0x55, 0x45, + 0x53, 0x54, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x54, 0x49, 0x4d, 0x45, 0x4f, 0x55, 0x54, 0x10, + 0x04, 0x12, 0x12, 0x0a, 0x0e, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x4e, 0x41, 0x4c, 0x5f, 0x45, 0x52, + 0x52, 0x4f, 0x52, 0x10, 0x05, 0x42, 0x20, 0x5a, 0x1e, 0x63, 0x6f, 0x72, 0x65, 0x2f, 0x63, 0x61, + 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x69, 0x65, 0x73, 0x2f, 0x72, 0x65, 0x6d, 0x6f, 0x74, + 0x65, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/core/capabilities/remote/types/message.proto b/core/capabilities/remote/types/message.proto index 56e4aabfafc..4d0507fd1e0 100644 --- a/core/capabilities/remote/types/message.proto +++ b/core/capabilities/remote/types/message.proto @@ -29,13 +29,15 @@ message MessageBody { string caller_don_id = 8; string method = 9; Error error = 10; + string errorMsg = 11; // payload contains a CapabilityRequest or CapabilityResponse - bytes payload = 11; + bytes payload = 12; oneof metadata { - TriggerRegistrationMetadata trigger_registration_metadata = 12; - TriggerEventMetadata trigger_event_metadata = 13; + TriggerRegistrationMetadata trigger_registration_metadata = 13; + TriggerEventMetadata trigger_event_metadata = 14; } + } message TriggerRegistrationMetadata { From a505abcc4a97f84c8d8122cf7c5d273530c995ef Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Wed, 29 May 2024 11:40:15 +0100 Subject: [PATCH 42/43] additional error case tests --- .../remote/target/endtoend_test.go | 30 ++++++++ .../target/request/caller_request_test.go | 77 +++++++++++++++---- 2 files changed, 93 insertions(+), 14 deletions(-) diff --git a/core/capabilities/remote/target/endtoend_test.go b/core/capabilities/remote/target/endtoend_test.go index e1d81710e7d..86d887a21ec 100644 --- a/core/capabilities/remote/target/endtoend_test.go +++ b/core/capabilities/remote/target/endtoend_test.go @@ -8,6 +8,7 @@ import ( "testing" "time" + "github.com/google/uuid" "github.com/mr-tron/base58" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -157,6 +158,27 @@ func Test_RemoteTargetCapability_CapabilityError(t *testing.T) { testRemoteTarget(t, ctx, capability, 10, 9, 10*time.Minute, 10, 9, 10*time.Minute, transmissionSchedule, responseTest) } +func Test_RemoteTargetCapability_RandomCapabilityError(t *testing.T) { + ctx, cancel := context.WithCancel(testutils.Context(t)) + defer cancel() + + responseTest := func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error) { + require.NoError(t, responseError) + response := <-responseCh + assert.Equal(t, "request expired", response.Err.Error()) + } + + capability := &TestRandomErrorCapability{} + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + testRemoteTarget(t, ctx, capability, 10, 9, 10*time.Millisecond, 10, 9, 10*time.Minute, transmissionSchedule, responseTest) +} + func testRemoteTarget(t *testing.T, ctx context.Context, underlying commoncap.TargetCapability, numWorkflowPeers int, workflowDonF uint8, workflowNodeTimeout time.Duration, numCapabilityPeers int, capabilityDonF uint8, capabilityNodeResponseTimeout time.Duration, transmissionSchedule *values.Map, responseTest func(t *testing.T, responseCh <-chan commoncap.CapabilityResponse, responseError error)) { @@ -352,6 +374,14 @@ func (t TestErrorCapability) Execute(ctx context.Context, request commoncap.Capa return nil, errors.New("an error") } +type TestRandomErrorCapability struct { + abstractTestCapability +} + +func (t TestRandomErrorCapability) Execute(ctx context.Context, request commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { + return nil, errors.New(uuid.New().String()) +} + func NewP2PPeerID(t *testing.T) p2ptypes.PeerID { id := p2ptypes.PeerID{} require.NoError(t, id.UnmarshalText([]byte(NewPeerID()))) diff --git a/core/capabilities/remote/target/request/caller_request_test.go b/core/capabilities/remote/target/request/caller_request_test.go index 845bf8adfce..a52d9a284ae 100644 --- a/core/capabilities/remote/target/request/caller_request_test.go +++ b/core/capabilities/remote/target/request/caller_request_test.go @@ -81,6 +81,9 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { rawResponse, err := pb.MarshalCapabilityResponse(capabilityResponse) require.NoError(t, err) + messageID, err := target.GetMessageIDForRequest(capabilityRequest) + require.NoError(t, err) + msg := &types.MessageBody{ CapabilityId: capInfo.ID, CapabilityDonId: capDonInfo.ID, @@ -94,9 +97,6 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - messageID, err := target.GetMessageIDForRequest(capabilityRequest) - require.NoError(t, err) - dispatcher := &callerRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} request, err := request.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, workflowDonInfo, dispatcher, 10*time.Minute) @@ -134,9 +134,6 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - messageID, err := target.GetMessageIDForRequest(capabilityRequest) - require.NoError(t, err) - dispatcher := &callerRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} request, err := request.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, workflowDonInfo, dispatcher, 10*time.Minute) @@ -146,15 +143,19 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { require.NoError(t, err) err = request.AddResponse(NewP2PPeerID(t), msg) require.NotNil(t, err) + + select { + case <-request.ResponseChan(): + t.Fatal("expected no response") + default: + } + }) t.Run("Send second message from same peer as first message", func(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - messageID, err := target.GetMessageIDForRequest(capabilityRequest) - require.NoError(t, err) - dispatcher := &callerRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} request, err := request.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, workflowDonInfo, dispatcher, 10*time.Minute) @@ -164,15 +165,19 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { require.NoError(t, err) err = request.AddResponse(capabilityPeers[0], msg) require.NotNil(t, err) + + select { + case <-request.ResponseChan(): + t.Fatal("expected no response") + default: + } + }) t.Run("Send second message with same error as first", func(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - messageID, err := target.GetMessageIDForRequest(capabilityRequest) - require.NoError(t, err) - dispatcher := &callerRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} request, err := request.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, workflowDonInfo, dispatcher, 10*time.Minute) @@ -203,13 +208,57 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { assert.Equal(t, "an error", response.Err.Error()) }) - t.Run("Send second valid message", func(t *testing.T) { + t.Run("Send second message with different error to first", func(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - messageID, err := target.GetMessageIDForRequest(capabilityRequest) + dispatcher := &callerRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} + request, err := request.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, + workflowDonInfo, dispatcher, 10*time.Minute) require.NoError(t, err) + <-dispatcher.msgs + <-dispatcher.msgs + assert.Equal(t, 0, len(dispatcher.msgs)) + + msgWithError := &types.MessageBody{ + CapabilityId: capInfo.ID, + CapabilityDonId: capDonInfo.ID, + CallerDonId: workflowDonInfo.ID, + Method: types.MethodExecute, + Payload: rawResponse, + MessageId: []byte("messageID"), + Error: types.Error_INTERNAL_ERROR, + ErrorMsg: "an error", + } + + msgWithError2 := &types.MessageBody{ + CapabilityId: capInfo.ID, + CapabilityDonId: capDonInfo.ID, + CallerDonId: workflowDonInfo.ID, + Method: types.MethodExecute, + Payload: rawResponse, + MessageId: []byte("messageID"), + Error: types.Error_INTERNAL_ERROR, + ErrorMsg: "an error2", + } + + err = request.AddResponse(capabilityPeers[0], msgWithError) + require.NoError(t, err) + err = request.AddResponse(capabilityPeers[1], msgWithError2) + require.NoError(t, err) + + select { + case <-request.ResponseChan(): + t.Fatal("expected no response") + default: + } + }) + + t.Run("Send second valid message", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + dispatcher := &callerRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} request, err := request.NewCallerRequest(ctx, lggr, capabilityRequest, messageID, capInfo, workflowDonInfo, dispatcher, 10*time.Minute) From a3e0c6ae919deef21c3d115aabbcef7710c67dcb Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Wed, 29 May 2024 11:56:29 +0100 Subject: [PATCH 43/43] tidyup --- common/client/poller_test.go | 2 +- core/capabilities/remote/dispatcher_test.go | 6 +-- core/capabilities/remote/target/caller.go | 21 +++++----- .../capabilities/remote/target/caller_test.go | 2 +- .../remote/target/endtoend_test.go | 4 +- core/capabilities/remote/target/receiver.go | 24 +++++------ .../remote/target/receiver_test.go | 2 +- .../remote/target/request/caller_request.go | 9 ++++- .../target/request/caller_request_test.go | 40 +++++++++++++------ .../remote/target/request/receiver_request.go | 6 ++- .../target/request/receiver_request_test.go | 12 +++--- core/services/p2p/types/mocks/peer.go | 2 +- dashboard-lib/k8s-pods/component.go | 2 +- 13 files changed, 79 insertions(+), 53 deletions(-) diff --git a/common/client/poller_test.go b/common/client/poller_test.go index 82a05b5dfc7..4221b232108 100644 --- a/common/client/poller_test.go +++ b/common/client/poller_test.go @@ -52,7 +52,7 @@ func Test_Poller(t *testing.T) { require.NoError(t, poller.Start()) defer poller.Unsubscribe() - // Receive updates from the poller + // OnMessage updates from the poller pollCount := 0 pollMax := 50 for ; pollCount < pollMax; pollCount++ { diff --git a/core/capabilities/remote/dispatcher_test.go b/core/capabilities/remote/dispatcher_test.go index b6ba31aa8f2..b3a2a0b3412 100644 --- a/core/capabilities/remote/dispatcher_test.go +++ b/core/capabilities/remote/dispatcher_test.go @@ -35,7 +35,7 @@ func TestDispatcher_CleanStartClose(t *testing.T) { ctx := testutils.Context(t) peer := mocks.NewPeer(t) recvCh := make(<-chan p2ptypes.Message) - peer.On("Receive", mock.Anything).Return(recvCh) + peer.On("OnMessage", mock.Anything).Return(recvCh) peer.On("ID", mock.Anything).Return(p2ptypes.PeerID{}) wrapper := mocks.NewPeerWrapper(t) wrapper.On("GetPeer").Return(peer) @@ -55,7 +55,7 @@ func TestDispatcher_Receive(t *testing.T) { peer := mocks.NewPeer(t) recvCh := make(chan p2ptypes.Message) - peer.On("Receive", mock.Anything).Return((<-chan p2ptypes.Message)(recvCh)) + peer.On("OnMessage", mock.Anything).Return((<-chan p2ptypes.Message)(recvCh)) peer.On("ID", mock.Anything).Return(peerId2) wrapper := mocks.NewPeerWrapper(t) wrapper.On("GetPeer").Return(peer) @@ -98,7 +98,7 @@ func TestDispatcher_RespondWithError(t *testing.T) { peer := mocks.NewPeer(t) recvCh := make(chan p2ptypes.Message) - peer.On("Receive", mock.Anything).Return((<-chan p2ptypes.Message)(recvCh)) + peer.On("OnMessage", mock.Anything).Return((<-chan p2ptypes.Message)(recvCh)) peer.On("ID", mock.Anything).Return(peerId2) sendCh := make(chan p2ptypes.PeerID) peer.On("Send", mock.Anything, mock.Anything).Run(func(args mock.Arguments) { diff --git a/core/capabilities/remote/target/caller.go b/core/capabilities/remote/target/caller.go index d8f3b834400..816505fbfd2 100644 --- a/core/capabilities/remote/target/caller.go +++ b/core/capabilities/remote/target/caller.go @@ -9,15 +9,13 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/capabilities" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target/request" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/logger" - p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) type callerRequest interface { - AddResponse(sender p2ptypes.PeerID, msg *types.MessageBody) error + OnMessage(ctx context.Context, msg *types.MessageBody) error ResponseChan() <-chan commoncap.CapabilityResponse Expired() bool Cancel(reason string) @@ -38,10 +36,10 @@ type caller struct { var _ commoncap.TargetCapability = &caller{} var _ types.Receiver = &caller{} -func NewRemoteTargetCaller(ctx context.Context, lggr logger.Logger, remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher, +func NewCaller(ctx context.Context, lggr logger.Logger, remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher, requestTimeout time.Duration) *caller { - caller := &caller{ + c := &caller{ lggr: lggr, remoteCapabilityInfo: remoteCapabilityInfo, localDONInfo: localDonInfo, @@ -58,12 +56,12 @@ func NewRemoteTargetCaller(ctx context.Context, lggr logger.Logger, remoteCapabi case <-ctx.Done(): return case <-ticker.C: - caller.ExpireRequests() + c.ExpireRequests() } } }() - return caller + return c } func (c *caller) ExpireRequests() { @@ -114,19 +112,20 @@ func (c *caller) Execute(ctx context.Context, req commoncap.CapabilityRequest) ( func (c *caller) Receive(msg *types.MessageBody) { c.mutex.Lock() defer c.mutex.Unlock() + // TODO should the dispatcher be passing in a context? + ctx := context.Background() messageID := GetMessageID(msg) - sender := remote.ToPeerID(msg.Sender) req := c.messageIDToExecuteRequest[messageID] if req == nil { - c.lggr.Warnw("received response for unknown message ID ", "messageID", messageID, "sender", sender) + c.lggr.Warnw("received response for unknown message ID ", "messageID", messageID) return } go func() { - if err := req.AddResponse(sender, msg); err != nil { - c.lggr.Errorw("failed to add response to request", "messageID", messageID, "sender", sender, "err", err) + if err := req.OnMessage(ctx, msg); err != nil { + c.lggr.Errorw("failed to add response to request", "messageID", messageID, "err", err) } }() diff --git a/core/capabilities/remote/target/caller_test.go b/core/capabilities/remote/target/caller_test.go index e9880de0362..f5a9efd6c02 100644 --- a/core/capabilities/remote/target/caller_test.go +++ b/core/capabilities/remote/target/caller_test.go @@ -171,7 +171,7 @@ func testCaller(t *testing.T, ctx context.Context, numWorkflowPeers int, workflo callers := make([]commoncap.TargetCapability, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) - caller := target.NewRemoteTargetCaller(ctx, lggr, capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeResponseTimeout) + caller := target.NewCaller(ctx, lggr, capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeResponseTimeout) broker.RegisterReceiverNode(workflowPeers[i], caller) callers[i] = caller } diff --git a/core/capabilities/remote/target/endtoend_test.go b/core/capabilities/remote/target/endtoend_test.go index 86d887a21ec..a95de5e2d24 100644 --- a/core/capabilities/remote/target/endtoend_test.go +++ b/core/capabilities/remote/target/endtoend_test.go @@ -231,7 +231,7 @@ func testRemoteTarget(t *testing.T, ctx context.Context, underlying commoncap.Ta for i := 0; i < numCapabilityPeers; i++ { capabilityPeer := capabilityPeers[i] capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeer) - capabilityNode := target.NewRemoteTargetReceiver(ctx, lggr, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, + capabilityNode := target.NewReceiver(ctx, lggr, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, capabilityNodeResponseTimeout) broker.RegisterReceiverNode(capabilityPeer, capabilityNode) capabilityNodes[i] = capabilityNode @@ -240,7 +240,7 @@ func testRemoteTarget(t *testing.T, ctx context.Context, underlying commoncap.Ta workflowNodes := make([]commoncap.TargetCapability, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) - workflowNode := target.NewRemoteTargetCaller(ctx, lggr, capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeTimeout) + workflowNode := target.NewCaller(ctx, lggr, capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeTimeout) broker.RegisterReceiverNode(workflowPeers[i], workflowNode) workflowNodes[i] = workflowNode } diff --git a/core/capabilities/remote/target/receiver.go b/core/capabilities/remote/target/receiver.go index 461162ffacd..63e6825e961 100644 --- a/core/capabilities/remote/target/receiver.go +++ b/core/capabilities/remote/target/receiver.go @@ -17,12 +17,12 @@ import ( ) type receiverRequest interface { - Receive(ctx context.Context, msg *types.MessageBody) error + OnMessage(ctx context.Context, msg *types.MessageBody) error Expired() bool Cancel(err types.Error, msg string) error } -type remoteTargetReceiver struct { +type receiver struct { lggr logger.Logger peerID p2ptypes.PeerID underlying commoncap.TargetCapability @@ -37,12 +37,12 @@ type remoteTargetReceiver struct { receiveLock sync.Mutex } -var _ types.Receiver = &remoteTargetReceiver{} +var _ types.Receiver = &receiver{} -func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, peerID p2ptypes.PeerID, underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, - workflowDONs map[string]commoncap.DON, dispatcher types.Dispatcher, requestTimeout time.Duration) *remoteTargetReceiver { +func NewReceiver(ctx context.Context, lggr logger.Logger, peerID p2ptypes.PeerID, underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, + workflowDONs map[string]commoncap.DON, dispatcher types.Dispatcher, requestTimeout time.Duration) *receiver { - receiver := &remoteTargetReceiver{ + r := &receiver{ underlying: underlying, peerID: peerID, capInfo: capInfo, @@ -64,15 +64,15 @@ func NewRemoteTargetReceiver(ctx context.Context, lggr logger.Logger, peerID p2p case <-ctx.Done(): return case <-ticker.C: - receiver.ExpireRequests() + r.ExpireRequests() } } }() - return receiver + return r } -func (r *remoteTargetReceiver) ExpireRequests() { +func (r *receiver) ExpireRequests() { r.receiveLock.Lock() defer r.receiveLock.Unlock() @@ -88,7 +88,7 @@ func (r *remoteTargetReceiver) ExpireRequests() { } -func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { +func (r *receiver) Receive(msg *types.MessageBody) { r.receiveLock.Lock() defer r.receiveLock.Unlock() // TODO should the dispatcher be passing in a context? @@ -118,9 +118,9 @@ func (r *remoteTargetReceiver) Receive(msg *types.MessageBody) { req := r.requestIDToRequest[requestID] go func() { - err := req.Receive(ctx, msg) + err := req.OnMessage(ctx, msg) if err != nil { - r.lggr.Errorw("request failed to Receive new message", "request", req, "err", err) + r.lggr.Errorw("request failed to OnMessage new message", "request", req, "err", err) } }() } diff --git a/core/capabilities/remote/target/receiver_test.go b/core/capabilities/remote/target/receiver_test.go index 6e9f7a0d621..d23cc46b93f 100644 --- a/core/capabilities/remote/target/receiver_test.go +++ b/core/capabilities/remote/target/receiver_test.go @@ -142,7 +142,7 @@ func testRemoteTargetReceiver(t *testing.T, ctx context.Context, for i := 0; i < numCapabilityPeers; i++ { capabilityPeer := capabilityPeers[i] capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeer) - capabilityNode := target.NewRemoteTargetReceiver(ctx, lggr, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, + capabilityNode := target.NewReceiver(ctx, lggr, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, capabilityNodeResponseTimeout) broker.RegisterReceiverNode(capabilityPeer, capabilityNode) capabilityNodes[i] = capabilityNode diff --git a/core/capabilities/remote/target/request/caller_request.go b/core/capabilities/remote/target/request/caller_request.go index 765eec44bb4..c12f7419154 100644 --- a/core/capabilities/remote/target/request/caller_request.go +++ b/core/capabilities/remote/target/request/caller_request.go @@ -11,6 +11,7 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/capabilities" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" "github.com/smartcontractkit/chainlink/v2/core/logger" @@ -111,10 +112,16 @@ func (c *callerRequest) Cancel(reason string) { } // TODO addResponse assumes that only one response is received from each peer, if streaming responses need to be supported this will need to be updated -func (c *callerRequest) AddResponse(sender p2ptypes.PeerID, msg *types.MessageBody) error { +func (c *callerRequest) OnMessage(_ context.Context, msg *types.MessageBody) error { c.mux.Lock() defer c.mux.Unlock() + if msg.Sender == nil { + return fmt.Errorf("sender missing from message") + } + + sender := remote.ToPeerID(msg.Sender) + if _, ok := c.responseReceived[sender]; !ok { return fmt.Errorf("response from peer %s not expected", sender) } diff --git a/core/capabilities/remote/target/request/caller_request_test.go b/core/capabilities/remote/target/request/caller_request_test.go index a52d9a284ae..6287399be92 100644 --- a/core/capabilities/remote/target/request/caller_request_test.go +++ b/core/capabilities/remote/target/request/caller_request_test.go @@ -118,9 +118,12 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { MessageId: []byte("messageID"), } - err = request.AddResponse(capabilityPeers[0], msg) + msg.Sender = capabilityPeers[0][:] + err = request.OnMessage(ctx, msg) require.NoError(t, err) - err = request.AddResponse(capabilityPeers[1], msg2) + + msg2.Sender = capabilityPeers[1][:] + err = request.OnMessage(ctx, msg2) require.NoError(t, err) select { @@ -139,9 +142,13 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { workflowDonInfo, dispatcher, 10*time.Minute) require.NoError(t, err) - err = request.AddResponse(capabilityPeers[0], msg) + msg.Sender = capabilityPeers[0][:] + err = request.OnMessage(ctx, msg) require.NoError(t, err) - err = request.AddResponse(NewP2PPeerID(t), msg) + + nonDonPeer := NewP2PPeerID(t) + msg.Sender = nonDonPeer[:] + err = request.OnMessage(ctx, msg) require.NotNil(t, err) select { @@ -161,9 +168,10 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { workflowDonInfo, dispatcher, 10*time.Minute) require.NoError(t, err) - err = request.AddResponse(capabilityPeers[0], msg) + msg.Sender = capabilityPeers[0][:] + err = request.OnMessage(ctx, msg) require.NoError(t, err) - err = request.AddResponse(capabilityPeers[0], msg) + err = request.OnMessage(ctx, msg) require.NotNil(t, err) select { @@ -198,9 +206,12 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { ErrorMsg: "an error", } - err = request.AddResponse(capabilityPeers[0], msgWithError) + msgWithError.Sender = capabilityPeers[0][:] + err = request.OnMessage(ctx, msgWithError) require.NoError(t, err) - err = request.AddResponse(capabilityPeers[1], msgWithError) + + msgWithError.Sender = capabilityPeers[1][:] + err = request.OnMessage(ctx, msgWithError) require.NoError(t, err) response := <-request.ResponseChan() @@ -230,6 +241,7 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { MessageId: []byte("messageID"), Error: types.Error_INTERNAL_ERROR, ErrorMsg: "an error", + Sender: capabilityPeers[0][:], } msgWithError2 := &types.MessageBody{ @@ -241,11 +253,12 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { MessageId: []byte("messageID"), Error: types.Error_INTERNAL_ERROR, ErrorMsg: "an error2", + Sender: capabilityPeers[1][:], } - err = request.AddResponse(capabilityPeers[0], msgWithError) + err = request.OnMessage(ctx, msgWithError) require.NoError(t, err) - err = request.AddResponse(capabilityPeers[1], msgWithError2) + err = request.OnMessage(ctx, msgWithError2) require.NoError(t, err) select { @@ -268,9 +281,12 @@ func Test_CallerRequest_MessageValidation(t *testing.T) { <-dispatcher.msgs assert.Equal(t, 0, len(dispatcher.msgs)) - err = request.AddResponse(capabilityPeers[0], msg) + msg.Sender = capabilityPeers[0][:] + err = request.OnMessage(ctx, msg) require.NoError(t, err) - err = request.AddResponse(capabilityPeers[1], msg) + + msg.Sender = capabilityPeers[1][:] + err = request.OnMessage(ctx, msg) require.NoError(t, err) response := <-request.ResponseChan() diff --git a/core/capabilities/remote/target/request/receiver_request.go b/core/capabilities/remote/target/request/receiver_request.go index 6100d57e29c..0c83f09c2ce 100644 --- a/core/capabilities/remote/target/request/receiver_request.go +++ b/core/capabilities/remote/target/request/receiver_request.go @@ -63,10 +63,14 @@ func NewReceiverRequest(capability capabilities.TargetCapability, capabilityID s } } -func (e *receiverRequest) Receive(ctx context.Context, msg *types.MessageBody) error { +func (e *receiverRequest) OnMessage(ctx context.Context, msg *types.MessageBody) error { e.mux.Lock() defer e.mux.Unlock() + if msg.Sender == nil { + return fmt.Errorf("sender missing from message") + } + requester := remote.ToPeerID(msg.Sender) if err := e.addRequester(requester); err != nil { return fmt.Errorf("failed to add requester to request: %w", err) diff --git a/core/capabilities/remote/target/request/receiver_request_test.go b/core/capabilities/remote/target/request/receiver_request_test.go index 54be175dd37..4689d72de82 100644 --- a/core/capabilities/remote/target/request/receiver_request_test.go +++ b/core/capabilities/remote/target/request/receiver_request_test.go @@ -73,7 +73,7 @@ func Test_ReceiverRequest_MessageValidation(t *testing.T) { require.NoError(t, err) nonDonPeer := NewP2PPeerID(t) - err = req.Receive(context.Background(), &types.MessageBody{ + err = req.OnMessage(context.Background(), &types.MessageBody{ Version: 0, Sender: nonDonPeer[:], Receiver: capabilityPeerID[:], @@ -95,7 +95,7 @@ func Test_ReceiverRequest_MessageValidation(t *testing.T) { err := sendValidRequest(req, workflowPeers, capabilityPeerID, rawRequest) require.NoError(t, err) - err = req.Receive(context.Background(), &types.MessageBody{ + err = req.OnMessage(context.Background(), &types.MessageBody{ Version: 0, Sender: workflowPeers[1][:], Receiver: capabilityPeerID[:], @@ -122,7 +122,7 @@ func Test_ReceiverRequest_MessageValidation(t *testing.T) { err := sendValidRequest(req, workflowPeers, capabilityPeerID, rawRequest) require.NoError(t, err) - err = req.Receive(context.Background(), &types.MessageBody{ + err = req.OnMessage(context.Background(), &types.MessageBody{ Version: 0, Sender: workflowPeers[1][:], Receiver: capabilityPeerID[:], @@ -150,7 +150,7 @@ func Test_ReceiverRequest_MessageValidation(t *testing.T) { err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) require.NoError(t, err) - err = request.Receive(context.Background(), &types.MessageBody{ + err = request.OnMessage(context.Background(), &types.MessageBody{ Version: 0, Sender: workflowPeers[1][:], Receiver: capabilityPeerID[:], @@ -169,12 +169,12 @@ func Test_ReceiverRequest_MessageValidation(t *testing.T) { } type receiverRequest interface { - Receive(ctx context.Context, msg *types.MessageBody) error + OnMessage(ctx context.Context, msg *types.MessageBody) error } func sendValidRequest(request receiverRequest, workflowPeers []p2ptypes.PeerID, capabilityPeerID p2ptypes.PeerID, rawRequest []byte) error { - return request.Receive(context.Background(), &types.MessageBody{ + return request.OnMessage(context.Background(), &types.MessageBody{ Version: 0, Sender: workflowPeers[0][:], Receiver: capabilityPeerID[:], diff --git a/core/services/p2p/types/mocks/peer.go b/core/services/p2p/types/mocks/peer.go index 3a2e218c170..52845e5a999 100644 --- a/core/services/p2p/types/mocks/peer.go +++ b/core/services/p2p/types/mocks/peer.go @@ -115,7 +115,7 @@ func (_m *Peer) Receive() <-chan types.Message { ret := _m.Called() if len(ret) == 0 { - panic("no return value specified for Receive") + panic("no return value specified for OnMessage") } var r0 <-chan types.Message diff --git a/dashboard-lib/k8s-pods/component.go b/dashboard-lib/k8s-pods/component.go index df9a6ac6a69..551eccd1d31 100644 --- a/dashboard-lib/k8s-pods/component.go +++ b/dashboard-lib/k8s-pods/component.go @@ -143,7 +143,7 @@ func New(p Props) []dashboard.Option { ), ), row.WithTimeSeries( - "Receive Bandwidth", + "OnMessage Bandwidth", timeseries.Span(6), timeseries.Height("200px"), timeseries.DataSource(p.PrometheusDataSource),