From 8236209e54b75d13f5061a38d9de12c341ce40e9 Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Mon, 2 Dec 2024 13:49:36 +0000 Subject: [PATCH] reuse capability registration logic from the trigger in the executable capability --- core/capabilities/launcher.go | 21 +- .../remote/aggregation/default_mode.go | 58 ++ .../remote/aggregation/default_mode_test.go | 51 ++ core/capabilities/remote/dispatcher.go | 31 +- core/capabilities/remote/executable/client.go | 35 +- .../remote/executable/client_test.go | 292 ++++--- .../remote/executable/endtoend_test.go | 723 ++++++++++++++++-- .../executable/request/client_request.go | 2 +- .../executable/request/server_request.go | 9 +- core/capabilities/remote/executable/server.go | 156 ++-- .../remote/executable/server_test.go | 226 ++++-- .../{ => messagecache}/message_cache.go | 18 +- .../{ => messagecache}/message_cache_test.go | 8 +- .../remote/registration/client.go | 138 ++++ .../remote/registration/client_test.go | 92 +++ .../remote/registration/server.go | 168 ++++ .../remote/registration/server_test.go | 123 +++ core/capabilities/remote/trigger_publisher.go | 219 ++---- .../capabilities/remote/trigger_subscriber.go | 194 +++-- .../remote/trigger_subscriber_test.go | 1 - core/capabilities/remote/types/types.go | 4 +- core/capabilities/remote/utils.go | 59 +- core/capabilities/remote/utils_test.go | 39 - core/scripts/go.mod | 2 +- core/scripts/go.sum | 4 +- deployment/go.mod | 2 +- deployment/go.sum | 4 +- go.mod | 2 +- go.sum | 4 +- integration-tests/go.mod | 2 +- integration-tests/go.sum | 4 +- integration-tests/load/go.mod | 2 +- integration-tests/load/go.sum | 4 +- 33 files changed, 2015 insertions(+), 682 deletions(-) create mode 100644 core/capabilities/remote/aggregation/default_mode.go create mode 100644 core/capabilities/remote/aggregation/default_mode_test.go rename core/capabilities/remote/{ => messagecache}/message_cache.go (77%) rename core/capabilities/remote/{ => messagecache}/message_cache_test.go (91%) 
create mode 100644 core/capabilities/remote/registration/client.go create mode 100644 core/capabilities/remote/registration/client_test.go create mode 100644 core/capabilities/remote/registration/server.go create mode 100644 core/capabilities/remote/registration/server_test.go diff --git a/core/capabilities/launcher.go b/core/capabilities/launcher.go index 27c43fe0a53..8f6c51d4c08 100644 --- a/core/capabilities/launcher.go +++ b/core/capabilities/launcher.go @@ -19,6 +19,7 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/services" "github.com/smartcontractkit/chainlink-common/pkg/values" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/aggregation" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/executable" remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/capabilities/streams" @@ -60,6 +61,7 @@ func unmarshalCapabilityConfig(data []byte) (capabilities.CapabilityConfiguratio var remoteTriggerConfig *capabilities.RemoteTriggerConfig var remoteTargetConfig *capabilities.RemoteTargetConfig + var remoteExecutableConfig *capabilities.RemoteExecutableConfig switch cconf.GetRemoteConfig().(type) { case *capabilitiespb.CapabilityConfig_RemoteTriggerConfig: @@ -73,6 +75,12 @@ func unmarshalCapabilityConfig(data []byte) (capabilities.CapabilityConfiguratio prtc := cconf.GetRemoteTargetConfig() remoteTargetConfig = &capabilities.RemoteTargetConfig{} remoteTargetConfig.RequestHashExcludedAttributes = prtc.RequestHashExcludedAttributes + case *capabilitiespb.CapabilityConfig_RemoteExecutableConfig: + prtc := cconf.GetRemoteExecutableConfig() + remoteExecutableConfig = &capabilities.RemoteExecutableConfig{} + remoteExecutableConfig.RequestHashExcludedAttributes = prtc.RequestHashExcludedAttributes + remoteExecutableConfig.RegistrationRefresh = 
prtc.RegistrationRefresh.AsDuration() + remoteExecutableConfig.RegistrationExpiry = prtc.RegistrationExpiry.AsDuration() } dc, err := values.FromMapValueProto(cconf.DefaultConfig) @@ -81,9 +89,10 @@ func unmarshalCapabilityConfig(data []byte) (capabilities.CapabilityConfiguratio } return capabilities.CapabilityConfiguration{ - DefaultConfig: dc, - RemoteTriggerConfig: remoteTriggerConfig, - RemoteTargetConfig: remoteTargetConfig, + DefaultConfig: dc, + RemoteTriggerConfig: remoteTriggerConfig, + RemoteTargetConfig: remoteTargetConfig, + RemoteExecutableConfig: remoteExecutableConfig, }, nil } @@ -280,7 +289,7 @@ func (w *launcher) addRemoteCapabilities(ctx context.Context, myDON registrysync w.lggr, ) } else { - aggregator = remote.NewDefaultModeAggregator(uint32(remoteDON.F) + 1) + aggregator = aggregation.NewDefaultModeAggregator(uint32(remoteDON.F) + 1) } // TODO: We need to implement a custom, Mercury-specific @@ -307,7 +316,9 @@ func (w *launcher) addRemoteCapabilities(ctx context.Context, myDON registrysync case capabilities.CapabilityTypeAction: newActionFn := func(info capabilities.CapabilityInfo) (capabilityService, error) { client := executable.NewClient( + capabilityConfig.RemoteExecutableConfig, info, + remoteDON.DON, myDON.DON, w.dispatcher, defaultTargetRequestTimeout, @@ -325,7 +336,9 @@ func (w *launcher) addRemoteCapabilities(ctx context.Context, myDON registrysync case capabilities.CapabilityTypeTarget: newTargetFn := func(info capabilities.CapabilityInfo) (capabilityService, error) { client := executable.NewClient( + capabilityConfig.RemoteExecutableConfig, info, + remoteDON.DON, myDON.DON, w.dispatcher, defaultTargetRequestTimeout, diff --git a/core/capabilities/remote/aggregation/default_mode.go b/core/capabilities/remote/aggregation/default_mode.go new file mode 100644 index 00000000000..3d5e262920f --- /dev/null +++ b/core/capabilities/remote/aggregation/default_mode.go @@ -0,0 +1,58 @@ +package aggregation + +import ( + "crypto/sha256" + 
"encoding/hex" + "errors" + "fmt" + + commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" + remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" +) + +// Default MODE Aggregator needs a configurable number of identical responses for aggregation to succeed +type defaultModeAggregator struct { + minIdenticalResponses uint32 +} + +var _ remotetypes.Aggregator = &defaultModeAggregator{} + +func NewDefaultModeAggregator(minIdenticalResponses uint32) *defaultModeAggregator { + return &defaultModeAggregator{ + minIdenticalResponses: minIdenticalResponses, + } +} + +func (a *defaultModeAggregator) Aggregate(_ string, responses [][]byte) (commoncap.TriggerResponse, error) { + found, err := AggregateModeRaw(responses, a.minIdenticalResponses) + if err != nil { + return commoncap.TriggerResponse{}, fmt.Errorf("failed to aggregate responses, err: %w", err) + } + + unmarshaled, err := pb.UnmarshalTriggerResponse(found) + if err != nil { + return commoncap.TriggerResponse{}, fmt.Errorf("failed to unmarshal aggregated responses, err: %w", err) + } + return unmarshaled, nil +} + +func AggregateModeRaw(elemList [][]byte, minIdenticalResponses uint32) ([]byte, error) { + hashToCount := make(map[string]uint32) + var found []byte + for _, elem := range elemList { + hasher := sha256.New() + hasher.Write(elem) + sha := hex.EncodeToString(hasher.Sum(nil)) + hashToCount[sha]++ + if hashToCount[sha] >= minIdenticalResponses { + found = elem + // update in case we find another elem with an even higher count + minIdenticalResponses = hashToCount[sha] + } + } + if found == nil { + return nil, errors.New("not enough identical responses found") + } + return found, nil +} diff --git a/core/capabilities/remote/aggregation/default_mode_test.go b/core/capabilities/remote/aggregation/default_mode_test.go new file mode 100644 index 00000000000..7c7d615e17a --- /dev/null +++ 
b/core/capabilities/remote/aggregation/default_mode_test.go @@ -0,0 +1,51 @@ +package aggregation + +import ( + "testing" + + "github.com/stretchr/testify/require" + + commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" + "github.com/smartcontractkit/chainlink-common/pkg/values" +) + +var ( + triggerEvent1 = map[string]any{"event": "triggerEvent1"} + triggerEvent2 = map[string]any{"event": "triggerEvent2"} +) + +func TestDefaultModeAggregator_Aggregate(t *testing.T) { + val, err := values.NewMap(triggerEvent1) + require.NoError(t, err) + capResponse1 := commoncap.TriggerResponse{ + Event: commoncap.TriggerEvent{ + Outputs: val, + }, + Err: nil, + } + marshaled1, err := pb.MarshalTriggerResponse(capResponse1) + require.NoError(t, err) + + val2, err := values.NewMap(triggerEvent2) + require.NoError(t, err) + capResponse2 := commoncap.TriggerResponse{ + Event: commoncap.TriggerEvent{ + Outputs: val2, + }, + Err: nil, + } + marshaled2, err := pb.MarshalTriggerResponse(capResponse2) + require.NoError(t, err) + + agg := NewDefaultModeAggregator(2) + _, err = agg.Aggregate("", [][]byte{marshaled1}) + require.Error(t, err) + + _, err = agg.Aggregate("", [][]byte{marshaled1, marshaled2}) + require.Error(t, err) + + res, err := agg.Aggregate("", [][]byte{marshaled1, marshaled2, marshaled1}) + require.NoError(t, err) + require.Equal(t, res, capResponse1) +} diff --git a/core/capabilities/remote/dispatcher.go b/core/capabilities/remote/dispatcher.go index e3229d35c1e..54c2ce8298b 100644 --- a/core/capabilities/remote/dispatcher.go +++ b/core/capabilities/remote/dispatcher.go @@ -3,6 +3,7 @@ package remote import ( "context" "fmt" + "strconv" "sync" "time" @@ -42,8 +43,8 @@ type dispatcher struct { } type key struct { - capId string - donId uint32 + capID string + donID uint32 } var _ services.Service = &dispatcher{} @@ -74,7 +75,7 @@ func (d *dispatcher) Start(ctx context.Context) error 
{ d.peer = d.peerWrapper.GetPeer() d.peerID = d.peer.ID() if d.peer == nil { - return fmt.Errorf("peer is not initialized") + return errors.New("peer is not initialized") } d.wg.Add(1) go func() { @@ -96,20 +97,20 @@ func (d *dispatcher) Close() error { var capReceiveChannelUsage = promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "capability_receive_channel_usage", Help: "The usage of the receive channel for each capability, 0 indicates empty, 1 indicates full.", -}, []string{"capabilityId", "donId"}) +}, []string{"capabilityId", "donID"}) type receiver struct { cancel context.CancelFunc ch chan *types.MessageBody } -func (d *dispatcher) SetReceiver(capabilityId string, donId uint32, rec types.Receiver) error { +func (d *dispatcher) SetReceiver(capabilityID string, donID uint32, rec types.Receiver) error { d.mu.Lock() defer d.mu.Unlock() - k := key{capabilityId, donId} + k := key{capabilityID, donID} _, ok := d.receivers[k] if ok { - return fmt.Errorf("%w: receiver already exists for capability %s and don %d", ErrReceiverExists, capabilityId, donId) + return fmt.Errorf("%w: receiver already exists for capability %s and don %d", ErrReceiverExists, capabilityID, donID) } receiverCh := make(chan *types.MessageBody, d.cfg.ReceiverBufferSize()) @@ -134,24 +135,24 @@ func (d *dispatcher) SetReceiver(capabilityId string, donId uint32, rec types.Re ch: receiverCh, } - d.lggr.Debugw("receiver set", "capabilityId", capabilityId, "donId", donId) + d.lggr.Debugw("receiver set", "capabilityID", capabilityID, "donID", donID) return nil } -func (d *dispatcher) RemoveReceiver(capabilityId string, donId uint32) { +func (d *dispatcher) RemoveReceiver(capabilityID string, donID uint32) { d.mu.Lock() defer d.mu.Unlock() - receiverKey := key{capabilityId, donId} + receiverKey := key{capabilityID, donID} if receiver, ok := d.receivers[receiverKey]; ok { receiver.cancel() delete(d.receivers, receiverKey) - d.lggr.Debugw("receiver removed", "capabilityId", capabilityId, "donId", donId) + 
d.lggr.Debugw("receiver removed", "capabilityID", capabilityID, "donID", donID) } } func (d *dispatcher) Send(peerID p2ptypes.PeerID, msgBody *types.MessageBody) error { - msgBody.Version = uint32(d.cfg.SupportedVersion()) + msgBody.Version = uint32(d.cfg.SupportedVersion()) //nolint:gosec // disable G115: supported version is not expected to exceed uint32 max value msgBody.Sender = d.peerID[:] msgBody.Receiver = peerID[:] msgBody.Timestamp = time.Now().UnixMilli() @@ -194,17 +195,17 @@ func (d *dispatcher) receive() { receiver, ok := d.receivers[k] d.mu.RUnlock() if !ok { - d.lggr.Debugw("received message for unregistered capability", "capabilityId", SanitizeLogString(k.capId), "donId", k.donId) + d.lggr.Debugw("received message for unregistered capability", "capabilityId", SanitizeLogString(k.capID), "donID", k.donID) d.tryRespondWithError(msg.Sender, body, types.Error_CAPABILITY_NOT_FOUND) continue } receiverQueueUsage := float64(len(receiver.ch)) / float64(d.cfg.ReceiverBufferSize()) - capReceiveChannelUsage.WithLabelValues(k.capId, fmt.Sprint(k.donId)).Set(receiverQueueUsage) + capReceiveChannelUsage.WithLabelValues(k.capID, strconv.FormatUint(uint64(k.donID), 10)).Set(receiverQueueUsage) select { case receiver.ch <- body: default: - d.lggr.Warnw("receiver channel full, dropping message", "capabilityId", k.capId, "donId", k.donId) + d.lggr.Warnw("receiver channel full, dropping message", "capabilityId", k.capID, "donID", k.donID) } } } diff --git a/core/capabilities/remote/executable/client.go b/core/capabilities/remote/executable/client.go index 9af32eb5f8e..be638742e53 100644 --- a/core/capabilities/remote/executable/client.go +++ b/core/capabilities/remote/executable/client.go @@ -12,6 +12,7 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/services" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/executable/request" + 
"github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/registration" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/logger" ) @@ -30,6 +31,7 @@ type client struct { localDONInfo commoncap.DON dispatcher types.Dispatcher requestTimeout time.Duration + registrationClient *registration.Client requestIDToCallerRequest map[string]*request.ClientRequest mutex sync.Mutex @@ -41,8 +43,14 @@ var _ commoncap.ExecutableCapability = &client{} var _ types.Receiver = &client{} var _ services.Service = &client{} -func NewClient(remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo commoncap.DON, dispatcher types.Dispatcher, +func NewClient(remoteExecutableConfig *commoncap.RemoteExecutableConfig, remoteCapabilityInfo commoncap.CapabilityInfo, remoteDonInfo commoncap.DON, localDonInfo commoncap.DON, dispatcher types.Dispatcher, requestTimeout time.Duration, lggr logger.Logger) *client { + if remoteExecutableConfig == nil { + lggr.Info("no remote config provided, using default values") + remoteExecutableConfig = &commoncap.RemoteExecutableConfig{} + } + remoteExecutableConfig.ApplyDefaults() + return &client{ lggr: lggr.Named("ExecutableCapabilityClient"), remoteCapabilityInfo: remoteCapabilityInfo, @@ -50,12 +58,17 @@ func NewClient(remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo commo dispatcher: dispatcher, requestTimeout: requestTimeout, requestIDToCallerRequest: make(map[string]*request.ClientRequest), + registrationClient: registration.NewClient(lggr, types.MethodRegisterToWorkflow, remoteExecutableConfig.RegistrationRefresh, remoteCapabilityInfo, remoteDonInfo, localDonInfo, dispatcher, "ExecutableClient"), stopCh: make(services.StopChan), } } func (c *client) Start(ctx context.Context) error { return c.StartOnce(c.Name(), func() error { + if err := c.registrationClient.Start(ctx); err != nil { + return fmt.Errorf("failed to start registration client: %w", err) + } + 
c.wg.Add(1) go func() { defer c.wg.Done() @@ -77,6 +90,12 @@ func (c *client) Close() error { close(c.stopCh) c.cancelAllRequests(errors.New("client closed")) c.wg.Wait() + + err := c.registrationClient.Close() + if err != nil { + c.lggr.Errorw("failed to close registration client", "err", err) + } + c.lggr.Info("ExecutableCapability closed") return nil }) @@ -140,10 +159,24 @@ func (c *client) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { } func (c *client) RegisterToWorkflow(ctx context.Context, registerRequest commoncap.RegisterToWorkflowRequest) error { + rawRequest, err := pb.MarshalRegisterToWorkflowRequest(registerRequest) + if err != nil { + return fmt.Errorf("failed to marshal request: %w", err) + } + workflowID := registerRequest.Metadata.WorkflowID + if workflowID == "" { + return errors.New("empty workflowID") + } + + if err = c.registrationClient.RegisterWorkflow(workflowID, rawRequest); err != nil { + return fmt.Errorf("failed to register workflow: %w", err) + } + return nil } func (c *client) UnregisterFromWorkflow(ctx context.Context, unregisterRequest commoncap.UnregisterFromWorkflowRequest) error { + c.registrationClient.UnregisterWorkflow(unregisterRequest.Metadata.WorkflowID) return nil } diff --git a/core/capabilities/remote/executable/client_test.go b/core/capabilities/remote/executable/client_test.go index 5c4da350b9e..3b92d487e1d 100644 --- a/core/capabilities/remote/executable/client_test.go +++ b/core/capabilities/remote/executable/client_test.go @@ -2,6 +2,7 @@ package executable_test import ( "context" + "fmt" "sync" "testing" "time" @@ -11,9 +12,11 @@ import ( commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" + "github.com/smartcontractkit/chainlink-common/pkg/services" "github.com/smartcontractkit/chainlink-common/pkg/services/servicetest" "github.com/smartcontractkit/chainlink-common/pkg/values" 
"github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/executable" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/registration" remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" @@ -44,46 +47,33 @@ func Test_Client_DonTopologies(t *testing.T) { assert.Equal(t, "aValue1", mp.(map[string]any)["response"].(string)) } - capability := &TestCapability{} + capability := func() commoncap.ExecutableCapability { return &TestCapability{} } responseTimeOut := 10 * time.Minute - var methods []func(caller commoncap.ExecutableCapability) - - methods = append(methods, func(caller commoncap.ExecutableCapability) { + method := func(caller commoncap.ExecutableCapability) { executeInputs, err := values.NewMap(map[string]any{"executeValue1": "aValue1"}) require.NoError(t, err) executeMethod(ctx, caller, transmissionSchedule, executeInputs, responseTest, t) - }) + } - methods = append(methods, func(caller commoncap.ExecutableCapability) { - registerToWorkflowMethod(ctx, caller, transmissionSchedule, func(t *testing.T, responseError error) { - require.NoError(t, responseError) - }, t) - }) + remoteExecutableConfig := &commoncap.RemoteExecutableConfig{} - methods = append(methods, func(caller commoncap.ExecutableCapability) { - unregisterFromWorkflowMethod(ctx, caller, transmissionSchedule, func(t *testing.T, responseError error) { - require.NoError(t, responseError) - }, t) - }) + testClient(t, remoteExecutableConfig, 1, responseTimeOut, 1, 0, + capability, method) - for _, method := range methods { - testClient(t, 1, responseTimeOut, 1, 0, - capability, method) + testClient(t, remoteExecutableConfig, 10, responseTimeOut, 1, 0, + capability, method) - testClient(t, 10, responseTimeOut, 1, 0, - capability, method) + testClient(t, remoteExecutableConfig, 1, responseTimeOut, 10, 3, + 
capability, method) - testClient(t, 1, responseTimeOut, 10, 3, - capability, method) + testClient(t, remoteExecutableConfig, 10, responseTimeOut, 10, 3, + capability, method) - testClient(t, 10, responseTimeOut, 10, 3, - capability, method) + testClient(t, remoteExecutableConfig, 10, responseTimeOut, 10, 9, + capability, method) - testClient(t, 10, responseTimeOut, 10, 9, - capability, method) - } } func Test_Client_TransmissionSchedules(t *testing.T) { @@ -106,14 +96,14 @@ func Test_Client_TransmissionSchedules(t *testing.T) { }) require.NoError(t, err) - testClient(t, 1, responseTimeOut, 1, 0, - capability, func(caller commoncap.ExecutableCapability) { + testClient(t, &commoncap.RemoteExecutableConfig{}, 1, responseTimeOut, 1, 0, + func() commoncap.ExecutableCapability { return capability }, func(caller commoncap.ExecutableCapability) { executeInputs, err2 := values.NewMap(map[string]any{"executeValue1": "aValue1"}) require.NoError(t, err2) executeMethod(ctx, caller, transmissionSchedule, executeInputs, responseTest, t) }) - testClient(t, 10, responseTimeOut, 10, 3, - capability, func(caller commoncap.ExecutableCapability) { + testClient(t, &commoncap.RemoteExecutableConfig{}, 10, responseTimeOut, 10, 3, + func() commoncap.ExecutableCapability { return capability }, func(caller commoncap.ExecutableCapability) { executeInputs, err2 := values.NewMap(map[string]any{"executeValue1": "aValue1"}) require.NoError(t, err2) executeMethod(ctx, caller, transmissionSchedule, executeInputs, responseTest, t) @@ -125,14 +115,14 @@ func Test_Client_TransmissionSchedules(t *testing.T) { }) require.NoError(t, err) - testClient(t, 1, responseTimeOut, 1, 0, - capability, func(caller commoncap.ExecutableCapability) { + testClient(t, &commoncap.RemoteExecutableConfig{}, 1, responseTimeOut, 1, 0, + func() commoncap.ExecutableCapability { return capability }, func(caller commoncap.ExecutableCapability) { executeInputs, err := values.NewMap(map[string]any{"executeValue1": "aValue1"}) 
require.NoError(t, err) executeMethod(ctx, caller, transmissionSchedule, executeInputs, responseTest, t) }) - testClient(t, 10, responseTimeOut, 10, 3, - capability, func(caller commoncap.ExecutableCapability) { + testClient(t, &commoncap.RemoteExecutableConfig{}, 10, responseTimeOut, 10, 3, + func() commoncap.ExecutableCapability { return capability }, func(caller commoncap.ExecutableCapability) { executeInputs, err := values.NewMap(map[string]any{"executeValue1": "aValue1"}) require.NoError(t, err) executeMethod(ctx, caller, transmissionSchedule, executeInputs, responseTest, t) @@ -156,8 +146,8 @@ func Test_Client_TimesOutIfInsufficientCapabilityPeerResponses(t *testing.T) { // number of capability peers is less than F + 1 - testClient(t, 10, 1*time.Second, 10, 11, - capability, + testClient(t, &commoncap.RemoteExecutableConfig{}, 10, 1*time.Second, 10, 11, + func() commoncap.ExecutableCapability { return capability }, func(caller commoncap.ExecutableCapability) { executeInputs, err := values.NewMap(map[string]any{"executeValue1": "aValue1"}) require.NoError(t, err) @@ -165,10 +155,83 @@ func Test_Client_TimesOutIfInsufficientCapabilityPeerResponses(t *testing.T) { }) } -func testClient(t *testing.T, numWorkflowPeers int, workflowNodeResponseTimeout time.Duration, - numCapabilityPeers int, capabilityDonF uint8, underlying commoncap.ExecutableCapability, - method func(caller commoncap.ExecutableCapability)) { +func Test_Client_RegisterAndUnregisterWorkflows(t *testing.T) { + ctx := testutils.Context(t) + + responseTest := func(t *testing.T, responseError error) { + require.NoError(t, responseError) + } + + responseTimeOut := 10 * time.Minute + + clientSideCapabilities := make([]commoncap.ExecutableCapability, 0) + mux := sync.Mutex{} + method := func(capability commoncap.ExecutableCapability) { + mux.Lock() + defer mux.Unlock() + registerToWorkflowMethod(ctx, capability, responseTest, t) + clientSideCapabilities = append(clientSideCapabilities, capability) + } + + 
remoteExecutableConfig := &commoncap.RemoteExecutableConfig{ + RegistrationRefresh: 100 * time.Millisecond, + RegistrationExpiry: 1 * time.Second, + } + + var serverSideCapabilities []*TestCapability + testClient(t, remoteExecutableConfig, 4, responseTimeOut, 4, 1, + func() commoncap.ExecutableCapability { + capability := &TestCapability{} + serverSideCapabilities = append(serverSideCapabilities, capability) + return capability + }, method) + + require.Eventually(t, func() bool { + for _, testCapability := range serverSideCapabilities { + if len(testCapability.GetRegisterRequests()) != 1 { + return false + } + } + + return true + }, 10*time.Second, 100*time.Millisecond, "expected a registration request to be received by all server side capabilities") + + // Wait a few multiple of the refresh interval and confirm that the capabilities have only 1 registration request and 0 unregister requests + time.Sleep(remoteExecutableConfig.RegistrationRefresh * 4) + + for _, testCapability := range serverSideCapabilities { + assert.Len(t, testCapability.GetRegisterRequests(), 1) + assert.Empty(t, testCapability.GetUnregisterRequests()) + } + + // Unregister from workflow + for _, capability := range clientSideCapabilities { + unregisterFromWorkflowMethod(ctx, capability, responseTest, t) + } + + require.Eventually(t, func() bool { + for _, testCapability := range serverSideCapabilities { + if len(testCapability.GetUnregisterRequests()) != 1 { + return false + } + } + return true + }, 10*time.Second, 100*time.Millisecond, "expected a registration request to be received by all server side capabilities") + + // Wait a few multiple of the refresh interval and confirm that the capabilities have only 1 registration request and 1 unregister requests + time.Sleep(remoteExecutableConfig.RegistrationRefresh * 4) + + for _, testCapability := range serverSideCapabilities { + assert.Len(t, testCapability.GetRegisterRequests(), 1) + assert.Len(t, testCapability.GetUnregisterRequests(), 1) + 
} +} + +func testClient(t *testing.T, remoteExecutableConfig *commoncap.RemoteExecutableConfig, numWorkflowPeers int, workflowNodeResponseTimeout time.Duration, + numCapabilityPeers int, capabilityDonF uint8, capFactory func() commoncap.ExecutableCapability, + method func(caller commoncap.ExecutableCapability)) []*clientTestServer { lggr := logger.TestLogger(t) + remoteExecutableConfig.ApplyDefaults() capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { @@ -183,7 +246,7 @@ func testClient(t *testing.T, numWorkflowPeers int, workflowNodeResponseTimeout capInfo := commoncap.CapabilityInfo{ ID: "cap_id@1.0.0", - CapabilityType: commoncap.CapabilityTypeTrigger, + CapabilityType: commoncap.CapabilityTypeAction, Description: "Remote Executable Capability", DON: &capDonInfo, } @@ -200,31 +263,33 @@ func testClient(t *testing.T, numWorkflowPeers int, workflowNodeResponseTimeout broker := newTestAsyncMessageBroker(t, 100) - receivers := make([]remotetypes.Receiver, numCapabilityPeers) + testServers := make([]*clientTestServer, numCapabilityPeers) for i := 0; i < numCapabilityPeers; i++ { capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeers[i]) - receiver := newTestServer(capabilityPeers[i], capabilityDispatcher, workflowDonInfo, underlying) - broker.RegisterReceiverNode(capabilityPeers[i], receiver) - receivers[i] = receiver + testServer := newTestServer(lggr, capabilityPeers[i], capInfo, remoteExecutableConfig.RegistrationExpiry, capabilityDispatcher, workflowDonInfo, capFactory()) + broker.RegisterReceiverNode(capabilityPeers[i], testServer) + testServers[i] = testServer + servicetest.Run(t, testServer) } - callers := make([]commoncap.ExecutableCapability, numWorkflowPeers) + clients := make([]commoncap.ExecutableCapability, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) - caller := executable.NewClient(capInfo, 
workflowDonInfo, workflowPeerDispatcher, workflowNodeResponseTimeout, lggr) - servicetest.Run(t, caller) - broker.RegisterReceiverNode(workflowPeers[i], caller) - callers[i] = caller + client := executable.NewClient(remoteExecutableConfig, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher, + workflowNodeResponseTimeout, lggr) + servicetest.Run(t, client) + broker.RegisterReceiverNode(workflowPeers[i], client) + clients[i] = client } servicetest.Run(t, broker) wg := &sync.WaitGroup{} - wg.Add(len(callers)) + wg.Add(len(clients)) // Fire off all the requests - for _, caller := range callers { + for _, caller := range clients { go func(caller commoncap.ExecutableCapability) { defer wg.Done() method(caller) @@ -232,9 +297,11 @@ func testClient(t *testing.T, numWorkflowPeers int, workflowNodeResponseTimeout } wg.Wait() + + return testServers } -func registerToWorkflowMethod(ctx context.Context, caller commoncap.ExecutableCapability, transmissionSchedule *values.Map, +func registerToWorkflowMethod(ctx context.Context, caller commoncap.ExecutableCapability, responseTest func(t *testing.T, responseError error), t *testing.T) { err := caller.RegisterToWorkflow(ctx, commoncap.RegisterToWorkflowRequest{ Metadata: commoncap.RegistrationMetadata{ @@ -242,13 +309,12 @@ func registerToWorkflowMethod(ctx context.Context, caller commoncap.ExecutableCa ReferenceID: stepReferenceID1, WorkflowOwner: workflowOwnerID, }, - Config: transmissionSchedule, }) responseTest(t, err) } -func unregisterFromWorkflowMethod(ctx context.Context, caller commoncap.ExecutableCapability, transmissionSchedule *values.Map, +func unregisterFromWorkflowMethod(ctx context.Context, caller commoncap.ExecutableCapability, responseTest func(t *testing.T, responseError error), t *testing.T) { err := caller.UnregisterFromWorkflow(ctx, commoncap.UnregisterFromWorkflowRequest{ Metadata: commoncap.RegistrationMetadata{ @@ -256,7 +322,6 @@ func unregisterFromWorkflowMethod(ctx context.Context, caller 
commoncap.Executab ReferenceID: stepReferenceID1, WorkflowOwner: workflowOwnerID, }, - Config: transmissionSchedule, }) responseTest(t, err) @@ -280,6 +345,8 @@ func executeMethod(ctx context.Context, caller commoncap.ExecutableCapability, t // Simple client that only responds once it has received a message from each workflow peer type clientTestServer struct { + services.StateMachine + lggr logger.Logger peerID p2ptypes.PeerID dispatcher remotetypes.Dispatcher workflowDonInfo commoncap.DON @@ -287,82 +354,111 @@ type clientTestServer struct { executableCapability commoncap.ExecutableCapability + registrationServer *registration.Server + mux sync.Mutex } -func newTestServer(peerID p2ptypes.PeerID, dispatcher remotetypes.Dispatcher, workflowDonInfo commoncap.DON, +func newTestServer(lggr logger.Logger, peerID p2ptypes.PeerID, capInfo commoncap.CapabilityInfo, + registrationExpiry time.Duration, dispatcher remotetypes.Dispatcher, workflowDonInfo commoncap.DON, executableCapability commoncap.ExecutableCapability) *clientTestServer { + + target := &executable.TargetAdapter{Capability: executableCapability} + + workflowDONs := map[uint32]commoncap.DON{ + workflowDonInfo.ID: workflowDonInfo, + } + return &clientTestServer{ + lggr: lggr, dispatcher: dispatcher, workflowDonInfo: workflowDonInfo, peerID: peerID, messageIDToSenders: make(map[string]map[p2ptypes.PeerID]bool), executableCapability: executableCapability, + registrationServer: registration.NewServer(lggr, target, capInfo, registrationExpiry, workflowDONs, "testExecutableServer"), } } -func (t *clientTestServer) Receive(_ context.Context, msg *remotetypes.MessageBody) { - t.mux.Lock() - defer t.mux.Unlock() +func (r *clientTestServer) Start(ctx context.Context) error { + return r.StartOnce(r.peerID.String(), func() error { + if err := r.registrationServer.Start(ctx); err != nil { + return fmt.Errorf("failed to start capability register: %w", err) + } + return nil + }) +} + +func (r *clientTestServer) Close() 
error { + r.IfNotStopped(func() { + if err := r.registrationServer.Close(); err != nil { + r.lggr.Errorf("failed to close capability register: %v", err) + } + }) + + return nil +} + +func (r *clientTestServer) Receive(ctx context.Context, msg *remotetypes.MessageBody) { + r.mux.Lock() + defer r.mux.Unlock() sender := toPeerID(msg.Sender) - messageID, err := executable.GetMessageID(msg) - if err != nil { - panic(err) - } - if t.messageIDToSenders[messageID] == nil { - t.messageIDToSenders[messageID] = make(map[p2ptypes.PeerID]bool) - } + switch msg.Method { + case remotetypes.MethodExecute: + messageID, err := executable.GetMessageID(msg) + if err != nil { + panic(err) + } - sendersOfMessageID := t.messageIDToSenders[messageID] - if sendersOfMessageID[sender] { - panic("received duplicate message") - } + if r.messageIDToSenders[messageID] == nil { + r.messageIDToSenders[messageID] = make(map[p2ptypes.PeerID]bool) + } - sendersOfMessageID[sender] = true + sendersOfMessageID := r.messageIDToSenders[messageID] + if sendersOfMessageID[sender] { + panic("received duplicate message") + } - if len(t.messageIDToSenders[messageID]) == len(t.workflowDonInfo.Members) { - switch msg.Method { - case remotetypes.MethodExecute: + sendersOfMessageID[sender] = true + if len(r.messageIDToSenders[messageID]) == len(r.workflowDonInfo.Members) { capabilityRequest, err := pb.UnmarshalCapabilityRequest(msg.Payload) if err != nil { panic(err) } - resp, responseErr := t.executableCapability.Execute(context.Background(), capabilityRequest) + resp, responseErr := r.executableCapability.Execute(context.Background(), capabilityRequest) payload, marshalErr := pb.MarshalCapabilityResponse(resp) - t.sendResponse(messageID, responseErr, payload, marshalErr) + r.sendResponse(messageID, responseErr, payload, marshalErr) + } - case remotetypes.MethodRegisterToWorkflow: - registerRequest, err := pb.UnmarshalRegisterToWorkflowRequest(msg.Payload) - if err != nil { - panic(err) - } - responseErr := 
t.executableCapability.RegisterToWorkflow(context.Background(), registerRequest) - t.sendResponse(messageID, responseErr, nil, nil) - case remotetypes.MethodUnregisterFromWorkflow: - unregisterRequest, err := pb.UnmarshalUnregisterFromWorkflowRequest(msg.Payload) - if err != nil { - panic(err) - } - responseErr := t.executableCapability.UnregisterFromWorkflow(context.Background(), unregisterRequest) - t.sendResponse(messageID, responseErr, nil, nil) - default: - panic("unknown method") + case remotetypes.MethodRegisterToWorkflow: + registerRequest, err := pb.UnmarshalRegisterToWorkflowRequest(msg.Payload) + if err != nil { + panic(err) + } + + err = r.registrationServer.Register(ctx, msg, sender, registerRequest.Metadata.WorkflowID, registerRequest.Metadata.ReferenceID) + if err != nil { + panic(err) } + case remotetypes.MethodUnregisterFromWorkflow: + panic("unexpected call, client should explicitly unregister from workflow, expiration of registration is expected to take care of this") + default: + panic("unknown method") } } -func (t *clientTestServer) sendResponse(messageID string, responseErr error, +func (r *clientTestServer) sendResponse(messageID string, responseErr error, payload []byte, marshalErr error) { - for receiver := range t.messageIDToSenders[messageID] { + for receiver := range r.messageIDToSenders[messageID] { var responseMsg = &remotetypes.MessageBody{ CapabilityId: "cap_id@1.0.0", CapabilityDonId: 1, - CallerDonId: t.workflowDonInfo.ID, + CallerDonId: r.workflowDonInfo.ID, Method: remotetypes.MethodExecute, MessageId: []byte(messageID), - Sender: t.peerID[:], + Sender: r.peerID[:], Receiver: receiver[:], } @@ -375,7 +471,7 @@ func (t *clientTestServer) sendResponse(messageID string, responseErr error, responseMsg.Payload = payload } - err := t.dispatcher.Send(receiver, responseMsg) + err := r.dispatcher.Send(receiver, responseMsg) if err != nil { panic(err) } diff --git a/core/capabilities/remote/executable/endtoend_test.go 
b/core/capabilities/remote/executable/endtoend_test.go index 4e78fead87e..cfc6c8c2037 100644 --- a/core/capabilities/remote/executable/endtoend_test.go +++ b/core/capabilities/remote/executable/endtoend_test.go @@ -4,6 +4,7 @@ import ( "context" "crypto/rand" "errors" + "fmt" "sync" "testing" "time" @@ -26,6 +27,346 @@ import ( p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) +func Test_RemoteExecutionCapability_DonTopologies(t *testing.T) { + ctx := testutils.Context(t) + + responseTest := func(t *testing.T, response commoncap.CapabilityResponse, responseError error) { + require.NoError(t, responseError) + mp, err := response.Value.Unwrap() + require.NoError(t, err) + assert.Equal(t, "aValue1", mp.(map[string]any)["response"].(string)) + } + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_OneAtATime, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + timeOut := 10 * time.Minute + + capability := &TestCapability{} + + var methods []func(ctx context.Context, caller commoncap.ExecutableCapability) + + methods = append(methods, func(ctx context.Context, caller commoncap.ExecutableCapability) { + executeCapability(ctx, t, caller, transmissionSchedule, responseTest) + }) + + capabilityFactory := func() commoncap.ExecutableCapability { return capability } + remoteExecutableConfig := &commoncap.RemoteExecutableConfig{} + + for _, method := range methods { + // Test scenarios where the number of submissions is greater than or equal to F + 1 + wfDon, _ := setupDons(t, remoteExecutableConfig, capabilityFactory, 1, 0, timeOut, 1, 0, timeOut) + wfDon.ExecuteMethodInParallelOnAllNodes(ctx, method) + + wfDon, _ = setupDons(t, remoteExecutableConfig, capabilityFactory, 4, 3, timeOut, 1, 0, timeOut) + wfDon.ExecuteMethodInParallelOnAllNodes(ctx, method) + wfDon, _ = setupDons(t, remoteExecutableConfig, capabilityFactory, 10, 3, timeOut, 1, 0, timeOut) + 
wfDon.ExecuteMethodInParallelOnAllNodes(ctx, method) + + wfDon, _ = setupDons(t, remoteExecutableConfig, capabilityFactory, 1, 0, timeOut, 1, 0, timeOut) + wfDon.ExecuteMethodInParallelOnAllNodes(ctx, method) + wfDon, _ = setupDons(t, remoteExecutableConfig, capabilityFactory, 1, 0, timeOut, 4, 3, timeOut) + wfDon.ExecuteMethodInParallelOnAllNodes(ctx, method) + wfDon, _ = setupDons(t, remoteExecutableConfig, capabilityFactory, 1, 0, timeOut, 10, 3, timeOut) + wfDon.ExecuteMethodInParallelOnAllNodes(ctx, method) + + wfDon, _ = setupDons(t, remoteExecutableConfig, capabilityFactory, 4, 3, timeOut, 4, 3, timeOut) + wfDon.ExecuteMethodInParallelOnAllNodes(ctx, method) + wfDon, _ = setupDons(t, remoteExecutableConfig, capabilityFactory, 10, 3, timeOut, 10, 3, timeOut) + wfDon.ExecuteMethodInParallelOnAllNodes(ctx, method) + wfDon, _ = setupDons(t, remoteExecutableConfig, capabilityFactory, 10, 9, timeOut, 10, 9, timeOut) + wfDon.ExecuteMethodInParallelOnAllNodes(ctx, method) + } +} + +func Test_RemoteExecutionCapability_RegisterAndUnregisterWorkflow(t *testing.T) { + ctx := testutils.Context(t) + + timeOut := 10 * time.Minute + + remoteExecutableConfig := &commoncap.RemoteExecutableConfig{ + RequestHashExcludedAttributes: []string{}, + RegistrationRefresh: 100 * time.Millisecond, + RegistrationExpiry: 1 * time.Second, + } + + var serverSideCapabilities []commoncap.ExecutableCapability + + wfDon, _ := setupDons(t, remoteExecutableConfig, func() commoncap.ExecutableCapability { + testCapability := &TestCapability{} + serverSideCapabilities = append(serverSideCapabilities, testCapability) + return testCapability + }, 4, 1, timeOut, 4, 1, timeOut) + + registerRequest := commoncap.RegisterToWorkflowRequest{ + Metadata: commoncap.RegistrationMetadata{ + WorkflowID: workflowID1, + ReferenceID: stepReferenceID1, + WorkflowOwner: workflowOwnerID, + }, + } + + unregisterRequest := commoncap.UnregisterFromWorkflowRequest{ + Metadata: commoncap.RegistrationMetadata{ + WorkflowID: 
workflowID1, + ReferenceID: stepReferenceID1, + WorkflowOwner: workflowOwnerID, + }, + } + + workflowNodes := wfDon.GetNodes() + + // Call RegisterToWorkflow on 2 clients + err := workflowNodes[0].RegisterToWorkflow(ctx, registerRequest) + require.NoError(t, err) + err = workflowNodes[1].RegisterToWorkflow(ctx, registerRequest) + require.NoError(t, err) + + // wait a couple of refresh intervals + time.Sleep(2 * remoteExecutableConfig.RegistrationRefresh) + + // Should have no registrations on any server side capabilities + for _, capability := range serverSideCapabilities { + assert.Empty(t, capability.(*TestCapability).GetRegisterRequests()) + } + + // Subscribe the remaining 2 clients to the same workflow + err = workflowNodes[2].RegisterToWorkflow(ctx, registerRequest) + require.NoError(t, err) + err = workflowNodes[3].RegisterToWorkflow(ctx, registerRequest) + require.NoError(t, err) + + // Should eventually have 1 registration on each server side capability + for _, capability := range serverSideCapabilities { + require.Eventually(t, func() bool { + return len(capability.(*TestCapability).GetRegisterRequests()) == 1 + }, 5*time.Second, 100*time.Millisecond) + } + + // Unregister a client (leaving f+1 clients registered) + err = workflowNodes[0].UnregisterFromWorkflow(ctx, unregisterRequest) + require.NoError(t, err) + + // wait a couple of expiry intervals + time.Sleep(2 * remoteExecutableConfig.RegistrationExpiry) + + // Should have no unregistration requests on any server side capabilities + for _, capability := range serverSideCapabilities { + assert.Empty(t, capability.(*TestCapability).GetUnregisterRequests()) + } + + // Unregister another client (leaving less than f+1 clients registered) + err = workflowNodes[1].UnregisterFromWorkflow(ctx, unregisterRequest) + require.NoError(t, err) + + // Should eventually have 1 unregistration on each server side capability + for _, capability := range serverSideCapabilities { + require.Eventually(t, func() bool { + 
return len(capability.(*TestCapability).GetUnregisterRequests()) == 1 + }, 5*time.Second, 100*time.Millisecond) + } + + // Unregister the remaining clients + err = workflowNodes[2].UnregisterFromWorkflow(ctx, unregisterRequest) + require.NoError(t, err) + err = workflowNodes[3].UnregisterFromWorkflow(ctx, unregisterRequest) + require.NoError(t, err) + + // wait a couple of expiry intervals + time.Sleep(2 * remoteExecutableConfig.RegistrationExpiry) + + // confirm there is still only 1 unregister request on each server side capability + for _, capability := range serverSideCapabilities { + assert.Len(t, capability.(*TestCapability).GetUnregisterRequests(), 1) + } + + // re-register all the clients + for i := 0; i < len(workflowNodes); i++ { + err = workflowNodes[i].RegisterToWorkflow(ctx, registerRequest) + require.NoError(t, err) + } + + // Should eventually have 2 registration requests on each server side capability + for _, capability := range serverSideCapabilities { + require.Eventually(t, func() bool { + return len(capability.(*TestCapability).GetRegisterRequests()) == 2 + }, 5*time.Second, 100*time.Millisecond) + } +} + +func Test_RemoteExecutionCapability_RegisterAndUnregister_CapabilityNodeRestart(t *testing.T) { + ctx := testutils.Context(t) + + timeOut := 10 * time.Minute + + remoteExecutableConfig := &commoncap.RemoteExecutableConfig{ + RequestHashExcludedAttributes: []string{}, + RegistrationRefresh: 100 * time.Millisecond, + RegistrationExpiry: 1 * time.Second, + } + + wfDon, capDon := setupDons(t, remoteExecutableConfig, func() commoncap.ExecutableCapability { + testCapability := &TestCapability{} + return testCapability + }, 4, 1, timeOut, 4, 1, timeOut) + + registerRequest := commoncap.RegisterToWorkflowRequest{ + Metadata: commoncap.RegistrationMetadata{ + WorkflowID: workflowID1, + ReferenceID: stepReferenceID1, + WorkflowOwner: workflowOwnerID, + }, + } + + workflowNodes := wfDon.GetNodes() + + // Call RegisterToWorkflow on all clients + err := 
workflowNodes[0].RegisterToWorkflow(ctx, registerRequest) + require.NoError(t, err) + err = workflowNodes[1].RegisterToWorkflow(ctx, registerRequest) + require.NoError(t, err) + err = workflowNodes[2].RegisterToWorkflow(ctx, registerRequest) + require.NoError(t, err) + err = workflowNodes[3].RegisterToWorkflow(ctx, registerRequest) + require.NoError(t, err) + + // Should eventually have 1 registration on each server side capability + for _, node := range capDon.GetNodes() { + require.Eventually(t, func() bool { + return len(node.GetUnderlyingCapability().(*TestCapability).GetRegisterRequests()) == 1 + }, 5*time.Second, 100*time.Millisecond) + } + + // Stop a single capability node + capNodes := capDon.GetNodes() + err = capNodes[0].Close() + require.NoError(t, err) + + // Verify still have 1 registration on each server side capability + for _, node := range capNodes { + require.Eventually(t, func() bool { + return len(node.GetUnderlyingCapability().(*TestCapability).GetRegisterRequests()) == 1 + }, 5*time.Second, 100*time.Millisecond) + } + + // Restart the stopped capability node + err = capNodes[0].Start(ctx) + require.NoError(t, err) + + // The restarted node's capability should eventually have 2 registrations, the latter one corresponding to the re-registration after restart + require.Eventually(t, func() bool { + return len(capNodes[0].GetUnderlyingCapability().(*TestCapability).GetRegisterRequests()) == 2 + }, 5*time.Second, 100*time.Millisecond) +} + +func Test_RemoteExecutionCapability_RegisterAndUnregister_WorkflowNodeRestart(t *testing.T) { + ctx := testutils.Context(t) + + timeOut := 10 * time.Minute + + remoteExecutableConfig := &commoncap.RemoteExecutableConfig{ + RequestHashExcludedAttributes: []string{}, + RegistrationRefresh: 100 * time.Millisecond, + RegistrationExpiry: 1 * time.Second, + } + + wfDon, capDon := setupDons(t, remoteExecutableConfig, func() commoncap.ExecutableCapability { + testCapability := &TestCapability{} + return testCapability +
}, 4, 1, timeOut, 4, 1, timeOut) + + registerRequest := commoncap.RegisterToWorkflowRequest{ + Metadata: commoncap.RegistrationMetadata{ + WorkflowID: workflowID1, + ReferenceID: stepReferenceID1, + WorkflowOwner: workflowOwnerID, + }, + } + + workflowNodes := wfDon.GetNodes() + + // Call RegisterToWorkflow on all clients + err := workflowNodes[0].RegisterToWorkflow(ctx, registerRequest) + require.NoError(t, err) + err = workflowNodes[1].RegisterToWorkflow(ctx, registerRequest) + require.NoError(t, err) + err = workflowNodes[2].RegisterToWorkflow(ctx, registerRequest) + require.NoError(t, err) + err = workflowNodes[3].RegisterToWorkflow(ctx, registerRequest) + require.NoError(t, err) + + // Should eventually have 1 registration on each server side capability + for _, node := range capDon.GetNodes() { + require.Eventually(t, func() bool { + return len(node.GetUnderlyingCapability().(*TestCapability).GetRegisterRequests()) == 1 + }, 5*time.Second, 100*time.Millisecond) + } + + // Stop a single workflow node + wfNodes := wfDon.GetNodes() + err = wfNodes[0].Close() + require.NoError(t, err) + + // sleep for a couple of registration expiry intervals + time.Sleep(2 * remoteExecutableConfig.RegistrationExpiry) + + // Verify no unregister requests on any capability nodes + for _, node := range capDon.GetNodes() { + require.Eventually(t, func() bool { + return len(node.GetUnderlyingCapability().(*TestCapability).GetUnregisterRequests()) == 0 + }, 5*time.Second, 100*time.Millisecond) + } + + // Restart the stopped workflow node + err = wfNodes[0].Start(ctx) + require.NoError(t, err) + + // sleep for a couple of refresh intervals + time.Sleep(2 * remoteExecutableConfig.RegistrationRefresh) + + // Verify still have 1 registration on each server side capability + for _, node := range capDon.GetNodes() { + require.Eventually(t, func() bool { + return len(node.GetUnderlyingCapability().(*TestCapability).GetRegisterRequests()) == 1 + }, 5*time.Second, 100*time.Millisecond) + } + + 
// Stop 2 of the workflow nodes + err = wfNodes[1].Close() + require.NoError(t, err) + err = wfNodes[2].Close() + require.NoError(t, err) + + // Eventually all capability nodes should have 1 unregister request + for _, node := range capDon.GetNodes() { + require.Eventually(t, func() bool { + return len(node.GetUnderlyingCapability().(*TestCapability).GetUnregisterRequests()) == 1 + }, 5*time.Second, 100*time.Millisecond) + } + + // Restart the stopped workflow nodes and register to workflow + err = wfNodes[1].Start(ctx) + require.NoError(t, err) + err = wfNodes[2].Start(ctx) + require.NoError(t, err) + + err = workflowNodes[1].RegisterToWorkflow(ctx, registerRequest) + require.NoError(t, err) + err = workflowNodes[2].RegisterToWorkflow(ctx, registerRequest) + require.NoError(t, err) + + // Eventually all capability nodes should have 2 register requests, the latter one corresponding to the re-registration + // after restart of the workflow nodes + for _, node := range capDon.GetNodes() { + require.Eventually(t, func() bool { + return len(node.GetUnderlyingCapability().(*TestCapability).GetRegisterRequests()) == 2 + }, 5*time.Second, 100*time.Millisecond) + } +} + func Test_RemoteExecutableCapability_TransmissionSchedules(t *testing.T) { ctx := testutils.Context(t) @@ -49,7 +390,8 @@ func Test_RemoteExecutableCapability_TransmissionSchedules(t *testing.T) { method := func(ctx context.Context, caller commoncap.ExecutableCapability) { executeCapability(ctx, t, caller, transmissionSchedule, responseTest) } - testRemoteExecutableCapability(ctx, t, capability, 10, 9, timeOut, 10, 9, timeOut, method) + wfDon, _ := setupDons(t, &commoncap.RemoteExecutableConfig{}, func() commoncap.ExecutableCapability { return capability }, 10, 9, timeOut, 10, 9, timeOut) + wfDon.ExecuteMethodInParallelOnAllNodes(ctx, method) transmissionSchedule, err = values.NewMap(map[string]any{ "schedule": transmission.Schedule_AllAtOnce, @@ -60,7 +402,8 @@ func
Test_RemoteExecutableCapability_TransmissionSchedules(t *testing.T) { executeCapability(ctx, t, caller, transmissionSchedule, responseTest) } - testRemoteExecutableCapability(ctx, t, capability, 10, 9, timeOut, 10, 9, timeOut, method) + wfDon, _ = setupDons(t, &commoncap.RemoteExecutableConfig{}, func() commoncap.ExecutableCapability { return capability }, 10, 9, timeOut, 10, 9, timeOut) + wfDon.ExecuteMethodInParallelOnAllNodes(ctx, method) } func Test_RemoteExecutionCapability_CapabilityError(t *testing.T) { @@ -83,7 +426,8 @@ func Test_RemoteExecutionCapability_CapabilityError(t *testing.T) { }) for _, method := range methods { - testRemoteExecutableCapability(ctx, t, capability, 10, 9, 10*time.Minute, 10, 9, 10*time.Minute, method) + wfDon, _ := setupDons(t, &commoncap.RemoteExecutableConfig{}, func() commoncap.ExecutableCapability { return capability }, 10, 9, 10*time.Minute, 10, 9, 10*time.Minute) + wfDon.ExecuteMethodInParallelOnAllNodes(ctx, method) } } @@ -107,14 +451,15 @@ func Test_RemoteExecutableCapability_RandomCapabilityError(t *testing.T) { }) for _, method := range methods { - testRemoteExecutableCapability(ctx, t, capability, 10, 9, 10*time.Millisecond, 10, 9, 10*time.Minute, - method) + wfDon, _ := setupDons(t, &commoncap.RemoteExecutableConfig{}, func() commoncap.ExecutableCapability { return capability }, 10, 9, 10*time.Millisecond, 10, 9, 10*time.Minute) + wfDon.ExecuteMethodInParallelOnAllNodes(ctx, method) } } -func testRemoteExecutableCapability(ctx context.Context, t *testing.T, underlying commoncap.ExecutableCapability, numWorkflowPeers int, workflowDonF uint8, workflowNodeTimeout time.Duration, - numCapabilityPeers int, capabilityDonF uint8, capabilityNodeResponseTimeout time.Duration, - method func(ctx context.Context, caller commoncap.ExecutableCapability)) { +func setupDons(t *testing.T, + remoteExecutableConfig *commoncap.RemoteExecutableConfig, + capabilityFactory func() commoncap.ExecutableCapability, numWorkflowPeers int, 
workflowDonF uint8, workflowNodeTimeout time.Duration, + numCapabilityPeers int, capabilityDonF uint8, capabilityNodeResponseTimeout time.Duration) (*workflowDon, *capabilityDon) { lggr := logger.TestLogger(t) capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) @@ -154,53 +499,259 @@ func testRemoteExecutableCapability(ctx context.Context, t *testing.T, underlyin } broker := newTestAsyncMessageBroker(t, 1000) + servicetest.Run(t, broker) workflowDONs := map[uint32]commoncap.DON{ workflowDonInfo.ID: workflowDonInfo, } - capabilityNodes := make([]remotetypes.Receiver, numCapabilityPeers) + capDon := newCapabilityDon() for i := 0; i < numCapabilityPeers; i++ { - capabilityPeer := capabilityPeers[i] - capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeer) - capabilityNode := executable.NewServer(&commoncap.RemoteExecutableConfig{RequestHashExcludedAttributes: []string{}}, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, - capabilityNodeResponseTimeout, lggr) - servicetest.Run(t, capabilityNode) - broker.RegisterReceiverNode(capabilityPeer, capabilityNode) - capabilityNodes[i] = capabilityNode + node := newServerNode(capabilityPeers[i], broker, remoteExecutableConfig, capabilityFactory(), capInfo, capDonInfo, workflowDONs, capabilityNodeResponseTimeout, lggr) + capDon.AddNode(node) + servicetest.Run(t, node) } - workflowNodes := make([]commoncap.ExecutableCapability, numWorkflowPeers) + wfDon := newWorkflowDon(broker) for i := 0; i < numWorkflowPeers; i++ { - workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) - workflowNode := executable.NewClient(capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeTimeout, lggr) - servicetest.Run(t, workflowNode) - broker.RegisterReceiverNode(workflowPeers[i], workflowNode) - workflowNodes[i] = workflowNode + + node := newClientNode(workflowPeers[i], broker, remoteExecutableConfig, capInfo, capDonInfo, workflowDonInfo, workflowNodeTimeout, lggr) 
+ wfDon.AddNode(node) + servicetest.Run(t, node) } - servicetest.Run(t, broker) + return wfDon, capDon +} + +type workflowNode interface { + commoncap.ExecutableCapability + Start(ctx context.Context) error + Close() error +} + +type client interface { + remotetypes.Receiver + commoncap.ExecutableCapability + Start(ctx context.Context) error + Close() error +} + +type clientNode struct { + client client + nodePeerID p2ptypes.PeerID + broker *testAsyncMessageBroker + + remoteExecutableConfig *commoncap.RemoteExecutableConfig + remoteCapabilityInfo commoncap.CapabilityInfo + remoteDonInfo commoncap.DON + localDonInfo commoncap.DON + requestTimeout time.Duration + mux sync.Mutex + running bool + lggr logger.Logger +} + +func newClientNode(nodePeerID p2ptypes.PeerID, broker *testAsyncMessageBroker, remoteExecutableConfig *commoncap.RemoteExecutableConfig, + remoteCapabilityInfo commoncap.CapabilityInfo, + remoteDonInfo commoncap.DON, + localDonInfo commoncap.DON, + requestTimeout time.Duration, + lggr logger.Logger) *clientNode { + return &clientNode{ + nodePeerID: nodePeerID, + broker: broker, + remoteExecutableConfig: remoteExecutableConfig, + remoteCapabilityInfo: remoteCapabilityInfo, + remoteDonInfo: remoteDonInfo, + localDonInfo: localDonInfo, + requestTimeout: requestTimeout, + lggr: lggr, + } +} + +func (w *clientNode) Start(ctx context.Context) error { + w.mux.Lock() + defer w.mux.Unlock() + if !w.running { + w.client = executable.NewClient(w.remoteExecutableConfig, w.remoteCapabilityInfo, w.remoteDonInfo, w.localDonInfo, w.broker.NewDispatcherForNode(w.nodePeerID), w.requestTimeout, w.lggr) + w.broker.RegisterReceiverNode(w.nodePeerID, w.client) + if err := w.client.Start(ctx); err != nil { + return fmt.Errorf("failed to start client: %w", err) + } + w.running = true + } + + return nil +} + +func (w *clientNode) Close() error { + w.mux.Lock() + defer w.mux.Unlock() + if w.running { + w.broker.RemoveReceiverNode(w.nodePeerID) + if err := w.client.Close(); err 
!= nil { + return fmt.Errorf("failed to close client: %w", err) + } + w.running = false + } + + return nil +} + +func (w *clientNode) Execute(ctx context.Context, request commoncap.CapabilityRequest) (commoncap.CapabilityResponse, error) { + return w.client.Execute(ctx, request) +} + +func (w *clientNode) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { + return w.client.RegisterToWorkflow(ctx, request) +} +func (w *clientNode) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { + return w.client.UnregisterFromWorkflow(ctx, request) +} + +func (w *clientNode) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { + return w.client.Info(ctx) +} + +type workflowDon struct { + nodes []workflowNode +} + +func newWorkflowDon(broker *testAsyncMessageBroker) *workflowDon { + return &workflowDon{ + nodes: make([]workflowNode, 0), + } +} + +func (w *workflowDon) ExecuteMethodInParallelOnAllNodes(ctx context.Context, method func(ctx context.Context, caller commoncap.ExecutableCapability)) { wg := &sync.WaitGroup{} - wg.Add(len(workflowNodes)) + wg.Add(len(w.nodes)) - for _, caller := range workflowNodes { + for _, node := range w.nodes { go func(caller commoncap.ExecutableCapability) { defer wg.Done() method(ctx, caller) - }(caller) + }(node) } wg.Wait() } +func (w *workflowDon) AddNode(wfNode workflowNode) { + w.nodes = append(w.nodes, wfNode) +} + +func (w *workflowDon) GetNodes() []workflowNode { + return w.nodes +} + +type server interface { + remotetypes.Receiver + Start(ctx context.Context) error + Close() error +} + +type serverNode struct { + server server + nodePeerID p2ptypes.PeerID + broker *testAsyncMessageBroker + + remoteExecutableConfig *commoncap.RemoteExecutableConfig + underlying commoncap.ExecutableCapability + capInfo commoncap.CapabilityInfo + localDonInfo commoncap.DON + workflowDONs map[uint32]commoncap.DON + requestTimeout time.Duration + mux sync.Mutex 
+ running bool + lggr logger.Logger +} + +func newServerNode(nodePeerID p2ptypes.PeerID, broker *testAsyncMessageBroker, remoteExecutableConfig *commoncap.RemoteExecutableConfig, + underlying commoncap.ExecutableCapability, + capInfo commoncap.CapabilityInfo, + localDonInfo commoncap.DON, + workflowDONs map[uint32]commoncap.DON, + requestTimeout time.Duration, + lggr logger.Logger) *serverNode { + return &serverNode{ + nodePeerID: nodePeerID, + broker: broker, + remoteExecutableConfig: remoteExecutableConfig, + underlying: underlying, + capInfo: capInfo, + localDonInfo: localDonInfo, + workflowDONs: workflowDONs, + requestTimeout: requestTimeout, + lggr: lggr, + } +} + +func (w *serverNode) GetUnderlyingCapability() commoncap.ExecutableCapability { + return w.underlying +} + +func (w *serverNode) Start(ctx context.Context) error { + w.mux.Lock() + defer w.mux.Unlock() + if !w.running { + w.server = executable.NewServer(w.remoteExecutableConfig, w.nodePeerID, w.underlying, w.capInfo, w.localDonInfo, w.workflowDONs, w.broker.NewDispatcherForNode(w.nodePeerID), + w.requestTimeout, w.lggr) + w.broker.RegisterReceiverNode(w.nodePeerID, w.server) + if err := w.server.Start(ctx); err != nil { + return fmt.Errorf("failed to start server: %w", err) + } + w.running = true + } + return nil +} + +func (w *serverNode) Close() error { + w.mux.Lock() + defer w.mux.Unlock() + if w.running { + w.broker.RemoveReceiverNode(w.nodePeerID) + if err := w.server.Close(); err != nil { + return fmt.Errorf("failed to close server: %w", err) + } + w.running = false + } + + return nil +} + +type capabilityNode interface { + Start(ctx context.Context) error + Close() error + GetUnderlyingCapability() commoncap.ExecutableCapability +} + +type capabilityDon struct { + nodes []capabilityNode +} + +func newCapabilityDon() *capabilityDon { + return &capabilityDon{ + nodes: make([]capabilityNode, 0), + } +} + +func (c *capabilityDon) AddNode(node capabilityNode) { + c.nodes = append(c.nodes, node) +} 
+ +func (c *capabilityDon) GetNodes() []capabilityNode { + return c.nodes +} + type testAsyncMessageBroker struct { services.Service eng *services.Engine t *testing.T - nodes map[p2ptypes.PeerID]remotetypes.Receiver - + mux sync.Mutex + nodes map[p2ptypes.PeerID]remotetypes.Receiver sendCh chan *remotetypes.MessageBody } @@ -226,12 +777,14 @@ func (a *testAsyncMessageBroker) start(ctx context.Context) error { case msg := <-a.sendCh: receiverId := toPeerID(msg.Receiver) - receiver, ok := a.nodes[receiverId] - if !ok { - panic("server not found for peer id") - } + var receiver remotetypes.Receiver + a.mux.Lock() + receiver = a.nodes[receiverId] + a.mux.Unlock() - receiver.Receive(tests.Context(a.t), msg) + if receiver != nil { + receiver.Receive(tests.Context(a.t), msg) + } } } }) @@ -246,6 +799,8 @@ func (a *testAsyncMessageBroker) NewDispatcherForNode(nodePeerID p2ptypes.PeerID } func (a *testAsyncMessageBroker) RegisterReceiverNode(nodePeerID p2ptypes.PeerID, node remotetypes.Receiver) { + a.mux.Lock() + defer a.mux.Unlock() if _, ok := a.nodes[nodePeerID]; ok { panic("node already registered") } @@ -253,6 +808,12 @@ func (a *testAsyncMessageBroker) RegisterReceiverNode(nodePeerID p2ptypes.PeerID a.nodes[nodePeerID] = node } +func (a *testAsyncMessageBroker) RemoveReceiverNode(nodePeerID p2ptypes.PeerID) { + a.mux.Lock() + defer a.mux.Unlock() + delete(a.nodes, nodePeerID) +} + func (a *testAsyncMessageBroker) Send(msg *remotetypes.MessageBody) { a.sendCh <- msg } @@ -311,19 +872,14 @@ func (t abstractTestCapability) Info(ctx context.Context) (commoncap.CapabilityI return commoncap.CapabilityInfo{}, nil } -func (t abstractTestCapability) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { - return nil -} - -func (t abstractTestCapability) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { - return nil -} - type TestCapability struct { abstractTestCapability + 
registerRequests []commoncap.RegisterToWorkflowRequest + unregisterRequests []commoncap.UnregisterFromWorkflowRequest + mu sync.Mutex } -func (t TestCapability) Execute(ctx context.Context, request commoncap.CapabilityRequest) (commoncap.CapabilityResponse, error) { +func (t *TestCapability) Execute(ctx context.Context, request commoncap.CapabilityRequest) (commoncap.CapabilityResponse, error) { value := request.Inputs.Underlying["executeValue1"] response, err := values.NewMap(map[string]any{"response": value}) if err != nil { @@ -334,20 +890,67 @@ func (t TestCapability) Execute(ctx context.Context, request commoncap.Capabilit }, nil } +func (t *TestCapability) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { + t.mu.Lock() + defer t.mu.Unlock() + t.registerRequests = append(t.registerRequests, request) + return nil +} + +func (t *TestCapability) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { + t.mu.Lock() + defer t.mu.Unlock() + t.unregisterRequests = append(t.unregisterRequests, request) + return nil +} + +func (t *TestCapability) GetRegisterRequests() []commoncap.RegisterToWorkflowRequest { + t.mu.Lock() + defer t.mu.Unlock() + return t.registerRequests +} + +func (t *TestCapability) GetUnregisterRequests() []commoncap.UnregisterFromWorkflowRequest { + t.mu.Lock() + defer t.mu.Unlock() + return t.unregisterRequests +} + type TestErrorCapability struct { abstractTestCapability + registerRequests []commoncap.RegisterToWorkflowRequest + unregisterRequests []commoncap.UnregisterFromWorkflowRequest + mu sync.Mutex } -func (t TestErrorCapability) Execute(ctx context.Context, request commoncap.CapabilityRequest) (commoncap.CapabilityResponse, error) { +func (t *TestErrorCapability) Execute(ctx context.Context, request commoncap.CapabilityRequest) (commoncap.CapabilityResponse, error) { return commoncap.CapabilityResponse{}, errors.New("an error") } -func (t 
TestErrorCapability) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { - return errors.New("an error") +func (t *TestErrorCapability) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { + t.mu.Lock() + defer t.mu.Unlock() + t.registerRequests = append(t.registerRequests, request) + return nil +} + +func (t *TestErrorCapability) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { + t.mu.Lock() + defer t.mu.Unlock() + t.unregisterRequests = append(t.unregisterRequests, request) + return nil } -func (t TestErrorCapability) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { - return errors.New("an error") +func (t *TestErrorCapability) GetRegisterRequests() []commoncap.RegisterToWorkflowRequest { + t.mu.Lock() + defer t.mu.Unlock() + return t.registerRequests +} + +func (t *TestErrorCapability) GetUnregisterRequests() []commoncap.UnregisterFromWorkflowRequest { + t.mu.Lock() + defer t.mu.Unlock() + return t.unregisterRequests } type TestRandomErrorCapability struct { @@ -407,29 +1010,3 @@ func executeCapability(ctx context.Context, t *testing.T, caller commoncap.Execu responseTest(t, response, err) } - -func registerWorkflow(ctx context.Context, t *testing.T, caller commoncap.ExecutableCapability, transmissionSchedule *values.Map, responseTest func(t *testing.T, responseError error)) { - err := caller.RegisterToWorkflow(ctx, commoncap.RegisterToWorkflowRequest{ - Metadata: commoncap.RegistrationMetadata{ - WorkflowID: workflowID1, - ReferenceID: stepReferenceID1, - WorkflowOwner: workflowOwnerID, - }, - Config: transmissionSchedule, - }) - - responseTest(t, err) -} - -func unregisterWorkflow(ctx context.Context, t *testing.T, caller commoncap.ExecutableCapability, transmissionSchedule *values.Map, responseTest func(t *testing.T, responseError error)) { - err := 
caller.UnregisterFromWorkflow(ctx, commoncap.UnregisterFromWorkflowRequest{ - Metadata: commoncap.RegistrationMetadata{ - WorkflowID: workflowID1, - ReferenceID: stepReferenceID1, - WorkflowOwner: workflowOwnerID, - }, - Config: transmissionSchedule, - }) - - responseTest(t, err) -} diff --git a/core/capabilities/remote/executable/request/client_request.go b/core/capabilities/remote/executable/request/client_request.go index 6b4b9e3a0cd..ef4d0023773 100644 --- a/core/capabilities/remote/executable/request/client_request.go +++ b/core/capabilities/remote/executable/request/client_request.go @@ -212,7 +212,7 @@ func (c *ClientRequest) OnMessage(_ context.Context, msg *types.MessageBody) err } if msg.Sender == nil { - return fmt.Errorf("sender missing from message") + return errors.New("sender missing from message") } c.lggr.Debugw("OnMessage called for client request", "messageID", msg.MessageId) diff --git a/core/capabilities/remote/executable/request/server_request.go b/core/capabilities/remote/executable/request/server_request.go index a4662e93987..3b71283c91a 100644 --- a/core/capabilities/remote/executable/request/server_request.go +++ b/core/capabilities/remote/executable/request/server_request.go @@ -2,6 +2,7 @@ package request import ( "context" + "errors" "fmt" "sync" "time" @@ -25,7 +26,7 @@ type response struct { type ServerRequest struct { capability capabilities.ExecutableCapability - capabilityPeerId p2ptypes.PeerID + capabilityPeerID p2ptypes.PeerID capabilityID string capabilityDonID uint32 @@ -57,7 +58,7 @@ func NewServerRequest(capability capabilities.ExecutableCapability, method strin createdTime: time.Now(), capabilityID: capabilityID, capabilityDonID: capabilityDonID, - capabilityPeerId: capabilityPeerID, + capabilityPeerID: capabilityPeerID, dispatcher: dispatcher, requesters: map[p2ptypes.PeerID]bool{}, responseSentToRequester: map[p2ptypes.PeerID]bool{}, @@ -74,7 +75,7 @@ func (e *ServerRequest) OnMessage(ctx context.Context, msg 
*types.MessageBody) e defer e.mux.Unlock() if msg.Sender == nil { - return fmt.Errorf("sender missing from message") + return errors.New("sender missing from message") } requester, err := remote.ToPeerID(msg.Sender) @@ -203,7 +204,7 @@ func (e *ServerRequest) sendResponse(requester p2ptypes.PeerID) error { CallerDonId: e.callingDon.ID, Method: types.MethodExecute, MessageId: []byte(e.requestMessageID), - Sender: e.capabilityPeerId[:], + Sender: e.capabilityPeerID[:], Receiver: requester[:], } diff --git a/core/capabilities/remote/executable/server.go b/core/capabilities/remote/executable/server.go index b767a2d7030..9ee4a9a0f79 100644 --- a/core/capabilities/remote/executable/server.go +++ b/core/capabilities/remote/executable/server.go @@ -4,6 +4,7 @@ import ( "context" "crypto/sha256" "encoding/hex" + "errors" "fmt" "sync" "time" @@ -13,6 +14,7 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/services" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/executable/request" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/registration" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/capabilities/validation" p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" @@ -44,6 +46,8 @@ type server struct { // Used to detect messages with the same message id but different payloads messageIDToRequestIDsCount map[string]map[string]int + registrationServer *registration.Server + receiveLock sync.Mutex stopCh services.StopChan wg sync.WaitGroup @@ -57,6 +61,33 @@ type requestAndMsgID struct { messageID string } +type TargetAdapter struct { + Capability commoncap.ExecutableCapability +} + +func (u *TargetAdapter) Register(ctx context.Context, key registration.Key, registerRequest []byte) error { + unmarshalled, err := 
pb.UnmarshalRegisterToWorkflowRequest(registerRequest) + if err != nil { + return fmt.Errorf("failed to unmarshal register request: %w", err) + } + if err = u.Capability.RegisterToWorkflow(ctx, unmarshalled); err != nil { + return fmt.Errorf("failed to register to workflow: %w", err) + } + return nil +} + +func (u *TargetAdapter) Unregister(ctx context.Context, registerRequest []byte) error { + unmarshalled, err := pb.UnmarshalUnregisterFromWorkflowRequest(registerRequest) + if err != nil { + return fmt.Errorf("failed to unmarshal register request: %w", err) + } + if err = u.Capability.UnregisterFromWorkflow(ctx, unmarshalled); err != nil { + return fmt.Errorf("failed to unregister from workflow: %w", err) + } + + return nil +} + func NewServer(remoteExecutableConfig *commoncap.RemoteExecutableConfig, peerID p2ptypes.PeerID, underlying commoncap.ExecutableCapability, capInfo commoncap.CapabilityInfo, localDonInfo commoncap.DON, workflowDONs map[uint32]commoncap.DON, dispatcher types.Dispatcher, requestTimeout time.Duration, lggr logger.Logger) *server { @@ -64,6 +95,10 @@ func NewServer(remoteExecutableConfig *commoncap.RemoteExecutableConfig, peerID lggr.Info("no remote config provided, using default values") remoteExecutableConfig = &commoncap.RemoteExecutableConfig{} } + remoteExecutableConfig.ApplyDefaults() + + target := &TargetAdapter{Capability: underlying} + return &server{ config: remoteExecutableConfig, underlying: underlying, @@ -77,6 +112,8 @@ func NewServer(remoteExecutableConfig *commoncap.RemoteExecutableConfig, peerID messageIDToRequestIDsCount: map[string]map[string]int{}, requestTimeout: requestTimeout, + registrationServer: registration.NewServer(lggr, target, capInfo, remoteExecutableConfig.RegistrationExpiry, workflowDONs, "ExecutableCapabilityServer"), + lggr: lggr.Named("ExecutableCapabilityServer"), stopCh: make(services.StopChan), } @@ -84,6 +121,10 @@ func NewServer(remoteExecutableConfig *commoncap.RemoteExecutableConfig, peerID func (r 
*server) Start(ctx context.Context) error { return r.StartOnce(r.Name(), func() error { + if err := r.registrationServer.Start(ctx); err != nil { + return fmt.Errorf("failed to start registration server: %w", err) + } + r.wg.Add(1) go func() { defer r.wg.Done() @@ -108,6 +149,11 @@ func (r *server) Close() error { close(r.stopCh) r.wg.Wait() r.lggr.Info("executable capability server closed") + + if err := r.registrationServer.Close(); err != nil { + return fmt.Errorf("failed to close registration server: %w", err) + } + return nil }) } @@ -133,61 +179,83 @@ func (r *server) Receive(ctx context.Context, msg *types.MessageBody) { defer r.receiveLock.Unlock() switch msg.Method { - case types.MethodExecute, types.MethodRegisterToWorkflow, types.MethodUnregisterFromWorkflow: - default: - r.lggr.Errorw("received request for unsupported method type", "method", remote.SanitizeLogString(msg.Method)) - } + case types.MethodRegisterToWorkflow: + sender, err := remote.ToPeerID(msg.Sender) + if err != nil { + r.lggr.Errorw("failed to convert message sender to PeerID", "err", err) + return + } - messageId, err := GetMessageID(msg) - if err != nil { - r.lggr.Errorw("invalid message id", "err", err, "id", remote.SanitizeLogString(string(msg.MessageId))) - return - } + req, err := pb.UnmarshalRegisterToWorkflowRequest(msg.Payload) + if err != nil { + r.lggr.Errorw("failed to unmarshal register to workflow request", "capabilityId", r.capInfo.ID, "err", err) + return + } - msgHash, err := r.getMessageHash(msg) - if err != nil { - r.lggr.Errorw("failed to get message hash", "err", err) - return - } + workflowID := req.Metadata.WorkflowID + err = r.registrationServer.Register(ctx, msg, sender, workflowID, "") + if err != nil { + r.lggr.Errorw("failed to register executable capability", "capabilityId", r.capInfo.ID, "workflowId", + remote.SanitizeLogString(workflowID), "callerDonId", msg.CallerDonId, "sender", sender, "err", err) + } + case types.MethodUnregisterFromWorkflow: + // 
Explicitly unregistering from a workflow is not currently supported (or needed) for executable capabilities, + // Unregistering from a workflow is done by the registration server and occurs when the registration expires. + r.lggr.Errorw("received unregister from workflow request, this request is not supported") + case types.MethodExecute: + messageID, err := GetMessageID(msg) + if err != nil { + r.lggr.Errorw("invalid message id", "err", err, "id", remote.SanitizeLogString(string(msg.MessageId))) + return + } - // A request is uniquely identified by the message id and the hash of the payload to prevent a malicious - // actor from sending a different payload with the same message id - requestID := messageId + hex.EncodeToString(msgHash[:]) + msgHash, err := r.getMessageHash(msg) + if err != nil { + r.lggr.Errorw("failed to get message hash", "err", err) + return + } - r.lggr.Debugw("received request", "msgId", msg.MessageId, "requestID", requestID) + // A request is uniquely identified by the message id and the hash of the payload to prevent a malicious + // actor from sending a different payload with the same message id + requestID := messageID + hex.EncodeToString(msgHash[:]) - if requestIDs, ok := r.messageIDToRequestIDsCount[messageId]; ok { - requestIDs[requestID] = requestIDs[requestID] + 1 - } else { - r.messageIDToRequestIDsCount[messageId] = map[string]int{requestID: 1} - } + r.lggr.Debugw("received request", "msgId", msg.MessageId, "requestID", requestID) - requestIDs := r.messageIDToRequestIDsCount[messageId] - if len(requestIDs) > 1 { - // This is a potential attack vector as well as a situation that will occur if the client is sending non-deterministic payloads - // so a warning is logged - r.lggr.Warnw("received messages with the same id and different payloads", "messageID", messageId, "lenRequestIDs", len(requestIDs)) - } + if requestIDs, ok := r.messageIDToRequestIDsCount[messageID]; ok { + requestIDs[requestID]++ + } else { + 
r.messageIDToRequestIDsCount[messageID] = map[string]int{requestID: 1} + } - if _, ok := r.requestIDToRequest[requestID]; !ok { - callingDon, ok := r.workflowDONs[msg.CallerDonId] - if !ok { - r.lggr.Errorw("received request from unregistered don", "donId", msg.CallerDonId) - return + requestIDs := r.messageIDToRequestIDsCount[messageID] + if len(requestIDs) > 1 { + // This is a potential attack vector as well as a situation that will occur if the client is sending non-deterministic payloads + // so a warning is logged + r.lggr.Warnw("received messages with the same id and different payloads", "messageID", messageID, "lenRequestIDs", len(requestIDs)) } - r.requestIDToRequest[requestID] = requestAndMsgID{ - request: request.NewServerRequest(r.underlying, msg.Method, r.capInfo.ID, r.localDonInfo.ID, r.peerID, - callingDon, messageId, r.dispatcher, r.requestTimeout, r.lggr), - messageID: messageId, + if _, ok := r.requestIDToRequest[requestID]; !ok { + callingDon, ok := r.workflowDONs[msg.CallerDonId] + if !ok { + r.lggr.Errorw("received request from unregistered don", "donId", msg.CallerDonId) + return + } + + r.requestIDToRequest[requestID] = requestAndMsgID{ + request: request.NewServerRequest(r.underlying, msg.Method, r.capInfo.ID, r.localDonInfo.ID, r.peerID, + callingDon, messageID, r.dispatcher, r.requestTimeout, r.lggr), + messageID: messageID, + } } - } - reqAndMsgID := r.requestIDToRequest[requestID] + reqAndMsgID := r.requestIDToRequest[requestID] - err = reqAndMsgID.request.OnMessage(ctx, msg) - if err != nil { - r.lggr.Errorw("request failed to OnMessage new message", "messageID", reqAndMsgID.messageID, "err", err) + err = reqAndMsgID.request.OnMessage(ctx, msg) + if err != nil { + r.lggr.Errorw("request failed to OnMessage new message", "messageID", reqAndMsgID.messageID, "err", err) + } + default: + r.lggr.Errorw("received request for unsupported method type", "method", remote.SanitizeLogString(msg.Method)) } } @@ -214,7 +282,7 @@ func (r *server) 
getMessageHash(msg *types.MessageBody) ([32]byte, error) { func GetMessageID(msg *types.MessageBody) (string, error) { idStr := string(msg.MessageId) if !validation.IsValidID(idStr) { - return "", fmt.Errorf("invalid message id") + return "", errors.New("invalid message id") } return idStr, nil } diff --git a/core/capabilities/remote/executable/server_test.go b/core/capabilities/remote/executable/server_test.go index 1fb5c2dd413..69a65bc18d1 100644 --- a/core/capabilities/remote/executable/server_test.go +++ b/core/capabilities/remote/executable/server_test.go @@ -2,6 +2,7 @@ package executable_test import ( "context" + "fmt" "strconv" "testing" "time" @@ -12,8 +13,10 @@ import ( commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink-common/pkg/services" + "github.com/smartcontractkit/chainlink-common/pkg/services/servicetest" "github.com/smartcontractkit/chainlink-common/pkg/values" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/executable" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/registration" remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" "github.com/smartcontractkit/chainlink/v2/core/logger" @@ -26,7 +29,7 @@ func Test_Server_ExcludesNonDeterministicInputAttributes(t *testing.T) { numCapabilityPeers := 4 callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{RequestHashExcludedAttributes: []string{"signed_report.Signatures"}}, - &TestCapability{}, 10, 9, numCapabilityPeers, 3, 10*time.Minute) + func() commoncap.ExecutableCapability { return &TestCapability{} }, 10, 9, numCapabilityPeers, 3, 10*time.Minute) for idx, caller := range callers { rawInputs := map[string]any{ @@ -61,7 +64,7 @@ func Test_Server_Execute_RespondsAfterSufficientRequests(t 
*testing.T) { numCapabilityPeers := 4 - callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{}, &TestCapability{}, 10, 9, numCapabilityPeers, 3, 10*time.Minute) + callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{}, func() commoncap.ExecutableCapability { return &TestCapability{} }, 10, 9, numCapabilityPeers, 3, 10*time.Minute) for _, caller := range callers { _, err := caller.Execute(context.Background(), @@ -83,14 +86,25 @@ func Test_Server_Execute_RespondsAfterSufficientRequests(t *testing.T) { closeServices(t, srvcs) } -func Test_Server_RegisterToWorkflow_RespondsAfterSufficientRequests(t *testing.T) { +func Test_Server_RegisterToWorkflow(t *testing.T) { ctx := testutils.Context(t) numCapabilityPeers := 4 - callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{}, &TestCapability{}, 10, 9, numCapabilityPeers, 3, 10*time.Minute) + remoteExecutableConfig := &commoncap.RemoteExecutableConfig{} + remoteExecutableConfig.RegistrationRefresh = 100 * time.Millisecond - for _, caller := range callers { + var testCapabilities []*TestCapability + + clients, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{RegistrationRefresh: 100 * time.Millisecond}, + func() commoncap.ExecutableCapability { + testCap := &TestCapability{} + testCapabilities = append(testCapabilities, testCap) + return testCap + }, + 10, 4, numCapabilityPeers, 3, 10*time.Minute) + + for _, caller := range clients { err := caller.RegisterToWorkflow(context.Background(), commoncap.RegisterToWorkflowRequest{ Metadata: commoncap.RegistrationMetadata{ WorkflowID: workflowID1, @@ -102,12 +116,23 @@ func Test_Server_RegisterToWorkflow_RespondsAfterSufficientRequests(t *testing.T require.NoError(t, err) } - for _, caller := range callers { - for i := 0; i < numCapabilityPeers; i++ { - msg := <-caller.receivedMessages - assert.Equal(t, 
remotetypes.Error_OK, msg.Error) + require.Eventually(t, func() bool { + for _, testCapability := range testCapabilities { + if len(testCapability.GetRegisterRequests()) != 1 { + return false + } } + + return true + }, 10*time.Second, 100*time.Millisecond, "expected one registration request to be received") + + // a short sleep to allow the registration refresh mechanism to run, then check that there is still one registration request + time.Sleep(200 * time.Millisecond) + + for _, testCapability := range testCapabilities { + assert.Len(t, testCapability.GetRegisterRequests(), 1) } + closeServices(t, srvcs) } @@ -116,9 +141,20 @@ func Test_Server_RegisterToWorkflow_Error(t *testing.T) { numCapabilityPeers := 4 - callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{}, &TestErrorCapability{}, 10, 9, numCapabilityPeers, 3, 10*time.Minute) + remoteExecutableConfig := &commoncap.RemoteExecutableConfig{} + remoteExecutableConfig.RegistrationRefresh = 100 * time.Millisecond - for _, caller := range callers { + var testCapabilities []*TestErrorCapability + + clients, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{RegistrationRefresh: 100 * time.Millisecond}, + func() commoncap.ExecutableCapability { + testCap := &TestErrorCapability{} + testCapabilities = append(testCapabilities, testCap) + return testCap + }, + 10, 4, numCapabilityPeers, 3, 10*time.Minute) + + for _, caller := range clients { err := caller.RegisterToWorkflow(context.Background(), commoncap.RegisterToWorkflowRequest{ Metadata: commoncap.RegistrationMetadata{ WorkflowID: workflowID1, @@ -130,39 +166,82 @@ func Test_Server_RegisterToWorkflow_Error(t *testing.T) { require.NoError(t, err) } - for _, caller := range callers { - for i := 0; i < numCapabilityPeers; i++ { - msg := <-caller.receivedMessages - assert.Equal(t, remotetypes.Error_INTERNAL_ERROR, msg.Error) + // As the registration errors, the client should retry the 
registration request repeatedly + require.Eventually(t, func() bool { + for _, testCapability := range testCapabilities { + if len(testCapability.GetRegisterRequests()) > 2 { + return false + } } - } + + return true + }, 10*time.Second, 100*time.Millisecond, "expected more than 2 registration requests to be received") + closeServices(t, srvcs) } -func Test_Server_UnregisterFromWorkflow_RespondsAfterSufficientRequests(t *testing.T) { +func Test_Server_UnregisterFromWorkflowIsCalledWhenClientsAreShutdown(t *testing.T) { ctx := testutils.Context(t) numCapabilityPeers := 4 - callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{}, &TestCapability{}, 10, 9, numCapabilityPeers, 3, 10*time.Minute) + remoteExecutableConfig := &commoncap.RemoteExecutableConfig{} + remoteExecutableConfig.RegistrationRefresh = 100 * time.Millisecond - for _, caller := range callers { - err := caller.UnregisterFromWorkflow(context.Background(), commoncap.UnregisterFromWorkflowRequest{ + var testCapabilities []*TestCapability + + clients, srvcs := testRemoteExecutableCapabilityServer(ctx, t, + &commoncap.RemoteExecutableConfig{RegistrationRefresh: 50 * time.Millisecond, RegistrationExpiry: 500 * time.Millisecond}, + func() commoncap.ExecutableCapability { + testCap := &TestCapability{} + testCapabilities = append(testCapabilities, testCap) + return testCap + }, + 10, 4, numCapabilityPeers, 3, 10*time.Minute) + + for _, caller := range clients { + err := caller.RegisterToWorkflow(context.Background(), commoncap.RegisterToWorkflowRequest{ Metadata: commoncap.RegistrationMetadata{ WorkflowID: workflowID1, ReferenceID: stepReferenceID1, WorkflowOwner: workflowOwnerID, }, }) + require.NoError(t, err) } - for _, caller := range callers { - for i := 0; i < numCapabilityPeers; i++ { - msg := <-caller.receivedMessages - assert.Equal(t, remotetypes.Error_OK, msg.Error) + require.Eventually(t, func() bool { + for _, testCapability := range testCapabilities { + if 
len(testCapability.GetRegisterRequests()) != 1 { + return false + } + } + + return true + }, 10*time.Second, 100*time.Millisecond, "expected one registration request to be received") + + for _, client := range clients { + require.NoError(t, client.Close()) + } + + require.Eventually(t, func() bool { + for _, testCapability := range testCapabilities { + if len(testCapability.GetUnregisterRequests()) != 1 { + return false + } } + + return true + }, 10*time.Second, 100*time.Millisecond, "expected one registration request to be received") + + // a short sleep greater than the expiry time then check that there is still only one unregistration request + time.Sleep(1 * time.Second) + + for _, testCapability := range testCapabilities { + assert.Len(t, testCapability.GetUnregisterRequests(), 1) } + closeServices(t, srvcs) } @@ -171,7 +250,8 @@ func Test_Server_InsufficientCallers(t *testing.T) { numCapabilityPeers := 4 - callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{}, &TestCapability{}, 10, 10, numCapabilityPeers, 3, 100*time.Millisecond) + callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{}, + func() commoncap.ExecutableCapability { return &TestCapability{} }, 10, 10, numCapabilityPeers, 3, 100*time.Millisecond) for _, caller := range callers { _, err := caller.Execute(context.Background(), @@ -198,7 +278,8 @@ func Test_Server_CapabilityError(t *testing.T) { numCapabilityPeers := 4 - callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{}, &TestErrorCapability{}, 10, 9, numCapabilityPeers, 3, 100*time.Millisecond) + callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{}, + func() commoncap.ExecutableCapability { return &TestErrorCapability{} }, 10, 9, numCapabilityPeers, 3, 100*time.Millisecond) for _, caller := range callers { _, err := caller.Execute(context.Background(), @@ -222,7 +303,7 
@@ func Test_Server_CapabilityError(t *testing.T) { func testRemoteExecutableCapabilityServer(ctx context.Context, t *testing.T, config *commoncap.RemoteExecutableConfig, - underlying commoncap.ExecutableCapability, + capabilityFactory func() commoncap.ExecutableCapability, numWorkflowPeers int, workflowDonF uint8, numCapabilityPeers int, capabilityDonF uint8, capabilityNodeResponseTimeout time.Duration) ([]*serverTestClient, []services.Service) { lggr := logger.TestLogger(t) @@ -272,7 +353,7 @@ func testRemoteExecutableCapabilityServer(ctx context.Context, t *testing.T, for i := 0; i < numCapabilityPeers; i++ { capabilityPeer := capabilityPeers[i] capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeer) - capabilityNode := executable.NewServer(config, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, + capabilityNode := executable.NewServer(config, capabilityPeer, capabilityFactory(), capInfo, capDonInfo, workflowDONs, capabilityDispatcher, capabilityNodeResponseTimeout, lggr) require.NoError(t, capabilityNode.Start(ctx)) broker.RegisterReceiverNode(capabilityPeer, capabilityNode) @@ -283,9 +364,10 @@ func testRemoteExecutableCapabilityServer(ctx context.Context, t *testing.T, workflowNodes := make([]*serverTestClient, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) - workflowNode := newServerTestClient(workflowPeers[i], capDonInfo, workflowPeerDispatcher) + workflowNode := newServerTestClient(lggr, workflowPeers[i], config.RegistrationRefresh, capInfo, capDonInfo, workflowDonInfo, workflowPeerDispatcher) broker.RegisterReceiverNode(workflowPeers[i], workflowNode) workflowNodes[i] = workflowNode + servicetest.Run(t, workflowNode) } return workflowNodes, srvcs @@ -298,79 +380,71 @@ func closeServices(t *testing.T, srvcs []services.Service) { } type serverTestClient struct { - peerID p2ptypes.PeerID - dispatcher remotetypes.Dispatcher - 
capabilityDonInfo commoncap.DON - receivedMessages chan *remotetypes.MessageBody - callerDonID string + services.StateMachine + lggr logger.Logger + peerID p2ptypes.PeerID + dispatcher remotetypes.Dispatcher + capabilityDonInfo commoncap.DON + receivedMessages chan *remotetypes.MessageBody + callerDonID string + registrationClient *registration.Client } func (r *serverTestClient) Receive(_ context.Context, msg *remotetypes.MessageBody) { r.receivedMessages <- msg } -func newServerTestClient(peerID p2ptypes.PeerID, capabilityDonInfo commoncap.DON, +func newServerTestClient(lggr logger.Logger, peerID p2ptypes.PeerID, registrationRefresh time.Duration, capInfo commoncap.CapabilityInfo, + capabilityDonInfo commoncap.DON, + workflowDonInfo commoncap.DON, dispatcher remotetypes.Dispatcher) *serverTestClient { - return &serverTestClient{peerID: peerID, dispatcher: dispatcher, capabilityDonInfo: capabilityDonInfo, - receivedMessages: make(chan *remotetypes.MessageBody, 100), callerDonID: "workflow-don"} + + registrationClient := registration.NewClient(lggr, remotetypes.MethodRegisterToWorkflow, registrationRefresh, capInfo, capabilityDonInfo, workflowDonInfo, dispatcher, "serverTestClient") + + return &serverTestClient{lggr: lggr, peerID: peerID, dispatcher: dispatcher, capabilityDonInfo: capabilityDonInfo, + receivedMessages: make(chan *remotetypes.MessageBody, 100), callerDonID: "workflow-don", + registrationClient: registrationClient} } func (r *serverTestClient) Info(ctx context.Context) (commoncap.CapabilityInfo, error) { panic("not implemented") } -func (r *serverTestClient) RegisterToWorkflow(ctx context.Context, req commoncap.RegisterToWorkflowRequest) error { - rawRequest, err := pb.MarshalRegisterToWorkflowRequest(req) - if err != nil { - return err - } - - messageID := remotetypes.MethodRegisterToWorkflow + ":" + req.Metadata.WorkflowID - - for _, node := range r.capabilityDonInfo.Members { - message := &remotetypes.MessageBody{ - CapabilityId: "capability-id", 
- CapabilityDonId: 1, - CallerDonId: 2, - Method: remotetypes.MethodRegisterToWorkflow, - Payload: rawRequest, - MessageId: []byte(messageID), - Sender: r.peerID[:], - Receiver: node[:], +func (r *serverTestClient) Start(ctx context.Context) error { + return r.StartOnce(r.peerID.String(), func() error { + if err := r.registrationClient.Start(ctx); err != nil { + return fmt.Errorf("failed to start capability register: %w", err) } + return nil + }) +} - if err = r.dispatcher.Send(node, message); err != nil { - return err +func (r *serverTestClient) Close() error { + r.IfNotStopped(func() { + if err := r.registrationClient.Close(); err != nil { + r.lggr.Errorf("failed to close capability register: %v", err) } - } + }) return nil } -func (r *serverTestClient) UnregisterFromWorkflow(ctx context.Context, req commoncap.UnregisterFromWorkflowRequest) error { - rawRequest, err := pb.MarshalUnregisterFromWorkflowRequest(req) +func (r *serverTestClient) RegisterToWorkflow(ctx context.Context, req commoncap.RegisterToWorkflowRequest) error { + rawRequest, err := pb.MarshalRegisterToWorkflowRequest(req) if err != nil { return err } - messageID := remotetypes.MethodUnregisterFromWorkflow + ":" + req.Metadata.WorkflowID + err = r.registrationClient.RegisterWorkflow(req.Metadata.WorkflowID, rawRequest) + if err != nil { + return err + } - for _, node := range r.capabilityDonInfo.Members { - message := &remotetypes.MessageBody{ - CapabilityId: "capability-id", - CapabilityDonId: 1, - CallerDonId: 2, - Method: remotetypes.MethodUnregisterFromWorkflow, - Payload: rawRequest, - MessageId: []byte(messageID), - Sender: r.peerID[:], - Receiver: node[:], - } + return nil +} - if err = r.dispatcher.Send(node, message); err != nil { - return err - } - } +func (r *serverTestClient) UnregisterFromWorkflow(ctx context.Context, req commoncap.UnregisterFromWorkflowRequest) error { + r.registrationClient.UnregisterWorkflow(req.Metadata.WorkflowID) return nil } diff --git 
a/core/capabilities/remote/message_cache.go b/core/capabilities/remote/messagecache/message_cache.go similarity index 77% rename from core/capabilities/remote/message_cache.go rename to core/capabilities/remote/messagecache/message_cache.go index f3a3a79b2c6..312b16efff4 100644 --- a/core/capabilities/remote/message_cache.go +++ b/core/capabilities/remote/messagecache/message_cache.go @@ -1,9 +1,9 @@ -package remote +package messagecache // MessageCache is a simple store for messages, grouped by event ID and peer ID. // It is used to collect messages from multiple peers until they are ready for aggregation // based on quantity and freshness. -type messageCache[EventID comparable, PeerID comparable] struct { +type MessageCache[EventID comparable, PeerID comparable] struct { events map[EventID]*eventState[PeerID] } @@ -18,14 +18,14 @@ type msgState struct { payload []byte } -func NewMessageCache[EventID comparable, PeerID comparable]() *messageCache[EventID, PeerID] { - return &messageCache[EventID, PeerID]{ +func New[EventID comparable, PeerID comparable]() *MessageCache[EventID, PeerID] { + return &MessageCache[EventID, PeerID]{ events: make(map[EventID]*eventState[PeerID]), } } // Insert or overwrite a message for <eventID>. Return creation timestamp of the event. -func (c *messageCache[EventID, PeerID]) Insert(eventID EventID, peerID PeerID, timestamp int64, payload []byte) int64 { +func (c *MessageCache[EventID, PeerID]) Insert(eventID EventID, peerID PeerID, timestamp int64, payload []byte) int64 { if _, ok := c.events[eventID]; !ok { c.events[eventID] = &eventState[PeerID]{ peerMsgs: make(map[PeerID]*msgState), @@ -43,7 +43,7 @@ func (c *messageCache[EventID, PeerID]) Insert(eventID EventID, peerID PeerID, t // received more recently than <minTimestamp>. // Return all messages that satisfy the above condition. // Ready() will return true at most once per event if <once> is true. 
-func (c *messageCache[EventID, PeerID]) Ready(eventID EventID, minCount uint32, minTimestamp int64, once bool) (bool, [][]byte) { +func (c *MessageCache[EventID, PeerID]) Ready(eventID EventID, minCount uint32, minTimestamp int64, once bool) (bool, [][]byte) { ev, ok := c.events[eventID] if !ok { return false, nil @@ -51,7 +51,7 @@ func (c *messageCache[EventID, PeerID]) Ready(eventID EventID, minCount uint32, if ev.wasReady && once { return false, nil } - if uint32(len(ev.peerMsgs)) < minCount { + if uint32(len(ev.peerMsgs)) < minCount { //nolint:gosec // disable G115: peer msg count is not expected to exceed uint32 max value return false, nil } countAboveMinTimestamp := uint32(0) @@ -69,13 +69,13 @@ func (c *messageCache[EventID, PeerID]) Ready(eventID EventID, minCount uint32, return false, nil } -func (c *messageCache[EventID, PeerID]) Delete(eventID EventID) { +func (c *MessageCache[EventID, PeerID]) Delete(eventID EventID) { delete(c.events, eventID) } // Return the number of events deleted. // Scans all keys, which might be slow for large caches. 
-func (c *messageCache[EventID, PeerID]) DeleteOlderThan(cutoffTimestamp int64) int { +func (c *MessageCache[EventID, PeerID]) DeleteOlderThan(cutoffTimestamp int64) int { nDeleted := 0 for id, event := range c.events { if event.creationTimestamp < cutoffTimestamp { diff --git a/core/capabilities/remote/message_cache_test.go b/core/capabilities/remote/messagecache/message_cache_test.go similarity index 91% rename from core/capabilities/remote/message_cache_test.go rename to core/capabilities/remote/messagecache/message_cache_test.go index 5ca909ca4ec..57faa8e8725 100644 --- a/core/capabilities/remote/message_cache_test.go +++ b/core/capabilities/remote/messagecache/message_cache_test.go @@ -1,11 +1,11 @@ -package remote_test +package messagecache_test import ( "testing" "github.com/stretchr/testify/require" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/messagecache" ) const ( @@ -17,7 +17,7 @@ const ( ) func TestMessageCache_InsertReady(t *testing.T) { - cache := remote.NewMessageCache[string, string]() + cache := messagecache.New[string, string]() // not ready with one message ts := cache.Insert(eventId1, peerId1, 100, []byte(payloadA)) @@ -43,7 +43,7 @@ func TestMessageCache_InsertReady(t *testing.T) { } func TestMessageCache_DeleteOlderThan(t *testing.T) { - cache := remote.NewMessageCache[string, string]() + cache := messagecache.New[string, string]() ts := cache.Insert(eventId1, peerId1, 100, []byte(payloadA)) require.Equal(t, int64(100), ts) diff --git a/core/capabilities/remote/registration/client.go b/core/capabilities/remote/registration/client.go new file mode 100644 index 00000000000..d780875dc3f --- /dev/null +++ b/core/capabilities/remote/registration/client.go @@ -0,0 +1,138 @@ +package registration + +import ( + "context" + "sync" + "time" + + commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + 
"github.com/smartcontractkit/chainlink-common/pkg/services" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" + "github.com/smartcontractkit/chainlink/v2/core/logger" + p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" +) + +type clientRegistration struct { + registrationRequest []byte +} + +type registerDispatcher interface { + Send(peerID p2ptypes.PeerID, msgBody *types.MessageBody) error +} + +// Client is a shim for remote capabilities that support registration to a workflow. It polls the Server to ensure +// the registration stays live. In the current implementation the Server shim will unregister any workflow that has +// not been re-registered within the registrationExpiry interval. +type Client struct { + services.StateMachine + lggr logger.Logger + registrationMethod string + registrationRefresh time.Duration + capInfo commoncap.CapabilityInfo + capDonInfo commoncap.DON + localDonInfo commoncap.DON + dispatcher registerDispatcher + registeredWorkflows map[string]*clientRegistration + mu sync.RWMutex + stopCh services.StopChan + wg sync.WaitGroup +} + +func NewClient(lggr logger.Logger, registrationMethod string, registrationRefresh time.Duration, capInfo commoncap.CapabilityInfo, capDonInfo commoncap.DON, + localDonInfo commoncap.DON, dispatcher registerDispatcher, registryType string) *Client { + return &Client{ + lggr: lggr.Named(registryType), + registrationMethod: registrationMethod, + registrationRefresh: registrationRefresh, + capInfo: capInfo, + capDonInfo: capDonInfo, + localDonInfo: localDonInfo, + dispatcher: dispatcher, + registeredWorkflows: make(map[string]*clientRegistration), + stopCh: make(services.StopChan), + } +} + +func (r *Client) Start(_ context.Context) error { + return r.StartOnce(r.lggr.Name(), func() error { + r.wg.Add(1) + go func() { + defer r.wg.Done() + r.registrationLoop() + }() + r.lggr.Info("started") + return nil + }) +} + +func (r *Client) Close() error { + return 
r.StopOnce(r.lggr.Name(), func() error { + close(r.stopCh) + r.wg.Wait() + r.lggr.Info("closed") + return nil + }) +} + +func (r *Client) RegisterWorkflow(workflowID string, request []byte) error { + r.mu.Lock() + defer r.mu.Unlock() + + r.lggr.Infow("register workflow called", "capabilityId", r.capInfo.ID, "donId", r.capDonInfo.ID, "workflowID", workflowID) + regState, ok := r.registeredWorkflows[workflowID] + if !ok { + regState = &clientRegistration{ + registrationRequest: request, + } + r.registeredWorkflows[workflowID] = regState + } else { + regState.registrationRequest = request + r.lggr.Warnw("re-registering workflow", "capabilityId", r.capInfo.ID, "donId", r.capDonInfo.ID, "workflowID", workflowID) + } + + return nil +} + +func (r *Client) UnregisterWorkflow(workflowID string) { + r.mu.Lock() + defer r.mu.Unlock() + + r.lggr.Infow("unregister workflow called", "capabilityId", r.capInfo.ID, "donId", r.capDonInfo.ID, "workflowID", workflowID) + delete(r.registeredWorkflows, workflowID) + // Registrations will quickly expire on all remote nodes so it is currently considered unnecessary to send + // unregister messages to the nodes +} + +func (r *Client) registrationLoop() { + ticker := time.NewTicker(r.registrationRefresh) + defer ticker.Stop() + for { + select { + case <-r.stopCh: + return + case <-ticker.C: + r.mu.RLock() + r.lggr.Infow("register for remote capability", "capabilityId", r.capInfo.ID, "donId", r.capDonInfo.ID, "nMembers", len(r.capDonInfo.Members), "nWorkflows", len(r.registeredWorkflows)) + if len(r.registeredWorkflows) == 0 { + r.lggr.Infow("no workflows to register") + } + for _, registration := range r.registeredWorkflows { + // NOTE: send to all by default, introduce different strategies later (KS-76) + for _, peerID := range r.capDonInfo.Members { + m := &types.MessageBody{ + CapabilityId: r.capInfo.ID, + CapabilityDonId: r.capDonInfo.ID, + CallerDonId: r.localDonInfo.ID, + Method: r.registrationMethod, + Payload: 
registration.registrationRequest, + } + err := r.dispatcher.Send(peerID, m) + if err != nil { + r.lggr.Errorw("failed to send message", "capabilityId", r.capInfo.ID, "donId", r.capDonInfo.ID, "peerId", peerID, "err", err) + } + } + } + r.mu.RUnlock() + } + } +} diff --git a/core/capabilities/remote/registration/client_test.go b/core/capabilities/remote/registration/client_test.go new file mode 100644 index 00000000000..9582180139b --- /dev/null +++ b/core/capabilities/remote/registration/client_test.go @@ -0,0 +1,92 @@ +package registration + +import ( + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink-common/pkg/services/servicetest" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" + "github.com/smartcontractkit/chainlink/v2/core/logger" + types2 "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" + + libocrtypes "github.com/smartcontractkit/libocr/ragep2p/types" +) + +func TestClient_RegisterWorkflow(t *testing.T) { + lggr := logger.TestLogger(t) + dispatcher := NewMockDispatcher() + peer1 := libocrtypes.PeerID{'p', 'e', 'e', 'r', '1'} + peer2 := libocrtypes.PeerID{'p', 'e', 'e', 'r', '2'} + + client := NewClient(lggr, types.MethodRegisterTrigger, 10*time.Millisecond, capabilities.CapabilityInfo{}, capabilities.DON{Members: []libocrtypes.PeerID{peer1, peer2}}, capabilities.DON{}, dispatcher, "test") + servicetest.Run(t, client) + + err := client.RegisterWorkflow("workflow1", []byte("registerrequest")) + require.NoError(t, err) + + require.Eventually(t, func() bool { + messages := dispatcher.GetMessages() + + // Check sent to both peers with the same number of requests + if len(messages[peer1]) >= 3 { + return len(messages[peer1]) == len(messages[peer2]) + } + return false + }, 60*time.Second, 10*time.Millisecond) + + assert.Equal(t, "registerrequest", 
string(dispatcher.GetMessages()[peer1][0].Payload)) +} + +func TestClient_UnregisterWorkflow(t *testing.T) { + lggr := logger.TestLogger(t) + dispatcher := NewMockDispatcher() + + peer1 := libocrtypes.PeerID{'p', 'e', 'e', 'r', '1'} + client := NewClient(lggr, types.MethodRegisterTrigger, 10*time.Millisecond, capabilities.CapabilityInfo{}, capabilities.DON{Members: []libocrtypes.PeerID{peer1}}, capabilities.DON{}, dispatcher, "test") + servicetest.Run(t, client) + + err := client.RegisterWorkflow("workflow1", []byte("request")) + require.NoError(t, err) + + client.UnregisterWorkflow("workflow1") + + initialCount := len(dispatcher.GetMessages()[peer1]) + time.Sleep(100 * time.Microsecond) + // If it has been unregistered then no new registration requests should be sent + finalCount := len(dispatcher.GetMessages()[peer1]) + assert.Equal(t, initialCount, finalCount) +} + +type MockDispatcher struct { + mu sync.Mutex + messages map[types2.PeerID][]*types.MessageBody +} + +func NewMockDispatcher() *MockDispatcher { + return &MockDispatcher{ + messages: make(map[types2.PeerID][]*types.MessageBody), + } +} + +func (m *MockDispatcher) Send(peerID types2.PeerID, msgBody *types.MessageBody) error { + m.mu.Lock() + defer m.mu.Unlock() + m.messages[peerID] = append(m.messages[peerID], msgBody) + return nil +} + +func (m *MockDispatcher) GetMessages() map[types2.PeerID][]*types.MessageBody { + m.mu.Lock() + defer m.mu.Unlock() + + mapCopy := make(map[types2.PeerID][]*types.MessageBody) + for k, v := range m.messages { + mapCopy[k] = append([]*types.MessageBody(nil), v...) 
+ } + return mapCopy +} diff --git a/core/capabilities/remote/registration/server.go b/core/capabilities/remote/registration/server.go new file mode 100644 index 00000000000..d6c795978bc --- /dev/null +++ b/core/capabilities/remote/registration/server.go @@ -0,0 +1,168 @@ +package registration + +import ( + "context" + "errors" + "fmt" + "sync" + "time" + + commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink-common/pkg/services" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/aggregation" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/messagecache" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/validation" + "github.com/smartcontractkit/chainlink/v2/core/logger" + p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" +) + +type Key struct { + CallerDonID uint32 + WorkflowID string + StepReference string +} + +type serverRegistration struct { + registrationRequest []byte +} + +type target interface { + Register(ctx context.Context, key Key, registerRequest []byte) error + Unregister(ctx context.Context, registerRequest []byte) error +} + +// Server is a shim for remote capabilities that support registration to a workflow. It aggregates registration requests +// and invokes the register method on the target capability when the minimum number of registrations are received (2f+1). +// The server will also periodically clean up expired registrations. 
A registration is considered expired if it has not +// been aggregated within the registrationExpiry period, when a registration is expired unregister is called on the target +type Server struct { + lggr logger.Logger + capInfo commoncap.CapabilityInfo + registrationExpiry time.Duration + target target + registrations map[Key]*serverRegistration + messageCache *messagecache.MessageCache[Key, p2ptypes.PeerID] + membersCache map[uint32]map[p2ptypes.PeerID]bool + workflowDONs map[uint32]commoncap.DON + + stopCh services.StopChan + wg sync.WaitGroup + + mu sync.RWMutex +} + +func NewServer(lggr logger.Logger, target target, capInfo commoncap.CapabilityInfo, registrationExpiry time.Duration, workflowDONs map[uint32]commoncap.DON, serverType string) *Server { + membersCache := make(map[uint32]map[p2ptypes.PeerID]bool) + for id, don := range workflowDONs { + cache := make(map[p2ptypes.PeerID]bool) + for _, member := range don.Members { + cache[member] = true + } + membersCache[id] = cache + } + + return &Server{ + lggr: lggr.Named(serverType), + capInfo: capInfo, + target: target, + registrationExpiry: registrationExpiry, + stopCh: make(services.StopChan), + registrations: make(map[Key]*serverRegistration), + messageCache: messagecache.New[Key, p2ptypes.PeerID](), + membersCache: membersCache, + workflowDONs: workflowDONs, + } +} + +func (s *Server) Start(ctx context.Context) error { + s.wg.Add(1) + go func() { + defer s.wg.Done() + s.registrationCleanupLoop() + }() + + s.lggr.Info("started") + return nil +} + +func (s *Server) Close() error { + close(s.stopCh) + s.wg.Wait() + s.lggr.Info("closed") + return nil +} + +func (s *Server) Register(ctx context.Context, msg *types.MessageBody, sender p2ptypes.PeerID, workflowID string, stepReference string) error { + s.mu.Lock() + defer s.mu.Unlock() + callerDon, ok := s.workflowDONs[msg.CallerDonId] + if !ok { + return errors.New("received a message from unsupported workflow DON") + } + if 
!s.membersCache[msg.CallerDonId][sender] { + return errors.New("sender not a member of its workflow DON") + } + if err := validation.ValidateWorkflowOrExecutionID(workflowID); err != nil { + return fmt.Errorf("received request with invalid workflow ID: %w", err) + } + + s.lggr.Debugw("received registration", "capabilityId", s.capInfo.ID, "workflowId", workflowID, "sender", sender) + key := Key{CallerDonID: msg.CallerDonId, WorkflowID: workflowID, StepReference: stepReference} + nowMs := time.Now().UnixMilli() + s.messageCache.Insert(key, sender, nowMs, msg.Payload) + _, exists := s.registrations[key] + if exists { + s.lggr.Debugw("registration already exists", "capabilityId", s.capInfo.ID, "workflowId", workflowID) + return nil + } + // NOTE: require 2F+1 by default, introduce different strategies later (KS-76) + minRequired := uint32(2*callerDon.F + 1) + ready, payloads := s.messageCache.Ready(key, minRequired, nowMs-s.registrationExpiry.Milliseconds(), false) + if !ready { + s.lggr.Debugw("not ready to aggregate yet", "capabilityId", s.capInfo.ID, "workflowId", workflowID, "minRequired", minRequired) + return nil + } + aggregated, err := aggregation.AggregateModeRaw(payloads, uint32(callerDon.F+1)) + if err != nil { + return fmt.Errorf("failed to aggregate registrations: %w", err) + } + err = s.target.Register(ctx, key, aggregated) + if err != nil { + return fmt.Errorf("failed to register request on target: %w", err) + } + + s.registrations[key] = &serverRegistration{ + registrationRequest: aggregated, + } + s.lggr.Debugw("updated registration", "capabilityId", s.capInfo.ID, "workflowId", workflowID) + return nil +} + +func (s *Server) registrationCleanupLoop() { + ticker := time.NewTicker(s.registrationExpiry) + defer ticker.Stop() + for { + select { + case <-s.stopCh: + return + case <-ticker.C: + now := time.Now().UnixMilli() + s.mu.Lock() + for key, req := range s.registrations { + callerDon := s.workflowDONs[key.CallerDonID] + ready, _ := 
s.messageCache.Ready(key, uint32(2*callerDon.F+1), now-s.registrationExpiry.Milliseconds(), false) + if !ready { + s.lggr.Infow("registration expired", "capabilityId", s.capInfo.ID, "callerDonID", key.CallerDonID, "workflowId", key.WorkflowID) + ctx, cancel := s.stopCh.NewCtx() + err := s.target.Unregister(ctx, req.registrationRequest) + cancel() + s.lggr.Infow("unregistered", "capabilityId", s.capInfo.ID, "callerDonID", key.CallerDonID, "workflowId", key.WorkflowID, "err", err) + delete(s.registrations, key) + s.messageCache.Delete(key) + } + } + s.mu.Unlock() + } + } +} diff --git a/core/capabilities/remote/registration/server_test.go b/core/capabilities/remote/registration/server_test.go new file mode 100644 index 00000000000..94b20e5ed90 --- /dev/null +++ b/core/capabilities/remote/registration/server_test.go @@ -0,0 +1,123 @@ +package registration + +import ( + "context" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink-common/pkg/services/servicetest" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" + "github.com/smartcontractkit/chainlink/v2/core/logger" + p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" + + libocrtypes "github.com/smartcontractkit/libocr/ragep2p/types" +) + +func TestServer_Register(t *testing.T) { + lggr := logger.TestLogger(t) + target := &mockTarget{} + + peer1 := libocrtypes.PeerID{'p', 'e', 'e', 'r', '1'} + peer2 := libocrtypes.PeerID{'p', 'e', 'e', 'r', '2'} + peer3 := libocrtypes.PeerID{'p', 'e', 'e', 'r', '3'} + peer4 := libocrtypes.PeerID{'p', 'e', 'e', 'r', '4'} + peer5 := libocrtypes.PeerID{'p', 'e', 'e', 'r', '5'} + + workflowID1 := "15c631d295ef5e32deb99a10ee6804bc4af13855687559d7ff6552ac6dbb2ce0" + + capInfo := capabilities.CapabilityInfo{ID: "test-capability"} + registrationExpiry := 60 * time.Second + 
workflowDONs := make(map[uint32]capabilities.DON) + workflowDONs[1] = capabilities.DON{F: 1, Members: []p2ptypes.PeerID{peer1, peer2, peer3, peer4, peer5}} + + srv := NewServer(lggr, target, capInfo, registrationExpiry, workflowDONs, "test-server") + servicetest.Run(t, srv) + + msg := &types.MessageBody{CallerDonId: 1, Payload: []byte("test-payload")} + err := srv.Register(context.Background(), msg, peer1, workflowID1, "step1") + require.NoError(t, err) + assert.Empty(t, target.GetRegisterRequests()) + + err = srv.Register(context.Background(), msg, peer2, workflowID1, "step1") + require.NoError(t, err) + assert.Empty(t, target.GetRegisterRequests()) + + err = srv.Register(context.Background(), msg, peer3, workflowID1, "step1") + require.NoError(t, err) + // 2F+1 requests have been sent so register on the target should be called + assert.Len(t, target.GetRegisterRequests(), 1) + + // Sending more requests should not result in the target receiving more register calls + err = srv.Register(context.Background(), msg, peer4, workflowID1, "step1") + require.NoError(t, err) + err = srv.Register(context.Background(), msg, peer5, workflowID1, "step1") + require.NoError(t, err) + + assert.Len(t, target.registerRequests, 1) +} + +func TestServer_Unregister(t *testing.T) { + lggr := logger.TestLogger(t) + target := &mockTarget{} + + peer1 := libocrtypes.PeerID{'p', 'e', 'e', 'r', '1'} + peer2 := libocrtypes.PeerID{'p', 'e', 'e', 'r', '2'} + peer3 := libocrtypes.PeerID{'p', 'e', 'e', 'r', '3'} + + workflowID1 := "15c631d295ef5e32deb99a10ee6804bc4af13855687559d7ff6552ac6dbb2ce0" + + capInfo := capabilities.CapabilityInfo{ID: "test-capability"} + registrationExpiry := 10 * time.Millisecond + workflowDONs := make(map[uint32]capabilities.DON) + workflowDONs[1] = capabilities.DON{F: 1, Members: []p2ptypes.PeerID{peer1, peer2, peer3}} + + srv := NewServer(lggr, target, capInfo, registrationExpiry, workflowDONs, "test-server") + servicetest.Run(t, srv) + + msg := 
&types.MessageBody{CallerDonId: 1, Payload: []byte("test-payload")} + err := srv.Register(context.Background(), msg, peer1, workflowID1, "step1") + require.NoError(t, err) + err = srv.Register(context.Background(), msg, peer2, workflowID1, "step1") + require.NoError(t, err) + err = srv.Register(context.Background(), msg, peer3, workflowID1, "step1") + require.NoError(t, err) + + assert.Eventually(t, func() bool { return len(target.GetUnregisterRequests()) == 1 }, 100*time.Millisecond, 10*time.Millisecond) +} + +type mockTarget struct { + registerRequests [][]byte + unregisterRequests [][]byte + mux sync.Mutex +} + +func (m *mockTarget) Register(ctx context.Context, key Key, registerRequest []byte) error { + m.mux.Lock() + defer m.mux.Unlock() + m.registerRequests = append(m.registerRequests, registerRequest) + return nil +} + +func (m *mockTarget) Unregister(ctx context.Context, registerRequest []byte) error { + m.mux.Lock() + defer m.mux.Unlock() + m.unregisterRequests = append(m.unregisterRequests, registerRequest) + return nil +} + +func (m *mockTarget) GetRegisterRequests() [][]byte { + m.mux.Lock() + defer m.mux.Unlock() + return m.registerRequests +} + +func (m *mockTarget) GetUnregisterRequests() [][]byte { + m.mux.Lock() + defer m.mux.Unlock() + return m.unregisterRequests +} diff --git a/core/capabilities/remote/trigger_publisher.go b/core/capabilities/remote/trigger_publisher.go index 315959605e8..cf546a0d7d6 100644 --- a/core/capabilities/remote/trigger_publisher.go +++ b/core/capabilities/remote/trigger_publisher.go @@ -4,16 +4,16 @@ import ( "context" "crypto/sha256" "encoding/binary" + "fmt" "sync" "time" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink-common/pkg/services" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/registration" 
"github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/validation" "github.com/smartcontractkit/chainlink/v2/core/logger" - p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) // TriggerPublisher manages all external users of a local trigger capability. @@ -23,32 +23,20 @@ import ( // // TriggerPublisher communicates with corresponding TriggerSubscribers on remote nodes. type triggerPublisher struct { - config *commoncap.RemoteTriggerConfig - underlying commoncap.TriggerCapability - capInfo commoncap.CapabilityInfo - capDonInfo commoncap.DON - workflowDONs map[uint32]commoncap.DON - membersCache map[uint32]map[p2ptypes.PeerID]bool - dispatcher types.Dispatcher - messageCache *messageCache[registrationKey, p2ptypes.PeerID] - registrations map[registrationKey]*pubRegState - mu sync.RWMutex // protects messageCache and registrations - batchingQueue map[[32]byte]*batchedResponse - batchingEnabled bool - bqMu sync.Mutex // protects batchingQueue - stopCh services.StopChan - wg sync.WaitGroup - lggr logger.Logger -} - -type registrationKey struct { - callerDonId uint32 - workflowId string -} + config *commoncap.RemoteTriggerConfig + underlying commoncap.TriggerCapability + capInfo commoncap.CapabilityInfo + capDonInfo commoncap.DON + workflowDONs map[uint32]commoncap.DON + dispatcher types.Dispatcher -type pubRegState struct { - callback <-chan commoncap.TriggerResponse - request commoncap.TriggerRegistrationRequest + batchingQueue map[[32]byte]*batchedResponse + batchingEnabled bool + registrationServer *registration.Server + bqMu sync.Mutex // protects batchingQueue + stopCh services.StopChan + wg sync.WaitGroup + lggr logger.Logger } type batchedResponse struct { @@ -68,34 +56,33 @@ func NewTriggerPublisher(config *commoncap.RemoteTriggerConfig, underlying commo config = &commoncap.RemoteTriggerConfig{} } config.ApplyDefaults() - membersCache := 
make(map[uint32]map[p2ptypes.PeerID]bool) - for id, don := range workflowDONs { - cache := make(map[p2ptypes.PeerID]bool) - for _, member := range don.Members { - cache[member] = true - } - membersCache[id] = cache - } - return &triggerPublisher{ + + publisher := &triggerPublisher{ config: config, underlying: underlying, capInfo: capInfo, capDonInfo: capDonInfo, workflowDONs: workflowDONs, - membersCache: membersCache, dispatcher: dispatcher, - messageCache: NewMessageCache[registrationKey, p2ptypes.PeerID](), - registrations: make(map[registrationKey]*pubRegState), batchingQueue: make(map[[32]byte]*batchedResponse), batchingEnabled: config.MaxBatchSize > 1 && config.BatchCollectionPeriod >= minAllowedBatchCollectionPeriod, stopCh: make(services.StopChan), lggr: lggr.Named("TriggerPublisher"), } + + registrationServer := registration.NewServer(lggr, publisher, capInfo, config.RegistrationExpiry, workflowDONs, "TriggerPublisher") + + publisher.registrationServer = registrationServer + + return publisher } func (p *triggerPublisher) Start(ctx context.Context) error { - p.wg.Add(1) - go p.registrationCleanupLoop() + err := p.registrationServer.Start(ctx) + if err != nil { + return fmt.Errorf("failed to start registration server: %w", err) + } + if p.batchingEnabled { p.wg.Add(1) go p.batchingLoop() @@ -104,7 +91,20 @@ func (p *triggerPublisher) Start(ctx context.Context) error { return nil } -func (p *triggerPublisher) Receive(_ context.Context, msg *types.MessageBody) { +func (p *triggerPublisher) Close() error { + close(p.stopCh) + p.wg.Wait() + + err := p.registrationServer.Close() + if err != nil { + p.lggr.Errorw("failed to close registration server", "err", err) + } + + p.lggr.Info("TriggerPublisher closed") + return nil +} + +func (p *triggerPublisher) Receive(ctx context.Context, msg *types.MessageBody) { sender, err := ToPeerID(msg.Sender) if err != nil { p.lggr.Errorw("failed to convert message sender to PeerID", "err", err) @@ -117,97 +117,45 @@ func (p 
*triggerPublisher) Receive(_ context.Context, msg *types.MessageBody) { p.lggr.Errorw("failed to unmarshal trigger registration request", "capabilityId", p.capInfo.ID, "err", err) return } - callerDon, ok := p.workflowDONs[msg.CallerDonId] - if !ok { - p.lggr.Errorw("received a message from unsupported workflow DON", "capabilityId", p.capInfo.ID, "callerDonId", msg.CallerDonId) - return - } - if !p.membersCache[msg.CallerDonId][sender] { - p.lggr.Errorw("sender not a member of its workflow DON", "capabilityId", p.capInfo.ID, "callerDonId", msg.CallerDonId, "sender", sender) - return - } - if err = validation.ValidateWorkflowOrExecutionID(req.Metadata.WorkflowID); err != nil { - p.lggr.Errorw("received trigger request with invalid workflow ID", "capabilityId", p.capInfo.ID, "workflowId", SanitizeLogString(req.Metadata.WorkflowID), "err", err) - return - } - p.lggr.Debugw("received trigger registration", "capabilityId", p.capInfo.ID, "workflowId", req.Metadata.WorkflowID, "sender", sender) - key := registrationKey{msg.CallerDonId, req.Metadata.WorkflowID} - nowMs := time.Now().UnixMilli() - p.mu.Lock() - defer p.mu.Unlock() - p.messageCache.Insert(key, sender, nowMs, msg.Payload) - _, exists := p.registrations[key] - if exists { - p.lggr.Debugw("trigger registration already exists", "capabilityId", p.capInfo.ID, "workflowId", req.Metadata.WorkflowID) - return - } - // NOTE: require 2F+1 by default, introduce different strategies later (KS-76) - minRequired := uint32(2*callerDon.F + 1) - ready, payloads := p.messageCache.Ready(key, minRequired, nowMs-p.config.RegistrationExpiry.Milliseconds(), false) - if !ready { - p.lggr.Debugw("not ready to aggregate yet", "capabilityId", p.capInfo.ID, "workflowId", req.Metadata.WorkflowID, "minRequired", minRequired) - return - } - aggregated, err := AggregateModeRaw(payloads, uint32(callerDon.F+1)) - if err != nil { - p.lggr.Errorw("failed to aggregate trigger registrations", "capabilityId", p.capInfo.ID, "workflowId", 
req.Metadata.WorkflowID, "err", err) - return - } - unmarshaled, err := pb.UnmarshalTriggerRegistrationRequest(aggregated) + + workflowID := req.Metadata.WorkflowID + err = p.registrationServer.Register(ctx, msg, sender, workflowID, "") if err != nil { - p.lggr.Errorw("failed to unmarshal request", "capabilityId", p.capInfo.ID, "err", err) - return - } - ctx, cancel := p.stopCh.NewCtx() - callbackCh, err := p.underlying.RegisterTrigger(ctx, unmarshaled) - cancel() - if err == nil { - p.registrations[key] = &pubRegState{ - callback: callbackCh, - request: unmarshaled, - } - p.wg.Add(1) - go p.triggerEventLoop(callbackCh, key) - p.lggr.Debugw("updated trigger registration", "capabilityId", p.capInfo.ID, "workflowId", req.Metadata.WorkflowID) - } else { - p.lggr.Errorw("failed to register trigger", "capabilityId", p.capInfo.ID, "workflowId", req.Metadata.WorkflowID, "err", err) + p.lggr.Errorw("failed to register trigger", "capabilityId", p.capInfo.ID, "workflowID", + SanitizeLogString(workflowID), "callerDonId", msg.CallerDonId, "sender", sender, "err", err) } } else { p.lggr.Errorw("received trigger request with unknown method", "method", SanitizeLogString(msg.Method), "sender", sender) } } -func (p *triggerPublisher) registrationCleanupLoop() { - defer p.wg.Done() - ticker := time.NewTicker(p.config.RegistrationExpiry) - defer ticker.Stop() - for { - select { - case <-p.stopCh: - return - case <-ticker.C: - now := time.Now().UnixMilli() - p.mu.Lock() - for key, req := range p.registrations { - callerDon := p.workflowDONs[key.callerDonId] - ready, _ := p.messageCache.Ready(key, uint32(2*callerDon.F+1), now-p.config.RegistrationExpiry.Milliseconds(), false) - if !ready { - p.lggr.Infow("trigger registration expired", "capabilityId", p.capInfo.ID, "callerDonID", key.callerDonId, "workflowId", key.workflowId) - ctx, cancel := p.stopCh.NewCtx() - err := p.underlying.UnregisterTrigger(ctx, req.request) - cancel() - p.lggr.Infow("unregistered trigger", "capabilityId", 
p.capInfo.ID, "callerDonID", key.callerDonId, "workflowId", key.workflowId, "err", err) - // after calling UnregisterTrigger, the underlying trigger will not send any more events to the channel - delete(p.registrations, key) - p.messageCache.Delete(key) - } - } - p.mu.Unlock() - } +func (p *triggerPublisher) Register(_ context.Context, key registration.Key, registerRequest []byte) error { + unmarshalled, err := pb.UnmarshalTriggerRegistrationRequest(registerRequest) + if err != nil { + return fmt.Errorf("failed to unmarshal request: %w", err) } + ctx, cancel := p.stopCh.NewCtx() + callbackCh, err := p.underlying.RegisterTrigger(ctx, unmarshalled) + cancel() + if err != nil { + return fmt.Errorf("failed to register trigger: %w", err) + } + + p.wg.Add(1) + go p.triggerEventLoop(callbackCh, key) + return nil } -func (p *triggerPublisher) triggerEventLoop(callbackCh <-chan commoncap.TriggerResponse, key registrationKey) { +func (p *triggerPublisher) Unregister(ctx context.Context, registerRequest []byte) error { + unmarshalled, err := pb.UnmarshalTriggerRegistrationRequest(registerRequest) + if err != nil { + return fmt.Errorf("failed to unmarshal registration request: %w", err) + } + + return p.underlying.UnregisterTrigger(ctx, unmarshalled) +} + +func (p *triggerPublisher) triggerEventLoop(callbackCh <-chan commoncap.TriggerResponse, key registration.Key) { defer p.wg.Done() for { select { @@ -215,11 +163,11 @@ func (p *triggerPublisher) triggerEventLoop(callbackCh <-chan commoncap.TriggerR return case response, ok := <-callbackCh: if !ok { - p.lggr.Infow("triggerEventLoop channel closed", "capabilityId", p.capInfo.ID, "workflowId", key.workflowId) + p.lggr.Infow("triggerEventLoop channel closed", "capabilityId", p.capInfo.ID, "workflowID", key.WorkflowID) return } triggerEvent := response.Event - p.lggr.Debugw("received trigger event", "capabilityId", p.capInfo.ID, "workflowId", key.workflowId, "triggerEventID", triggerEvent.ID) + p.lggr.Debugw("received trigger 
event", "capabilityId", p.capInfo.ID, "workflowID", key.WorkflowID, "triggerEventID", triggerEvent.ID) marshaledResponse, err := pb.MarshalTriggerResponse(response) if err != nil { p.lggr.Debugw("can't marshal trigger event", "err", err) @@ -232,19 +180,19 @@ func (p *triggerPublisher) triggerEventLoop(callbackCh <-chan commoncap.TriggerR // a single-element "batch" p.sendBatch(&batchedResponse{ rawResponse: marshaledResponse, - callerDonID: key.callerDonId, + callerDonID: key.CallerDonID, triggerEventID: triggerEvent.ID, - workflowIDs: []string{key.workflowId}, + workflowIDs: []string{key.WorkflowID}, }) } } } } -func (p *triggerPublisher) enqueueForBatching(rawResponse []byte, key registrationKey, triggerEventID string) { +func (p *triggerPublisher) enqueueForBatching(rawResponse []byte, key registration.Key, triggerEventID string) { // put in batching queue, group by hash(callerDonId, triggerEventID, response) combined := make([]byte, 4) - binary.LittleEndian.PutUint32(combined, key.callerDonId) + binary.LittleEndian.PutUint32(combined, key.CallerDonID) combined = append(combined, []byte(triggerEventID)...) combined = append(combined, rawResponse...) 
sha := sha256.Sum256(combined) @@ -253,13 +201,13 @@ func (p *triggerPublisher) enqueueForBatching(rawResponse []byte, key registrati if !exists { elem = &batchedResponse{ rawResponse: rawResponse, - callerDonID: key.callerDonId, + callerDonID: key.CallerDonID, triggerEventID: triggerEventID, - workflowIDs: []string{key.workflowId}, + workflowIDs: []string{key.WorkflowID}, } p.batchingQueue[sha] = elem } else { - elem.workflowIDs = append(elem.workflowIDs, key.workflowId) + elem.workflowIDs = append(elem.workflowIDs, key.WorkflowID) } p.bqMu.Unlock() } @@ -317,13 +265,6 @@ func (p *triggerPublisher) batchingLoop() { } } -func (p *triggerPublisher) Close() error { - close(p.stopCh) - p.wg.Wait() - p.lggr.Info("TriggerPublisher closed") - return nil -} - func (p *triggerPublisher) Ready() error { return nil } diff --git a/core/capabilities/remote/trigger_subscriber.go b/core/capabilities/remote/trigger_subscriber.go index 2638d9ca5f3..518497b6174 100644 --- a/core/capabilities/remote/trigger_subscriber.go +++ b/core/capabilities/remote/trigger_subscriber.go @@ -3,17 +3,25 @@ package remote import ( "context" "errors" + "fmt" "sync" "time" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink-common/pkg/services" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/aggregation" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/messagecache" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/registration" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/logger" p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) +const ( + defaultSendChannelBufferSize = 1000 +) + // TriggerSubscriber is a shim for remote trigger capabilities. // It translatesd between capability API calls and network messages. 
// Its responsibilities are: @@ -22,29 +30,22 @@ import ( // // TriggerSubscriber communicates with corresponding TriggerReceivers on remote nodes. type triggerSubscriber struct { - config *commoncap.RemoteTriggerConfig - capInfo commoncap.CapabilityInfo - capDonInfo commoncap.DON - capDonMembers map[p2ptypes.PeerID]struct{} - localDonInfo commoncap.DON - dispatcher types.Dispatcher - aggregator types.Aggregator - messageCache *messageCache[triggerEventKey, p2ptypes.PeerID] - registeredWorkflows map[string]*subRegState - mu sync.RWMutex // protects registeredWorkflows and messageCache - stopCh services.StopChan - wg sync.WaitGroup - lggr logger.Logger + config *commoncap.RemoteTriggerConfig + capInfo commoncap.CapabilityInfo + capDonMembers map[p2ptypes.PeerID]struct{} + aggregator types.Aggregator + messageCache *messagecache.MessageCache[triggerEventKey, p2ptypes.PeerID] + mu sync.RWMutex // protects registeredWorkflows and messageCache + stopCh services.StopChan + wg sync.WaitGroup + lggr logger.Logger + registrationClient *registration.Client + responseChannels map[string]chan commoncap.TriggerResponse } type triggerEventKey struct { - triggerEventId string - workflowId string -} - -type subRegState struct { - callback chan commoncap.TriggerResponse - rawRequest []byte + triggerEventID string + workflowID string } type TriggerSubscriber interface { @@ -58,14 +59,13 @@ var _ services.Service = &triggerSubscriber{} // TODO makes this configurable with a default const ( - defaultSendChannelBufferSize = 1000 - maxBatchedWorkflowIDs = 1000 + maxBatchedWorkflowIDs = 1000 ) func NewTriggerSubscriber(config *commoncap.RemoteTriggerConfig, capInfo commoncap.CapabilityInfo, capDonInfo commoncap.DON, localDonInfo commoncap.DON, dispatcher types.Dispatcher, aggregator types.Aggregator, lggr logger.Logger) *triggerSubscriber { if aggregator == nil { lggr.Warnw("no aggregator provided, using default MODE aggregator", "capabilityId", capInfo.ID) - aggregator = 
NewDefaultModeAggregator(uint32(capDonInfo.F + 1)) + aggregator = aggregation.NewDefaultModeAggregator(uint32(capDonInfo.F + 1)) } if config == nil { lggr.Info("no config provided, using default values") @@ -77,23 +77,25 @@ func NewTriggerSubscriber(config *commoncap.RemoteTriggerConfig, capInfo commonc capDonMembers[member] = struct{}{} } return &triggerSubscriber{ - config: config, - capInfo: capInfo, - capDonInfo: capDonInfo, - capDonMembers: capDonMembers, - localDonInfo: localDonInfo, - dispatcher: dispatcher, - aggregator: aggregator, - messageCache: NewMessageCache[triggerEventKey, p2ptypes.PeerID](), - registeredWorkflows: make(map[string]*subRegState), - stopCh: make(services.StopChan), - lggr: lggr.Named("TriggerSubscriber"), + config: config, + capInfo: capInfo, + capDonMembers: capDonMembers, + aggregator: aggregator, + messageCache: messagecache.New[triggerEventKey, p2ptypes.PeerID](), + stopCh: make(services.StopChan), + lggr: lggr.Named("TriggerSubscriber"), + registrationClient: registration.NewClient(lggr, types.MethodRegisterTrigger, config.RegistrationRefresh, capInfo, capDonInfo, localDonInfo, dispatcher, "TriggerSubscriber"), + responseChannels: make(map[string]chan commoncap.TriggerResponse), } } func (s *triggerSubscriber) Start(ctx context.Context) error { - s.wg.Add(2) - go s.registrationLoop() + s.wg.Add(1) + err := s.registrationClient.Start(ctx) + if err != nil { + return fmt.Errorf("failed to start capability register: %w", err) + } + go s.eventCleanupLoop() s.lggr.Info("TriggerSubscriber started") return nil @@ -104,6 +106,9 @@ func (s *triggerSubscriber) Info(ctx context.Context) (commoncap.CapabilityInfo, } func (s *triggerSubscriber) RegisterTrigger(ctx context.Context, request commoncap.TriggerRegistrationRequest) (<-chan commoncap.TriggerResponse, error) { + s.mu.Lock() + defer s.mu.Unlock() + rawRequest, err := pb.MarshalTriggerRegistrationRequest(request) if err != nil { return nil, err @@ -111,71 +116,48 @@ func (s 
*triggerSubscriber) RegisterTrigger(ctx context.Context, request commonc if request.Metadata.WorkflowID == "" { return nil, errors.New("empty workflowID") } + if err := s.registrationClient.RegisterWorkflow(request.Metadata.WorkflowID, rawRequest); err != nil { + return nil, fmt.Errorf("failed to register workflow: %w", err) + } + + responseChannel := make(chan commoncap.TriggerResponse, defaultSendChannelBufferSize) + s.responseChannels[request.Metadata.WorkflowID] = responseChannel + return responseChannel, nil +} + +func (s *triggerSubscriber) UnregisterTrigger(ctx context.Context, request commoncap.TriggerRegistrationRequest) error { s.mu.Lock() defer s.mu.Unlock() - s.lggr.Infow("RegisterTrigger called", "capabilityId", s.capInfo.ID, "donId", s.capDonInfo.ID, "workflowID", request.Metadata.WorkflowID) - regState, ok := s.registeredWorkflows[request.Metadata.WorkflowID] - if !ok { - regState = &subRegState{ - callback: make(chan commoncap.TriggerResponse, defaultSendChannelBufferSize), - rawRequest: rawRequest, - } - s.registeredWorkflows[request.Metadata.WorkflowID] = regState - } else { - regState.rawRequest = rawRequest - s.lggr.Warnw("RegisterTrigger re-registering trigger", "capabilityId", s.capInfo.ID, "donId", s.capDonInfo.ID, "workflowID", request.Metadata.WorkflowID) + responseChannel := s.responseChannels[request.Metadata.WorkflowID] + if responseChannel != nil { + close(responseChannel) + delete(s.responseChannels, request.Metadata.WorkflowID) } - return regState.callback, nil + s.registrationClient.UnregisterWorkflow(request.Metadata.WorkflowID) + return nil } -func (s *triggerSubscriber) registrationLoop() { - defer s.wg.Done() - ticker := time.NewTicker(s.config.RegistrationRefresh) - defer ticker.Stop() - for { - select { - case <-s.stopCh: - return - case <-ticker.C: - s.mu.RLock() - s.lggr.Infow("register trigger for remote capability", "capabilityId", s.capInfo.ID, "donId", s.capDonInfo.ID, "nMembers", len(s.capDonInfo.Members), "nWorkflows", 
len(s.registeredWorkflows)) - if len(s.registeredWorkflows) == 0 { - s.lggr.Infow("no workflows to register") - } - for _, registration := range s.registeredWorkflows { - // NOTE: send to all by default, introduce different strategies later (KS-76) - for _, peerID := range s.capDonInfo.Members { - m := &types.MessageBody{ - CapabilityId: s.capInfo.ID, - CapabilityDonId: s.capDonInfo.ID, - CallerDonId: s.localDonInfo.ID, - Method: types.MethodRegisterTrigger, - Payload: registration.rawRequest, - } - err := s.dispatcher.Send(peerID, m) - if err != nil { - s.lggr.Errorw("failed to send message", "capabilityId", s.capInfo.ID, "donId", s.capDonInfo.ID, "peerId", peerID, "err", err) - } - } - } - s.mu.RUnlock() - } - } +func (s *triggerSubscriber) responseChannelRegister(workflowID string) bool { + s.mu.RLock() + defer s.mu.RUnlock() + _, found := s.responseChannels[workflowID] + return found } -func (s *triggerSubscriber) UnregisterTrigger(ctx context.Context, request commoncap.TriggerRegistrationRequest) error { - s.mu.Lock() - defer s.mu.Unlock() +var errResponseChannelNotFound = errors.New("response channel not found") - state := s.registeredWorkflows[request.Metadata.WorkflowID] - if state != nil && state.callback != nil { - close(state.callback) +func (s *triggerSubscriber) sendResponse(workflowID string, response commoncap.TriggerResponse) error { + s.mu.RLock() + defer s.mu.RUnlock() + + responseChannel, found := s.responseChannels[workflowID] + if !found { + return errResponseChannelNotFound } - delete(s.registeredWorkflows, request.Metadata.WorkflowID) - // Registrations will quickly expire on all remote nodes. - // Alternatively, we could send UnregisterTrigger messages right away. 
+ + responseChannel <- response return nil } @@ -200,17 +182,15 @@ func (s *triggerSubscriber) Receive(_ context.Context, msg *types.MessageBody) { s.lggr.Errorw("received message with too many workflow IDs - truncating", "capabilityId", s.capInfo.ID, "nWorkflows", len(meta.WorkflowIds), "sender", sender) meta.WorkflowIds = meta.WorkflowIds[:maxBatchedWorkflowIDs] } - for _, workflowId := range meta.WorkflowIds { - s.mu.RLock() - registration, found := s.registeredWorkflows[workflowId] - s.mu.RUnlock() - if !found { - s.lggr.Errorw("received message for unregistered workflow", "capabilityId", s.capInfo.ID, "workflowID", SanitizeLogString(workflowId), "sender", sender) + for _, workflowID := range meta.WorkflowIds { + registered := s.responseChannelRegister(workflowID) + if !registered { + s.lggr.Errorw("received message for unregistered workflow", "capabilityId", s.capInfo.ID, "workflowID", SanitizeLogString(workflowID), "sender", sender) continue } key := triggerEventKey{ - triggerEventId: meta.TriggerEventId, - workflowId: workflowId, + triggerEventID: meta.TriggerEventId, + workflowID: workflowID, } nowMs := time.Now().UnixMilli() s.mu.Lock() @@ -218,18 +198,22 @@ func (s *triggerSubscriber) Receive(_ context.Context, msg *types.MessageBody) { ready, payloads := s.messageCache.Ready(key, s.config.MinResponsesToAggregate, nowMs-s.config.MessageExpiry.Milliseconds(), true) s.mu.Unlock() if nowMs-creationTs > s.config.RegistrationExpiry.Milliseconds() { - s.lggr.Warnw("received trigger event for an expired ID", "triggerEventID", meta.TriggerEventId, "capabilityId", s.capInfo.ID, "workflowId", workflowId, "sender", sender) + s.lggr.Warnw("received trigger event for an expired ID", "triggerEventID", meta.TriggerEventId, "capabilityId", s.capInfo.ID, "workflowID", workflowID, "sender", sender) continue } if ready { - s.lggr.Debugw("trigger event ready to aggregate", "triggerEventID", meta.TriggerEventId, "capabilityId", s.capInfo.ID, "workflowId", workflowId) + 
s.lggr.Debugw("trigger event ready to aggregate", "triggerEventID", meta.TriggerEventId, "capabilityId", s.capInfo.ID, "workflowID", workflowID) aggregatedResponse, err := s.aggregator.Aggregate(meta.TriggerEventId, payloads) if err != nil { - s.lggr.Errorw("failed to aggregate responses", "triggerEventID", meta.TriggerEventId, "capabilityId", s.capInfo.ID, "workflowId", workflowId, "err", err) + s.lggr.Errorw("failed to aggregate responses", "triggerEventID", meta.TriggerEventId, "capabilityId", s.capInfo.ID, "workflowID", workflowID, "err", err) continue } - s.lggr.Infow("remote trigger event aggregated", "triggerEventID", meta.TriggerEventId, "capabilityId", s.capInfo.ID, "workflowId", workflowId) - registration.callback <- aggregatedResponse + s.lggr.Infow("remote trigger event aggregated", "triggerEventID", meta.TriggerEventId, "capabilityId", s.capInfo.ID, "workflowID", workflowID) + err = s.sendResponse(workflowID, aggregatedResponse) + // Possible that the response channel for the workflow was unregistered between the check that is was registered and here, so we ignore the error + if err != nil && !errors.Is(err, errResponseChannelNotFound) { + s.lggr.Errorw("failed to send response for workflow", "triggerEventID", meta.TriggerEventId, "capabilityId", s.capInfo.ID, "workflowID", workflowID, "err", err) + } } } } else { @@ -256,6 +240,10 @@ func (s *triggerSubscriber) eventCleanupLoop() { func (s *triggerSubscriber) Close() error { close(s.stopCh) s.wg.Wait() + err := s.registrationClient.Close() + if err != nil { + return fmt.Errorf("failed to close capability register: %w", err) + } s.lggr.Info("TriggerSubscriber closed") return nil } diff --git a/core/capabilities/remote/trigger_subscriber_test.go b/core/capabilities/remote/trigger_subscriber_test.go index b8cc3ddc7bd..d5b48bc1dc8 100644 --- a/core/capabilities/remote/trigger_subscriber_test.go +++ b/core/capabilities/remote/trigger_subscriber_test.go @@ -26,7 +26,6 @@ const ( var ( triggerEvent1 = 
map[string]any{"event": "triggerEvent1"} - triggerEvent2 = map[string]any{"event": "triggerEvent2"} ) func TestTriggerSubscriber_RegisterAndReceive(t *testing.T) { diff --git a/core/capabilities/remote/types/types.go b/core/capabilities/remote/types/types.go index fefc9a9b5fe..188587bc7ac 100644 --- a/core/capabilities/remote/types/types.go +++ b/core/capabilities/remote/types/types.go @@ -23,8 +23,8 @@ const ( type Dispatcher interface { services.Service - SetReceiver(capabilityId string, donId uint32, receiver Receiver) error - RemoveReceiver(capabilityId string, donId uint32) + SetReceiver(capabilityID string, donID uint32, receiver Receiver) error + RemoveReceiver(capabilityID string, donID uint32) Send(peerID p2ptypes.PeerID, msgBody *MessageBody) error } diff --git a/core/capabilities/remote/utils.go b/core/capabilities/remote/utils.go index ea6a3efb186..7af34c5c946 100644 --- a/core/capabilities/remote/utils.go +++ b/core/capabilities/remote/utils.go @@ -3,7 +3,6 @@ package remote import ( "bytes" "crypto/ed25519" - "crypto/sha256" "encoding/hex" "errors" "fmt" @@ -11,8 +10,6 @@ import ( "google.golang.org/protobuf/proto" - commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" - "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) @@ -25,25 +22,25 @@ func ValidateMessage(msg p2ptypes.Message, expectedReceiver p2ptypes.PeerID) (*r var topLevelMessage remotetypes.Message err := proto.Unmarshal(msg.Payload, &topLevelMessage) if err != nil { - return nil, fmt.Errorf("failed to unmarshal message, err: %v", err) + return nil, fmt.Errorf("failed to unmarshal message, err: %w", err) } var body remotetypes.MessageBody err = proto.Unmarshal(topLevelMessage.Body, &body) if err != nil { - return nil, fmt.Errorf("failed to unmarshal message body, err: %v", err) + return nil, 
fmt.Errorf("failed to unmarshal message body, err: %w", err) } if len(body.Sender) != p2ptypes.PeerIDLength || len(body.Receiver) != p2ptypes.PeerIDLength { return &body, fmt.Errorf("invalid sender length (%d) or receiver length (%d)", len(body.Sender), len(body.Receiver)) } if !ed25519.Verify(body.Sender, topLevelMessage.Body, topLevelMessage.Signature) { - return &body, fmt.Errorf("failed to verify message signature") + return &body, errors.New("failed to verify message signature") } // NOTE we currently don't support relaying messages so the p2p message sender needs to be the message author if !bytes.Equal(body.Sender, msg.Sender[:]) { - return &body, fmt.Errorf("sender in message body does not match sender of p2p message") + return &body, errors.New("sender in message body does not match sender of p2p message") } if !bytes.Equal(body.Receiver, expectedReceiver[:]) { - return &body, fmt.Errorf("receiver in message body does not match expected receiver") + return &body, errors.New("receiver in message body does not match expected receiver") } return &body, nil } @@ -58,52 +55,6 @@ func ToPeerID(peerID []byte) (p2ptypes.PeerID, error) { return id, nil } -// Default MODE Aggregator needs a configurable number of identical responses for aggregation to succeed -type defaultModeAggregator struct { - minIdenticalResponses uint32 -} - -var _ remotetypes.Aggregator = &defaultModeAggregator{} - -func NewDefaultModeAggregator(minIdenticalResponses uint32) *defaultModeAggregator { - return &defaultModeAggregator{ - minIdenticalResponses: minIdenticalResponses, - } -} - -func (a *defaultModeAggregator) Aggregate(_ string, responses [][]byte) (commoncap.TriggerResponse, error) { - found, err := AggregateModeRaw(responses, a.minIdenticalResponses) - if err != nil { - return commoncap.TriggerResponse{}, fmt.Errorf("failed to aggregate responses, err: %w", err) - } - - unmarshaled, err := pb.UnmarshalTriggerResponse(found) - if err != nil { - return commoncap.TriggerResponse{}, 
fmt.Errorf("failed to unmarshal aggregated responses, err: %w", err) - } - return unmarshaled, nil -} - -func AggregateModeRaw(elemList [][]byte, minIdenticalResponses uint32) ([]byte, error) { - hashToCount := make(map[string]uint32) - var found []byte - for _, elem := range elemList { - hasher := sha256.New() - hasher.Write(elem) - sha := hex.EncodeToString(hasher.Sum(nil)) - hashToCount[sha]++ - if hashToCount[sha] >= minIdenticalResponses { - found = elem - // update in case we find another elem with an even higher count - minIdenticalResponses = hashToCount[sha] - } - } - if found == nil { - return nil, errors.New("not enough identical responses found") - } - return found, nil -} - func SanitizeLogString(s string) string { tooLongSuffix := "" if len(s) > maxLoggedStringLen { diff --git a/core/capabilities/remote/utils_test.go b/core/capabilities/remote/utils_test.go index 6707e6ffb25..836df6bb442 100644 --- a/core/capabilities/remote/utils_test.go +++ b/core/capabilities/remote/utils_test.go @@ -10,10 +10,6 @@ import ( ragetypes "github.com/smartcontractkit/libocr/ragep2p/types" - commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" - "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" - "github.com/smartcontractkit/chainlink-common/pkg/values" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" @@ -89,41 +85,6 @@ func TestToPeerID(t *testing.T) { require.Equal(t, "12D3KooWD8QYTQVYjB6oog4Ej8PcPpqTrPRnxLQap8yY8KUQRVvq", id.String()) } -func TestDefaultModeAggregator_Aggregate(t *testing.T) { - val, err := values.NewMap(triggerEvent1) - require.NoError(t, err) - capResponse1 := commoncap.TriggerResponse{ - Event: commoncap.TriggerEvent{ - Outputs: val, - }, - Err: nil, - } - marshaled1, err := pb.MarshalTriggerResponse(capResponse1) - 
require.NoError(t, err) - - val2, err := values.NewMap(triggerEvent2) - require.NoError(t, err) - capResponse2 := commoncap.TriggerResponse{ - Event: commoncap.TriggerEvent{ - Outputs: val2, - }, - Err: nil, - } - marshaled2, err := pb.MarshalTriggerResponse(capResponse2) - require.NoError(t, err) - - agg := remote.NewDefaultModeAggregator(2) - _, err = agg.Aggregate("", [][]byte{marshaled1}) - require.Error(t, err) - - _, err = agg.Aggregate("", [][]byte{marshaled1, marshaled2}) - require.Error(t, err) - - res, err := agg.Aggregate("", [][]byte{marshaled1, marshaled2, marshaled1}) - require.NoError(t, err) - require.Equal(t, res, capResponse1) -} - func TestSanitizeLogString(t *testing.T) { require.Equal(t, "hello", remote.SanitizeLogString("hello")) require.Equal(t, "[UNPRINTABLE] 0a", remote.SanitizeLogString("\n")) diff --git a/core/scripts/go.mod b/core/scripts/go.mod index 8f57442c8a1..16a3d3d6acd 100644 --- a/core/scripts/go.mod +++ b/core/scripts/go.mod @@ -26,7 +26,7 @@ require ( github.com/prometheus/client_golang v1.20.5 github.com/shopspring/decimal v1.4.0 github.com/smartcontractkit/chainlink-automation v0.8.1 - github.com/smartcontractkit/chainlink-common v0.3.1-0.20241209151352-70300ddcc776 + github.com/smartcontractkit/chainlink-common v0.3.1-0.20241210103218-ed3344947247 github.com/smartcontractkit/chainlink/deployment v0.0.0-00010101000000-000000000000 github.com/smartcontractkit/chainlink/v2 v2.14.0-mercury-20240807.0.20241106193309-5560cd76211a github.com/smartcontractkit/libocr v0.0.0-20241007185508-adbe57025f12 diff --git a/core/scripts/go.sum b/core/scripts/go.sum index 2a777f569b1..7f333e9e690 100644 --- a/core/scripts/go.sum +++ b/core/scripts/go.sum @@ -1142,8 +1142,8 @@ github.com/smartcontractkit/chainlink-automation v0.8.1 h1:sTc9LKpBvcKPc1JDYAmgB github.com/smartcontractkit/chainlink-automation v0.8.1/go.mod h1:Iij36PvWZ6blrdC5A/nrQUBuf3MH3JvsBB9sSyc9W08= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241204015713-8956bb614e9e 
h1:GnM6ZWV6vlk2+n6c6o+v/R1LtXzBGVVx7r37nt/h6Uc= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241204015713-8956bb614e9e/go.mod h1:80vGBbOfertJig0xFKsRfm+i17FkjdKkk1dAaGE45Os= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241209151352-70300ddcc776 h1:NATQA1LfrEPXCdtEed9/G4SxaVuF8EZp5O2ucOK5C98= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241209151352-70300ddcc776/go.mod h1:bQktEJf7sJ0U3SmIcXvbGUox7SmXcnSEZ4kUbT8R5Nk= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241210103218-ed3344947247 h1:8pthJqNlLCsF7MkhIOwVH4QcfgCrY3RgzNscOgGDfqg= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241210103218-ed3344947247/go.mod h1:bQktEJf7sJ0U3SmIcXvbGUox7SmXcnSEZ4kUbT8R5Nk= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241202195413-82468150ac1e h1:PRoeby6ZlTuTkv2f+7tVU4+zboTfRzI+beECynF4JQ0= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241202195413-82468150ac1e/go.mod h1:mUh5/woemsVaHgTorA080hrYmO3syBCmPdnWc/5dOqk= github.com/smartcontractkit/chainlink-data-streams v0.1.1-0.20241202141438-a90db35252db h1:N1RH1hSr2ACzOFc9hkCcjE8pRBTdcU3p8nsTJByaLes= diff --git a/deployment/go.mod b/deployment/go.mod index 058df5d2a29..e7639a4b30f 100644 --- a/deployment/go.mod +++ b/deployment/go.mod @@ -25,7 +25,7 @@ require ( github.com/smartcontractkit/ccip-owner-contracts v0.0.0-20240926212305-a6deabdfce86 github.com/smartcontractkit/chain-selectors v1.0.34 github.com/smartcontractkit/chainlink-ccip v0.0.0-20241204015713-8956bb614e9e - github.com/smartcontractkit/chainlink-common v0.3.1-0.20241209151352-70300ddcc776 + github.com/smartcontractkit/chainlink-common v0.3.1-0.20241210103218-ed3344947247 github.com/smartcontractkit/chainlink-protos/job-distributor v0.6.0 github.com/smartcontractkit/chainlink-testing-framework/lib v1.50.13 github.com/smartcontractkit/chainlink/v2 v2.0.0-00010101000000-000000000000 diff --git a/deployment/go.sum b/deployment/go.sum index 00e0aab077b..9fd42a258ca 100644 --- a/deployment/go.sum +++ 
b/deployment/go.sum @@ -1411,8 +1411,8 @@ github.com/smartcontractkit/chainlink-automation v0.8.1 h1:sTc9LKpBvcKPc1JDYAmgB github.com/smartcontractkit/chainlink-automation v0.8.1/go.mod h1:Iij36PvWZ6blrdC5A/nrQUBuf3MH3JvsBB9sSyc9W08= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241204015713-8956bb614e9e h1:GnM6ZWV6vlk2+n6c6o+v/R1LtXzBGVVx7r37nt/h6Uc= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241204015713-8956bb614e9e/go.mod h1:80vGBbOfertJig0xFKsRfm+i17FkjdKkk1dAaGE45Os= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241209151352-70300ddcc776 h1:NATQA1LfrEPXCdtEed9/G4SxaVuF8EZp5O2ucOK5C98= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241209151352-70300ddcc776/go.mod h1:bQktEJf7sJ0U3SmIcXvbGUox7SmXcnSEZ4kUbT8R5Nk= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241210103218-ed3344947247 h1:8pthJqNlLCsF7MkhIOwVH4QcfgCrY3RgzNscOgGDfqg= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241210103218-ed3344947247/go.mod h1:bQktEJf7sJ0U3SmIcXvbGUox7SmXcnSEZ4kUbT8R5Nk= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241202195413-82468150ac1e h1:PRoeby6ZlTuTkv2f+7tVU4+zboTfRzI+beECynF4JQ0= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241202195413-82468150ac1e/go.mod h1:mUh5/woemsVaHgTorA080hrYmO3syBCmPdnWc/5dOqk= github.com/smartcontractkit/chainlink-data-streams v0.1.1-0.20241202141438-a90db35252db h1:N1RH1hSr2ACzOFc9hkCcjE8pRBTdcU3p8nsTJByaLes= diff --git a/go.mod b/go.mod index 2dd7d3fcfe5..19a1848f9a3 100644 --- a/go.mod +++ b/go.mod @@ -79,7 +79,7 @@ require ( github.com/smartcontractkit/chain-selectors v1.0.34 github.com/smartcontractkit/chainlink-automation v0.8.1 github.com/smartcontractkit/chainlink-ccip v0.0.0-20241204015713-8956bb614e9e - github.com/smartcontractkit/chainlink-common v0.3.1-0.20241209151352-70300ddcc776 + github.com/smartcontractkit/chainlink-common v0.3.1-0.20241210103218-ed3344947247 github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241202195413-82468150ac1e 
github.com/smartcontractkit/chainlink-data-streams v0.1.1-0.20241202141438-a90db35252db github.com/smartcontractkit/chainlink-feeds v0.1.1 diff --git a/go.sum b/go.sum index b8941bc7d01..34e89fdd078 100644 --- a/go.sum +++ b/go.sum @@ -1125,8 +1125,8 @@ github.com/smartcontractkit/chainlink-automation v0.8.1 h1:sTc9LKpBvcKPc1JDYAmgB github.com/smartcontractkit/chainlink-automation v0.8.1/go.mod h1:Iij36PvWZ6blrdC5A/nrQUBuf3MH3JvsBB9sSyc9W08= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241204015713-8956bb614e9e h1:GnM6ZWV6vlk2+n6c6o+v/R1LtXzBGVVx7r37nt/h6Uc= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241204015713-8956bb614e9e/go.mod h1:80vGBbOfertJig0xFKsRfm+i17FkjdKkk1dAaGE45Os= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241209151352-70300ddcc776 h1:NATQA1LfrEPXCdtEed9/G4SxaVuF8EZp5O2ucOK5C98= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241209151352-70300ddcc776/go.mod h1:bQktEJf7sJ0U3SmIcXvbGUox7SmXcnSEZ4kUbT8R5Nk= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241210103218-ed3344947247 h1:8pthJqNlLCsF7MkhIOwVH4QcfgCrY3RgzNscOgGDfqg= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241210103218-ed3344947247/go.mod h1:bQktEJf7sJ0U3SmIcXvbGUox7SmXcnSEZ4kUbT8R5Nk= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241202195413-82468150ac1e h1:PRoeby6ZlTuTkv2f+7tVU4+zboTfRzI+beECynF4JQ0= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241202195413-82468150ac1e/go.mod h1:mUh5/woemsVaHgTorA080hrYmO3syBCmPdnWc/5dOqk= github.com/smartcontractkit/chainlink-data-streams v0.1.1-0.20241202141438-a90db35252db h1:N1RH1hSr2ACzOFc9hkCcjE8pRBTdcU3p8nsTJByaLes= diff --git a/integration-tests/go.mod b/integration-tests/go.mod index 58b2a6fa1c4..52b05fadc8a 100644 --- a/integration-tests/go.mod +++ b/integration-tests/go.mod @@ -41,7 +41,7 @@ require ( github.com/smartcontractkit/chain-selectors v1.0.34 github.com/smartcontractkit/chainlink-automation v0.8.1 github.com/smartcontractkit/chainlink-ccip 
v0.0.0-20241204015713-8956bb614e9e - github.com/smartcontractkit/chainlink-common v0.3.1-0.20241209151352-70300ddcc776 + github.com/smartcontractkit/chainlink-common v0.3.1-0.20241210103218-ed3344947247 github.com/smartcontractkit/chainlink-protos/job-distributor v0.6.0 github.com/smartcontractkit/chainlink-testing-framework/havoc v1.50.2 github.com/smartcontractkit/chainlink-testing-framework/lib v1.50.18 diff --git a/integration-tests/go.sum b/integration-tests/go.sum index 4f31dd61871..e91c84f193b 100644 --- a/integration-tests/go.sum +++ b/integration-tests/go.sum @@ -1432,8 +1432,8 @@ github.com/smartcontractkit/chainlink-automation v0.8.1 h1:sTc9LKpBvcKPc1JDYAmgB github.com/smartcontractkit/chainlink-automation v0.8.1/go.mod h1:Iij36PvWZ6blrdC5A/nrQUBuf3MH3JvsBB9sSyc9W08= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241204015713-8956bb614e9e h1:GnM6ZWV6vlk2+n6c6o+v/R1LtXzBGVVx7r37nt/h6Uc= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241204015713-8956bb614e9e/go.mod h1:80vGBbOfertJig0xFKsRfm+i17FkjdKkk1dAaGE45Os= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241209151352-70300ddcc776 h1:NATQA1LfrEPXCdtEed9/G4SxaVuF8EZp5O2ucOK5C98= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241209151352-70300ddcc776/go.mod h1:bQktEJf7sJ0U3SmIcXvbGUox7SmXcnSEZ4kUbT8R5Nk= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241210103218-ed3344947247 h1:8pthJqNlLCsF7MkhIOwVH4QcfgCrY3RgzNscOgGDfqg= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241210103218-ed3344947247/go.mod h1:bQktEJf7sJ0U3SmIcXvbGUox7SmXcnSEZ4kUbT8R5Nk= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241202195413-82468150ac1e h1:PRoeby6ZlTuTkv2f+7tVU4+zboTfRzI+beECynF4JQ0= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241202195413-82468150ac1e/go.mod h1:mUh5/woemsVaHgTorA080hrYmO3syBCmPdnWc/5dOqk= github.com/smartcontractkit/chainlink-data-streams v0.1.1-0.20241202141438-a90db35252db h1:N1RH1hSr2ACzOFc9hkCcjE8pRBTdcU3p8nsTJByaLes= diff --git 
a/integration-tests/load/go.mod b/integration-tests/load/go.mod index 47b128c7f60..1596675b8cc 100644 --- a/integration-tests/load/go.mod +++ b/integration-tests/load/go.mod @@ -19,7 +19,7 @@ require ( github.com/pkg/errors v0.9.1 github.com/rs/zerolog v1.33.0 github.com/slack-go/slack v0.15.0 - github.com/smartcontractkit/chainlink-common v0.3.1-0.20241209151352-70300ddcc776 + github.com/smartcontractkit/chainlink-common v0.3.1-0.20241210103218-ed3344947247 github.com/smartcontractkit/chainlink-testing-framework/lib v1.50.18 github.com/smartcontractkit/chainlink-testing-framework/seth v1.50.9 github.com/smartcontractkit/chainlink-testing-framework/wasp v1.50.2 diff --git a/integration-tests/load/go.sum b/integration-tests/load/go.sum index 59a4e9e64ad..6e3045d7299 100644 --- a/integration-tests/load/go.sum +++ b/integration-tests/load/go.sum @@ -1423,8 +1423,8 @@ github.com/smartcontractkit/chainlink-automation v0.8.1 h1:sTc9LKpBvcKPc1JDYAmgB github.com/smartcontractkit/chainlink-automation v0.8.1/go.mod h1:Iij36PvWZ6blrdC5A/nrQUBuf3MH3JvsBB9sSyc9W08= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241204015713-8956bb614e9e h1:GnM6ZWV6vlk2+n6c6o+v/R1LtXzBGVVx7r37nt/h6Uc= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241204015713-8956bb614e9e/go.mod h1:80vGBbOfertJig0xFKsRfm+i17FkjdKkk1dAaGE45Os= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241209151352-70300ddcc776 h1:NATQA1LfrEPXCdtEed9/G4SxaVuF8EZp5O2ucOK5C98= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241209151352-70300ddcc776/go.mod h1:bQktEJf7sJ0U3SmIcXvbGUox7SmXcnSEZ4kUbT8R5Nk= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241210103218-ed3344947247 h1:8pthJqNlLCsF7MkhIOwVH4QcfgCrY3RgzNscOgGDfqg= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241210103218-ed3344947247/go.mod h1:bQktEJf7sJ0U3SmIcXvbGUox7SmXcnSEZ4kUbT8R5Nk= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241202195413-82468150ac1e h1:PRoeby6ZlTuTkv2f+7tVU4+zboTfRzI+beECynF4JQ0= 
github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241202195413-82468150ac1e/go.mod h1:mUh5/woemsVaHgTorA080hrYmO3syBCmPdnWc/5dOqk= github.com/smartcontractkit/chainlink-data-streams v0.1.1-0.20241202141438-a90db35252db h1:N1RH1hSr2ACzOFc9hkCcjE8pRBTdcU3p8nsTJByaLes=