From 81a21bb56cd597858221f775c796994be0f2e0da Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Thu, 18 Jul 2024 11:20:14 +0100 Subject: [PATCH] KS-384 add better logging for when payload changes per message id and fix client non-deterministic serialisation of remote target (#13877) * improve logging when different payloads for the same message received and fix non-deterministic serialisation * lint --- .changeset/shiny-ligers-compete.md | 5 ++ .../remote/target/request/client_request.go | 5 +- core/capabilities/remote/target/server.go | 46 +++++++++++++++---- 3 files changed, 45 insertions(+), 11 deletions(-) create mode 100644 .changeset/shiny-ligers-compete.md diff --git a/.changeset/shiny-ligers-compete.md b/.changeset/shiny-ligers-compete.md new file mode 100644 index 00000000000..d621b94183c --- /dev/null +++ b/.changeset/shiny-ligers-compete.md @@ -0,0 +1,5 @@ +--- +"chainlink": patch +--- + +#internal logging of non determinism in target server diff --git a/core/capabilities/remote/target/request/client_request.go b/core/capabilities/remote/target/request/client_request.go index 3952a2f7f64..50a742c2188 100644 --- a/core/capabilities/remote/target/request/client_request.go +++ b/core/capabilities/remote/target/request/client_request.go @@ -8,6 +8,8 @@ import ( "sync" "time" + "google.golang.org/protobuf/proto" + ragep2ptypes "github.com/smartcontractkit/libocr/ragep2p/types" "github.com/smartcontractkit/chainlink-common/pkg/capabilities" @@ -46,7 +48,8 @@ func NewClientRequest(ctx context.Context, lggr logger.Logger, req commoncap.Cap return nil, errors.New("remote capability info missing DON") } - rawRequest, err := pb.MarshalCapabilityRequest(req) + rawRequest, err := proto.MarshalOptions{Deterministic: true}.Marshal(pb.CapabilityRequestToProto(req)) + if err != nil { return nil, fmt.Errorf("failed to marshal capability request: %w", err) } diff --git a/core/capabilities/remote/target/server.go b/core/capabilities/remote/target/server.go index 086be15f270..ea9caf81eff 100644 --- a/core/capabilities/remote/target/server.go +++ b/core/capabilities/remote/target/server.go @@ -32,9 +32,12 @@ type server struct { workflowDONs map[uint32]commoncap.DON dispatcher types.Dispatcher - requestIDToRequest map[string]*request.ServerRequest + requestIDToRequest map[string]requestAndMsgID requestTimeout time.Duration + // Used to detect messages with the same message id but different payloads + messageIDToRequestIDsCount map[string]map[string]int + receiveLock sync.Mutex stopCh services.StopChan wg sync.WaitGroup @@ -43,6 +46,11 @@ type server struct { var _ types.Receiver = &server{} var _ services.Service = &server{} +type requestAndMsgID struct { + request *request.ServerRequest + messageID string +} + func NewServer(peerID p2ptypes.PeerID, underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo commoncap.DON, workflowDONs map[uint32]commoncap.DON, dispatcher types.Dispatcher, requestTimeout time.Duration, lggr logger.Logger) *server { return &server{ @@ -53,8 +61,9 @@ func NewServer(peerID p2ptypes.PeerID, underlying commoncap.TargetCapability, ca workflowDONs: workflowDONs, dispatcher: dispatcher, - requestIDToRequest: map[string]*request.ServerRequest{}, - requestTimeout: requestTimeout, + requestIDToRequest: map[string]requestAndMsgID{}, + messageIDToRequestIDsCount: map[string]map[string]int{}, + requestTimeout: requestTimeout, lggr: lggr.Named("TargetServer"), stopCh: make(services.StopChan), @@ -96,12 +105,13 @@ func (r *server) expireRequests() { defer r.receiveLock.Unlock() for requestID, executeReq := range r.requestIDToRequest { - if executeReq.Expired() { - err := executeReq.Cancel(types.Error_TIMEOUT, "request expired") + if executeReq.request.Expired() { + err := executeReq.request.Cancel(types.Error_TIMEOUT, "request expired") if err != nil { r.lggr.Errorw("failed to cancel request", "request", executeReq, "err", err) } delete(r.requestIDToRequest, requestID) + delete(r.messageIDToRequestIDsCount, executeReq.messageID) } } } @@ -122,6 +132,19 @@ func (r *server) Receive(ctx context.Context, msg *types.MessageBody) { hash := sha256.Sum256(msg.Payload) requestID := messageId + hex.EncodeToString(hash[:]) + if requestIDs, ok := r.messageIDToRequestIDsCount[messageId]; ok { + requestIDs[requestID] = requestIDs[requestID] + 1 + } else { + r.messageIDToRequestIDsCount[messageId] = map[string]int{requestID: 1} + } + + requestIDs := r.messageIDToRequestIDsCount[messageId] + if len(requestIDs) > 1 { + // This is a potential attack vector as well as a situation that will occur if the client is sending non-deterministic payloads + // so a warning is logged + r.lggr.Warnw("received messages with the same id and different payloads", "messageID", messageId, "requestIDToCount", requestIDs) + } + if _, ok := r.requestIDToRequest[requestID]; !ok { callingDon, ok := r.workflowDONs[msg.CallerDonId] if !ok { @@ -129,15 +152,18 @@ func (r *server) Receive(ctx context.Context, msg *types.MessageBody) { return } - r.requestIDToRequest[requestID] = request.NewServerRequest(r.underlying, r.capInfo.ID, r.localDonInfo.ID, r.peerID, - callingDon, messageId, r.dispatcher, r.requestTimeout, r.lggr) + r.requestIDToRequest[requestID] = requestAndMsgID{ + request: request.NewServerRequest(r.underlying, r.capInfo.ID, r.localDonInfo.ID, r.peerID, + callingDon, messageId, r.dispatcher, r.requestTimeout, r.lggr), + messageID: messageId, + } } - req := r.requestIDToRequest[requestID] + reqAndMsgID := r.requestIDToRequest[requestID] - err := req.OnMessage(ctx, msg) + err := reqAndMsgID.request.OnMessage(ctx, msg) if err != nil { - r.lggr.Errorw("request failed to OnMessage new message", "request", req, "err", err) + r.lggr.Errorw("request failed to OnMessage new message", "request", reqAndMsgID, "err", err) } }