Skip to content

Commit

Permalink
Fix RPCClient Deadlock on Unsubscribe and NewHead (#14236)
Browse files Browse the repository at this point in the history
* Fix RPCClient Deadlock on Unsubscribe and NewHead

* changeset
  • Loading branch information
dhaidashenko authored Aug 26, 2024
1 parent c00ac96 commit 0294e1f
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 6 deletions.
5 changes: 5 additions & 0 deletions .changeset/curly-birds-guess.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"chainlink": patch
---

Fixed deadlock in RPCClient causing CL Node to stop performing RPC requests for the affected chain #bugfix
15 changes: 9 additions & 6 deletions core/chains/evm/client/rpc_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ type rpcClient struct {
// stateMu since it can happen on state transitions as well as rpcClient Close.
chStopInFlight chan struct{}

chainInfoLock sync.RWMutex
// intercepted values seen by callers of the rpcClient excluding health check calls. Need to ensure MultiNode provides repeatable read guarantee
highestUserObservations commonclient.ChainInfo
// most recent chain info observed during current lifecycle (reseted on DisconnectAll)
Expand Down Expand Up @@ -336,7 +337,9 @@ func (r *rpcClient) DisconnectAll() {
}
r.cancelInflightRequests()
r.unsubscribeAll()
r.chainInfoLock.Lock()
r.latestChainInfo = commonclient.ChainInfo{}
r.chainInfoLock.Unlock()
}

// unsubscribeAll unsubscribes all subscriptions
Expand Down Expand Up @@ -1379,8 +1382,8 @@ func (r *rpcClient) onNewHead(ctx context.Context, requestCh <-chan struct{}, he
return
}

r.stateMu.Lock()
defer r.stateMu.Unlock()
r.chainInfoLock.Lock()
defer r.chainInfoLock.Unlock()
if !commonclient.CtxIsHeathCheckRequest(ctx) {
r.highestUserObservations.BlockNumber = max(r.highestUserObservations.BlockNumber, head.Number)
r.highestUserObservations.TotalDifficulty = commonclient.MaxTotalDifficulty(r.highestUserObservations.TotalDifficulty, head.TotalDifficulty)
Expand All @@ -1398,8 +1401,8 @@ func (r *rpcClient) onNewFinalizedHead(ctx context.Context, requestCh <-chan str
if head == nil {
return
}
r.stateMu.Lock()
defer r.stateMu.Unlock()
r.chainInfoLock.Lock()
defer r.chainInfoLock.Unlock()
if !commonclient.CtxIsHeathCheckRequest(ctx) {
r.highestUserObservations.FinalizedBlockNumber = max(r.highestUserObservations.FinalizedBlockNumber, head.Number)
}
Expand All @@ -1412,8 +1415,8 @@ func (r *rpcClient) onNewFinalizedHead(ctx context.Context, requestCh <-chan str
}

func (r *rpcClient) GetInterceptedChainInfo() (latest, highestUserObservations commonclient.ChainInfo) {
r.stateMu.RLock()
defer r.stateMu.RUnlock()
r.chainInfoLock.RLock()
defer r.chainInfoLock.RUnlock()
return r.latestChainInfo, r.highestUserObservations
}

Expand Down
28 changes: 28 additions & 0 deletions core/chains/evm/client/rpc_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"math/big"
"net/url"
"sync"
"testing"
"time"

Expand Down Expand Up @@ -130,6 +131,33 @@ func TestRPCClient_SubscribeNewHead(t *testing.T) {
assert.Equal(t, int64(0), highestUserObservations.FinalizedBlockNumber)
assert.Equal(t, (*big.Int)(nil), highestUserObservations.TotalDifficulty)
})
t.Run("Concurrent Unsubscribe and onNewHead calls do not lead to a deadlock", func(t *testing.T) {
const numberOfAttempts = 1000 // need a large number to increase the odds of reproducing the issue
server := testutils.NewWSServer(t, chainId, serverCallBack)
wsURL := server.WSURL()

rpc := client.NewRPCClient(lggr, *wsURL, nil, "rpc", 1, chainId, commonclient.Primary, 0, commonclient.QueryTimeout, commonclient.QueryTimeout, "")
defer rpc.Close()
require.NoError(t, rpc.Dial(ctx))
var wg sync.WaitGroup
for i := 0; i < numberOfAttempts; i++ {
ch := make(chan *evmtypes.Head)
sub, err := rpc.SubscribeNewHead(tests.Context(t), ch)
require.NoError(t, err)
wg.Add(2)
go func() {
server.MustWriteBinaryMessageSync(t, makeNewHeadWSMessage(&evmtypes.Head{Number: 256, TotalDifficulty: big.NewInt(1000)}))
wg.Done()
}()
go func() {
rpc.UnsubscribeAllExceptAliveLoop()
sub.Unsubscribe()
wg.Done()
}()
wg.Wait()
}

})
t.Run("Block's chain ID matched configured", func(t *testing.T) {
server := testutils.NewWSServer(t, chainId, serverCallBack)
wsURL := server.WSURL()
Expand Down

0 comments on commit 0294e1f

Please sign in to comment.