Skip to content

Commit

Permalink
add back leader election timeout metric. Use existing grpc clients for ping
Browse files Browse the repository at this point in the history
  • Loading branch information
agouin committed Nov 15, 2023
1 parent a6113e2 commit 46a65ce
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 23 deletions.
28 changes: 5 additions & 23 deletions signer/cosigner_health.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,12 @@ package signer

import (
"context"
"net/url"
"sort"
"sync"
"time"

cometlog "github.com/cometbft/cometbft/libs/log"
"github.com/strangelove-ventures/horcrux/signer/proto"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)

const (
Expand Down Expand Up @@ -40,7 +37,9 @@ func (ch *CosignerHealth) Reconcile(ctx context.Context) {
var wg sync.WaitGroup
wg.Add(len(ch.cosigners))
for _, cosigner := range ch.cosigners {
go ch.updateRTT(ctx, cosigner, &wg)
if rc, ok := cosigner.(*RemoteCosigner); ok {
go ch.updateRTT(ctx, rc, &wg)
}
}
wg.Wait()
}
Expand All @@ -65,7 +64,7 @@ func (ch *CosignerHealth) MarkUnhealthy(cosigner Cosigner) {
ch.rtt[cosigner.GetID()] = -1
}

func (ch *CosignerHealth) updateRTT(ctx context.Context, cosigner Cosigner, wg *sync.WaitGroup) {
func (ch *CosignerHealth) updateRTT(ctx context.Context, cosigner *RemoteCosigner, wg *sync.WaitGroup) {
defer wg.Done()

rtt := int64(-1)
Expand All @@ -78,24 +77,7 @@ func (ch *CosignerHealth) updateRTT(ctx context.Context, cosigner Cosigner, wg *
ctx, cancel := context.WithTimeout(ctx, 1*time.Second)
defer cancel()

var grpcAddress string
cosignerAddress := cosigner.GetAddress()
url, err := url.Parse(cosignerAddress)
if err != nil {
grpcAddress = cosignerAddress
} else {
grpcAddress = url.Host
}

conn, err := grpc.DialContext(ctx, grpcAddress, grpc.WithTransportCredentials(insecure.NewCredentials()))
if err != nil {
ch.logger.Error("Failed to dial", "cosigner", cosigner.GetID(), "error", err)
return
}
defer conn.Close()

client := proto.NewCosignerClient(conn)
_, err = client.Ping(ctx, &proto.PingRequest{})
_, err := cosigner.client.Ping(ctx, &proto.PingRequest{})
if err != nil {
ch.logger.Error("Failed to ping", "cosigner", cosigner.GetID(), "error", err)
return
Expand Down
4 changes: 4 additions & 0 deletions signer/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,10 @@ var (
Name: "signer_total_raft_not_leader",
Help: "Total Times Signer is NOT Raft Leader (Proxy signing to Raft Leader)",
})
totalRaftLeaderElectionTimeout = promauto.NewCounter(prometheus.CounterOpts{
Name: "signer_total_raft_leader_election_timeout",
Help: "Total Times Raft Leader Failed Election (Lacking Peers)",
})
totalInvalidSignature = promauto.NewCounter(prometheus.CounterOpts{
Name: "signer_error_total_invalid_signatures",
Help: "Total Times Combined Signature is Invalid",
Expand Down
1 change: 1 addition & 0 deletions signer/threshold_validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,7 @@ func (pv *ThresholdValidator) proxyIfNecessary(
}

if leader == -1 {
totalRaftLeaderElectionTimeout.Inc()
return true, nil, stamp, fmt.Errorf("timed out waiting for raft leader")
}

Expand Down

0 comments on commit 46a65ce

Please sign in to comment.