diff --git a/cmd/horcrux/cmd/start.go b/cmd/horcrux/cmd/start.go index ef4fde1d..393e1388 100644 --- a/cmd/horcrux/cmd/start.go +++ b/cmd/horcrux/cmd/start.go @@ -2,6 +2,7 @@ package cmd import ( "fmt" + "github.com/strangelove-ventures/horcrux/pkg/connector" "os" cometlog "github.com/cometbft/cometbft/libs/log" @@ -42,7 +43,7 @@ func startCmd() *cobra.Command { acceptRisk, _ := cmd.Flags().GetBool(flagAcceptRisk) - var val signer.PrivValidator + var val connector.PrivValidator var services []service.Service switch config.Config.SignMode { @@ -61,7 +62,7 @@ func startCmd() *cobra.Command { } if config.Config.GRPCAddr != "" { - grpcServer := signer.NewRemoteSignerGRPCServer(logger, val, config.Config.GRPCAddr) + grpcServer := connector.NewRemoteSignerGRPCServer(logger, val, config.Config.GRPCAddr) services = append(services, grpcServer) if err := grpcServer.Start(); err != nil { @@ -71,7 +72,7 @@ func startCmd() *cobra.Command { go EnableDebugAndMetrics(cmd.Context(), out) - services, err = signer.StartRemoteSigners(services, logger, val, config.Config.Nodes()) + services, err = connector.StartRemoteSigners(services, logger, val, config.Config.Nodes()) if err != nil { return fmt.Errorf("failed to start remote signer(s): %w", err) } diff --git a/signer/remote_signer.go b/pkg/connector/remote_signer.go similarity index 94% rename from signer/remote_signer.go rename to pkg/connector/remote_signer.go index ef852f26..58e99065 100644 --- a/signer/remote_signer.go +++ b/pkg/connector/remote_signer.go @@ -1,11 +1,16 @@ -package signer +package connector +/* +Connector is the conections between the "sentry" (consensus nodes) and the Horcrux nodes. +*/ import ( "context" "fmt" "net" "time" + "github.com/strangelove-ventures/horcrux/pkg/metrics" + "github.com/strangelove-ventures/horcrux/pkg/types" cometcryptoed25519 "github.com/cometbft/cometbft/crypto/ed25519" @@ -108,14 +113,14 @@ func (rs *ReconnRemoteSigner) loop(ctx context.Context) { timer := time.NewTimer(connRetrySec * time.Second) conn, err = rs.establishConnection(ctx) if err == nil { - sentryConnectTries.WithLabelValues(rs.address).Set(0) + metrics.SentryConnectTries.WithLabelValues(rs.address).Set(0) timer.Stop() rs.Logger.Info("Connected to Sentry", "address", rs.address) break } - sentryConnectTries.WithLabelValues(rs.address).Add(1) - totalSentryConnectTries.WithLabelValues(rs.address).Inc() + metrics.SentryConnectTries.WithLabelValues(rs.address).Add(1) + metrics.TotalSentryConnectTries.WithLabelValues(rs.address).Inc() retries++ rs.Logger.Error( "Error establishing connection, will retry", @@ -231,7 +236,7 @@ func (rs *ReconnRemoteSigner) handleSignProposalRequest( } func (rs *ReconnRemoteSigner) handlePubKeyRequest(chainID string) cometprotoprivval.Message { - totalPubKeyRequests.WithLabelValues(chainID).Inc() + metrics.TotalPubKeyRequests.WithLabelValues(chainID).Inc() msgSum := &cometprotoprivval.Message_PubKeyResponse{PubKeyResponse: &cometprotoprivval.PubKeyResponse{ PubKey: cometprotocrypto.PublicKey{}, Error: nil, @@ -288,7 +293,7 @@ func StartRemoteSigners( nodes []string, ) ([]cometservice.Service, error) { var err error - go StartMetrics() + go metrics.StartMetrics() for _, node := range nodes { // CometBFT requires a connection within 3 seconds of start or crashes // A long timeout such as 30 seconds would cause the sentry to fail in loops diff --git a/signer/remote_signer_grpc_server.go b/pkg/connector/remote_signer_grpc_server.go similarity index 62% rename from signer/remote_signer_grpc_server.go rename to pkg/connector/remote_signer_grpc_server.go index 04aa4ef8..0f053e22 100644 --- a/signer/remote_signer_grpc_server.go +++ b/pkg/connector/remote_signer_grpc_server.go @@ -1,10 +1,12 @@ -package signer +package connector import ( "context" "net" "time" + "github.com/strangelove-ventures/horcrux/pkg/metrics" + "github.com/strangelove-ventures/horcrux/pkg/types" cometlog "github.com/cometbft/cometbft/libs/log" @@ -62,7 +64,7 @@ func (s *RemoteSignerGRPCServer) OnStop() { func (s *RemoteSignerGRPCServer) PubKey(ctx context.Context, req *proto.PubKeyRequest) (*proto.PubKeyResponse, error) { chainID := req.ChainId - totalPubKeyRequests.WithLabelValues(chainID).Inc() + metrics.TotalPubKeyRequests.WithLabelValues(chainID).Inc() pubKey, err := s.validator.GetPubKey(ctx, chainID) if err != nil { @@ -83,7 +85,7 @@ func (s *RemoteSignerGRPCServer) Sign( ctx context.Context, req *proto.SignBlockRequest, ) (*proto.SignBlockResponse, error) { - chainID, block := req.ChainID, BlockFromProto(req.Block) + chainID, block := req.ChainID, types.BlockFromProto(req.Block) signature, timestamp, err := signAndTrack(ctx, s.logger, s.validator, chainID, block) if err != nil { @@ -106,16 +108,16 @@ func signAndTrack( signature, timestamp, err := validator.Sign(ctx, chainID, block) if err != nil { switch typedErr := err.(type) { - case *BeyondBlockError: + case *metrics.BeyondBlockError: logger.Debug( "Rejecting sign request", "type", types.SignType(block.Step), "chain_id", chainID, "height", block.Height, "round", block.Round, - "reason", typedErr.msg, + "reason", typedErr.Msg, ) - beyondBlockErrors.WithLabelValues(chainID).Inc() + metrics.BeyondBlockErrors.WithLabelValues(chainID).Inc() default: logger.Error( "Failed to sign", @@ -125,7 +127,7 @@ func signAndTrack( "round", block.Round, "error", err, ) - failedSignVote.WithLabelValues(chainID).Inc() + metrics.FailedSignVote.WithLabelValues(chainID).Inc() } return nil, block.Timestamp, err } @@ -147,41 +149,41 @@ func signAndTrack( switch block.Step { case types.StepPropose: - lastProposalHeight.WithLabelValues(chainID).Set(float64(block.Height)) - lastProposalRound.WithLabelValues(chainID).Set(float64(block.Round)) - totalProposalsSigned.WithLabelValues(chainID).Inc() + metrics.LastProposalHeight.WithLabelValues(chainID).Set(float64(block.Height)) + metrics.LastProposalRound.WithLabelValues(chainID).Set(float64(block.Round)) + metrics.TotalProposalsSigned.WithLabelValues(chainID).Inc() case types.StepPrevote: // Determine number of heights since the last Prevote - stepSize := block.Height - previousPrevoteHeight - if previousPrevoteHeight != 0 && stepSize > 1 { - missedPrevotes.WithLabelValues(chainID).Add(float64(stepSize)) - totalMissedPrevotes.WithLabelValues(chainID).Add(float64(stepSize)) + stepSize := block.Height - metrics.PreviousPrevoteHeight + if metrics.PreviousPrevoteHeight != 0 && stepSize > 1 { + metrics.MissedPrevotes.WithLabelValues(chainID).Add(float64(stepSize)) + metrics.TotalMissedPrevotes.WithLabelValues(chainID).Add(float64(stepSize)) } else { - missedPrevotes.WithLabelValues(chainID).Set(0) + metrics.MissedPrevotes.WithLabelValues(chainID).Set(0) } - previousPrevoteHeight = block.Height // remember last PrevoteHeight + metrics.PreviousPrevoteHeight = block.Height // remember last PrevoteHeight - metricsTimeKeeper.SetPreviousPrevote(time.Now()) + metrics.MetricsTimeKeeper.SetPreviousPrevote(time.Now()) - lastPrevoteHeight.WithLabelValues(chainID).Set(float64(block.Height)) - lastPrevoteRound.WithLabelValues(chainID).Set(float64(block.Round)) - totalPrevotesSigned.WithLabelValues(chainID).Inc() + metrics.LastPrevoteHeight.WithLabelValues(chainID).Set(float64(block.Height)) + metrics.LastPrevoteRound.WithLabelValues(chainID).Set(float64(block.Round)) + metrics.TotalPrevotesSigned.WithLabelValues(chainID).Inc() case types.StepPrecommit: - stepSize := block.Height - previousPrecommitHeight - if previousPrecommitHeight != 0 && stepSize > 1 { - missedPrecommits.WithLabelValues(chainID).Add(float64(stepSize)) - totalMissedPrecommits.WithLabelValues(chainID).Add(float64(stepSize)) + stepSize := block.Height - metrics.PreviousPrecommitHeight + if metrics.PreviousPrecommitHeight != 0 && stepSize > 1 { + metrics.MissedPrecommits.WithLabelValues(chainID).Add(float64(stepSize)) + metrics.TotalMissedPrecommits.WithLabelValues(chainID).Add(float64(stepSize)) } else { - missedPrecommits.WithLabelValues(chainID).Set(0) + metrics.MissedPrecommits.WithLabelValues(chainID).Set(0) } - previousPrecommitHeight = block.Height // remember last PrecommitHeight + metrics.PreviousPrecommitHeight = block.Height // remember last PrecommitHeight - metricsTimeKeeper.SetPreviousPrecommit(time.Now()) + metrics.MetricsTimeKeeper.SetPreviousPrecommit(time.Now()) - lastPrecommitHeight.WithLabelValues(chainID).Set(float64(block.Height)) - lastPrecommitRound.WithLabelValues(chainID).Set(float64(block.Round)) - totalPrecommitsSigned.WithLabelValues(chainID).Inc() + metrics.LastPrecommitHeight.WithLabelValues(chainID).Set(float64(block.Height)) + metrics.LastPrecommitRound.WithLabelValues(chainID).Set(float64(block.Round)) + metrics.TotalPrecommitsSigned.WithLabelValues(chainID).Inc() } return signature, timestamp, nil diff --git a/signer/metrics.go b/pkg/metrics/metrics.go similarity index 74% rename from signer/metrics.go rename to pkg/metrics/metrics.go index 2145ba77..27f4ff39 100644 --- a/signer/metrics.go +++ b/pkg/metrics/metrics.go @@ -1,4 +1,4 @@ -package signer +package metrics import ( "sync" @@ -8,6 +8,19 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" ) +// Placeholders for ERRORS +type BeyondBlockError struct { + Msg string +} + +func (e *BeyondBlockError) Error() string { return e.Msg } + +type SameBlockError struct { + Msg string +} + +func (e *SameBlockError) Error() string { return e.Msg } + type metricsTimer struct { mu sync.Mutex previousPrecommit, previousPrevote time.Time @@ -60,28 +73,28 @@ func (mt *metricsTimer) UpdatePrometheusMetrics() { defer mt.mu.Unlock() // Update Prometheus Gauges - secondsSinceLastPrecommit.Set(time.Since(mt.previousPrecommit).Seconds()) - secondsSinceLastPrevote.Set(time.Since(mt.previousPrevote).Seconds()) - secondsSinceLastLocalSignStart.Set(time.Since(mt.previousLocalSignStart).Seconds()) - secondsSinceLastLocalSignFinish.Set(time.Since(mt.previousLocalSignFinish).Seconds()) - secondsSinceLastLocalNonceTime.Set(time.Since(mt.previousLocalNonce).Seconds()) + SecondsSinceLastPrecommit.Set(time.Since(mt.previousPrecommit).Seconds()) + SecondsSinceLastPrevote.Set(time.Since(mt.previousPrevote).Seconds()) + SecondsSinceLastLocalSignStart.Set(time.Since(mt.previousLocalSignStart).Seconds()) + SecondsSinceLastLocalSignFinish.Set(time.Since(mt.previousLocalSignFinish).Seconds()) + SecondsSinceLastLocalNonceTime.Set(time.Since(mt.previousLocalNonce).Seconds()) } var ( // Variables to calculate Prometheus Metrics - previousPrecommitHeight = int64(0) - previousPrevoteHeight = int64(0) - metricsTimeKeeper = newMetricsTimer() + PreviousPrecommitHeight = int64(0) + PreviousPrevoteHeight = int64(0) + MetricsTimeKeeper = newMetricsTimer() // Prometheus Metrics - totalPubKeyRequests = promauto.NewCounterVec( + TotalPubKeyRequests = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "signer_total_pubkey_requests", Help: "Total times public key requested (High count may indicate validator restarts)", }, []string{"chain_id"}, ) - lastPrecommitHeight = promauto.NewGaugeVec( + LastPrecommitHeight = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "signer_last_precommit_height", Help: "Last Height Precommit Signed", @@ -89,7 +102,7 @@ var ( []string{"chain_id"}, ) - lastPrevoteHeight = promauto.NewGaugeVec( + LastPrevoteHeight = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "signer_last_prevote_height", Help: "Last Height Prevote Signed", @@ -97,28 +110,28 @@ var ( []string{"chain_id"}, ) - lastProposalHeight = promauto.NewGaugeVec( + LastProposalHeight = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "signer_last_proposal_height", Help: "Last Height Proposal Signed", }, []string{"chain_id"}, ) - lastPrecommitRound = promauto.NewGaugeVec( + LastPrecommitRound = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "signer_last_precommit_round", Help: "Last Round Precommit Signed", }, []string{"chain_id"}, ) - lastPrevoteRound = promauto.NewGaugeVec( + LastPrevoteRound = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "signer_last_prevote_round", Help: "Last Round Prevote Signed", }, []string{"chain_id"}, ) - lastProposalRound = promauto.NewGaugeVec( + LastProposalRound = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "signer_last_proposal_round", Help: "Last Round Proposal Signed", @@ -126,21 +139,21 @@ var ( []string{"chain_id"}, ) - totalPrecommitsSigned = promauto.NewCounterVec( + TotalPrecommitsSigned = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "signer_total_precommits_signed", Help: "Total Precommit Signed", }, []string{"chain_id"}, ) - totalPrevotesSigned = promauto.NewCounterVec( + TotalPrevotesSigned = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "signer_total_prevotes_signed", Help: "Total Prevote Signed", }, []string{"chain_id"}, ) - totalProposalsSigned = promauto.NewCounterVec( + TotalProposalsSigned = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "signer_total_proposals_signed", Help: "Total Proposal Signed", @@ -148,51 +161,51 @@ var ( []string{"chain_id"}, ) - secondsSinceLastPrecommit = promauto.NewGauge(prometheus.GaugeOpts{ + SecondsSinceLastPrecommit = promauto.NewGauge(prometheus.GaugeOpts{ Name: "signer_seconds_since_last_precommit", Help: "Seconds Since Last Precommit (Useful for Signing Co-Signer Node, Single Signer)", }) - secondsSinceLastPrevote = promauto.NewGauge(prometheus.GaugeOpts{ + SecondsSinceLastPrevote = promauto.NewGauge(prometheus.GaugeOpts{ Name: "signer_seconds_since_last_prevote", Help: "Seconds Since Last Prevote (Useful for Signing Co-Signer Node, Single Signer)", }) - secondsSinceLastLocalSignStart = promauto.NewGauge(prometheus.GaugeOpts{ + SecondsSinceLastLocalSignStart = promauto.NewGauge(prometheus.GaugeOpts{ Name: "signer_seconds_since_last_local_sign_start_time", Help: "Seconds Since Last Local Start Sign (May increase beyond block time, Rarely important) ", }) - secondsSinceLastLocalSignFinish = promauto.NewGauge(prometheus.GaugeOpts{ + SecondsSinceLastLocalSignFinish = promauto.NewGauge(prometheus.GaugeOpts{ Name: "signer_seconds_since_last_local_sign_finish_time", Help: "Seconds Since Last Local Finish Sign (Should stay below 2 * Block Time)", }) - secondsSinceLastLocalNonceTime = promauto.NewGauge(prometheus.GaugeOpts{ + SecondsSinceLastLocalNonceTime = promauto.NewGauge(prometheus.GaugeOpts{ Name: "signer_seconds_since_last_local_ephemeral_share_time", Help: "Seconds Since Last Local Ephemeral Share Sign " + "(Should not increase beyond block time; If high, may indicate raft joining issue for CoSigner) ", }) - missedPrecommits = promauto.NewGaugeVec( + MissedPrecommits = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "signer_missed_precommits", Help: "Consecutive Precommit Missed", }, []string{"chain_id"}, ) - missedPrevotes = promauto.NewGaugeVec( + MissedPrevotes = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "signer_missed_prevotes", Help: "Consecutive Prevote Missed", }, []string{"chain_id"}, ) - totalMissedPrecommits = promauto.NewCounterVec( + TotalMissedPrecommits = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "signer_total_missed_precommits", Help: "Total Precommit Missed", }, []string{"chain_id"}, ) - totalMissedPrevotes = promauto.NewCounterVec( + TotalMissedPrevotes = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "signer_total_missed_prevotes", Help: "Total Prevote Missed", @@ -200,41 +213,41 @@ var ( []string{"chain_id"}, ) - missedNonces = promauto.NewGaugeVec( + MissedNonces = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "signer_missed_ephemeral_shares", Help: "Consecutive Threshold Signature Parts Missed", }, []string{"peerid"}, ) - totalMissedNonces = promauto.NewCounterVec( + TotalMissedNonces = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "signer_total_missed_ephemeral_shares", Help: "Total Threshold Signature Parts Missed", }, []string{"peerid"}, ) - drainedNonceCache = promauto.NewGauge( + DrainedNonceCache = promauto.NewGauge( prometheus.GaugeOpts{ Name: "signer_drained_nonce_cache", Help: "Consecutive Nonces Requested When Cache is Drained", }, ) - totalDrainedNonceCache = promauto.NewCounter( + TotalDrainedNonceCache = promauto.NewCounter( prometheus.CounterOpts{ Name: "signer_total_drained_nonce_cache", Help: "Total Nonces Requested When Cache is Drained", }, ) - sentryConnectTries = promauto.NewGaugeVec( + SentryConnectTries = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "signer_sentry_connect_tries", Help: "Consecutive Number of times sentry TCP connect has been tried (High count may indicate validator restarts)", }, []string{"node"}, ) - totalSentryConnectTries = promauto.NewCounterVec( + TotalSentryConnectTries = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "signer_total_sentry_connect_tries", Help: "Total Number of times sentry TCP connect has been tried (High count may indicate validator restarts)", @@ -242,14 +255,14 @@ var ( []string{"node"}, ) - beyondBlockErrors = promauto.NewCounterVec( + BeyondBlockErrors = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "signer_total_beyond_block_errors", Help: "Total Times Signing Started but duplicate height/round request arrives", }, []string{"chain_id"}, ) - failedSignVote = promauto.NewCounterVec( + FailedSignVote = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "signer_total_failed_sign_vote", Help: "Total Times Signer Failed to sign block - Unstarted and Unexepcted Height", @@ -257,47 +270,47 @@ var ( []string{"chain_id"}, ) - totalRaftLeader = promauto.NewCounter(prometheus.CounterOpts{ + TotalRaftLeader = promauto.NewCounter(prometheus.CounterOpts{ Name: "signer_total_raft_leader", Help: "Total Times Signer is Raft Leader", }) - totalNotRaftLeader = promauto.NewCounter(prometheus.CounterOpts{ + TotalNotRaftLeader = promauto.NewCounter(prometheus.CounterOpts{ Name: "signer_total_raft_not_leader", Help: "Total Times Signer is NOT Raft Leader (Proxy signing to Raft Leader)", }) - totalRaftLeaderElectionTimeout = promauto.NewCounter(prometheus.CounterOpts{ + TotalRaftLeaderElectionTimeout = promauto.NewCounter(prometheus.CounterOpts{ Name: "signer_total_raft_leader_election_timeout", Help: "Total Times Raft Leader Failed Election (Lacking Peers)", }) - totalInvalidSignature = promauto.NewCounter(prometheus.CounterOpts{ + TotalInvalidSignature = promauto.NewCounter(prometheus.CounterOpts{ Name: "signer_error_total_invalid_signatures", Help: "Total Times Combined Signature is Invalid", }) - totalInsufficientCosigners = promauto.NewCounter(prometheus.CounterOpts{ + TotalInsufficientCosigners = promauto.NewCounter(prometheus.CounterOpts{ Name: "signer_error_total_insufficient_cosigners", Help: "Total Times Cosigners doesn't reach threshold", }) - timedSignBlockThresholdLag = promauto.NewSummary(prometheus.SummaryOpts{ + TimedSignBlockThresholdLag = promauto.NewSummary(prometheus.SummaryOpts{ Name: "signer_sign_block_threshold_lag_seconds", Help: "Seconds taken to get threshold of cosigners available", Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, }) - timedSignBlockCosignerLag = promauto.NewSummary(prometheus.SummaryOpts{ + TimedSignBlockCosignerLag = promauto.NewSummary(prometheus.SummaryOpts{ Name: "signer_sign_block_cosigner_lag_seconds", Help: "Seconds taken to get all cosigner signatures", Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, }) - timedSignBlockLag = promauto.NewSummary(prometheus.SummaryOpts{ + TimedSignBlockLag = promauto.NewSummary(prometheus.SummaryOpts{ Name: "signer_sign_block_lag_seconds", Help: "Seconds taken to sign block", Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, }) - timedCosignerNonceLag = promauto.NewSummaryVec( + TimedCosignerNonceLag = promauto.NewSummaryVec( prometheus.SummaryOpts{ Name: "signer_cosigner_ephemeral_share_lag_seconds", Help: "Time taken to get cosigner ephemeral share", @@ -305,7 +318,7 @@ var ( }, []string{"peerid"}, ) - timedCosignerSignLag = promauto.NewSummaryVec( + TimedCosignerSignLag = promauto.NewSummaryVec( prometheus.SummaryOpts{ Name: "signer_cosigner_sign_lag_seconds", Help: "Time taken to get cosigner signature", @@ -318,7 +331,7 @@ var ( func StartMetrics() { // Update elapsed times on an interval basis for { - metricsTimeKeeper.UpdatePrometheusMetrics() + MetricsTimeKeeper.UpdatePrometheusMetrics() // Prometheus often only polls every 1 to every few seconds // Frequent updates minimize reporting error. diff --git a/pkg/types/block.go b/pkg/types/block.go index 04169402..9bae9680 100644 --- a/pkg/types/block.go +++ b/pkg/types/block.go @@ -40,3 +40,13 @@ func (block Block) ToProto() *proto.Block { Timestamp: block.Timestamp.UnixNano(), } } + +func BlockFromProto(block *proto.Block) Block { + return Block{ + Height: block.Height, + Round: block.Round, + Step: int8(block.Step), + SignBytes: block.SignBytes, + Timestamp: time.Unix(0, block.Timestamp), + } +} diff --git a/signer/cosigner.go b/signer/cosigner.go index 1be4cc77..9ce3e15a 100644 --- a/signer/cosigner.go +++ b/signer/cosigner.go @@ -86,6 +86,7 @@ func (secretPart *CosignerNonce) toProto() *proto.Nonce { } } +// CosignerNonces are a list of CosignerNonce type CosignerNonces []CosignerNonce func (secretParts CosignerNonces) toProto() (out []*proto.Nonce) { diff --git a/signer/cosigner_grpc_server.go b/signer/cosigner_grpc_server.go index b3e72178..f0e8262f 100644 --- a/signer/cosigner_grpc_server.go +++ b/signer/cosigner_grpc_server.go @@ -37,7 +37,7 @@ func (rpc *CosignerGRPCServer) SignBlock( ctx context.Context, req *proto.SignBlockRequest, ) (*proto.SignBlockResponse, error) { - res, _, err := rpc.thresholdValidator.Sign(ctx, req.ChainID, BlockFromProto(req.Block)) + res, _, err := rpc.thresholdValidator.Sign(ctx, req.ChainID, types.BlockFromProto(req.Block)) if err != nil { return nil, err } diff --git a/signer/cosigner_key.go b/signer/cosigner_key.go index 32639a81..63c6ced7 100644 --- a/signer/cosigner_key.go +++ b/signer/cosigner_key.go @@ -2,6 +2,7 @@ package signer import ( "encoding/json" + cometcrypto "github.com/cometbft/cometbft/crypto" cometcryptoed25519 "github.com/cometbft/cometbft/crypto/ed25519" cometcryptoencoding "github.com/cometbft/cometbft/crypto/encoding" @@ -10,7 +11,6 @@ import ( ) // CosignerEd25519Key is a single Ed255219 key shard for an m-of-n threshold signer. -// TODO: CosignerEd25519Key is almost a duplicate of ThresholdSignerEd25519Key type CosignerEd25519Key struct { PubKey cometcrypto.PubKey `json:"pubKey"` PrivateShard []byte `json:"privateShard"` diff --git a/signer/cosigner_nonce_cache.go b/signer/cosigner_nonce_cache.go index 78edcf42..d364a2a6 100644 --- a/signer/cosigner_nonce_cache.go +++ b/signer/cosigner_nonce_cache.go @@ -6,6 +6,8 @@ import ( "sync" "time" + "github.com/strangelove-ventures/horcrux/pkg/metrics" + cometlog "github.com/cometbft/cometbft/libs/log" "github.com/google/uuid" ) @@ -282,15 +284,15 @@ func (cnc *CosignerNonceCache) LoadN(ctx context.Context, n int) { n, err := p.GetNonces(ctx, uuids) if err != nil { // Significant missing shares may lead to signature failure - missedNonces.WithLabelValues(p.GetAddress()).Add(float64(1)) - totalMissedNonces.WithLabelValues(p.GetAddress()).Inc() + metrics.MissedNonces.WithLabelValues(p.GetAddress()).Add(float64(1)) + metrics.TotalMissedNonces.WithLabelValues(p.GetAddress()).Inc() cnc.logger.Error("Failed to get nonces from peer", "peer", p.GetIndex(), "error", err) return } - missedNonces.WithLabelValues(p.GetAddress()).Set(0) - timedCosignerNonceLag.WithLabelValues(p.GetAddress()).Observe(time.Since(peerStartTime).Seconds()) + metrics.MissedNonces.WithLabelValues(p.GetAddress()).Set(0) + metrics.TimedCosignerNonceLag.WithLabelValues(p.GetAddress()).Observe(time.Since(peerStartTime).Seconds()) nonces[i] = &CachedNonceSingle{ Cosigner: p, diff --git a/signer/local_cosigner.go b/signer/local_cosigner.go index 1236dd8d..9768692c 100644 --- a/signer/local_cosigner.go +++ b/signer/local_cosigner.go @@ -7,6 +7,8 @@ import ( "sync" "time" + "github.com/strangelove-ventures/horcrux/pkg/metrics" + "github.com/strangelove-ventures/horcrux/pkg/types" cometcrypto "github.com/cometbft/cometbft/crypto" @@ -173,7 +175,7 @@ func (cosigner *LocalCosigner) GetPubKey(chainID string) (cometcrypto.PubKey, er return nil, err } - return cometcryptoed25519.PubKey(ccs.signer.PubKey()), nil + return cometcryptoed25519.PubKey(ccs.signer.GetPubKey()), nil } // CombineSignatures combines partial signatures into a full signature. @@ -201,7 +203,7 @@ func (cosigner *LocalCosigner) VerifySignature(chainID string, payload, signatur sig := make([]byte, len(signature)) copy(sig, signature) - return cometcryptoed25519.PubKey(ccs.signer.PubKey()).VerifySignature(payload, sig) + return cometcryptoed25519.PubKey(ccs.signer.GetPubKey()).VerifySignature(payload, sig) } // Sign the sign request using the cosigner's shard @@ -218,7 +220,7 @@ func (cosigner *LocalCosigner) sign(req CosignerSignRequest) (CosignerSignRespon } // This function has multiple exit points. Only start time can be guaranteed - metricsTimeKeeper.SetPreviousLocalSignStart(time.Now()) + metrics.MetricsTimeKeeper.SetPreviousLocalSignStart(time.Now()) hrst, err := types.UnpackHRST(req.SignBytes) if err != nil { @@ -270,7 +272,7 @@ func (cosigner *LocalCosigner) sign(req CosignerSignRequest) (CosignerSignRespon res.Signature = sig // Note - Function may return before this line so elapsed time for Finish may be multiple block times - metricsTimeKeeper.SetPreviousLocalSignFinish(time.Now()) + metrics.MetricsTimeKeeper.SetPreviousLocalSignFinish(time.Now()) return res, nil } @@ -326,7 +328,7 @@ func (cosigner *LocalCosigner) GetNonces( _ context.Context, uuids []uuid.UUID, ) (CosignerUUIDNoncesMultiple, error) { - metricsTimeKeeper.SetPreviousLocalNonce(time.Now()) + metrics.MetricsTimeKeeper.SetPreviousLocalNonce(time.Now()) total := len(cosigner.config.Config.ThresholdModeConfig.Cosigners) diff --git a/signer/single_signer_validator.go b/signer/single_signer_validator.go index 52e60b00..cb51ad6e 100644 --- a/signer/single_signer_validator.go +++ b/signer/single_signer_validator.go @@ -3,6 +3,7 @@ package signer import ( "context" "fmt" + "github.com/strangelove-ventures/horcrux/pkg/connector" "os" "sync" "time" @@ -10,7 +11,7 @@ import ( "github.com/strangelove-ventures/horcrux/pkg/types" ) -var _ PrivValidator = &SingleSignerValidator{} +var _ connector.PrivValidator = &SingleSignerValidator{} // SingleSignerValidator guards access to an underlying PrivValidator by using mutexes // for each of the PrivValidator interface functions diff --git a/signer/threshold_signer.go b/signer/threshold_signer.go index 8e911c17..676e15c4 100644 --- a/signer/threshold_signer.go +++ b/signer/threshold_signer.go @@ -2,16 +2,17 @@ package signer import ( "encoding/json" + "os" + "github.com/cometbft/cometbft/privval" "github.com/strangelove-ventures/horcrux/pkg/types" tsed25519 "gitlab.com/unit410/threshold-ed25519/pkg" - "os" ) // Interface for the local signer whether it's a soft sign or HSM type ThresholdSigner interface { - // PubKey returns the public key bytes for the combination of all cosigners. - PubKey() []byte + // GetPubKey returns the public key bytes for the combination of all cosigners. + GetPubKey() []byte // Sign signs a byte payload with the provided nonces. Sign(nonces []types.Nonce, payload []byte) ([]byte, error) diff --git a/signer/threshold_signer_bls.go b/signer/threshold_signer_bls.go index 100b06de..bc8c9e15 100644 --- a/signer/threshold_signer_bls.go +++ b/signer/threshold_signer_bls.go @@ -45,7 +45,7 @@ func NewThresholdSignerSoftBLS(config *RuntimeConfig, id int, chainID string) (* return &s, nil } -func (s *ThresholdSignerSoftBLS) PubKey() []byte { +func (s *ThresholdSignerSoftBLS) GetPubKey() []byte { return s.pubKey } diff --git a/signer/threshold_signer_soft.go b/signer/threshold_signer_soft.go index 2991f440..38c7a471 100644 --- a/signer/threshold_signer_soft.go +++ b/signer/threshold_signer_soft.go @@ -46,7 +46,7 @@ func NewThresholdSignerSoft(config *RuntimeConfig, id int, chainID string) (*Thr return &s, nil } -func (s *ThresholdSignerSoft) PubKey() []byte { +func (s *ThresholdSignerSoft) GetPubKey() []byte { return s.pubKey } diff --git a/signer/threshold_validator.go b/signer/threshold_validator.go index 393f41ce..0343286b 100644 --- a/signer/threshold_validator.go +++ b/signer/threshold_validator.go @@ -9,18 +9,20 @@ import ( "sync" "time" + "github.com/strangelove-ventures/horcrux/pkg/connector" + "github.com/strangelove-ventures/horcrux/pkg/metrics" + "github.com/strangelove-ventures/horcrux/pkg/types" "github.com/cometbft/cometbft/libs/log" cometrpcjsontypes "github.com/cometbft/cometbft/rpc/jsonrpc/types" "github.com/google/uuid" - "github.com/strangelove-ventures/horcrux/signer/proto" "golang.org/x/sync/errgroup" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" ) -var _ PrivValidator = &ThresholdValidator{} +var _ connector.PrivValidator = &ThresholdValidator{} type ThresholdValidator struct { config *RuntimeConfig @@ -282,28 +284,12 @@ func (pv *ThresholdValidator) GetPubKey(_ context.Context, chainID string) ([]by return pubKey.Bytes(), nil } -func BlockFromProto(block *proto.Block) types.Block { - return types.Block{ - Height: block.Height, - Round: block.Round, - Step: int8(block.Step), - SignBytes: block.SignBytes, - Timestamp: time.Unix(0, block.Timestamp), - } -} - -type BeyondBlockError struct { - msg string -} - -func (e *BeyondBlockError) Error() string { return e.msg } - -func (pv *ThresholdValidator) newBeyondBlockError(chainID string, hrs types.HRSKey) *BeyondBlockError { +func (pv *ThresholdValidator) newBeyondBlockError(chainID string, hrs types.HRSKey) *metrics.BeyondBlockError { css := pv.mustLoadChainState(chainID) lss := css.lastSignStateInitiated - return &BeyondBlockError{ - msg: fmt.Sprintf("[%s] Progress already started on block %d.%d.%d, skipping %d.%d.%d", + return &metrics.BeyondBlockError{ + Msg: fmt.Sprintf("[%s] Progress already started on block %d.%d.%d, skipping %d.%d.%d", chainID, lss.Height, lss.Round, lss.Step, hrs.Height, hrs.Round, hrs.Step, @@ -324,15 +310,9 @@ func newStillWaitingForBlockError(chainID string, hrs types.HRSKey) *StillWaitin } } -type SameBlockError struct { - msg string -} - -func (e *SameBlockError) Error() string { return e.msg } - -func newSameBlockError(chainID string, hrs types.HRSKey) *SameBlockError { - return &SameBlockError{ - msg: fmt.Sprintf("[%s] Same block: %d.%d.%d", +func newSameBlockError(chainID string, hrs types.HRSKey) *metrics.SameBlockError { + return &metrics.SameBlockError{ + Msg: fmt.Sprintf("[%s] Same block: %d.%d.%d", chainID, hrs.Height, hrs.Round, hrs.Step), } } @@ -395,7 +375,7 @@ func (pv *ThresholdValidator) compareBlockSignatureAgainstSSC( stamp, signBytes := block.Timestamp, block.SignBytes if err := pv.compareBlockSignatureAgainstHRS(chainID, block, existingSignature.HRSKey()); err != nil { - if _, ok := err.(*SameBlockError); !ok { + if _, ok := err.(*metrics.SameBlockError); !ok { return nil, stamp, err } } @@ -440,8 +420,8 @@ func (pv *ThresholdValidator) getNoncesFallback( ) (*CosignerUUIDNonces, []Cosigner, error) { nonces := make(map[Cosigner]CosignerNonces) - drainedNonceCache.Inc() - totalDrainedNonceCache.Inc() + metrics.DrainedNonceCache.Inc() + metrics.TotalDrainedNonceCache.Inc() var wg sync.WaitGroup wg.Add(pv.threshold) @@ -505,15 +485,15 @@ func (pv *ThresholdValidator) waitForPeerNonces( peerStartTime := time.Now() peerNonces, err := peer.GetNonces(ctx, []uuid.UUID{u}) if err != nil { - missedNonces.WithLabelValues(peer.GetAddress()).Inc() - totalMissedNonces.WithLabelValues(peer.GetAddress()).Inc() + metrics.MissedNonces.WithLabelValues(peer.GetAddress()).Inc() + metrics.TotalMissedNonces.WithLabelValues(peer.GetAddress()).Inc() pv.logger.Error("Error getting nonces", "cosigner", peer.GetIndex(), "err", err) return } - missedNonces.WithLabelValues(peer.GetAddress()).Set(0) - timedCosignerNonceLag.WithLabelValues(peer.GetAddress()).Observe(time.Since(peerStartTime).Seconds()) + metrics.MissedNonces.WithLabelValues(peer.GetAddress()).Set(0) + metrics.TimedCosignerNonceLag.WithLabelValues(peer.GetAddress()).Observe(time.Since(peerStartTime).Seconds()) // Check so that wg.Done is not called more than (threshold - 1) times which causes hardlock mu.Lock() @@ -544,7 +524,7 @@ func (pv *ThresholdValidator) proxyIfNecessary( } if leader == -1 { - totalRaftLeaderElectionTimeout.Inc() + metrics.TotalRaftLeaderElectionTimeout.Inc() return true, nil, stamp, fmt.Errorf("timed out waiting for raft leader") } @@ -558,7 +538,7 @@ func (pv *ThresholdValidator) proxyIfNecessary( "round", round, "step", step, ) - totalNotRaftLeader.Inc() + metrics.TotalNotRaftLeader.Inc() cosignerLeader := pv.peerCosigners.GetByID(leader) if cosignerLeader == nil { @@ -574,7 +554,7 @@ func (pv *ThresholdValidator) proxyIfNecessary( rpcErrUnwrapped := err.(*cometrpcjsontypes.RPCError).Data // Need to return BeyondBlockError after proxy since the error type will be lost over RPC if len(rpcErrUnwrapped) > 33 && rpcErrUnwrapped[:33] == "Progress already started on block" { - return true, nil, stamp, &BeyondBlockError{msg: rpcErrUnwrapped} + return true, nil, stamp, &metrics.BeyondBlockError{Msg: rpcErrUnwrapped} } } return true, nil, stamp, err @@ -603,7 +583,7 @@ func (pv *ThresholdValidator) Sign(ctx context.Context, chainID string, block ty return proxySig, proxyStamp, err } - totalRaftLeader.Inc() + metrics.TotalRaftLeader.Inc() log.Debug("I am the leader. Managing the sign process for this block") @@ -649,7 +629,7 @@ func (pv *ThresholdValidator) Sign(ctx context.Context, chainID string, block ty } dontIterateFastestCosigners = true } else { - drainedNonceCache.Set(0) + metrics.DrainedNonceCache.Set(0) } nextFastestCosignerIndex := pv.threshold - 1 @@ -665,11 +645,11 @@ func (pv *ThresholdValidator) Sign(ctx context.Context, chainID string, block ty return cosigner } - timedSignBlockThresholdLag.Observe(time.Since(timeStartSignBlock).Seconds()) + metrics.TimedSignBlockThresholdLag.Observe(time.Since(timeStartSignBlock).Seconds()) for _, peer := range pv.peerCosigners { - missedNonces.WithLabelValues(peer.GetAddress()).Set(0) - timedCosignerNonceLag.WithLabelValues(peer.GetAddress()).Observe(time.Since(peerStartTime).Seconds()) + metrics.MissedNonces.WithLabelValues(peer.GetAddress()).Set(0) + metrics.TimedCosignerNonceLag.WithLabelValues(peer.GetAddress()).Observe(time.Since(peerStartTime).Seconds()) } cosignersForThisBlockInt := make([]int, len(cosignersForThisBlock)) @@ -726,7 +706,7 @@ func (pv *ThresholdValidator) Sign(ctx context.Context, chainID string, block ty } if cosigner != pv.myCosigner { - timedCosignerSignLag.WithLabelValues(cosigner.GetAddress()).Observe(time.Since(peerStartTime).Seconds()) + metrics.TimedCosignerSignLag.WithLabelValues(cosigner.GetAddress()).Observe(time.Since(peerStartTime).Seconds()) } shareSignatures[cosigner.GetIndex()-1] = sigRes.Signature @@ -741,7 +721,7 @@ func (pv *ThresholdValidator) Sign(ctx context.Context, chainID string, block ty return nil, stamp, fmt.Errorf("error from cosigner(s): %s", err) } - timedSignBlockCosignerLag.Observe(time.Since(timeStartSignBlock).Seconds()) + metrics.TimedSignBlockCosignerLag.Observe(time.Since(timeStartSignBlock).Seconds()) // collect all valid responses into array of partial signatures shareSigs := make([]types.PartialSignature, 0, pv.threshold) @@ -762,7 +742,7 @@ func (pv *ThresholdValidator) Sign(ctx context.Context, chainID string, block ty } if len(shareSigs) < pv.threshold { - totalInsufficientCosigners.Inc() + metrics.TotalInsufficientCosigners.Inc() pv.notifyBlockSignError(chainID, block.HRSKey(), signBytes) return nil, stamp, errors.New("not enough co-signers") } @@ -776,7 +756,7 @@ func (pv *ThresholdValidator) Sign(ctx context.Context, chainID string, block ty // verify the combined signature before saving to watermark if !pv.myCosigner.VerifySignature(chainID, signBytes, signature) { - totalInvalidSignature.Inc() + metrics.TotalInvalidSignature.Inc() pv.notifyBlockSignError(chainID, block.HRSKey(), signBytes) return nil, stamp, errors.New("combined signature is not valid") @@ -817,7 +797,7 @@ func (pv *ThresholdValidator) Sign(ctx context.Context, chainID string, block ty timeSignBlock := time.Since(timeStartSignBlock) timeSignBlockSec := timeSignBlock.Seconds() - timedSignBlockLag.Observe(timeSignBlockSec) + metrics.TimedSignBlockLag.Observe(timeSignBlockSec) log.Info( "Signed",