Skip to content

Commit

Permalink
Host: only evict HA sequencer enclaves (#2222)
Browse files Browse the repository at this point in the history
  • Loading branch information
BedrockSquirrel authored Dec 20, 2024
1 parent 514417e commit 04a765a
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 17 deletions.
2 changes: 1 addition & 1 deletion go/common/host/services.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ type EnclaveService interface {
// EvictEnclave will remove the enclave from the list of enclaves, it is used when an enclave is unhealthy
// - the enclave guardians are responsible for calling this method when they detect an enclave is unhealthy to notify
// the service that it should failover if possible
EvictEnclave(enclaveID *common.EnclaveID)
NotifyUnavailable(enclaveID *common.EnclaveID)

// SubmitAndBroadcastTx submits an encrypted transaction to the enclave, and broadcasts it to other hosts on the network (in particular, to the sequencer)
SubmitAndBroadcastTx(ctx context.Context, encryptedParams common.EncryptedRequest) (*responses.RawTx, error)
Expand Down
15 changes: 5 additions & 10 deletions go/host/enclave/guardian.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,9 +275,9 @@ func (g *Guardian) mainLoop() {
g.logger.Trace("mainLoop - enclave status", "status", g.state.GetStatus())
switch g.state.GetStatus() {
case Disconnected, Unavailable:
if unavailableCounter > 3 {
// todo make this eviction trigger configurable once we've settled on how it should work
if unavailableCounter > 10 {
// enclave has been unavailable for a while, evict it from the HA pool
// todo - @matt - we need to consider more carefully when to evict an enclave
g.evictEnclaveFromHAPool()
}
// nothing to do, we are waiting for the enclave to be available
Expand Down Expand Up @@ -609,7 +609,6 @@ func (g *Guardian) periodicBatchProduction() {
skipBatchIfEmpty := g.maxBatchInterval > g.batchInterval && time.Since(g.lastBatchCreated) < g.maxBatchInterval
err := g.enclaveClient.CreateBatch(context.Background(), skipBatchIfEmpty)
if err != nil {
// todo: is this too low a bar for failover? Retry first?
g.logger.Error("Unable to produce batch", log.ErrKey, err)
g.evictEnclaveFromHAPool()
}
Expand Down Expand Up @@ -813,16 +812,12 @@ func (g *Guardian) startSequencerProcesses() {
go g.periodicBundleSubmission()
}

// evictEnclaveFromHAPool evicts a failing enclave from the HA pool and shuts down the guardian.
// evictEnclaveFromHAPool evicts a failing enclave from the HA pool if appropriate
// This is called when the enclave is unrecoverable and we want to notify the host that it should failover if an
// alternative enclave is available.
func (g *Guardian) evictEnclaveFromHAPool() {
g.logger.Error("Enclave is unrecoverable - requesting to evict it from HA pool")
err := g.Stop()
if err != nil {
g.logger.Error("Error while stopping guardian of failed enclave", log.ErrKey, err)
}
go g.sl.Enclaves().EvictEnclave(g.enclaveID)
g.logger.Warn("Enclave is unavailable - notifying enclave service to evict it from HA pool if necessary")
go g.sl.Enclaves().NotifyUnavailable(g.enclaveID)
}

func (g *Guardian) getRollupsAndContractAddrTxs(processed common.ProcessedL1Data) ([]*common.L1RollupTx, bool) {
Expand Down
10 changes: 9 additions & 1 deletion go/host/enclave/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,15 @@ func (e *Service) GetEnclaveClients() []common.Enclave {
return clients
}

func (e *Service) EvictEnclave(enclaveID *common.EnclaveID) {
func (e *Service) NotifyUnavailable(enclaveID *common.EnclaveID) {
if len(e.enclaveGuardians) <= 1 {
e.logger.Info("not running in HA mode, no need to evict enclave", log.EnclaveIDKey, enclaveID)
return
}
if *e.activeSequencerID != *enclaveID {
e.logger.Info("Enclave is not the active sequencer, no need to evict yet.", log.EnclaveIDKey, enclaveID)
return
}
failedEnclaveIdx := -1
e.haLock.Lock()
defer e.haLock.Unlock()
Expand Down
10 changes: 5 additions & 5 deletions tools/walletextension/common/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ type Config struct {
RateLimitWindow time.Duration
RateLimitMaxConcurrentRequests int

InsideEnclave bool // Indicates if the program is running inside an enclave
KeyExchangeURL string
EnableTLS bool
TLSDomain string
EncryptingCertificateEnabled bool
InsideEnclave bool // Indicates if the program is running inside an enclave
KeyExchangeURL string
EnableTLS bool
TLSDomain string
EncryptingCertificateEnabled bool
}

0 comments on commit 04a765a

Please sign in to comment.