Skip to content

Commit ce6b156

Browse files
HA: Support multiple enclaves on a single host (#1852)
1 parent fe8b248 commit ce6b156

File tree

15 files changed

+277
-137
lines changed

15 files changed

+277
-137
lines changed

go/common/host/host_healthcheck.go

+19
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,22 @@ func (l *BasicErrHealthStatus) OK() bool {
2424
// Message returns the status's ErrMsg field unchanged.
func (l *BasicErrHealthStatus) Message() string {
	text := l.ErrMsg
	return text
}
27+
28+
// GroupErrsHealthStatus is a health status that aggregates several errors
// (e.g. one or more per enclave) instead of a single message.
type GroupErrsHealthStatus struct {
29+
// Errors holds the collected problems; an empty (or nil) slice is treated as healthy by OK().
Errors []error
30+
}
31+
32+
func (g *GroupErrsHealthStatus) OK() bool {
33+
return len(g.Errors) == 0
34+
}
35+
36+
func (g *GroupErrsHealthStatus) Message() string {
37+
msg := ""
38+
for i, err := range g.Errors {
39+
if i > 0 {
40+
msg += ", "
41+
}
42+
msg += err.Error()
43+
}
44+
return msg
45+
}

go/config/host_config.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ type HostInputConfig struct {
3232
// Host on which to handle client RPC requests
3333
ClientRPCHost string
3434
// Address on which to connect to the enclave
35-
EnclaveRPCAddress string
35+
EnclaveRPCAddresses []string
3636
// P2PBindAddress is the address where the P2P server is bound to
3737
P2PBindAddress string
3838
// P2PPublicAddress is the advertised P2P server address
@@ -111,7 +111,7 @@ func (p HostInputConfig) ToHostConfig() *HostConfig {
111111
HasClientRPCWebsockets: p.HasClientRPCWebsockets,
112112
ClientRPCPortWS: p.ClientRPCPortWS,
113113
ClientRPCHost: p.ClientRPCHost,
114-
EnclaveRPCAddress: p.EnclaveRPCAddress,
114+
EnclaveRPCAddresses: p.EnclaveRPCAddresses,
115115
P2PBindAddress: p.P2PBindAddress,
116116
P2PPublicAddress: p.P2PPublicAddress,
117117
L1WebsocketURL: p.L1WebsocketURL,
@@ -208,8 +208,8 @@ type HostConfig struct {
208208
ClientRPCPortWS uint64
209209
// Host on which to handle client RPC requests
210210
ClientRPCHost string
211-
// Address on which to connect to the enclave
212-
EnclaveRPCAddress string
211+
// Addresses on which to connect to the node's enclaves (HA setups may have multiple)
212+
EnclaveRPCAddresses []string
213213
// P2PBindAddress is the address where the P2P server is bound to
214214
P2PBindAddress string
215215
// P2PPublicAddress is the advertised P2P server address
@@ -244,7 +244,7 @@ func DefaultHostParsedConfig() *HostInputConfig {
244244
HasClientRPCWebsockets: true,
245245
ClientRPCPortWS: 81,
246246
ClientRPCHost: "127.0.0.1",
247-
EnclaveRPCAddress: "127.0.0.1:11000",
247+
EnclaveRPCAddresses: []string{"127.0.0.1:11000"},
248248
P2PBindAddress: "0.0.0.0:10000",
249249
P2PPublicAddress: "127.0.0.1:10000",
250250
L1WebsocketURL: "ws://127.0.0.1:8546",

go/host/container/cli.go

+5-4
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"flag"
55
"fmt"
66
"os"
7+
"strings"
78
"time"
89

910
"github.com/ten-protocol/go-ten/go/common"
@@ -24,7 +25,7 @@ type HostConfigToml struct {
2425
HasClientRPCWebsockets bool
2526
ClientRPCPortWS uint
2627
ClientRPCHost string
27-
EnclaveRPCAddress string
28+
EnclaveRPCAddresses string // comma-separated
2829
P2PBindAddress string
2930
P2PPublicAddress string
3031
L1WebsocketURL string
@@ -66,7 +67,7 @@ func ParseConfig() (*config.HostInputConfig, error) {
6667
clientRPCPortHTTP := flag.Uint64(clientRPCPortHTTPName, cfg.ClientRPCPortHTTP, flagUsageMap[clientRPCPortHTTPName])
6768
clientRPCPortWS := flag.Uint64(clientRPCPortWSName, cfg.ClientRPCPortWS, flagUsageMap[clientRPCPortWSName])
6869
clientRPCHost := flag.String(clientRPCHostName, cfg.ClientRPCHost, flagUsageMap[clientRPCHostName])
69-
enclaveRPCAddress := flag.String(enclaveRPCAddressName, cfg.EnclaveRPCAddress, flagUsageMap[enclaveRPCAddressName])
70+
enclaveRPCAddressesStr := flag.String(enclaveRPCAddressesName, strings.Join(cfg.EnclaveRPCAddresses, ","), flagUsageMap[enclaveRPCAddressesName])
7071
p2pBindAddress := flag.String(p2pBindAddressName, cfg.P2PBindAddress, flagUsageMap[p2pBindAddressName])
7172
p2pPublicAddress := flag.String(p2pPublicAddressName, cfg.P2PPublicAddress, flagUsageMap[p2pPublicAddressName])
7273
l1WSURL := flag.String(l1WebsocketURLName, cfg.L1WebsocketURL, flagUsageMap[l1WebsocketURLName])
@@ -112,7 +113,7 @@ func ParseConfig() (*config.HostInputConfig, error) {
112113
cfg.HasClientRPCWebsockets = true
113114
cfg.ClientRPCPortWS = *clientRPCPortWS
114115
cfg.ClientRPCHost = *clientRPCHost
115-
cfg.EnclaveRPCAddress = *enclaveRPCAddress
116+
cfg.EnclaveRPCAddresses = strings.Split(*enclaveRPCAddressesStr, ",")
116117
cfg.P2PBindAddress = *p2pBindAddress
117118
cfg.P2PPublicAddress = *p2pPublicAddress
118119
cfg.L1WebsocketURL = *l1WSURL
@@ -189,7 +190,7 @@ func fileBasedConfig(configPath string) (*config.HostInputConfig, error) {
189190
HasClientRPCWebsockets: tomlConfig.HasClientRPCWebsockets,
190191
ClientRPCPortWS: uint64(tomlConfig.ClientRPCPortWS),
191192
ClientRPCHost: tomlConfig.ClientRPCHost,
192-
EnclaveRPCAddress: tomlConfig.EnclaveRPCAddress,
193+
EnclaveRPCAddresses: strings.Split(tomlConfig.EnclaveRPCAddresses, ","),
193194
P2PBindAddress: tomlConfig.P2PBindAddress,
194195
P2PPublicAddress: tomlConfig.P2PPublicAddress,
195196
L1WebsocketURL: tomlConfig.L1WebsocketURL,

go/host/container/cli_flags.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ const (
99
clientRPCPortHTTPName = "clientRPCPortHttp"
1010
clientRPCPortWSName = "clientRPCPortWs"
1111
clientRPCHostName = "clientRPCHost"
12-
enclaveRPCAddressName = "enclaveRPCAddress"
12+
enclaveRPCAddressesName = "enclaveRPCAddresses"
1313
p2pBindAddressName = "p2pBindAddress"
1414
p2pPublicAddressName = "p2pPublicAddress"
1515
l1WebsocketURLName = "l1WSURL"
@@ -49,7 +49,7 @@ func getFlagUsageMap() map[string]string {
4949
clientRPCPortHTTPName: "The port on which to listen for client application RPC requests over HTTP",
5050
clientRPCPortWSName: "The port on which to listen for client application RPC requests over websockets",
5151
clientRPCHostName: "The host on which to handle client application RPC requests",
52-
enclaveRPCAddressName: "The address to use to connect to the Obscuro enclave service",
52+
enclaveRPCAddressesName: "The comma-separated addresses to use to connect to the Ten enclaves",
5353
p2pBindAddressName: "The address where the p2p server is bound to. Defaults to 0.0.0.0:10000",
5454
p2pPublicAddressName: "The P2P address where the other servers should connect to. Defaults to 127.0.0.1:10000",
5555
l1WebsocketURLName: "The websocket RPC address the host can use for L1 requests",

go/host/container/host_container.go

+7-4
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,10 @@ func NewHostContainerFromConfig(parsedConfig *config.HostInputConfig, logger get
132132

133133
fmt.Println("Connecting to the enclave...")
134134
services := host.NewServicesRegistry(logger)
135-
enclaveClient := enclaverpc.NewClient(cfg, logger)
135+
enclaveClients := make([]common.Enclave, len(cfg.EnclaveRPCAddresses))
136+
for i, addr := range cfg.EnclaveRPCAddresses {
137+
enclaveClients[i] = enclaverpc.NewClient(addr, cfg.EnclaveRPCTimeout, logger)
138+
}
136139
p2pLogger := logger.New(log.CmpKey, log.P2PCmp)
137140
metricsService := metrics.New(cfg.MetricsEnabled, cfg.MetricsHTTPPort, logger)
138141

@@ -150,13 +153,13 @@ func NewHostContainerFromConfig(parsedConfig *config.HostInputConfig, logger get
150153
obscuroRelevantContracts := []gethcommon.Address{cfg.ManagementContractAddress, cfg.MessageBusAddress}
151154
l1Repo := l1.NewL1Repository(l1Client, obscuroRelevantContracts, logger)
152155

153-
return NewHostContainer(cfg, services, aggP2P, l1Client, l1Repo, enclaveClient, mgmtContractLib, ethWallet, rpcServer, logger, metricsService)
156+
return NewHostContainer(cfg, services, aggP2P, l1Client, l1Repo, enclaveClients, mgmtContractLib, ethWallet, rpcServer, logger, metricsService)
154157
}
155158

156159
// NewHostContainer builds a host container with dependency injection rather than from config.
157160
// Useful for testing etc. (want to be able to pass in logger, and also have option to mock out dependencies)
158-
func NewHostContainer(cfg *config.HostConfig, services *host.ServicesRegistry, p2p hostcommon.P2PHostService, l1Client ethadapter.EthClient, l1Repo hostcommon.L1RepoService, enclaveClient common.Enclave, contractLib mgmtcontractlib.MgmtContractLib, hostWallet wallet.Wallet, rpcServer node.Server, logger gethlog.Logger, metricsService *metrics.Service) *HostContainer {
159-
h := host.NewHost(cfg, services, p2p, l1Client, l1Repo, enclaveClient, hostWallet, contractLib, logger, metricsService.Registry())
161+
func NewHostContainer(cfg *config.HostConfig, services *host.ServicesRegistry, p2p hostcommon.P2PHostService, l1Client ethadapter.EthClient, l1Repo hostcommon.L1RepoService, enclaveClients []common.Enclave, contractLib mgmtcontractlib.MgmtContractLib, hostWallet wallet.Wallet, rpcServer node.Server, logger gethlog.Logger, metricsService *metrics.Service) *HostContainer {
162+
h := host.NewHost(cfg, services, p2p, l1Client, l1Repo, enclaveClients, hostWallet, contractLib, logger, metricsService.Registry())
160163

161164
hostContainer := &HostContainer{
162165
host: h,

go/host/container/test.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ clientRPCPortHTTP = 80
55
HasClientRPCWebsockets = true
66
ClientRPCPortWS = 81
77
ClientRPCHost = "127.0.0.1"
8-
EnclaveRPCAddress = "127.0.0.1:11000"
8+
EnclaveRPCAddresses = "127.0.0.1:11000"
99
EnclaveRPCTimeout = 10
1010
P2PBindAddress = "0.0.0.0:10000"
1111
P2PPublicAddress = "127.0.0.1:10000"

go/host/enclave/service.go

+57-26
Original file line numberDiff line numberDiff line change
@@ -23,73 +23,104 @@ type enclaveServiceLocator interface {
2323
// Service is the host-side service that fronts the node's enclave(s). It holds the
// guardian(s) it talks to the enclave through, plus a running flag that Start/Stop
// flip and HealthStatus reads.
type Service struct {
2424
hostData host.Identity
2525
sl enclaveServiceLocator
26-
// eventually this service will support multiple enclaves for HA, but currently there's only one
27-
// The service goes via the Guardian to talk to the enclave (because guardian knows if the enclave is healthy etc.)
28-
enclaveGuardian *Guardian
26+
27+
// The service goes via the Guardians to talk to the enclave (because guardian knows if the enclave is healthy etc.)
28+
enclaveGuardians []*Guardian
2929

3030
running atomic.Bool
3131
logger gethlog.Logger
3232
}
3333

34-
func NewService(hostData host.Identity, serviceLocator enclaveServiceLocator, enclaveGuardian *Guardian, logger gethlog.Logger) *Service {
34+
func NewService(hostData host.Identity, serviceLocator enclaveServiceLocator, enclaveGuardians []*Guardian, logger gethlog.Logger) *Service {
3535
return &Service{
36-
hostData: hostData,
37-
sl: serviceLocator,
38-
enclaveGuardian: enclaveGuardian,
39-
logger: logger,
36+
hostData: hostData,
37+
sl: serviceLocator,
38+
enclaveGuardians: enclaveGuardians,
39+
logger: logger,
4040
}
4141
}
4242

4343
func (e *Service) Start() error {
4444
e.running.Store(true)
45-
return e.enclaveGuardian.Start()
45+
for _, guardian := range e.enclaveGuardians {
46+
if err := guardian.Start(); err != nil {
47+
// abandon starting the rest of the guardians if one fails
48+
return err
49+
}
50+
}
51+
return nil
4652
}
4753

4854
func (e *Service) Stop() error {
4955
e.running.Store(false)
50-
return e.enclaveGuardian.Stop()
56+
var errors []error
57+
for i, guardian := range e.enclaveGuardians {
58+
if err := guardian.Stop(); err != nil {
59+
errors = append(errors, fmt.Errorf("error stopping enclave guardian [%d]: %w", i, err))
60+
}
61+
}
62+
if len(errors) > 0 {
63+
return fmt.Errorf("errors stopping enclave guardians: %v", errors)
64+
}
65+
return nil
5166
}
5267

5368
func (e *Service) HealthStatus() host.HealthStatus {
5469
if !e.running.Load() {
5570
return &host.BasicErrHealthStatus{ErrMsg: "not running"}
5671
}
5772

58-
// check the enclave health, which in turn checks the DB health
59-
enclaveHealthy, err := e.enclaveGuardian.enclaveClient.HealthCheck()
60-
if err != nil {
61-
return &host.BasicErrHealthStatus{ErrMsg: fmt.Sprintf("unable to HealthCheck enclave - %s", err.Error())}
62-
} else if !enclaveHealthy {
63-
return &host.BasicErrHealthStatus{ErrMsg: "enclave reported itself as not healthy"}
64-
}
73+
errors := make([]error, 0, len(e.enclaveGuardians))
6574

66-
if !e.enclaveGuardian.GetEnclaveState().InSyncWithL1() {
67-
return &host.BasicErrHealthStatus{ErrMsg: "enclave not in sync with L1"}
75+
for i, guardian := range e.enclaveGuardians {
76+
// check the enclave health, which in turn checks the DB health
77+
enclaveHealthy, err := guardian.enclaveClient.HealthCheck()
78+
if err != nil {
79+
errors = append(errors, fmt.Errorf("unable to HealthCheck enclave[%d] - %w", i, err))
80+
} else if !enclaveHealthy {
81+
errors = append(errors, fmt.Errorf("enclave[%d] reported itself not healthy", i))
82+
}
83+
84+
if !guardian.GetEnclaveState().InSyncWithL1() {
85+
errors = append(errors, fmt.Errorf("enclave[%d] not in sync with L1", i))
86+
}
6887
}
6988

7089
// empty error msg means healthy
71-
return &host.BasicErrHealthStatus{ErrMsg: ""}
90+
return &host.GroupErrsHealthStatus{Errors: errors}
91+
}
92+
93+
func (e *Service) HealthyGuardian() *Guardian {
94+
for _, guardian := range e.enclaveGuardians {
95+
if guardian.HealthStatus().OK() {
96+
return guardian
97+
}
98+
}
99+
return nil
72100
}
73101

74102
// LookupBatchBySeqNo is used to fetch batch data from the enclave - it is only used as a fallback for the sequencer
75103
// host if it's missing a batch (other host services should use L2Repo to fetch batch data)
76104
func (e *Service) LookupBatchBySeqNo(seqNo *big.Int) (*common.ExtBatch, error) {
77-
state := e.enclaveGuardian.GetEnclaveState()
105+
hg := e.HealthyGuardian()
106+
state := hg.GetEnclaveState()
78107
if state.GetEnclaveL2Head().Cmp(seqNo) < 0 {
79108
return nil, errutil.ErrNotFound
80109
}
81-
client := e.enclaveGuardian.GetEnclaveClient()
110+
client := hg.GetEnclaveClient()
82111
return client.GetBatchBySeqNo(seqNo.Uint64())
83112
}
84113

85114
func (e *Service) GetEnclaveClient() common.Enclave {
86-
return e.enclaveGuardian.GetEnclaveClient()
115+
// for now we always return first guardian's enclave client
116+
// in future be good to load balance and failover but need to improve subscribe/unsubscribe (unsubscribe from same enclave)
117+
return e.enclaveGuardians[0].GetEnclaveClient()
87118
}
88119

89120
func (e *Service) SubmitAndBroadcastTx(encryptedParams common.EncryptedParamsSendRawTx) (*responses.RawTx, error) {
90121
encryptedTx := common.EncryptedTx(encryptedParams)
91122

92-
enclaveResponse, sysError := e.enclaveGuardian.GetEnclaveClient().SubmitTx(encryptedTx)
123+
enclaveResponse, sysError := e.GetEnclaveClient().SubmitTx(encryptedTx)
93124
if sysError != nil {
94125
e.logger.Warn("Could not submit transaction due to sysError.", log.ErrKey, sysError)
95126
return nil, sysError
@@ -110,9 +141,9 @@ func (e *Service) SubmitAndBroadcastTx(encryptedParams common.EncryptedParamsSen
110141
}
111142

112143
func (e *Service) Subscribe(id rpc.ID, encryptedParams common.EncryptedParamsLogSubscription) error {
113-
return e.enclaveGuardian.GetEnclaveClient().Subscribe(id, encryptedParams)
144+
return e.GetEnclaveClient().Subscribe(id, encryptedParams)
114145
}
115146

116147
func (e *Service) Unsubscribe(id rpc.ID) error {
117-
return e.enclaveGuardian.GetEnclaveClient().Unsubscribe(id)
148+
return e.GetEnclaveClient().Unsubscribe(id)
118149
}

go/host/host.go

+15-3
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ type host struct {
4949
l2MessageBusAddress *gethcommon.Address
5050
}
5151

52-
func NewHost(config *config.HostConfig, hostServices *ServicesRegistry, p2p hostcommon.P2PHostService, ethClient ethadapter.EthClient, l1Repo hostcommon.L1RepoService, enclaveClient common.Enclave, ethWallet wallet.Wallet, mgmtContractLib mgmtcontractlib.MgmtContractLib, logger gethlog.Logger, regMetrics gethmetrics.Registry) hostcommon.Host {
52+
func NewHost(config *config.HostConfig, hostServices *ServicesRegistry, p2p hostcommon.P2PHostService, ethClient ethadapter.EthClient, l1Repo hostcommon.L1RepoService, enclaveClients []common.Enclave, ethWallet wallet.Wallet, mgmtContractLib mgmtcontractlib.MgmtContractLib, logger gethlog.Logger, regMetrics gethmetrics.Registry) hostcommon.Host {
5353
database, err := db.CreateDBFromConfig(config, regMetrics, logger)
5454
if err != nil {
5555
logger.Crit("unable to create database for host", log.ErrKey, err)
@@ -72,8 +72,20 @@ func NewHost(config *config.HostConfig, hostServices *ServicesRegistry, p2p host
7272
stopControl: stopcontrol.New(),
7373
}
7474

75-
enclGuardian := enclave.NewGuardian(config, hostIdentity, hostServices, enclaveClient, database, host.stopControl, logger)
76-
enclService := enclave.NewService(hostIdentity, hostServices, enclGuardian, logger)
75+
enclGuardians := make([]*enclave.Guardian, 0, len(enclaveClients))
76+
for i, enclClient := range enclaveClients {
77+
// clone the hostIdentity data for each enclave
78+
enclHostID := hostIdentity
79+
if i > 0 {
80+
// only the first enclave can be the sequencer for now, others behave as read-only validators
81+
enclHostID.IsSequencer = false
82+
enclHostID.IsGenesis = false
83+
}
84+
enclGuardian := enclave.NewGuardian(config, enclHostID, hostServices, enclClient, database, host.stopControl, logger)
85+
enclGuardians = append(enclGuardians, enclGuardian)
86+
}
87+
88+
enclService := enclave.NewService(hostIdentity, hostServices, enclGuardians, logger)
7789
l2Repo := l2.NewBatchRepository(config, hostServices, database, logger)
7890
subsService := events.NewLogEventManager(hostServices, logger)
7991

0 commit comments

Comments
 (0)