Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Misc fixes for key sync #87

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 10 additions & 9 deletions doc/key-synchronization.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,19 +95,20 @@ sequenceDiagram
end

leader->>leader: Generate HTTPS certificate
leaderApp->>leaderApp: Generate key material

Note over leader,worker: Enclaves designate the leader
worker->>+leader: GET /enclave/leader (nonce_w)
leader-->>-worker: OK

worker->>worker: Did not call itself: worker
leader->>leader: GET /enclave/leader (nonce_l)
leader->>leader: Did call itself: leader

Note over leaderApp,leader: Application sets its key material
leaderApp->>+leader: PUT /enclave/state (key material)
Note over leaderApp,leader: Enclave prompts key generation

leader->>+leaderApp: GET /enclave/state
leaderApp-->>-leader: OK
leader->>leader: Save key material
leader-->>-leaderApp: OK

Note over leader,worker: Worker announces itself to leader
worker->>+leader: POST /enclave/heartbeat
Expand All @@ -127,10 +128,9 @@ worker-->>-leader: OK

worker->>worker: Install HTTPS certificate

Note over worker,workerApp: Application retrieves key material
workerApp->>+worker: GET /enclave/state
worker->>worker: Retrieve key material
worker-->>-workerApp: OK (key material)
Note over worker,workerApp: Enclave sends key material to app
worker->>+workerApp: PUT /enclave/state
workerApp-->>-worker: OK
workerApp->>workerApp: Install key material

Note over leader, worker: Worker starts heartbeat loop
Expand All @@ -141,9 +141,10 @@ loop Heartbeat
end

Note over leaderApp: Application updates its key material

leaderApp->>+leader: PUT /enclave/state (key material)
leader->>leader: Save key material
leader-->>-leaderApp: OK

note over leader,worker: Leader initiates key re-synchronization as above
```
```
126 changes: 84 additions & 42 deletions enclave.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"net/http/httputil"
_ "net/http/pprof"
"net/url"
"strconv"
"sync"
"time"

Expand Down Expand Up @@ -65,20 +66,22 @@ var (
// Enclave represents a service running inside an AWS Nitro Enclave.
type Enclave struct {
attester
sync.Mutex // Guard syncState.
cfg *Config
syncState int
extPubSrv, extPrivSrv *http.Server
intSrv *http.Server
promSrv *http.Server
revProxy *httputil.ReverseProxy
hashes *AttestationHashes
promRegistry *prometheus.Registry
metrics *metrics
workers *workerManager
keys *enclaveKeys
httpsCert *certRetriever
ready, stop chan struct{}
sync.Mutex // Guard syncState.
cfg *Config
syncState int
extPubSrv, extPrivSrv *http.Server
intSrv *http.Server
promSrv *http.Server
revProxy *httputil.ReverseProxy
hashes *AttestationHashes
promRegistry *prometheus.Registry
metrics *metrics
workers *workerManager
keys *enclaveKeys
httpsCert *certRetriever
appReady, networkReady, stop chan struct{}
heartbeatActive bool
myHostname string
}

// Config represents the configuration of our enclave service.
Expand All @@ -88,7 +91,7 @@ type Config struct {
// is required.
FQDN string

// FQDNLeader contains the fully qualified domain name of the leader
// FQDNLeader contains the fully qualified domain name and port of the leader
// enclave, which coordinates enclave synchronization. Only set this field
// if horizontal scaling is required.
FQDNLeader string
Expand Down Expand Up @@ -133,6 +136,10 @@ type Config struct {
// metrics. Consider setting this to your application's name.
PrometheusNamespace string

// HostIpProviderPort contains the port of the host IP provider, which is
// provided by vsock-relay. Only set this field if key synchronization is
// enabled.
HostIpProviderPort uint32

// UseProfiling enables profiling via pprof. Profiling information will be
// available at /enclave/debug. Note that profiling data is privacy
// sensitive and therefore must not be enabled in production.
Expand Down Expand Up @@ -245,7 +252,8 @@ func NewEnclave(cfg *Config) (*Enclave, error) {
hashes: new(AttestationHashes),
workers: newWorkerManager(time.Minute),
stop: make(chan struct{}),
ready: make(chan struct{}),
appReady: make(chan struct{}),
networkReady: make(chan struct{}),
}

// Increase the maximum number of idle connections per host. This is
Expand Down Expand Up @@ -291,9 +299,8 @@ func NewEnclave(cfg *Config) (*Enclave, error) {
// Register enclave-internal HTTP API.
m = e.intSrv.Handler.(*chi.Mux)
if cfg.WaitForApp {
m.Get(pathReady, readyHandler(e.ready))
m.Get(pathReady, readyHandler(e.appReady))
}
m.Get(pathState, getStateHandler(e.getSyncState, e.keys))
m.Put(pathState, putStateHandler(e.attester, e.getSyncState, e.keys, e.workers))
m.Post(pathHash, hashHandler(e))

Expand All @@ -303,7 +310,11 @@ func NewEnclave(cfg *Config) (*Enclave, error) {
e.revProxy = httputil.NewSingleHostReverseProxy(cfg.AppWebSrv)
e.revProxy.BufferPool = newBufPool()
e.revProxy.Transport = customTransport
e.extPubSrv.Handler.(*chi.Mux).Handle(pathProxy, e.revProxy)
extm := e.extPubSrv.Handler.(*chi.Mux)
extm.Handle(pathState, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
http.Error(w, "Forbidden", http.StatusForbidden)
}))
extm.Handle(pathProxy, e.revProxy)
// If we expose Prometheus metrics, we keep track of the HTTP backend's
// responses.
if cfg.PrometheusPort > 0 {
Expand Down Expand Up @@ -336,7 +347,7 @@ func (e *Enclave) Start() error {

// Set up our networking environment which creates a TAP device that
// forwards traffic (via the VSOCK interface) to the EC2 host.
go runNetworking(e.cfg, e.stop)
go runNetworking(e.cfg, e.stop, e.networkReady)

// Get an HTTPS certificate.
if e.cfg.UseACME {
Expand All @@ -356,14 +367,24 @@ func (e *Enclave) Start() error {
return nil
}

elog.Println("Waiting for networking setup...")
<-e.networkReady
// Check if we are the leader.
if !e.weAreLeader() {
// Get the worker's hostname/IP, so we can give the leader
// enclave contact details for future sync attempts.
elog.Println("Obtaining worker's hostname.")
worker := getSyncURL(getHostnameOrDie(), e.cfg.ExtPrivPort)
e.myHostname = getHostnameOrDie(e.cfg.HostIpProviderPort)
worker := getSyncURL(e.myHostname, e.cfg.ExtPrivPort)
err = asWorker(e.setupWorkerPostSync, e.attester).registerWith(leader, worker)
if err != nil {
elog.Fatalf("Error syncing with leader: %v", err)
}
} else {
// Get leader app key to share with worker enclaves.
if err = requestAndStoreKeyFromApp(e.cfg.AppWebSrv, e.keys); err != nil {
elog.Fatalf("Failed to retrieve key material from app as leader: %v", err)
}
}

return nil
Expand Down Expand Up @@ -416,15 +437,14 @@ func (e *Enclave) weAreLeader() (result bool) {
},
)

timeout := time.NewTicker(10 * time.Second)
timeout := time.NewTicker(120 * time.Second)
for {
go makeLeaderRequest(leader, ourNonce, areWeLeader, errChan)
select {
case <-e.stop:
return
case <-errChan:
elog.Println("Not yet able to talk to leader designation endpoint.")
time.Sleep(time.Second)
case err = <-errChan:
elog.Printf("Not yet able to talk to leader designation endpoint: %v", err)
continue
case result = <-areWeLeader:
return
Expand All @@ -442,15 +462,26 @@ func (e *Enclave) weAreLeader() (result bool) {
// installing the given enclave keys and starting the heartbeat loop.
func (e *Enclave) setupWorkerPostSync(keys *enclaveKeys) error {
e.keys.set(keys)

if err := sendKeyToApp(e.cfg.AppWebSrv, e.keys); err != nil {
return err
}

cert, err := tls.X509KeyPair(keys.NitridingCert, keys.NitridingKey)
if err != nil {
return err
}
e.httpsCert.set(&cert)
if err = e.setCertFingerprint(keys.NitridingCert); err != nil {
return err
}

// Start our heartbeat.
worker := getSyncURL(getHostnameOrDie(), e.cfg.ExtPrivPort)
go e.workerHeartbeat(worker)
if !e.heartbeatActive {
worker := getSyncURL(e.myHostname, e.cfg.ExtPrivPort)

go e.workerHeartbeat(worker)
e.heartbeatActive = true
}

return nil
}
Expand Down Expand Up @@ -516,6 +547,7 @@ func (e *Enclave) workerHeartbeat(worker *url.URL) {
// Stop stops the enclave.
func (e *Enclave) Stop() error {
close(e.stop)
e.heartbeatActive = false
if err := e.intSrv.Shutdown(context.Background()); err != nil {
return err
}
Expand All @@ -533,11 +565,11 @@ func (e *Enclave) Stop() error {

// getExtListener returns a listener for the HTTPS service
// via AF_INET or AF_VSOCK.
func (e *Enclave) getExtListener() (net.Listener, error) {
func (e *Enclave) getExtListener(port uint16) (net.Listener, error) {
if e.cfg.UseVsockForExtPort {
return vsock.Listen(uint32(e.cfg.ExtPubPort), nil)
return vsock.Listen(uint32(port), nil)
} else {
return net.Listen("tcp", fmt.Sprintf(":%d", e.cfg.ExtPubPort))
return net.Listen("tcp", fmt.Sprintf(":%d", port))
}
}

Expand All @@ -547,7 +579,11 @@ func (e *Enclave) startWebServers() error {
if e.cfg.PrometheusPort > 0 {
elog.Printf("Starting Prometheus Web server (%s).", e.promSrv.Addr)
go func() {
err := e.promSrv.ListenAndServe()
listener, err := e.getExtListener(e.cfg.PrometheusPort)
if err != nil {
elog.Fatalf("Failed to listen on Prometheus port: %v", err)
}
err = e.promSrv.Serve(listener)
if err != nil && !errors.Is(err, http.ErrServerClosed) {
elog.Fatalf("Prometheus Web server error: %v", err)
}
Expand All @@ -562,8 +598,12 @@ func (e *Enclave) startWebServers() error {
}
}()
go func() {
listener, err := e.getExtListener(e.cfg.ExtPrivPort)
if err != nil {
elog.Fatalf("Failed to listen on external port: %v", err)
}
elog.Printf("Starting external private Web server at %s.", e.extPrivSrv.Addr)
err := e.extPrivSrv.ListenAndServeTLS("", "")
err = e.extPrivSrv.ServeTLS(listener, "", "")
if err != nil && !errors.Is(err, http.ErrServerClosed) {
elog.Fatalf("External private Web server error: %v", err)
}
Expand All @@ -572,11 +612,11 @@ func (e *Enclave) startWebServers() error {
// If desired, don't launch our Internet-facing Web server until the
// application signalled that it's ready.
if e.cfg.WaitForApp {
<-e.ready
<-e.appReady
elog.Println("Application signalled that it's ready. Starting public Web server.")
}

listener, err := e.getExtListener()
listener, err := e.getExtListener(e.cfg.ExtPubPort)
if err != nil {
elog.Fatalf("Failed to listen on external port: %v", err)
}
Expand Down Expand Up @@ -704,21 +744,23 @@ func (e *Enclave) setCertFingerprint(rawData []byte) error {
func (e *Enclave) getLeader(path string) *url.URL {
return &url.URL{
Scheme: "https",
Host: fmt.Sprintf("%s:%d", e.cfg.FQDNLeader, e.cfg.ExtPrivPort),
Host: e.cfg.FQDNLeader,
Path: path,
}
}

// getWorker takes as input the worker's heartbeat request payload and returns
// the worker's URL.
func (e *Enclave) getWorker(hb *heartbeatRequest) (*url.URL, error) {
var (
host string
err error
)
host, _, err = net.SplitHostPort(hb.WorkerHostname)
host, port, err := net.SplitHostPort(hb.WorkerHostname)
if err != nil {
return nil, err
}
return getSyncURL(host, e.cfg.ExtPrivPort), nil
portUint, err := strconv.ParseUint(port, 10, 16)
if err != nil {
return nil, fmt.Errorf("invalid port number: %v", err)
}
portUint16 := uint16(portUint)

return getSyncURL(host, portUint16), nil
}
25 changes: 25 additions & 0 deletions enclave_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package main

import (
"io"
"net/http"
"net/http/httptest"
"testing"
)

Expand All @@ -17,13 +20,35 @@ var defaultCfg = Config{
WaitForApp: true,
}

// mockAppRequestInfo is a record of a single HTTP request as observed by the
// handler of the mock server returned by createMockServer.
type mockAppRequestInfo struct {
// method is the request's HTTP method, copied from r.Method.
method string
// path is the request's URL path, copied from r.URL.Path.
path string
// body is the request body as read by the mock server's handler.
body []byte
}

// assertEqual aborts the running test with t.Fatalf unless the observed value
// (is) equals the expected value (should) under Go's == operator. The function
// registers itself as a test helper, so the reported failure location is the
// caller's line rather than a line in here.
func assertEqual(t *testing.T, is, should interface{}) {
	t.Helper()
	if is == should {
		// Values match; nothing to report.
		return
	}
	t.Fatalf("Expected value\n%v\nbut got\n%v", should, is)
}

// createMockServer starts an httptest server that records every request it
// receives. For each request, the method, URL path, and full body are appended
// to *mockAppRequests so that the test can inspect them afterwards. The server
// then answers with 200 OK and, if responseBody is non-nil, writes it as the
// response body.
//
// If the request body cannot be read, the request is not recorded and the
// server answers with 500, so a broken request surfaces in the test instead of
// being logged with a truncated body.
//
// The caller is responsible for closing the returned server.
//
// NOTE(review): appends to *mockAppRequests are not synchronized; this assumes
// that tests issue their requests sequentially — confirm against callers.
func createMockServer(responseBody []byte, mockAppRequests *[]mockAppRequestInfo) *httptest.Server {
	return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		body, err := io.ReadAll(r.Body)
		if err != nil {
			http.Error(w, "failed to read request body", http.StatusInternalServerError)
			return
		}

		*mockAppRequests = append(*mockAppRequests, mockAppRequestInfo{
			method: r.Method,
			path:   r.URL.Path,
			body:   body,
		})
		w.WriteHeader(http.StatusOK)
		if responseBody != nil {
			// Best effort: a failed write shows up on the client side of the
			// test, so the error is deliberately ignored here.
			_, _ = w.Write(responseBody)
		}
	}))
}

func createEnclave(cfg *Config) *Enclave {
e, err := NewEnclave(cfg)
if err != nil {
Expand Down
Loading
Loading