Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PAL/Linux-SGX] Add AEX-Notify support in exception handling flow #1531

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 80 additions & 6 deletions pal/src/host/linux-sgx/enclave_entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@ enclave_entry:
cmpl $0, %edi
jne .Lcssa1_exception_determine_when

# TODO: we shouldn't ignore definitely-malicious exception, but we do it now
jmp .Lcssa1_exception_eexit
# definitely-malicious exception (correct untrusted runtime would never generate "0" event)
FAIL_LOOP

.Lcssa1_exception_determine_when:
# If this enclave thread has not been initialized yet, we should not try to call an event
Expand Down Expand Up @@ -263,7 +263,9 @@ enclave_entry:

# We are interrupted during the never-returning OCALL_EXIT. Because the thread is going to exit
# anyway, we can ignore this exception.
jmp .Lcssa1_exception_eexit
movq %rdx, %rbx # upon EENTER the exit address was in RDX, mov it to RBX for EEXIT
movq $EEXIT, %rax
enclu

.Lcssa1_exception_during_ocall_flows:
# At this point, we are in the stage-1 exception handler (CSSA=1) and
Expand Down Expand Up @@ -543,8 +545,8 @@ enclave_entry:
movq %rdi, SGX_GPR_RIP(%rbx)

# copy the whole SSA[0].XSAVE region to the CPU context's XSAVE on stack;
# __restore_xregs / __save_xregs clobber RDX so need to stash it in RBX
movq %rdx, %rbx
# __restore_xregs / __save_xregs clobber RDX so need to stash it in R10
movq %rdx, %r10
movq %gs:SGX_SSA, %rdi
leaq 1f(%rip), %r11
jmp __restore_xregs
Expand All @@ -553,9 +555,14 @@ enclave_entry:
leaq 2f(%rip), %r11
jmp __save_xregs
2:
movq %rbx, %rdx
movq %r10, %rdx

.Lcssa1_exception_eexit:
cmpb $1, g_aex_notify_enabled(%rip)
je 1f

# AEX Notify is not set, stage-1 exception handler follows the normal EEXIT flow.

# .Lcssa0_ocall_or_cssa1_exception_eexit has an ABI that uses RSI, RDI, RSP; clear the relevant
# regs (note that stage-1 handler didn't clobber RSP -- which contains an untrusted pointer to
# untrusted-runtime stack -- but this flow doesn't read/write RSP at all so there is no need to
Expand All @@ -567,6 +574,52 @@ enclave_entry:
movq %rdx, %rbx
jmp .Lcssa0_ocall_or_cssa1_exception_eexit

1:
# AEX Notify is set; instead of invoking EEXIT, the stage-1 exception handler executes the new
# EDECCSSA instruction.

# after restoring SSA[0] context into GPRs, we'll need to jump to the stage-2 handler, so
# memorize SSA[0].GPRSGX.RIP in an otherwise-unused R11
movq SGX_GPR_RIP(%rbx), %r11

# clear bit 0 within SSA[0].GPRSGX.AEXNOTIFY (so that ERESUME actually resumes stage-2 handler)
movb $0, SGX_GPR_AEXNOTIFY(%rbx)

# restore context from SSA[0] (which was already modified above to jump to stage-2 C handler);
# note that XSAVE area (xregs) was already restored above, so only need to restore GPRs;
# note that we only restore those GPRs of SSA[0] that were used/modified by stage-1 handler;
# note that we don't care about SSA[0].GPRSGX.{URSP,URBP,EXITINFO,RESERVED,GSBASE}

leaq SGX_GPR_RFLAGS(%rbx), %rsp # trick to restore RFLAGS directly from SSA[0].GPRSGX.RFLAGS
popfq

movq SGX_GPR_FSBASE(%rbx), %rdi
.byte 0xf3, 0x48, 0x0f, 0xae, 0xd7 # WRFSBASE %RDI

movq SGX_GPR_RDI(%rbx), %rdi
movq SGX_GPR_RSI(%rbx), %rsi
movq SGX_GPR_RDX(%rbx), %rdx
movq SGX_GPR_RCX(%rbx), %rcx # not strictly needed
movq SGX_GPR_R8(%rbx), %r8 # not strictly needed
movq SGX_GPR_R9(%rbx), %r9 # not strictly needed
movq SGX_GPR_R10(%rbx), %r10 # not strictly needed
movq SGX_GPR_R12(%rbx), %r12 # not strictly needed
movq SGX_GPR_R13(%rbx), %r13 # not strictly needed
movq SGX_GPR_R14(%rbx), %r14 # not strictly needed
movq SGX_GPR_R15(%rbx), %r15 # not strictly needed
movq SGX_GPR_RBP(%rbx), %rbp # not strictly needed
movq SGX_GPR_RSP(%rbx), %rsp
xorq %rbx, %rbx # for sanity

# go from SSA[1] to SSA[0] (more specifically, simply decrement CSSA from 1 to 0);
# must be careful after this ENCLU instruction because may be interrupted by new exceptions
movq $EDECCSSA, %rax
enclu

# finally jump to the stage-2 C exception handler
.Lfinalize_context_after_deccssa_inst0:
jmp *%r11

.cfi_endproc


Expand Down Expand Up @@ -602,6 +655,15 @@ sgx_ocall:
pushq %rbx
.cfi_offset %rbx, -24

# Disable aex-notify before stack switch
cmpq $0, %gs:SGX_READY_FOR_AEX_NOTIFY
je 1f
movq %gs:SGX_GPR, %rax
cmpb $0, SGX_GPR_AEXNOTIFY(%rax)
je 1f
movb $0, SGX_GPR_AEXNOTIFY(%rax)
movq $0x1, %gs:SGX_AEX_NOTIFY_FLAG # set aexnotify_flag
1:
CHECK_IF_SIGNAL_STACK_IS_USED %rsp, .Lon_signal_stack_ocall, .Lout_of_signal_stack_ocall

.Lout_of_signal_stack_ocall:
Expand Down Expand Up @@ -757,6 +819,16 @@ sgx_ocall:
cmpq $PAL_EVENT_NUM_BOUND, %rsi
jb 2f

1:
# check if it is ready to enable AEX-Notify
cmpq $0, %gs:SGX_READY_FOR_AEX_NOTIFY
je 1f
# check if aexnotify is disabled during ocall-exit
cmpq $0, %gs:SGX_AEX_NOTIFY_FLAG
je 1f
movq $0, %gs:SGX_AEX_NOTIFY_FLAG
movq %gs:SGX_GPR, %rdi
movb $0x1, SGX_GPR_AEXNOTIFY(%rdi)
1:
# there was no event, simply call _restore_sgx_context(uc, xsave_area)
movq %rsp, %rdi
Expand All @@ -768,6 +840,8 @@ sgx_ocall:

2:
# there was some event, call _PalHandleExternalEvent(event, uc, xregs_state)
# Note we don't check AEX-Notify here. If we disabled it during ocall-exit and
# execution reaches the code below, AEX-Notify will be re-enabled after event handling

# clear the Alignment Check flag to prevent #AC-fault side channel
pushfq
Expand Down
71 changes: 62 additions & 9 deletions pal/src/host/linux-sgx/host_entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ sgx_ecall:
enclu

# currently only ECALL_THREAD_RESET returns
.Lafter_resume:
popq %r15
.cfi_adjust_cfa_offset -8
popq %r14
Expand All @@ -70,9 +69,9 @@ async_exit_pointer:
# increment per-thread AEX counter for stats
lock incq %gs:PAL_HOST_TCB_AEX_CNT

#ifdef DEBUG
# Inform that we are in AEX profiling code
movb $1, %gs:PAL_HOST_TCB_IN_AEX_PROF
# Inform that we are in AEX code
movb $1, %gs:PAL_HOST_TCB_IN_AEX

# Save ERESUME parameters
pushq %rax
.cfi_adjust_cfa_offset 8
Expand All @@ -84,11 +83,17 @@ async_exit_pointer:
# Align stack (required by System V AMD64 ABI)
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $RED_ZONE_SIZE, %rsp
andq $~0xF, %rsp

#ifdef DEBUG
# Call sgx_profile_sample_aex with %rdi = TCS
movq %rbx, %rdi
call sgx_profile_sample_aex
#endif

# Check if there are sync/async signals pending and invoke in-enclave stage-1 handler if any
call sgx_handle_aex_signal

# Restore stack
movq %rbp, %rsp
Expand All @@ -101,18 +106,22 @@ async_exit_pointer:
.cfi_adjust_cfa_offset -8
popq %rax
.cfi_adjust_cfa_offset -8
movb $0, %gs:PAL_HOST_TCB_IN_AEX_PROF
#endif

.cfi_endproc
movb $0, %gs:PAL_HOST_TCB_IN_AEX

# In case of normal ERESUME, RDI is not used;
# In case of ERESUME-morphed-into-EENTER, RDI is external event in flow .Lcssa1_exception
movq $PAL_EVENT_INTERRUPTED, %rdi

# fall-through to ERESUME
.cfi_endproc

.global eresume_pointer
.type eresume_pointer, @function

eresume_pointer:
enclu # perform ERESUME
# perform ERESUME (RAX already contains "ERESUME" because that's what AEX hardware flow does)
enclu

.global async_exit_pointer_end
.type async_exit_pointer_end, @function
Expand All @@ -123,8 +132,52 @@ async_exit_pointer_end:
.type sgx_raise, @function

sgx_raise:
.cfi_startproc
leaq .Lafter_resume(%rip), %rdx
jmp .Ldo_ecall_callee_save

# other arguments: RDI - event (sync or async signal)

# below logic is the same as for sgx_ecall(), see comments for that function
pushq %rbx
.cfi_adjust_cfa_offset 8
pushq %rbp
.cfi_adjust_cfa_offset 8
pushq %r12
.cfi_adjust_cfa_offset 8
pushq %r13
.cfi_adjust_cfa_offset 8
pushq %r14
.cfi_adjust_cfa_offset 8
pushq %r15
.cfi_adjust_cfa_offset 8

lock incq %gs:PAL_HOST_TCB_EENTER_CNT
movq %gs:PAL_HOST_TCB_TCS, %rbx
leaq async_exit_pointer(%rip), %rcx
movq $EENTER, %rax

.global sgx_raise_eenter_instr
.type sgx_raise_eenter_instr, @function

sgx_raise_eenter_instr:
enclu

.Lafter_resume:
popq %r15
.cfi_adjust_cfa_offset -8
popq %r14
.cfi_adjust_cfa_offset -8
popq %r13
.cfi_adjust_cfa_offset -8
popq %r12
.cfi_adjust_cfa_offset -8
popq %rbp
.cfi_adjust_cfa_offset -8
popq %rbx
.cfi_adjust_cfa_offset -8
retq
.cfi_endproc


.Lsgx_entry:
# arguments: RDI - code, RSI - ms
Expand Down
68 changes: 58 additions & 10 deletions pal/src/host/linux-sgx/host_exception.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ static bool interrupted_in_enclave(struct ucontext* uc) {
return rip >= (unsigned long)async_exit_pointer && rip < (unsigned long)async_exit_pointer_end;
}

static bool interrupted_in_aex_profiling(void) {
return pal_get_host_tcb()->is_in_aex_profiling != 0;
}
static bool interrupted_in_aex(void) {
    /* per-thread flag is raised by the AEX trampoline (async_exit_pointer) for the whole
     * duration of AEX handling and cleared just before ERESUME */
    return !!pal_get_host_tcb()->is_in_aex;
}

static void handle_sync_signal(int signum, siginfo_t* info, struct ucontext* uc) {
enum pal_event event = signal_to_pal_event(signum);
Expand All @@ -107,10 +107,20 @@ static void handle_sync_signal(int signum, siginfo_t* info, struct ucontext* uc)
for (size_t i = 0; i < g_rpc_queue->rpc_threads_cnt; i++)
DO_SYSCALL(tkill, g_rpc_queue->rpc_threads[i], SIGUSR2);

if (event == PAL_EVENT_MEMFAULT &&
ucontext_get_ip(uc) == (unsigned long)&sgx_raise_eenter_instr) {
/* this is #GP on EENTER instruction during sgx_raise() -- means that some async signal
* arrived while the enclave thread executes in CSSA=1 (stage-1 exception handler); we
* should ignore this fault by skipping EENTER, see also sgx_handle_aex_signal() */
ucontext_set_ip(uc, ucontext_get_ip(uc) + /*sizeof(ENCLU)=*/3);
return;
}

if (interrupted_in_enclave(uc)) {
/* exception happened in app/LibOS/trusted PAL code, handle signal inside enclave */
/* exception happened in app/LibOS/trusted PAL code, mark to handle signal inside enclave */
assert(pal_get_host_tcb()->aex_sync_event == PAL_EVENT_NO_EVENT);
pal_get_host_tcb()->aex_sync_event = event;
pal_get_host_tcb()->sync_signal_cnt++;
sgx_raise(event);
return;
}

Expand Down Expand Up @@ -158,15 +168,19 @@ static void handle_async_signal(int signum, siginfo_t* info, struct ucontext* uc
for (size_t i = 0; i < g_rpc_queue->rpc_threads_cnt; i++)
DO_SYSCALL(tkill, g_rpc_queue->rpc_threads[i], SIGUSR2);

if (interrupted_in_enclave(uc) || interrupted_in_aex_profiling()) {
/* signal arrived while in app/LibOS/trusted PAL code or when handling another AEX, handle
* signal inside enclave */
assert(event == PAL_EVENT_INTERRUPTED || event == PAL_EVENT_QUIT);

if (interrupted_in_enclave(uc) || interrupted_in_aex()) {
/* signal arrived while in app/LibOS/trusted PAL code or when handling another AEX, mark to
* handle signal inside enclave */
if (pal_get_host_tcb()->aex_async_event != PAL_EVENT_QUIT) {
/* Do not overwrite `PAL_EVENT_QUIT`. See explanation below. */
pal_get_host_tcb()->aex_async_event = event;
}
pal_get_host_tcb()->async_signal_cnt++;
sgx_raise(event);
return;
}

assert(event == PAL_EVENT_INTERRUPTED || event == PAL_EVENT_QUIT);
if (pal_get_host_tcb()->last_async_event != PAL_EVENT_QUIT) {
/* Do not overwrite `PAL_EVENT_QUIT`. The only other possible event here is
* `PAL_EVENT_INTERRUPTED`, which is basically a no-op (just makes sure that a thread
Expand All @@ -188,6 +202,40 @@ static void handle_dummy_signal(int signum, siginfo_t* info, struct ucontext* uc
/* we need this handler to interrupt blocking syscalls in RPC threads */
}

/* The handle_sync_signal() and handle_async_signal() functions, executed in signal-handling
 * context, recorded pending sync/async signals (that happened during enclave-thread execution)
* -- now the regular context must inform the enclave about these events. This function is
* potentially noreturn -- if there is at least one signal, and the enclave is ready to handle it,
* then the call to sgx_raise() never returns. Only one of potentially two signals (one sync and
* one async) will be added by this function, because sgx_raise() doesn't return; the hope is that
* the second (async) signal will be added on some later AEX. Also note that new sync signals
* cannot occur while in this function, but new async signals can occur (since we are in regular
* context and cannot block async signals), thus handling async signals must be aware of concurrent
* signal handling code. */
void sgx_handle_aex_signal(void) {
    if (pal_get_host_tcb()->aex_sync_event != PAL_EVENT_NO_EVENT) {
        /* sync event must always be consumed by the enclave (there is no scenario where in-enclave
         * stage-1 handling of another sync/async event would generate a sync event) */
        enum pal_event event = pal_get_host_tcb()->aex_sync_event;
        /* clear the slot before raising: for sync events the following sgx_raise() never returns,
         * so the slot must already be empty when the enclave is entered */
        pal_get_host_tcb()->aex_sync_event = PAL_EVENT_NO_EVENT;
        sgx_raise(event);
        return;
    }

    /* atomically take ownership of the pending async event: new async signals can arrive
     * concurrently (we are in regular context and cannot block them) and the signal handler
     * writes aex_async_event, hence an atomic exchange rather than a plain read + clear */
    enum pal_event event = __atomic_exchange_n(&pal_get_host_tcb()->aex_async_event,
                                               PAL_EVENT_NO_EVENT, __ATOMIC_RELAXED);
    if (event != PAL_EVENT_NO_EVENT) {
        /* if async event is consumed by the enclave, then below sgx_raise() does not return;
         * otherwise it means that the enclave was already in the middle of stage-1 handler and
         * could not consume this async event: simply ignore for now; the very next AEX will try to
         * raise this async event again */
        sgx_raise(event);
        /* sgx_raise() returned, i.e. the event was not consumed: put it back for the next AEX,
         * but only if no newer async event was recorded meanwhile (in that case the CAS fails
         * and this older event is dropped in favor of the newer one) */
        enum pal_event no_event = PAL_EVENT_NO_EVENT;
        __atomic_compare_exchange_n(&pal_get_host_tcb()->aex_async_event, &no_event, event,
                                    /*weak=*/false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
    }
}

int sgx_signal_setup(void) {
int ret;

Expand Down
16 changes: 14 additions & 2 deletions pal/src/host/linux-sgx/pal_exception.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@

#define ADDR_IN_PAL(addr) ((void*)(addr) > TEXT_START && (void*)(addr) < TEXT_END)

__attribute_no_sanitize_address
/* Entered instead of plain _restore_sgx_context() when AEX Notify is enabled: per the TODO
 * below, this is the intended hook point for the AEX-Notify mitigation handler before resuming
 * the interrupted in-enclave context; noreturn because restoring the context never comes back. */
noreturn static void apply_mitigation_handler_and_restore_sgx_context(sgx_cpu_context_t* uc, PAL_XREGS_STATE* xregs_state)
{
    assert(g_aex_notify_enabled);
    // TODO - apply mitigation and restore context
    // Right now, directly restore the context and continue execution
    _restore_sgx_context(uc, xregs_state);
}


/* Restore an sgx_cpu_context_t as generated by .Lhandle_exception. Execution will
* continue as specified by the rip in the context. */
__attribute_no_sanitize_address
Expand All @@ -31,8 +41,10 @@ noreturn static void restore_sgx_context(sgx_cpu_context_t* uc, PAL_XREGS_STATE*
uintptr_t sig_stack_high = GET_ENCLAVE_TCB(sig_stack_high);
asan_unpoison_current_stack(sig_stack_low, sig_stack_high - sig_stack_low);
#endif

_restore_sgx_context(uc, xregs_state);
if (g_aex_notify_enabled && GET_ENCLAVE_TCB(ready_for_aex_notify))
apply_mitigation_handler_and_restore_sgx_context(uc, xregs_state);
else
_restore_sgx_context(uc, xregs_state);
}

noreturn static void restore_pal_context(sgx_cpu_context_t* uc, PAL_CONTEXT* ctx) {
Expand Down
Loading