Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PAL/Linux-SGX] Add AEX-Notify support in exception handling flow #1531

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 80 additions & 6 deletions pal/src/host/linux-sgx/enclave_entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@ enclave_entry:
cmpl $0, %edi
jne .Lcssa1_exception_determine_when

# TODO: we shouldn't ignore definitely-malicious exception, but we do it now
jmp .Lcssa1_exception_eexit
# definitely-malicious exception (correct untrusted runtime would never generate "0" event)
FAIL_LOOP

.Lcssa1_exception_determine_when:
# If this enclave thread has not been initialized yet, we should not try to call an event
Expand Down Expand Up @@ -263,7 +263,9 @@ enclave_entry:

# We are interrupted during the never-returning OCALL_EXIT. Because the thread is going to exit
# anyway, we can ignore this exception.
jmp .Lcssa1_exception_eexit
movq %rdx, %rbx # upon EENTER the exit address was in RDX, mov it to RBX for EEXIT
movq $EEXIT, %rax
enclu

.Lcssa1_exception_during_ocall_flows:
# At this point, we are in the stage-1 exception handler (CSSA=1) and
Expand Down Expand Up @@ -543,8 +545,8 @@ enclave_entry:
movq %rdi, SGX_GPR_RIP(%rbx)

# copy the whole SSA[0].XSAVE region to the CPU context's XSAVE on stack;
# __restore_xregs / __save_xregs clobber RDX so need to stash it in RBX
movq %rdx, %rbx
# __restore_xregs / __save_xregs clobber RDX so need to stash it in R10
movq %rdx, %r10
movq %gs:SGX_SSA, %rdi
leaq 1f(%rip), %r11
jmp __restore_xregs
Expand All @@ -553,9 +555,14 @@ enclave_entry:
leaq 2f(%rip), %r11
jmp __save_xregs
2:
movq %rbx, %rdx
movq %r10, %rdx

.Lcssa1_exception_eexit:
cmpb $1, g_aex_notify_enabled(%rip)
je 1f

# AEX Notify is not set, stage-1 exception handler follows the normal EEXIT flow.

# .Lcssa0_ocall_or_cssa1_exception_eexit has an ABI that uses RSI, RDI, RSP; clear the relevant
# regs (note that stage-1 handler didn't clobber RSP -- which contains an untrusted pointer to
# untrusted-runtime stack -- but this flow doesn't read/write RSP at all so there is no need to
Expand All @@ -567,6 +574,52 @@ enclave_entry:
movq %rdx, %rbx
jmp .Lcssa0_ocall_or_cssa1_exception_eexit

1:
# AEX Notify is set; instead of invoking EEXIT, the stage-1 exception handler executes the new
# EDECCSSA instruction.

# after restoring SSA[0] context into GPRs, we'll need to jump to the stage-2 handler, so
# memorize SSA[0].GPRSGX.RIP in an otherwise-unused R11
movq SGX_GPR_RIP(%rbx), %r11

# clear bit 0 within SSA[0].GPRSGX.AEXNOTIFY (so that ERESUME actually resumes stage-2 handler)
movb $0, SGX_GPR_AEXNOTIFY(%rbx)

# restore context from SSA[0] (which was already modified above to jump to stage-2 C handler);
# note that XSAVE area (xregs) was already restored above, so only need to restore GPRs;
# note that we only restore those GPRs of SSA[0] that were used/modified by stage-1 handler;
# note that we don't care about SSA[0].GPRSGX.{URSP,URBP,EXITINFO,RESERVED,GSBASE}

leaq SGX_GPR_RFLAGS(%rbx), %rsp # trick to restore RFLAGS directly from SSA[0].GPRSGX.RFLAGS
popfq

movq SGX_GPR_FSBASE(%rbx), %rdi
.byte 0xf3, 0x48, 0x0f, 0xae, 0xd7 # WRFSBASE %RDI

movq SGX_GPR_RDI(%rbx), %rdi
movq SGX_GPR_RSI(%rbx), %rsi
movq SGX_GPR_RDX(%rbx), %rdx
movq SGX_GPR_RCX(%rbx), %rcx # not strictly needed
movq SGX_GPR_R8(%rbx), %r8 # not strictly needed
movq SGX_GPR_R9(%rbx), %r9 # not strictly needed
movq SGX_GPR_R10(%rbx), %r10 # not strictly needed
movq SGX_GPR_R12(%rbx), %r12 # not strictly needed
movq SGX_GPR_R13(%rbx), %r13 # not strictly needed
movq SGX_GPR_R14(%rbx), %r14 # not strictly needed
movq SGX_GPR_R15(%rbx), %r15 # not strictly needed
movq SGX_GPR_RBP(%rbx), %rbp # not strictly needed
movq SGX_GPR_RSP(%rbx), %rsp
xorq %rbx, %rbx # for sanity

# go from SSA[1] to SSA[0] (more specifically, simply decrement CSSA from 1 to 0);
# must be careful after this ENCLU instruction because may be interrupted by new exceptions
movq $EDECCSSA, %rax
enclu

# finally jump to the stage-2 C exception handler
.Lfinalize_context_after_deccssa_inst0:
jmp *%r11

.cfi_endproc


Expand Down Expand Up @@ -602,6 +655,15 @@ sgx_ocall:
pushq %rbx
.cfi_offset %rbx, -24

# Disable aex-notify before stack switch
cmpq $0, %gs:SGX_READY_FOR_AEX_NOTIFY
je 1f
movq %gs:SGX_GPR, %rax
cmpb $0, SGX_GPR_AEXNOTIFY(%rax)
je 1f
movb $0, SGX_GPR_AEXNOTIFY(%rax)
movq $0x1, %gs:SGX_AEX_NOTIFY_FLAG # set aexnotify_flag
1:
CHECK_IF_SIGNAL_STACK_IS_USED %rsp, .Lon_signal_stack_ocall, .Lout_of_signal_stack_ocall

.Lout_of_signal_stack_ocall:
Expand Down Expand Up @@ -757,6 +819,16 @@ sgx_ocall:
cmpq $PAL_EVENT_NUM_BOUND, %rsi
jb 2f

1:
# check if it is ready to enable AEX-Notify
cmpq $0, %gs:SGX_READY_FOR_AEX_NOTIFY
je 1f
# check if aexnotify is disabled during ocall-exit
cmpq $0, %gs:SGX_AEX_NOTIFY_FLAG
je 1f
movq $0, %gs:SGX_AEX_NOTIFY_FLAG
movq %gs:SGX_GPR, %rdi
movb $0x1, SGX_GPR_AEXNOTIFY(%rdi)
1:
# there was no event, simply call _restore_sgx_context(uc, xsave_area)
movq %rsp, %rdi
Expand All @@ -768,6 +840,8 @@ sgx_ocall:

2:
# there was some event, call _PalHandleExternalEvent(event, uc, xregs_state)
# Note we don't check AEX-Notify here. If we disabled it during ocall-exit and
# execution reaches the code below, AEX-Notify will be re-enabled after event handling

# clear the Alignment Check flag to prevent #AC-fault side channel
pushfq
Expand Down
71 changes: 62 additions & 9 deletions pal/src/host/linux-sgx/host_entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ sgx_ecall:
enclu

# currently only ECALL_THREAD_RESET returns
.Lafter_resume:
popq %r15
.cfi_adjust_cfa_offset -8
popq %r14
Expand All @@ -70,9 +69,9 @@ async_exit_pointer:
# increment per-thread AEX counter for stats
lock incq %gs:PAL_HOST_TCB_AEX_CNT

#ifdef DEBUG
# Inform that we are in AEX profiling code
movb $1, %gs:PAL_HOST_TCB_IN_AEX_PROF
# Inform that we are in AEX code
movb $1, %gs:PAL_HOST_TCB_IN_AEX

# Save ERESUME parameters
pushq %rax
.cfi_adjust_cfa_offset 8
Expand All @@ -84,11 +83,17 @@ async_exit_pointer:
# Align stack (required by System V AMD64 ABI)
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $RED_ZONE_SIZE, %rsp
andq $~0xF, %rsp

#ifdef DEBUG
# Call sgx_profile_sample_aex with %rdi = TCS
movq %rbx, %rdi
call sgx_profile_sample_aex
#endif

# Check if there are sync/async signals pending and invoke in-enclave stage-1 handler if any
call sgx_handle_aex_signal

# Restore stack
movq %rbp, %rsp
Expand All @@ -101,18 +106,22 @@ async_exit_pointer:
.cfi_adjust_cfa_offset -8
popq %rax
.cfi_adjust_cfa_offset -8
movb $0, %gs:PAL_HOST_TCB_IN_AEX_PROF
#endif

.cfi_endproc
movb $0, %gs:PAL_HOST_TCB_IN_AEX

# In case of normal ERESUME, RDI is not used;
# In case of ERESUME-morphed-into-EENTER, RDI is external event in flow .Lcssa1_exception
movq $PAL_EVENT_INTERRUPTED, %rdi

# fall-through to ERESUME
.cfi_endproc

.global eresume_pointer
.type eresume_pointer, @function

eresume_pointer:
enclu # perform ERESUME
# perform ERESUME (RAX already contains "ERESUME" because that's what AEX hardware flow does)
enclu

.global async_exit_pointer_end
.type async_exit_pointer_end, @function
Expand All @@ -123,8 +132,52 @@ async_exit_pointer_end:
.type sgx_raise, @function

sgx_raise:
.cfi_startproc
leaq .Lafter_resume(%rip), %rdx
jmp .Ldo_ecall_callee_save

# other arguments: RDI - event (sync or async signal)

# below logic is the same as for sgx_ecall(), see comments for that function
pushq %rbx
.cfi_adjust_cfa_offset 8
pushq %rbp
.cfi_adjust_cfa_offset 8
pushq %r12
.cfi_adjust_cfa_offset 8
pushq %r13
.cfi_adjust_cfa_offset 8
pushq %r14
.cfi_adjust_cfa_offset 8
pushq %r15
.cfi_adjust_cfa_offset 8

lock incq %gs:PAL_HOST_TCB_EENTER_CNT
movq %gs:PAL_HOST_TCB_TCS, %rbx
leaq async_exit_pointer(%rip), %rcx
movq $EENTER, %rax

.global sgx_raise_eenter_instr
.type sgx_raise_eenter_instr, @function

sgx_raise_eenter_instr:
enclu

.Lafter_resume:
popq %r15
.cfi_adjust_cfa_offset -8
popq %r14
.cfi_adjust_cfa_offset -8
popq %r13
.cfi_adjust_cfa_offset -8
popq %r12
.cfi_adjust_cfa_offset -8
popq %rbp
.cfi_adjust_cfa_offset -8
popq %rbx
.cfi_adjust_cfa_offset -8
retq
.cfi_endproc


.Lsgx_entry:
# arguments: RDI - code, RSI - ms
Expand Down
68 changes: 58 additions & 10 deletions pal/src/host/linux-sgx/host_exception.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ static bool interrupted_in_enclave(struct ucontext* uc) {
return rip >= (unsigned long)async_exit_pointer && rip < (unsigned long)async_exit_pointer_end;
}

static bool interrupted_in_aex_profiling(void) {
return pal_get_host_tcb()->is_in_aex_profiling != 0;
}
static bool interrupted_in_aex(void) {
    /* per-thread flag is raised by the AEX trampoline (async_exit_pointer) for the whole
     * duration of AEX handling and cleared just before ERESUME */
    return !!pal_get_host_tcb()->is_in_aex;
}

static void handle_sync_signal(int signum, siginfo_t* info, struct ucontext* uc) {
enum pal_event event = signal_to_pal_event(signum);
Expand All @@ -107,10 +107,20 @@ static void handle_sync_signal(int signum, siginfo_t* info, struct ucontext* uc)
for (size_t i = 0; i < g_rpc_queue->rpc_threads_cnt; i++)
DO_SYSCALL(tkill, g_rpc_queue->rpc_threads[i], SIGUSR2);

if (event == PAL_EVENT_MEMFAULT &&
ucontext_get_ip(uc) == (unsigned long)&sgx_raise_eenter_instr) {
/* this is #GP on EENTER instruction during sgx_raise() -- means that some async signal
* arrived while the enclave thread executes in CSSA=1 (stage-1 exception handler); we
* should ignore this fault by skipping EENTER, see also sgx_handle_aex_signal() */
ucontext_set_ip(uc, ucontext_get_ip(uc) + /*sizeof(ENCLU)=*/3);
return;
}

if (interrupted_in_enclave(uc)) {
/* exception happened in app/LibOS/trusted PAL code, handle signal inside enclave */
/* exception happened in app/LibOS/trusted PAL code, mark to handle signal inside enclave */
assert(pal_get_host_tcb()->aex_sync_event == PAL_EVENT_NO_EVENT);
pal_get_host_tcb()->aex_sync_event = event;
pal_get_host_tcb()->sync_signal_cnt++;
sgx_raise(event);
return;
}

Expand Down Expand Up @@ -158,15 +168,19 @@ static void handle_async_signal(int signum, siginfo_t* info, struct ucontext* uc
for (size_t i = 0; i < g_rpc_queue->rpc_threads_cnt; i++)
DO_SYSCALL(tkill, g_rpc_queue->rpc_threads[i], SIGUSR2);

if (interrupted_in_enclave(uc) || interrupted_in_aex_profiling()) {
/* signal arrived while in app/LibOS/trusted PAL code or when handling another AEX, handle
* signal inside enclave */
assert(event == PAL_EVENT_INTERRUPTED || event == PAL_EVENT_QUIT);

if (interrupted_in_enclave(uc) || interrupted_in_aex()) {
/* signal arrived while in app/LibOS/trusted PAL code or when handling another AEX, mark to
* handle signal inside enclave */
if (pal_get_host_tcb()->aex_async_event != PAL_EVENT_QUIT) {
/* Do not overwrite `PAL_EVENT_QUIT`. See explanation below. */
pal_get_host_tcb()->aex_async_event = event;
}
pal_get_host_tcb()->async_signal_cnt++;
sgx_raise(event);
return;
}

assert(event == PAL_EVENT_INTERRUPTED || event == PAL_EVENT_QUIT);
if (pal_get_host_tcb()->last_async_event != PAL_EVENT_QUIT) {
/* Do not overwrite `PAL_EVENT_QUIT`. The only other possible event here is
* `PAL_EVENT_INTERRUPTED`, which is basically a no-op (just makes sure that a thread
Expand All @@ -188,6 +202,40 @@ static void handle_dummy_signal(int signum, siginfo_t* info, struct ucontext* uc
/* we need this handler to interrupt blocking syscalls in RPC threads */
}

/* The handle_sync_signal() and handle_async_signal() functions, executed in signal-handling
 * context, recorded pending sync/async signals (that happened during enclave-thread execution)
* -- now the regular context must inform the enclave about these events. This function is
* potentially noreturn -- if there is at least one signal, and the enclave is ready to handle it,
* then the call to sgx_raise() never returns. Only one of potentially two signals (one sync and
* one async) will be added by this function, because sgx_raise() doesn't return; the hope is that
* the second (async) signal will be added on some later AEX. Also note that new sync signals
* cannot occur while in this function, but new async signals can occur (since we are in regular
* context and cannot block async signals), thus handling async signals must be aware of concurrent
* signal handling code. */
void sgx_handle_aex_signal(void) {
    if (pal_get_host_tcb()->aex_sync_event != PAL_EVENT_NO_EVENT) {
        /* sync event must always be consumed by the enclave (there is no scenario where in-enclave
         * stage-1 handling of another sync/async event would generate a sync event) */
        enum pal_event event = pal_get_host_tcb()->aex_sync_event;
        /* clear the slot before raising: for sync events the following sgx_raise() never returns,
         * so the slot must already be empty when the enclave is entered */
        pal_get_host_tcb()->aex_sync_event = PAL_EVENT_NO_EVENT;
        sgx_raise(event);
        return;
    }

    /* atomically take ownership of the pending async event: new async signals can arrive
     * concurrently (we are in regular context and cannot block them) and the signal handler
     * writes aex_async_event, hence an atomic exchange rather than a plain read + clear */
    enum pal_event event = __atomic_exchange_n(&pal_get_host_tcb()->aex_async_event,
                                               PAL_EVENT_NO_EVENT, __ATOMIC_RELAXED);
    if (event != PAL_EVENT_NO_EVENT) {
        /* if async event is consumed by the enclave, then below sgx_raise() does not return;
         * otherwise it means that the enclave was already in the middle of stage-1 handler and
         * could not consume this async event: simply ignore for now; the very next AEX will try to
         * raise this async event again */
        sgx_raise(event);
        /* sgx_raise() returned, i.e. the event was not consumed: put it back for the next AEX,
         * but only if no newer async event was recorded meanwhile (in that case the CAS fails
         * and this older event is dropped in favor of the newer one) */
        enum pal_event no_event = PAL_EVENT_NO_EVENT;
        __atomic_compare_exchange_n(&pal_get_host_tcb()->aex_async_event, &no_event, event,
                                    /*weak=*/false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
    }
}

int sgx_signal_setup(void) {
int ret;

Expand Down
16 changes: 14 additions & 2 deletions pal/src/host/linux-sgx/pal_exception.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@

#define ADDR_IN_PAL(addr) ((void*)(addr) > TEXT_START && (void*)(addr) < TEXT_END)

__attribute_no_sanitize_address
/* Entered instead of plain _restore_sgx_context() when AEX Notify is enabled: per the TODO
 * below, this is the intended hook point for the AEX-Notify mitigation handler before resuming
 * the interrupted in-enclave context; noreturn because restoring the context never comes back. */
noreturn static void apply_mitigation_handler_and_restore_sgx_context(sgx_cpu_context_t* uc, PAL_XREGS_STATE* xregs_state)
{
    assert(g_aex_notify_enabled);
    // TODO - apply mitigation and restore context
    // Right now, directly restore the context and continue execution
    _restore_sgx_context(uc, xregs_state);
}


/* Restore an sgx_cpu_context_t as generated by .Lhandle_exception. Execution will
* continue as specified by the rip in the context. */
__attribute_no_sanitize_address
Expand All @@ -31,8 +41,10 @@ noreturn static void restore_sgx_context(sgx_cpu_context_t* uc, PAL_XREGS_STATE*
uintptr_t sig_stack_high = GET_ENCLAVE_TCB(sig_stack_high);
asan_unpoison_current_stack(sig_stack_low, sig_stack_high - sig_stack_low);
#endif

_restore_sgx_context(uc, xregs_state);
if (g_aex_notify_enabled && GET_ENCLAVE_TCB(ready_for_aex_notify))
apply_mitigation_handler_and_restore_sgx_context(uc, xregs_state);
else
_restore_sgx_context(uc, xregs_state);
}

noreturn static void restore_pal_context(sgx_cpu_context_t* uc, PAL_CONTEXT* ctx) {
Expand Down
Loading