Skip to content

Commit

Permalink
restorer: shstk: implement shadow stack restore
Browse files Browse the repository at this point in the history
The restore of a task with shadow stack enabled adds these steps:

* switch from the default shadow stack to a temporary shadow stack
  allocated in the premapped area
* unmap CRIU mappings; nothing changed here, but it's important that
  CRIU mappings can be removed only after switching to a temporary
  shadow stack
* create shadow stack VMA with map_shadow_stack()
* restore shadow stack contents with wrss
* switch to "real" shadow stack
* lock shadow stack features

Signed-off-by: Mike Rapoport (IBM) <[email protected]>
  • Loading branch information
rppt committed Nov 30, 2023
1 parent 3f48321 commit 394dab0
Show file tree
Hide file tree
Showing 6 changed files with 271 additions and 7 deletions.
1 change: 1 addition & 0 deletions compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,4 @@ __NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
__NR_rseq 334 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
__NR_membarrier 324 sys_membarrier (int cmd, unsigned int flags, int cpu_id)
__NR_map_shadow_stack 453 sys_map_shadow_stack (unsigned long addr, unsigned long size, unsigned int flags)
23 changes: 21 additions & 2 deletions compel/arch/x86/src/lib/include/uapi/asm/sigframe.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,24 @@ static inline void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe)
#define USER32_CS 0x23

/* clang-format off */
/*
 * rst_sigreturn in the restorer is a non-inline call, which adds an entry
 * to the shadow stack above the sigframe token.
 *
 * If shadow stack is enabled, increment the shadow stack pointer by one
 * entry to remove it:
 *  - rax is zeroed and then loaded with rdsspq;
 *  - a value of zero is treated as "shadow stack not enabled" and the
 *    increment is skipped;
 *  - otherwise incsspq is executed with a count of 1.
 *
 * rax is declared as clobbered.
 */
#define ARCH_SHSTK_POP() \
asm volatile( \
"xor %%rax, %%rax\n" \
"rdsspq %%rax\n" \
"cmpq $0, %%rax\n" \
"jz 1f\n" \
"movq $1, %%rax\n" \
"incsspq %%rax\n" \
"1:\n" \
: : \
: "rax")

#define ARCH_RT_SIGRETURN_NATIVE(new_sp) \
asm volatile( \
"movq %0, %%rax \n" \
Expand Down Expand Up @@ -205,9 +223,10 @@ static inline void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe)

/*
 * Pick the sigreturn sequence based on (rt_sigframe)->is_native: the native
 * path runs ARCH_SHSTK_POP() before ARCH_RT_SIGRETURN_NATIVE(new_sp), the
 * other path goes straight to ARCH_RT_SIGRETURN_COMPAT(new_sp).
 *
 * NOTE(review): the lines below appear to carry both the pre-change and the
 * post-change form of the condition (diff view without +/- markers) --
 * confirm against the actual header before relying on this text.
 */
#define ARCH_RT_SIGRETURN_RST(new_sp, rt_sigframe) \
do { \
if ((rt_sigframe)->is_native) \
if ((rt_sigframe)->is_native) { \
ARCH_SHSTK_POP(); \
ARCH_RT_SIGRETURN_NATIVE(new_sp); \
else \
} else \
ARCH_RT_SIGRETURN_COMPAT(new_sp); \
} while (0)

Expand Down
204 changes: 199 additions & 5 deletions criu/arch/x86/include/asm/shstk.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
#endif

/* arch/x86/include/uapi/asm/prctl.h */
#define ARCH_SHSTK_ENABLE 0x5001
#define ARCH_SHSTK_ENABLE 0x5001
#define ARCH_SHSTK_DISABLE 0x5002
#define ARCH_SHSTK_LOCK 0x5003
#define ARCH_SHSTK_UNLOCK 0x5004
#define ARCH_SHSTK_STATUS 0x5005
#define ARCH_SHSTK_UNLOCK 0x5004
#define ARCH_SHSTK_STATUS 0x5005

#define ARCH_SHSTK_SHSTK (1ULL << 0)
#define ARCH_SHSTK_WRSS (1ULL << 1)
Expand Down Expand Up @@ -66,13 +66,207 @@ int arch_shstk_prepare(struct pstree_item *item, CoreEntry *core,
struct task_restore_args *ta);
#define arch_shstk_prepare arch_shstk_prepare

#if 0
int arch_shstk_unlock(struct pstree_item *item, CoreEntry *core, pid_t pid);
#define arch_shstk_unlock arch_shstk_unlock

int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
int (*func)(void *arg), void *arg);

Check warning on line 73 in criu/arch/x86/include/asm/shstk.h

View workflow job for this annotation

GitHub Actions / build

#define arch_shstk_trampoline arch_shstk_trampoline
#endif

#ifdef CR_NOGLIBC

#include <compel/plugins/std/syscall.h>
#include <compel/cpu.h>
#include "vma.h"

#define SHSTK_BUSY_BIT (1UL << 0) /* BIT(0) */

Check warning on line 82 in criu/arch/x86/include/asm/shstk.h

View workflow job for this annotation

GitHub Actions / build


/*
 * Create a shadow stack mapping of @size bytes at @addr using
 * map_shadow_stack() with SHADOW_STACK_SET_TOKEN.
 *
 * The mapping is only accepted if the kernel placed it exactly at @addr.
 *
 * Returns 0 on success, -1 if the syscall failed or the mapping landed at a
 * different address.
 */
static inline int shstk_map(unsigned long addr, unsigned long size)
{
	long shstk = sys_map_shadow_stack(addr, size, SHADOW_STACK_SET_TOKEN);
	unsigned long mapped;

	if (shstk < 0) {
		pr_err("Failed to map shadow stack at %lx: %ld\n", addr, shstk);
		return -1;
	}

	/*
	 * Non-negative from here on: handle it as an address so that the
	 * comparison and the %lx conversions operate on matching types.
	 */
	mapped = (unsigned long)shstk;

	if (mapped != addr) {
		pr_err("Shadow stack address mismatch: need %lx, got %lx\n", addr, mapped);
		return -1;
	}

	pr_info("Created shadow stack at %lx\n", mapped);

	return 0;
}

/* clang-format off */
/*
 * Read the current shadow stack pointer.
 *
 * rdsspq does not write its destination register when shadow stack is not
 * active, so the result is pre-set to 0 and passed as an in/out operand.
 * A return value of 0 therefore means "no shadow stack", the same convention
 * ARCH_SHSTK_POP() relies on.
 */
static inline unsigned long get_ssp(void)
{
	unsigned long ssp = 0;

	asm volatile("rdsspq %0" : "+r"(ssp));

	return ssp;
}

/*
 * Store the 64-bit value @val at shadow stack address @addr with the wrssq
 * instruction. The "memory" clobber tells the compiler that memory is
 * modified behind its back.
 */
static inline void wrssq(unsigned long addr, unsigned long val)
{
	asm volatile("wrssq %[value], (%[where])"
		     : /* no outputs */
		     : [where] "r"(addr), [value] "r"(val)
		     : "memory");
}
/* clang-format on */

/*
 * Switch the shadow stack pointer to @new_ssp.
 *
 * rstorssp is executed with @new_ssp as its memory operand and is followed
 * by saveprevssp. Callers in this file point @new_ssp at a slot that holds a
 * stack switch token (either written explicitly with wrssq or, presumably,
 * placed by map_shadow_stack() with SHADOW_STACK_SET_TOKEN -- TODO confirm).
 *
 * The previous and the new SSP are reported with pr_debug().
 */
static always_inline void shstk_switch_ssp(unsigned long new_ssp)
{
/* sampled before the switch, used only for the debug message below */
unsigned long old_ssp = get_ssp();

/* the order of these two instructions must not change */
asm volatile("rstorssp (%0)\n" :: "r"(new_ssp));
asm volatile("saveprevssp");

pr_debug("changed ssp from %lx to %lx\n", old_ssp, new_ssp);
}

/*
 * Disable writes to the shadow stack (WRSS) and lock its disable/enable
 * control.
 *
 * Returns 0 on success, otherwise the non-zero arch_prctl() result of the
 * step that failed.
 */
static inline int shstk_finalize(void)
{
	int err = sys_arch_prctl(ARCH_SHSTK_DISABLE, ARCH_SHSTK_WRSS);

	if (err != 0) {
		pr_err("Failed to disable writes to shadow stack\n");
		return err;
	}

	err = sys_arch_prctl(ARCH_SHSTK_LOCK, ARCH_SHSTK_SHSTK);
	if (err != 0)
		pr_err("Failed to lock shadow stack controls\n");

	return err;
}

/*
 * Restore contents of the shadow stack and set shadow stack pointer
 *
 * Does nothing and returns 0 unless ARCH_SHSTK_SHSTK is set in @cet->cet.
 * Otherwise:
 *  - the shadow stack VMA [vma_start, vma_start + vma_size) is created;
 *  - SSP is switched to the last 8-byte slot of that VMA;
 *  - the saved contents are copied from the buffer at @cet->premmaped_addr,
 *    from the top of the VMA down to @cet->ssp;
 *  - a sigreturn token and a stack switch token are written below the
 *    restored data and SSP is switched to the stack switch token;
 *  - the buffer is unmapped and shstk_finalize() is called.
 *
 * Returns 0 on success, -1 if the VMA could not be created, the negative
 * sys_munmap() result if unmapping failed, or the result of
 * shstk_finalize().
 */
static always_inline int shstk_restore(struct rst_shstk_info *cet)
{
/* source buffer holding the saved shadow stack contents */
unsigned long *shstk_data = (unsigned long *)cet->premmaped_addr;
/* address of the topmost 8-byte slot in the target VMA */
unsigned long ssp = cet->vma_start + cet->vma_size - 8;
/* index in shstk_data of the word that belongs in that topmost slot */
unsigned long shstk_top = cet->vma_size / 8 - 1;
unsigned long val;
long ret;

if (!(cet->cet & ARCH_SHSTK_SHSTK))
return 0;

if (shstk_map(cet->vma_start, cet->vma_size))
return -1;

/*
 * Switch shadow stack from temporary location to the actual task's
 * shadow stack VMA
 */
shstk_switch_ssp(ssp);

/*
 * restore shadow stack contents, walking downwards one slot at a time;
 * the loop stops with ssp at the first slot below cet->ssp
 */
for (; ssp >= cet->ssp; ssp -= 8, shstk_top--)
wrssq(ssp, shstk_data[shstk_top]);

/*
 * Add tokens for sigreturn frame and for switch of the shadow stack.
 * The sigreturn token will be checked by the kernel during
 * processing of sigreturn
 * The token for stack switch is required by rstorssp and
 * saveprevssp semantics
 */

/* token for sigreturn frame, stored in the slot just below the data */
val = ALIGN_DOWN(cet->ssp, 8) | SHSTK_DATA_BIT;
wrssq(ssp, val);

/*
 * shadow stack switch token: holds the address of the sigreturn token
 * slot with SHSTK_BUSY_BIT set, and is stored one slot below it
 */
val = ssp | SHSTK_BUSY_BIT;
ssp -= 8;
wrssq(ssp, val);

/* reset shadow stack pointer to the proper location */
shstk_switch_ssp(ssp);

/*
 * NOTE(review): the extra PAGE_SIZE presumably covers the temporary
 * shadow stack page that follows the buffer -- confirm against the code
 * that lays out premmaped_addr and tmp_shstk.
 */
ret = sys_munmap(shstk_data, cet->vma_size + PAGE_SIZE);
if (ret < 0) {
pr_err("Failed to unmap premmaped shadow stack\n");
return ret;
}

return shstk_finalize();
}
#define arch_shstk_restore shstk_restore

/*
 * Disable shadow stack
 *
 * Turns off WRSS, then the shadow stack itself, and finally locks the
 * shadow stack control.
 *
 * Returns 0 on success, otherwise the non-zero arch_prctl() result of the
 * first step that failed. A failure to lock is reported to the caller as
 * well, the same way shstk_finalize() does, instead of being logged as an
 * error and then dropped.
 */
static inline int shstk_disable(void)
{
	int ret;

	ret = sys_arch_prctl(ARCH_SHSTK_DISABLE, ARCH_SHSTK_WRSS);
	if (ret) {
		pr_err("Failed to disable writes to shadow stack\n");
		return ret;
	}

	ret = sys_arch_prctl(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK);
	if (ret) {
		pr_err("Failed to disable shadow stack\n");
		return ret;
	}

	ret = sys_arch_prctl(ARCH_SHSTK_LOCK, ARCH_SHSTK_SHSTK);
	if (ret)
		pr_err("Failed to lock shadow stack controls\n");

	return ret;
}

/*
 * Switch to temporary shadow stack
 *
 * Does nothing and returns 0 unless ARCH_SHSTK_SHSTK is set in @cet->cet.
 * Otherwise the page at @cet->tmp_shstk is unmapped and recreated as a
 * shadow stack, SSP is moved to its last 8-byte slot and writes to the
 * shadow stack (WRSS) are enabled.
 *
 * Returns 0 on success, -1 if the page could not be unmapped or remapped,
 * or the non-zero arch_prctl() result if enabling WRSS failed.
 */
static always_inline int shstk_switch_to_restorer(struct rst_shstk_info *cet)
{
	long err;

	if ((cet->cet & ARCH_SHSTK_SHSTK) == 0)
		return 0;

	err = sys_munmap((void *)cet->tmp_shstk, PAGE_SIZE);
	if (err < 0) {
		pr_err("Failed to unmap area for temporary shadow stack\n");
		return -1;
	}

	if (shstk_map(cet->tmp_shstk, PAGE_SIZE) < 0)
		return -1;

	/*
	 * Leave the default shadow stack created by the kernel and continue
	 * on the temporary one that lives in the premmaped area
	 */
	shstk_switch_ssp(cet->tmp_shstk + PAGE_SIZE - 8);

	err = sys_arch_prctl(ARCH_SHSTK_ENABLE, ARCH_SHSTK_WRSS);
	if (err != 0) {
		pr_err("Failed to enable writes to shadow stack\n");
		return err;
	}

	return 0;
}
#define arch_shstk_switch_to_restorer shstk_switch_to_restorer

#endif /* CR_NOGLIBC */

#endif /* __CR_ASM_SHSTK_H__ */
16 changes: 16 additions & 0 deletions criu/include/restorer.h
Original file line number Diff line number Diff line change
Expand Up @@ -339,4 +339,20 @@ enum {
#define __r_sym(name) restorer_sym##name
#define restorer_sym(rblob, name) (void *)(rblob + __r_sym(name))

#ifndef arch_shstk_switch_to_restorer
/*
 * Fallback used when nothing has defined arch_shstk_switch_to_restorer
 * before this point: there is no shadow stack to switch, report success.
 */
static inline int arch_shstk_switch_to_restorer(struct rst_shstk_info *shstk)
{
	(void)shstk; /* intentionally unused */

	return 0;
}
#define arch_shstk_switch_to_restorer arch_shstk_switch_to_restorer
#endif

#ifndef arch_shstk_restore
/*
 * Fallback used when nothing has defined arch_shstk_restore before this
 * point: there is no shadow stack to restore, report success.
 */
static inline int arch_shstk_restore(struct rst_shstk_info *shstk)
{
	(void)shstk; /* intentionally unused */

	return 0;
}
#define arch_shstk_restore arch_shstk_restore
#endif

#endif /* __CR_RESTORER_H__ */
5 changes: 5 additions & 0 deletions criu/pie/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ ifeq ($(ARCH),mips)
ccflags-y += -mno-abicalls -fno-pic
endif

# -mshstk required for CET instructions
ifeq ($(ARCH),x86)
ccflags-y += -mshstk
endif

LDS := compel/arch/$(ARCH)/scripts/compel-pack.lds.S

restorer-obj-y += parasite-vdso.o ./$(ARCH_DIR)/vdso-pie.o
Expand Down
29 changes: 29 additions & 0 deletions criu/pie/restorer.c
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,10 @@ __visible long __export_restore_thread(struct thread_restore_args *args)
goto core_restore_end;
}

/* restore original shadow stack */
if (arch_shstk_restore(&args->shstk))
goto core_restore_end;

/* All signals must be handled by thread leader */
ksigfillset(&to_block);
ret = sys_sigprocmask(SIG_SETMASK, &to_block, NULL, sizeof(k_rtsigset_t));
Expand Down Expand Up @@ -1672,6 +1676,9 @@ __visible long __export_restore_task(struct task_restore_args *args)
pr_debug("lazy-pages: uffd %d\n", args->uffd);
}

if (arch_shstk_switch_to_restorer(&args->shstk))
goto core_restore_end;

/*
* Park vdso/vvar in a safe place if architecture doesn't support
* mapping them with arch_prctl().
Expand Down Expand Up @@ -1723,6 +1730,13 @@ __visible long __export_restore_task(struct task_restore_args *args)
if (vma_entry->start > vma_entry->shmid)
break;

/*
* shadow stack VMAs cannot be remapped, they must be
* recreated with map_shadow_stack system call
*/
if (vma_entry_is(vma_entry, VMA_AREA_SHSTK))
continue;

if (vma_remap(vma_entry, args->uffd))
goto core_restore_end;
}
Expand All @@ -1740,6 +1754,13 @@ __visible long __export_restore_task(struct task_restore_args *args)
if (vma_entry->start < vma_entry->shmid)
break;

/*
* shadow stack VMAs cannot be remapped, they must be
* recreated with map_shadow_stack system call
*/
if (vma_entry_is(vma_entry, VMA_AREA_SHSTK))
continue;

if (vma_remap(vma_entry, args->uffd))
goto core_restore_end;
}
Expand Down Expand Up @@ -2166,6 +2187,14 @@ __visible long __export_restore_task(struct task_restore_args *args)

futex_set_and_wake(&thread_inprogress, args->nr_threads);

/*
* Shadow stack of the leader can be locked only after all other
* threads were cloned, otherwise they may start with read-only
* shadow stack.
*/
if (arch_shstk_restore(&args->shstk))
goto core_restore_end;

restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS);

if (ret)
Expand Down

0 comments on commit 394dab0

Please sign in to comment.