Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LibOS] Delay IPC leader notification until it has 0 connections #1588

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion libos/include/libos_ipc.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,10 @@ int init_ipc_worker(void);

/*!
* \brief Terminate the IPC worker thread.
*
* \param force Whether to force the termination of the IPC thread
*/
void terminate_ipc_worker(void);
void terminate_ipc_worker(bool force);

/*!
* \brief Establish a one-way IPC connection to another process.
Expand Down
33 changes: 32 additions & 1 deletion libos/src/ipc/libos_ipc_worker.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,19 @@ static ipc_callback ipc_callbacks[] = {
[IPC_MSG_FILE_LOCK_CLEAR_PID] = ipc_file_lock_clear_pid_callback,
};

static PAL_HANDLE g_leader_notifier;

static void ipc_leader_died_callback(void) {
/* This might happen legitimately e.g. if IPC leader is also our parent and does `wait` + `exit`
* If this is an erroneous disconnect it will be noticed when trying to communicate with
* the leader. */
log_debug("IPC leader disconnected");
}

static inline bool is_ipc_leader(void) {
return g_process_ipc_ids.self_vmid == STARTING_VMID;
}

static void disconnect_callbacks(struct libos_ipc_connection* conn) {
if (g_process_ipc_ids.leader_vmid == conn->vmid) {
ipc_leader_died_callback();
Expand Down Expand Up @@ -113,6 +119,10 @@ static void del_ipc_connection(struct libos_ipc_connection* conn) {
PalObjectDestroy(conn->handle);

free(conn);

if (is_ipc_leader() && g_ipc_connections_cnt == 0) {
PalEventSet(g_leader_notifier);
}
}

/*
Expand Down Expand Up @@ -386,6 +396,15 @@ static int create_ipc_worker(void) {
return ret;
}

/* IPC leader gets a notifier used in terminate_ipc_leader */
if (is_ipc_leader()) {
ret = PalEventCreate(&g_leader_notifier, /*init_signaled=*/false, /*auto_clear=*/false);
if (ret < 0) {
log_error("IPC leader: PalEventCreate() failed");
return pal_to_unix_errno(ret);
}
}

g_worker_thread = get_new_internal_thread();
if (!g_worker_thread) {
return -ENOMEM;
Expand All @@ -408,7 +427,17 @@ int init_ipc_worker(void) {
return create_ipc_worker();
}

void terminate_ipc_worker(void) {
/* Terminate the IPC worker. If 'force' is set, the IPC leader will not wait until it has 0
* connections but terminate immediately, otherwise it will wait until all child processes have
* died and there are 0 connections. 'force' will for example be set when the user sends a SIGTERM
* signal to the Gramine process.
*/
void terminate_ipc_worker(bool force) {
if (is_ipc_leader() && !force) {
PalEventClear(g_leader_notifier);
while (__atomic_load_n(&g_ipc_connections_cnt, __ATOMIC_ACQUIRE) > 0 &&
PalEventWait(g_leader_notifier, NULL) < 0);
}
set_pollable_event(&g_worker_thread->pollable_event);

while (__atomic_load_n(&g_clear_on_worker_exit, __ATOMIC_ACQUIRE)) {
Expand All @@ -419,4 +448,6 @@ void terminate_ipc_worker(void) {
g_worker_thread = NULL;
PalObjectDestroy(g_self_ipc_handle);
g_self_ipc_handle = NULL;
if (g_leader_notifier)
PalObjectDestroy(g_leader_notifier);
}
17 changes: 8 additions & 9 deletions libos/src/sys/libos_exit.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,7 @@
#include "libos_utils.h"
#include "pal.h"

static noreturn void libos_clean_and_exit(int exit_code) {
/*
* TODO: if we are the IPC leader, we need to either:
* 1) kill all other Gramine processes
* 2) wait for them to exit here, before we terminate the IPC helper
*/

static noreturn void libos_clean_and_exit(int exit_code, int term_signal) {
shutdown_sync_client();

struct libos_thread* async_thread = terminate_async_worker();
Expand Down Expand Up @@ -53,7 +47,11 @@ static noreturn void libos_clean_and_exit(int exit_code) {
*/
release_id(get_cur_thread()->tid);

terminate_ipc_worker();
/* Terminate the IPC worker and wait until all child processes have also terminated. However,
* if we received a SIGTERM then the IPC worker will be forcefully terminated without waiting
* for child processes. Once we exit, all child proceses will then also exit.
*/
terminate_ipc_worker(term_signal == SIGTERM);

log_debug("process %u exited with status %d", g_process_ipc_ids.self_vmid, exit_code);

Expand Down Expand Up @@ -147,7 +145,8 @@ noreturn void thread_exit(int error_code, int term_signal) {

/* At this point other threads might be still in the middle of an exit routine, but we don't
* care since the below will call `exit_group` eventually. */
libos_clean_and_exit(term_signal ? 128 + (term_signal & ~__WCOREDUMP_BIT) : error_code);
libos_clean_and_exit(term_signal ? 128 + (term_signal & ~__WCOREDUMP_BIT) : error_code,
term_signal);
}

static int mark_thread_to_die(struct libos_thread* thread, void* arg) {
Expand Down