Skip to content

Commit

Permalink
[LibOS] Delay IPC leader notification until it has 0 connections
Browse files Browse the repository at this point in the history
To avoid killing the IPC leader before all the child processes
have died, delay notifying the leader that it should terminate
until it has 0 connections.

Signed-off-by: Stefan Berger <[email protected]>
  • Loading branch information
stefanberger committed Oct 3, 2023
1 parent 9489939 commit c8b04ab
Showing 1 changed file with 38 additions and 5 deletions.
43 changes: 38 additions & 5 deletions libos/src/ipc/libos_ipc_worker.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ static ipc_callback ipc_callbacks[] = {
[IPC_MSG_FILE_LOCK_CLEAR_PID] = ipc_file_lock_clear_pid_callback,
};

/* Event handle used only by the IPC leader (the process whose `self_vmid` equals `STARTING_VMID`).
 * It is created in `ipc_worker_wrapper`, set by the IPC worker when the connection count reaches
 * zero (and also on the worker's `out_die` path), and waited on in `terminate_ipc_worker`.
 * It is never created in other processes. */
static PAL_HANDLE leader_notifier;

static void ipc_leader_died_callback(void) {
/* This might happen legitimately e.g. if IPC leader is also our parent and does `wait` + `exit`
* If this is an erroneous disconnect it will be noticed when trying to communicate with
Expand Down Expand Up @@ -106,13 +108,17 @@ static int add_ipc_connection(PAL_HANDLE handle, IDTYPE id) {
return 0;
}

static void del_ipc_connection(struct libos_ipc_connection* conn) {
/*
 * Unlinks `conn` from `g_ipc_connections`, destroys its PAL handle and frees the connection.
 *
 * `notifier` may be NULL. If it is non-NULL and no connections remain after this removal, the
 * event it points to is set so that a waiter (see `terminate_ipc_worker`) can proceed.
 */
static void del_ipc_connection(struct libos_ipc_connection* conn,
                               PAL_HANDLE* notifier) {
    LISTP_DEL(conn, &g_ipc_connections, list);
    /* `terminate_ipc_worker` polls this counter from another thread using an acquire load, so the
     * decrement must be an atomic release operation rather than a plain (racy) `--`. */
    __atomic_sub_fetch(&g_ipc_connections_cnt, 1, __ATOMIC_RELEASE);

    PalObjectDestroy(conn->handle);

    free(conn);

    /* Signal only after the connection is fully torn down. A relaxed load suffices here, as this
     * function itself performed the most recent update of the counter. */
    if (notifier && __atomic_load_n(&g_ipc_connections_cnt, __ATOMIC_RELAXED) == 0) {
        PalEventSet(*notifier);
    }
}

/*
Expand Down Expand Up @@ -213,7 +219,7 @@ static int receive_ipc_messages(struct libos_ipc_connection* conn) {
return 0;
}

static noreturn void ipc_worker_main(void) {
static noreturn void ipc_worker_main(PAL_HANDLE *notifier) {
/* TODO: If we had a global array of connections (instead of a list) we wouldn't have to gather
* them all here in every loop iteration, but then deletion would be slower (but deletion should
* be rare). */
Expand Down Expand Up @@ -337,7 +343,7 @@ static noreturn void ipc_worker_main(void) {
if (ret == 1) {
/* Connection closed. */
disconnect_callbacks(conn);
del_ipc_connection(conn);
del_ipc_connection(conn, notifier);
continue;
}
if (ret < 0) {
Expand All @@ -351,12 +357,18 @@ static noreturn void ipc_worker_main(void) {
* more time - in case there are messages left to be read. */
if (ret_events[i] == PAL_WAIT_ERROR) {
disconnect_callbacks(conn);
del_ipc_connection(conn);
del_ipc_connection(conn, notifier);
}
}
}

out_die:

if (notifier) {
g_ipc_connections_cnt = 0;
PalEventSet(*notifier);
}

PalProcessExit(1);
}

Expand All @@ -370,7 +382,17 @@ static int ipc_worker_wrapper(void* arg) {
log_setprefix(libos_get_tcb());

log_debug("IPC worker started");
ipc_worker_main();

PAL_HANDLE *notifier = NULL;
if (g_process_ipc_ids.self_vmid == STARTING_VMID) {
notifier = &leader_notifier;
/* IPC leader gets a notifier used in terminate_ipc_leader */
if (PalEventCreate(notifier, false, false) < 0) {
log_error("PalEventCreate failed");
return -1;
}
}
ipc_worker_main(notifier);
/* Unreachable. */
}

Expand Down Expand Up @@ -409,12 +431,23 @@ int init_ipc_worker(void) {
}

void terminate_ipc_worker(void) {
if (g_process_ipc_ids.self_vmid == STARTING_VMID) {
uint64_t timeout_us = 100*1000;

PalEventClear(leader_notifier);
while (__atomic_load_n(&g_ipc_connections_cnt, __ATOMIC_ACQUIRE) > 0) {
PalEventWait(leader_notifier, &timeout_us);
if (!__atomic_load_n(&g_clear_on_worker_exit, __ATOMIC_ACQUIRE))
goto end;
}
}
set_pollable_event(&g_worker_thread->pollable_event);

while (__atomic_load_n(&g_clear_on_worker_exit, __ATOMIC_ACQUIRE)) {
CPU_RELAX();
}

end:
put_thread(g_worker_thread);
g_worker_thread = NULL;
PalObjectDestroy(g_self_ipc_handle);
Expand Down

0 comments on commit c8b04ab

Please sign in to comment.