Skip to content

kernel: workq: introduce work timeout: #88345

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions include/zephyr/kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -4168,6 +4168,16 @@ struct k_work_queue_config {
* essential thread.
*/
bool essential;

/** Controls whether work queue monitors work timeouts.
*
* If non-zero, and CONFIG_WORKQUEUE_WORK_TIMEOUT is enabled,
* the work queue will monitor the duration of each work item.
* If the work item handler takes longer than the specified
* time to execute, the work queue thread will be aborted, and
* an error will be logged if CONFIG_LOG is enabled.
*/
uint32_t work_timeout_ms;
};

/** @brief A structure used to hold work until it can be processed. */
Expand All @@ -4190,6 +4200,12 @@ struct k_work_q {

/* Flags describing queue state. */
uint32_t flags;

#if defined(CONFIG_WORKQUEUE_WORK_TIMEOUT)
struct _timeout workto;
struct k_work *work;
k_timeout_t work_timeout;
#endif /* defined(CONFIG_WORKQUEUE_WORK_TIMEOUT) */
};

/* Provide the implementation for inline functions declared above */
Expand Down
16 changes: 16 additions & 0 deletions kernel/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,14 @@ endmenu

rsource "Kconfig.obj_core"

config WORKQUEUE_WORK_TIMEOUT
bool "Support workqueue work timeout monitoring"
help
If enabled, the workqueue will monitor the duration of each work item.
If the work item handler takes longer than the specified time to
execute, the work queue thread will be aborted, and an error will be
logged.

menu "System Work Queue Options"
config SYSTEM_WORKQUEUE_STACK_SIZE
int "System workqueue stack size"
Expand All @@ -600,6 +608,14 @@ config SYSTEM_WORKQUEUE_NO_YIELD
cooperative and a sequence of work items is expected to complete
without yielding.

config SYSTEM_WORKQUEUE_WORK_TIMEOUT_MS
int "Select system work queue work timeout in milliseconds"
default 5000 if ASSERT
default 0
help
Set to 0 to disable work timeout for system workqueue. Option
has no effect if WORKQUEUE_WORK_TIMEOUT is not enabled.

endmenu

menu "Barrier Operations"
Expand Down
1 change: 1 addition & 0 deletions kernel/system_work_q.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ static int k_sys_work_q_init(void)
.name = "sysworkq",
.no_yield = IS_ENABLED(CONFIG_SYSTEM_WORKQUEUE_NO_YIELD),
.essential = true,
.work_timeout_ms = CONFIG_SYSTEM_WORKQUEUE_WORK_TIMEOUT_MS,
};

k_work_queue_start(&k_sys_work_q,
Expand Down
63 changes: 63 additions & 0 deletions kernel/work.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
#include <errno.h>
#include <ksched.h>
#include <zephyr/sys/printk.h>
#include <zephyr/logging/log.h>

LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

static inline void flag_clear(uint32_t *flagp,
uint32_t bit)
Expand Down Expand Up @@ -599,6 +602,50 @@ bool k_work_cancel_sync(struct k_work *work,
return pending;
}

#if defined(CONFIG_WORKQUEUE_WORK_TIMEOUT)
static void workto_handler(struct _timeout *to)
{
struct k_work_q *queue = CONTAINER_OF(to, struct k_work_q, workto);
const char *name;
struct k_work *work;
k_work_handler_t handler;

name = k_thread_name_get(&queue->thread);

K_SPINLOCK(&lock) {
work = queue->work;
handler = work->handler;
}

if (name != NULL) {
LOG_ERR("queue %s blocked by work %p with handler %p", name, work, handler);
} else {
LOG_ERR("queue %p blocked by work %p with handler %p", queue, work, handler);
}
Comment on lines +620 to +624
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still not crazy about this, as it needlessly duplicates a nearly identical string in the string table.

Copy link
Collaborator Author

@bjarki-andreasen bjarki-andreasen Apr 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How would you solve it? the string is about 30 bytes, and stored in ROM, the added operations of conditionally copying the thread name or a queue pointer, to a RAM buffer, to then pass to the LOG_ERR could easily take up the same or more ROM (while also adding complexity, the worst kind, involving null terminated strings)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(LOG_ERR is probably adding code as well given the use of VARGS)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How would you solve it?

Using the code suggestion here:
#88345 (comment)


k_thread_abort(&queue->thread);
}

static void work_timeout_start_locked(struct k_work_q *queue, struct k_work *work)
{
if (K_TIMEOUT_EQ(queue->work_timeout, K_FOREVER)) {
return;
}

queue->work = work;
z_add_timeout(&queue->workto, workto_handler, queue->work_timeout);
}

static void work_timeout_stop_locked(struct k_work_q *queue)
{
if (K_TIMEOUT_EQ(queue->work_timeout, K_FOREVER)) {
return;
}

z_abort_timeout(&queue->workto);
}
#endif /* defined(CONFIG_WORKQUEUE_WORK_TIMEOUT) */

/* Loop executed by a work queue thread.
*
* @param workq_ptr pointer to the work queue structure
Expand Down Expand Up @@ -678,6 +725,10 @@ static void work_queue_main(void *workq_ptr, void *p2, void *p3)
continue;
}

#if defined(CONFIG_WORKQUEUE_WORK_TIMEOUT)
work_timeout_start_locked(queue, work);
#endif /* defined(CONFIG_WORKQUEUE_WORK_TIMEOUT) */

k_spin_unlock(&lock, key);

__ASSERT_NO_MSG(handler != NULL);
Expand All @@ -690,6 +741,10 @@ static void work_queue_main(void *workq_ptr, void *p2, void *p3)
*/
key = k_spin_lock(&lock);

#if defined(CONFIG_WORKQUEUE_WORK_TIMEOUT)
work_timeout_stop_locked(queue);
#endif /* defined(CONFIG_WORKQUEUE_WORK_TIMEOUT) */

flag_clear(&work->flags, K_WORK_RUNNING_BIT);
if (flag_test(&work->flags, K_WORK_FLUSHING_BIT)) {
finalize_flush_locked(work);
Expand Down Expand Up @@ -761,6 +816,14 @@ void k_work_queue_start(struct k_work_q *queue,
queue->thread.base.user_options |= K_ESSENTIAL;
}

#if defined(CONFIG_WORKQUEUE_WORK_TIMEOUT)
if ((cfg != NULL) && (cfg->work_timeout_ms)) {
queue->work_timeout = K_MSEC(cfg->work_timeout_ms);
} else {
queue->work_timeout = K_FOREVER;
}
#endif /* defined(CONFIG_WORKQUEUE_WORK_TIMEOUT) */

k_thread_start(&queue->thread);

SYS_PORT_TRACING_OBJ_FUNC_EXIT(k_work_queue, start, queue);
Expand Down
2 changes: 1 addition & 1 deletion submanifests/optional.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ manifest:
groups:
- optional
- name: zephyr-lang-rust
revision: d4f9036a88e53080bf2b186afa5289f9c77a0f73
revision: pull/86/head
path: modules/lang/rust
remote: upstream
groups:
Expand Down
8 changes: 8 additions & 0 deletions tests/kernel/workq/work_timeout/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# SPDX-License-Identifier: Apache-2.0

cmake_minimum_required(VERSION 3.20.0)
find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})
project(work_timeout)

FILE(GLOB app_sources src/test.c)
target_sources(app PRIVATE ${app_sources})
1 change: 1 addition & 0 deletions tests/kernel/workq/work_timeout/prj.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
CONFIG_ZTEST=y
84 changes: 84 additions & 0 deletions tests/kernel/workq/work_timeout/src/test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright (c) 2025 Nordic Semiconductor ASA
*
* SPDX-License-Identifier: Apache-2.0
*/

#include <zephyr/kernel.h>
#include <zephyr/ztest.h>

#define TEST_WORK_TIMEOUT_MS 100
#define TEST_WORK_DURATION_MS (TEST_WORK_TIMEOUT_MS / 2)
#define TEST_WORK_DELAY K_MSEC(TEST_WORK_DURATION_MS * 6)
#define TEST_WORK_BLOCKING_DELAY K_MSEC(TEST_WORK_TIMEOUT_MS * 2)

static struct k_work_q test_workq;
static K_KERNEL_STACK_DEFINE(test_workq_stack, CONFIG_MAIN_STACK_SIZE);

static void test_work_handler(struct k_work *work)
{
k_msleep(TEST_WORK_DURATION_MS);
}

static K_WORK_DEFINE(test_work0, test_work_handler);
static K_WORK_DEFINE(test_work1, test_work_handler);
static K_WORK_DEFINE(test_work2, test_work_handler);
static K_WORK_DEFINE(test_work3, test_work_handler);

static void test_work_handler_blocking(struct k_work *work)
{
k_sleep(K_FOREVER);
}

static K_WORK_DEFINE(test_work_blocking, test_work_handler_blocking);

static void *test_setup(void)
{
const struct k_work_queue_config config = {
.name = "sysworkq",
.no_yield = false,
.essential = false,
.work_timeout_ms = TEST_WORK_TIMEOUT_MS,
};

k_work_queue_start(&test_workq,
test_workq_stack,
K_KERNEL_STACK_SIZEOF(test_workq_stack),
0,
&config);

return NULL;
}

ZTEST_SUITE(work_timeout, NULL, test_setup, NULL, NULL, NULL);

ZTEST(work_timeout, test_work)
{
int ret;

/* Submit multiple items which take less time than TEST_WORK_TIMEOUT_MS each */
zassert_equal(k_work_submit_to_queue(&test_workq, &test_work0), 1);
zassert_equal(k_work_submit_to_queue(&test_workq, &test_work1), 1);
zassert_equal(k_work_submit_to_queue(&test_workq, &test_work2), 1);
zassert_equal(k_work_submit_to_queue(&test_workq, &test_work3), 1);

/*
* Submitted items takes longer than TEST_WORK_TIMEOUT_MS, but each item takes
* less time than TEST_WORK_DELAY so workqueue thread will not be aborted.
*/
zassert_equal(k_thread_join(&test_workq.thread, TEST_WORK_DELAY), -EAGAIN);

/* Submit single item which takes longer than TEST_WORK_TIMEOUT_MS */
zassert_equal(k_work_submit_to_queue(&test_workq, &test_work_blocking), 1);

/*
* Submitted item shall cause the work to time out and the workqueue thread be
* aborted if CONFIG_WORKQUEUE_WORK_TIMEOUT is enabled.
*/
ret = k_thread_join(&test_workq.thread, TEST_WORK_BLOCKING_DELAY);
if (IS_ENABLED(CONFIG_WORKQUEUE_WORK_TIMEOUT)) {
zassert_equal(ret, 0);
} else {
zassert_equal(ret, -EAGAIN);
}
}
10 changes: 10 additions & 0 deletions tests/kernel/workq/work_timeout/testcase.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
tests:
kernel.workqueue.work_timeout:
tags: kernel
min_flash: 64
min_ram: 32
extra_args: CONFIG_WORKQUEUE_WORK_TIMEOUT=y
kernel.workqueue.work_timeout.disabled:
tags: kernel
min_flash: 64
min_ram: 32
Loading