diff --git a/core/sched.cc b/core/sched.cc index e1736f040..44e24dc2a 100644 --- a/core/sched.cc +++ b/core/sched.cc @@ -276,6 +276,10 @@ void cpu::reschedule_from_interrupt(bool called_from_yield, } thread* p = thread::current(); + if (p->_realtime.has_slice() && p->_realtime._priority > 0) { + p->_realtime._run_time += interval; + } + const auto p_status = p->_detached_state->st.load(); assert(p_status != thread::status::queued); @@ -295,8 +299,18 @@ void cpu::reschedule_from_interrupt(bool called_from_yield, } else if (!called_from_yield) { auto &t = *runqueue.begin(); if (p->_realtime._priority > 0) { - // Only switch to a higher-priority realtime thread - if (t._realtime._priority <= p->_realtime._priority) { + // If the threads have the same realtime priority, then only reschedule + // if the currently executed thread has exceeded its time slice (if any). + if (t._realtime._priority == p->_realtime._priority && + ((!p->_realtime.has_slice() || p->_realtime.has_remaining()))) { +#ifdef __aarch64__ + return switch_data; +#else + return; +#endif + // Otherwise, don't switch to a lower-priority realtime thread, + // no matter how much time slice was used by the running thread. + } else if (t._realtime._priority < p->_realtime._priority) { #ifdef __aarch64__ return switch_data; #else @@ -336,15 +350,27 @@ void cpu::reschedule_from_interrupt(bool called_from_yield, p->_detached_state->st.store(thread::status::queued); if (!called_from_yield) { - // POSIX requires that if a real-time thread doesn't yield but - // rather is preempted by a higher-priority thread, it be - // reinserted into the runqueue first, not last, among its equals. - enqueue_first_equal(*p); + if (p->_realtime.has_slice() && !p->_realtime.has_remaining()) { + // The real-time thread has exceeded it's time slice, enqueue + // it at the end among its equals and reset the time slice. + enqueue(*p); + p->_realtime.reset_slice(); + } else { + // POSIX requires that if a real-time thread doesn't yield but + // rather is preempted by a higher-priority thread, it be + // reinserted into the runqueue first, not last, among its equals. + enqueue_first_equal(*p); + } } trace_sched_preempt(); p->stat_preemptions.incr(); } else { + // p is no longer running, if it has a realtime slice reset it. + if (p->_realtime.has_slice()) { + p->_realtime.reset_slice(); + } + // p is no longer running, so we'll switch to a different thread. // Return the runtime p borrowed for hysteresis. p->_runtime.hysteresis_run_stop(); @@ -392,6 +418,9 @@ void cpu::reschedule_from_interrupt(bool called_from_yield, } else { preemption_timer.set_with_irq_disabled(now + preempt_after); } + } else if (n->_realtime.has_slice()) { + assert(n->_realtime.has_remaining()); + preemption_timer.set_with_irq_disabled(now + n->_realtime.remaining()); } if (app_thread.load(std::memory_order_relaxed) != n->_app) { // don't write into a cache line if it can be avoided @@ -920,10 +949,6 @@ unsigned thread::realtime_priority() const void thread::set_realtime_time_slice(thread_realtime::duration time_slice) { - if (time_slice > 0) { - WARN_ONCE("set_realtime_time_slice() used but real-time time slices" - " not yet supported"); - } _realtime._time_slice = time_slice; } diff --git a/include/osv/sched.hh b/include/osv/sched.hh index 0ed8fa11e..af71a2812 100644 --- a/include/osv/sched.hh +++ b/include/osv/sched.hh @@ -348,6 +348,25 @@ public: using duration = thread_runtime::duration; unsigned _priority = 0; duration _time_slice = duration::zero(); + + // Total time this thread ran since starting this slice. + duration _run_time = duration::zero(); + + void reset_slice() { + _run_time = duration::zero(); + } + + bool has_slice() const { + return _time_slice != duration::zero(); + } + + bool has_remaining() const { + return _time_slice > _run_time; + } + + duration remaining() const { + return _time_slice - _run_time; + } }; // "tau" controls the length of the history we consider for scheduling, @@ -691,6 +710,8 @@ public: * With time_slice == 0, the real-time scheduling policy matches POSIX's * "SCHED_FIFO" policy. With time_slice > 0, it matches POSIX's "SCHED_RR" * policy. + * + * Note: The time_slice should be set before the thread is started. */ void set_realtime_time_slice(thread_realtime::duration time_slice); /** diff --git a/modules/tests/Makefile b/modules/tests/Makefile index 52596e419..3e7d3dba9 100644 --- a/modules/tests/Makefile +++ b/modules/tests/Makefile @@ -146,7 +146,7 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so tst-bsd-evh.so \ tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so \ tst-netlink.so misc-zfs-io.so misc-zfs-arc.so tst-pthread-create.so \ misc-futex-perf.so misc-syscall-perf.so tst-brk.so tst-reloc.so \ - misc-vdso-perf.so tst-string-utils.so + misc-vdso-perf.so tst-string-utils.so tst-thread-realtime.so # libstatic-thread-variable.so tst-static-thread-variable.so \ # tst-f128.so \ diff --git a/tests/tst-thread-realtime.cc b/tests/tst-thread-realtime.cc new file mode 100644 index 000000000..bd963508b --- /dev/null +++ b/tests/tst-thread-realtime.cc @@ -0,0 +1,128 @@ +#include +#include +#include +#include + +#define TIME_SLICE 100000000 + +static std::string name = "tst-thr-wrk"; +static int tests = 0, fails = 0; + +static void report(bool ok, std::string msg) +{ + ++tests; + fails += !ok; + std::cout << (ok ? "PASS" : "FAIL") << ": " << msg << "\n"; +} + +static int fac(int n) +{ + if (n == 0) { + return 0; + } else if (n == 1) { + return 1; + } else { + return fac(n - 1) + fac(n - 2); + } +} + +bool runtime_equalized() +{ + constexpr int num_threads = 5; + constexpr int switches = 3; + static std::atomic stop_threads; + sched::thread *threads[num_threads]; + + sched::cpu *c = sched::cpu::current(); + for (int i = 0; i < num_threads; i++) { + threads[i] = sched::thread::make([&]{ + while (!stop_threads.load()) { + fac(10); + } + }, sched::thread::attr().name(name)); + + threads[i]->pin(c); + threads[i]->set_realtime_priority(1); + threads[i]->set_realtime_time_slice(std::chrono::nanoseconds(TIME_SLICE)); + threads[i]->start(); + } + + auto runtime = std::chrono::nanoseconds(TIME_SLICE * num_threads * switches); + sched::thread::sleep(runtime); + stop_threads = true; + + bool ok = true; + long prev_switches = -1; + for (int i = 0; i < num_threads; i++) { + long switches = threads[i]->stat_switches.get(); + if (prev_switches != -1 && prev_switches != switches) { + ok = false; + break; + } + prev_switches = switches; + } + + for (int i = 0; i < num_threads; i++) { + delete threads[i]; + } + + return ok; +} + +bool priority_precedence() +{ + static std::atomic high_prio_stop; + sched::thread *high_prio = sched::thread::make([&]{ + while (!high_prio_stop.load()) { + fac(10); + } + }); + + static std::atomic low_prio_stop; + sched::thread *low_prio = sched::thread::make([&]{ + while (!low_prio_stop.load()) { + fac(10); + } + }); + + sched::cpu *c = sched::cpu::current(); + high_prio->pin(c); + low_prio->pin(c); + + high_prio->set_realtime_priority(2); + low_prio->set_realtime_priority(1); + + // The higher priority thread has a time slice, but since there is no other thread + // with the same priority, it should monopolize the CPU and lower_prio shouldn't run. + high_prio->set_realtime_time_slice(sched::thread_realtime::duration(TIME_SLICE)); + + high_prio->start(); + low_prio->start(); + + auto runtime = std::chrono::nanoseconds(TIME_SLICE * 3); + sched::thread::sleep(runtime); + + // Since both threads are pinned to the CPU and the higher priority + // thread is always runnable, the lower priority thread should starve. + bool ok = high_prio->thread_clock().count() > 0 && + low_prio->thread_clock().count() == 0; + + low_prio_stop = true; + high_prio_stop = true; + + delete high_prio; + delete low_prio; + + return ok; +} + +int main(int ac, char** av) +{ + // Ensure that the main thread doesn't starve. + sched::thread *cur = sched::thread::current(); + cur->set_realtime_priority(10); + + report(runtime_equalized(), "runtime_equalized"); + report(priority_precedence(), "priority_precedence"); + std::cout << "SUMMARY: " << tests << " tests, " << fails << " failures\n"; +}