From 9bcded8b4523a3776553043ce0bac512457f507c Mon Sep 17 00:00:00 2001 From: Lucas Alber Date: Mon, 12 Aug 2024 15:26:09 +0200 Subject: [PATCH] merian: Frame timing related fixes --- include/merian-nodes/graph/graph.hpp | 56 ++++++++++------- include/merian-nodes/graph/graph_run.hpp | 21 +++++-- .../nodes/glfw_window/glfw_window.hpp | 14 +++-- include/merian/utils/stopwatch.hpp | 1 + include/merian/vk/window/swapchain.hpp | 8 +-- meson.build | 2 +- src/merian/utils/stopwatch.cpp | 22 +++++-- src/merian/vk/window/swapchain.cpp | 60 ++++++++++--------- 8 files changed, 113 insertions(+), 71 deletions(-) diff --git a/include/merian-nodes/graph/graph.hpp b/include/merian-nodes/graph/graph.hpp index 09819f8a..495733a5 100644 --- a/include/merian-nodes/graph/graph.hpp +++ b/include/merian-nodes/graph/graph.hpp @@ -461,18 +461,18 @@ class Graph : public std::enable_shared_from_this> { run_in_progress = true; // wait for the in-flight processing to finish - const auto before_gpu_wait = std::chrono::high_resolution_clock::now(); + Stopwatch sw_gpu_wait; InFlightData& in_flight_data = ring_fences.next_cycle_wait_get(); - gpu_wait_time = gpu_wait_time * 0.8 + - (std::chrono::high_resolution_clock::now() - before_gpu_wait) * 0.2; + gpu_wait_time = gpu_wait_time * 0.9 + sw_gpu_wait.duration() * 0.1; - if (low_latency_mode && !needs_reconnect) { - const auto total_wait = gpu_wait_time + cpu_sleep_time; - cpu_sleep_time = 0.95 * total_wait; - if (cpu_sleep_time < 1ms) { - cpu_sleep_time = 0ms; - } + // last pred: gpu_time > cpu_time + const auto total_wait = + std::max((gpu_wait_time + external_wait_time + cpu_sleep_time - 0.1ms), 0.1ms); + if (low_latency_mode && !needs_reconnect && (total_wait > time_delta - total_wait)) { + cpu_sleep_time = 0.92 * total_wait; std::this_thread::sleep_for(cpu_sleep_time); + } else { + cpu_sleep_time = 0ms; } // now we can release the resources from staging space and reset the command pool @@ -553,22 +553,35 @@ class Graph : public std::enable_shared_from_this> { // FINISH RUN: submit - on_pre_submit(run, cmd); + { + MERIAN_PROFILE_SCOPE_GPU(profiler, cmd, "Pre-Submit"); + on_pre_submit(run, cmd); + } cmd_pool->end_all(); in_flight_data.staging_set_id = resource_allocator->getStaging()->finalizeResourceSet(); - queue->submit(cmd_pool, ring_fences.reset(), run.get_signal_semaphores(), - run.get_wait_semaphores(), run.get_wait_stages(), - run.get_timeline_semaphore_submit_info()); - run.execute_callbacks(queue); - on_post_submit(); + { + MERIAN_PROFILE_SCOPE(profiler, "Submit"); + queue->submit(cmd_pool, ring_fences.reset(), run.get_signal_semaphores(), + run.get_wait_semaphores(), run.get_wait_stages(), + run.get_timeline_semaphore_submit_info()); + } + { + MERIAN_PROFILE_SCOPE(profiler, "Execute callbacks"); + run.execute_callbacks(queue); + } + { + MERIAN_PROFILE_SCOPE(profiler, "Post-Submit"); + on_post_submit(); + } + external_wait_time = 0.9 * external_wait_time + 0.1 * run.external_wait_time; needs_reconnect |= run.needs_reconnect; ++run_iteration; ++total_iteration; + run_in_progress = false; for (const auto& task : on_run_finished_tasks) task(); on_run_finished_tasks.clear(); - run_in_progress = false; } // waits until all in-flight iterations have finished @@ -614,6 +627,7 @@ class Graph : public std::enable_shared_from_this> { props.output_text("Total Elapsed: {:%H:%M:%S}s", duration_elapsed); props.output_text("Time delta: {:04f}ms", to_milliseconds(time_delta)); props.output_text("GPU wait: {:04f}ms", to_milliseconds(gpu_wait_time)); + props.output_text("External wait: {:04f}ms", to_milliseconds(external_wait_time)); props.st_separate(); if (props.config_options("time overwrite", time_overwrite, {"None", "Time", "Delta"}, @@ -634,9 +648,10 @@ class Graph : public std::enable_shared_from_this> { } props.st_separate(); - props.config_bool("low latency", low_latency_mode, - "Delays CPU processing to recude input latency in GPU bound " - "applications. Might reduce framerate."); + props.config_bool( + "low latency", low_latency_mode, + "Experimental: Delays CPU processing to recude input latency in GPU bound " + "applications. Might reduce framerate."); if (low_latency_mode) { props.output_text("CPU sleep time: {:04f}ms", to_milliseconds(cpu_sleep_time)); } @@ -1675,7 +1690,7 @@ class Graph : public std::enable_shared_from_this> { remove_connection(src_node, dst_node, dst_input->name); return false; } - it++; + ++it; } } } @@ -1913,6 +1928,7 @@ class Graph : public std::enable_shared_from_this> { bool low_latency_mode = false; std::chrono::duration gpu_wait_time = 0ns; std::chrono::duration cpu_sleep_time = 0ns; + std::chrono::duration external_wait_time = 0ns; Profiler::Report last_build_report; Profiler::Report last_run_report; diff --git a/include/merian-nodes/graph/graph_run.hpp b/include/merian-nodes/graph/graph_run.hpp index fe6a4b73..0544e025 100644 --- a/include/merian-nodes/graph/graph_run.hpp +++ b/include/merian-nodes/graph/graph_run.hpp @@ -11,6 +11,7 @@ namespace merian_nodes { using namespace merian; +using namespace std::literals::chrono_literals; // Manages data of a single graph run. class GraphRun { @@ -45,8 +46,8 @@ class GraphRun { signal_values.push_back(value); } - void - add_submit_callback(const std::function& callback) noexcept { + void add_submit_callback( + const std::function& callback) noexcept { submit_callbacks.push_back(callback); } @@ -97,15 +98,15 @@ class GraphRun { // You must call every callback after you submited the graph command buffer // Or you use the execute_callbacks function. - const std::vector>& + const std::vector>& get_submit_callbacks() const noexcept { return submit_callbacks; } // Call this after you submitted the graph command buffer - void execute_callbacks(const QueueHandle& queue) const { + void execute_callbacks(const QueueHandle& queue) { for (const auto& callback : submit_callbacks) { - callback(queue); + callback(queue, *this); } } @@ -151,6 +152,12 @@ class GraphRun { return to_seconds(elapsed_since_connect); } + // Hint the graph that waiting was necessary for external events. This information can be used + // to shift CPU processing back to reduce waiting and reduce latency. + void hint_external_wait_time(auto chrono_duration) { + external_wait_time = std::max(external_wait_time, chrono_duration); + } + private: void reset(const uint64_t iteration, const uint32_t in_flight_index, @@ -173,6 +180,7 @@ class GraphRun { signal_semaphores.clear(); signal_values.clear(); submit_callbacks.clear(); + external_wait_time = 0ns; this->profiler = profiler; this->needs_reconnect = false; @@ -187,7 +195,8 @@ class GraphRun { std::vector signal_semaphores; std::vector signal_values; - std::vector> submit_callbacks; + std::vector> submit_callbacks; + std::chrono::nanoseconds external_wait_time; ProfilerHandle profiler = nullptr; CommandPoolHandle cmd_pool = nullptr; diff --git a/include/merian-nodes/nodes/glfw_window/glfw_window.hpp b/include/merian-nodes/nodes/glfw_window/glfw_window.hpp index 35be86e6..48bb8320 100644 --- a/include/merian-nodes/nodes/glfw_window/glfw_window.hpp +++ b/include/merian-nodes/nodes/glfw_window/glfw_window.hpp @@ -19,7 +19,7 @@ namespace merian_nodes { */ class GLFWWindow : public Node { public: - GLFWWindow(const ContextHandle context) : Node() { + GLFWWindow(const ContextHandle& context) : Node() { if (context->get_extension()) { window = std::make_shared(context); swapchain = std::make_shared(context, window->get_surface()); @@ -47,7 +47,7 @@ class GLFWWindow : public Node { acquire.reset(); for (uint32_t tries = 0; !acquire && tries < 2; tries++) { try { - acquire = swapchain->acquire(window, 1000 * 1000 /* 1s */); + acquire = swapchain->acquire(window, 1000L * 1000L /* 1s */); } catch (const Swapchain::needs_recreate& e) { old_swapchains.emplace_back(swapchain); swapchain = std::make_shared(swapchain); @@ -87,9 +87,11 @@ class GLFWWindow : public Node { run.add_wait_semaphore(acquire->wait_semaphore, vk::PipelineStageFlagBits::eTransfer); run.add_signal_semaphore(acquire->signal_semaphore); - run.add_submit_callback([&](const QueueHandle& queue) { + run.add_submit_callback([&](const QueueHandle& queue, GraphRun& run) { try { + Stopwatch present_duration; swapchain->present(queue); + run.hint_external_wait_time(present_duration.duration()); } catch (const Swapchain::needs_recreate& e) { // do nothing and hope for the best return; @@ -106,12 +108,12 @@ class GLFWWindow : public Node { } NodeStatusFlags properties(Properties& config) override { - GLFWmonitor* monitor = window ? glfwGetWindowMonitor(*window) : NULL; - int fullscreen = monitor != NULL; + GLFWmonitor* monitor = window ? glfwGetWindowMonitor(*window) : nullptr; + int fullscreen = static_cast(monitor != nullptr); const int old_fullscreen = fullscreen; config.config_options("mode", fullscreen, {"windowed", "fullscreen"}); if (window && fullscreen != old_fullscreen) { - if (fullscreen) { + if (fullscreen != 0) { try { glfwGetWindowPos(*window, &windowed_pos_size[0], &windowed_pos_size[1]); } catch (const ExtensionVkGLFW::glfw_error& e) { diff --git a/include/merian/utils/stopwatch.hpp b/include/merian/utils/stopwatch.hpp index 521765d7..67ac9b2d 100644 --- a/include/merian/utils/stopwatch.hpp +++ b/include/merian/utils/stopwatch.hpp @@ -15,6 +15,7 @@ class Stopwatch { uint64_t nanos() const; double millis() const; double seconds() const; + std::chrono::nanoseconds duration() const; private: chrono_clock::time_point start; diff --git a/include/merian/vk/window/swapchain.hpp b/include/merian/vk/window/swapchain.hpp index 6992cdb2..9c95e460 100644 --- a/include/merian/vk/window/swapchain.hpp +++ b/include/merian/vk/window/swapchain.hpp @@ -162,7 +162,7 @@ class Swapchain : public std::enable_shared_from_this { } /* Image index only valid until the next acquire_*() */ - uint32_t current_image_index() { + uint32_t current_image_index() const { return current_image_idx; } @@ -184,7 +184,7 @@ class Swapchain : public std::enable_shared_from_this { } /* Remember to also transition image layouts */ - vk::Extent2D create_swapchain(int width, int height); + vk::Extent2D create_swapchain(const uint32_t width, const uint32_t height); /* Sets vsync. The swapchain is automatically recreated on next aquire. * Returns if vsync could be enabled. @@ -197,11 +197,11 @@ class Swapchain : public std::enable_shared_from_this { } bool vsync_enabled() const { - return cur_present_mode == vk::PresentModeKHR::eFifo; + return present_mode == vk::PresentModeKHR::eFifo; } vk::PresentModeKHR get_present_mode() { - return cur_present_mode; + return present_mode; } // intened to destroy framebuffers and renderpasses when the swapchain is destroyed. diff --git a/meson.build b/meson.build index 7323fecf..53517b38 100644 --- a/meson.build +++ b/meson.build @@ -93,7 +93,7 @@ if not shaderc.found() and get_option('shaderc').enabled() ) endif -if get_option('shaderc').enabled() +if shaderc.found() add_project_arguments('-DMERIAN_ENABLE_SHADERC', language: 'cpp') endif diff --git a/src/merian/utils/stopwatch.cpp b/src/merian/utils/stopwatch.cpp index 4ab55570..9c7b330c 100644 --- a/src/merian/utils/stopwatch.cpp +++ b/src/merian/utils/stopwatch.cpp @@ -1,4 +1,6 @@ #include "merian/utils/stopwatch.hpp" +#include "merian/utils/chrono.hpp" +#include namespace merian { @@ -8,21 +10,31 @@ Stopwatch::Stopwatch() { void Stopwatch::reset() { start = chrono_clock::now(); + std::atomic_signal_fence(std::memory_order_seq_cst); } uint64_t Stopwatch::nanos() const { - auto end = chrono_clock::now(); + std::atomic_signal_fence(std::memory_order_seq_cst); + const auto end = chrono_clock::now(); return std::chrono::duration_cast(end - start).count(); } double Stopwatch::millis() const { - auto end = chrono_clock::now(); - return std::chrono::duration(end - start).count(); + std::atomic_signal_fence(std::memory_order_seq_cst); + const auto end = chrono_clock::now(); + return to_milliseconds(end - start); } double Stopwatch::seconds() const { - auto end = chrono_clock::now(); - return std::chrono::duration(end - start).count(); + std::atomic_signal_fence(std::memory_order_seq_cst); + const auto end = chrono_clock::now(); + return to_seconds(end - start); +} + +std::chrono::nanoseconds Stopwatch::duration() const { + std::atomic_signal_fence(std::memory_order_seq_cst); + const auto end = chrono_clock::now(); + return end - start; } } // namespace merian diff --git a/src/merian/vk/window/swapchain.cpp b/src/merian/vk/window/swapchain.cpp index bc6599a0..0991efd4 100644 --- a/src/merian/vk/window/swapchain.cpp +++ b/src/merian/vk/window/swapchain.cpp @@ -26,16 +26,15 @@ namespace merian { if (vsync) { return best; - } else { - // Find a faster mode - for (const auto& present_mode : present_modes) { - if (present_mode == preferred_vsync_off_mode) { - return present_mode; - } - if (present_mode == vk::PresentModeKHR::eImmediate || - present_mode == vk::PresentModeKHR::eMailbox) { - best = present_mode; - } + } + // Find a faster mode + for (const auto& present_mode : present_modes) { + if (present_mode == preferred_vsync_off_mode) { + return present_mode; + } + if (present_mode == vk::PresentModeKHR::eImmediate || + present_mode == vk::PresentModeKHR::eMailbox) { + best = present_mode; } } @@ -97,7 +96,9 @@ Swapchain::~Swapchain() { // ------------------------------------------------------------------------------------- -vk::Extent2D make_extent2D(vk::SurfaceCapabilitiesKHR capabilities, int width, int height) { +vk::Extent2D make_extent2D(const vk::SurfaceCapabilitiesKHR capabilities, + const uint32_t width, + const uint32_t height) { vk::Extent2D extent; if (capabilities.currentExtent.width != UINT32_MAX) { // If the surface size is defined, the image size must match @@ -112,7 +113,7 @@ vk::Extent2D make_extent2D(vk::SurfaceCapabilitiesKHR capabilities, int width, i return extent; } -vk::Extent2D Swapchain::create_swapchain(int width, int height) { +vk::Extent2D Swapchain::create_swapchain(const uint32_t width, const uint32_t height) { vk::SwapchainKHR old = VK_NULL_HANDLE; if (old_swapchain.expired()) { SPDLOG_DEBUG("create swapchain"); @@ -137,7 +138,7 @@ vk::Extent2D Swapchain::create_swapchain(int width, int height) { } // clang-format off - vk::SwapchainCreateInfoKHR createInfo( + vk::SwapchainCreateInfoKHR create_info( vk::SwapchainCreateFlagBitsKHR(), *surface, min_images, @@ -152,11 +153,11 @@ vk::Extent2D Swapchain::create_swapchain(int width, int height) { pre_transform, vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, - false, + VK_FALSE, old ); - swapchain = context->device.createSwapchainKHR(createInfo, nullptr); + swapchain = context->device.createSwapchainKHR(create_info, nullptr); std::vector swapchain_images = context->device.getSwapchainImagesKHR(swapchain); num_images = swapchain_images.size(); @@ -172,7 +173,7 @@ vk::Extent2D Swapchain::create_swapchain(int width, int height) { entry.image = swapchain_images[i]; // View - vk::ImageViewCreateInfo createInfo( + vk::ImageViewCreateInfo create_info( vk::ImageViewCreateFlagBits(), entry.image, vk::ImageViewType::e2D, @@ -188,14 +189,14 @@ vk::Extent2D Swapchain::create_swapchain(int width, int height) { 0, 1, 0, 1 } ); - entry.imageView = context->device.createImageView(createInfo); + entry.imageView = context->device.createImageView(create_info); // Semaphore semaphore_group.read_semaphore = std::make_shared(context); semaphore_group.written_semaphore = std::make_shared(context); // Barrier - vk::ImageSubresourceRange imageSubresourceRange { + vk::ImageSubresourceRange image_subresource_range { vk::ImageAspectFlagBits::eColor, 0, VK_REMAINING_MIP_LEVELS, @@ -210,7 +211,7 @@ vk::Extent2D Swapchain::create_swapchain(int width, int height) { {}, {}, entry.image, - imageSubresourceRange, + image_subresource_range, }; barriers[i] = barrier; } @@ -267,13 +268,12 @@ Swapchain::acquire(const std::function& framebuffer_extent, SwapchainAcquireResult aquire_result; - if ((extent.width != cur_width || extent.height != cur_height || - present_mode != cur_present_mode)) { - if (!swapchain) { - create_swapchain(extent.width, extent.height); - } else { - throw needs_recreate("changed framebuffer size"); - } + if (!swapchain) { + create_swapchain(extent.width, extent.height); + } else if (extent.width != cur_width || extent.height != cur_height) { + throw needs_recreate("changed framebuffer size"); + } else if (present_mode != cur_present_mode) { + throw needs_recreate("changed present mode (vsync)"); } const vk::Result result = context->device.acquireNextImageKHR( @@ -294,11 +294,13 @@ Swapchain::acquire(const std::function& framebuffer_extent, created = false; return aquire_result; - } else if (result == vk::Result::eErrorOutOfDateKHR || result == vk::Result::eSuboptimalKHR) { + } + + if (result == vk::Result::eErrorOutOfDateKHR || result == vk::Result::eSuboptimalKHR) { throw needs_recreate(result); - } else { - return std::nullopt; } + + return std::nullopt; } void Swapchain::present(const QueueHandle& queue) {