From 9bcded8b4523a3776553043ce0bac512457f507c Mon Sep 17 00:00:00 2001
From: Lucas Alber <lucasd.alber@gmail.com>
Date: Mon, 12 Aug 2024 15:26:09 +0200
Subject: [PATCH] merian: Frame timing related fixes

---
 include/merian-nodes/graph/graph.hpp          | 56 ++++++++++-------
 include/merian-nodes/graph/graph_run.hpp      | 21 +++++--
 .../nodes/glfw_window/glfw_window.hpp         | 14 +++--
 include/merian/utils/stopwatch.hpp            |  1 +
 include/merian/vk/window/swapchain.hpp        |  8 +--
 meson.build                                   |  2 +-
 src/merian/utils/stopwatch.cpp                | 22 +++++--
 src/merian/vk/window/swapchain.cpp            | 60 ++++++++++---------
 8 files changed, 113 insertions(+), 71 deletions(-)

diff --git a/include/merian-nodes/graph/graph.hpp b/include/merian-nodes/graph/graph.hpp
index 09819f8a..495733a5 100644
--- a/include/merian-nodes/graph/graph.hpp
+++ b/include/merian-nodes/graph/graph.hpp
@@ -461,18 +461,18 @@ class Graph : public std::enable_shared_from_this<Graph<RING_SIZE>> {
         run_in_progress = true;
 
         // wait for the in-flight processing to finish
-        const auto before_gpu_wait = std::chrono::high_resolution_clock::now();
+        Stopwatch sw_gpu_wait;
         InFlightData& in_flight_data = ring_fences.next_cycle_wait_get();
-        gpu_wait_time = gpu_wait_time * 0.8 +
-                        (std::chrono::high_resolution_clock::now() - before_gpu_wait) * 0.2;
+        gpu_wait_time = gpu_wait_time * 0.9 + sw_gpu_wait.duration() * 0.1;
 
-        if (low_latency_mode && !needs_reconnect) {
-            const auto total_wait = gpu_wait_time + cpu_sleep_time;
-            cpu_sleep_time = 0.95 * total_wait;
-            if (cpu_sleep_time < 1ms) {
-                cpu_sleep_time = 0ms;
-            }
+        // Last predicate: gpu_time > cpu_time, i.e. total wait exceeds the rest of time_delta.
+        const auto total_wait =
+            std::max((gpu_wait_time + external_wait_time + cpu_sleep_time - 0.1ms), 0.1ms);
+        if (low_latency_mode && !needs_reconnect && (total_wait > time_delta - total_wait)) {
+            cpu_sleep_time = 0.92 * total_wait;
             std::this_thread::sleep_for(cpu_sleep_time);
+        } else {
+            cpu_sleep_time = 0ms;
         }
 
         // now we can release the resources from staging space and reset the command pool
@@ -553,22 +553,35 @@ class Graph : public std::enable_shared_from_this<Graph<RING_SIZE>> {
 
         // FINISH RUN: submit
 
-        on_pre_submit(run, cmd);
+        {
+            MERIAN_PROFILE_SCOPE_GPU(profiler, cmd, "Pre-Submit");
+            on_pre_submit(run, cmd);
+        }
         cmd_pool->end_all();
         in_flight_data.staging_set_id = resource_allocator->getStaging()->finalizeResourceSet();
-        queue->submit(cmd_pool, ring_fences.reset(), run.get_signal_semaphores(),
-                      run.get_wait_semaphores(), run.get_wait_stages(),
-                      run.get_timeline_semaphore_submit_info());
-        run.execute_callbacks(queue);
-        on_post_submit();
+        {
+            MERIAN_PROFILE_SCOPE(profiler, "Submit");
+            queue->submit(cmd_pool, ring_fences.reset(), run.get_signal_semaphores(),
+                          run.get_wait_semaphores(), run.get_wait_stages(),
+                          run.get_timeline_semaphore_submit_info());
+        }
+        {
+            MERIAN_PROFILE_SCOPE(profiler, "Execute callbacks");
+            run.execute_callbacks(queue);
+        }
+        {
+            MERIAN_PROFILE_SCOPE(profiler, "Post-Submit");
+            on_post_submit();
+        }
 
+        external_wait_time = 0.9 * external_wait_time + 0.1 * run.external_wait_time;
         needs_reconnect |= run.needs_reconnect;
         ++run_iteration;
         ++total_iteration;
+        run_in_progress = false;
         for (const auto& task : on_run_finished_tasks)
             task();
         on_run_finished_tasks.clear();
-        run_in_progress = false;
     }
 
     // waits until all in-flight iterations have finished
@@ -614,6 +627,7 @@ class Graph : public std::enable_shared_from_this<Graph<RING_SIZE>> {
             props.output_text("Total Elapsed: {:%H:%M:%S}s", duration_elapsed);
             props.output_text("Time delta: {:04f}ms", to_milliseconds(time_delta));
             props.output_text("GPU wait: {:04f}ms", to_milliseconds(gpu_wait_time));
+            props.output_text("External wait: {:04f}ms", to_milliseconds(external_wait_time));
 
             props.st_separate();
             if (props.config_options("time overwrite", time_overwrite, {"None", "Time", "Delta"},
@@ -634,9 +648,10 @@ class Graph : public std::enable_shared_from_this<Graph<RING_SIZE>> {
             }
 
             props.st_separate();
-            props.config_bool("low latency", low_latency_mode,
-                              "Delays CPU processing to recude input latency in GPU bound "
-                              "applications. Might reduce framerate.");
+            // Exposes low_latency_mode as a boolean option with a description string.
+            props.config_bool("low latency", low_latency_mode,
+                              "Experimental: Delays CPU processing to reduce input latency in GPU "
+                              "bound applications. Might reduce framerate.");
             if (low_latency_mode) {
                 props.output_text("CPU sleep time: {:04f}ms", to_milliseconds(cpu_sleep_time));
             }
@@ -1675,7 +1690,7 @@ class Graph : public std::enable_shared_from_this<Graph<RING_SIZE>> {
                             remove_connection(src_node, dst_node, dst_input->name);
                             return false;
                         }
-                        it++;
+                        ++it;
                     }
                 }
             }
@@ -1913,6 +1928,7 @@ class Graph : public std::enable_shared_from_this<Graph<RING_SIZE>> {
     bool low_latency_mode = false;
     std::chrono::duration<double> gpu_wait_time = 0ns;
     std::chrono::duration<double> cpu_sleep_time = 0ns;
+    std::chrono::duration<double> external_wait_time = 0ns;
 
     Profiler::Report last_build_report;
     Profiler::Report last_run_report;
diff --git a/include/merian-nodes/graph/graph_run.hpp b/include/merian-nodes/graph/graph_run.hpp
index fe6a4b73..0544e025 100644
--- a/include/merian-nodes/graph/graph_run.hpp
+++ b/include/merian-nodes/graph/graph_run.hpp
@@ -11,6 +11,7 @@
 namespace merian_nodes {
 
 using namespace merian;
+using namespace std::literals::chrono_literals;
 
 // Manages data of a single graph run.
 class GraphRun {
@@ -45,8 +46,8 @@ class GraphRun {
         signal_values.push_back(value);
     }
 
-    void
-    add_submit_callback(const std::function<void(const QueueHandle& queue)>& callback) noexcept {
+    void add_submit_callback(
+        const std::function<void(const QueueHandle& queue, GraphRun& run)>& callback) noexcept {
         submit_callbacks.push_back(callback);
     }
 
@@ -97,15 +98,15 @@ class GraphRun {
 
     // You must call every callback after you submited the graph command buffer
     // Or you use the execute_callbacks function.
-    const std::vector<std::function<void(const QueueHandle& queue)>>&
+    const std::vector<std::function<void(const QueueHandle& queue, GraphRun& run)>>&
     get_submit_callbacks() const noexcept {
         return submit_callbacks;
     }
 
     // Call this after you submitted the graph command buffer
-    void execute_callbacks(const QueueHandle& queue) const {
+    void execute_callbacks(const QueueHandle& queue) {
         for (const auto& callback : submit_callbacks) {
-            callback(queue);
+            callback(queue, *this);
         }
     }
 
@@ -151,6 +152,12 @@ class GraphRun {
         return to_seconds(elapsed_since_connect);
     }
 
+    // Hint the graph that an external wait (any chrono duration) occurred; used to reduce latency.
+    void hint_external_wait_time(auto chrono_duration) {
+        const auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(chrono_duration);
+        external_wait_time = std::max(external_wait_time, ns); // keep the largest hint of this run
+    }
+
   private:
     void reset(const uint64_t iteration,
                const uint32_t in_flight_index,
@@ -173,6 +180,7 @@ class GraphRun {
         signal_semaphores.clear();
         signal_values.clear();
         submit_callbacks.clear();
+        external_wait_time = 0ns;
 
         this->profiler = profiler;
         this->needs_reconnect = false;
@@ -187,7 +195,8 @@ class GraphRun {
     std::vector<vk::Semaphore> signal_semaphores;
     std::vector<uint64_t> signal_values;
 
-    std::vector<std::function<void(const QueueHandle& queue)>> submit_callbacks;
+    std::vector<std::function<void(const QueueHandle& queue, GraphRun& run)>> submit_callbacks;
+    std::chrono::nanoseconds external_wait_time{0}; // largest hinted wait; zeroed again in reset()
 
     ProfilerHandle profiler = nullptr;
     CommandPoolHandle cmd_pool = nullptr;
diff --git a/include/merian-nodes/nodes/glfw_window/glfw_window.hpp b/include/merian-nodes/nodes/glfw_window/glfw_window.hpp
index 35be86e6..48bb8320 100644
--- a/include/merian-nodes/nodes/glfw_window/glfw_window.hpp
+++ b/include/merian-nodes/nodes/glfw_window/glfw_window.hpp
@@ -19,7 +19,7 @@ namespace merian_nodes {
  */
 class GLFWWindow : public Node {
   public:
-    GLFWWindow(const ContextHandle context) : Node() {
+    GLFWWindow(const ContextHandle& context) : Node() {
         if (context->get_extension<ExtensionVkGLFW>()) {
             window = std::make_shared<merian::GLFWWindow>(context);
             swapchain = std::make_shared<merian::Swapchain>(context, window->get_surface());
@@ -47,7 +47,7 @@ class GLFWWindow : public Node {
         acquire.reset();
         for (uint32_t tries = 0; !acquire && tries < 2; tries++) {
             try {
-                acquire = swapchain->acquire(window, 1000 * 1000 /* 1s */);
+                acquire = swapchain->acquire(window, 1000L * 1000L /* 1s? NOTE(review): 1e6 is 1ms if the unit is ns (Vulkan) - confirm */);
             } catch (const Swapchain::needs_recreate& e) {
                 old_swapchains.emplace_back(swapchain);
                 swapchain = std::make_shared<Swapchain>(swapchain);
@@ -87,9 +87,11 @@ class GLFWWindow : public Node {
 
             run.add_wait_semaphore(acquire->wait_semaphore, vk::PipelineStageFlagBits::eTransfer);
             run.add_signal_semaphore(acquire->signal_semaphore);
-            run.add_submit_callback([&](const QueueHandle& queue) {
+            run.add_submit_callback([&](const QueueHandle& queue, GraphRun& run) {
                 try {
+                    Stopwatch present_duration;
                     swapchain->present(queue);
+                    run.hint_external_wait_time(present_duration.duration());
                 } catch (const Swapchain::needs_recreate& e) {
                     // do nothing and hope for the best
                     return;
@@ -106,12 +108,12 @@ class GLFWWindow : public Node {
     }
 
     NodeStatusFlags properties(Properties& config) override {
-        GLFWmonitor* monitor = window ? glfwGetWindowMonitor(*window) : NULL;
-        int fullscreen = monitor != NULL;
+        GLFWmonitor* monitor = window ? glfwGetWindowMonitor(*window) : nullptr;
+        int fullscreen = static_cast<int>(monitor != nullptr);
         const int old_fullscreen = fullscreen;
         config.config_options("mode", fullscreen, {"windowed", "fullscreen"});
         if (window && fullscreen != old_fullscreen) {
-            if (fullscreen) {
+            if (fullscreen != 0) {
                 try {
                     glfwGetWindowPos(*window, &windowed_pos_size[0], &windowed_pos_size[1]);
                 } catch (const ExtensionVkGLFW::glfw_error& e) {
diff --git a/include/merian/utils/stopwatch.hpp b/include/merian/utils/stopwatch.hpp
index 521765d7..67ac9b2d 100644
--- a/include/merian/utils/stopwatch.hpp
+++ b/include/merian/utils/stopwatch.hpp
@@ -15,6 +15,7 @@ class Stopwatch {
     uint64_t nanos() const;
     double millis() const;
     double seconds() const;
+    std::chrono::nanoseconds duration() const;
 
   private:
     chrono_clock::time_point start;
diff --git a/include/merian/vk/window/swapchain.hpp b/include/merian/vk/window/swapchain.hpp
index 6992cdb2..9c95e460 100644
--- a/include/merian/vk/window/swapchain.hpp
+++ b/include/merian/vk/window/swapchain.hpp
@@ -162,7 +162,7 @@ class Swapchain : public std::enable_shared_from_this<Swapchain> {
     }
 
     /* Image index only valid until the next acquire_*() */
-    uint32_t current_image_index() {
+    uint32_t current_image_index() const {
         return current_image_idx;
     }
 
@@ -184,7 +184,7 @@ class Swapchain : public std::enable_shared_from_this<Swapchain> {
     }
 
     /* Remember to also transition image layouts */
-    vk::Extent2D create_swapchain(int width, int height);
+    vk::Extent2D create_swapchain(const uint32_t width, const uint32_t height);
 
     /* Sets vsync. The swapchain is automatically recreated on next aquire.
      * Returns if vsync could be enabled.
@@ -197,11 +197,11 @@ class Swapchain : public std::enable_shared_from_this<Swapchain> {
     }
 
     bool vsync_enabled() const {
-        return cur_present_mode == vk::PresentModeKHR::eFifo;
+        return present_mode == vk::PresentModeKHR::eFifo;
     }
 
     vk::PresentModeKHR get_present_mode() {
-        return cur_present_mode;
+        return present_mode;
     }
 
     // intened to destroy framebuffers and renderpasses when the swapchain is destroyed.
diff --git a/meson.build b/meson.build
index 7323fecf..53517b38 100644
--- a/meson.build
+++ b/meson.build
@@ -93,7 +93,7 @@ if not shaderc.found() and get_option('shaderc').enabled()
     )
 endif
 
-if get_option('shaderc').enabled()
+if shaderc.found()
     add_project_arguments('-DMERIAN_ENABLE_SHADERC', language: 'cpp')
 endif
 
diff --git a/src/merian/utils/stopwatch.cpp b/src/merian/utils/stopwatch.cpp
index 4ab55570..9c7b330c 100644
--- a/src/merian/utils/stopwatch.cpp
+++ b/src/merian/utils/stopwatch.cpp
@@ -1,4 +1,6 @@
 #include "merian/utils/stopwatch.hpp"
+#include "merian/utils/chrono.hpp"
+#include <atomic>
 
 namespace merian {
 
@@ -8,21 +10,31 @@ Stopwatch::Stopwatch() {
 
 void Stopwatch::reset() {
     start = chrono_clock::now();
+    std::atomic_signal_fence(std::memory_order_seq_cst); // compiler-only barrier, no CPU fence
 }
 
 uint64_t Stopwatch::nanos() const {
-    auto end = chrono_clock::now();
+    std::atomic_signal_fence(std::memory_order_seq_cst);
+    const auto end = chrono_clock::now();
     return std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
 }
 
 double Stopwatch::millis() const {
-    auto end = chrono_clock::now();
-    return std::chrono::duration<double, std::milli>(end - start).count();
+    std::atomic_signal_fence(std::memory_order_seq_cst);
+    const auto end = chrono_clock::now();
+    return to_milliseconds(end - start);
 }
 
 double Stopwatch::seconds() const {
-    auto end = chrono_clock::now();
-    return std::chrono::duration<double>(end - start).count();
+    std::atomic_signal_fence(std::memory_order_seq_cst);
+    const auto end = chrono_clock::now();
+    return to_seconds(end - start);
+}
+
+// Elapsed time since `start` was last set (see reset()), converted to whole nanoseconds.
+std::chrono::nanoseconds Stopwatch::duration() const {
+    std::atomic_signal_fence(std::memory_order_seq_cst); // compiler-only barrier before the read
+    return std::chrono::duration_cast<std::chrono::nanoseconds>(chrono_clock::now() - start);
 }
 
 } // namespace merian
diff --git a/src/merian/vk/window/swapchain.cpp b/src/merian/vk/window/swapchain.cpp
index bc6599a0..0991efd4 100644
--- a/src/merian/vk/window/swapchain.cpp
+++ b/src/merian/vk/window/swapchain.cpp
@@ -26,16 +26,15 @@ namespace merian {
 
     if (vsync) {
         return best;
-    } else {
-        // Find a faster mode
-        for (const auto& present_mode : present_modes) {
-            if (present_mode == preferred_vsync_off_mode) {
-                return present_mode;
-            }
-            if (present_mode == vk::PresentModeKHR::eImmediate ||
-                present_mode == vk::PresentModeKHR::eMailbox) {
-                best = present_mode;
-            }
+    }
+    // Find a faster mode
+    for (const auto& present_mode : present_modes) {
+        if (present_mode == preferred_vsync_off_mode) {
+            return present_mode;
+        }
+        if (present_mode == vk::PresentModeKHR::eImmediate ||
+            present_mode == vk::PresentModeKHR::eMailbox) {
+            best = present_mode;
         }
     }
 
@@ -97,7 +96,9 @@ Swapchain::~Swapchain() {
 
 // -------------------------------------------------------------------------------------
 
-vk::Extent2D make_extent2D(vk::SurfaceCapabilitiesKHR capabilities, int width, int height) {
+vk::Extent2D make_extent2D(const vk::SurfaceCapabilitiesKHR capabilities,
+                           const uint32_t width,
+                           const uint32_t height) {
     vk::Extent2D extent;
     if (capabilities.currentExtent.width != UINT32_MAX) {
         // If the surface size is defined, the image size must match
@@ -112,7 +113,7 @@ vk::Extent2D make_extent2D(vk::SurfaceCapabilitiesKHR capabilities, int width, i
     return extent;
 }
 
-vk::Extent2D Swapchain::create_swapchain(int width, int height) {
+vk::Extent2D Swapchain::create_swapchain(const uint32_t width, const uint32_t height) {
     vk::SwapchainKHR old = VK_NULL_HANDLE;
     if (old_swapchain.expired()) {
         SPDLOG_DEBUG("create swapchain");
@@ -137,7 +138,7 @@ vk::Extent2D Swapchain::create_swapchain(int width, int height) {
     }
 
     // clang-format off
-    vk::SwapchainCreateInfoKHR createInfo(
+    vk::SwapchainCreateInfoKHR create_info(
                                           vk::SwapchainCreateFlagBitsKHR(),
                                           *surface,
                                           min_images,
@@ -152,11 +153,11 @@ vk::Extent2D Swapchain::create_swapchain(int width, int height) {
                                           pre_transform,
                                           vk::CompositeAlphaFlagBitsKHR::eOpaque,
                                           present_mode,
-                                          false,
+                                          VK_FALSE,
                                           old
                                           );
 
-    swapchain = context->device.createSwapchainKHR(createInfo, nullptr);
+    swapchain = context->device.createSwapchainKHR(create_info, nullptr);
 
     std::vector<vk::Image> swapchain_images = context->device.getSwapchainImagesKHR(swapchain);
     num_images = swapchain_images.size();
@@ -172,7 +173,7 @@ vk::Extent2D Swapchain::create_swapchain(int width, int height) {
         entry.image = swapchain_images[i];
 
         // View
-        vk::ImageViewCreateInfo createInfo(
+        vk::ImageViewCreateInfo create_info(
                                            vk::ImageViewCreateFlagBits(),
                                            entry.image,
                                            vk::ImageViewType::e2D,
@@ -188,14 +189,14 @@ vk::Extent2D Swapchain::create_swapchain(int width, int height) {
                                             0, 1, 0, 1
                                         }
                                         );
-        entry.imageView = context->device.createImageView(createInfo);
+        entry.imageView = context->device.createImageView(create_info);
 
         // Semaphore
         semaphore_group.read_semaphore = std::make_shared<BinarySemaphore>(context);
         semaphore_group.written_semaphore = std::make_shared<BinarySemaphore>(context);
 
         // Barrier
-        vk::ImageSubresourceRange imageSubresourceRange {
+        vk::ImageSubresourceRange image_subresource_range {
             vk::ImageAspectFlagBits::eColor,    
             0,
             VK_REMAINING_MIP_LEVELS,    
@@ -210,7 +211,7 @@ vk::Extent2D Swapchain::create_swapchain(int width, int height) {
             {},
             {},
             entry.image,
-            imageSubresourceRange,
+            image_subresource_range,
         };
         barriers[i] = barrier;
     }
@@ -267,13 +268,12 @@ Swapchain::acquire(const std::function<vk::Extent2D()>& framebuffer_extent,
 
     SwapchainAcquireResult aquire_result;
 
-    if ((extent.width != cur_width || extent.height != cur_height ||
-         present_mode != cur_present_mode)) {
-        if (!swapchain) {
-            create_swapchain(extent.width, extent.height);
-        } else {
-            throw needs_recreate("changed framebuffer size");
-        }
+    if (!swapchain) {
+        create_swapchain(extent.width, extent.height);
+    } else if (extent.width != cur_width || extent.height != cur_height) {
+        throw needs_recreate("changed framebuffer size");
+    } else if (present_mode != cur_present_mode) {
+        throw needs_recreate("changed present mode (vsync)");
     }
 
     const vk::Result result = context->device.acquireNextImageKHR(
@@ -294,11 +294,13 @@ Swapchain::acquire(const std::function<vk::Extent2D()>& framebuffer_extent,
         created = false;
 
         return aquire_result;
-    } else if (result == vk::Result::eErrorOutOfDateKHR || result == vk::Result::eSuboptimalKHR) {
+    }
+
+    if (result == vk::Result::eErrorOutOfDateKHR || result == vk::Result::eSuboptimalKHR) {
         throw needs_recreate(result);
-    } else {
-        return std::nullopt;
     }
+
+    return std::nullopt;
 }
 
 void Swapchain::present(const QueueHandle& queue) {