Skip to content

Commit

Permalink
merian: Frame timing related fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
LDAP committed Aug 12, 2024
1 parent 966d555 commit 9bcded8
Show file tree
Hide file tree
Showing 8 changed files with 113 additions and 71 deletions.
56 changes: 36 additions & 20 deletions include/merian-nodes/graph/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,18 +461,18 @@ class Graph : public std::enable_shared_from_this<Graph<RING_SIZE>> {
run_in_progress = true;

// wait for the in-flight processing to finish
const auto before_gpu_wait = std::chrono::high_resolution_clock::now();
Stopwatch sw_gpu_wait;
InFlightData& in_flight_data = ring_fences.next_cycle_wait_get();
gpu_wait_time = gpu_wait_time * 0.8 +
(std::chrono::high_resolution_clock::now() - before_gpu_wait) * 0.2;
gpu_wait_time = gpu_wait_time * 0.9 + sw_gpu_wait.duration() * 0.1;

if (low_latency_mode && !needs_reconnect) {
const auto total_wait = gpu_wait_time + cpu_sleep_time;
cpu_sleep_time = 0.95 * total_wait;
if (cpu_sleep_time < 1ms) {
cpu_sleep_time = 0ms;
}
// last pred: gpu_time > cpu_time
const auto total_wait =
std::max((gpu_wait_time + external_wait_time + cpu_sleep_time - 0.1ms), 0.1ms);
if (low_latency_mode && !needs_reconnect && (total_wait > time_delta - total_wait)) {
cpu_sleep_time = 0.92 * total_wait;
std::this_thread::sleep_for(cpu_sleep_time);
} else {
cpu_sleep_time = 0ms;
}

// now we can release the resources from staging space and reset the command pool
Expand Down Expand Up @@ -553,22 +553,35 @@ class Graph : public std::enable_shared_from_this<Graph<RING_SIZE>> {

// FINISH RUN: submit

on_pre_submit(run, cmd);
{
MERIAN_PROFILE_SCOPE_GPU(profiler, cmd, "Pre-Submit");
on_pre_submit(run, cmd);
}
cmd_pool->end_all();
in_flight_data.staging_set_id = resource_allocator->getStaging()->finalizeResourceSet();
queue->submit(cmd_pool, ring_fences.reset(), run.get_signal_semaphores(),
run.get_wait_semaphores(), run.get_wait_stages(),
run.get_timeline_semaphore_submit_info());
run.execute_callbacks(queue);
on_post_submit();
{
MERIAN_PROFILE_SCOPE(profiler, "Submit");
queue->submit(cmd_pool, ring_fences.reset(), run.get_signal_semaphores(),
run.get_wait_semaphores(), run.get_wait_stages(),
run.get_timeline_semaphore_submit_info());
}
{
MERIAN_PROFILE_SCOPE(profiler, "Execute callbacks");
run.execute_callbacks(queue);
}
{
MERIAN_PROFILE_SCOPE(profiler, "Post-Submit");
on_post_submit();
}

external_wait_time = 0.9 * external_wait_time + 0.1 * run.external_wait_time;
needs_reconnect |= run.needs_reconnect;
++run_iteration;
++total_iteration;
run_in_progress = false;
for (const auto& task : on_run_finished_tasks)
task();
on_run_finished_tasks.clear();
run_in_progress = false;
}

// waits until all in-flight iterations have finished
Expand Down Expand Up @@ -614,6 +627,7 @@ class Graph : public std::enable_shared_from_this<Graph<RING_SIZE>> {
props.output_text("Total Elapsed: {:%H:%M:%S}s", duration_elapsed);
props.output_text("Time delta: {:04f}ms", to_milliseconds(time_delta));
props.output_text("GPU wait: {:04f}ms", to_milliseconds(gpu_wait_time));
props.output_text("External wait: {:04f}ms", to_milliseconds(external_wait_time));

props.st_separate();
if (props.config_options("time overwrite", time_overwrite, {"None", "Time", "Delta"},
Expand All @@ -634,9 +648,10 @@ class Graph : public std::enable_shared_from_this<Graph<RING_SIZE>> {
}

props.st_separate();
props.config_bool("low latency", low_latency_mode,
"Delays CPU processing to recude input latency in GPU bound "
"applications. Might reduce framerate.");
props.config_bool(
"low latency", low_latency_mode,
"Experimental: Delays CPU processing to recude input latency in GPU bound "
"applications. Might reduce framerate.");
if (low_latency_mode) {
props.output_text("CPU sleep time: {:04f}ms", to_milliseconds(cpu_sleep_time));
}
Expand Down Expand Up @@ -1675,7 +1690,7 @@ class Graph : public std::enable_shared_from_this<Graph<RING_SIZE>> {
remove_connection(src_node, dst_node, dst_input->name);
return false;
}
it++;
++it;
}
}
}
Expand Down Expand Up @@ -1913,6 +1928,7 @@ class Graph : public std::enable_shared_from_this<Graph<RING_SIZE>> {
bool low_latency_mode = false;
std::chrono::duration<double> gpu_wait_time = 0ns;
std::chrono::duration<double> cpu_sleep_time = 0ns;
std::chrono::duration<double> external_wait_time = 0ns;

Profiler::Report last_build_report;
Profiler::Report last_run_report;
Expand Down
21 changes: 15 additions & 6 deletions include/merian-nodes/graph/graph_run.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
namespace merian_nodes {

using namespace merian;
using namespace std::literals::chrono_literals;

// Manages data of a single graph run.
class GraphRun {
Expand Down Expand Up @@ -45,8 +46,8 @@ class GraphRun {
signal_values.push_back(value);
}

void
add_submit_callback(const std::function<void(const QueueHandle& queue)>& callback) noexcept {
void add_submit_callback(
const std::function<void(const QueueHandle& queue, GraphRun& run)>& callback) noexcept {
submit_callbacks.push_back(callback);
}

Expand Down Expand Up @@ -97,15 +98,15 @@ class GraphRun {

// You must call every callback after you have submitted the graph command buffer,
// or use the execute_callbacks function instead.
const std::vector<std::function<void(const QueueHandle& queue)>>&
const std::vector<std::function<void(const QueueHandle& queue, GraphRun& run)>>&
get_submit_callbacks() const noexcept {
return submit_callbacks;
}

// Call this after you submitted the graph command buffer
void execute_callbacks(const QueueHandle& queue) const {
void execute_callbacks(const QueueHandle& queue) {
for (const auto& callback : submit_callbacks) {
callback(queue);
callback(queue, *this);
}
}

Expand Down Expand Up @@ -151,6 +152,12 @@ class GraphRun {
return to_seconds(elapsed_since_connect);
}

// Hint the graph that waiting was necessary for external events. This information can be used
// to shift CPU processing back to reduce waiting and reduce latency.
//
// Accepts any std::chrono::duration. The value is converted to nanoseconds (truncating towards
// zero) before the comparison, because std::max needs both arguments to have the same type;
// without the conversion only an exact std::chrono::nanoseconds argument would compile.
// The largest hint seen so far is kept in external_wait_time.
void hint_external_wait_time(auto chrono_duration) {
    external_wait_time =
        std::max(external_wait_time,
                 std::chrono::duration_cast<std::chrono::nanoseconds>(chrono_duration));
}

private:
void reset(const uint64_t iteration,
const uint32_t in_flight_index,
Expand All @@ -173,6 +180,7 @@ class GraphRun {
signal_semaphores.clear();
signal_values.clear();
submit_callbacks.clear();
external_wait_time = 0ns;

this->profiler = profiler;
this->needs_reconnect = false;
Expand All @@ -187,7 +195,8 @@ class GraphRun {
std::vector<vk::Semaphore> signal_semaphores;
std::vector<uint64_t> signal_values;

std::vector<std::function<void(const QueueHandle& queue)>> submit_callbacks;
std::vector<std::function<void(const QueueHandle& queue, GraphRun& run)>> submit_callbacks;
std::chrono::nanoseconds external_wait_time;

ProfilerHandle profiler = nullptr;
CommandPoolHandle cmd_pool = nullptr;
Expand Down
14 changes: 8 additions & 6 deletions include/merian-nodes/nodes/glfw_window/glfw_window.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ namespace merian_nodes {
*/
class GLFWWindow : public Node {
public:
GLFWWindow(const ContextHandle context) : Node() {
GLFWWindow(const ContextHandle& context) : Node() {
if (context->get_extension<ExtensionVkGLFW>()) {
window = std::make_shared<merian::GLFWWindow>(context);
swapchain = std::make_shared<merian::Swapchain>(context, window->get_surface());
Expand Down Expand Up @@ -47,7 +47,7 @@ class GLFWWindow : public Node {
acquire.reset();
for (uint32_t tries = 0; !acquire && tries < 2; tries++) {
try {
acquire = swapchain->acquire(window, 1000 * 1000 /* 1s */);
acquire = swapchain->acquire(window, 1000L * 1000L /* 1s */);
} catch (const Swapchain::needs_recreate& e) {
old_swapchains.emplace_back(swapchain);
swapchain = std::make_shared<Swapchain>(swapchain);
Expand Down Expand Up @@ -87,9 +87,11 @@ class GLFWWindow : public Node {

run.add_wait_semaphore(acquire->wait_semaphore, vk::PipelineStageFlagBits::eTransfer);
run.add_signal_semaphore(acquire->signal_semaphore);
run.add_submit_callback([&](const QueueHandle& queue) {
run.add_submit_callback([&](const QueueHandle& queue, GraphRun& run) {
try {
Stopwatch present_duration;
swapchain->present(queue);
run.hint_external_wait_time(present_duration.duration());
} catch (const Swapchain::needs_recreate& e) {
// do nothing and hope for the best
return;
Expand All @@ -106,12 +108,12 @@ class GLFWWindow : public Node {
}

NodeStatusFlags properties(Properties& config) override {
GLFWmonitor* monitor = window ? glfwGetWindowMonitor(*window) : NULL;
int fullscreen = monitor != NULL;
GLFWmonitor* monitor = window ? glfwGetWindowMonitor(*window) : nullptr;
int fullscreen = static_cast<int>(monitor != nullptr);
const int old_fullscreen = fullscreen;
config.config_options("mode", fullscreen, {"windowed", "fullscreen"});
if (window && fullscreen != old_fullscreen) {
if (fullscreen) {
if (fullscreen != 0) {
try {
glfwGetWindowPos(*window, &windowed_pos_size[0], &windowed_pos_size[1]);
} catch (const ExtensionVkGLFW::glfw_error& e) {
Expand Down
1 change: 1 addition & 0 deletions include/merian/utils/stopwatch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class Stopwatch {
uint64_t nanos() const;
double millis() const;
double seconds() const;
std::chrono::nanoseconds duration() const;

private:
chrono_clock::time_point start;
Expand Down
8 changes: 4 additions & 4 deletions include/merian/vk/window/swapchain.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ class Swapchain : public std::enable_shared_from_this<Swapchain> {
}

/* Image index only valid until the next acquire_*() */
uint32_t current_image_index() {
uint32_t current_image_index() const {
return current_image_idx;
}

Expand All @@ -184,7 +184,7 @@ class Swapchain : public std::enable_shared_from_this<Swapchain> {
}

/* Remember to also transition image layouts */
vk::Extent2D create_swapchain(int width, int height);
vk::Extent2D create_swapchain(const uint32_t width, const uint32_t height);

/* Sets vsync. The swapchain is automatically recreated on next acquire.
* Returns if vsync could be enabled.
Expand All @@ -197,11 +197,11 @@ class Swapchain : public std::enable_shared_from_this<Swapchain> {
}

bool vsync_enabled() const {
return cur_present_mode == vk::PresentModeKHR::eFifo;
return present_mode == vk::PresentModeKHR::eFifo;
}

vk::PresentModeKHR get_present_mode() {
return cur_present_mode;
return present_mode;
}

// intended to destroy framebuffers and renderpasses when the swapchain is destroyed.
Expand Down
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ if not shaderc.found() and get_option('shaderc').enabled()
)
endif

if get_option('shaderc').enabled()
if shaderc.found()
add_project_arguments('-DMERIAN_ENABLE_SHADERC', language: 'cpp')
endif

Expand Down
22 changes: 17 additions & 5 deletions src/merian/utils/stopwatch.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#include "merian/utils/stopwatch.hpp"
#include "merian/utils/chrono.hpp"
#include <atomic>

namespace merian {

Expand All @@ -8,21 +10,31 @@ Stopwatch::Stopwatch() {

// Restarts the stopwatch by storing the current clock reading as the new start point.
void Stopwatch::reset() {
start = chrono_clock::now();
// std::atomic_signal_fence only constrains compiler reordering; it issues no CPU
// memory-ordering instruction.
// NOTE(review): presumably placed after the clock read so the compiler cannot move the
// code being timed ahead of the start timestamp - confirm intent.
std::atomic_signal_fence(std::memory_order_seq_cst);
}

uint64_t Stopwatch::nanos() const {
auto end = chrono_clock::now();
std::atomic_signal_fence(std::memory_order_seq_cst);
const auto end = chrono_clock::now();
return std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
}

double Stopwatch::millis() const {
auto end = chrono_clock::now();
return std::chrono::duration<double, std::milli>(end - start).count();
std::atomic_signal_fence(std::memory_order_seq_cst);
const auto end = chrono_clock::now();
return to_milliseconds(end - start);
}

double Stopwatch::seconds() const {
auto end = chrono_clock::now();
return std::chrono::duration<double>(end - start).count();
std::atomic_signal_fence(std::memory_order_seq_cst);
const auto end = chrono_clock::now();
return to_seconds(end - start);
}

// Returns the time elapsed since the start point stored by reset(), as a std::chrono duration
// in nanoseconds.
std::chrono::nanoseconds Stopwatch::duration() const {
    // Compiler-only barrier (no CPU fence instruction is issued) ahead of the clock read.
    // NOTE(review): presumably keeps the compiler from sinking the timed code below the end
    // timestamp - confirm intent.
    std::atomic_signal_fence(std::memory_order_seq_cst);
    const auto end = chrono_clock::now();
    // Explicit cast, as in nanos(): keeps this compiling even if chrono_clock's native tick
    // type is not implicitly convertible to integer nanoseconds.
    return std::chrono::duration_cast<std::chrono::nanoseconds>(end - start);
}

} // namespace merian
Loading

0 comments on commit 9bcded8

Please sign in to comment.