diff --git a/src/optick.config.h b/src/optick.config.h index 59f24523..18d4ee9d 100644 --- a/src/optick.config.h +++ b/src/optick.config.h @@ -67,7 +67,12 @@ #if defined(_MSC_VER) #define OPTICK_ENABLE_GPU_VULKAN (OPTICK_ENABLE_GPU /*&& 0*/) #else -#define OPTICK_ENABLE_GPU_VULKAN (0) +#define OPTICK_ENABLE_GPU_VULKAN (OPTICK_ENABLE_GPU /*&& 0*/) +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Vulkan Functions - static+dynamic (1) or dynamic linking only (0) +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if !defined(OPTICK_STATIC_VULKAN_FUNCTIONS) +#define OPTICK_STATIC_VULKAN_FUNCTIONS (1) #endif #endif - diff --git a/src/optick.h b/src/optick.h index 96b28b99..9d2d02ec 100644 --- a/src/optick.h +++ b/src/optick.h @@ -96,6 +96,7 @@ // Vulkan Forward Declarations #define OPTICK_DEFINE_HANDLE(object) typedef struct object##_T *object; +OPTICK_DEFINE_HANDLE(VkInstance); OPTICK_DEFINE_HANDLE(VkDevice); OPTICK_DEFINE_HANDLE(VkPhysicalDevice); OPTICK_DEFINE_HANDLE(VkQueue); @@ -103,11 +104,13 @@ OPTICK_DEFINE_HANDLE(VkCommandBuffer); OPTICK_DEFINE_HANDLE(VkQueryPool); OPTICK_DEFINE_HANDLE(VkCommandPool); OPTICK_DEFINE_HANDLE(VkFence); +OPTICK_DEFINE_HANDLE(VkEvent); struct VkPhysicalDeviceProperties; struct VkQueryPoolCreateInfo; struct VkAllocationCallbacks; struct VkCommandPoolCreateInfo; +struct VkEventCreateInfo; struct VkCommandBufferAllocateInfo; struct VkFenceCreateInfo; struct VkSubmitInfo; @@ -123,12 +126,18 @@ struct VkCommandBufferBeginInfo; #endif #endif +typedef void* (VKAPI_PTR *PFN_vkGetInstanceProcAddr_)(VkInstance instance, const char* pName); typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceProperties_)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); typedef int32_t (VKAPI_PTR *PFN_vkCreateQueryPool_)(VkDevice device, const VkQueryPoolCreateInfo* 
pCreateInfo, const VkAllocationCallbacks* pAllocator, VkQueryPool* pQueryPool); typedef int32_t (VKAPI_PTR *PFN_vkCreateCommandPool_)(VkDevice device, const VkCommandPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkCommandPool* pCommandPool); +typedef int32_t (VKAPI_PTR *PFN_vkCreateEvent_)(VkDevice device, const VkEventCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkEvent* pEvent); typedef int32_t (VKAPI_PTR *PFN_vkAllocateCommandBuffers_)(VkDevice device, const VkCommandBufferAllocateInfo* pAllocateInfo, VkCommandBuffer* pCommandBuffers); typedef int32_t (VKAPI_PTR *PFN_vkCreateFence_)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkFence* pFence); typedef void (VKAPI_PTR *PFN_vkCmdResetQueryPool_)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount); +typedef void (VKAPI_PTR *PFN_vkResetQueryPool_)(VkDevice device, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount); +typedef void (VKAPI_PTR *PFN_vkCmdWaitEvents_)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, uint32_t srcStageMask, uint32_t dstStageMask, uint32_t memoryBarrierCount, const void* pMemoryBarriers, uint32_t bufferMemoryBarrierCount, const void* pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, const void* pImageMemoryBarriers); +typedef int32_t (VKAPI_PTR *PFN_vkResetEvent_)(VkDevice device, VkEvent event); +typedef int32_t (VKAPI_PTR *PFN_vkSetEvent_)(VkDevice device, VkEvent event); typedef int32_t (VKAPI_PTR *PFN_vkQueueSubmit_)(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence); typedef int32_t (VKAPI_PTR *PFN_vkWaitForFences_)(VkDevice device, uint32_t fenceCount, const VkFence* pFences, uint32_t waitAll, uint64_t timeout); typedef int32_t (VKAPI_PTR *PFN_vkResetCommandBuffer_)(VkCommandBuffer commandBuffer, uint32_t flags); @@ -139,8 +148,10 @@ typedef int32_t (VKAPI_PTR 
*PFN_vkEndCommandBuffer_)(VkCommandBuffer commandBuff typedef int32_t (VKAPI_PTR *PFN_vkResetFences_)(VkDevice device, uint32_t fenceCount, const VkFence* pFences); typedef void (VKAPI_PTR *PFN_vkDestroyCommandPool_)(VkDevice device, VkCommandPool commandPool, const VkAllocationCallbacks* pAllocator); typedef void (VKAPI_PTR *PFN_vkDestroyQueryPool_)(VkDevice device, VkQueryPool queryPool, const VkAllocationCallbacks* pAllocator); +typedef void (VKAPI_PTR *PFN_vkDestroyEvent_)(VkDevice device, VkEvent event, const VkAllocationCallbacks* pAllocator); typedef void (VKAPI_PTR *PFN_vkDestroyFence_)(VkDevice device, VkFence fence, const VkAllocationCallbacks* pAllocator); typedef void (VKAPI_PTR *PFN_vkFreeCommandBuffers_)(VkDevice device, VkCommandPool commandPool, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers); +typedef int32_t (VKAPI_PTR *PFN_vkGetPastPresentationTimingGOOGLE_)(VkDevice device, void* swapchain, uint32_t* pPresentationTimingCount, void* pPresentationTimings); #if OPTICK_VKAPI_PTR_DEFINED #undef VKAPI_PTR @@ -156,12 +167,18 @@ namespace Optick { struct OPTICK_API VulkanFunctions { + PFN_vkGetInstanceProcAddr_ vkGetInstanceProcAddr; PFN_vkGetPhysicalDeviceProperties_ vkGetPhysicalDeviceProperties; PFN_vkCreateQueryPool_ vkCreateQueryPool; PFN_vkCreateCommandPool_ vkCreateCommandPool; + PFN_vkCreateEvent_ vkCreateEvent; PFN_vkAllocateCommandBuffers_ vkAllocateCommandBuffers; PFN_vkCreateFence_ vkCreateFence; PFN_vkCmdResetQueryPool_ vkCmdResetQueryPool; + PFN_vkResetQueryPool_ vkResetQueryPool; + PFN_vkCmdWaitEvents_ vkCmdWaitEvents; + PFN_vkResetEvent_ vkResetEvent; + PFN_vkSetEvent_ vkSetEvent; PFN_vkQueueSubmit_ vkQueueSubmit; PFN_vkWaitForFences_ vkWaitForFences; PFN_vkResetCommandBuffer_ vkResetCommandBuffer; @@ -172,8 +189,10 @@ namespace Optick PFN_vkResetFences_ vkResetFences; PFN_vkDestroyCommandPool_ vkDestroyCommandPool; PFN_vkDestroyQueryPool_ vkDestroyQueryPool; + PFN_vkDestroyEvent_ vkDestroyEvent; PFN_vkDestroyFence_ 
vkDestroyFence; PFN_vkFreeCommandBuffers_ vkFreeCommandBuffers; + PFN_vkGetPastPresentationTimingGOOGLE_ vkGetPastPresentationTimingGOOGLE; }; // Source: http://msdn.microsoft.com/en-us/library/system.windows.media.colors(v=vs.110).aspx @@ -639,8 +658,8 @@ struct OPTICK_API EventDescription uint32_t filter; uint8_t flags; - static EventDescription* Create(const char* eventName, const char* fileName, const unsigned long fileLine, const unsigned long eventColor = Color::Null, const unsigned long filter = 0, const uint8_t eventFlags = 0); - static EventDescription* CreateShared(const char* eventName, const char* fileName = nullptr, const unsigned long fileLine = 0, const unsigned long eventColor = Color::Null, const unsigned long filter = 0); + static EventDescription* Create(const char* eventName, const char* fileName, const uint32_t fileLine, const uint32_t eventColor = Color::Null, const uint32_t filter = 0, const uint8_t eventFlags = 0); + static EventDescription* CreateShared(const char* eventName, const char* fileName = nullptr, const uint32_t fileLine = 0, const uint32_t eventColor = Color::Null, const uint32_t filter = 0); EventDescription(); private: @@ -681,11 +700,11 @@ OPTICK_INLINE Optick::EventDescription* CreateDescription(const char* functionNa if (eventName != nullptr) flags |= ::Optick::EventDescription::IS_CUSTOM_NAME; - return ::Optick::EventDescription::Create(eventName != nullptr ? eventName : functionName, fileName, (unsigned long)fileLine, ::Optick::Category::GetColor(category), ::Optick::Category::GetMask(category), flags); + return ::Optick::EventDescription::Create(eventName != nullptr ? 
eventName : functionName, fileName, (uint32_t)fileLine, ::Optick::Category::GetColor(category), ::Optick::Category::GetMask(category), flags); } OPTICK_INLINE Optick::EventDescription* CreateDescription(const char* functionName, const char* fileName, int fileLine, const ::Optick::Category::Type category) { - return ::Optick::EventDescription::Create(functionName, fileName, (unsigned long)fileLine, ::Optick::Category::GetColor(category), ::Optick::Category::GetMask(category)); + return ::Optick::EventDescription::Create(functionName, fileName, (uint32_t)fileLine, ::Optick::Category::GetColor(category), ::Optick::Category::GetMask(category)); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct OPTICK_API GPUEvent @@ -717,11 +736,22 @@ struct OPTICK_API Tag static void Attach(const EventDescription& description, const char* val); static void Attach(const EventDescription& description, const char* val, uint16_t length); + static void Attach(EventStorage* storage, int64_t timestamp, const EventDescription& description, float val); + static void Attach(EventStorage* storage, int64_t timestamp, const EventDescription& description, int32_t val); + static void Attach(EventStorage* storage, int64_t timestamp, const EventDescription& description, uint32_t val); + static void Attach(EventStorage* storage, int64_t timestamp, const EventDescription& description, uint64_t val); + static void Attach(EventStorage* storage, int64_t timestamp, const EventDescription& description, float val[3]); + static void Attach(EventStorage* storage, int64_t timestamp, const EventDescription& description, const char* val); + // Derived static void Attach(const EventDescription& description, float x, float y, float z) { float p[3] = { x, y, z }; Attach(description, p); } + static void Attach(EventStorage* storage, int64_t timestamp, const EventDescription& description, float x, float y, float z) + { + float p[3] 
= { x, y, z }; Attach(storage, timestamp, description, p); + } }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -762,8 +792,8 @@ struct OPTICK_API GPUContext }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// OPTICK_API void InitGpuD3D12(ID3D12Device* device, ID3D12CommandQueue** cmdQueues, uint32_t numQueues); -OPTICK_API void InitGpuVulkan(VkDevice* vkDevices, VkPhysicalDevice* vkPhysicalDevices, VkQueue* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues, const VulkanFunctions* functions); -OPTICK_API void GpuFlip(void* swapChain); +OPTICK_API void InitGpuVulkan(VkInstance vkInstance, VkDevice* vkDevices, VkPhysicalDevice* vkPhysicalDevices, VkQueue* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues, const VulkanFunctions* functions); +OPTICK_API void GpuFlip(void* swapChain, uint32_t frameID = 0); OPTICK_API GPUContext SetGpuContext(GPUContext context); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct OPTICK_API GPUContextScope @@ -780,6 +810,12 @@ struct OPTICK_API GPUContextScope prevContext = SetGpuContext(GPUContext(cmdBuffer, queue, node)); } + // SRS - add typeless void* commandHandle prototype to support runtime selection of graphics API + GPUContextScope(void* commandHandle, GPUQueueType queue = GPU_QUEUE_GRAPHICS, int node = 0) + { + prevContext = SetGpuContext(GPUContext(commandHandle, queue, node)); + } + ~GPUContextScope() { SetGpuContext(prevContext); @@ -923,7 +959,7 @@ struct OptickApp #define OPTICK_STOP_THREAD() ::Optick::UnRegisterThread(false); // Attaches a custom data-tag. 
-// Supported types: int32, uint32, uint64, vec3, string (cut to 32 characters) +// Supported types: float, int32, uint32, uint64, vec3, string (cut to 32 characters) // Example: // OPTICK_TAG("PlayerName", name[index]); // OPTICK_TAG("Health", 100); @@ -1000,6 +1036,18 @@ struct OptickApp #define OPTICK_STORAGE_PUSH(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START) if (::Optick::IsActive()) { ::Optick::Event::Push(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START); } #define OPTICK_STORAGE_POP(STORAGE, CPU_TIMESTAMP_FINISH) if (::Optick::IsActive()) { ::Optick::Event::Pop(STORAGE, CPU_TIMESTAMP_FINISH); } +// Attaches a custom data-tag to the custom storage. +// Supported types: float, int32, uint32, uint64, vec3, string (cut to 32 characters) +// Example: +// OPTICK_STORAGE_TAG(IOStorage, cpuTimestamp, "PlayerName", name[index]); +// OPTICK_STORAGE_TAG(IOStorage, cpuTimestamp, "Health", 100); +// OPTICK_STORAGE_TAG(IOStorage, cpuTimestamp, "Score", 0x80000000u); +// OPTICK_STORAGE_TAG(IOStorage, cpuTimestamp, "Height(cm)", 176.3f); +// OPTICK_STORAGE_TAG(IOStorage, cpuTimestamp, "Address", (uint64_t)this); +// OPTICK_STORAGE_TAG(IOStorage, cpuTimestamp, "Position", 123.0f, 456.0f, 789.0f); +#define OPTICK_STORAGE_TAG(STORAGE, CPU_TIMESTAMP, NAME, ...) 
static ::Optick::EventDescription* OPTICK_CONCAT(autogen_tag_, __LINE__) = nullptr; \ + if (OPTICK_CONCAT(autogen_tag_, __LINE__) == nullptr) OPTICK_CONCAT(autogen_tag_, __LINE__) = ::Optick::EventDescription::Create( NAME, __FILE__, __LINE__ ); \ + ::Optick::Tag::Attach(STORAGE, CPU_TIMESTAMP, *OPTICK_CONCAT(autogen_tag_, __LINE__), __VA_ARGS__); \ // Registers state change callback // If callback returns false - the call is repeated the next frame @@ -1024,7 +1072,7 @@ struct OptickApp // GPU events #define OPTICK_GPU_INIT_D3D12(DEVICE, CMD_QUEUES, NUM_CMD_QUEUS) ::Optick::InitGpuD3D12(DEVICE, CMD_QUEUES, NUM_CMD_QUEUS); -#define OPTICK_GPU_INIT_VULKAN(DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS, FUNCTIONS) ::Optick::InitGpuVulkan(DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS, FUNCTIONS); +#define OPTICK_GPU_INIT_VULKAN(INSTANCE, DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS, FUNCTIONS) ::Optick::InitGpuVulkan(INSTANCE, DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS, FUNCTIONS); // Setup GPU context: // Params: @@ -1041,7 +1089,7 @@ struct OptickApp if (OPTICK_CONCAT(gpu_autogen_description_, __LINE__) == nullptr) OPTICK_CONCAT(gpu_autogen_description_, __LINE__) = ::Optick::EventDescription::Create( NAME, __FILE__, __LINE__ ); \ ::Optick::GPUEvent OPTICK_CONCAT(gpu_autogen_event_, __LINE__)( *(OPTICK_CONCAT(gpu_autogen_description_, __LINE__)) ); \ -#define OPTICK_GPU_FLIP(SWAP_CHAIN) ::Optick::GpuFlip(SWAP_CHAIN); +#define OPTICK_GPU_FLIP(...) 
::Optick::GpuFlip(__VA_ARGS__); ///////////////////////////////////////////////////////////////////////////////// // [Automation][Startup] @@ -1095,14 +1143,15 @@ struct OptickApp #define OPTICK_STORAGE_EVENT(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START, CPU_TIMESTAMP_FINISH) #define OPTICK_STORAGE_PUSH(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START) #define OPTICK_STORAGE_POP(STORAGE, CPU_TIMESTAMP_FINISH) +#define OPTICK_STORAGE_TAG(STORAGE, CPU_TIMESTAMP, NAME, ...) #define OPTICK_SET_STATE_CHANGED_CALLBACK(CALLBACK) -#define OPTICK_SET_MEMORY_ALLOCATOR(ALLOCATE_FUNCTION, DEALLOCATE_FUNCTION) +#define OPTICK_SET_MEMORY_ALLOCATOR(ALLOCATE_FUNCTION, DEALLOCATE_FUNCTION, INIT_THREAD_CALLBACK) #define OPTICK_SHUTDOWN() #define OPTICK_GPU_INIT_D3D12(DEVICE, CMD_QUEUES, NUM_CMD_QUEUS) -#define OPTICK_GPU_INIT_VULKAN(DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS, FUNCTIONS) +#define OPTICK_GPU_INIT_VULKAN(INSTANCE, DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS, FUNCTIONS) #define OPTICK_GPU_CONTEXT(...) #define OPTICK_GPU_EVENT(NAME) -#define OPTICK_GPU_FLIP(SWAP_CHAIN) +#define OPTICK_GPU_FLIP(...) #define OPTICK_UPDATE() #define OPTICK_FRAME_FLIP(...) #define OPTICK_FRAME_EVENT(FRAME_TYPE, ...) 
diff --git a/src/optick_common.h b/src/optick_common.h index be496c9d..a68cacfd 100644 --- a/src/optick_common.h +++ b/src/optick_common.h @@ -29,10 +29,12 @@ #include "optick.h" #include +#include #include #include #include #include +#include #if defined(OPTICK_MSVC) @@ -133,21 +135,26 @@ static const ProcessID INVALID_PROCESS_ID = (ProcessID)-1; // Asserts //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #if defined(OPTICK_MSVC) -#define OPTICK_DEBUG_BREAK __debugbreak() + #define OPTICK_DEBUG_BREAK(description) OutputDebugString(TEXT("Optick ERROR: ") description TEXT("\n")); __debugbreak() #elif defined(OPTICK_GCC) -#define OPTICK_DEBUG_BREAK __builtin_trap() +#if defined(__clang__) // __builtin_debugtrap is a Clang builtin; testing it with an unguarded __has_builtin() is a preprocessor error on compilers that lack that operator (e.g. GCC before 10) + #define OPTICK_DEBUG_BREAK(description) std::cerr << "Optick ERROR: " << description << std::endl; __builtin_debugtrap() +#else + #define OPTICK_DEBUG_BREAK(description) std::cerr << "Optick ERROR: " << description << std::endl; __builtin_trap() +#endif #else #error Can not define OPTICK_DEBUG_BREAK. Unknown platform. 
#endif #define OPTICK_UNUSED(x) (void)(x) #ifdef _DEBUG - #define OPTICK_ASSERT(arg, description) if (!(arg)) { OPTICK_DEBUG_BREAK; } - #define OPTICK_FAILED(description) { OPTICK_DEBUG_BREAK; } + #define OPTICK_ASSERT(arg, description) if (!(arg)) { OPTICK_DEBUG_BREAK(description); } + #define OPTICK_FAILED(description) { OPTICK_DEBUG_BREAK(description); } + #define OPTICK_VERIFY(arg, description, operation) if (!(arg)) { OPTICK_DEBUG_BREAK(description); operation; } #else #define OPTICK_ASSERT(arg, description) - #define OPTICK_FAILED(description) + #define OPTICK_FAILED(description) { std::cerr << "Optick FATAL ERROR: " << description << std::endl; throw std::runtime_error("Optick FAILED"); } + #define OPTICK_VERIFY(arg, description, operation) if (!(arg)) { std::cerr << "Optick ERROR: " << description << std::endl; operation; } #endif -#define OPTICK_VERIFY(arg, description, operation) if (!(arg)) { OPTICK_DEBUG_BREAK; operation; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -168,7 +175,7 @@ inline int sprintf_s(char(&buffer)[sizeOfBuffer], const char* format, ...) 
#if defined(OPTICK_GCC) #include template -inline int wcstombs_s(char(&buffer)[sizeOfBuffer], const wchar_t* src, size_t maxCount) +inline size_t wcstombs_s(char(&buffer)[sizeOfBuffer], const wchar_t* src, size_t maxCount) { return wcstombs(buffer, src, maxCount); } @@ -176,7 +183,7 @@ inline int wcstombs_s(char(&buffer)[sizeOfBuffer], const wchar_t* src, size_t ma #if defined(OPTICK_MSVC) template -inline int wcstombs_s(char(&buffer)[sizeOfBuffer], const wchar_t* src, size_t maxCount) +inline size_t wcstombs_s(char(&buffer)[sizeOfBuffer], const wchar_t* src, size_t maxCount) { size_t converted = 0; return ::wcstombs_s(&converted, buffer, src, maxCount); diff --git a/src/optick_core.cpp b/src/optick_core.cpp index a992c4f0..87d47dfa 100644 --- a/src/optick_core.cpp +++ b/src/optick_core.cpp @@ -224,12 +224,12 @@ void SortMemoryPool(MemoryPool& memoryPool) } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -EventDescription* EventDescription::Create(const char* eventName, const char* fileName, const unsigned long fileLine, const unsigned long eventColor /*= Color::Null*/, const unsigned long filter /*= 0*/, const uint8_t eventFlags /*= 0*/) +EventDescription* EventDescription::Create(const char* eventName, const char* fileName, const uint32_t fileLine, const uint32_t eventColor /*= Color::Null*/, const uint32_t filter /*= 0*/, const uint8_t eventFlags /*= 0*/) { return EventDescriptionBoard::Get().CreateDescription(eventName, fileName, fileLine, eventColor, filter, eventFlags); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -EventDescription* EventDescription::CreateShared(const char* eventName, const char* fileName, const unsigned long fileLine, const unsigned long eventColor /*= Color::Null*/, const unsigned long filter /*= 0*/) +EventDescription* EventDescription::CreateShared(const char* eventName, 
const char* fileName, const uint32_t fileLine, const uint32_t eventColor /*= Color::Null*/, const uint32_t filter /*= 0*/) { return EventDescriptionBoard::Get().CreateSharedDescription(eventName, fileName, fileLine, eventColor, filter); } @@ -240,7 +240,7 @@ EventDescription::EventDescription() : name(""), file(""), line(0), index((uint3 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// EventDescription& EventDescription::operator=(const EventDescription&) { - OPTICK_FAILED("It is pointless to copy EventDescription. Please, check you logic!"); return *this; + OPTICK_VERIFY( false, "It is pointless to copy EventDescription. Please, check your logic!", return *this ); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// EventData* Event::Start(const EventDescription& description) @@ -411,6 +411,48 @@ void Tag::Attach(const EventDescription& description, const char* val, uint16_t storage->tagStringBuffer.Add(TagString(description, val, length)); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(EventStorage* storage, int64_t timestamp, const EventDescription& description, float val) +{ + if (EventStorage* coreStorage = Core::storage) + if (storage && (coreStorage->currentMode & Mode::TAGS)) + storage->tagFloatBuffer.Add(TagFloat(description, val, timestamp)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(EventStorage* storage, int64_t timestamp, const EventDescription& description, int32_t val) +{ + if (EventStorage* coreStorage = Core::storage) + if (storage && (coreStorage->currentMode & Mode::TAGS)) + storage->tagS32Buffer.Add(TagS32(description, val, timestamp)); +} 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(EventStorage* storage, int64_t timestamp, const EventDescription& description, uint32_t val) +{ + if (EventStorage* coreStorage = Core::storage) + if (storage && (coreStorage->currentMode & Mode::TAGS)) + storage->tagU32Buffer.Add(TagU32(description, val, timestamp)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(EventStorage* storage, int64_t timestamp, const EventDescription& description, uint64_t val) +{ + if (EventStorage* coreStorage = Core::storage) + if (storage && (coreStorage->currentMode & Mode::TAGS)) + storage->tagU64Buffer.Add(TagU64(description, val, timestamp)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(EventStorage* storage, int64_t timestamp, const EventDescription& description, float val[3]) +{ + if (EventStorage* coreStorage = Core::storage) + if (storage && (coreStorage->currentMode & Mode::TAGS)) + storage->tagPointBuffer.Add(TagPoint(description, val, timestamp)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(EventStorage* storage, int64_t timestamp, const EventDescription& description, const char* val) +{ + if (EventStorage* coreStorage = Core::storage) + if (storage && (coreStorage->currentMode & Mode::TAGS)) + storage->tagStringBuffer.Add(TagString(description, val, timestamp)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// OutputDataStream & operator<<(OutputDataStream &stream, const EventDescription &ob) { return stream << ob.name << ob.file << ob.line << ob.filter << ob.color << (float)0.0f << 
ob.flags; @@ -840,7 +882,8 @@ void Core::DumpProgressFormatted(const char* format, ...) #ifdef OPTICK_MSVC vsprintf_s(buffer, format, arglist); #else - vsprintf(buffer, format, arglist); + // SRS - use vsnprintf() for buffer security and to eliminate warning + vsnprintf(buffer, sizeof(buffer), format, arglist); #endif va_end(arglist); DumpProgress(buffer); @@ -1801,10 +1844,10 @@ OPTICK_API EventStorage* RegisterStorage(const char* name, uint64_t threadID, Th return entry ? &entry->storage : nullptr; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -OPTICK_API void GpuFlip(void* swapChain) +OPTICK_API void GpuFlip(void* swapChain, uint32_t frameID) { if (GPUProfiler* gpuProfiler = Core::Get().gpuProfiler) - gpuProfiler->Flip(swapChain); + gpuProfiler->Flip(swapChain, frameID); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// OPTICK_API GPUContext SetGpuContext(GPUContext context) diff --git a/src/optick_core.h b/src/optick_core.h index 26ca22f2..8a1cec63 100644 --- a/src/optick_core.h +++ b/src/optick_core.h @@ -333,6 +333,19 @@ struct ThreadEntry bool isAlive; ThreadEntry(const ThreadDescription& desc, EventStorage** tls) : description(desc), threadTLS(tls), isAlive(true) {} + // RB: see Fix for crash on stop capture #1 + // https://github.com/ulricheck/optick/pull/1/commits/1e5e1919816a64f235caa0f4b0bf20495225b1fa + ~ThreadEntry() + { + // SRS - check threadTLS handle for null before dereferencing, not *threadTLS + if (threadTLS != nullptr) + { + *threadTLS = nullptr; + } + + // SRS - make sure thread storage is empty before thread entry terminates + storage.Clear(false); + } void Activate(Mode::Type mode); void Sort(); }; diff --git a/src/optick_core.macos.h b/src/optick_core.macos.h index 95d308f1..7863ff92 100644 --- a/src/optick_core.macos.h +++ b/src/optick_core.macos.h @@ -61,7 +61,7 @@ namespace 
Optick int64 Platform::GetTime() { struct timespec ts; - clock_gettime(CLOCK_REALTIME, &ts); + clock_gettime(CLOCK_MONOTONIC, &ts); return ts.tv_sec * 1000000000LL + ts.tv_nsec; } } diff --git a/src/optick_gpu.cpp b/src/optick_gpu.cpp index bf96ea93..697a56a7 100644 --- a/src/optick_gpu.cpp +++ b/src/optick_gpu.cpp @@ -62,10 +62,35 @@ namespace Optick { std::lock_guard lock(updateLock); currentState = STATE_OFF; + + // SRS - Resolve delayed GPU frame timestamps before dumping data + for (uint32_t nodeIndex = 0; nodeIndex < nodes.size(); ++nodeIndex) + { + Node* node = nodes[nodeIndex]; + + uint32_t nextFrameIndex = (frameNumber + 1) % NUM_FRAMES_DELAY; // same slot as before: subtracting NUM_FRAMES_DELAY does not change the remainder, but it could wrap below zero for a small frameNumber + QueryFrame* nextFrame = &node->queryGpuframes[nextFrameIndex]; // pointer rather than reference so it can be re-pointed at the following slot + + while (nextFrame->queryIndexStart != (uint32_t)-1 && nextFrame->queryIndexCount > 0 && + nextFrameIndex != frameNumber % NUM_FRAMES_DELAY) + { + WaitForFrame(nodeIndex, (uint64_t)nextFrameIndex); // NOTE(review): this passes the ring slot index, while GPUProfilerD3D12::WaitForFrame compares its argument with fence values signaled using frameNumber; confirm this is intended + + uint32_t resolveStart = nextFrame->queryIndexStart % MAX_QUERIES_COUNT; + uint32_t resolveFinish = resolveStart + nextFrame->queryIndexCount; + ResolveTimestamps(nodeIndex, resolveStart, std::min(resolveFinish, MAX_QUERIES_COUNT) - resolveStart); + if (resolveFinish > MAX_QUERIES_COUNT) + ResolveTimestamps(nodeIndex, 0, resolveFinish - MAX_QUERIES_COUNT); + + nextFrameIndex = (nextFrameIndex + 1) % NUM_FRAMES_DELAY; + nextFrame = &node->queryGpuframes[nextFrameIndex]; // advance to the next slot; assigning through a reference here would copy that slot over the first one instead + } + } } void GPUProfiler::Dump(uint32 /*mode*/) { + std::lock_guard lock(updateLock); for (size_t nodeIndex = 0; nodeIndex < nodes.size(); ++nodeIndex) { Node* node = nodes[nodeIndex]; @@ -119,9 +144,9 @@ namespace Optick return event; } - EventData& GPUProfiler::AddVSyncEvent() + EventData& GPUProfiler::AddVSyncEvent(const char *eventName) { - static const EventDescription* VSyncDescription = EventDescription::Create("VSync", __FILE__, __LINE__); + static const EventDescription* VSyncDescription = EventDescription::Create(eventName, __FILE__, __LINE__); // NOTE(review): a function-local static is initialized only on the first call, so later calls with a different eventName reuse the first description EventData& event = 
nodes[currentNode]->gpuEventStorage[GPU_QUEUE_VSYNC]->eventBuffer.Add(); event.description = VSyncDescription; event.start = EventTime::INVALID_TIMESTAMP; @@ -139,6 +164,16 @@ namespace Optick return tag; } + TagData& GPUProfiler::AddVSyncTag() + { + static const EventDescription* VSyncTagDescription = EventDescription::CreateShared("Frame"); + TagData& tag = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_VSYNC]->tagU32Buffer.Add(); + tag.description = VSyncTagDescription; + tag.timestamp = EventTime::INVALID_TIMESTAMP; + tag.data = 0; + return tag; + } + const char * GetGPUQueueName(GPUQueueType queue) { const char* GPUQueueToName[GPU_QUEUE_COUNT] = { "Graphics", "Compute", "Transfer", "VSync" }; diff --git a/src/optick_gpu.d3d12.cpp b/src/optick_gpu.d3d12.cpp index 1fd8b117..aad9012a 100644 --- a/src/optick_gpu.d3d12.cpp +++ b/src/optick_gpu.d3d12.cpp @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -80,16 +81,14 @@ namespace Optick ID3D12Resource* queryBuffer; ID3D12Device* device; - // VSync Stats + // VSync / Present Stats DXGI_FRAME_STATISTICS prevFrameStatistics; + std::queue presentIdQueue; + std::queue frameIdQueue; //void UpdateRange(uint32_t start, uint32_t finish) void InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue); - void ResolveTimestamps(uint32_t startIndex, uint32_t count); - - void WaitForFrame(uint64_t frameNumber); - public: GPUProfilerD3D12(); ~GPUProfilerD3D12(); @@ -98,7 +97,7 @@ namespace Optick void QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp); - void Flip(IDXGISwapChain* swapChain); + void Flip(IDXGISwapChain* swapChain, uint32_t frameID); // Interface implementation @@ -109,9 +108,13 @@ namespace Optick QueryTimestamp((ID3D12GraphicsCommandList*)context, outCpuTimestamp); } - void Flip(void* swapChain) override + void ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count) override; + + void WaitForFrame(uint32_t nodeIndex, 
uint64_t frameNumber) override; + + void Flip(void* swapChain, uint32_t frameID) override { - Flip(static_cast<IDXGISwapChain*>(swapChain)); + Flip(static_cast<IDXGISwapChain*>(swapChain), frameID); } }; @@ -241,11 +244,11 @@ namespace Optick } } - void GPUProfilerD3D12::ResolveTimestamps(uint32_t startIndex, uint32_t count) + void GPUProfilerD3D12::ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count) { if (count) { - Node* node = nodes[currentNode]; + Node* node = nodes[nodeIndex]; D3D12_RANGE range = { sizeof(uint64_t)*startIndex, sizeof(uint64_t)*(startIndex + count) }; void* pData = nullptr; @@ -259,18 +262,18 @@ namespace Optick } } - void GPUProfilerD3D12::WaitForFrame(uint64_t frameNumberToWait) + void GPUProfilerD3D12::WaitForFrame(uint32_t nodeIndex, uint64_t frameNumberToWait) { OPTICK_EVENT(); - NodePayload* payload = nodePayloads[currentNode]; + NodePayload* payload = nodePayloads[nodeIndex]; while (frameNumberToWait > payload->syncFence->GetCompletedValue()) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } } - void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain) + void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain, uint32_t frameID) { OPTICK_CATEGORY("GPUProfilerD3D12::Flip", Category::Debug); @@ -328,38 +331,76 @@ namespace Optick commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, finishIndex, queryBuffer, 0); } } + else + { + // Initialize present / frame statistics + prevFrameStatistics = { 0 }; + swapChain->GetFrameStatistics(&prevFrameStatistics); + + while (!presentIdQueue.empty()) + { + presentIdQueue.pop(); + frameIdQueue.pop(); + } + } commandList->Close(); payload.commandQueue->ExecuteCommandLists(1, (ID3D12CommandList*const*)&commandList); payload.commandQueue->Signal(payload.syncFence, frameNumber); + // Save presentID to frameID correlation for the next present's vsync tag + if (frameID > 0) + { + UINT prevPresentID = 0; + HRESULT result = swapChain->GetLastPresentCount(&prevPresentID); + if (result == 
S_OK) + { + presentIdQueue.push(prevPresentID + 1); + frameIdQueue.push(frameID); + } + } + + // Process VSync / Presentation timing + DXGI_FRAME_STATISTICS currentFrameStatistics = { 0 }; + HRESULT result = swapChain->GetFrameStatistics(&currentFrameStatistics); + if ((result == S_OK) && (currentFrameStatistics.SyncQPCTime.QuadPart > prevFrameStatistics.SyncQPCTime.QuadPart)) + { + EventData& data = AddVSyncEvent("Present"); + data.start = prevFrameStatistics.SyncQPCTime.QuadPart; + data.finish = currentFrameStatistics.SyncQPCTime.QuadPart; + + while (!presentIdQueue.empty() && presentIdQueue.front() <= prevFrameStatistics.PresentCount) // drain queued correlations up to the previously reported present; only an exact PresentCount match gets a tag + { + if (presentIdQueue.front() == prevFrameStatistics.PresentCount) + { + TagData& tag = AddVSyncTag(); + tag.timestamp = prevFrameStatistics.SyncQPCTime.QuadPart; + tag.data = frameIdQueue.front(); + } + + presentIdQueue.pop(); + frameIdQueue.pop(); + } + + prevFrameStatistics = currentFrameStatistics; + } + // Preparing Next Frame // Try resolve timestamps for the current frame if (frameNumber >= NUM_FRAMES_DELAY && nextFrame.queryIndexCount) { - WaitForFrame(frameNumber + 1 - NUM_FRAMES_DELAY); + WaitForFrame(currentNode, (uint64_t)frameNumber + 1 - NUM_FRAMES_DELAY); uint32_t resolveStart = nextFrame.queryIndexStart % MAX_QUERIES_COUNT; uint32_t resolveFinish = resolveStart + nextFrame.queryIndexCount; - ResolveTimestamps(resolveStart, std::min(resolveFinish, MAX_QUERIES_COUNT) - resolveStart); + ResolveTimestamps(currentNode, resolveStart, std::min(resolveFinish, MAX_QUERIES_COUNT) - resolveStart); if (resolveFinish > MAX_QUERIES_COUNT) - ResolveTimestamps(0, resolveFinish - MAX_QUERIES_COUNT); + ResolveTimestamps(currentNode, 0, resolveFinish - MAX_QUERIES_COUNT); } nextFrame.queryIndexStart = queryEnd; nextFrame.queryIndexCount = 0; - - // Process VSync - DXGI_FRAME_STATISTICS currentFrameStatistics = { 0 }; - HRESULT result = swapChain->GetFrameStatistics(&currentFrameStatistics); - if ((result == S_OK) && 
(prevFrameStatistics.PresentCount + 1 == currentFrameStatistics.PresentCount)) - { - EventData& data = AddVSyncEvent(); - data.start = prevFrameStatistics.SyncQPCTime.QuadPart; - data.finish = currentFrameStatistics.SyncQPCTime.QuadPart; - } - prevFrameStatistics = currentFrameStatistics; } ++frameNumber; diff --git a/src/optick_gpu.h b/src/optick_gpu.h index 7977ad3d..7e0a899b 100644 --- a/src/optick_gpu.h +++ b/src/optick_gpu.h @@ -61,7 +61,8 @@ namespace Optick int64_t GetCPUTimestamp(int64_t gpuTimestamp) { - return timestampCPU + (gpuTimestamp - timestampGPU) * frequencyCPU / frequencyGPU; + // SRS - Improve accuracy of GPU to CPU timestamp conversion by using floating point doubles + return timestampCPU + (int64_t)(double(gpuTimestamp - timestampGPU) * (double)frequencyCPU / (double)frequencyGPU); } ClockSynchronization() : frequencyCPU(0), frequencyGPU(0), timestampCPU(0), timestampGPU(0) {} @@ -122,8 +123,9 @@ namespace Optick void Reset(); EventData& AddFrameEvent(); - EventData& AddVSyncEvent(); + EventData& AddVSyncEvent(const char *eventName = "VSync"); TagData& AddFrameTag(); + TagData& AddVSyncTag(); public: GPUProfiler(); @@ -141,7 +143,9 @@ namespace Optick // Interface to implement virtual ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) = 0; virtual void QueryTimestamp(void* context, int64_t* cpuTimestampOut) = 0; - virtual void Flip(void* swapChain) = 0; + virtual void ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count) = 0; + virtual void WaitForFrame(uint32_t nodeIndex, uint64_t frameNumber) = 0; + virtual void Flip(void* swapChain, uint32_t frameID) = 0; virtual ~GPUProfiler(); }; diff --git a/src/optick_gpu.vulkan.cpp b/src/optick_gpu.vulkan.cpp index 7a54212f..43e53a4e 100644 --- a/src/optick_gpu.vulkan.cpp +++ b/src/optick_gpu.vulkan.cpp @@ -35,9 +35,6 @@ namespace Optick { class GPUProfilerVulkan : public GPUProfiler { - private: - VulkanFunctions vulkanFunctions = {}; - protected: struct Frame { @@ 
-48,29 +45,32 @@ namespace Optick struct NodePayload { - VulkanFunctions* vulkanFunctions; + VulkanFunctions vulkanFunctions; VkDevice device; VkPhysicalDevice physicalDevice; VkQueue queue; VkQueryPool queryPool; VkCommandPool commandPool; + VkEvent event; array frames; - NodePayload() : vulkanFunctions(), device(VK_NULL_HANDLE), physicalDevice(VK_NULL_HANDLE), queue(VK_NULL_HANDLE), queryPool(VK_NULL_HANDLE), commandPool(VK_NULL_HANDLE) {} + NodePayload() : vulkanFunctions(), device(VK_NULL_HANDLE), physicalDevice(VK_NULL_HANDLE), queue(VK_NULL_HANDLE), queryPool(VK_NULL_HANDLE), commandPool(VK_NULL_HANDLE), event(VK_NULL_HANDLE) {} ~NodePayload(); }; vector nodePayloads; - void ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count); - void WaitForFrame(uint64_t frameNumber); + // VSync / Present Stats + uint64_t prevPresentTime; + uint32_t prevPresentID; public: GPUProfilerVulkan(); ~GPUProfilerVulkan(); - void InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions); + void InitDevice(VkInstance instance, VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions); void QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp); + void Flip(VkSwapchainKHR swapChain); // Interface implementation @@ -81,35 +81,53 @@ namespace Optick QueryTimestamp((VkCommandBuffer)context, outCpuTimestamp); } - void Flip(void* swapChain) override; + void ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count) override; + + void WaitForFrame(uint32_t nodeIndex, uint64_t frameNumber) override; + + void Flip(void* swapChain, uint32_t frameID) override + { + Flip(static_cast(swapChain)); + } }; - void InitGpuVulkan(VkDevice* vkDevices, VkPhysicalDevice* vkPhysicalDevices, VkQueue* vkQueues, uint32_t* cmdQueuesFamily, 
uint32_t numQueues, const VulkanFunctions* functions) + void InitGpuVulkan(VkInstance vkInstance, VkDevice* vkDevices, VkPhysicalDevice* vkPhysicalDevices, VkQueue* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues, const VulkanFunctions* functions) { GPUProfilerVulkan* gpuProfiler = Memory::New(); - gpuProfiler->InitDevice(vkDevices, vkPhysicalDevices, vkQueues, cmdQueuesFamily, numQueues, functions); + gpuProfiler->InitDevice(vkInstance, vkDevices, vkPhysicalDevices, vkQueues, cmdQueuesFamily, numQueues, functions); Core::Get().InitGPUProfiler(gpuProfiler); } GPUProfilerVulkan::GPUProfilerVulkan() { + prevPresentTime = 0; + prevPresentID = 0; } - void GPUProfilerVulkan::InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions) + void GPUProfilerVulkan::InitDevice(VkInstance instance, VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions) { + VulkanFunctions vulkanFunctions = {}; + if (functions != nullptr) { vulkanFunctions = *functions; } - else + else { +#if OPTICK_STATIC_VULKAN_FUNCTIONS vulkanFunctions = { + nullptr, // don't define vkGetInstanceProcAddr if vulkan functions are static vkGetPhysicalDeviceProperties, (PFN_vkCreateQueryPool_)vkCreateQueryPool, (PFN_vkCreateCommandPool_)vkCreateCommandPool, + (PFN_vkCreateEvent_)vkCreateEvent, (PFN_vkAllocateCommandBuffers_)vkAllocateCommandBuffers, (PFN_vkCreateFence_)vkCreateFence, vkCmdResetQueryPool, + nullptr, // dynamically define vkResetQueryPool via VK_EXT_host_query_reset extension or Vulkan 1.2 hostQueryReset feature + (PFN_vkCmdWaitEvents_)vkCmdWaitEvents, + (PFN_vkResetEvent_)vkResetEvent, + (PFN_vkSetEvent_)vkSetEvent, (PFN_vkQueueSubmit_)vkQueueSubmit, (PFN_vkWaitForFences_)vkWaitForFences, (PFN_vkResetCommandBuffer_)vkResetCommandBuffer, @@ -120,9 +138,28 @@ namespace Optick 
(PFN_vkResetFences_)vkResetFences, vkDestroyCommandPool, vkDestroyQueryPool, + vkDestroyEvent, vkDestroyFence, vkFreeCommandBuffers, + nullptr, // dynamically define vkGetPastPresentationTimingGOOGLE if VK_GOOGLE_display_timing extension available }; +#else + OPTICK_FAILED("Either set OPTICK_STATIC_VULKAN_FUNCTIONS = 1 or VulkanFunctions must be defined! Can't initialize GPU Profiler!"); +#endif + } + + PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr_ = nullptr; + if (vulkanFunctions.vkGetInstanceProcAddr) + { + if (instance) + { + vkGetDeviceProcAddr_ = (PFN_vkGetDeviceProcAddr)(*vulkanFunctions.vkGetInstanceProcAddr)(instance, "vkGetDeviceProcAddr"); + vulkanFunctions.vkGetPhysicalDeviceProperties = (PFN_vkGetPhysicalDeviceProperties_)(*vulkanFunctions.vkGetInstanceProcAddr)(instance, "vkGetPhysicalDeviceProperties"); + } + else + { + OPTICK_FAILED("VkInstance must be defined if VulkanFunctions::vkGetInstanceProcAddr is defined! Can't initialize GPU Profiler!"); + } } VkQueryPoolCreateInfo queryPoolCreateInfo; @@ -131,25 +168,81 @@ namespace Optick queryPoolCreateInfo.flags = 0; queryPoolCreateInfo.queryType = VK_QUERY_TYPE_TIMESTAMP; queryPoolCreateInfo.queryCount = MAX_QUERIES_COUNT + 1; + queryPoolCreateInfo.pipelineStatistics = 0; VkCommandPoolCreateInfo commandPoolCreateInfo; commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; commandPoolCreateInfo.pNext = 0; commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + VkEventCreateInfo eventCreateInfo; + eventCreateInfo.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO; + eventCreateInfo.pNext = 0; + eventCreateInfo.flags = 0; + nodes.resize(nodeCount); nodePayloads.resize(nodeCount); VkResult r; for (uint32_t i = 0; i < nodeCount; ++i) { + if (vkGetDeviceProcAddr_) + { + vulkanFunctions.vkCreateQueryPool = (PFN_vkCreateQueryPool_)vkGetDeviceProcAddr_(devices[i], "vkCreateQueryPool"); + vulkanFunctions.vkCreateCommandPool = 
(PFN_vkCreateCommandPool_)vkGetDeviceProcAddr_(devices[i], "vkCreateCommandPool"); + vulkanFunctions.vkCreateEvent = (PFN_vkCreateEvent_)vkGetDeviceProcAddr_(devices[i], "vkCreateEvent"); + vulkanFunctions.vkAllocateCommandBuffers = (PFN_vkAllocateCommandBuffers_)vkGetDeviceProcAddr_(devices[i], "vkAllocateCommandBuffers"); + vulkanFunctions.vkCreateFence = (PFN_vkCreateFence_)vkGetDeviceProcAddr_(devices[i], "vkCreateFence"); + vulkanFunctions.vkCmdResetQueryPool = (PFN_vkCmdResetQueryPool_)vkGetDeviceProcAddr_(devices[i], "vkCmdResetQueryPool"); + vulkanFunctions.vkResetQueryPool = (PFN_vkResetQueryPool_)vkGetDeviceProcAddr_(devices[i], "vkResetQueryPool"); + if (!vulkanFunctions.vkResetQueryPool) { // if vkResetQueryPool not defined via Vulkan 1.2, try vkResetQueryPoolEXT + vulkanFunctions.vkResetQueryPool = (PFN_vkResetQueryPool_)vkGetDeviceProcAddr_(devices[i], "vkResetQueryPoolEXT"); + } + vulkanFunctions.vkCmdWaitEvents = (PFN_vkCmdWaitEvents_)vkGetDeviceProcAddr_(devices[i], "vkCmdWaitEvents"); + vulkanFunctions.vkResetEvent = (PFN_vkResetEvent_)vkGetDeviceProcAddr_(devices[i], "vkResetEvent"); + vulkanFunctions.vkSetEvent = (PFN_vkSetEvent_)vkGetDeviceProcAddr_(devices[i], "vkSetEvent"); + vulkanFunctions.vkQueueSubmit = (PFN_vkQueueSubmit_)vkGetDeviceProcAddr_(devices[i], "vkQueueSubmit"); + vulkanFunctions.vkWaitForFences = (PFN_vkWaitForFences_)vkGetDeviceProcAddr_(devices[i], "vkWaitForFences"); + vulkanFunctions.vkResetCommandBuffer = (PFN_vkResetCommandBuffer_)vkGetDeviceProcAddr_(devices[i], "vkResetCommandBuffer"); + vulkanFunctions.vkCmdWriteTimestamp = (PFN_vkCmdWriteTimestamp_)vkGetDeviceProcAddr_(devices[i], "vkCmdWriteTimestamp"); + vulkanFunctions.vkGetQueryPoolResults = (PFN_vkGetQueryPoolResults_)vkGetDeviceProcAddr_(devices[i], "vkGetQueryPoolResults"); + vulkanFunctions.vkBeginCommandBuffer = (PFN_vkBeginCommandBuffer_)vkGetDeviceProcAddr_(devices[i], "vkBeginCommandBuffer"); + vulkanFunctions.vkEndCommandBuffer = 
(PFN_vkEndCommandBuffer_)vkGetDeviceProcAddr_(devices[i], "vkEndCommandBuffer"); + vulkanFunctions.vkResetFences = (PFN_vkResetFences_)vkGetDeviceProcAddr_(devices[i], "vkResetFences"); + vulkanFunctions.vkDestroyCommandPool = (PFN_vkDestroyCommandPool_)vkGetDeviceProcAddr_(devices[i], "vkDestroyCommandPool"); + vulkanFunctions.vkDestroyQueryPool = (PFN_vkDestroyQueryPool_)vkGetDeviceProcAddr_(devices[i], "vkDestroyQueryPool"); + vulkanFunctions.vkDestroyEvent = (PFN_vkDestroyEvent_)vkGetDeviceProcAddr_(devices[i], "vkDestroyEvent"); + vulkanFunctions.vkDestroyFence = (PFN_vkDestroyFence_)vkGetDeviceProcAddr_(devices[i], "vkDestroyFence"); + vulkanFunctions.vkFreeCommandBuffers = (PFN_vkFreeCommandBuffers_)vkGetDeviceProcAddr_(devices[i], "vkFreeCommandBuffers"); + vulkanFunctions.vkGetPastPresentationTimingGOOGLE = (PFN_vkGetPastPresentationTimingGOOGLE_)vkGetDeviceProcAddr_(devices[i], "vkGetPastPresentationTimingGOOGLE"); + } +#if OPTICK_STATIC_VULKAN_FUNCTIONS + else // this condition can also run if vulkanFunctions are manually-defined via the "functions" parameter and vulkanFunctions.vkGetInstanceProcAddr == nullptr + { + // SRS - First check for nullptr to make sure we don't overwrite any manually-defined function pointers + if (!vulkanFunctions.vkResetQueryPool) { + vulkanFunctions.vkResetQueryPool = (PFN_vkResetQueryPool_)vkGetDeviceProcAddr(devices[i], "vkResetQueryPool"); + if (!vulkanFunctions.vkResetQueryPool) { // if vkResetQueryPool not defined via Vulkan 1.2, try vkResetQueryPoolEXT + vulkanFunctions.vkResetQueryPool = (PFN_vkResetQueryPool_)vkGetDeviceProcAddr(devices[i], "vkResetQueryPoolEXT"); + } + } + + if (!vulkanFunctions.vkGetPastPresentationTimingGOOGLE) { + vulkanFunctions.vkGetPastPresentationTimingGOOGLE = (PFN_vkGetPastPresentationTimingGOOGLE_)vkGetDeviceProcAddr(devices[i], "vkGetPastPresentationTimingGOOGLE"); + } + } +#endif + if (!vulkanFunctions.vkResetQueryPool) { + OPTICK_FAILED("vkResetQueryPool must be enabled via 
VK_EXT_host_query_reset extension or Vulkan 1.2 hostQueryReset feature. Can't initialize GPU Profiler!"); + } + VkPhysicalDeviceProperties properties = { 0 }; (*vulkanFunctions.vkGetPhysicalDeviceProperties)(physicalDevices[i], &properties); GPUProfiler::InitNode(properties.deviceName, i); NodePayload* nodePayload = Memory::New(); nodePayloads[i] = nodePayload; - nodePayload->vulkanFunctions = &vulkanFunctions; + nodePayload->vulkanFunctions = vulkanFunctions; nodePayload->device = devices[i]; nodePayload->physicalDevice = physicalDevices[i]; nodePayload->queue = cmdQueues[i]; @@ -163,6 +256,10 @@ namespace Optick OPTICK_ASSERT(r == VK_SUCCESS, "Failed"); (void)r; + r = (VkResult)(*vulkanFunctions.vkCreateEvent)(nodePayload->device, &eventCreateInfo, 0, &nodePayload->event); + OPTICK_ASSERT(r == VK_SUCCESS, "Failed"); + (void)r; + for (uint32_t j = 0; j < nodePayload->frames.size(); ++j) { Frame& frame = nodePayload->frames[j]; @@ -216,41 +313,43 @@ namespace Optick { if (currentState == STATE_RUNNING) { - uint32_t index = nodes[currentNode]->QueryTimestamp(outCpuTimestamp); - (*vulkanFunctions.vkCmdWriteTimestamp)(commandBuffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, nodePayloads[currentNode]->queryPool, index); + Node& node = *nodes[currentNode]; + NodePayload& payload = *nodePayloads[currentNode]; + + uint32_t index = node.QueryTimestamp(outCpuTimestamp); + (*payload.vulkanFunctions.vkCmdWriteTimestamp)(commandBuffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, payload.queryPool, index); } } - void GPUProfilerVulkan::ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count) + void GPUProfilerVulkan::ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count) { if (count) { - Node* node = nodes[currentNode]; - - NodePayload* payload = nodePayloads[currentNode]; + Node& node = *nodes[nodeIndex]; + NodePayload& payload = *nodePayloads[nodeIndex]; - OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkGetQueryPoolResults)(payload->device, 
payload->queryPool, startIndex, count, 8 * count, &nodes[currentNode]->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT)); - (*vulkanFunctions.vkCmdResetQueryPool)(commandBuffer, payload->queryPool, startIndex, count); + OPTICK_VK_CHECK((VkResult)(*payload.vulkanFunctions.vkGetQueryPoolResults)(payload.device, payload.queryPool, startIndex, count, 8 * (size_t)count, &node.queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT)); + (*payload.vulkanFunctions.vkResetQueryPool)(payload.device, payload.queryPool, startIndex, count); // Convert GPU timestamps => CPU Timestamps for (uint32_t index = startIndex; index < startIndex + count; ++index) - *node->queryCpuTimestamps[index] = node->clock.GetCPUTimestamp(node->queryGpuTimestamps[index]); + *node.queryCpuTimestamps[index] = node.clock.GetCPUTimestamp(node.queryGpuTimestamps[index]); } } - void GPUProfilerVulkan::WaitForFrame(uint64_t frameNumberToWait) + void GPUProfilerVulkan::WaitForFrame(uint32_t nodeIndex, uint64_t frameNumberToWait) { OPTICK_EVENT(); int r = VK_SUCCESS; do { - NodePayload& payload = *nodePayloads[currentNode]; - r = (*vulkanFunctions.vkWaitForFences)(nodePayloads[currentNode]->device, 1, &payload.frames[frameNumberToWait % payload.frames.size()].fence, 1, 1000 * 30); + NodePayload& payload = *nodePayloads[nodeIndex]; + r = (*payload.vulkanFunctions.vkWaitForFences)(payload.device, 1, &payload.frames[frameNumberToWait % payload.frames.size()].fence, 1, 1000 * 30); } while (r != VK_SUCCESS); } - void GPUProfilerVulkan::Flip(void* /*swapChain*/) + void GPUProfilerVulkan::Flip(VkSwapchainKHR swapChain) { OPTICK_CATEGORY("GPUProfilerVulkan::Flip", Category::Debug); @@ -275,15 +374,15 @@ namespace Optick VkDevice device = payload.device; VkQueue queue = payload.queue; - (*vulkanFunctions.vkWaitForFences)(device, 1, &fence, 1, (uint64_t)-1); + (*payload.vulkanFunctions.vkWaitForFences)(device, 1, &fence, 1, (uint64_t)-1); + (*payload.vulkanFunctions.vkResetFences)(device, 1, &fence); 
VkCommandBufferBeginInfo commandBufferBeginInfo; commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; commandBufferBeginInfo.pNext = 0; commandBufferBeginInfo.pInheritanceInfo = 0; commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkBeginCommandBuffer)(commandBuffer, &commandBufferBeginInfo)); - (*vulkanFunctions.vkResetFences)(device, 1, &fence); + OPTICK_VK_CHECK((VkResult)(*payload.vulkanFunctions.vkBeginCommandBuffer)(commandBuffer, &commandBufferBeginInfo)); if (EventData* frameEvent = currentFrame.frameEvent) QueryTimestamp(commandBuffer, &frameEvent->finish); @@ -294,7 +393,7 @@ namespace Optick QueryTimestamp(commandBuffer, &AddFrameTag().timestamp); nextFrame.frameEvent = &event; - OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkEndCommandBuffer)(commandBuffer)); + OPTICK_VK_CHECK((VkResult)(*payload.vulkanFunctions.vkEndCommandBuffer)(commandBuffer)); VkSubmitInfo submitInfo = {}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submitInfo.pNext = nullptr; @@ -304,15 +403,23 @@ namespace Optick submitInfo.pCommandBuffers = &commandBuffer; submitInfo.signalSemaphoreCount = 0; submitInfo.pSignalSemaphores = nullptr; - OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkQueueSubmit)(queue, 1, &submitInfo, fence)); + OPTICK_VK_CHECK((VkResult)(*payload.vulkanFunctions.vkQueueSubmit)(queue, 1, &submitInfo, fence)); uint32_t queryBegin = currentFrame.queryIndexStart; uint32_t queryEnd = node.queryIndex; if (queryBegin != (uint32_t)-1) { + OPTICK_ASSERT(queryEnd - queryBegin <= MAX_QUERIES_COUNT, "Too many queries in one frame? 
Increase GPUProfiler::MAX_QUERIES_COUNT to fix the problem!"); currentFrame.queryIndexCount = queryEnd - queryBegin; } + else + { + currentFrame.queryIndexStart = 0; + currentFrame.queryIndexCount = queryEnd; + prevPresentTime = 0; + prevPresentID = 0; + } // Preparing Next Frame // Try resolve timestamps for the current frame @@ -323,12 +430,44 @@ namespace Optick if (startIndex < finishIndex) { - ResolveTimestamps(commandBuffer, startIndex, finishIndex - startIndex); + ResolveTimestamps(currentNode, startIndex, finishIndex - startIndex); } else if (startIndex > finishIndex) { - ResolveTimestamps(commandBuffer, startIndex, MAX_QUERIES_COUNT - startIndex); - ResolveTimestamps(commandBuffer, 0, finishIndex); + ResolveTimestamps(currentNode, startIndex, MAX_QUERIES_COUNT - startIndex); + ResolveTimestamps(currentNode, 0, finishIndex); + } + + // SRS - Add Vulkan presentation / vsync timing if VK_GOOGLE_display_timing extension available + if (payload.vulkanFunctions.vkGetPastPresentationTimingGOOGLE) + { + uint32_t queryPresentTimingCount = 0; + (*payload.vulkanFunctions.vkGetPastPresentationTimingGOOGLE)(device, swapChain, &queryPresentTimingCount, nullptr); + if (queryPresentTimingCount > 0) + { + // Query Presentation Timing / VSync + vector queryPresentTimings; + queryPresentTimings.resize(queryPresentTimingCount); + (*payload.vulkanFunctions.vkGetPastPresentationTimingGOOGLE)(device, swapChain, &queryPresentTimingCount, &queryPresentTimings[0]); + for (uint32_t presentIndex = 0; presentIndex < queryPresentTimingCount; presentIndex++) + { + // Process Presentation Timing / VSync if swap image was actually presented (i.e. 
not dropped) + VkPastPresentationTimingGOOGLE presentTiming = queryPresentTimings[presentIndex]; + if (presentTiming.actualPresentTime > prevPresentTime) + { + EventData& data = AddVSyncEvent("Present"); + data.start = prevPresentTime; + data.finish = presentTiming.actualPresentTime; + + TagData& tag = AddVSyncTag(); + tag.timestamp = prevPresentTime; + tag.data = prevPresentID; + + prevPresentTime = presentTiming.actualPresentTime; + prevPresentID = presentTiming.presentID; + } + } + } } } @@ -343,8 +482,8 @@ namespace Optick { GPUProfiler::ClockSynchronization clock; - NodePayload& node = *nodePayloads[nodeIndex]; - Frame& currentFrame = node.frames[frameNumber % NUM_FRAMES_DELAY]; + NodePayload& payload = *nodePayloads[nodeIndex]; + Frame& currentFrame = payload.frames[frameNumber % NUM_FRAMES_DELAY]; VkCommandBufferBeginInfo commandBufferBeginInfo; commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; @@ -352,16 +491,15 @@ namespace Optick commandBufferBeginInfo.pInheritanceInfo = 0; commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; VkCommandBuffer CB = currentFrame.commandBuffer; - VkDevice Device = node.device; + VkDevice Device = payload.device; VkFence Fence = currentFrame.fence; - (*vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1); - (*vulkanFunctions.vkResetFences)(Device, 1, &Fence); - (*vulkanFunctions.vkResetCommandBuffer)(CB, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT); - (*vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo); - (*vulkanFunctions.vkCmdResetQueryPool)(CB, nodePayloads[nodeIndex]->queryPool, 0, 1); - (*vulkanFunctions.vkCmdWriteTimestamp)(CB, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, nodePayloads[nodeIndex]->queryPool, 0); - (*vulkanFunctions.vkEndCommandBuffer)(CB); + // SRS - Prepare and submit an empty command buffer to wait on app buffer completion + (*payload.vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1); + 
(*payload.vulkanFunctions.vkResetFences)(Device, 1, &Fence); + (*payload.vulkanFunctions.vkResetCommandBuffer)(CB, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT); + (*payload.vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo); + (*payload.vulkanFunctions.vkEndCommandBuffer)(CB); VkSubmitInfo submitInfo = {}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; @@ -372,25 +510,48 @@ namespace Optick submitInfo.pCommandBuffers = &CB; submitInfo.signalSemaphoreCount = 0; submitInfo.pSignalSemaphores = nullptr; - (*vulkanFunctions.vkQueueSubmit)(nodePayloads[nodeIndex]->queue, 1, &submitInfo, Fence); - (*vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1); - - clock.timestampGPU = 0; - (*vulkanFunctions.vkGetQueryPoolResults)(Device, nodePayloads[nodeIndex]->queryPool, 0, 1, 8, &clock.timestampGPU, 8, VK_QUERY_RESULT_64_BIT); + (*payload.vulkanFunctions.vkQueueSubmit)(payload.queue, 1, &submitInfo, Fence); + + // SRS - Prepare and submit the actual command buffer used for clock synchronization + (*payload.vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1); + (*payload.vulkanFunctions.vkResetFences)(Device, 1, &Fence); + (*payload.vulkanFunctions.vkResetEvent)(Device, payload.event); + (*payload.vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo); + (*payload.vulkanFunctions.vkCmdResetQueryPool)(CB, payload.queryPool, 0, 1); + (*payload.vulkanFunctions.vkCmdWaitEvents)(CB, 1, &payload.event, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_HOST_BIT | VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, nullptr, 0, nullptr, 0, nullptr); + (*payload.vulkanFunctions.vkCmdWriteTimestamp)(CB, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, payload.queryPool, 0); + (*payload.vulkanFunctions.vkEndCommandBuffer)(CB); + (*payload.vulkanFunctions.vkQueueSubmit)(payload.queue, 1, &submitInfo, Fence); + + // SRS - Improve GPU to CPU clock offset calibration by using Vulkan events + // thanks to cdwfs for concept at 
https://gist.github.com/cdwfs/4222ca09cb259f8dd50f7f2cf7d09179 + (*payload.vulkanFunctions.vkSetEvent)(Device, payload.event); clock.timestampCPU = GetHighPrecisionTime(); - clock.frequencyCPU = GetHighPrecisionFrequency(); + (*payload.vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1); + (*payload.vulkanFunctions.vkResetFences)(Device, 1, &Fence); + clock.timestampGPU = 0; + (*payload.vulkanFunctions.vkGetQueryPoolResults)(Device, payload.queryPool, 0, 1, 8, &clock.timestampGPU, 8, VK_QUERY_RESULT_64_BIT); + // SRS - Improve GPU to CPU clock frequency scaling by using floating point doubles + clock.frequencyCPU = GetHighPrecisionFrequency(); VkPhysicalDeviceProperties Properties; - (*vulkanFunctions.vkGetPhysicalDeviceProperties)(nodePayloads[nodeIndex]->physicalDevice, &Properties); - clock.frequencyGPU = (uint64_t)(1000000000ll / Properties.limits.timestampPeriod); + (*payload.vulkanFunctions.vkGetPhysicalDeviceProperties)(payload.physicalDevice, &Properties); + clock.frequencyGPU = (int64_t)(1000000000.0 / (double)Properties.limits.timestampPeriod); + + // SRS - Reset entire query pool to clear clock sync query + any leftover queries from previous run + (*payload.vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo); + (*payload.vulkanFunctions.vkCmdResetQueryPool)(CB, payload.queryPool, 0, MAX_QUERIES_COUNT); + (*payload.vulkanFunctions.vkEndCommandBuffer)(CB); + (*payload.vulkanFunctions.vkQueueSubmit)(payload.queue, 1, &submitInfo, Fence); return clock; } GPUProfilerVulkan::NodePayload::~NodePayload() { - (*vulkanFunctions->vkDestroyCommandPool)(device, commandPool, nullptr); - (*vulkanFunctions->vkDestroyQueryPool)(device, queryPool, nullptr); + (*vulkanFunctions.vkDestroyEvent)(device, event, nullptr); + (*vulkanFunctions.vkDestroyCommandPool)(device, commandPool, nullptr); + (*vulkanFunctions.vkDestroyQueryPool)(device, queryPool, nullptr); } GPUProfilerVulkan::~GPUProfilerVulkan() @@ -399,8 +560,8 @@ namespace Optick { for 
(Frame& frame : payload->frames) { - (*vulkanFunctions.vkDestroyFence)(payload->device, frame.fence, nullptr); - (*vulkanFunctions.vkFreeCommandBuffers)(payload->device, payload->commandPool, 1, &frame.commandBuffer); + (*payload->vulkanFunctions.vkDestroyFence)(payload->device, frame.fence, nullptr); + (*payload->vulkanFunctions.vkFreeCommandBuffers)(payload->device, payload->commandPool, 1, &frame.commandBuffer); } Memory::Delete(payload); @@ -413,10 +574,10 @@ namespace Optick #include "optick_common.h" namespace Optick { - void InitGpuVulkan(VkDevice* /*vkDevices*/, VkPhysicalDevice* /*vkPhysicalDevices*/, VkQueue* /*vkQueues*/, uint32_t* /*cmdQueuesFamily*/, uint32_t /*numQueues*/, const VulkanFunctions* /*functions*/) + void InitGpuVulkan(VkInstance /*vkInstance*/, VkDevice* /*vkDevices*/, VkPhysicalDevice* /*vkPhysicalDevices*/, VkQueue* /*vkQueues*/, uint32_t* /*cmdQueuesFamily*/, uint32_t /*numQueues*/, const VulkanFunctions* /*functions*/) { OPTICK_FAILED("OPTICK_ENABLE_GPU_VULKAN is disabled! Can't initialize GPU Profiler!"); } } -#endif //OPTICK_ENABLE_GPU_D3D12 -#endif //USE_OPTICK \ No newline at end of file +#endif //OPTICK_ENABLE_GPU_VULKAN +#endif //USE_OPTICK diff --git a/src/optick_message.cpp b/src/optick_message.cpp index 672b1bd3..b2ec5f64 100644 --- a/src/optick_message.cpp +++ b/src/optick_message.cpp @@ -91,11 +91,7 @@ class MessageFactory IMessage* result = factory[messageType](str); - if (header.length + str.Length() != length) - { - OPTICK_FAILED("Message Stream is corrupted! Invalid Protocol?") - return nullptr; - } + OPTICK_VERIFY( header.length + str.Length() == length, "Message Stream is corrupted! 
Invalid Protocol?", return nullptr ); return result; } diff --git a/src/optick_server.cpp b/src/optick_server.cpp index 769c403b..413a473c 100644 --- a/src/optick_server.cpp +++ b/src/optick_server.cpp @@ -241,7 +241,11 @@ class Socket return true; } +#if defined(USE_WINDOWS_SOCKETS) int Receive(char *buf, int len) +#else + ssize_t Receive(char *buf, int len) +#endif { std::lock_guard lock(socketLock); @@ -289,7 +293,7 @@ Server::Server(short port) : socket(Memory::New()), saveCb(nullptr) { if (!socket->Bind(port, 4)) { - OPTICK_FAILED("Failed to bind a socket! Most probably the port is blocked by anti-virus! Change the port and verify that your game has enough permissions to communicate over the TCP\IP."); + OPTICK_FAILED("Failed to bind a socket! Most probably the port is blocked by anti-virus! Change the port and verify that your game has enough permissions to communicate over the TCP/IP."); } else { @@ -304,7 +308,11 @@ void Server::Update() if (!InitConnection()) return; +#if defined(USE_WINDOWS_SOCKETS) int length = -1; +#else + ssize_t length = -1; +#endif while ( (length = socket->Receive( buffer, BIFFER_SIZE ) ) > 0 ) { networkStream.Append(buffer, length); @@ -499,4 +507,4 @@ Server & Server::Get() } -#endif //USE_OPTICK \ No newline at end of file +#endif //USE_OPTICK