From 8d934bec8e59d09b7b722fa691660e25c79f9e5d Mon Sep 17 00:00:00 2001 From: Chun Yang Date: Wed, 1 Sep 2021 00:21:26 -0700 Subject: [PATCH] SWDEV-301543 SWDEV-276146 : Fix profile output buff allocation An L2 flush is triggered by an explicit cache flush PM4 packet in the aqlprofile packets to the GPU. This cache flush is used to sync up the CPU and GPU to make sure performance counters copied to the profile output buffer are visible to the CPU. To get rid of this cache flush, the following is done: 1. The explicit cache flush packet is removed from the aqlprofile code (another commit to the aqlprofile code). 2. This commit changes the profile output buffer to use kernarg memory, since it is uncached for the GPU. After these changes, profile counter values copied by the GPU to the output buffer are guaranteed to be visible to the CPU. Change-Id: Ie953949c85fbee2f4369f1de966bcfb33daec084 (cherry picked from commit 2b7993163129d3c2d67eb5e60143237e5276ce0d) --- src/core/profile.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/core/profile.h b/src/core/profile.h index 609c811d..f12e7a9e 100644 --- a/src/core/profile.h +++ b/src/core/profile.h @@ -331,7 +331,10 @@ class PmcProfile : public Profile { hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) { profile_.command_buffer.ptr = rsrc->AllocateSysMemory(agent_info_, profile_.command_buffer.size); - profile_.output_buffer.ptr = rsrc->AllocateSysMemory(agent_info_, profile_.output_buffer.size); + // Allocate profile output buffer from kernarg memory pool since kernarg + // memory buffer is uncached. So when GPU copies performance counter values + // to this buffer they are guaranteed to be visible to CPU. + profile_.output_buffer.ptr = rsrc->AllocateKernArgMemory(agent_info_, profile_.output_buffer.size); return (profile_.command_buffer.ptr && profile_.output_buffer.ptr) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; }