Skip to content

Commit

Permalink
Merge pull request #18478 from hrydgard/block-transfer-to-depth
Browse files Browse the repository at this point in the history
Handle block transfers from RAM to depth buffers.
  • Loading branch information
hrydgard authored Dec 6, 2023
2 parents 4b6dbd1 + 26a5119 commit b90b626
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 23 deletions.
49 changes: 34 additions & 15 deletions GPU/Common/FramebufferManagerCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -511,8 +511,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(Framebuffer
vfb->usageFlags = FB_USAGE_RENDER_COLOR;

u32 colorByteSize = vfb->BufferByteSize(RASTER_COLOR);
if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + colorByteSize > framebufRangeEnd_) {
framebufRangeEnd_ = params.fb_address + colorByteSize;
if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + colorByteSize > framebufColorRangeEnd_) {
framebufColorRangeEnd_ = params.fb_address + colorByteSize;
}

// This is where we actually create the framebuffer. The true is "force".
Expand Down Expand Up @@ -1904,6 +1904,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,

dst &= 0x3FFFFFFF;
src &= 0x3FFFFFFF;

if (Memory::IsVRAMAddress(dst))
dst &= 0x041FFFFF;
if (Memory::IsVRAMAddress(src))
Expand All @@ -1917,7 +1918,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
bool ignoreSrcBuffer = flags & (GPUCopyFlag::FORCE_SRC_MATCH_MEM | GPUCopyFlag::MEMSET);

// TODO: In the future we should probably check both channels. Currently depth is only on request.
RasterChannel channel = flags & GPUCopyFlag::DEPTH_REQUESTED ? RASTER_DEPTH : RASTER_COLOR;
RasterChannel channel = (flags & GPUCopyFlag::DEPTH_REQUESTED) ? RASTER_DEPTH : RASTER_COLOR;

TinySet<CopyCandidate, 4> srcCandidates;
TinySet<CopyCandidate, 4> dstCandidates;
Expand Down Expand Up @@ -2172,14 +2173,14 @@ bool FramebufferManagerCommon::FindTransferFramebuffer(u32 basePtr, int stride_p
for (auto vfb : vfbs_) {
BlockTransferRect candidate{ vfb, RASTER_COLOR };

// Check for easily detected depth copies for logging purposes.
// Depth copies are not that useful though because you manually need to account for swizzle, so
// not sure if games will use them. Actually we do have a case, Iron Man in issue #16530.
if (vfb->z_address == basePtr && vfb->z_stride == stride_pixels && PSP_CoreParameter().compat.flags().BlockTransferDepth) {
// Two cases so far of games depending on depth copies: Iron Man in issue #16530 (buffer->buffer)
// and also #17878 where a game does ram->buffer to an auto-swizzling (|0x600000) address,
// to initialize Z with a pre-rendered depth buffer.
if (vfb->z_address == basePtr && vfb->BufferByteStride(RASTER_DEPTH) == byteStride && PSP_CoreParameter().compat.flags().BlockTransferDepth) {
WARN_LOG_N_TIMES(z_xfer, 5, G3D, "FindTransferFramebuffer: found matching depth buffer, %08x (dest=%d, bpp=%d)", basePtr, (int)destination, bpp);
candidate.channel = RASTER_DEPTH;
candidate.x_bytes = x_pixels * 2;
candidate.w_bytes = w_pixels * 2;
candidate.x_bytes = x_pixels * bpp;
candidate.w_bytes = w_pixels * bpp;
candidate.y = y;
candidate.h = h;
candidates.push_back(candidate);
Expand Down Expand Up @@ -2347,8 +2348,8 @@ VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAd
vfbs_.push_back(vfb);

u32 byteSize = vfb->BufferByteSize(channel);
if (fbAddress + byteSize > framebufRangeEnd_) {
framebufRangeEnd_ = fbAddress + byteSize;
if (fbAddress + byteSize > framebufColorRangeEnd_) {
framebufColorRangeEnd_ = fbAddress + byteSize;
}

return vfb;
Expand Down Expand Up @@ -2510,8 +2511,10 @@ bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dst
return false;
}

// Skip checking if there's no framebuffers in that area.
if (!MayIntersectFramebuffer(srcBasePtr) && !MayIntersectFramebuffer(dstBasePtr)) {
// Skip checking if there are no framebuffers in that area. Make a special exception for obvious transfers to a depth buffer (see issue #17878).
bool dstDepthSwizzle = Memory::IsVRAMAddress(dstBasePtr) && ((dstBasePtr & 0x600000) == 0x600000);

if (!dstDepthSwizzle && !MayIntersectFramebufferColor(srcBasePtr) && !MayIntersectFramebufferColor(dstBasePtr)) {
return false;
}

Expand All @@ -2529,6 +2532,10 @@ bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dst
}
}

if (!srcBuffer && dstBuffer && dstRect.channel == RASTER_DEPTH) {
dstBuffer = true;
}

if (srcBuffer && !dstBuffer) {
// In here, we can't read from dstRect.
if (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB ||
Expand Down Expand Up @@ -2635,7 +2642,19 @@ bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dst
return true;

} else if (dstBuffer) {
// Here we should just draw the pixels into the buffer. Copy first.
// Handle depth uploads directly here, and let's not bother copying the data. This is compat-flag-gated for now,
// may generalize it when I remove the compat flag.
if (dstRect.channel == RASTER_DEPTH) {
WARN_LOG_ONCE(btud, G3D, "Block transfer upload %08x -> %08x (%dx%d %d,%d bpp=%d %s)", srcBasePtr, dstBasePtr, width, height, dstX, dstY, bpp, RasterChannelToString(dstRect.channel));
FlushBeforeCopy();
const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;
DrawPixels(dstRect.vfb, dstX, dstY, srcBase, dstRect.vfb->Format(dstRect.channel), srcStride * bpp / 2, (int)(dstRect.w_bytes / 2), dstRect.h, dstRect.channel, "BlockTransferCopy_DrawPixelsDepth");
RebindFramebuffer("RebindFramebuffer - UploadDepth");
return true;
}

// Here we should just draw the pixels into the buffer. Return false to copy the memory first.
// NotifyBlockTransferAfter will take care of the rest.
return false;
} else if (srcBuffer) {
WARN_LOG_N_TIMES(btd, 10, G3D, "Block transfer readback %dx%d %dbpp from %08x (x:%d y:%d stride:%d) -> %08x (x:%d y:%d stride:%d)",
Expand Down Expand Up @@ -2680,7 +2699,7 @@ void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstS
}
}

if (MayIntersectFramebuffer(srcBasePtr) || MayIntersectFramebuffer(dstBasePtr)) {
if (MayIntersectFramebufferColor(srcBasePtr) || MayIntersectFramebufferColor(dstBasePtr)) {
// TODO: Figure out how we can avoid repeating the search here.

BlockTransferRect dstRect{};
Expand Down
7 changes: 4 additions & 3 deletions GPU/Common/FramebufferManagerCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -382,13 +382,14 @@ class FramebufferManagerCommon {
return useBufferedRendering_;
}

bool MayIntersectFramebuffer(u32 start) const {
// TODO: Maybe just include the last depth buffer address in this, too.
bool MayIntersectFramebufferColor(u32 start) const {
// Clear the cache/kernel bits.
start &= 0x3FFFFFFF;
if (Memory::IsVRAMAddress(start))
start &= 0x041FFFFF;
// Most games only have two framebuffers at the start.
if (start >= framebufRangeEnd_ || start < PSP_GetVidMemBase()) {
if (start >= framebufColorRangeEnd_ || start < PSP_GetVidMemBase()) {
return false;
}
return true;
Expand Down Expand Up @@ -572,7 +573,7 @@ class FramebufferManagerCommon {
Draw::Framebuffer *currentFramebufferCopy_ = nullptr;

// The end of the range of PSP memory that may contain FBOs, so we can skip iterating.
u32 framebufRangeEnd_ = 0;
u32 framebufColorRangeEnd_ = 0;

bool useBufferedRendering_ = false;
bool postShaderIsUpscalingFilter_ = false;
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/StencilCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
using namespace Draw;

addr &= 0x3FFFFFFF;
if (!MayIntersectFramebuffer(addr)) {
if (!MayIntersectFramebufferColor(addr)) {
return false;
}

Expand Down
6 changes: 3 additions & 3 deletions GPU/GPUCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1853,7 +1853,7 @@ void GPUCommon::DoBlockTransfer(u32 skipDrawReason) {

bool GPUCommon::PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags) {
// Track stray copies of a framebuffer in RAM. MotoGP does this.
if (framebufferManager_->MayIntersectFramebuffer(src) || framebufferManager_->MayIntersectFramebuffer(dest)) {
if (framebufferManager_->MayIntersectFramebufferColor(src) || framebufferManager_->MayIntersectFramebufferColor(dest)) {
if (!framebufferManager_->NotifyFramebufferCopy(src, dest, size, flags, gstate_c.skipDrawReason)) {
// We use matching values in PerformReadbackToMemory/PerformWriteColorFromMemory.
// Since they're identical we don't need to copy.
Expand Down Expand Up @@ -1881,7 +1881,7 @@ bool GPUCommon::PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags

bool GPUCommon::PerformMemorySet(u32 dest, u8 v, int size) {
// This may indicate a memset, usually to 0, of a framebuffer.
if (framebufferManager_->MayIntersectFramebuffer(dest)) {
if (framebufferManager_->MayIntersectFramebufferColor(dest)) {
Memory::Memset(dest, v, size, "GPUMemset");
if (!framebufferManager_->NotifyFramebufferCopy(dest, dest, size, GPUCopyFlag::MEMSET, gstate_c.skipDrawReason)) {
InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
Expand Down Expand Up @@ -1920,7 +1920,7 @@ void GPUCommon::PerformWriteFormattedFromMemory(u32 addr, int size, int frameWid
}

bool GPUCommon::PerformWriteStencilFromMemory(u32 dest, int size, WriteStencil flags) {
if (framebufferManager_->MayIntersectFramebuffer(dest)) {
if (framebufferManager_->MayIntersectFramebufferColor(dest)) {
framebufferManager_->PerformWriteStencilFromMemory(dest, size, flags);
return true;
}
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUCommonHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,7 @@ void GPUCommonHW::InvalidateCache(u32 addr, int size, GPUInvalidationType type)
else
textureCache_->InvalidateAll(type);

if (type != GPU_INVALIDATE_ALL && framebufferManager_->MayIntersectFramebuffer(addr)) {
if (type != GPU_INVALIDATE_ALL && framebufferManager_->MayIntersectFramebufferColor(addr)) {
// Vempire invalidates (with writeback) after drawing, but before blitting.
// TODO: Investigate whether we can get this to work some other way.
if (type == GPU_INVALIDATE_SAFE) {
Expand Down
3 changes: 3 additions & 0 deletions assets/compat.ini
Original file line number Diff line number Diff line change
Expand Up @@ -1573,6 +1573,9 @@ ULES01070 = true
ULES01071 = true
ULUS10347 = true

# Hayate no Gotoku!! Nightmare Paradise - see issue #17878
ULJM05416 = true

[DaxterRotatedAnalogStick]
# Daxter (see issue #17015)
UCUS98618 = true
Expand Down

0 comments on commit b90b626

Please sign in to comment.