Skip to content

Commit

Permalink
Merge pull request #19764 from hrydgard/even-more-depth-raster-stuff
Browse files Browse the repository at this point in the history
Enable depth raster in all backends
  • Loading branch information
hrydgard authored Dec 25, 2024
2 parents f8774a4 + 4aaea67 commit 5def50c
Show file tree
Hide file tree
Showing 10 changed files with 32 additions and 23 deletions.
2 changes: 1 addition & 1 deletion Common/Math/CrossSIMD.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ struct Vec4U16 {
Vec4U16 operator ^(Vec4U16 other) const { return Vec4U16{ _mm_xor_si128(v, other.v) }; }

Vec4U16 Max(Vec4U16 other) const { return Vec4U16{ _mm_max_epu16_SSE2(v, other.v) }; }
Vec4U16 Min(Vec4U16 other) const { return Vec4U16{ _mm_max_epu16_SSE2(v, other.v) }; }
Vec4U16 Min(Vec4U16 other) const { return Vec4U16{ _mm_min_epu16_SSE2(v, other.v) }; }
Vec4U16 CompareLT(Vec4U16 other) { return Vec4U16{ _mm_cmplt_epu16(v, other.v) }; }
};

Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/DepthRaster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ TriangleResult DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y
switch (compareMode) {
case ZCompareMode::Greater:
// To implement the greater-than comparison, we can combine mask and max.
// It might be better to do the math in float space on x86 due to SSE2 deficiencies.
// Unfortunately there's no unsigned max on SSE2, it's synthesized by xoring 0x8000 on input and output.
// We use AndNot to zero out Z results, before doing Max with the buffer.
AndNot(shortZ, shortMaskInv).Max(bufferValues).Store(rowPtr + x);
break;
Expand Down
10 changes: 10 additions & 0 deletions GPU/D3D11/DrawEngineD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,9 @@ void DrawEngineD3D11::Flush() {
context_->Draw(vertexCount, 0);
}
}
if (useDepthRaster_) {
DepthRasterTransform(prim, dec_, dec_->VertexType(), vertexCount);
}
} else {
PROFILE_THIS_SCOPE("soft");
VertexDecoder *swDec = dec_;
Expand Down Expand Up @@ -388,6 +391,13 @@ void DrawEngineD3D11::Flush() {
UpdateCachedViewportState(vpAndScissor);
}

// At this point, rect and line primitives are still preserved as such. So, it's the best time to do software depth raster.
// We could piggyback on the viewport transform below, but it gets complicated since it's different per-backend, which is
// something we really should clean up one day.
if (useDepthRaster_) {
DepthRasterPredecoded(prim, decoded_, numDecodedVerts_, swDec, vertexCount);
}

SoftwareTransform swTransform(params);

const Lin::Vec3 trans(gstate_c.vpXOffset, -gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
Expand Down
10 changes: 10 additions & 0 deletions GPU/Directx9/DrawEngineDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,9 @@ void DrawEngineDX9::Flush() {
}
}
}
if (useDepthRaster_) {
DepthRasterTransform(prim, dec_, dec_->VertexType(), vertexCount);
}
} else {
VertexDecoder *swDec = dec_;
if (swDec->nweights != 0) {
Expand Down Expand Up @@ -344,6 +347,13 @@ void DrawEngineDX9::Flush() {
UpdateCachedViewportState(vpAndScissor);
}

// At this point, rect and line primitives are still preserved as such. So, it's the best time to do software depth raster.
// We could piggyback on the viewport transform below, but it gets complicated since it's different per-backend, which is
// something we really should clean up one day.
if (useDepthRaster_) {
DepthRasterPredecoded(prim, decoded_, numDecodedVerts_, swDec, vertexCount);
}

int maxIndex = numDecodedVerts_;
SoftwareTransform swTransform(params);

Expand Down
10 changes: 10 additions & 0 deletions GPU/GLES/DrawEngineGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,9 @@ void DrawEngineGLES::Flush() {
inputLayout, vertexBuffer, vertexBufferOffset,
glprim[prim], 0, vertexCount);
}
if (useDepthRaster_) {
DepthRasterTransform(prim, dec_, dec_->VertexType(), vertexCount);
}
} else {
PROFILE_THIS_SCOPE("soft");
VertexDecoder *swDec = dec_;
Expand Down Expand Up @@ -371,6 +374,13 @@ void DrawEngineGLES::Flush() {
}
}

// At this point, rect and line primitives are still preserved as such. So, it's the best time to do software depth raster.
// We could piggyback on the viewport transform below, but it gets complicated since it's different per-backend, which is
// something we really should clean up one day.
if (useDepthRaster_) {
DepthRasterPredecoded(prim, decoded_, numDecodedVerts_, swDec, vertexCount);
}

SoftwareTransform swTransform(params);

const Lin::Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, gstate_c.vpZOffset);
Expand Down
2 changes: 0 additions & 2 deletions UWP/lua/lua.cpp

This file was deleted.

1 change: 0 additions & 1 deletion UWP/lua/lua.h

This file was deleted.

1 change: 0 additions & 1 deletion UWP/lua/pch.cpp

This file was deleted.

9 changes: 0 additions & 9 deletions UWP/lua/pch.h

This file was deleted.

8 changes: 0 additions & 8 deletions UWP/lua/targetver.h

This file was deleted.

0 comments on commit 5def50c

Please sign in to comment.