Skip to content

Commit

Permalink
Poking and prodding
Browse files Browse the repository at this point in the history
- Invert image size on CPU to avoid divisions on GPU
- Increase the depth reduce work group size from 8x8 to 16x16
- Early-out in uploadInstances based on changed cardinality
  - Calculating the cardinality is much faster than clearing an
    AtomicBitSet, so the check is worth it
- Upload the scatter list directly into the staging buffer if there's room
  • Loading branch information
Jozufozu committed Sep 10, 2024
1 parent f12aa15 commit 0bfaac7
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ public static void setup(Material material) {
setupBackfaceCulling(material.backfaceCulling());
setupPolygonOffset(material.polygonOffset());
setupDepthTest(material.depthTest());
// setupDepthTest(DepthTest.OFF);
setupTransparency(material.transparency());
setupWriteMask(material.writeMask());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import net.minecraft.client.Minecraft;

public class DepthPyramid {
private static final int GROUP_SIZE = 16;

private final GlProgram depthReduceProgram;

public int pyramidTextureId = -1;
Expand Down Expand Up @@ -50,10 +52,10 @@ public void generate() {

GL46.glBindImageTexture(0, pyramidTextureId, i, false, 0, GL32.GL_WRITE_ONLY, GL32.GL_R32F);

depthReduceProgram.setVec2("imageSize", mipWidth, mipHeight);
depthReduceProgram.setVec2("oneOverImageSize", 1f / (float) mipWidth, 1f / (float) mipHeight);
depthReduceProgram.setInt("lod", Math.max(0, i - 1));

GL46.glDispatchCompute(MoreMath.ceilingDiv(mipWidth, 8), MoreMath.ceilingDiv(mipHeight, 8), 1);
GL46.glDispatchCompute(MoreMath.ceilingDiv(mipWidth, GROUP_SIZE), MoreMath.ceilingDiv(mipHeight, GROUP_SIZE), 1);

GL46.glMemoryBarrier(GL46.GL_TEXTURE_FETCH_BARRIER_BIT);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ public void writeModel(long ptr) {
}

public void uploadInstances(StagingBuffer stagingBuffer, int instanceVbo) {
if (changedPages.cardinality() == 0) {
// No pages have changed. Return early, because checking the cardinality is faster than clearing the bit set.
return;
}

int numPages = mapping.pageCount();

var instanceCount = instances.size();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ public class StagingBuffer {
private static final int STORAGE_FLAGS = GL45C.GL_MAP_PERSISTENT_BIT | GL45C.GL_MAP_WRITE_BIT | GL45C.GL_CLIENT_STORAGE_BIT;
private static final int MAP_FLAGS = GL45C.GL_MAP_PERSISTENT_BIT | GL45C.GL_MAP_WRITE_BIT | GL45C.GL_MAP_FLUSH_EXPLICIT_BIT | GL45C.GL_MAP_INVALIDATE_BUFFER_BIT;

private static final int SSBO_ALIGNMENT = GL45.glGetInteger(GL45.GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);

private final int vbo;
private final long map;
private final long capacity;
Expand Down Expand Up @@ -254,7 +256,6 @@ private void dispatchComputeCopies() {
scatterProgram.bind();

// These bindings don't change between dstVbos.
GL45.glBindBufferBase(GL45C.GL_SHADER_STORAGE_BUFFER, 0, scatterBuffer.handle());
GL45.glBindBufferBase(GL45C.GL_SHADER_STORAGE_BUFFER, 1, vbo);

int dstVbo;
Expand All @@ -276,7 +277,24 @@ private void dispatchComputeCopies() {
}

private void dispatchScatter(int dstVbo) {
scatterBuffer.upload(scatterList.ptr(), scatterList.usedBytes());
var scatterSize = scatterList.usedBytes();

long alignedPos = pos + SSBO_ALIGNMENT - 1 - (pos + SSBO_ALIGNMENT - 1) % SSBO_ALIGNMENT;

long remaining = capacity - alignedPos;
if (scatterSize <= remaining && scatterSize <= totalAvailable) {
MemoryUtil.memCopy(scatterList.ptr(), map + alignedPos, scatterSize);
GL45.glBindBufferRange(GL45C.GL_SHADER_STORAGE_BUFFER, 0, vbo, alignedPos, scatterSize);

long alignmentCost = alignedPos - pos;

usedCapacity += scatterSize + alignmentCost;
totalAvailable -= scatterSize + alignmentCost;
pos += scatterSize + alignmentCost;
} else {
scatterBuffer.upload(scatterList.ptr(), scatterSize);
GL45.glBindBufferBase(GL45C.GL_SHADER_STORAGE_BUFFER, 0, scatterBuffer.handle());
}

GL45.glBindBufferBase(GL45C.GL_SHADER_STORAGE_BUFFER, 2, dstVbo);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
layout(local_size_x = 8, local_size_y = 8) in;
layout(local_size_x = 16, local_size_y = 16) in;

layout(binding = 0, r32f) uniform writeonly image2D outImage;
layout(binding = 1) uniform sampler2D inImage;

uniform vec2 imageSize;
uniform vec2 oneOverImageSize;
uniform int lod;

uniform int useMin = 0;
Expand All @@ -13,7 +13,7 @@ void main() {

// Map the output texel to an input texel. Scale by the actual ratio of input size to output size, because mip 0 is
// generated from the full resolution depth buffer, whose aspect ratio may differ from our power-of-two (Po2) pyramid.
ivec2 samplePos = ivec2(floor(vec2(pos) * vec2(textureSize(inImage, lod)) / imageSize));
ivec2 samplePos = ivec2(floor(vec2(pos) * vec2(textureSize(inImage, lod)) * oneOverImageSize));

float depth01 = texelFetchOffset(inImage, samplePos, lod, ivec2(0, 1)).r;
float depth11 = texelFetchOffset(inImage, samplePos, lod, ivec2(1, 1)).r;
Expand Down

0 comments on commit 0bfaac7

Please sign in to comment.