diff --git a/src/main/java/me/cortex/voxy/client/core/gl/GlBuffer.java b/src/main/java/me/cortex/voxy/client/core/gl/GlBuffer.java index 2ef05117..0844e0de 100644 --- a/src/main/java/me/cortex/voxy/client/core/gl/GlBuffer.java +++ b/src/main/java/me/cortex/voxy/client/core/gl/GlBuffer.java @@ -47,6 +47,11 @@ public class GlBuffer extends TrackedObject { return this; } + public GlBuffer zeroRange(long offset, long size) { + nglClearNamedBufferSubData(this.id, GL_R8UI, offset, size, GL_RED_INTEGER, GL_UNSIGNED_BYTE, 0); + return this; + } + public GlBuffer fill(int data) { //Clear unpack values //Fixed in mesa commit a5c3c452 diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/section/MDICSectionRenderer.java b/src/main/java/me/cortex/voxy/client/core/rendering/section/MDICSectionRenderer.java index 8459dceb..4665f4fe 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/section/MDICSectionRenderer.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/section/MDICSectionRenderer.java @@ -32,12 +32,13 @@ import static org.lwjgl.opengl.GL33.glBindSampler; import static org.lwjgl.opengl.GL40C.GL_DRAW_INDIRECT_BUFFER; import static org.lwjgl.opengl.GL43.*; import static org.lwjgl.opengl.GL45.glBindTextureUnit; +import static org.lwjgl.opengl.GL45.glClearNamedBufferData; //Uses MDIC to render the sections public class MDICSectionRenderer extends AbstractSectionRenderer { private static final int TRANSLUCENT_OFFSET = 400_000;//in draw calls private static final int TEMPORAL_OFFSET = 500_000;//in draw calls - private static final int STATISTICS_BUFFER_BINDING = 7; + private static final int STATISTICS_BUFFER_BINDING = 8; private final Shader terrainShader = Shader.make() .defineIf("DEBUG_RENDER", false) .add(ShaderType.VERTEX, "voxy:lod/gl46/quads2.vert") @@ -45,9 +46,11 @@ public class MDICSectionRenderer extends AbstractSectionRenderer{ + int[] a = new int[1024]; + for (int i = 0; i < 1024; i++) { + a[i] = MemoryUtil.memGetInt(ptr+4*i); + } + for (int i = 0; i < 1023; i++){ + if (a[i+1] +#import + +/* + uint count; + uint instanceCount; + uint firstIndex; + int baseVertex; + uint baseInstance; + */ + +layout(binding = TRANSLUCENT_DISTANCE_BUFFER_BINDING, std430) restrict buffer TranslucentCommandCount { + uint[] translucentCommandData; +}; + +//Note: if i want reverse indexing i need to use the index buffer offset to offset +void writeCmd(uint idx, uint instance, uint offset, uint quadCount) { + DrawCommand cmd; + cmd.count = quadCount * 6; + cmd.instanceCount = 1; + cmd.firstIndex = 0; + cmd.baseVertex = int(offset)<<2; + cmd.baseInstance = instance; + cmdBuffer[idx] = cmd; +} + +void main() { + if (gl_GlobalInvocationID.x >= translucentDrawCount) { + return; + } + uint drawId = translucentCommandData[gl_GlobalInvocationID.x+TRANSLUCENT_WRITE_BASE]; + SectionMeta meta = sectionData[indirectLookup[drawId]]; + uint detail = extractDetail(meta); + + uvec3 rel = abs(extractPosition(meta)-(baseSectionPos>>detail)); + uint dist = (rel.x+rel.y+rel.z)< #import #import -#line 11 //https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GL_EXT_shader_16bit_storage.txt // adds support for uint8_t which can use for compact visibility buffer @@ -35,6 +33,9 @@ layout(binding = STATISTICS_BUFFER_BINDING, std430) restrict buffer statisticsBu uint baseInstance; */ +layout(binding = TRANSLUCENT_DISTANCE_BUFFER_BINDING, std430) restrict buffer TranslucentCommandCount { + uint[] translucentCommandData; +}; //Note: if i want reverse indexing i need to use the index buffer offset to offset void writeCmd(uint idx, uint instance, uint offset, uint quadCount) { @@ -108,8 +109,12 @@ void main() { //Translucency count = meta.cntA&0xFFFF; if (count != 0) { - uint translucentCommandPtr = atomicAdd(translucentDrawCount, 1) + TRANSLUCENT_OFFSET;//FIXME: dont hardcode this offset - writeCmd(translucentCommandPtr, drawId, ptr, count); + uint tp = atomicAdd(translucentDrawCount, 1)+TRANSLUCENT_WRITE_BASE; + translucentCommandData[tp] = drawId; + uvec3 absRel = abs(relative); + uint distToCamera = (absRel.x+absRel.y+absRel.z)<>5)); + + uint gid = gl_GlobalInvocationID.x; + uvec4 count = uvec4(0); + uint sum = 0; + { + uvec4 dat = ioCount[gid]; + count.yzw = dat.xyz; + count.z += count.y; + count.w += count.z; + sum = count.w + dat.w; + } + + subgroupBarrier();//Wait for all threads in the subgroup to get the buffer + + count += subgroupExclusiveAdd(sum); + + if ((gl_LocalInvocationID.x&31u)==31) { + warpPrefixSum[gl_SubgroupID] = count.x+sum; + } + + barrier(); + + if (gl_SubgroupID == 0) { + uint val = warpPrefixSum[gl_SubgroupInvocationID]; + subgroupBarrier(); + //Use warp to do entire add in 1 reduction + warpPrefixSum[gl_SubgroupInvocationID] = subgroupExclusiveAdd(val); + } + + barrier(); + //Add the computed sum across all threads and warps + count += warpPrefixSum[gl_SubgroupID]; + ioCount[gid] = count; +} \ No newline at end of file