From 43d04febd519bc7fde9585869b69943ff2ba34fe Mon Sep 17 00:00:00 2001 From: mcrcortex <18544518+MCRcortex@users.noreply.github.com> Date: Mon, 14 Jul 2025 16:53:33 +1000 Subject: [PATCH] ext mesh --- .../voxy/client/core/gl/EXTMeshShader.java | 25 ++ .../section/MeshEXTSectionRenderer.java | 227 ++++++++++++ .../assets/voxy/shaders/lod/meshext/frag.glsl | 88 +++++ .../assets/voxy/shaders/lod/meshext/mesh.glsl | 325 ++++++++++++++++++ .../assets/voxy/shaders/lod/meshext/task.glsl | 124 +++++++ 5 files changed, 789 insertions(+) create mode 100644 src/main/java/me/cortex/voxy/client/core/gl/EXTMeshShader.java create mode 100644 src/main/java/me/cortex/voxy/client/core/rendering/section/MeshEXTSectionRenderer.java create mode 100644 src/main/resources/assets/voxy/shaders/lod/meshext/frag.glsl create mode 100644 src/main/resources/assets/voxy/shaders/lod/meshext/mesh.glsl create mode 100644 src/main/resources/assets/voxy/shaders/lod/meshext/task.glsl diff --git a/src/main/java/me/cortex/voxy/client/core/gl/EXTMeshShader.java b/src/main/java/me/cortex/voxy/client/core/gl/EXTMeshShader.java new file mode 100644 index 00000000..2cb01feb --- /dev/null +++ b/src/main/java/me/cortex/voxy/client/core/gl/EXTMeshShader.java @@ -0,0 +1,25 @@ +package me.cortex.voxy.client.core.gl; + +import org.lwjgl.opengl.GL; +import org.lwjgl.system.JNI; + +public class EXTMeshShader { + public static final int + GL_MESH_SHADER_EXT = 0x9559, + GL_TASK_SHADER_EXT = 0x955A; + + private static final long glDrawMeshTasksIndirectEXT_ptr; + static { + if (GL.getFunctionProvider() == null) { + throw new IllegalStateException("Class must be initalized after gl context has been created"); + } + glDrawMeshTasksIndirectEXT_ptr = GL.getFunctionProvider().getFunctionAddress("glDrawMeshTasksIndirectEXT"); + } + + public static void glDrawMeshTasksIndirectEXT(long indirect) { + if (glDrawMeshTasksIndirectEXT_ptr == 0) { + throw new IllegalStateException("glDrawMeshTasksIndirectEXT not supported"); + } + JNI.callV(indirect, glDrawMeshTasksIndirectEXT_ptr); + } +} diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/section/MeshEXTSectionRenderer.java b/src/main/java/me/cortex/voxy/client/core/rendering/section/MeshEXTSectionRenderer.java new file mode 100644 index 00000000..3338e269 --- /dev/null +++ b/src/main/java/me/cortex/voxy/client/core/rendering/section/MeshEXTSectionRenderer.java @@ -0,0 +1,227 @@ +package me.cortex.voxy.client.core.rendering.section; + + +import me.cortex.voxy.client.RenderStatistics; +import me.cortex.voxy.client.core.gl.Capabilities; +import me.cortex.voxy.client.core.gl.GlBuffer; +import me.cortex.voxy.client.core.gl.GlTexture; +import me.cortex.voxy.client.core.gl.shader.Shader; +import me.cortex.voxy.client.core.gl.shader.ShaderType; +import me.cortex.voxy.client.core.model.ModelStore; +import me.cortex.voxy.client.core.rendering.RenderService; +import me.cortex.voxy.client.core.rendering.section.geometry.BasicSectionGeometryData; +import me.cortex.voxy.client.core.rendering.util.DownloadStream; +import me.cortex.voxy.client.core.rendering.util.LightMapHelper; +import me.cortex.voxy.client.core.rendering.util.SharedIndexBuffer; +import me.cortex.voxy.client.core.rendering.util.UploadStream; +import me.cortex.voxy.common.Logger; +import me.cortex.voxy.common.world.WorldEngine; +import org.joml.Matrix4f; +import org.lwjgl.system.MemoryUtil; + +import java.util.List; + +import static me.cortex.voxy.client.core.gl.EXTMeshShader.glDrawMeshTasksIndirectEXT; +import static org.lwjgl.opengl.GL11.*; +import static org.lwjgl.opengl.GL11C.GL_UNSIGNED_INT; +import static org.lwjgl.opengl.GL15.GL_ELEMENT_ARRAY_BUFFER; +import static org.lwjgl.opengl.GL15.glBindBuffer; +import static org.lwjgl.opengl.GL30.glBindBufferBase; +import static org.lwjgl.opengl.GL30.glBindVertexArray; +import static org.lwjgl.opengl.GL30C.GL_R32UI; +import static org.lwjgl.opengl.GL30C.GL_RED_INTEGER; +import static org.lwjgl.opengl.GL31.GL_UNIFORM_BUFFER; +import static org.lwjgl.opengl.GL33.glBindSampler; +import static org.lwjgl.opengl.GL40C.GL_DRAW_INDIRECT_BUFFER; +import static org.lwjgl.opengl.GL42.glMemoryBarrier; +import static org.lwjgl.opengl.GL43.*; +import static org.lwjgl.opengl.GL45.*; +import static org.lwjgl.opengl.NVRepresentativeFragmentTest.GL_REPRESENTATIVE_FRAGMENT_TEST_NV; + +//Uses MDIC to render the sections +public class MeshEXTSectionRenderer extends AbstractSectionRenderer { + private static final int STATISTICS_BUFFER_BINDING = 8; + private final Shader terrainShader = Shader.make() + .define("MESH_SIZE", 32)//16 + + .defineIf("HAS_STATISTICS", RenderStatistics.enabled) + .defineIf("STATISTICS_BUFFER_BINDING", RenderStatistics.enabled, STATISTICS_BUFFER_BINDING) + + .add(ShaderType.TASK, "voxy:lod/meshext/task.glsl") + .add(ShaderType.MESH, "voxy:lod/meshext/mesh.glsl") + .add(ShaderType.FRAGMENT, "voxy:lod/meshext/frag.glsl") + .compile(); + + private final Shader cullShader = Shader.make() + .add(ShaderType.VERTEX, "voxy:lod/gl46/cull/raster.vert") + .add(ShaderType.FRAGMENT, "voxy:lod/gl46/cull/raster.frag") + .compile(); + + private final GlBuffer uniform = new GlBuffer(1024).zero(); + private final GlBuffer cullAndMeshDrawCommand = new GlBuffer(8*4).zero();//TODO: this needs tobe in the viewport + + //Statistics + private final GlBuffer statisticsBuffer = new GlBuffer(1024).zero(); + + public MeshEXTSectionRenderer(ModelStore modelStore, BasicSectionGeometryData geometryData) { + super(modelStore, geometryData); + glClearNamedBufferSubData(this.cullAndMeshDrawCommand.id, GL_R32UI,0, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, new int[]{6*2*3});//count + glClearNamedBufferSubData(this.cullAndMeshDrawCommand.id, GL_R32UI,8, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, new int[]{(1<<16)*6*2});//firstIndex + glClearNamedBufferSubData(this.cullAndMeshDrawCommand.id, GL_R32UI,5*4+4, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, new int[]{1});//y + glClearNamedBufferSubData(this.cullAndMeshDrawCommand.id, GL_R32UI,5*4+8, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, new int[]{1});//z + } + + private void uploadUniformBuffer(MeshViewport viewport) { + long ptr = UploadStream.INSTANCE.upload(this.uniform, 0, 1024); + + var mat = new Matrix4f(viewport.MVP); + mat.translate(-viewport.innerTranslation.x, -viewport.innerTranslation.y, -viewport.innerTranslation.z); + mat.getToAddress(ptr); ptr += 4*4*4; + + viewport.section.getToAddress(ptr); ptr += 4*3; + + if (viewport.frameId<0) { + Logger.error("Frame ID negative, this will cause things to break, wrapping around"); + viewport.frameId &= 0x7fffffff; + } + MemoryUtil.memPutInt(ptr, viewport.frameId&0x7fffffff); ptr += 4; + viewport.innerTranslation.getToAddress(ptr); ptr += 4*3; + + ptr += 4;// padd + + MemoryUtil.memPutFloat(ptr, viewport.width); ptr += 4; + MemoryUtil.memPutFloat(ptr, viewport.height); ptr += 4; + + UploadStream.INSTANCE.commit(); + } + + + private void bindRenderingBuffers(MeshViewport viewport, GlTexture depthBoundTexture) { + glBindBufferBase(GL_UNIFORM_BUFFER, 0, this.uniform.id); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, viewport.getRenderList().id); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, this.geometryManager.getMetadataBuffer().id); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, viewport.visibilityBuffer.id); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, this.geometryManager.getGeometryBuffer().id); + this.modelStore.bind(5, 6, 0); + LightMapHelper.bind(1); + glBindTextureUnit(2, depthBoundTexture.id); + + glBindBuffer(GL_DRAW_INDIRECT_BUFFER, this.cullAndMeshDrawCommand.id); + + if (RenderStatistics.enabled) { + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, STATISTICS_BUFFER_BINDING, this.statisticsBuffer.id); + } + } + + private void renderTerrain(MeshViewport viewport, GlTexture depthBoundTexture) { + //RenderLayer.getCutoutMipped().startDrawing(); + glDisable(GL_CULL_FACE); + glEnable(GL_DEPTH_TEST); + this.terrainShader.bind(); + this.bindRenderingBuffers(viewport, depthBoundTexture); + + glMemoryBarrier(GL_COMMAND_BARRIER_BIT|GL_SHADER_STORAGE_BARRIER_BIT);//Barrier everything is needed + + glDrawMeshTasksIndirectEXT(20); + + glEnable(GL_CULL_FACE); + glBindSampler(0, 0); + glBindTextureUnit(0, 0); + glBindSampler(1, 0); + glBindTextureUnit(1, 0); + + //RenderLayer.getCutoutMipped().endDrawing(); + } + + @Override + public void renderOpaque(MeshViewport viewport, GlTexture dbt) { + if (this.geometryManager.getSectionCount() == 0) return; + + this.uploadUniformBuffer(viewport); + + this.renderTerrain(viewport, dbt); + + //We need todo the statistics here as rastering is part of them, download then clear + if (RenderStatistics.enabled) { + DownloadStream.INSTANCE.download(this.statisticsBuffer, down->{ + final int LAYERS = WorldEngine.MAX_LOD_LAYER+1; + for (int i = 0; i < LAYERS; i++) { + RenderStatistics.visibleSections[i] = MemoryUtil.memGetInt(down.address+i*4L); + } + + for (int i = 0; i < LAYERS; i++) { + RenderStatistics.quadCount[i] = MemoryUtil.memGetInt(down.address+LAYERS*4L+i*4L); + } + }); + + this.statisticsBuffer.zero(); + } + } + + @Override + public void renderTranslucent(MeshViewport viewport, GlTexture depthBoundTexture) { + return; + } + + @Override + public void buildDrawCalls(MeshViewport viewport) { + if (this.geometryManager.getSectionCount() == 0) return; + this.uploadUniformBuffer(viewport); + //Can do a sneeky trick, since the sectionRenderList is a list to things to render, it invokes the culler + // which only marks visible sections + + + {//Test occlusion + glCopyNamedBufferSubData(viewport.getRenderList().id, this.cullAndMeshDrawCommand.id, 0, 4, 4);//Copy counts to the draw buffer + glCopyNamedBufferSubData(viewport.getRenderList().id, this.cullAndMeshDrawCommand.id, 0, 20, 4);//Copy counts to the draw buffer + + this.cullShader.bind(); + if (Capabilities.INSTANCE.repFragTest) { + glEnable(GL_REPRESENTATIVE_FRAGMENT_TEST_NV); + } + glBindVertexArray(RenderService.STATIC_VAO); + glBindBufferBase(GL_UNIFORM_BUFFER, 0, this.uniform.id); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, this.geometryManager.getMetadataBuffer().id); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, viewport.visibilityBuffer.id); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, viewport.getRenderList().id); + glBindBuffer(GL_DRAW_INDIRECT_BUFFER, this.cullAndMeshDrawCommand.id); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, SharedIndexBuffer.INSTANCE.id()); + glEnable(GL_DEPTH_TEST); + glColorMask(false, false, false, false); + glDepthMask(false); + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT|GL_COMMAND_BARRIER_BIT); + glDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_BYTE, 0); + glDepthMask(true); + glColorMask(true, true, true, true); + glDisable(GL_DEPTH_TEST); + if (Capabilities.INSTANCE.repFragTest) { + glDisable(GL_REPRESENTATIVE_FRAGMENT_TEST_NV); + } + } + } + + @Override + public void renderTemporal(MeshViewport viewport, GlTexture dbt) { + return; + } + + @Override + public void addDebug(List lines) { + super.addDebug(lines); + //lines.add("SC/GS: " + this.geometryManager.getSectionCount() + "/" + (this.geometryManager.getGeometryUsed()/(1024*1024)));//section count/geometry size (MB) + } + + @Override + public MeshViewport createViewport() { + return new MeshViewport(this.geometryManager.getMaxSectionCount()); + } + + @Override + public void free() { + this.cullAndMeshDrawCommand.free(); + this.uniform.free(); + this.terrainShader.free(); + this.cullShader.free(); + this.statisticsBuffer.free(); + } +} diff --git a/src/main/resources/assets/voxy/shaders/lod/meshext/frag.glsl b/src/main/resources/assets/voxy/shaders/lod/meshext/frag.glsl new file mode 100644 index 00000000..0a1cb545 --- /dev/null +++ b/src/main/resources/assets/voxy/shaders/lod/meshext/frag.glsl @@ -0,0 +1,88 @@ +#version 460 core + +layout(binding = 0) uniform sampler2D blockModelAtlas; +layout(binding = 2) uniform sampler2D depthTex; + +layout(location=1) perprimitiveEXT in PerPrimData { + uvec4 data; +} primIn; + +layout(location = 0) out vec4 outColour; + +vec4 uint2vec4RGBA(uint colour) { + return vec4((uvec4(colour)>>uvec4(24,16,8,0))&uvec4(0xFF))/255.0; +} + +bool useMipmaps() { + return (primIn.data.x&2u)==0u; +} + +bool useTinting() { + return (primIn.data.x&4u)!=0u; +} + +bool useCutout() { + return (primIn.data.x&1u)==1u; +} + +vec4 computeColour(vec4 colour) { + //Conditional tinting, TODO: FIXME: REPLACE WITH MASK OR SOMETHING, like encode data into the top bit of alpha + if (useTinting() && abs(colour.r-colour.g) < 0.02f && abs(colour.g-colour.b) < 0.02f) { + colour *= uint2vec4RGBA(primIn.data.z).yzwx; + } + return (colour * uint2vec4RGBA(primIn.data.y)) + vec4(0,0,0,float(primIn.data.w&0xFFu)/255); +} + + +uint getFace() { + return (primIn.data.w>>8)&7u; +} + +vec2 getBaseUV() { + uint face = getFace(); + uint modelId = primIn.data.x>>16; + vec2 modelUV = vec2(modelId&0xFFu, (modelId>>8)&0xFFu)*(1.0/(256.0)); + return modelUV + (vec2(face>>1, face&1u) * (1.0/(vec2(3.0, 2.0)*256.0))); +} + + +void main() { + vec2 uv = vec2(0); + + //Tile is the tile we are in + vec2 tile; + vec2 uv2 = modf(uv, tile)*(1.0/(vec2(3.0,2.0)*256.0)); + vec4 colour; + vec2 texPos = uv2 + getBaseUV(); + if (useMipmaps()) { + vec2 uvSmol = uv*(1.0/(vec2(3.0,2.0)*256.0)); + vec2 dx = dFdx(uvSmol);//vec2(lDx, dDx); + vec2 dy = dFdy(uvSmol);//vec2(lDy, dDy); + colour = textureGrad(blockModelAtlas, texPos, dx, dy); + } else { + colour = textureLod(blockModelAtlas, texPos, 0); + } + + if (any(notEqual(clamp(tile, vec2(0), vec2((primIn.data.x>>8)&0xFu, (primIn.data.x>>12)&0xFu)), tile))) { + discard; + } + + //Check the minimum bounding texture and ensure we are greater than it + if (gl_FragCoord.z < texelFetch(depthTex, ivec2(gl_FragCoord.xy), 0).r) { + discard; + } + + + //Also, small quad is really fking over the mipping level somehow + if (useCutout() && (textureLod(blockModelAtlas, texPos, 0).a <= 0.1f)) { + //This is stupidly stupidly bad for divergence + //TODO: FIXME, basicly what this do is sample the exact pixel (no lod) for discarding, this stops mipmapping fucking it over + #ifndef DEBUG_RENDER + discard; + #endif + } + + colour = computeColour(colour); + + outColour = colour; +} \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/meshext/mesh.glsl b/src/main/resources/assets/voxy/shaders/lod/meshext/mesh.glsl new file mode 100644 index 00000000..250d19bf --- /dev/null +++ b/src/main/resources/assets/voxy/shaders/lod/meshext/mesh.glsl @@ -0,0 +1,325 @@ +#version 460 core + +#extension GL_EXT_mesh_shader : require + +#extension GL_ARB_gpu_shader_int64 : enable + +#extension GL_KHR_shader_subgroup_arithmetic: require +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require + + +//TODO: finetune the local size and emission size +layout(local_size_x = MESH_SIZE, local_size_y=1, local_size_z=1) in; +layout(triangles, max_vertices=(MESH_SIZE*4), max_primitives=(MESH_SIZE*2)) out; + +taskPayloadSharedEXT in Task { + //Tightly packed, prefix sum + offset + //uvec4 binA; + //uvec4 binB; + uint bins[8]; + + vec3 cameraOffset; + uint lodLvl; + + uint baseQuad; + uint quadCount; +} task; + +layout(location=1) perprimitiveEXT out PerPrimData { + uvec4 data; +} primOut[]; + + +uint getQuadId() { + uint mid = gl_GlobalInvocationID.x; + uint cv = (mid<<16)|0xFFFFu; + /* + //Funny method + uvec4 a = mix(uvec4(0), uvec4( 1, 2, 4, 8), lessThanEqual(uvec4(task.bins[0],task.bins[1],task.bins[2],task.bins[3]), uvec4(cv))) + + mix(uvec4(0), uvec4(16,32,64,128), lessThanEqual(uvec4(task.bins[4],task.bins[5],task.bins[6],task.bins[7]), uvec4(cv))); + uint act = a.x+a.y+a.z+a.w; + uint id = findLSB(act^(act>>1)); + + //uint point = mix(binB, binA, id<4)[id&3u]; + uint point = task.bins[id]; + + return (point&0xFFFFu)+(mid-(point>>16)); + */ + #pragma unroll + for (uint i = 0; i<7; i++) { + uint point = task.bins[i]; + if (point<=cv&&cv>16)); + } + } + return -1; + + + + /* + for (uint i = 0; i<7; i++) { + uint point = task.bins[i]; + if (point <= ((mid<<16)|0xFFFFu) && ((mid<<16)|0xFFFFu)>16)); + } + } + return -1; + */ +} + +#import +#import + +layout(binding = 0, std140) uniform SceneUniform { + mat4 MVP; + ivec3 baseSectionPos; + uint frameId; + vec3 cameraSubPos; + uint pad_; + vec2 screenSize; +}; + +layout(binding = 4, std430) readonly restrict buffer QuadBuffer { + Quad quadData[]; +}; + +layout(binding = 5, std430) readonly restrict buffer ModelBuffer { + BlockModel modelData[]; +}; + +layout(binding = 6, std430) readonly restrict buffer ModelColourBuffer { + uint colourData[]; +}; + +layout(binding = 1) uniform sampler2D lightSampler; +vec4 getLighting(uint index) { + int i2 = int(index); + return texture(lightSampler, clamp((vec2((i2>>4)&0xF, i2&0xF))/16, vec2(8.0f/255), vec2(248.0f/255))); +} + +//=============== + + +vec3 swizzelDataAxis(uint axis, vec3 data) { + return mix(mix(data.zxy,data.xzy,bvec3(axis==0)),data,bvec3(axis==1)); +} + +vec4 getFaceSize(uint faceData) { + float EPSILON = 0.00005f; + + vec4 faceOffsetsSizes = extractFaceSizes(faceData); + + //Expand the quads by a very small amount (because of the subtraction after this also becomes an implicit add) + faceOffsetsSizes.xz -= vec2(EPSILON); + + //Make the end relative to the start + faceOffsetsSizes.yw -= faceOffsetsSizes.xz; + + return faceOffsetsSizes; +} + +vec3 faceNormal(uint face) { + //TODO: optimize this garbage + return vec3(uint((face>>1)==2), uint((face>>1)==0), uint((face>>1)==1)) * (float(int(face)&1)*2-1); +} + +uint packVec4(vec4 vec) { + uvec4 vec_=uvec4(vec*255)<>1, vec3(faceSize.xz, mix(depthOffset, 1-depthOffset, float(face&1u)))); + cornerPos *= (1<>1)&1u, corner&1u);; +} + +uvec4 createQuadData(Quad quad) { + uint flags = faceHasAlphaCuttout(faceData); + + ivec2 quadSize = extractSize(quad); + //We need to have a conditional override based on if the model size is < a full face + quadSize > 1 + flags |= uint(any(greaterThan(quadSize, ivec2(1)))) & faceHasAlphaCuttoutOverride(faceData); + + flags |= uint(!modelHasMipmaps(model))<<1; + + //Compute lighting + vec4 tinting = getLighting(extractLightId(quad)); + + //Apply model colour tinting + uint tintColour = model.colourTint; + if (modelHasBiomeLUT(model)) { + tintColour = colourData[tintColour + extractBiomeId(quad)]; + } + + uint conditionalTinting = 0; + if (tintColour != uint(-1)) { + flags |= 1u<<2; + conditionalTinting = tintColour; + } + + uint addin = 0; + if (!isTranslucent) { + tinting.w = 0.0; + //Encode the face, the lod level and + uint encodedData = 0; + encodedData |= face; + encodedData |= (task.lodLvl<<3); + encodedData |= uint(hasAO)<<6; + addin = encodedData; + } + + //Apply face tint + if (isShaded) { + //TODO: make branchless, infact apply ahead of time to the texture itself in ModelManager since that is + // per face + if ((face>>1) == 1) {//NORTH, SOUTH + tinting.xyz *= 0.8f; + } else if ((face>>1) == 2) {//EAST, WEST + tinting.xyz *= 0.6f; + } else if (face == 0) {//DOWN + tinting.xyz *= 0.5f; + } + } + + + + uvec4 interData; + + interData.x = (modelId<<16) | flags | (uint(quadSize.x-1)<<8) | (uint(quadSize.y-1)<<12); + + interData.y = packVec4(tinting); + interData.z = conditionalTinting; + interData.w = addin|(face<<8); + + return interData; +} + +vec4 emitVertexPos(int corner) { + vec3 pointPos = swizzelDataAxis(face>>1,vec3(axisFaceSize*mix(vec2(0),vec2(1<>1)&1, corner&1)),0))+cornerPos; + return MVP*vec4(pointPos, 1.0); +} + +bvec2 whatRender(vec4 p1, vec4 p2, vec4 p0, vec4 p3) { + vec2 ssmin = ((p1.xy/p1.w)+1)*screenSize; + vec2 ssmax = ssmin; + + vec2 point = ((p2.xy/p2.w)+1)*screenSize; + ssmin = min(ssmin, point); + ssmax = max(ssmax, point); + + point = ((p0.xy/p0.w)+1)*screenSize; + vec2 t0min = min(ssmin, point); + vec2 t0max = max(ssmax, point); + + point = ((p3.xy/p3.w)+1)*screenSize; + vec2 t1min = min(ssmin, point); + vec2 t1max = max(ssmax, point); + + //Possibly cull the triangles if they dont cover the center of a pixel on the screen (degen) + float degenBias = 0.01f; + bool t0draw = all(notEqual(round(t0min-degenBias),round(t0max+degenBias))); + bool t1draw = all(notEqual(round(t1min-degenBias),round(t1max+degenBias))); + return bvec2(t0draw, t1draw); +} + +#ifdef HAS_STATISTICS +layout(binding = STATISTICS_BUFFER_BINDING, std430) restrict buffer statisticsBuffer { + uint visibleSectionCounts[5]; + uint quadCounts[5]; +}; +#endif + +void main() { + uint qid = uint(-1); + Quad quad; + if (gl_GlobalInvocationID.x + +layout(binding = 0, std140) uniform SceneUniform { + mat4 MVP; + ivec3 baseSectionPos; + uint frameId; + vec3 cameraSubPos; + uint pad_; + vec2 screenSize; +}; + +layout(binding = 1, std430) restrict readonly buffer IndirectSectionLookupBuffer { + uint sectionCount; + uint indirectLookup[]; +}; + +layout(binding = 2, std430) restrict readonly buffer SectionBuffer { + SectionMeta sectionData[]; +}; + +layout(binding = 3, std430) restrict readonly buffer VisibilityBuffer { + uint visibilityData[]; +}; + +#ifdef HAS_STATISTICS +layout(binding = STATISTICS_BUFFER_BINDING, std430) restrict buffer statisticsBuffer { + uint visibleSectionCounts[5]; + uint quadCounts[5]; +}; +#endif + +taskPayloadSharedEXT out Task { + //Tightly packed, prefix sum + offset + //uvec4 binA; + //uvec4 binB; + uint bins[8]; + + vec3 cameraOffset; + uint lodLvl; + + uint baseQuad; + uint quadCount; +} task; + +#define BIN(br, cnt) if (br) { task.bins[i++] = (sum<<16)|off; sum += cnt; } off += cnt; +//#define BIN(br, cnt) if (br) { batch[i++] = (sum<<16)|off; sum += cnt; } off += cnt; + +bvec3 and(bvec3 a, bvec3 b) { + return bvec3(a.x&&b.x, a.y&&b.y, a.z&&b.z); +} +uint fillBins(uvec4 counts, ivec3 relative) {//Returns quad count + #pragma unroll + for (uint i = 0; i < 8; i++) task.bins[i] = uint(-1); + + uvec3 cA = counts.yzw&0xFFFFu; + uvec3 cB = counts.yzw>>16; + + bvec3 a = and(notEqual(cA, uvec3(0)), lessThanEqual(ivec3(0), relative.yzx)); + bvec3 b = and(notEqual(cB, uvec3(0)), lessThanEqual(relative.yzx, ivec3(0))); + + uint dsc = counts.x>>16;//double sided quads + uint sum = 0; + uint off = counts.x&0xFFFFu;//translucent quads + uint i = 0; + + //TODO: might need to move this into shared memory or somethign? so that compiler can reason about it (or make the bin an array in here and mesh) + //uint batch[8] = {uint(-1), uint(-1), uint(-1), uint(-1), uint(-1),uint(-1),uint(-1),uint(-1)}; + + BIN(dsc!=0, dsc);//Double sided quads + + //TODO: compute prefix sums and then jsut batch set into the array (this is an optimization) + + BIN(a.x, cA.x);//Down + BIN(b.x, cB.x);//Up + BIN(a.y, cA.y);//North + BIN(b.y, cB.y);//South + BIN(a.z, cA.z);//West + BIN(b.z, cB.z);//East + + //task.binA = uvec4(batch[0], batch[1], batch[2], batch[3]); + //task.binB = uvec4(batch[4], batch[5], batch[6], batch[7]); + return sum; +} + + +void main() { + uint secId = indirectLookup[gl_WorkGroupID.x]; + uint vis = visibilityData[secId]; + + bool shouldRender = (vis&0x7fffffffu) == frameId-1;//-1 since we are technically in the next frame for the primary rasterization + bool renderTemporally = (vis&0x80000000u)==0; + + task.quadCount = 0; + + if (shouldRender) { + SectionMeta section = sectionData[secId]; + + uint detail = extractDetail(section); + ivec3 ipos = extractPosition(section); + + ivec3 relative = ipos-(baseSectionPos>>detail); + + #ifdef HAS_STATISTICS + atomicAdd(visibleSectionCounts[detail], 1); + #endif + + //TODO: here enqueue the id here for both translucent and temporal (if relevant) (* note technically dont need for temporal as can just check :tm: if we are in temporal render mode) + + task.baseQuad = extractQuadStart(section); + task.quadCount = fillBins(section.b, relative); + + task.cameraOffset = vec3(((ipos<