From aff34fb463f72474b2edc7c7dead78d048943918 Mon Sep 17 00:00:00 2001 From: mcrcortex <18544518+MCRcortex@users.noreply.github.com> Date: Fri, 18 Jul 2025 13:12:56 +1000 Subject: [PATCH] e --- .../assets/voxy/shaders/lod/mesh/frag.glsl | 24 ++- .../assets/voxy/shaders/lod/mesh/mesh.glsl | 58 ++++--- .../assets/voxy/shaders/lod/mesh/task2.glsl | 154 ++++++++++++++++++ 3 files changed, 200 insertions(+), 36 deletions(-) create mode 100644 src/main/resources/assets/voxy/shaders/lod/mesh/task2.glsl diff --git a/src/main/resources/assets/voxy/shaders/lod/mesh/frag.glsl b/src/main/resources/assets/voxy/shaders/lod/mesh/frag.glsl index c47c4188..c69e5e07 100644 --- a/src/main/resources/assets/voxy/shaders/lod/mesh/frag.glsl +++ b/src/main/resources/assets/voxy/shaders/lod/mesh/frag.glsl @@ -5,10 +5,8 @@ layout(binding = 0) uniform sampler2D blockModelAtlas; layout(binding = 2) uniform sampler2D depthTex; -layout(location=1) perprimitiveNV in PerPrimData { - uvec4 data; - vec4 uvData; -} primIn; +perprimitiveNV in uvec4 primData; +perprimitiveNV in vec4 uvData; layout(location = 0) out vec4 outColour; @@ -17,33 +15,33 @@ vec4 uint2vec4RGBA(uint colour) { } bool useMipmaps() { - return (primIn.data.x&2u)==0u; + return (primData.x&2u)==0u; } bool useTinting() { - return (primIn.data.x&4u)!=0u; + return (primData.x&4u)!=0u; } bool useCutout() { - return (primIn.data.x&1u)==1u; + return (primData.x&1u)==1u; } vec4 computeColour(vec4 colour) { //Conditional tinting, TODO: FIXME: REPLACE WITH MASK OR SOMETHING, like encode data into the top bit of alpha if (useTinting() && abs(colour.r-colour.g) < 0.02f && abs(colour.g-colour.b) < 0.02f) { - colour *= uint2vec4RGBA(primIn.data.z).yzwx; + colour *= uint2vec4RGBA(primData.z).yzwx; } - return (colour * uint2vec4RGBA(primIn.data.y)) + vec4(0,0,0,float(primIn.data.w&0xFFu)/255); + return (colour * uint2vec4RGBA(primData.y)) + vec4(0,0,0,float(primData.w&0xFFu)/255); } uint getFace() { - return (primIn.data.w>>8)&7u; + return (primData.w>>8)&7u; } vec2 getBaseUV() { uint face = getFace(); - uint modelId = primIn.data.x>>16; + uint modelId = primData.x>>16; vec2 modelUV = vec2(modelId&0xFFu, (modelId>>8)&0xFFu)*(1.0/(256.0)); return modelUV + (vec2(face>>1, face&1u) * (1.0/(vec2(3.0, 2.0)*256.0))); } @@ -62,7 +60,7 @@ void main() { //vec2(0,gl_BaryCoordNV.x)+vec2(gl_BaryCoordNV.y,gl_BaryCoordNV.y)+vec2(gl_BaryCoordNV.z,0); - vec2 uv = fma(mix(gl_BaryCoordNV.zx+gl_BaryCoordNV.y, gl_BaryCoordNV.yx, bvec2(tri0)), primIn.uvData.zw, primIn.uvData.xy); + vec2 uv = fma(mix(gl_BaryCoordNV.zx+gl_BaryCoordNV.y, gl_BaryCoordNV.yx, bvec2(tri0)), uvData.zw, uvData.xy); //Need to interpolate //Tile is the tile we are in @@ -79,7 +77,7 @@ void main() { colour = textureLod(blockModelAtlas, texPos, 0); } - if (any(notEqual(clamp(tile, vec2(0), vec2((primIn.data.x>>8)&0xFu, (primIn.data.x>>12)&0xFu)), tile))) { + if (any(notEqual(clamp(tile, vec2(0), vec2((primData.x>>8)&0xFu, (primData.x>>12)&0xFu)), tile))) { discard; } diff --git a/src/main/resources/assets/voxy/shaders/lod/mesh/mesh.glsl b/src/main/resources/assets/voxy/shaders/lod/mesh/mesh.glsl index 5132c3aa..8ce8090b 100644 --- a/src/main/resources/assets/voxy/shaders/lod/mesh/mesh.glsl +++ b/src/main/resources/assets/voxy/shaders/lod/mesh/mesh.glsl @@ -27,11 +27,8 @@ layout(std430) taskNV in Task { uint quadCount; } task; -layout(location=1) perprimitiveNV out PerPrimData { - uvec4 data; - vec4 uvData; -} primOut[]; - +perprimitiveNV out uvec4 primData[MESH_SIZE*2]; +perprimitiveNV out vec4 uvData[MESH_SIZE*2]; uint getQuadId() { uint mid = gl_GlobalInvocationID.x; @@ -260,31 +257,44 @@ void main() { Quad quad = quadData[qid]; setup(quad); - subgroupBarrier(); - - bool render = dot(faceNormal(face), cornerPos-cameraSubPos) <= 0; - + subgroupBarrier(); + uint qId = subgroupExclusiveAdd(render?1:0); if (render) { - vec4 p1 = emitVertexPos(1); - vec4 p2 = emitVertexPos(2); - vec4 p0 = emitVertexPos(0); - vec4 p3 = emitVertexPos(3); uvec4 data = createQuadData(quad); - subgroupBarrier(); - uint triId_ = subgroupExclusiveAdd(2); - uint triId = triId_; - uint vertId_ = subgroupExclusiveAdd(4); - uint vertId = vertId_; - uint idxId = triId*3; + primData[qId*2] = data; + uvData[qId*2] = vec4(faceSize.xz, axisFaceSize); + primData[qId*2+1] = data; + uvData[qId*2+1] = vec4(faceSize.xz, axisFaceSize); + #define VID(i) (gl_LocalInvocationIndex*4+i) + gl_MeshVerticesNV[VID(0)].gl_Position = emitVertexPos(1); + gl_MeshVerticesNV[VID(1)].gl_Position = emitVertexPos(2); + + gl_MeshVerticesNV[VID(2)].gl_Position = emitVertexPos(0); + gl_MeshVerticesNV[VID(3)].gl_Position = emitVertexPos(3); + + gl_PrimitiveIndicesNV[qId*6+0] = VID(0); + gl_PrimitiveIndicesNV[qId*6+1] = VID(1); + gl_PrimitiveIndicesNV[qId*6+2] = VID(2); + + gl_PrimitiveIndicesNV[qId*6+3] = VID(0); + gl_PrimitiveIndicesNV[qId*6+4] = VID(3); + gl_PrimitiveIndicesNV[qId*6+5] = VID(1); + + gl_MeshPrimitivesNV[qId*2].gl_PrimitiveID = int(qid|(0u<<31)); + gl_MeshPrimitivesNV[qId*2+1].gl_PrimitiveID = int(qid|(1u<<31)); + + /* + //vec4 p1 = ; + //vec4 p2 = ; + //vec4 p0 = emitVertexPos(0); + //vec4 p3 = emitVertexPos(3); //Emit common - gl_MeshVerticesNV[vertId++].gl_Position = p1; - gl_MeshVerticesNV[vertId++].gl_Position = p2; { gl_PrimitiveIndicesNV[idxId++] = vertId_+0; @@ -309,11 +319,13 @@ void main() { primOut[triId].uvData = vec4(faceSize.xz, axisFaceSize); gl_MeshPrimitivesNV[triId++].gl_PrimitiveID = int(qid|(1u<<31)); } + */ subgroupBarrier(); - uint count = subgroupMax(triId_+2); - if (subgroupElect()) { + uint count = subgroupMax(qId); + if (count != 0 && subgroupElect()) { + count = count *2+2; gl_PrimitiveCountNV = count; #ifdef HAS_STATISTICS atomicAdd(quadCounts[task.lodLvl], count); diff --git a/src/main/resources/assets/voxy/shaders/lod/mesh/task2.glsl b/src/main/resources/assets/voxy/shaders/lod/mesh/task2.glsl new file mode 100644 index 00000000..f4f74432 --- /dev/null +++ b/src/main/resources/assets/voxy/shaders/lod/mesh/task2.glsl @@ -0,0 +1,154 @@ +#version 460 core + +#extension GL_NV_mesh_shader : require + +layout(local_size_x=4) in; + +#import + +bvec3 and(bvec3 a, bvec3 b) { + return bvec3(a.x&&b.x, a.y&&b.y, a.z&&b.z); +} + +layout(binding = 0, std140) uniform SceneUniform { + mat4 MVP; + ivec3 baseSectionPos; + uint frameId; + vec3 cameraSubPos; + uint pad_; + vec2 screenSize; +}; + +layout(binding = 1, std430) restrict readonly buffer IndirectSectionLookupBuffer { + uint sectionCount; + uint indirectLookup[]; +}; + +layout(binding = 2, std430) restrict readonly buffer SectionBuffer { + SectionMeta sectionData[]; +}; + +layout(binding = 3, std430) restrict readonly buffer VisibilityBuffer { + uint visibilityData[]; +}; + +#ifdef HAS_STATISTICS +layout(binding = STATISTICS_BUFFER_BINDING, std430) restrict buffer statisticsBuffer { + uint visibleSectionCounts[5]; + uint quadCounts[5]; +}; +#endif + +taskNV out Task { + uvec4 control;//the control vec it defines what subvector to use, it is effectivly the terminating ranges of each bin + uvec4 bins[4];//the bins for each section the last component of each bin is the quad offset + + uint launchSize; +} task; + +#define BIN(br, cnt) if (br) { if (!pset) {bin[i++] = (sum<<16)|off;} sum += cnt; } pset = br; off += cnt; + +/* +void createBin(out uvec4 bin, out uint sum, out uint offset, uint dsc, bvec3 a, bvec3 b, uvec3 cA, uvec3 cB) { + bin = uvec4(-1); + + bool pset = false; + uint i = 0; + sum = 0; + offset = counts.x&0xFFFFu;//translucent quads + + uint dsc = counts.x>>16;//double sided quads + + uint off = counts.x&0xFFFFu;//translucent quads + uint i = 0; + + BIN(dsc!=0, dsc);//Double sided quads + + + BIN(a.x, cA.x);//Down + BIN(b.x, cB.x);//Up + BIN(a.y, cA.y);//North + BIN(b.y, cB.y);//South + BIN(a.z, cA.z);//West + BIN(b.z, cB.z);//East +}*/ + +uint fillBins(uvec4 counts, ivec3 relative) {//Returns quad count + uvec3 cA = counts.yzw&0xFFFFu; + uvec3 cB = counts.yzw>>16; + + bvec3 a = and(notEqual(cA, uvec3(0)), lessThanEqual(ivec3(0), relative.yzx)); + bvec3 b = and(notEqual(cB, uvec3(0)), lessThanEqual(relative.yzx, ivec3(0))); + + //compute the merged bin values + uvec4 bin = uvec4(-1); + + bool pset = false; + uint i = 0; + uint sum = 0; + uint offset = counts.x&0xFFFFu;//translucent quads + + uint dsc = counts.x>>16;//double sided quads + + uint off = counts.x&0xFFFFu;//translucent quads + uint i = 0; + + BIN(dsc!=0, dsc);//Double sided quads + + + BIN(a.x, cA.x);//Down + BIN(b.x, cB.x);//Up + BIN(a.y, cA.y);//North + BIN(b.y, cB.y);//South + BIN(a.z, cA.z);//West + BIN(b.z, cB.z);//East + + //bin contains filled bin data, non filled slots contain -1 + + return sum; +} + + +void main() { + if (sectionCount<=gl_GlobalInvocationID.x) { + return; + } + if (subgroupElect()) { + task.quadCount = 0; + } + + uint secId = indirectLookup[gl_GlobalInvocationID.x]; + uint vis = visibilityData[secId]; + + bool shouldRender = (vis&0x7fffffffu) == frameId-1;//-1 since we are technically in the next frame for the primary rasterization + bool renderTemporally = (vis&0x80000000u)==0;// If we are the temporal specialization, only render if marked as render temporally + + if (shouldRender) { + SectionMeta section = sectionData[secId]; + + uint detail = extractDetail(section); + ivec3 ipos = extractPosition(section); + + ivec3 relative = ipos-(baseSectionPos>>detail); + + #ifdef HAS_STATISTICS + atomicAdd(visibleSectionCounts[detail], 1); + #endif + + //TODO: here enqueue the id here for both translucent and temporal (if relevant) (* note technically dont need for temporal as can just check :tm: if we are in temporal render mode) + + //TODO: in the temporal phase, extract the sections that are ment to be rendered and are also translucent + // enqueue them into a seperate buffer and increment the bin counters based on distance + // this should allow a massive simplificattion of the raster pipeline by eliminating all command gen shaders + prep shaders + + + + task.baseQuad = extractQuadStart(section); + task.quadCount = fillBins(section.b, relative); + + task.cameraOffset = vec3(((ipos<