From 107850749520e7fb92bcfebdd92c8ab890f60092 Mon Sep 17 00:00:00 2001 From: mcrcortex <18544518+MCRcortex@users.noreply.github.com> Date: Mon, 14 Jul 2025 16:01:28 +1000 Subject: [PATCH] improvements and started frag impl (missing uv) --- .../assets/voxy/shaders/lod/mesh/frag.glsl | 89 ++++++++++- .../assets/voxy/shaders/lod/mesh/mesh.glsl | 139 +++++++++++++++--- .../assets/voxy/shaders/lod/mesh/task.glsl | 22 +-- 3 files changed, 213 insertions(+), 37 deletions(-) diff --git a/src/main/resources/assets/voxy/shaders/lod/mesh/frag.glsl b/src/main/resources/assets/voxy/shaders/lod/mesh/frag.glsl index c00efcf0..c70171ed 100644 --- a/src/main/resources/assets/voxy/shaders/lod/mesh/frag.glsl +++ b/src/main/resources/assets/voxy/shaders/lod/mesh/frag.glsl @@ -3,13 +3,86 @@ layout(binding = 0) uniform sampler2D blockModelAtlas; layout(binding = 2) uniform sampler2D depthTex; -layout(location = 0) out vec4 colour; -void main() { +layout(location=1) perprimitiveNV in PerPrimData { + uvec4 data; +} primIn; - uint hash = gl_PrimitiveID*1231421+123141; - hash ^= hash>>16; - hash = hash*1231421+123141; - hash ^= hash>>16; - hash = hash * 1827364925 + 123325621; - colour = vec4(float(hash&63u)/63, float((hash>>6)&63u)/63, float((hash>>12)&63u)/63, 0); +layout(location = 0) out vec4 outColour; + +vec4 uint2vec4RGBA(uint colour) { + return vec4((uvec4(colour)>>uvec4(24,16,8,0))&uvec4(0xFF))/255.0; +} + +bool useMipmaps() { + return (primIn.data.x&2u)==0u; +} + +bool useTinting() { + return (primIn.data.x&4u)!=0u; +} + +bool useCutout() { + return (primIn.data.x&1u)==1u; +} + +vec4 computeColour(vec4 colour) { + //Conditional tinting, TODO: FIXME: REPLACE WITH MASK OR SOMETHING, like encode data into the top bit of alpha + if (useTinting() && abs(colour.r-colour.g) < 0.02f && abs(colour.g-colour.b) < 0.02f) { + colour *= uint2vec4RGBA(primIn.data.z).yzwx; + } + return (colour * uint2vec4RGBA(primIn.data.y)) + vec4(0,0,0,float(primIn.data.w&0xFFu)/255); +} + + +uint getFace() { + return (primIn.data.w>>8)&7u; +} + +vec2 getBaseUV() { + uint face = getFace(); + uint modelId = primIn.data.x>>16; + vec2 modelUV = vec2(modelId&0xFFu, (modelId>>8)&0xFFu)*(1.0/(256.0)); + return modelUV + (vec2(face>>1, face&1u) * (1.0/(vec2(3.0, 2.0)*256.0))); +} + + +void main() { + vec2 uv = vec2(0); + + //Tile is the tile we are in + vec2 tile; + vec2 uv2 = modf(uv, tile)*(1.0/(vec2(3.0,2.0)*256.0)); + vec4 colour; + vec2 texPos = uv2 + getBaseUV(); + if (useMipmaps()) { + vec2 uvSmol = uv*(1.0/(vec2(3.0,2.0)*256.0)); + vec2 dx = dFdx(uvSmol);//vec2(lDx, dDx); + vec2 dy = dFdy(uvSmol);//vec2(lDy, dDy); + colour = textureGrad(blockModelAtlas, texPos, dx, dy); + } else { + colour = textureLod(blockModelAtlas, texPos, 0); + } + + if (any(notEqual(clamp(tile, vec2(0), vec2((primIn.data.x>>8)&0xFu, (primIn.data.x>>12)&0xFu)), tile))) { + discard; + } + + //Check the minimum bounding texture and ensure we are greater than it + if (gl_FragCoord.z < texelFetch(depthTex, ivec2(gl_FragCoord.xy), 0).r) { + discard; + } + + + //Also, small quad is really fking over the mipping level somehow + if (useCutout() && (textureLod(blockModelAtlas, texPos, 0).a <= 0.1f)) { + //This is stupidly stupidly bad for divergence + //TODO: FIXME, basicly what this do is sample the exact pixel (no lod) for discarding, this stops mipmapping fucking it over + #ifndef DEBUG_RENDER + discard; + #endif + } + + colour = computeColour(colour); + + outColour = colour; } \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/mesh/mesh.glsl b/src/main/resources/assets/voxy/shaders/lod/mesh/mesh.glsl index 5e25c3e7..8540bc66 100644 --- a/src/main/resources/assets/voxy/shaders/lod/mesh/mesh.glsl +++ b/src/main/resources/assets/voxy/shaders/lod/mesh/mesh.glsl @@ -14,11 +14,11 @@ layout(local_size_x = MESH_SIZE) in; layout(triangles, max_vertices=(MESH_SIZE*4), max_primitives=(MESH_SIZE*2)) out; -taskNV in Task { +layout(std430) taskNV in Task { //Tightly packed, prefix sum + offset - uvec4 binA; - uvec4 binB; - //uint bins[8]; + //uvec4 binA; + //uvec4 binB; + uint bins[8]; vec3 cameraOffset; uint lodLvl; @@ -27,20 +27,36 @@ taskNV in Task { uint quadCount; } task; +layout(location=1) perprimitiveNV out PerPrimData { + uvec4 data; +} primOut[]; + uint getQuadId() { uint mid = gl_GlobalInvocationID.x; - //Funny method uint cv = (mid<<16)|0xFFFFu; - uvec4 a = mix(uvec4(0), uvec4( 1, 2, 4, 8), lessThanEqual(task.binA, uvec4(cv))) + - mix(uvec4(0), uvec4(16,32,64,128), lessThanEqual(task.binB, uvec4(cv))); + /* + //Funny method + uvec4 a = mix(uvec4(0), uvec4( 1, 2, 4, 8), lessThanEqual(uvec4(task.bins[0],task.bins[1],task.bins[2],task.bins[3]), uvec4(cv))) + + mix(uvec4(0), uvec4(16,32,64,128), lessThanEqual(uvec4(task.bins[4],task.bins[5],task.bins[6],task.bins[7]), uvec4(cv))); uint act = a.x+a.y+a.z+a.w; uint id = findLSB(act^(act>>1)); //uint point = mix(binB, binA, id<4)[id&3u]; - uint point = mix(task.binB[id&3u], task.binA[id&3u], id<4); + uint point = task.bins[id]; return (point&0xFFFFu)+(mid-(point>>16)); + */ + #pragma unroll + for (uint i = 0; i<7; i++) { + uint point = task.bins[i]; + if (point<=cv&&cv>16)); + } + } + return -1; + + /* for (uint i = 0; i<7; i++) { @@ -110,22 +126,37 @@ vec3 faceNormal(uint face) { return vec3(uint((face>>1)==2), uint((face>>1)==0), uint((face>>1)==1)) * (float(int(face)&1)*2-1); } +uint packVec4(vec4 vec) { + uvec4 vec_=uvec4(vec*255)<>1)&1, cornerIdx&1); + //uv = } +vec2 getUvCorner(uint corner) { + return faceSize.xz + axisFaceSize*vec2((corner>>1)&1u, corner&1u);; +} + +uvec4 createQuadData(Quad quad) { + uint flags = faceHasAlphaCuttout(faceData); + + ivec2 quadSize = extractSize(quad); + //We need to have a conditional override based on if the model size is < a full face + quadSize > 1 + flags |= uint(any(greaterThan(quadSize, ivec2(1)))) & faceHasAlphaCuttoutOverride(faceData); + + flags |= uint(!modelHasMipmaps(model))<<1; + + //Compute lighting + vec4 tinting = getLighting(extractLightId(quad)); + + //Apply model colour tinting + uint tintColour = model.colourTint; + if (modelHasBiomeLUT(model)) { + tintColour = colourData[tintColour + extractBiomeId(quad)]; + } + + uint conditionalTinting = 0; + if (tintColour != uint(-1)) { + flags |= 1u<<2; + conditionalTinting = tintColour; + } + + uint addin = 0; + if (!isTranslucent) { + tinting.w = 0.0; + //Encode the face, the lod level and + uint encodedData = 0; + encodedData |= face; + encodedData |= (task.lodLvl<<3); + encodedData |= uint(hasAO)<<6; + addin = encodedData; + } + + //Apply face tint + if (isShaded) { + //TODO: make branchless, infact apply ahead of time to the texture itself in ModelManager since that is + // per face + if ((face>>1) == 1) {//NORTH, SOUTH + tinting.xyz *= 0.8f; + } else if ((face>>1) == 2) {//EAST, WEST + tinting.xyz *= 0.6f; + } else if (face == 0) {//DOWN + tinting.xyz *= 0.5f; + } + } + + + + uvec4 interData; + + interData.x = (modelId<<16) | flags | (uint(quadSize.x-1)<<8) | (uint(quadSize.y-1)<<12); + + interData.y = packVec4(tinting); + interData.z = conditionalTinting; + interData.w = addin|(face<<8); + + return interData; +} + vec4 emitVertexPos(int corner) { vec3 pointPos = swizzelDataAxis(face>>1,vec3(axisFaceSize*mix(vec2(0),vec2(1<>1)&1, corner&1)),0))+cornerPos; return MVP*vec4(pointPos, 1.0); @@ -161,7 +257,7 @@ bvec2 whatRender(vec4 p1, vec4 p2, vec4 p0, vec4 p3) { vec2 t1max = max(ssmax, point); //Possibly cull the triangles if they dont cover the center of a pixel on the screen (degen) - float degenBias = 0.001f; + float degenBias = 0.01f; bool t0draw = all(notEqual(round(t0min-degenBias),round(t0max+degenBias))); bool t1draw = all(notEqual(round(t1min-degenBias),round(t1max+degenBias))); return bvec2(t0draw, t1draw); @@ -195,17 +291,22 @@ void main() { vec4 p2 = emitVertexPos(2); vec4 p0 = emitVertexPos(0); vec4 p3 = emitVertexPos(3); - bvec2 what = whatRender(p1, p2, p0, p3); + bvec2 what = bvec2(true);//whatRender(p1, p2, p0, p3); uint c = uint(what.x)+uint(what.y); if (c == 0) { return;//Early exit } + uvec4 data = createQuadData(quad); + + subgroupBarrier(); uint triId_ = subgroupExclusiveAdd(c); uint triId = triId_; uint vertId_ = subgroupExclusiveAdd(c==1?3:4); uint vertId = vertId_; uint idxId = triId*3; + + //Emit common gl_MeshVerticesNV[vertId++].gl_Position = p1; gl_MeshVerticesNV[vertId++].gl_Position = p2; @@ -216,6 +317,7 @@ void main() { gl_MeshVerticesNV[vertId++].gl_Position = p0; + primOut[triId].data = data; gl_MeshPrimitivesNV[triId++].gl_PrimitiveID = int(qid); } if (what.y) { @@ -225,6 +327,7 @@ void main() { gl_MeshVerticesNV[vertId++].gl_Position = p3; + primOut[triId].data = data; gl_MeshPrimitivesNV[triId++].gl_PrimitiveID = int(qid); } diff --git a/src/main/resources/assets/voxy/shaders/lod/mesh/task.glsl b/src/main/resources/assets/voxy/shaders/lod/mesh/task.glsl index e6e7310d..60e3e958 100644 --- a/src/main/resources/assets/voxy/shaders/lod/mesh/task.glsl +++ b/src/main/resources/assets/voxy/shaders/lod/mesh/task.glsl @@ -35,11 +35,11 @@ layout(binding = STATISTICS_BUFFER_BINDING, std430) restrict buffer statisticsBu }; #endif -taskNV out Task { +layout(std430) taskNV out Task { //Tightly packed, prefix sum + offset - uvec4 binA; - uvec4 binB; - //uint bins[8]; + //uvec4 binA; + //uvec4 binB; + layout(offset = 0) uint bins[8]; vec3 cameraOffset; uint lodLvl; @@ -48,15 +48,15 @@ taskNV out Task { uint quadCount; } task; -//#define BIN(br, cnt) if (br) { task.bins[i++] = (sum<<16)|off; sum += cnt; } off += cnt; -#define BIN(br, cnt) if (br) { batch[i++] = (sum<<16)|off; sum += cnt; } off += cnt; +#define BIN(br, cnt) if (br) { task.bins[i++] = (sum<<16)|off; sum += cnt; } off += cnt; +//#define BIN(br, cnt) if (br) { batch[i++] = (sum<<16)|off; sum += cnt; } off += cnt; bvec3 and(bvec3 a, bvec3 b) { return bvec3(a.x&&b.x, a.y&&b.y, a.z&&b.z); } uint fillBins(uvec4 counts, ivec3 relative) {//Returns quad count - //#pragma unroll - //for (uint i = 0; i < 8; i++) task.bins[i] = uint(-1); + #pragma unroll + for (uint i = 0; i < 8; i++) task.bins[i] = uint(-1); uvec3 cA = counts.yzw&0xFFFFu; uvec3 cB = counts.yzw>>16; @@ -70,7 +70,7 @@ uint fillBins(uvec4 counts, ivec3 relative) {//Returns quad count uint i = 0; //TODO: might need to move this into shared memory or somethign? so that compiler can reason about it (or make the bin an array in here and mesh) - uint batch[8] = {uint(-1), uint(-1), uint(-1), uint(-1), uint(-1),uint(-1),uint(-1),uint(-1)}; + //uint batch[8] = {uint(-1), uint(-1), uint(-1), uint(-1), uint(-1),uint(-1),uint(-1),uint(-1)}; BIN(dsc!=0, dsc);//Double sided quads @@ -83,8 +83,8 @@ uint fillBins(uvec4 counts, ivec3 relative) {//Returns quad count BIN(a.z, cA.z);//West BIN(b.z, cB.z);//East - task.binA = uvec4(batch[0], batch[1], batch[2], batch[3]); - task.binB = uvec4(batch[4], batch[5], batch[6], batch[7]); + //task.binA = uvec4(batch[0], batch[1], batch[2], batch[3]); + //task.binB = uvec4(batch[4], batch[5], batch[6], batch[7]); return sum; }