diff --git a/src/main/java/me/cortex/voxy/client/core/VoxelCore.java b/src/main/java/me/cortex/voxy/client/core/VoxelCore.java index 158dfa55..bd7e37c0 100644 --- a/src/main/java/me/cortex/voxy/client/core/VoxelCore.java +++ b/src/main/java/me/cortex/voxy/client/core/VoxelCore.java @@ -64,8 +64,10 @@ public class VoxelCore { SharedIndexBuffer.INSTANCE.id(); if (VoxyConfig.CONFIG.useMeshShaders()) { this.renderer = new NvMeshFarWorldRenderer(VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections); + System.out.println("Using NvMeshFarWorldRenderer"); } else { this.renderer = new Gl46FarWorldRenderer(VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections); + System.out.println("Using Gl46FarWorldRenderer"); } this.viewportSelector = new ViewportSelector<>(this.renderer::createViewport); System.out.println("Renderer initialized"); diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/Gl46FarWorldRenderer.java b/src/main/java/me/cortex/voxy/client/core/rendering/Gl46FarWorldRenderer.java index 6bd98f4d..2ecf6437 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/Gl46FarWorldRenderer.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/Gl46FarWorldRenderer.java @@ -38,7 +38,7 @@ public class Gl46FarWorldRenderer extends AbstractFarWorldRenderer .compile(); private final Shader lodShader = Shader.make() - .add(ShaderType.VERTEX, "voxy:lod/gl46/quads.vert") + .add(ShaderType.VERTEX, "voxy:lod/gl46/quads2.vert") .add(ShaderType.FRAGMENT, "voxy:lod/gl46/quads.frag") .compile(); diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/NvMeshFarWorldRenderer.java b/src/main/java/me/cortex/voxy/client/core/rendering/NvMeshFarWorldRenderer.java index 7fdaf9a1..29784183 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/NvMeshFarWorldRenderer.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/NvMeshFarWorldRenderer.java @@ -15,7 +15,9 @@ import org.lwjgl.system.MemoryUtil; import java.util.List; import static org.lwjgl.opengl.ARBIndirectParameters.GL_PARAMETER_BUFFER_ARB; +import static org.lwjgl.opengl.ARBIndirectParameters.glMultiDrawElementsIndirectCountARB; import static org.lwjgl.opengl.GL11.*; +import static org.lwjgl.opengl.GL14C.glBlendFuncSeparate; import static org.lwjgl.opengl.GL15.GL_ELEMENT_ARRAY_BUFFER; import static org.lwjgl.opengl.GL15.glBindBuffer; import static org.lwjgl.opengl.GL30.glBindBufferBase; @@ -39,6 +41,12 @@ public class NvMeshFarWorldRenderer extends AbstractFarWorldRenderer +#import +#import +#line 8 + +layout(location = 0) out vec2 uv; +layout(location = 1) out flat vec2 baseUV; +layout(location = 2) out flat vec4 tinting; +layout(location = 3) out flat vec4 addin; +layout(location = 4) out flat uint flags; +layout(location = 5) out flat vec4 conditionalTinting; +//layout(location = 6) out flat vec4 solidColour; + +uint extractLodLevel() { + return uint(gl_BaseInstance)>>27; +} + +//Note the last 2 bits of gl_BaseInstance are unused +//Gives a relative position of +-255 relative to the player center in its respective lod +ivec3 extractRelativeLodPos() { + return (ivec3(gl_BaseInstance)<>ivec3(23); +} + +vec4 uint2vec4RGBA(uint colour) { + return vec4((uvec4(colour)>>uvec4(24,16,8,0))&uvec4(0xFF))/255.0; +} + +vec4 getFaceSize(uint faceData) { + float EPSILON = 0.001f; + vec4 faceOffsetsSizes = extractFaceSizes(faceData); + //Expand the quads by a very small amount + faceOffsetsSizes.xz -= vec2(EPSILON); + faceOffsetsSizes.yw += vec2(EPSILON); + + //Make the end relative to the start + faceOffsetsSizes.yw -= faceOffsetsSizes.xz; + + return faceOffsetsSizes; +} + +//TODO: make branchless by using ternaries i think +vec3 swizzelDataAxis(uint axis, vec3 data) { + if (axis == 0) { //Up/down + data = data.xzy; + } + //Not needed, here for readability + //if (axis == 1) {//north/south + // offset = offset.xyz; + //} + if (axis == 2) { //west/east + data = data.zxy; + } + return data; +} + +//TODO: add a mechanism so that some quads can ignore backface culling +// this would help alot with stuff like crops as they would look kinda weird i think, +// same with flowers etc +void main() { + int cornerIdx = gl_VertexID&3; + Quad quad = quadData[uint(gl_VertexID)>>2]; + vec3 innerPos = extractPos(quad); + uint face = extractFace(quad); + uint modelId = extractStateId(quad); + BlockModel model = modelData[modelId]; + uint faceData = model.faceData[face]; + bool isTranslucent = modelIsTranslucent(model); + bool hasAO = modelHasMipmaps(model);//TODO: replace with per face AO flag + bool isShaded = hasAO;//TODO: make this a per face flag + + uint lodLevel = extractLodLevel(); + + + vec2 modelUV = vec2(modelId&0xFFu, (modelId>>8)&0xFFu)*(1.0/(256.0)); + baseUV = modelUV + (vec2(face>>1, face&1u) * (1.0/(vec2(3.0, 2.0)*256.0))); + + ivec2 quadSize = extractSize(quad); + + { //Generate tinting and flag data + flags = faceHasAlphaCuttout(faceData); + + //We need to have a conditional override based on if the model size is < a full face + quadSize > 1 + flags |= uint(any(greaterThan(quadSize, ivec2(1)))) & faceHasAlphaCuttoutOverride(faceData); + + flags |= uint(!modelHasMipmaps(model))<<1; + + //Compute lighting + tinting = getLighting(extractLightId(quad)); + + //Apply model colour tinting + uint tintColour = model.colourTint; + if (modelHasBiomeLUT(model)) { + tintColour = colourData[tintColour + extractBiomeId(quad)]; + } + + conditionalTinting = vec4(0); + if (tintColour != uint(-1)) { + flags |= 1u<<2; + conditionalTinting = uint2vec4RGBA(tintColour).yzwx; + } + + addin = vec4(0.0); + if (!isTranslucent) { + tinting.w = 0.0; + //Encode the face, the lod level and + uint encodedData = 0; + encodedData |= face; + encodedData |= (lodLevel<<3); + encodedData |= uint(hasAO)<<6; + addin.w = float(encodedData)/255.0; + } + + //Apply face tint + if (isShaded) { + //TODO: make branchless, infact apply ahead of time to the texture itself in ModelManager since that is + // per face + if ((face>>1) == 1) { + tinting.xyz *= 0.8f; + } else if ((face>>1) == 2) { + tinting.xyz *= 0.6f; + } else if (face == 0){ + tinting.xyz *= 0.5f; + } + } + } + + + + + + vec4 faceSize = getFaceSize(faceData); + + vec2 cQuadSize = (faceSize.yw + quadSize - 1) * vec2((cornerIdx>>1)&1, cornerIdx&1); + uv = faceSize.xz + cQuadSize; + + vec3 cornerPos = extractPos(quad); + float depthOffset = extractFaceIndentation(faceData); + cornerPos += swizzelDataAxis(face>>1, vec3(faceSize.xz, mix(depthOffset, 1-depthOffset, float(face&1u)))); + + + vec3 origin = vec3(((extractRelativeLodPos()<>1,vec3(cQuadSize,0)))*(1<>uvec4(24,16,8,0))&uvec4(0xFF))/255.0; } -//Gets the face offset with respect to the face direction (e.g. some will be + some will be -) -float getDepthOffset(uint faceData, uint face) { - float offset = extractFaceIndentation(faceData); - return offset * (1.0-((int(face)&1)*2.0)); -} - vec4 getFaceSize(uint faceData) { float EPSILON = 0.001f; vec4 faceOffsetsSizes = extractFaceSizes(faceData); diff --git a/src/main/resources/assets/voxy/shaders/lod/nvmesh/translucent.mesh b/src/main/resources/assets/voxy/shaders/lod/nvmesh/translucent.mesh new file mode 100644 index 00000000..3e799c3e --- /dev/null +++ b/src/main/resources/assets/voxy/shaders/lod/nvmesh/translucent.mesh @@ -0,0 +1,212 @@ +#version 460 + +#extension GL_ARB_shading_language_include : enable +#pragma optionNV(unroll all) +#define UNROLL_LOOP + +#extension GL_NV_mesh_shader : require +#extension GL_NV_gpu_shader5 : require +#extension GL_ARB_gpu_shader_int64 : require + +#import +#import +#import +#line 13 + +layout(local_size_x = 16) in; +layout(triangles, max_vertices=64, max_primitives=32) out; + +layout(location=1) out Interpolants { + vec2 uv; +} i_out[]; + +layout(location=2) perprimitiveNV out PerPrimData { + vec2 baseUV; + vec4 tinting; + vec4 addin; + uint flags; + vec4 conditionalTinting; +} per_prim_out[]; + +void emitIndicies() { + uint primBase = gl_LocalInvocationID.x * 6; + uint vertBase = gl_LocalInvocationID.x<<2; + gl_PrimitiveIndicesNV[primBase+0] = vertBase+0; + gl_PrimitiveIndicesNV[primBase+1] = vertBase+1; + gl_PrimitiveIndicesNV[primBase+2] = vertBase+2; + gl_PrimitiveIndicesNV[primBase+3] = vertBase+2; + gl_PrimitiveIndicesNV[primBase+4] = vertBase+3; + gl_PrimitiveIndicesNV[primBase+5] = vertBase+0; +} + +vec4 uint2vec4RGBA(uint colour) { + return vec4((uvec4(colour)>>uvec4(24,16,8,0))&uvec4(0xFF))/255.0; +} + +vec4 getFaceSize(uint faceData) { + float EPSILON = 0.001f; + vec4 faceOffsetsSizes = extractFaceSizes(faceData); + //Expand the quads by a very small amount + faceOffsetsSizes.xz -= vec2(EPSILON); + faceOffsetsSizes.yw += vec2(EPSILON); + + //Make the end relative to the start + faceOffsetsSizes.yw -= faceOffsetsSizes.xz; + + return faceOffsetsSizes; +} + +//TODO: make branchless by using ternaries i think +vec3 swizzelDataAxis(uint axis, vec3 data) { + if (axis == 0) { //Up/down + data = data.xzy; + } + //Not needed, here for readability + //if (axis == 1) {//north/south + // offset = offset.xyz; + //} + if (axis == 2) { //west/east + data = data.zxy; + } + return data; +} + +taskNV in Task { + vec3 origin;//Offset to camera in world space (already multiplied by lod level) + uint baseOffset;//Base offset into the quad data buffer + + uint meta;//First 4 bits is lod level, remaining is quadCount +}; + +uint getQuadIndex() { + if ((meta>>4)<=gl_GlobalInvocationID.x) return -1; + return baseOffset + gl_GlobalInvocationID.x; +} + +void main() { + uint idx = getQuadIndex(); + //If its over, dont render + if (idx == uint(-1)) { + return; + } + emitIndicies(); + + uint A = gl_LocalInvocationID.x<<1; + uint B = (gl_LocalInvocationID.x<<1)|1u; + uint V = (gl_LocalInvocationID.x<<2); + + uint lodLvl = meta&0xf; + float lodScale = (1<>8)&0xFFu)*(1.0/(256.0)); + vec2 baseUV = modelUV + (vec2(face>>1, face&1u) * (1.0/(vec2(3.0, 2.0)*256.0))); + //Write out baseUV + per_prim_out[A].baseUV = baseUV; + per_prim_out[B].baseUV = baseUV; + + + + + uint flags = faceHasAlphaCuttout(faceData); + + //We need to have a conditional override based on if the model size is < a full face + quadSize > 1 + flags |= uint(any(greaterThan(quadSize, ivec2(1)))) & faceHasAlphaCuttoutOverride(faceData); + + flags |= uint(!modelHasMipmaps(model))<<1; + + //Compute lighting + vec4 tinting = getLighting(extractLightId(quad)); + + //Apply model colour tinting + uint tintColour = model.colourTint; + if (modelHasBiomeLUT(model)) { + tintColour = colourData[tintColour + extractBiomeId(quad)]; + } + + vec4 conditionalTinting = vec4(0); + if (tintColour != uint(-1)) { + flags |= 1u<<2; + conditionalTinting = uint2vec4RGBA(tintColour).yzwx; + } + + vec4 addin = vec4(0.0); + if (!isTranslucent) { + tinting.w = 0.0; + //Encode the face, the lod level and + uint encodedData = 0; + encodedData |= face; + encodedData |= (lodLvl<<3); + encodedData |= uint(hasAO)<<6; + addin.w = float(encodedData)/255.0; + } + + //Apply face tint + if (isShaded) { + //TODO: make branchless, infact apply ahead of time to the texture itself in ModelManager since that is + // per face + if ((face>>1) == 1) { + tinting.xyz *= 0.8f; + } else if ((face>>1) == 2) { + tinting.xyz *= 0.6f; + } else if (face == 0){ + tinting.xyz *= 0.5f; + } + } + + + //Write out everything + per_prim_out[A].tinting = tinting; + per_prim_out[A].addin = addin; + per_prim_out[A].flags = flags; + per_prim_out[A].conditionalTinting = conditionalTinting; + per_prim_out[B].tinting = tinting; + per_prim_out[B].addin = addin; + per_prim_out[B].flags = flags; + per_prim_out[B].conditionalTinting = conditionalTinting; + + + + + + vec4 faceSize = getFaceSize(faceData); + + vec2 cQuadSize = faceSize.yw + quadSize - 1; + vec2 uv0 = faceSize.xz; + i_out[V|0].uv = uv0; + i_out[V|1].uv = uv0 + vec2(0, cQuadSize.y); + i_out[V|2].uv = uv0 + cQuadSize; + i_out[V|3].uv = uv0 + vec2(cQuadSize.x, 0); + + + + + //Corner position of quad relative to section corner (in 0->32 scale) + vec3 cornerPos = extractPos(quad); + float depthOffset = extractFaceIndentation(faceData); + cornerPos += swizzelDataAxis(face>>1, vec3(faceSize.xz, mix(depthOffset, 1-depthOffset, float(face&1u)))); + gl_MeshVerticesNV[V|0].gl_Position = MVP*vec4(cornerPos*lodScale+origin, 1.0); + gl_MeshVerticesNV[V|1].gl_Position = MVP*vec4((cornerPos+swizzelDataAxis(face>>1,vec3(0,cQuadSize.y,0)))*lodScale+origin, 1.0); + gl_MeshVerticesNV[V|2].gl_Position = MVP*vec4((cornerPos+swizzelDataAxis(face>>1,vec3(cQuadSize, 0)))*lodScale+origin, 1.0); + gl_MeshVerticesNV[V|3].gl_Position = MVP*vec4((cornerPos+swizzelDataAxis(face>>1,vec3(cQuadSize.x,0,0)))*lodScale+origin, 1.0); + + if (gl_LocalInvocationID.x == 0) { + //Remaining quads in workgroup + gl_PrimitiveCountNV = min(uint(int(meta>>4)-int(gl_WorkGroupID.x<<4))<<1, 32);//2 primatives per quad + } +} \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/nvmesh/translucent.task b/src/main/resources/assets/voxy/shaders/lod/nvmesh/translucent.task new file mode 100644 index 00000000..eba6d055 --- /dev/null +++ b/src/main/resources/assets/voxy/shaders/lod/nvmesh/translucent.task @@ -0,0 +1,49 @@ +#version 460 + +#extension GL_ARB_shading_language_include : enable +#pragma optionNV(unroll all) +#define UNROLL_LOOP + +#extension GL_NV_mesh_shader : require +#extension GL_NV_gpu_shader5 : require +#extension GL_ARB_gpu_shader_int64 : require + +#import +#import +#line 12 + +#define MESH_WORKLOAD_PER_INVOCATION 16 + +layout(local_size_x=1) in; + +taskNV out Task { + vec3 origin;//Offset to camera in world space (already multiplied by lod level) + uint baseOffset;//Base offset into the quad data buffer + uint meta;//First 4 bits is lod level, remaining is quadCount +} task; + +void main() { + uint sectionId = gl_WorkGroupID.x; + bool visibleLastFrame = visibilityData[sectionId] == frameId; + + //If it wasnt visible last frame then dont render this frame ** (do temporal coherance) + if (!visibleLastFrame) { + gl_TaskCountNV = 0; + return; + } + SectionMeta meta = sectionData[sectionId]; + uint lodLvl = extractDetail(meta); + ivec3 lodPos= extractPosition(meta); + //Relative position to camera with resepct to lod level to check for visibility bits + ivec3 cpos = lodPos-(baseSectionPos>>lodLvl); + //Relative position to camera + task.origin = vec3(((lodPos<