diff --git a/src/main/java/me/cortex/voxy/client/config/VoxyConfig.java b/src/main/java/me/cortex/voxy/client/config/VoxyConfig.java index 47f671d4..c8f1bb21 100644 --- a/src/main/java/me/cortex/voxy/client/config/VoxyConfig.java +++ b/src/main/java/me/cortex/voxy/client/config/VoxyConfig.java @@ -4,6 +4,7 @@ import com.google.gson.FieldNamingPolicy; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import net.fabricmc.loader.api.FabricLoader; +import org.lwjgl.opengl.GL; import java.io.FileReader; import java.io.IOException; @@ -28,6 +29,7 @@ public class VoxyConfig { public int ingestThreads = 2; public int savingThreads = 4; public int renderThreads = 5; + public boolean useMeshShaderIfPossible = true; public static VoxyConfig loadOrCreate() { @@ -58,4 +60,8 @@ public class VoxyConfig { .resolve("voxy-config.json"); } + public boolean useMeshShaders() { + var cap = GL.getCapabilities(); + return this.useMeshShaderIfPossible && cap.GL_NV_mesh_shader && cap.GL_NV_representative_fragment_test; + } } diff --git a/src/main/java/me/cortex/voxy/client/config/VoxyConfigScreenFactory.java b/src/main/java/me/cortex/voxy/client/config/VoxyConfigScreenFactory.java index 195371d5..f8b04712 100644 --- a/src/main/java/me/cortex/voxy/client/config/VoxyConfigScreenFactory.java +++ b/src/main/java/me/cortex/voxy/client/config/VoxyConfigScreenFactory.java @@ -97,6 +97,12 @@ public class VoxyConfigScreenFactory implements ModMenuApi { .setDefaultValue(DEFAULT.renderDistance) .build()); + category.addEntry(entryBuilder.startBooleanToggle(Text.translatable("voxy.config.general.nvmesh"), config.useMeshShaderIfPossible) + .setTooltip(Text.translatable("voxy.config.general.nvmesh.tooltip")) + .setSaveConsumer(val -> config.useMeshShaderIfPossible = val) + .setDefaultValue(DEFAULT.useMeshShaderIfPossible) + .build()); + //category.addEntry(entryBuilder.startIntSlider(Text.translatable("voxy.config.general.compression"), config.savingCompressionLevel, 1, 21) // .setTooltip(Text.translatable("voxy.config.general.compression.tooltip")) // .setSaveConsumer(val -> config.savingCompressionLevel = val) diff --git a/src/main/java/me/cortex/voxy/client/core/VoxelCore.java b/src/main/java/me/cortex/voxy/client/core/VoxelCore.java index 7d16f937..158dfa55 100644 --- a/src/main/java/me/cortex/voxy/client/core/VoxelCore.java +++ b/src/main/java/me/cortex/voxy/client/core/VoxelCore.java @@ -62,7 +62,11 @@ public class VoxelCore { //Trigger the shared index buffer loading SharedIndexBuffer.INSTANCE.id(); - this.renderer = new Gl46FarWorldRenderer(VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections); + if (VoxyConfig.CONFIG.useMeshShaders()) { + this.renderer = new NvMeshFarWorldRenderer(VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections); + } else { + this.renderer = new Gl46FarWorldRenderer(VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections); + } this.viewportSelector = new ViewportSelector<>(this.renderer::createViewport); System.out.println("Renderer initialized"); @@ -168,6 +172,7 @@ public class VoxelCore { //fb.bind(); var projection = computeProjectionMat(); + //var projection = RenderSystem.getProjectionMatrix();//computeProjectionMat(); var viewport = this.viewportSelector.getViewport(); viewport.setProjection(projection).setModelView(matrices.peek().getPositionMatrix()).setCamera(cameraX, cameraY, cameraZ); diff --git a/src/main/java/me/cortex/voxy/client/core/gl/shader/Shader.java b/src/main/java/me/cortex/voxy/client/core/gl/shader/Shader.java index e88044f8..a3f3814d 100644 --- a/src/main/java/me/cortex/voxy/client/core/gl/shader/Shader.java +++ b/src/main/java/me/cortex/voxy/client/core/gl/shader/Shader.java @@ -102,7 +102,7 @@ public class Shader extends TrackedObject { if (result != GL20C.GL_TRUE) { GL20C.glDeleteShader(shader); - throw new RuntimeException("Shader compilation failed, see log for details"); + throw new RuntimeException("Shader compilation failed of type " + type.name() + ", see log for details"); } return shader; diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/AbstractFarWorldRenderer.java b/src/main/java/me/cortex/voxy/client/core/rendering/AbstractFarWorldRenderer.java index 347be8a7..60add36a 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/AbstractFarWorldRenderer.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/AbstractFarWorldRenderer.java @@ -146,6 +146,8 @@ public abstract class AbstractFarWorldRenderer { public void addDebugData(List debug) { this.models.addDebugInfo(debug); + debug.add("Geometry buffer usage: " + ((float)Math.round((this.geometry.getGeometryBufferUsage()*100000))/1000) + "%"); + debug.add("Render Sections: " + this.geometry.getSectionCount()); } public void shutdown() { diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/Gl46FarWorldRenderer.java b/src/main/java/me/cortex/voxy/client/core/rendering/Gl46FarWorldRenderer.java index 13d5867e..6bd98f4d 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/Gl46FarWorldRenderer.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/Gl46FarWorldRenderer.java @@ -215,7 +215,7 @@ public class Gl46FarWorldRenderer extends AbstractFarWorldRenderer } protected Gl46Viewport createViewport0() { - return new Gl46Viewport(this, this.maxSections); + return new Gl46Viewport(this); } @Override @@ -227,11 +227,4 @@ public class Gl46FarWorldRenderer extends AbstractFarWorldRenderer this.glCommandBuffer.free(); this.glCommandCountBuffer.free(); } - - @Override - public void addDebugData(List debug) { - super.addDebugData(debug); - debug.add("Geometry buffer usage: " + ((float)Math.round((this.geometry.getGeometryBufferUsage()*100000))/1000) + "%"); - debug.add("Render Sections: " + this.geometry.getSectionCount()); - } } diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/Gl46Viewport.java b/src/main/java/me/cortex/voxy/client/core/rendering/Gl46Viewport.java index 2e73be0a..d46b7975 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/Gl46Viewport.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/Gl46Viewport.java @@ -10,9 +10,9 @@ import static org.lwjgl.opengl.GL45C.glClearNamedBufferData; public class Gl46Viewport extends Viewport { GlBuffer visibilityBuffer; - public Gl46Viewport(Gl46FarWorldRenderer renderer, int maxSections) { + public Gl46Viewport(Gl46FarWorldRenderer renderer) { super(renderer); - this.visibilityBuffer = new GlBuffer(maxSections*4L); + this.visibilityBuffer = new GlBuffer(renderer.maxSections*4L); glClearNamedBufferData(this.visibilityBuffer.id, GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, new int[1]); } diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/NvMeshFarWorldRenderer.java b/src/main/java/me/cortex/voxy/client/core/rendering/NvMeshFarWorldRenderer.java new file mode 100644 index 00000000..7fdaf9a1 --- /dev/null +++ b/src/main/java/me/cortex/voxy/client/core/rendering/NvMeshFarWorldRenderer.java @@ -0,0 +1,153 @@ +package me.cortex.voxy.client.core.rendering; + +import me.cortex.voxy.client.core.gl.shader.Shader; +import me.cortex.voxy.client.core.gl.shader.ShaderType; +import me.cortex.voxy.client.core.rendering.util.UploadStream; +import me.cortex.voxy.client.mixin.joml.AccessFrustumIntersection; +import net.minecraft.client.render.Camera; +import net.minecraft.client.render.Frustum; +import net.minecraft.client.render.RenderLayer; +import org.joml.Matrix4f; +import org.joml.Vector3f; +import org.lwjgl.opengl.GL11C; +import org.lwjgl.system.MemoryUtil; + +import java.util.List; + +import static org.lwjgl.opengl.ARBIndirectParameters.GL_PARAMETER_BUFFER_ARB; +import static org.lwjgl.opengl.GL11.*; +import static org.lwjgl.opengl.GL15.GL_ELEMENT_ARRAY_BUFFER; +import static org.lwjgl.opengl.GL15.glBindBuffer; +import static org.lwjgl.opengl.GL30.glBindBufferBase; +import static org.lwjgl.opengl.GL30.glBindVertexArray; +import static org.lwjgl.opengl.GL31.GL_UNIFORM_BUFFER; +import static org.lwjgl.opengl.GL31.glDrawElementsInstanced; +import static org.lwjgl.opengl.GL33.glBindSampler; +import static org.lwjgl.opengl.GL40C.GL_DRAW_INDIRECT_BUFFER; +import static org.lwjgl.opengl.GL42.GL_FRAMEBUFFER_BARRIER_BIT; +import static org.lwjgl.opengl.GL42.glMemoryBarrier; +import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BARRIER_BIT; +import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BUFFER; +import static org.lwjgl.opengl.GL45.glBindTextureUnit; +import static org.lwjgl.opengl.NVMeshShader.glDrawMeshTasksNV; +import static org.lwjgl.opengl.NVRepresentativeFragmentTest.GL_REPRESENTATIVE_FRAGMENT_TEST_NV; + +public class NvMeshFarWorldRenderer extends AbstractFarWorldRenderer { + private final Shader terrain = Shader.make() + .add(ShaderType.TASK, "voxy:lod/nvmesh/primary.task") + .add(ShaderType.MESH, "voxy:lod/nvmesh/primary.mesh") + .add(ShaderType.FRAGMENT, "voxy:lod/nvmesh/primary.frag") + .compile(); + + private final Shader cull = Shader.make() + .add(ShaderType.VERTEX, "voxy:lod/nvmesh/cull.vert") + .add(ShaderType.FRAGMENT, "voxy:lod/nvmesh/cull.frag") + .compile(); + + public NvMeshFarWorldRenderer(int geometrySize, int maxSections) { + super(geometrySize, maxSections); + } + + + private void updateUniform(NvMeshViewport viewport) { + long ptr = UploadStream.INSTANCE.upload(this.uniformBuffer, 0, this.uniformBuffer.size()); + + var mat = new Matrix4f(viewport.projection).mul(viewport.modelView); + mat.getToAddress(ptr); ptr += 4*4*4; + var innerTranslation = new Vector3f((float) (viewport.cameraX-(this.sx<<5)), (float) (viewport.cameraY-(this.sy<<5)), (float) (viewport.cameraZ-(this.sz<<5))); + MemoryUtil.memPutInt(ptr, this.sx); ptr += 4; + MemoryUtil.memPutInt(ptr, this.sy); ptr += 4; + MemoryUtil.memPutInt(ptr, this.sz); ptr += 4; + MemoryUtil.memPutInt(ptr, this.geometry.getSectionCount()); ptr += 4; + innerTranslation.getToAddress(ptr); ptr += 4*3; + MemoryUtil.memPutInt(ptr, viewport.frameId++); ptr += 4; + } + + private void bindResources(NvMeshViewport viewport) { + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, SharedIndexBuffer.INSTANCE.id()); + glBindBufferBase(GL_UNIFORM_BUFFER, 0, this.uniformBuffer.id); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, this.geometry.geometryId()); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, this.geometry.metaId()); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, viewport.visibilityBuffer.id); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, this.models.getBufferId()); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, this.models.getColourBufferId()); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, this.lightDataBuffer.id);//Lighting LUT + + //Bind the texture atlas + glBindSampler(0, this.models.getSamplerId()); + glBindTextureUnit(0, this.models.getTextureId()); + } + + @Override + public void renderFarAwayOpaque(NvMeshViewport viewport) { + {//TODO: move all this code into a common super method renderFarAwayTranslucent and make the current method renderFarAwayTranslucent0 + if (this.geometry.getSectionCount() == 0) { + return; + } + + {//Mark all of the updated sections as being visible from last frame + for (int id : this.updatedSectionIds) { + long ptr = UploadStream.INSTANCE.upload(viewport.visibilityBuffer, id * 4L, 4); + MemoryUtil.memPutInt(ptr, viewport.frameId - 1);//(visible from last frame) + } + } + } + + glDisable(GL_BLEND); + glEnable(GL_DEPTH_TEST); + + //Update and upload data + this.updateUniform(viewport); + UploadStream.INSTANCE.commit(); + + + this.terrain.bind(); + + RenderLayer.getCutoutMipped().startDrawing(); + + glBindVertexArray(AbstractFarWorldRenderer.STATIC_VAO); + this.bindResources(viewport); + + glDisable(GL_CULL_FACE); + glDrawMeshTasksNV(0, this.geometry.getSectionCount()); + glEnable(GL_CULL_FACE); + + glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT|GL_SHADER_STORAGE_BARRIER_BIT); + + this.cull.bind(); + this.bindResources(viewport); + glColorMask(false, false, false, false); + glDepthMask(false); + glEnable(GL_REPRESENTATIVE_FRAGMENT_TEST_NV); + glDrawElementsInstanced(GL_TRIANGLES, 6 * 2 * 3, GL_UNSIGNED_BYTE, (1 << 16) * 6 * 2, this.geometry.getSectionCount()); + glDisable(GL_REPRESENTATIVE_FRAGMENT_TEST_NV); + glDepthMask(true); + glColorMask(true, true, true, true); + + glBindVertexArray(0); + glBindSampler(0, 0); + glBindTextureUnit(0, 0); + RenderLayer.getCutoutMipped().endDrawing(); + } + + @Override + public void renderFarAwayTranslucent(NvMeshViewport viewport) { + if (this.geometry.getSectionCount()==0) { + return; + } + //TODO: make a different task shader for translucent + } + + @Override + protected NvMeshViewport createViewport0() { + return new NvMeshViewport(this); + } + + + @Override + public void shutdown() { + super.shutdown(); + this.terrain.free(); + this.cull.free(); + } +} diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/NvMeshViewport.java b/src/main/java/me/cortex/voxy/client/core/rendering/NvMeshViewport.java new file mode 100644 index 00000000..4ce78faf --- /dev/null +++ b/src/main/java/me/cortex/voxy/client/core/rendering/NvMeshViewport.java @@ -0,0 +1,21 @@ +package me.cortex.voxy.client.core.rendering; + +import me.cortex.voxy.client.core.gl.GlBuffer; + +import static org.lwjgl.opengl.GL30C.GL_R8UI; +import static org.lwjgl.opengl.GL30C.GL_RED_INTEGER; +import static org.lwjgl.opengl.GL42.GL_UNSIGNED_BYTE; +import static org.lwjgl.opengl.GL45C.glClearNamedBufferData; + +public class NvMeshViewport extends Viewport { + GlBuffer visibilityBuffer; + public NvMeshViewport(NvMeshFarWorldRenderer renderer) { + super(renderer); + this.visibilityBuffer = new GlBuffer(renderer.maxSections*4L); + glClearNamedBufferData(this.visibilityBuffer.id, GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, new int[1]); + } + + protected void delete0() { + this.visibilityBuffer.free(); + } +} diff --git a/src/main/java/me/cortex/voxy/common/config/Serialization.java b/src/main/java/me/cortex/voxy/common/config/Serialization.java index 83374aa1..19fcc86a 100644 --- a/src/main/java/me/cortex/voxy/common/config/Serialization.java +++ b/src/main/java/me/cortex/voxy/common/config/Serialization.java @@ -97,7 +97,7 @@ public class Serialization { var path = FabricLoader.getInstance().getModContainer("voxy").get().getRootPaths().get(0); clazzs.addAll(collectAllClasses(path, BASE_SEARCH_PACKAGE)); clazzs.addAll(collectAllClasses(BASE_SEARCH_PACKAGE)); - + int count = 0; outer: for (var clzName : clazzs) { if (!clzName.toLowerCase().contains("config")) { @@ -132,6 +132,7 @@ public class Serialization { System.err.println("WARNING: Config class " + clzName + " doesnt contain a getConfigTypeName and thus wont be serializable"); continue outer; } + count++; String name = (String) nameMethod.invoke(null); serializers.computeIfAbsent(clz, GsonConfigSerialization::new) .register(name, (Class) original); @@ -151,6 +152,7 @@ public class Serialization { } GSON = builder.create(); + System.out.println("Registered " + count + " config types"); } private static List collectAllClasses(String pack) { diff --git a/src/main/resources/assets/voxy/lang/en_us.json b/src/main/resources/assets/voxy/lang/en_us.json index 2cac1f8d..5de6eec3 100644 --- a/src/main/resources/assets/voxy/lang/en_us.json +++ b/src/main/resources/assets/voxy/lang/en_us.json @@ -17,6 +17,8 @@ "voxy.config.general.maxSections.tooltip": "The max number of sections the renderer can contain", "voxy.config.general.renderDistance": "Render Distance", "voxy.config.general.renderDistance.tooltip": "The render distance in chunks (set to -1 to disable chunk unloading)", + "voxy.config.general.nvmesh": "Use nvidia mesh shaders", + "voxy.config.general.nvmesh.tooltip": "Use nvidia mesh shaders if possible to render LoDs", "voxy.config.threads.ingest": "Ingest", "voxy.config.threads.ingest.tooltip": "How many threads voxy will use for ingesting new chunks", diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46/block_model.glsl b/src/main/resources/assets/voxy/shaders/lod/block_model.glsl similarity index 100% rename from src/main/resources/assets/voxy/shaders/lod/gl46/block_model.glsl rename to src/main/resources/assets/voxy/shaders/lod/block_model.glsl diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46/cmdgen.comp b/src/main/resources/assets/voxy/shaders/lod/gl46/cmdgen.comp index 6fa19b79..4bfa1479 100644 --- a/src/main/resources/assets/voxy/shaders/lod/gl46/cmdgen.comp +++ b/src/main/resources/assets/voxy/shaders/lod/gl46/cmdgen.comp @@ -3,10 +3,10 @@ layout(local_size_x = 128) in; -#import +#import #import #import -#import +#import #line 11 //https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GL_EXT_shader_16bit_storage.txt diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46/cull/raster.vert b/src/main/resources/assets/voxy/shaders/lod/gl46/cull/raster.vert index afa2877c..28cdb5a8 100644 --- a/src/main/resources/assets/voxy/shaders/lod/gl46/cull/raster.vert +++ b/src/main/resources/assets/voxy/shaders/lod/gl46/cull/raster.vert @@ -2,7 +2,7 @@ #extension GL_ARB_gpu_shader_int64 : enable #define VISIBILITY_ACCESS writeonly #import -#import +#import flat out uint id; flat out uint value; diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46/quads.vert b/src/main/resources/assets/voxy/shaders/lod/gl46/quads.vert index add49a11..e87e4b95 100644 --- a/src/main/resources/assets/voxy/shaders/lod/gl46/quads.vert +++ b/src/main/resources/assets/voxy/shaders/lod/gl46/quads.vert @@ -1,9 +1,9 @@ #version 460 core #extension GL_ARB_gpu_shader_int64 : enable -#import +#import #import -#import +#import #line 8 layout(location = 0) out vec2 uv; diff --git a/src/main/resources/assets/voxy/shaders/lod/nvmesh/bindings.glsl b/src/main/resources/assets/voxy/shaders/lod/nvmesh/bindings.glsl new file mode 100644 index 00000000..1572306b --- /dev/null +++ b/src/main/resources/assets/voxy/shaders/lod/nvmesh/bindings.glsl @@ -0,0 +1,64 @@ +struct SectionMeta { + uint posA; + uint posB; + uint AABB; + uint ptr; + uint cntA; + uint cntB; + uint cntC; + uint cntD; +}; + +struct BlockModel { + uint faceData[6]; + uint flagsA; + uint colourTint; + uint _pad[8]; +}; + + +layout(binding = 0) uniform sampler2D blockModelAtlas; + +layout(binding = 0, std140) uniform SceneUniform { + mat4 MVP; + ivec3 baseSectionPos; + int sectionCount; + vec3 cameraSubPos; + uint frameId; +}; + +#define Quad uint64_t +layout(binding = 1, std430) readonly restrict buffer QuadBuffer { + Quad quadData[]; +}; + +layout(binding = 2, std430) readonly restrict buffer SectionBuffer { + SectionMeta sectionData[]; +}; + +#ifndef VISIBILITY_ACCESS +#define VISIBILITY_ACCESS readonly +#endif +layout(binding = 3, std430) VISIBILITY_ACCESS restrict buffer VisibilityBuffer { + uint visibilityData[]; +}; + +layout(binding = 4, std430) readonly restrict buffer ModelBuffer { + BlockModel modelData[]; +}; + +layout(binding = 5, std430) readonly restrict buffer ModelColourBuffer { + uint colourData[]; +}; + +layout(binding = 6, std430) readonly restrict buffer LightingBuffer { + uint lightData[]; +}; + +vec4 getLighting(uint index) { + uvec4 arr = uvec4(lightData[index]); + arr = arr>>uvec4(16,8,0,24); + arr = arr & uvec4(0xFF); + return vec4(arr)*vec4(1.0f/255.0f); +} + diff --git a/src/main/resources/assets/voxy/shaders/lod/nvmesh/cull.frag b/src/main/resources/assets/voxy/shaders/lod/nvmesh/cull.frag new file mode 100644 index 00000000..a542313b --- /dev/null +++ b/src/main/resources/assets/voxy/shaders/lod/nvmesh/cull.frag @@ -0,0 +1,14 @@ +#version 460 core +#extension GL_ARB_gpu_shader_int64 : enable +#define VISIBILITY_ACCESS writeonly +#import +layout(early_fragment_tests) in; + +flat in uint id; +flat in uint value; +//out vec4 colour; + +void main() { + visibilityData[id] = value; + //colour = vec4(float(id&7u)/7, float((id>>3)&7u)/7, float((id>>6)&7u)/7, 1); +} \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/nvmesh/cull.vert b/src/main/resources/assets/voxy/shaders/lod/nvmesh/cull.vert new file mode 100644 index 00000000..6ff9709d --- /dev/null +++ b/src/main/resources/assets/voxy/shaders/lod/nvmesh/cull.vert @@ -0,0 +1,30 @@ +#version 460 core +#extension GL_ARB_gpu_shader_int64 : enable +#define VISIBILITY_ACCESS writeonly +#import +#import + +flat out uint id; +flat out uint value; + +void main() { + uint sid = gl_InstanceID; + + SectionMeta section = sectionData[sid]; + + uint detail = extractDetail(section); + ivec3 ipos = extractPosition(section); + ivec3 aabbOffset = extractAABBOffset(section); + ivec3 size = extractAABBSize(section); + + //Transform ipos with respect to the vertex corner + ivec3 pos = (((ipos<>2)&1, (gl_VertexID>>1)&1)*(size+2))*(1<>1)&1u)*-4.0); + if ((flags&1u) == 1 && colour.a <= 0.25f) { + discard; + } + + //Conditional tinting, TODO: FIXME: REPLACE WITH MASK OR SOMETHING, like encode data into the top bit of alpha + if ((flags&(1u<<2)) != 0 && abs(colour.r-colour.g) < 0.02f && abs(colour.g-colour.b) < 0.02f) { + colour *= conditionalTinting; + } + + outColour = (colour * tinting) + addin; +} diff --git a/src/main/resources/assets/voxy/shaders/lod/nvmesh/primary.mesh b/src/main/resources/assets/voxy/shaders/lod/nvmesh/primary.mesh index e69de29b..33a8810d 100644 --- a/src/main/resources/assets/voxy/shaders/lod/nvmesh/primary.mesh +++ b/src/main/resources/assets/voxy/shaders/lod/nvmesh/primary.mesh @@ -0,0 +1,243 @@ +#version 460 + +#extension GL_ARB_shading_language_include : enable +#pragma optionNV(unroll all) +#define UNROLL_LOOP + +#extension GL_NV_mesh_shader : require +#extension GL_NV_gpu_shader5 : require +#extension GL_ARB_gpu_shader_int64 : require + +#import +#import +#import +#line 13 + +layout(local_size_x = 16) in; +layout(triangles, max_vertices=64, max_primitives=32) out; + +layout(location=1) out Interpolants { + vec2 uv; +} i_out[]; + +layout(location=2) perprimitiveNV out PerPrimData { + vec2 baseUV; + vec4 tinting; + vec4 addin; + uint flags; + vec4 conditionalTinting; +} per_prim_out[]; + +void emitIndicies() { + uint primBase = gl_LocalInvocationID.x * 6; + uint vertBase = gl_LocalInvocationID.x<<2; + gl_PrimitiveIndicesNV[primBase+0] = vertBase+0; + gl_PrimitiveIndicesNV[primBase+1] = vertBase+1; + gl_PrimitiveIndicesNV[primBase+2] = vertBase+2; + gl_PrimitiveIndicesNV[primBase+3] = vertBase+2; + gl_PrimitiveIndicesNV[primBase+4] = vertBase+3; + gl_PrimitiveIndicesNV[primBase+5] = vertBase+0; +} + +vec4 uint2vec4RGBA(uint colour) { + return vec4((uvec4(colour)>>uvec4(24,16,8,0))&uvec4(0xFF))/255.0; +} + +//Gets the face offset with respect to the face direction (e.g. some will be + some will be -) +float getDepthOffset(uint faceData, uint face) { + float offset = extractFaceIndentation(faceData); + return offset * (1.0-((int(face)&1)*2.0)); +} + +vec4 getFaceSize(uint faceData) { + float EPSILON = 0.001f; + vec4 faceOffsetsSizes = extractFaceSizes(faceData); + //Expand the quads by a very small amount + faceOffsetsSizes.xz -= vec2(EPSILON); + faceOffsetsSizes.yw += vec2(EPSILON); + + //Make the end relative to the start + faceOffsetsSizes.yw -= faceOffsetsSizes.xz; + + return faceOffsetsSizes; +} + +//TODO: make branchless by using ternaries i think +vec3 swizzelDataAxis(uint axis, vec3 data) { + if (axis == 0) { //Up/down + data = data.xzy; + } + //Not needed, here for readability + //if (axis == 1) {//north/south + // offset = offset.xyz; + //} + if (axis == 2) { //west/east + data = data.zxy; + } + return data; +} + +taskNV in Task { + vec3 origin;//Offset to camera in world space (already multiplied by lod level) + uint baseOffset;//Base offset into the quad data buffer + + //Binary search indexs and data + uvec4 binIa; + uvec4 binIb; + uvec4 binVa; + uvec4 binVb; + + uint meta;//First 4 bits is lod level, remaining is quadCount +}; + +uint getQuadIndex() { + uint gii = gl_GlobalInvocationID.x; + //TODO: replace this with binary search + if (gii < binIa.x) { + return binVa.x + gii + baseOffset; + } else if (gii < binIa.y) { + return binVa.y + (gii - binIa.x) + baseOffset; + } else if (gii < binIa.z) { + return binVa.z + (gii - binIa.y) + baseOffset; + } else if (gii < binIa.w) { + return binVa.w + (gii - binIa.z) + baseOffset; + } else if (gii < binIb.x) { + return binVb.x + (gii - binIa.w) + baseOffset; + } else if (gii < binIb.y) { + return binVb.y + (gii - binIb.x) + baseOffset; + } else if (gii < binIb.z) { + return binVb.z + (gii - binIb.y) + baseOffset; + } else if (gii < binIb.w) { + return binVb.w + (gii - binIb.z) + baseOffset; + } else { + return uint(-1); + } +} + +void main() { + uint idx = getQuadIndex(); + //If its over, dont render + if (idx == uint(-1)) { + return; + } + emitIndicies(); + + uint A = gl_LocalInvocationID.x<<1; + uint B = (gl_LocalInvocationID.x<<1)|1u; + uint V = (gl_LocalInvocationID.x<<2); + + uint lodLvl = meta&0xf; + float lodScale = (1<>8)&0xFFu)*(1.0/(256.0)); + vec2 baseUV = modelUV + (vec2(face>>1, face&1u) * (1.0/(vec2(3.0, 2.0)*256.0))); + //Write out baseUV + per_prim_out[A].baseUV = baseUV; + per_prim_out[B].baseUV = baseUV; + + + + + uint flags = faceHasAlphaCuttout(faceData); + + //We need to have a conditional override based on if the model size is < a full face + quadSize > 1 + flags |= uint(any(greaterThan(quadSize, ivec2(1)))) & faceHasAlphaCuttoutOverride(faceData); + + flags |= uint(!modelHasMipmaps(model))<<1; + + //Compute lighting + vec4 tinting = getLighting(extractLightId(quad)); + + //Apply model colour tinting + uint tintColour = model.colourTint; + if (modelHasBiomeLUT(model)) { + tintColour = colourData[tintColour + extractBiomeId(quad)]; + } + + vec4 conditionalTinting = vec4(0); + if (tintColour != uint(-1)) { + flags |= 1u<<2; + conditionalTinting = uint2vec4RGBA(tintColour).yzwx; + } + + vec4 addin = vec4(0.0); + if (!isTranslucent) { + tinting.w = 0.0; + //Encode the face, the lod level and + uint encodedData = 0; + encodedData |= face; + encodedData |= (lodLvl<<3); + encodedData |= uint(hasAO)<<6; + addin.w = float(encodedData)/255.0; + } + + //Apply face tint + if (isShaded) { + //TODO: make branchless, infact apply ahead of time to the texture itself in ModelManager since that is + // per face + if ((face>>1) == 1) { + tinting.xyz *= 0.8f; + } else if ((face>>1) == 2) { + tinting.xyz *= 0.6f; + } else if (face == 0){ + tinting.xyz *= 0.5f; + } + } + + + //Write out everything + per_prim_out[A].tinting = tinting; + per_prim_out[A].addin = addin; + per_prim_out[A].flags = flags; + per_prim_out[A].conditionalTinting = conditionalTinting; + per_prim_out[B].tinting = tinting; + per_prim_out[B].addin = addin; + per_prim_out[B].flags = flags; + per_prim_out[B].conditionalTinting = conditionalTinting; + + + + + + vec4 faceSize = getFaceSize(faceData); + + vec2 cQuadSize = faceSize.yw + quadSize - 1; + vec2 uv0 = faceSize.xz; + i_out[V|0].uv = uv0; + i_out[V|1].uv = uv0 + vec2(0, cQuadSize.y); + i_out[V|2].uv = uv0 + cQuadSize; + i_out[V|3].uv = uv0 + vec2(cQuadSize.x, 0); + + + + + //Corner position of quad relative to section corner (in 0->32 scale) + vec3 cornerPos = extractPos(quad); + float depthOffset = extractFaceIndentation(faceData); + cornerPos += swizzelDataAxis(face>>1, vec3(faceSize.xz, mix(depthOffset, 1-depthOffset, float(face&1u)))); + gl_MeshVerticesNV[V|0].gl_Position = MVP*vec4(cornerPos*lodScale+origin, 1.0); + gl_MeshVerticesNV[V|1].gl_Position = MVP*vec4((cornerPos+swizzelDataAxis(face>>1,vec3(0,cQuadSize.y,0)))*lodScale+origin, 1.0); + gl_MeshVerticesNV[V|2].gl_Position = MVP*vec4((cornerPos+swizzelDataAxis(face>>1,vec3(cQuadSize, 0)))*lodScale+origin, 1.0); + gl_MeshVerticesNV[V|3].gl_Position = MVP*vec4((cornerPos+swizzelDataAxis(face>>1,vec3(cQuadSize.x,0,0)))*lodScale+origin, 1.0); + + if (gl_LocalInvocationID.x == 0) { + //Remaining quads in workgroup + gl_PrimitiveCountNV = min(uint(int(meta>>4)-int(gl_WorkGroupID.x<<4))<<1, 32);//2 primatives per quad + } +} \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/nvmesh/primary.task b/src/main/resources/assets/voxy/shaders/lod/nvmesh/primary.task index e69de29b..42e29c62 100644 --- a/src/main/resources/assets/voxy/shaders/lod/nvmesh/primary.task +++ b/src/main/resources/assets/voxy/shaders/lod/nvmesh/primary.task @@ -0,0 +1,118 @@ +#version 460 + +#extension GL_ARB_shading_language_include : enable +#pragma optionNV(unroll all) +#define UNROLL_LOOP + +#extension GL_NV_mesh_shader : require +#extension GL_NV_gpu_shader5 : require +#extension GL_ARB_gpu_shader_int64 : require + +#import +#import +#line 12 + +#define MESH_WORKLOAD_PER_INVOCATION 16 + +layout(local_size_x=1) in; + +taskNV out Task { + vec3 origin;//Offset to camera in world space (already multiplied by lod level) + uint baseOffset;//Base offset into the quad data buffer + + //Binary search indexs and data + uvec4 binIa; + uvec4 binIb; + uvec4 binVa; + uvec4 binVb; + + uint meta;//First 4 bits is lod level, remaining is quadCount +} task; + + +void putBinData(inout uint idx, inout uint lastIndex, uint offset, uint cnt) { + uint id = idx++; + if (id < 4) { + task.binIa[id] = lastIndex + cnt; + task.binVa[id] = offset; + } else { + task.binIb[id - 4] = lastIndex + cnt; + task.binVb[id - 4] = offset; + } + lastIndex += cnt; +} + +void main() { + uint sectionId = gl_WorkGroupID.x; + bool visibleLastFrame = visibilityData[sectionId] == (frameId-1); + + //If it wasnt visible last frame then dont render this frame ** (do temporal coherance) + if (!visibleLastFrame) { + gl_TaskCountNV = 0; + return; + } + SectionMeta meta = sectionData[sectionId]; + uint lodLvl = extractDetail(meta); + ivec3 lodPos= extractPosition(meta); + //Relative position to camera with resepct to lod level to check for visibility bits + ivec3 cpos = lodPos-(baseSectionPos>>lodLvl); + //Relative position to camera + task.origin = vec3(((lodPos<>16)&0xFFFF; + if (cnt!=0) { + putBinData(idx, lastIndex, offset, cnt); + } + offset += cnt; + + cnt = meta.cntB &0xFFFF; + if ((cnt!=0) && (cpos.y>-1)) { + putBinData(idx, lastIndex, offset, cnt); + } + offset += cnt; + + cnt = (meta.cntB>>16)&0xFFFF; + if((cnt!=0) && (cpos.y<1 )){ + putBinData(idx, lastIndex, offset, cnt); + } + offset += cnt; + + cnt = meta.cntC &0xFFFF; + if((cnt!=0) && (cpos.z>-1)){ + putBinData(idx, lastIndex, offset, cnt); + } + offset += cnt; + + cnt = (meta.cntC>>16)&0xFFFF; + if((cnt!=0) && (cpos.z<1 )){ + putBinData(idx, lastIndex, offset, cnt); + } + offset += cnt; + + cnt = meta.cntD &0xFFFF; + if((cnt!=0) && (cpos.x>-1)){ + putBinData(idx, lastIndex, offset, cnt); + } + offset += cnt; + + cnt = (meta.cntD>>16)&0xFFFF; + if((cnt!=0) && (cpos.x<1 )){ + putBinData(idx, lastIndex, offset, cnt); + } + offset += cnt; + + + task.meta |= lastIndex<<4; + gl_TaskCountNV = (lastIndex+MESH_WORKLOAD_PER_INVOCATION-1)/MESH_WORKLOAD_PER_INVOCATION; +} \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46/quad_format.glsl b/src/main/resources/assets/voxy/shaders/lod/quad_format.glsl similarity index 100% rename from src/main/resources/assets/voxy/shaders/lod/gl46/quad_format.glsl rename to src/main/resources/assets/voxy/shaders/lod/quad_format.glsl diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46/section.glsl b/src/main/resources/assets/voxy/shaders/lod/section.glsl similarity index 100% rename from src/main/resources/assets/voxy/shaders/lod/gl46/section.glsl rename to src/main/resources/assets/voxy/shaders/lod/section.glsl diff --git a/src/main/resources/assets/voxy/shaders/post/ssao.comp b/src/main/resources/assets/voxy/shaders/post/ssao.comp index ba791e08..e291e1c9 100644 --- a/src/main/resources/assets/voxy/shaders/post/ssao.comp +++ b/src/main/resources/assets/voxy/shaders/post/ssao.comp @@ -19,11 +19,11 @@ vec4 reDeProject(vec3 pos) { vec2 UV = clamp(view.xy*0.5+0.5, 0.0, 1.0); //TODO: sample the colour texture and check if the alpha has the hasAO flag - float depth = texture(depthTex, UV, -4.0f).x; + float depth = texture(depthTex, UV).x; if (depth == 1.0f) { return vec4(-1.0f); } - uint meta = uint(255.0f*texture(colourTex, UV, -4.0f).w); + uint meta = uint(255.0f*texture(colourTex, UV).w); if ((meta>>6)==0) { return vec4(-1.0f); }