From 91e93dea2b703c712aaacf6bbb267f48ee9a1ea9 Mon Sep 17 00:00:00 2001 From: mcrcortex <{ID}+{username}@users.noreply.github.com> Date: Mon, 15 Jul 2024 10:41:04 +1000 Subject: [PATCH] Performance tinkering --- build.gradle | 4 + .../me/cortex/voxy/client/core/VoxelCore.java | 24 +++- .../rendering/Gl46HierarchicalRenderer.java | 125 ++++++++++++++++-- .../Gl46MeshletsFarWorldRenderer.java | 2 + .../voxy/client/core/rendering/Viewport.java | 4 +- .../rendering/building/RenderDataFactory.java | 2 +- .../HierarchicalOcclusionRenderer.java | 58 ++------ .../voxy/client/core/util/IndexUtil.java | 12 +- .../voxy/shaders/lod/gl46mesh/quads.frag | 13 +- .../voxy/shaders/lod/gl46mesh/quads.vert | 28 +++- 10 files changed, 195 insertions(+), 77 deletions(-) diff --git a/build.gradle b/build.gradle index efbdaafc..1a5a6c6b 100644 --- a/build.gradle +++ b/build.gradle @@ -72,6 +72,10 @@ dependencies { modCompileOnly("maven.modrinth:chunky:1.3.138") modRuntimeOnly("maven.modrinth:chunky:1.3.138") + modRuntimeOnly("maven.modrinth:spark:1.10.73-fabric") + modRuntimeOnly("maven.modrinth:fabric-permissions-api:0.3.1") + modRuntimeOnly("maven.modrinth:nsight-loader:1.2.0") + modImplementation('io.github.douira:glsl-transformer:2.0.1') } diff --git a/src/main/java/me/cortex/voxy/client/core/VoxelCore.java b/src/main/java/me/cortex/voxy/client/core/VoxelCore.java index 99398f08..f955f745 100644 --- a/src/main/java/me/cortex/voxy/client/core/VoxelCore.java +++ b/src/main/java/me/cortex/voxy/client/core/VoxelCore.java @@ -44,6 +44,13 @@ import static org.lwjgl.opengl.GL30C.GL_DRAW_FRAMEBUFFER_BINDING; //There is strict forward only dataflow //Ingest -> world engine -> raw render data -> render data + + + +//REDESIGN THIS PIECE OF SHIT SPAGETTY SHIT FUCK +// like Get rid of interactor and renderer being seperate just fucking put them together +// fix the callback bullshit spagetti +//REMOVE setRenderGen like holy hell public class VoxelCore { private final WorldEngine world; private final RenderGenerationService renderGen; @@ -67,9 +74,16 @@ public class VoxelCore { Capabilities.init();//Ensure clinit is called this.modelManager = new ModelManager(16); this.renderer = this.createRenderBackend(); + System.out.println("Using " + this.renderer.getClass().getSimpleName()); this.viewportSelector = new ViewportSelector<>(this.renderer::createViewport); + + //Ungodly hacky code + if (this.renderer instanceof AbstractRenderWorldInteractor) { + this.interactor = (AbstractRenderWorldInteractor) this.renderer; + } else { + this.interactor = new DefaultRenderWorldInteractor(cfg, this.world, this.renderer); + } System.out.println("Renderer initialized"); - this.interactor = new DefaultRenderWorldInteractor(cfg, this.world, this.renderer); this.renderGen = new RenderGenerationService(this.world, this.modelManager, VoxyConfig.CONFIG.renderThreads, this.interactor::processBuildResult, this.renderer.generateMeshlets()); this.world.setDirtyCallback(this.interactor::sectionUpdated); @@ -101,16 +115,15 @@ public class VoxelCore { System.out.println("Voxy core initialized"); } - private AbstractFarWorldRenderer createRenderBackend() { + private IRenderInterface createRenderBackend() { if (false) { - System.out.println("Using Gl46MeshletFarWorldRendering"); + return new Gl46HierarchicalRenderer(this.modelManager); + } else if (true) { return new Gl46MeshletsFarWorldRenderer(this.modelManager, VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections); } else { if (VoxyConfig.CONFIG.useMeshShaders()) { - System.out.println("Using NvMeshFarWorldRenderer"); return new NvMeshFarWorldRenderer(this.modelManager, VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections); } else { - System.out.println("Using Gl46FarWorldRenderer"); return new Gl46FarWorldRenderer(this.modelManager, VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections); } } @@ -124,6 +137,7 @@ public class VoxelCore { boolean firstTime = true; public void renderSetup(Frustum frustum, Camera camera) { if (this.firstTime) { + //TODO: remove initPosition this.interactor.initPosition(camera.getBlockPos().getX(), camera.getBlockPos().getZ()); this.firstTime = false; //this.renderTracker.addLvl0(0,6,0); diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/Gl46HierarchicalRenderer.java b/src/main/java/me/cortex/voxy/client/core/rendering/Gl46HierarchicalRenderer.java index 516228da..aee12f30 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/Gl46HierarchicalRenderer.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/Gl46HierarchicalRenderer.java @@ -1,11 +1,20 @@ package me.cortex.voxy.client.core.rendering; +import me.cortex.voxy.client.core.AbstractRenderWorldInteractor; import me.cortex.voxy.client.core.gl.GlBuffer; +import me.cortex.voxy.client.core.gl.shader.PrintfInjector; import me.cortex.voxy.client.core.gl.shader.Shader; import me.cortex.voxy.client.core.gl.shader.ShaderType; +import me.cortex.voxy.client.core.model.ModelManager; +import me.cortex.voxy.client.core.rendering.building.BuiltSection; import me.cortex.voxy.client.core.rendering.building.RenderDataFactory; +import me.cortex.voxy.client.core.rendering.building.RenderGenerationService; +import me.cortex.voxy.client.core.rendering.hierarchical.HierarchicalOcclusionRenderer; +import me.cortex.voxy.client.core.rendering.hierarchical.INodeInteractor; +import me.cortex.voxy.client.core.rendering.hierarchical.MeshManager; import me.cortex.voxy.client.core.rendering.util.UploadStream; import me.cortex.voxy.client.mixin.joml.AccessFrustumIntersection; +import me.cortex.voxy.common.world.WorldSection; import me.cortex.voxy.common.world.other.Mapper; import net.minecraft.client.MinecraftClient; import net.minecraft.client.render.Camera; @@ -16,6 +25,7 @@ import org.joml.Vector3f; import org.lwjgl.system.MemoryUtil; import java.util.List; +import java.util.function.Consumer; import static org.lwjgl.opengl.ARBDirectStateAccess.glTextureParameteri; import static org.lwjgl.opengl.GL11.*; @@ -35,10 +45,41 @@ import static org.lwjgl.opengl.GL43.*; import static org.lwjgl.opengl.GL45.glBindTextureUnit; import static org.lwjgl.opengl.GL45.nglClearNamedBufferSubData; -public class Gl46HierarchicalRenderer implements IRenderInterface { - @Override - public Viewport createViewport() { - return null; +public class Gl46HierarchicalRenderer implements IRenderInterface, AbstractRenderWorldInteractor { + private final HierarchicalOcclusionRenderer sectionSelector; + private final MeshManager meshManager = new MeshManager(); + private final PrintfInjector printf = new PrintfInjector(100000, 10, System.out::println); + private final GlBuffer renderSections = new GlBuffer(100_000 * 4 + 4).zero(); + + + private final ModelManager modelManager; + private RenderGenerationService sectionGenerationService; + private Consumer resultConsumer; + + public Gl46HierarchicalRenderer(ModelManager model) { + this.modelManager = model; + + this.sectionSelector = new HierarchicalOcclusionRenderer(new INodeInteractor() { + @Override + public void watchUpdates(long pos) { + + } + + @Override + public void unwatchUpdates(long pos) { + + } + + @Override + public void requestMesh(long pos) { + + } + + @Override + public void setMeshUpdateCallback(Consumer mesh) { + Gl46HierarchicalRenderer.this.resultConsumer = mesh; + } + }, this.meshManager, this.printf); } @Override @@ -47,19 +88,36 @@ public class Gl46HierarchicalRenderer implements IRenderInterface { } @Override - public void renderFarAwayOpaque(Viewport viewport) { + public void renderFarAwayOpaque(Gl46HierarchicalViewport viewport) { + //Render terrain from previous frame (renderSections) + + + + {//Run the hierarchical selector over the buffer to generate the set of render sections + var i = new int[1]; + glGetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME, i); + this.sectionSelector.doHierarchicalTraversalSelection(viewport, i[0], this.renderSections); + } + + + this.printf.download(); + } + + @Override + public void renderFarAwayTranslucent(Gl46HierarchicalViewport viewport) { } @Override - public void renderFarAwayTranslucent(Viewport viewport) { + public void addDebugData(List debug) { } - @Override - public void shutdown() { - } + + + + @Override public void addBlockState(Mapper.StateEntry stateEntry) { @@ -71,13 +129,60 @@ public class Gl46HierarchicalRenderer implements IRenderInterface { } + + + + @Override + public void processBuildResult(BuiltSection section) { + + } + + @Override + public void sectionUpdated(WorldSection worldSection) { + + } + + + + + + @Override + public void initPosition(int x, int z) { + + } + + @Override + public void setCenter(int x, int y, int z) { + + } + + + + + + @Override public boolean generateMeshlets() { return false; } @Override - public void addDebugData(List debug) { + public void setRenderGen(RenderGenerationService renderService) { + this.sectionGenerationService = renderService; + } + @Override + public Gl46HierarchicalViewport createViewport() { + return new Gl46HierarchicalViewport(this); + } + + + + + @Override + public void shutdown() { + this.meshManager.free(); + this.sectionSelector.free(); + this.printf.free(); } } diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/Gl46MeshletsFarWorldRenderer.java b/src/main/java/me/cortex/voxy/client/core/rendering/Gl46MeshletsFarWorldRenderer.java index 790316dd..a35a6fbd 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/Gl46MeshletsFarWorldRenderer.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/Gl46MeshletsFarWorldRenderer.java @@ -85,6 +85,7 @@ public class Gl46MeshletsFarWorldRenderer extends AbstractFarWorldRenderer> { - int width; - int height; + public int width; + public int height; int frameId; Matrix4f projection; Matrix4f modelView; diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/building/RenderDataFactory.java b/src/main/java/me/cortex/voxy/client/core/rendering/building/RenderDataFactory.java index 9e0f8de3..c736114d 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/building/RenderDataFactory.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/building/RenderDataFactory.java @@ -55,7 +55,7 @@ public class RenderDataFactory { // can do funny stuff like double rendering private static final boolean USE_UINT64 = Capabilities.INSTANCE.INT64_t; - public static final int QUADS_PER_MESHLET = 62; + public static final int QUADS_PER_MESHLET = 14; private static void writePos(long ptr, long pos) { if (USE_UINT64) { MemoryUtil.memPutLong(ptr, pos); diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/HierarchicalOcclusionRenderer.java b/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/HierarchicalOcclusionRenderer.java index 88d2d746..0a641d74 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/HierarchicalOcclusionRenderer.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/HierarchicalOcclusionRenderer.java @@ -4,6 +4,7 @@ import me.cortex.voxy.client.core.gl.GlBuffer; import me.cortex.voxy.client.core.gl.shader.PrintfInjector; import me.cortex.voxy.client.core.gl.shader.Shader; import me.cortex.voxy.client.core.gl.shader.ShaderType; +import me.cortex.voxy.client.core.rendering.Gl46HierarchicalViewport; import me.cortex.voxy.client.core.rendering.HiZBuffer; import me.cortex.voxy.client.core.rendering.building.BuiltSection; import me.cortex.voxy.client.core.rendering.hierarchical.INodeInteractor; @@ -22,47 +23,25 @@ import static org.lwjgl.opengl.GL43.glDispatchCompute; import static org.lwjgl.opengl.GL45.glBindTextureUnit; public class HierarchicalOcclusionRenderer { - private PrintfInjector printf = new PrintfInjector(100000, 10, System.out::println); - - private final MeshManager meshManager = new MeshManager(); - private final NodeManager nodeManager = new NodeManager(new INodeInteractor() { - @Override - public void watchUpdates(long pos) { - - } - - @Override - public void unwatchUpdates(long pos) { - - } - - @Override - public void requestMesh(long pos) { - - } - - @Override - public void setMeshUpdateCallback(Consumer mesh) { - - } - }, this.meshManager); - private final HiZBuffer hiz = new HiZBuffer(); private final int hizSampler = glGenSamplers(); - private final Shader hierarchicalTraversal = Shader.make(this.printf) - .add(ShaderType.COMPUTE, "voxy:lod/hierarchical/traversal.comp") - .compile(); + private final NodeManager nodeManager; + private final Shader hierarchicalTraversal; + private final PrintfInjector printf; private final GlBuffer nodeQueue; - private final GlBuffer renderQueue; private final GlBuffer uniformBuffer; - public HierarchicalOcclusionRenderer() { + public HierarchicalOcclusionRenderer(INodeInteractor interactor, MeshManager mesh, PrintfInjector printf) { + this.nodeManager = new NodeManager(interactor, mesh); this.nodeQueue = new GlBuffer(1000000*4+4).zero(); - this.renderQueue = new GlBuffer(1000000*4+4).zero(); this.uniformBuffer = new GlBuffer(1024).zero(); + this.printf = printf; + this.hierarchicalTraversal = Shader.make(printf) + .add(ShaderType.COMPUTE, "voxy:lod/hierarchical/traversal.comp") + .compile(); } private void uploadUniform() { @@ -70,11 +49,12 @@ public class HierarchicalOcclusionRenderer { } - private void doHierarchicalTraversal(int depthBuffer, int width, int height) { + public void doHierarchicalTraversalSelection(Gl46HierarchicalViewport viewport, int depthBuffer, GlBuffer renderSelectionResult) { this.uploadUniform(); this.nodeManager.upload(); + //Make hiz - this.hiz.buildMipChain(depthBuffer, width, height); + this.hiz.buildMipChain(depthBuffer, viewport.width, viewport.height); glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); this.hierarchicalTraversal.bind(); @@ -83,7 +63,7 @@ public class HierarchicalOcclusionRenderer { glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, this.nodeManager.nodeBuffer.id); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, this.nodeQueue.id); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, this.nodeManager.requestQueue.id); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, this.renderQueue.id); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, renderSelectionResult.id); //Bind the hiz buffer glBindSampler(0, this.hizSampler); @@ -98,20 +78,10 @@ public class HierarchicalOcclusionRenderer { this.nodeManager.download(); } - public void render(int depthBuffer, int width, int height) { - this.doHierarchicalTraversal(depthBuffer, width, height); - - - this.printf.download(); - } - public void free() { this.nodeQueue.free(); - this.renderQueue.free(); - this.printf.free(); this.hiz.free(); this.nodeManager.free(); - this.meshManager.free(); glDeleteSamplers(this.hizSampler); } } diff --git a/src/main/java/me/cortex/voxy/client/core/util/IndexUtil.java b/src/main/java/me/cortex/voxy/client/core/util/IndexUtil.java index 91e4924c..a9a975d4 100644 --- a/src/main/java/me/cortex/voxy/client/core/util/IndexUtil.java +++ b/src/main/java/me/cortex/voxy/client/core/util/IndexUtil.java @@ -11,9 +11,9 @@ public class IndexUtil { MemoryBuffer buffer = new MemoryBuffer(quadCount * 6L); long ptr = buffer.address; for(int i = 0; i < quadCount*4; i += 4) { - MemoryUtil.memPutByte(ptr + (0), (byte) i); - MemoryUtil.memPutByte(ptr + (1), (byte) (i + 1)); - MemoryUtil.memPutByte(ptr + (2), (byte) (i + 2)); + MemoryUtil.memPutByte(ptr + (0), (byte) (i + 1)); + MemoryUtil.memPutByte(ptr + (1), (byte) (i + 2)); + MemoryUtil.memPutByte(ptr + (2), (byte) (i + 0)); MemoryUtil.memPutByte(ptr + (3), (byte) (i + 1)); MemoryUtil.memPutByte(ptr + (4), (byte) (i + 3)); MemoryUtil.memPutByte(ptr + (5), (byte) (i + 2)); @@ -30,9 +30,9 @@ public class IndexUtil { MemoryBuffer buffer = new MemoryBuffer(quadCount * 6L * 2); long ptr = buffer.address; for(int i = 0; i < quadCount*4; i += 4) { - MemoryUtil.memPutShort(ptr + (0*2), (short) i); - MemoryUtil.memPutShort(ptr + (1*2), (short) (i + 1)); - MemoryUtil.memPutShort(ptr + (2*2), (short) (i + 2)); + MemoryUtil.memPutShort(ptr + (0*2), (short) (i + 1)); + MemoryUtil.memPutShort(ptr + (1*2), (short) (i + 2)); + MemoryUtil.memPutShort(ptr + (2*2), (short) (i + 0)); MemoryUtil.memPutShort(ptr + (3*2), (short) (i + 1)); MemoryUtil.memPutShort(ptr + (4*2), (short) (i + 3)); MemoryUtil.memPutShort(ptr + (5*2), (short) (i + 2)); diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46mesh/quads.frag b/src/main/resources/assets/voxy/shaders/lod/gl46mesh/quads.frag index 14e2362f..56502f6a 100644 --- a/src/main/resources/assets/voxy/shaders/lod/gl46mesh/quads.frag +++ b/src/main/resources/assets/voxy/shaders/lod/gl46mesh/quads.frag @@ -4,17 +4,22 @@ layout(binding = 0) uniform sampler2D blockModelAtlas; //TODO: need to fix when merged quads have discardAlpha set to false but they span multiple tiles // however they are not a full block +//#define DEBUG_MESHLETS_ONLY + +#ifndef DEBUG_MESHLETS_ONLY layout(location = 0) in vec2 uv; layout(location = 1) in flat vec2 baseUV; layout(location = 2) in flat vec4 tinting; layout(location = 3) in flat vec4 addin; layout(location = 4) in flat uint flags; layout(location = 5) in flat vec4 conditionalTinting; +#else layout(location = 6) in flat uint meshlet; -//layout(location = 6) in flat vec4 solidColour; +#endif layout(location = 0) out vec4 outColour; void main() { + #ifndef DEBUG_MESHLETS_ONLY vec2 uv = mod(uv, vec2(1.0))*(1.0/(vec2(3.0,2.0)*256.0)); //vec4 colour = solidColour; vec4 colour = texture(blockModelAtlas, uv + baseUV, ((flags>>1)&1u)*-4.0); @@ -30,12 +35,12 @@ void main() { outColour = (colour * tinting) + addin; //outColour = vec4(uv + baseUV, 0, 1); - - + #else uint hash = meshlet*1231421+123141; hash ^= hash>>16; hash = hash*1231421+123141; hash ^= hash>>16; hash = hash * 1827364925 + 123325621; - //outColour = vec4(float(hash&15u)/15, float((hash>>4)&15u)/15, float((hash>>8)&15u)/15, 1); + outColour = vec4(float(hash&15u)/15, float((hash>>4)&15u)/15, float((hash>>8)&15u)/15, 1); + #endif } \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46mesh/quads.vert b/src/main/resources/assets/voxy/shaders/lod/gl46mesh/quads.vert index 92508acb..a8549bdc 100644 --- a/src/main/resources/assets/voxy/shaders/lod/gl46mesh/quads.vert +++ b/src/main/resources/assets/voxy/shaders/lod/gl46mesh/quads.vert @@ -2,12 +2,17 @@ #extension GL_ARB_gpu_shader_int64 : enable #extension GL_ARB_shader_draw_parameters : require +//#define DEBUG_MESHLETS_ONLY + #import #import #import #import +#ifdef DEBUG_MESHLETS_ONLY layout(location = 6) out flat uint meshlet; +#endif + PosHeader meshletPosition; Quad quad; bool setupMeshlet() { @@ -21,7 +26,11 @@ bool setupMeshlet() { return true; } + #ifdef DEBUG_MESHLETS_ONLY meshlet = data; + #endif + + uint baseId = (data*MESHLET_SIZE); uint quadIndex = baseId + (gl_VertexID>>2) + 2; meshletPosition = geometryPool[baseId]; @@ -35,13 +44,14 @@ bool setupMeshlet() { - +#ifndef DEBUG_MESHLETS_ONLY layout(location = 0) out vec2 uv; layout(location = 1) out flat vec2 baseUV; layout(location = 2) out flat vec4 tinting; layout(location = 3) out flat vec4 addin; layout(location = 4) out flat uint flags; layout(location = 5) out flat vec4 conditionalTinting; +#endif vec4 uint2vec4RGBA(uint colour) { return vec4((uvec4(colour)>>uvec4(24,16,8,0))&uvec4(0xFF))/255.0; @@ -100,12 +110,18 @@ void main() { bool isShaded = hasAO;//TODO: make this a per face flag - vec2 modelUV = vec2(modelId&0xFFu, (modelId>>8)&0xFFu)*(1.0/(256.0)); - baseUV = modelUV + (vec2(face>>1, face&1u) * (1.0/(vec2(3.0, 2.0)*256.0))); ivec2 quadSize = extractSize(quad); - { //Generate tinting and flag data + #ifndef DEBUG_MESHLETS_ONLY + //Exploit provoking vertex to do less work + //if (cornerIdx==1) + { + vec2 modelUV = vec2(modelId&0xFFu, (modelId>>8)&0xFFu)*(1.0/(256.0)); + baseUV = modelUV + (vec2(face>>1, face&1u) * (1.0/(vec2(3.0, 2.0)*256.0))); + + //Generate tinting and flag data + flags = faceHasAlphaCuttout(faceData); //We need to have a conditional override based on if the model size is < a full face + quadSize > 1 @@ -152,6 +168,7 @@ void main() { } } } + #endif @@ -160,7 +177,8 @@ void main() { vec4 faceSize = getFaceSize(faceData); vec2 cQuadSize = (faceSize.yw + quadSize - 1) * vec2((cornerIdx>>1)&1, cornerIdx&1); - uv = faceSize.xz + cQuadSize; + + //uv = faceSize.xz + cQuadSize; vec3 cornerPos = extractPos(quad); float depthOffset = extractFaceIndentation(faceData);