diff --git a/src/main/java/me/cortex/voxy/client/TimingStatistics.java b/src/main/java/me/cortex/voxy/client/TimingStatistics.java new file mode 100644 index 00000000..2402b45a --- /dev/null +++ b/src/main/java/me/cortex/voxy/client/TimingStatistics.java @@ -0,0 +1,81 @@ +package me.cortex.voxy.client; + +import java.lang.invoke.VarHandle; +import java.util.ArrayList; + +public class TimingStatistics { + public static double ROLLING_WEIGHT = 0.95; + private static final ArrayList allSamplers = new ArrayList<>(); + public static final class TimeSampler { + private boolean running; + private long timestamp; + private long runtime; + + private double rolling; + + public TimeSampler() { + TimingStatistics.allSamplers.add(this); + } + + private void reset() { + if (this.running) { + throw new IllegalStateException(); + } + this.runtime = 0; + } + + public void start() { + if (this.running) { + throw new IllegalStateException(); + } + this.running = true; + VarHandle.fullFence(); + this.timestamp = System.nanoTime(); + VarHandle.fullFence(); + } + + public void stop() { + if (!this.running) { + throw new IllegalStateException(); + } + this.running = false; + VarHandle.fullFence(); + this.runtime += System.nanoTime() - this.timestamp; + VarHandle.fullFence(); + } + + public void subtract(TimeSampler sampler) { + this.runtime -= sampler.runtime; + } + + private void update() { + double time = ((double) (this.runtime / 1000)) / 1000; + this.rolling = Math.max(this.rolling * ROLLING_WEIGHT + time * (1-ROLLING_WEIGHT), time); + } + + public double getRolling() { + return this.rolling; + } + + public String pVal() { + return String.format("%6.3f", this.rolling); + } + } + + public static void resetSamplers() { + TimingStatistics.allSamplers.forEach(TimeSampler::reset); + } + + private static void updateSamplers() { + TimingStatistics.allSamplers.forEach(TimeSampler::update); + } + + public static TimeSampler setup = new TimeSampler(); + public static TimeSampler main = new TimeSampler(); + public static TimeSampler dynamic = new TimeSampler(); + + public static void update() { + updateSamplers(); + } + +} diff --git a/src/main/java/me/cortex/voxy/client/core/model/ModelFactory.java b/src/main/java/me/cortex/voxy/client/core/model/ModelFactory.java index 52efa586..50e66f85 100644 --- a/src/main/java/me/cortex/voxy/client/core/model/ModelFactory.java +++ b/src/main/java/me/cortex/voxy/client/core/model/ModelFactory.java @@ -139,16 +139,16 @@ public class ModelFactory { - public void addEntry(int blockId) { + public boolean addEntry(int blockId) { if (this.idMappings[blockId] != -1) { - return; + return false; } //We are (probably) going to be baking the block id // check that it is currently not inflight, if it is, return as its already being baked // else add it to the flight as it is going to be baked if (!this.blockStatesInFlight.add(blockId)) { //Block baking is already in-flight - return; + return false; } var blockState = this.mapper.getBlockStateFromBlockId(blockId); @@ -192,6 +192,7 @@ public class ModelFactory { processTextureBakeResult(blockId, blockState, textureData); }); this.bakery.renderFacesToStream(blockState, 123456, isFluid, this.downstream.getBufferId(), allocation); + return true; } //TODO: what i need to do is seperate out fluid states from blockStates diff --git a/src/main/java/me/cortex/voxy/client/core/model/bakery/ModelTextureBakery.java b/src/main/java/me/cortex/voxy/client/core/model/bakery/ModelTextureBakery.java index 6ca19bdc..32da0cd6 100644 --- a/src/main/java/me/cortex/voxy/client/core/model/bakery/ModelTextureBakery.java +++ b/src/main/java/me/cortex/voxy/client/core/model/bakery/ModelTextureBakery.java @@ -95,6 +95,8 @@ public class ModelTextureBakery { }); + int originalFramebuffer = glGetInteger(GL_FRAMEBUFFER_BINDING); + RenderLayer renderLayer = null; if (!renderFluid) { @@ -181,7 +183,7 @@ public class ModelTextureBakery { glClear(GL_DEPTH_BUFFER_BIT); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, originalFramebuffer); } private final BufferAllocator allocator = new BufferAllocator(786432); diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java b/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java index f5aee8d0..9a6cceab 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java @@ -2,6 +2,7 @@ package me.cortex.voxy.client.core.rendering; import io.netty.util.internal.MathUtil; import me.cortex.voxy.client.RenderStatistics; +import me.cortex.voxy.client.TimingStatistics; import me.cortex.voxy.client.core.gl.Capabilities; import me.cortex.voxy.client.core.model.ModelBakerySubsystem; import me.cortex.voxy.client.core.model.ModelStore; @@ -22,6 +23,7 @@ import me.cortex.voxy.common.thread.ServiceThreadPool; import me.cortex.voxy.common.world.WorldSection; import net.minecraft.client.render.Camera; +import java.lang.invoke.VarHandle; import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; @@ -127,9 +129,12 @@ public class RenderService, J extends Vi // this is because e.g. shadows, cause this pipeline to be invoked multiple times // which may cause the geometry to become outdated resulting in corruption rendering in renderOpaque //TODO: Need to find a proper way to fix this (if there even is one) - if (true /* firstInvocationThisFrame */) { - DownloadStream.INSTANCE.tick(); + { + TimingStatistics.main.stop(); + TimingStatistics.dynamic.start(); + //Tick download stream + DownloadStream.INSTANCE.tick(); this.sectionUpdateQueue.consume(128); @@ -143,8 +148,13 @@ public class RenderService, J extends Vi //this needs to go after, due to geometry updates committed by the nodeManager this.sectionRenderer.getGeometryManager().tick(); + + //Tick upload stream + UploadStream.INSTANCE.tick(); + + TimingStatistics.dynamic.stop(); + TimingStatistics.main.start(); } - UploadStream.INSTANCE.tick(); glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT|GL_PIXEL_BUFFER_BARRIER_BIT); diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/VoxyRenderSystem.java b/src/main/java/me/cortex/voxy/client/core/rendering/VoxyRenderSystem.java index e38000b9..62bb2b3b 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/VoxyRenderSystem.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/VoxyRenderSystem.java @@ -2,6 +2,7 @@ package me.cortex.voxy.client.core.rendering; import com.mojang.blaze3d.opengl.GlConst; import com.mojang.blaze3d.systems.RenderSystem; +import me.cortex.voxy.client.TimingStatistics; import me.cortex.voxy.client.config.VoxyConfig; import me.cortex.voxy.client.core.gl.Capabilities; import me.cortex.voxy.client.core.gl.GlBuffer; @@ -46,7 +47,6 @@ public class VoxyRenderSystem { private final PostProcessing postProcessing; private final WorldEngine worldIn; private final RenderDistanceTracker renderDistanceTracker; - private long runTimeNano = 0; public VoxyRenderSystem(WorldEngine world, ServiceThreadPool threadPool) { //Trigger the shared index buffer loading @@ -70,9 +70,12 @@ public class VoxyRenderSystem { this.renderDistanceTracker.setRenderDistance(renderDistance); } + //private static final ModelTextureBakery mtb = new ModelTextureBakery(16, 16); //private static final RawDownloadStream downstream = new RawDownloadStream(1<<20); public void renderSetup(Frustum frustum, Camera camera) { + TimingStatistics.resetSamplers(); + /* if (false) { int allocation = downstream.download(2 * 4 * 6 * 16 * 16, ptr -> { @@ -100,18 +103,15 @@ public class VoxyRenderSystem { downstream.submit(); downstream.tick(); }*/ - VarHandle.fullFence(); - long start = System.nanoTime(); - VarHandle.fullFence(); + TimingStatistics.setup.start(); this.renderDistanceTracker.setCenterAndProcess(camera.getBlockPos().getX(), camera.getBlockPos().getZ()); + //Done here as is allows less gl state resetup this.renderer.tickModelService(); - PrintfDebugUtil.tick(); - VarHandle.fullFence(); - this.runTimeNano = System.nanoTime() - start; - VarHandle.fullFence(); + PrintfDebugUtil.tick(); + TimingStatistics.setup.stop(); } private static Matrix4f makeProjectionMatrix(float near, float far) { @@ -137,13 +137,10 @@ public class VoxyRenderSystem { } public void renderOpaque(MatrixStack matrices, double cameraX, double cameraY, double cameraZ) { - VarHandle.fullFence(); - long startTime = System.nanoTime(); - VarHandle.fullFence(); - if (IrisUtil.irisShadowActive()) { return; } + TimingStatistics.main.start(); if (false) { //only increase quality while there are very few mesh queues, this stops, @@ -209,20 +206,17 @@ public class VoxyRenderSystem { this.postProcessing.renderPost(projection, RenderSystem.getProjectionMatrix(), boundFB); glBindFramebuffer(GlConst.GL_FRAMEBUFFER, oldFB); + TimingStatistics.main.stop(); - VarHandle.fullFence(); - this.runTimeNano += System.nanoTime() - startTime; - VarHandle.fullFence(); } - private double role = 0; public void addDebugInfo(List debug) { debug.add("GlBuffer, Count/Size (mb): " + GlBuffer.getCount() + "/" + (GlBuffer.getTotalSize()/1_000_000)); this.renderer.addDebugData(debug); - double aa = (((double)(this.runTimeNano/1000))/1000); - - this.role = Math.max(Math.ceil((this.role * 0.99 + (aa*0.01))*1000)/1000, aa); - debug.add("Voxy frame runtime (millis): " + this.role); + { + TimingStatistics.update(); + debug.add("Voxy frame runtime (millis): " + TimingStatistics.setup.pVal() + ", " + TimingStatistics.dynamic.pVal() + ", " + TimingStatistics.main.pVal()); + } PrintfDebugUtil.addToOut(debug); } diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/HierarchicalOcclusionTraverser.java b/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/HierarchicalOcclusionTraverser.java index c66dc6f2..b898efc0 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/HierarchicalOcclusionTraverser.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/HierarchicalOcclusionTraverser.java @@ -1,5 +1,6 @@ package me.cortex.voxy.client.core.rendering.hierachical; +import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap; import me.cortex.voxy.client.RenderStatistics; import me.cortex.voxy.client.config.VoxyConfig; import me.cortex.voxy.client.core.gl.GlBuffer; @@ -47,6 +48,10 @@ public class HierarchicalOcclusionTraverser { private final GlBuffer statisticsBuffer = new GlBuffer(1024).zero(); + private int topNodeCount; + private final Int2IntOpenHashMap topNode2idxMapping = new Int2IntOpenHashMap();//Used to store mapping from TLN to array index + private final int[] idx2topNodeMapping = new int[100_000];//Used to map idx to TLN id + private final GlBuffer topNodeIds = new GlBuffer(100_000*4).zero(); private final GlBuffer queueMetaBuffer = new GlBuffer(4*4*5).zero(); private final GlBuffer scratchQueueA = new GlBuffer(100_000*4).zero(); private final GlBuffer scratchQueueB = new GlBuffer(100_000*4).zero(); @@ -118,8 +123,47 @@ public class HierarchicalOcclusionTraverser { .ssbo("NODE_QUEUE_META_BINDING", this.queueMetaBuffer) .ssbo("RENDER_TRACKER_BINDING", this.nodeCleaner.visibilityBuffer) .ssboIf("STATISTICS_BUFFER_BINDING", this.statisticsBuffer); + + this.topNode2idxMapping.defaultReturnValue(-1); + this.nodeManager.setTLNCallbacks(this::addTLN, this::remTLN); } + private void addTLN(int id) { + int aid = this.topNodeCount++;//Increment buffer + if (this.topNodeCount > 100_000) { + throw new IllegalStateException("Top level node count greater than capacity"); + } + + //Use clear buffer, yes know is a bad idea, TODO: replace + //Add the new top level node to the queue + glClearNamedBufferSubData(this.topNodeIds.id, GL_R32UI, aid*4L, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, new int[]{id}); + this.topNode2idxMapping.put(id, aid); + this.idx2topNodeMapping[aid] = id; + } + + private void remTLN(int id) { + //Remove id + int idx = this.topNode2idxMapping.remove(id); + //Decrement count + this.topNodeCount--; + if (idx == -1) { + throw new IllegalStateException(); + } + + //Count has already been decremented so is an exact match + //If we are at the end of the array we dont need to do anything + if (idx == this.topNodeCount) { + return; + } + + //Move the entry at the end to the current index + int endTLNId = this.idx2topNodeMapping[this.topNodeCount]; + this.idx2topNodeMapping[idx] = endTLNId;//Set the old to the new + if (this.topNode2idxMapping.put(endTLNId, idx) == -1) + throw new IllegalStateException(); + //Move it server side, from end to new idx + glClearNamedBufferSubData(this.topNodeIds.id, GL_R32UI, idx*4L, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, new int[]{endTLNId}); + } private static void setFrustum(Viewport viewport, long ptr) { for (int i = 0; i < 6; i++) { @@ -183,7 +227,7 @@ public class HierarchicalOcclusionTraverser { this.statisticsBuffer.zero(); } - this.traverseInternal(this.nodeManager.getTopLevelNodeIds().size()); + this.traverseInternal(); this.downloadResetRequestQueue(); @@ -204,7 +248,7 @@ public class HierarchicalOcclusionTraverser { glBindTextureUnit(0, 0); } - private void traverseInternal(int initialQueueSize) { + private void traverseInternal() { { //Fix mesa bug glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); @@ -218,7 +262,7 @@ public class HierarchicalOcclusionTraverser { nglClearNamedBufferSubData(this.renderList.id, GL_R32UI, 0, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, 0); - int firstDispatchSize = (initialQueueSize+(1<>LOCAL_WORK_SIZE_BITS; + int firstDispatchSize = (this.topNodeCount+(1<>LOCAL_WORK_SIZE_BITS; /* //prime the queue Todo: maybe move after the traversal? cause then it is more efficient work since it doesnt need to wait for this before starting? glClearNamedBufferData(this.queueMetaBuffer.id, GL_RGBA32UI, GL_RGBA, GL_UNSIGNED_INT, new int[]{0,1,1,0});//Prime the metadata buffer, which also contains @@ -231,27 +275,21 @@ public class HierarchicalOcclusionTraverser { MemoryUtil.memPutInt(ptr + 0, firstDispatchSize); MemoryUtil.memPutInt(ptr + 4, 1); MemoryUtil.memPutInt(ptr + 8, 1); - MemoryUtil.memPutInt(ptr + 12, initialQueueSize); + MemoryUtil.memPutInt(ptr + 12, this.topNodeCount); for (int i = 1; i < 5; i++) { MemoryUtil.memPutInt(ptr + (i*16)+ 0, 0); MemoryUtil.memPutInt(ptr + (i*16)+ 4, 1); MemoryUtil.memPutInt(ptr + (i*16)+ 8, 1); MemoryUtil.memPutInt(ptr + (i*16)+12, 0); } - //TODO: Move the first queue to a persistent list so its not updated every frame - - ptr = UploadStream.INSTANCE.upload(this.scratchQueueA, 0, 4L*initialQueueSize); - int i = 0; - for (int node : this.nodeManager.getTopLevelNodeIds()) { - MemoryUtil.memPutInt(ptr + 4L*(i++), node); - } - UploadStream.INSTANCE.commit(); } + //Execute first iteration glUniform1ui(NODE_QUEUE_INDEX_BINDING, 0); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, NODE_QUEUE_SOURCE_BINDING, this.scratchQueueA.id); + //Use the top node id buffer + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, NODE_QUEUE_SOURCE_BINDING, this.topNodeIds.id); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, NODE_QUEUE_SINK_BINDING, this.scratchQueueB.id); //Dont need to use indirect to dispatch the first iteration @@ -278,7 +316,6 @@ public class HierarchicalOcclusionTraverser { private void downloadResetRequestQueue() { glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); DownloadStream.INSTANCE.download(this.requestBuffer, this::forwardDownloadResult); - DownloadStream.INSTANCE.commit(); nglClearNamedBufferSubData(this.requestBuffer.id, GL_R32UI, 0, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, 0); } @@ -327,6 +364,7 @@ public class HierarchicalOcclusionTraverser { this.statisticsBuffer.free(); this.renderList.free(); this.queueMetaBuffer.free(); + this.topNodeIds.free(); this.scratchQueueA.free(); this.scratchQueueB.free(); glDeleteSamplers(this.hizSampler); diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/NodeManager.java b/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/NodeManager.java index 004f6064..9c436334 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/NodeManager.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/NodeManager.java @@ -1,5 +1,6 @@ package me.cortex.voxy.client.core.rendering.hierachical; +import it.unimi.dsi.fastutil.ints.IntConsumer; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import it.unimi.dsi.fastutil.ints.IntSet; import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap; @@ -90,6 +91,9 @@ public class NodeManager { private final LongOpenHashSet topLevelNodes = new LongOpenHashSet(); private int activeNodeRequestCount; + private IntConsumer topLevelNodeIdAddedCallback; + private IntConsumer topLevelNodeIdRemovedCallback; + public interface ICleaner { void alloc(int id); void move(int from, int to); @@ -101,6 +105,11 @@ public class NodeManager { private void clearMoveId(int from, int to) { if (this.cleanerInterface != null) this.cleanerInterface.move(from, to); } private void clearFreeId(int id) { if (this.cleanerInterface != null) this.cleanerInterface.free(id); } + public void setTLNCallbacks(IntConsumer onAdd, IntConsumer onRemove) { + this.topLevelNodeIdAddedCallback = onAdd; + this.topLevelNodeIdRemovedCallback = onRemove; + } + public NodeManager(int maxNodeCount, AbstractSectionGeometryManager geometryManager, ISectionWatcher watcher) { if (!MathUtil.isPowerOfTwo(maxNodeCount)) { throw new IllegalArgumentException("Max node count must be a power of 2"); @@ -144,8 +153,9 @@ public class NodeManager { if (!this.topLevelNodeIds.remove(id)) { throw new IllegalStateException("Node id was not in top level node ids: " + nodeId + " pos: " + WorldEngine.pprintPos(pos)); } + if (this.topLevelNodeIdRemovedCallback != null) + this.topLevelNodeIdRemovedCallback.accept(id); } - //Remove the entire thing this.recurseRemoveNode(pos); } @@ -805,6 +815,8 @@ public class NodeManager { throw new IllegalStateException(); } this.clearAllocId(id); + if (this.topLevelNodeIdAddedCallback != null) + this.topLevelNodeIdAddedCallback.accept(id); } private void finishRequest(int requestId, NodeChildRequest request) { diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/section/MDICSectionRenderer.java b/src/main/java/me/cortex/voxy/client/core/rendering/section/MDICSectionRenderer.java index d906c9ba..2a37c605 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/section/MDICSectionRenderer.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/section/MDICSectionRenderer.java @@ -68,7 +68,7 @@ public class MDICSectionRenderer extends AbstractSectionRenderer