Timing stats and not uploading the entire TLN node list every fking frame

This commit is contained in:
mcrcortex
2025-04-28 11:58:37 +10:00
parent aed8366206
commit d91d41de79
10 changed files with 185 additions and 50 deletions

View File

@@ -0,0 +1,81 @@
package me.cortex.voxy.client;
import java.lang.invoke.VarHandle;
import java.util.ArrayList;
/**
 * Registry of stopwatch-style samplers used to report per-frame timings.
 *
 * <p>Intended cycle, as driven by the callers of this class: {@link #resetSamplers()} zeroes the
 * accumulated time of every sampler, code sections are then bracketed with
 * {@link TimeSampler#start()} / {@link TimeSampler#stop()}, and finally {@link #update()} folds
 * the accumulated time of each sampler into its rolling value.
 *
 * <p>No synchronisation is performed on the registry or on the samplers.
 */
public class TimingStatistics {
    /** Weight of the previous rolling value when a new sample is blended in by {@link #update()}. */
    public static double ROLLING_WEIGHT = 0.95;

    // Must be declared before the sampler fields further down: static initialisers run in textual
    // order and every TimeSampler constructor registers itself in this list.
    private static final ArrayList<TimeSampler> allSamplers = new ArrayList<>();

    /** Accumulates elapsed {@link System#nanoTime()} time across start/stop pairs. */
    public static final class TimeSampler {
        private boolean running;
        private long timestamp; // nanoTime captured by the most recent start()
        private long runtime;   // nanoseconds accumulated since the last reset()
        private double rolling; // smoothed runtime in milliseconds, refreshed by update()

        /** Creates a sampler and registers it so the static reset/update passes include it. */
        public TimeSampler() {
            TimingStatistics.allSamplers.add(this);
        }

        /**
         * Clears the accumulated runtime; the rolling value is left untouched.
         *
         * @throws IllegalStateException if the sampler is currently running
         */
        private void reset() {
            if (this.running) {
                throw new IllegalStateException("Cannot reset a sampler that is still running");
            }
            this.runtime = 0;
        }

        /**
         * Begins a timed section.
         *
         * @throws IllegalStateException if the sampler is already running
         */
        public void start() {
            if (this.running) {
                throw new IllegalStateException("Sampler is already running");
            }
            this.running = true;
            // Fences on both sides of the clock read, kept exactly as in the original measurement code
            VarHandle.fullFence();
            this.timestamp = System.nanoTime();
            VarHandle.fullFence();
        }

        /**
         * Ends the current timed section and adds its duration to the accumulated runtime.
         *
         * @throws IllegalStateException if the sampler is not running
         */
        public void stop() {
            if (!this.running) {
                throw new IllegalStateException("Sampler is not running");
            }
            this.running = false;
            VarHandle.fullFence();
            this.runtime += System.nanoTime() - this.timestamp;
            VarHandle.fullFence();
        }

        /**
         * Removes another sampler's accumulated runtime from this one's.
         *
         * @param sampler the sampler whose accumulated runtime is subtracted
         */
        public void subtract(TimeSampler sampler) {
            this.runtime -= sampler.runtime;
        }

        /** Folds the accumulated runtime into the rolling value. */
        private void update() {
            // Integer division first: nanoseconds are truncated to whole microseconds, then expressed in milliseconds
            double time = ((double) (this.runtime / 1000)) / 1000;
            // The smoothed value never drops below the newest sample, so spikes show immediately and decay gradually
            this.rolling = Math.max(this.rolling * ROLLING_WEIGHT + time * (1 - ROLLING_WEIGHT), time);
        }

        /**
         * @return the rolling runtime in milliseconds as of the last {@link TimingStatistics#update()}
         */
        public double getRolling() {
            return this.rolling;
        }

        /**
         * @return the rolling value with three decimals, padded to at least six characters
         */
        public String pVal() {
            // Locale.ROOT keeps the decimal separator a '.' regardless of the user's default locale
            return String.format(java.util.Locale.ROOT, "%6.3f", this.rolling);
        }
    }

    /**
     * Zeroes the accumulated runtime of every registered sampler.
     *
     * @throws IllegalStateException if any sampler is still running
     */
    public static void resetSamplers() {
        TimingStatistics.allSamplers.forEach(TimeSampler::reset);
    }

    private static void updateSamplers() {
        TimingStatistics.allSamplers.forEach(TimeSampler::update);
    }

    public static TimeSampler setup = new TimeSampler();
    public static TimeSampler main = new TimeSampler();
    public static TimeSampler dynamic = new TimeSampler();

    /** Refreshes the rolling value of every registered sampler from its accumulated runtime. */
    public static void update() {
        updateSamplers();
    }
}

View File

@@ -139,16 +139,16 @@ public class ModelFactory {
public void addEntry(int blockId) {
public boolean addEntry(int blockId) {
if (this.idMappings[blockId] != -1) {
return;
return false;
}
//We are (probably) going to be baking the block id
// check that it is currently not inflight, if it is, return as its already being baked
// else add it to the flight as it is going to be baked
if (!this.blockStatesInFlight.add(blockId)) {
//Block baking is already in-flight
return;
return false;
}
var blockState = this.mapper.getBlockStateFromBlockId(blockId);
@@ -192,6 +192,7 @@ public class ModelFactory {
processTextureBakeResult(blockId, blockState, textureData);
});
this.bakery.renderFacesToStream(blockState, 123456, isFluid, this.downstream.getBufferId(), allocation);
return true;
}
//TODO: what I need to do is separate out fluid states from blockStates

View File

@@ -95,6 +95,8 @@ public class ModelTextureBakery {
});
int originalFramebuffer = glGetInteger(GL_FRAMEBUFFER_BINDING);
RenderLayer renderLayer = null;
if (!renderFluid) {
@@ -181,7 +183,7 @@ public class ModelTextureBakery {
glClear(GL_DEPTH_BUFFER_BIT);
glBindFramebuffer(GL_FRAMEBUFFER, 0);
glBindFramebuffer(GL_FRAMEBUFFER, originalFramebuffer);
}
private final BufferAllocator allocator = new BufferAllocator(786432);

View File

@@ -2,6 +2,7 @@ package me.cortex.voxy.client.core.rendering;
import io.netty.util.internal.MathUtil;
import me.cortex.voxy.client.RenderStatistics;
import me.cortex.voxy.client.TimingStatistics;
import me.cortex.voxy.client.core.gl.Capabilities;
import me.cortex.voxy.client.core.model.ModelBakerySubsystem;
import me.cortex.voxy.client.core.model.ModelStore;
@@ -22,6 +23,7 @@ import me.cortex.voxy.common.thread.ServiceThreadPool;
import me.cortex.voxy.common.world.WorldSection;
import net.minecraft.client.render.Camera;
import java.lang.invoke.VarHandle;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
@@ -127,9 +129,12 @@ public class RenderService<T extends AbstractSectionRenderer<J, ?>, J extends Vi
// this is because e.g. shadows, cause this pipeline to be invoked multiple times
// which may cause the geometry to become outdated resulting in corruption rendering in renderOpaque
//TODO: Need to find a proper way to fix this (if there even is one)
if (true /* firstInvocationThisFrame */) {
DownloadStream.INSTANCE.tick();
{
TimingStatistics.main.stop();
TimingStatistics.dynamic.start();
//Tick download stream
DownloadStream.INSTANCE.tick();
this.sectionUpdateQueue.consume(128);
@@ -143,8 +148,13 @@ public class RenderService<T extends AbstractSectionRenderer<J, ?>, J extends Vi
//this needs to go after, due to geometry updates committed by the nodeManager
this.sectionRenderer.getGeometryManager().tick();
//Tick upload stream
UploadStream.INSTANCE.tick();
TimingStatistics.dynamic.stop();
TimingStatistics.main.start();
}
UploadStream.INSTANCE.tick();
glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT|GL_PIXEL_BUFFER_BARRIER_BIT);

View File

@@ -2,6 +2,7 @@ package me.cortex.voxy.client.core.rendering;
import com.mojang.blaze3d.opengl.GlConst;
import com.mojang.blaze3d.systems.RenderSystem;
import me.cortex.voxy.client.TimingStatistics;
import me.cortex.voxy.client.config.VoxyConfig;
import me.cortex.voxy.client.core.gl.Capabilities;
import me.cortex.voxy.client.core.gl.GlBuffer;
@@ -46,7 +47,6 @@ public class VoxyRenderSystem {
private final PostProcessing postProcessing;
private final WorldEngine worldIn;
private final RenderDistanceTracker renderDistanceTracker;
private long runTimeNano = 0;
public VoxyRenderSystem(WorldEngine world, ServiceThreadPool threadPool) {
//Trigger the shared index buffer loading
@@ -70,9 +70,12 @@ public class VoxyRenderSystem {
this.renderDistanceTracker.setRenderDistance(renderDistance);
}
//private static final ModelTextureBakery mtb = new ModelTextureBakery(16, 16);
//private static final RawDownloadStream downstream = new RawDownloadStream(1<<20);
public void renderSetup(Frustum frustum, Camera camera) {
TimingStatistics.resetSamplers();
/*
if (false) {
int allocation = downstream.download(2 * 4 * 6 * 16 * 16, ptr -> {
@@ -100,18 +103,15 @@ public class VoxyRenderSystem {
downstream.submit();
downstream.tick();
}*/
VarHandle.fullFence();
long start = System.nanoTime();
VarHandle.fullFence();
TimingStatistics.setup.start();
this.renderDistanceTracker.setCenterAndProcess(camera.getBlockPos().getX(), camera.getBlockPos().getZ());
//Done here as is allows less gl state resetup
this.renderer.tickModelService();
PrintfDebugUtil.tick();
VarHandle.fullFence();
this.runTimeNano = System.nanoTime() - start;
VarHandle.fullFence();
PrintfDebugUtil.tick();
TimingStatistics.setup.stop();
}
private static Matrix4f makeProjectionMatrix(float near, float far) {
@@ -137,13 +137,10 @@ public class VoxyRenderSystem {
}
public void renderOpaque(MatrixStack matrices, double cameraX, double cameraY, double cameraZ) {
VarHandle.fullFence();
long startTime = System.nanoTime();
VarHandle.fullFence();
if (IrisUtil.irisShadowActive()) {
return;
}
TimingStatistics.main.start();
if (false) {
//only increase quality while there are very few mesh queues, this stops,
@@ -209,20 +206,17 @@ public class VoxyRenderSystem {
this.postProcessing.renderPost(projection, RenderSystem.getProjectionMatrix(), boundFB);
glBindFramebuffer(GlConst.GL_FRAMEBUFFER, oldFB);
TimingStatistics.main.stop();
VarHandle.fullFence();
this.runTimeNano += System.nanoTime() - startTime;
VarHandle.fullFence();
}
private double role = 0;
public void addDebugInfo(List<String> debug) {
debug.add("GlBuffer, Count/Size (mb): " + GlBuffer.getCount() + "/" + (GlBuffer.getTotalSize()/1_000_000));
this.renderer.addDebugData(debug);
double aa = (((double)(this.runTimeNano/1000))/1000);
this.role = Math.max(Math.ceil((this.role * 0.99 + (aa*0.01))*1000)/1000, aa);
debug.add("Voxy frame runtime (millis): " + this.role);
{
TimingStatistics.update();
debug.add("Voxy frame runtime (millis): " + TimingStatistics.setup.pVal() + ", " + TimingStatistics.dynamic.pVal() + ", " + TimingStatistics.main.pVal());
}
PrintfDebugUtil.addToOut(debug);
}

View File

@@ -1,5 +1,6 @@
package me.cortex.voxy.client.core.rendering.hierachical;
import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
import me.cortex.voxy.client.RenderStatistics;
import me.cortex.voxy.client.config.VoxyConfig;
import me.cortex.voxy.client.core.gl.GlBuffer;
@@ -47,6 +48,10 @@ public class HierarchicalOcclusionTraverser {
private final GlBuffer statisticsBuffer = new GlBuffer(1024).zero();
private int topNodeCount;
private final Int2IntOpenHashMap topNode2idxMapping = new Int2IntOpenHashMap();//Used to store mapping from TLN to array index
private final int[] idx2topNodeMapping = new int[100_000];//Used to map idx to TLN id
private final GlBuffer topNodeIds = new GlBuffer(100_000*4).zero();
private final GlBuffer queueMetaBuffer = new GlBuffer(4*4*5).zero();
private final GlBuffer scratchQueueA = new GlBuffer(100_000*4).zero();
private final GlBuffer scratchQueueB = new GlBuffer(100_000*4).zero();
@@ -118,8 +123,47 @@ public class HierarchicalOcclusionTraverser {
.ssbo("NODE_QUEUE_META_BINDING", this.queueMetaBuffer)
.ssbo("RENDER_TRACKER_BINDING", this.nodeCleaner.visibilityBuffer)
.ssboIf("STATISTICS_BUFFER_BINDING", this.statisticsBuffer);
this.topNode2idxMapping.defaultReturnValue(-1);
this.nodeManager.setTLNCallbacks(this::addTLN, this::remTLN);
}
//Appends a top level node id to the end of the persistent id list: writes it into the
// topNodeIds buffer and records the id <-> index mappings on the cpu side
private void addTLN(int id) {
    //Check capacity before touching any state, so a rejected add leaves the count and mappings intact
    if (this.topNodeCount >= this.idx2topNodeMapping.length) {
        throw new IllegalStateException("Top level node count greater than capacity");
    }
    int aid = this.topNodeCount++;//Claim the next free slot

    //Writes the single id via a 4 byte sub-range buffer clear, known to be a poor approach, TODO: replace
    glClearNamedBufferSubData(this.topNodeIds.id, GL_R32UI, aid*4L, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, new int[]{id});
    this.topNode2idxMapping.put(id, aid);
    this.idx2topNodeMapping[aid] = id;
}
//Removes a top level node id from the persistent id list, keeping the list dense by moving
// the last entry into the freed slot (both in the cpu side mappings and in the topNodeIds buffer)
private void remTLN(int id) {
    //Remove id, -1 is the default return value set on the map, i.e. the id was never tracked
    int idx = this.topNode2idxMapping.remove(id);
    //Validate before decrementing so a bogus removal does not corrupt the count
    if (idx == -1) {
        throw new IllegalStateException("Top level node id was not tracked: " + id);
    }

    //Decrement count
    this.topNodeCount--;

    //Count has already been decremented so is an exact match
    //If we are at the end of the array we dont need to do anything
    if (idx == this.topNodeCount) {
        return;
    }

    //Move the entry at the end to the current index
    int endTLNId = this.idx2topNodeMapping[this.topNodeCount];
    this.idx2topNodeMapping[idx] = endTLNId;//Set the old to the new
    //put returns the previous index, -1 means the end entry was missing from the id -> index map
    if (this.topNode2idxMapping.put(endTLNId, idx) == -1) {
        throw new IllegalStateException("End top level node id had no index mapping: " + endTLNId);
    }

    //Move it server side, from end to new idx
    glClearNamedBufferSubData(this.topNodeIds.id, GL_R32UI, idx*4L, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, new int[]{endTLNId});
}
private static void setFrustum(Viewport<?> viewport, long ptr) {
for (int i = 0; i < 6; i++) {
@@ -183,7 +227,7 @@ public class HierarchicalOcclusionTraverser {
this.statisticsBuffer.zero();
}
this.traverseInternal(this.nodeManager.getTopLevelNodeIds().size());
this.traverseInternal();
this.downloadResetRequestQueue();
@@ -204,7 +248,7 @@ public class HierarchicalOcclusionTraverser {
glBindTextureUnit(0, 0);
}
private void traverseInternal(int initialQueueSize) {
private void traverseInternal() {
{
//Fix mesa bug
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
@@ -218,7 +262,7 @@ public class HierarchicalOcclusionTraverser {
nglClearNamedBufferSubData(this.renderList.id, GL_R32UI, 0, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);
int firstDispatchSize = (initialQueueSize+(1<<LOCAL_WORK_SIZE_BITS)-1)>>LOCAL_WORK_SIZE_BITS;
int firstDispatchSize = (this.topNodeCount+(1<<LOCAL_WORK_SIZE_BITS)-1)>>LOCAL_WORK_SIZE_BITS;
/*
//prime the queue Todo: maybe move after the traversal? cause then it is more efficient work since it doesnt need to wait for this before starting?
glClearNamedBufferData(this.queueMetaBuffer.id, GL_RGBA32UI, GL_RGBA, GL_UNSIGNED_INT, new int[]{0,1,1,0});//Prime the metadata buffer, which also contains
@@ -231,27 +275,21 @@ public class HierarchicalOcclusionTraverser {
MemoryUtil.memPutInt(ptr + 0, firstDispatchSize);
MemoryUtil.memPutInt(ptr + 4, 1);
MemoryUtil.memPutInt(ptr + 8, 1);
MemoryUtil.memPutInt(ptr + 12, initialQueueSize);
MemoryUtil.memPutInt(ptr + 12, this.topNodeCount);
for (int i = 1; i < 5; i++) {
MemoryUtil.memPutInt(ptr + (i*16)+ 0, 0);
MemoryUtil.memPutInt(ptr + (i*16)+ 4, 1);
MemoryUtil.memPutInt(ptr + (i*16)+ 8, 1);
MemoryUtil.memPutInt(ptr + (i*16)+12, 0);
}
//TODO: Move the first queue to a persistent list so its not updated every frame
ptr = UploadStream.INSTANCE.upload(this.scratchQueueA, 0, 4L*initialQueueSize);
int i = 0;
for (int node : this.nodeManager.getTopLevelNodeIds()) {
MemoryUtil.memPutInt(ptr + 4L*(i++), node);
}
UploadStream.INSTANCE.commit();
}
//Execute first iteration
glUniform1ui(NODE_QUEUE_INDEX_BINDING, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, NODE_QUEUE_SOURCE_BINDING, this.scratchQueueA.id);
//Use the top node id buffer
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, NODE_QUEUE_SOURCE_BINDING, this.topNodeIds.id);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, NODE_QUEUE_SINK_BINDING, this.scratchQueueB.id);
//Dont need to use indirect to dispatch the first iteration
@@ -278,7 +316,6 @@ public class HierarchicalOcclusionTraverser {
private void downloadResetRequestQueue() {
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
DownloadStream.INSTANCE.download(this.requestBuffer, this::forwardDownloadResult);
DownloadStream.INSTANCE.commit();
nglClearNamedBufferSubData(this.requestBuffer.id, GL_R32UI, 0, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);
}
@@ -327,6 +364,7 @@ public class HierarchicalOcclusionTraverser {
this.statisticsBuffer.free();
this.renderList.free();
this.queueMetaBuffer.free();
this.topNodeIds.free();
this.scratchQueueA.free();
this.scratchQueueB.free();
glDeleteSamplers(this.hizSampler);

View File

@@ -1,5 +1,6 @@
package me.cortex.voxy.client.core.rendering.hierachical;
import it.unimi.dsi.fastutil.ints.IntConsumer;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
@@ -90,6 +91,9 @@ public class NodeManager {
private final LongOpenHashSet topLevelNodes = new LongOpenHashSet();
private int activeNodeRequestCount;
private IntConsumer topLevelNodeIdAddedCallback;
private IntConsumer topLevelNodeIdRemovedCallback;
public interface ICleaner {
void alloc(int id);
void move(int from, int to);
@@ -101,6 +105,11 @@ public class NodeManager {
private void clearMoveId(int from, int to) { if (this.cleanerInterface != null) this.cleanerInterface.move(from, to); }
private void clearFreeId(int id) { if (this.cleanerInterface != null) this.cleanerInterface.free(id); }
/**
 * Installs the listeners that are handed a node id when a top level node is added or removed.
 * Either argument may be null, the corresponding notification is then skipped.
 *
 * @param onAdd    receives the id of a newly added top level node
 * @param onRemove receives the id of a removed top level node
 */
public void setTLNCallbacks(IntConsumer onAdd, IntConsumer onRemove) {
    this.topLevelNodeIdRemovedCallback = onRemove;
    this.topLevelNodeIdAddedCallback = onAdd;
}
public NodeManager(int maxNodeCount, AbstractSectionGeometryManager geometryManager, ISectionWatcher watcher) {
if (!MathUtil.isPowerOfTwo(maxNodeCount)) {
throw new IllegalArgumentException("Max node count must be a power of 2");
@@ -144,8 +153,9 @@ public class NodeManager {
if (!this.topLevelNodeIds.remove(id)) {
throw new IllegalStateException("Node id was not in top level node ids: " + nodeId + " pos: " + WorldEngine.pprintPos(pos));
}
if (this.topLevelNodeIdRemovedCallback != null)
this.topLevelNodeIdRemovedCallback.accept(id);
}
//Remove the entire thing
this.recurseRemoveNode(pos);
}
@@ -805,6 +815,8 @@ public class NodeManager {
throw new IllegalStateException();
}
this.clearAllocId(id);
if (this.topLevelNodeIdAddedCallback != null)
this.topLevelNodeIdAddedCallback.accept(id);
}
private void finishRequest(int requestId, NodeChildRequest request) {

View File

@@ -68,7 +68,7 @@ public class MDICSectionRenderer extends AbstractSectionRenderer<MDICViewport, B
//TODO: needs to be in the viewport, since it contains the compute indirect call/values
private final GlBuffer drawCountCallBuffer = new GlBuffer(1024).zero();
private final GlBuffer drawCallBuffer = new GlBuffer(5*4*(400_000+100_000+100_000)).zero();//400k draw calls
private final GlBuffer drawCallBuffer = new GlBuffer(5*4*(400_000+100_000+100_000)).zero();//400k draw calls
private final GlBuffer positionScratchBuffer = new GlBuffer(8*400000).zero();//400k positions
//Statistics
@@ -118,12 +118,6 @@ public class MDICSectionRenderer extends AbstractSectionRenderer<MDICViewport, B
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, SharedIndexBuffer.INSTANCE.id());
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, this.drawCallBuffer.id);
glBindBuffer(GL_PARAMETER_BUFFER_ARB, this.drawCountCallBuffer.id);
}
private void renderTemporalTerrain() {
}
private void renderTerrain(long indirectOffset, long drawCountOffset, int maxDrawCount) {

View File

@@ -105,6 +105,9 @@ public class DownloadStream {
public void commit() {
if (this.downloadList.isEmpty()) {
return;
}
glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT);
//Copies all the data from target buffers into the download stream
for (var entry : this.downloadList) {

View File

@@ -32,7 +32,7 @@ public class ServiceThreadPool {
private final ThreadGroup threadGroup;
public ServiceThreadPool(int threadCount) {
this(threadCount, 4);//Maybe change to 3
this(threadCount, 3);//Maybe change to 3
}
public ServiceThreadPool(int threadCount, int priority) {