diff --git a/src/main/java/me/cortex/voxy/client/core/VoxyRenderSystem.java b/src/main/java/me/cortex/voxy/client/core/VoxyRenderSystem.java index d123ed5d..770d9482 100644 --- a/src/main/java/me/cortex/voxy/client/core/VoxyRenderSystem.java +++ b/src/main/java/me/cortex/voxy/client/core/VoxyRenderSystem.java @@ -26,6 +26,7 @@ import me.cortex.voxy.client.core.rendering.section.geometry.IGeometryData; import me.cortex.voxy.client.core.rendering.util.DownloadStream; import me.cortex.voxy.client.core.rendering.util.PrintfDebugUtil; import me.cortex.voxy.client.core.rendering.util.UploadStream; +import me.cortex.voxy.client.core.util.GPUTiming; import me.cortex.voxy.client.core.util.IrisUtil; import me.cortex.voxy.common.Logger; import me.cortex.voxy.common.thread.ServiceThreadPool; @@ -206,6 +207,7 @@ public class VoxyRenderSystem { long startTime = System.nanoTime(); TimingStatistics.all.start(); + GPUTiming.INSTANCE.marker();//Start marker TimingStatistics.main.start(); //TODO: optimize @@ -260,8 +262,11 @@ public class VoxyRenderSystem { //Done here as is allows less gl state resetup this.modelService.tick(Math.max(3_000_000-(System.nanoTime()-startTime), 500_000)); } + GPUTiming.INSTANCE.marker(); TimingStatistics.postDynamic.stop(); + GPUTiming.INSTANCE.tick(); + glBindFramebuffer(GlConst.GL_FRAMEBUFFER, oldFB); glViewport(dims[0], dims[1], dims[2], dims[3]); diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/building/RenderGenerationService.java b/src/main/java/me/cortex/voxy/client/core/rendering/building/RenderGenerationService.java index cefc2f70..be55a658 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/building/RenderGenerationService.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/building/RenderGenerationService.java @@ -51,9 +51,9 @@ public class RenderGenerationService { private final AtomicInteger holdingSectionCount = new AtomicInteger();//Used to limit section holding private final AtomicInteger 
taskQueueCount = new AtomicInteger(); - private final PriorityBlockingQueue taskQueue = new PriorityBlockingQueue<>(320000, (a,b)-> Long.compareUnsigned(a.priority, b.priority)); + private final PriorityBlockingQueue taskQueue = new PriorityBlockingQueue<>(5000, (a,b)-> Long.compareUnsigned(a.priority, b.priority)); private final StampedLock taskMapLock = new StampedLock(); - private final Long2ObjectOpenHashMap taskMap = new Long2ObjectOpenHashMap<>(320000); + private final Long2ObjectOpenHashMap taskMap = new Long2ObjectOpenHashMap<>(5000); private final WorldEngine world; private final ModelBakerySubsystem modelBakery; diff --git a/src/main/java/me/cortex/voxy/client/core/util/GPUTiming.java b/src/main/java/me/cortex/voxy/client/core/util/GPUTiming.java new file mode 100644 index 00000000..fb520267 --- /dev/null +++ b/src/main/java/me/cortex/voxy/client/core/util/GPUTiming.java @@ -0,0 +1,165 @@ +package me.cortex.voxy.client.core.util; + +import it.unimi.dsi.fastutil.ints.IntArrayFIFOQueue; +import it.unimi.dsi.fastutil.objects.ObjectArrayFIFOQueue; +import it.unimi.dsi.fastutil.objects.ObjectArrayList; +import me.cortex.voxy.client.core.gl.GlBuffer; +import me.cortex.voxy.client.core.rendering.util.DownloadStream; +import me.cortex.voxy.common.util.MemoryBuffer; +import me.cortex.voxy.common.util.Pair; +import me.cortex.voxy.common.util.TrackedObject; +import org.lwjgl.system.MemoryUtil; + +import java.util.Arrays; +import java.util.function.Consumer; + +import static org.lwjgl.opengl.ARBTimerQuery.GL_TIMESTAMP; +import static org.lwjgl.opengl.ARBTimerQuery.glQueryCounter; +import static org.lwjgl.opengl.GL11.glFinish; +import static org.lwjgl.opengl.GL11.glFlush; +import static org.lwjgl.opengl.GL15.glDeleteQueries; +import static org.lwjgl.opengl.GL15.glGenQueries; +import static org.lwjgl.opengl.GL15C.*; +import static org.lwjgl.opengl.GL33.glGetQueryObjecti64; +import static org.lwjgl.opengl.GL42.glMemoryBarrier; +import static 
org.lwjgl.opengl.GL44.GL_QUERY_RESULT_NO_WAIT;
+import static org.lwjgl.opengl.GL45.glGetQueryBufferObjectui64v;
+
+/**
+ * Records GL timestamp queries at marked points and reads their results back without
+ * blocking on the query result.
+ *
+ * <p>Intended use, as seen from the render loop: call {@link #marker()} at each point of
+ * interest, then call {@link #tick()} once. At most 64 markers may be recorded between two
+ * {@code tick()} calls.
+ */
+public class GPUTiming {
+    public static GPUTiming INSTANCE = new GPUTiming();
+
+    private final GlTimestampQuerySet timingSet = new GlTimestampQuerySet();
+
+    /**
+     * Records a {@code GL_TIMESTAMP} query, tagged with metadata value 0.
+     *
+     * @throws IllegalStateException if 64 markers are already pending for the current batch
+     */
+    public void marker() {
+        this.timingSet.capture(0);
+    }
+
+    /**
+     * Queues the markers recorded since the previous call for readback, then delivers every
+     * queued batch whose results have become available.
+     */
+    public void tick() {
+        this.timingSet.download((meta, data) -> {
+            long current = data[0];
+            for (int i = 1; i < meta.length; i++) {
+                long next = data[i];
+                //Difference between consecutive markers; computed but not reported anywhere yet
+                long delta = next - current;
+                //System.out.println(delta);
+                current = next;
+            }
+        });
+        this.timingSet.tick();
+    }
+
+    /** Deletes every GL query object owned by this instance. */
+    public void free() {
+        this.timingSet.free();
+    }
+
+    /** Receives one batch of results; both arrays have one entry per capture, in capture order. */
+    public interface TimingDataConsumer {
+        void accept(int[] metadata, long[] timings);
+    }
+
+    /** Pool-backed set of timestamp queries that are captured in batches and polled for results. */
+    private static final class GlTimestampQuerySet extends TrackedObject {
+        //Maximum number of captures that may be pending between two download() calls
+        private static final int MAX_CAPTURES = 64;
+
+        /** One batch of issued queries waiting for their results. */
+        private record InflightRequest(int[] queries, int[] meta, TimingDataConsumer callback) {
+            /**
+             * Invokes the callback if the batch is complete and returns the query ids to
+             * {@code queryPool} for reuse.
+             *
+             * @return {@code true} if the callback ran, {@code false} if results are not ready yet
+             */
+            private boolean callbackIfReady(IntArrayFIFOQueue queryPool) {
+                //Only the last query is polled; NOTE(review): presumably relies on GL making
+                // query results available in issue order - confirm against the GL spec
+                int last = this.queries[this.queries.length - 1];
+                boolean ready = glGetQueryObjecti(last, GL_QUERY_RESULT_AVAILABLE) == GL_TRUE;
+                if (!ready) {
+                    return false;
+                }
+                long[] results = new long[this.queries.length];
+                for (int i = 0; i < this.queries.length; i++) {
+                    results[i] = glGetQueryObjecti64(this.queries[i], GL_QUERY_RESULT);
+                    queryPool.enqueue(this.queries[i]);
+                }
+                this.callback.accept(this.meta, results);
+                return true;
+            }
+        }
+
+        //Query ids that are free for reuse
+        private final IntArrayFIFOQueue POOL = new IntArrayFIFOQueue();
+        //Batches handed to download() whose callback has not run yet, oldest first
+        private final ObjectArrayFIFOQueue<InflightRequest> INFLIGHT = new ObjectArrayFIFOQueue<>();
+
+        //Queries and metadata of the batch currently being recorded; only [0, index) is valid
+        private final int[] queries = new int[MAX_CAPTURES];
+        private final int[] metadata = new int[MAX_CAPTURES];
+        private int index;
+
+        /**
+         * Issues a {@code GL_TIMESTAMP} query and appends it to the current batch.
+         *
+         * @param metadata caller-defined tag passed back to the consumer alongside the result
+         * @throws IllegalStateException if the current batch already holds the maximum number of captures
+         */
+        public void capture(int metadata) {
+            //Must be >=: index == length is already one past the last valid slot
+            if (this.index >= this.metadata.length) {
+                throw new IllegalStateException(
+                        "Too many timestamp captures without a download (max " + this.metadata.length + ")");
+            }
+            int slot = this.index++;
+            this.metadata[slot] = metadata;
+            int query = this.getQuery();
+            glQueryCounter(query, GL_TIMESTAMP);
+            this.queries[slot] = query;
+        }
+
+        /**
+         * Closes the current batch and queues it; {@code consumer} is invoked from a later
+         * {@link #tick()} once the results are available. Does nothing if no capture was
+         * recorded since the previous call.
+         */
+        public void download(TimingDataConsumer consumer) {
+            if (this.index == 0) {
+                //An empty batch has no query to poll and would fail in callbackIfReady
+                return;
+            }
+            var batchQueries = Arrays.copyOf(this.queries, this.index);
+            var batchMetadata = Arrays.copyOf(this.metadata, this.index);
+            this.index = 0;
+            this.INFLIGHT.enqueue(new InflightRequest(batchQueries, batchMetadata, consumer));
+        }
+
+        /** Delivers queued batches in order, stopping at the first one that is not ready. */
+        public void tick() {
+            while (!INFLIGHT.isEmpty()) {
+                if (INFLIGHT.first().callbackIfReady(POOL)) {
+                    INFLIGHT.dequeue();
+                } else {
+                    break;
+                }
+            }
+        }
+
+        //Reuses a pooled query id if one exists, otherwise generates a new one
+        private int getQuery() {
+            if (POOL.isEmpty()) {
+                return glGenQueries();
+            } else {
+                return POOL.dequeueInt();
+            }
+        }
+
+        /** Deletes all query objects: captured-but-not-downloaded, pooled and in-flight. */
+        @Override
+        public void free() {
+            super.free0();
+            //Queries of the batch still being recorded are in neither POOL nor INFLIGHT
+            for (int i = 0; i < this.index; i++) {
+                glDeleteQueries(this.queries[i]);
+            }
+            this.index = 0;
+            while (!POOL.isEmpty()) {
+                glDeleteQueries(POOL.dequeueInt());
+            }
+            while (!INFLIGHT.isEmpty()) {
+                glDeleteQueries(INFLIGHT.dequeue().queries);
+            }
+        }
+    }
+    //Disabled alternative implementation that writes query results into a GlBuffer
+    /*
+    private static final class GlTimestampQuerySet extends TrackedObject {
+        private final int query = glGenQueries();
+        public final GlBuffer store;
+        public final int[] metadata;
+        public int index;
+        public GlTimestampQuerySet(int maxCount) {
+            this.store = new GlBuffer(maxCount*8L);
+            this.metadata = new int[maxCount];
+        }
+
+        public void capture(int metadata) {
+            if (this.index>this.metadata.length) {
+                throw new IllegalStateException();
+            }
+            int slot = this.index++;
+            this.metadata[slot] = metadata;
+            glQueryCounter(this.query, GL_TIMESTAMP);//This should be gpu side, so should be fast
+            glFinish();
+            glGetQueryBufferObjectui64v(this.query, this.store.id, GL_QUERY_RESULT_NO_WAIT, slot*8L);
+            glMemoryBarrier(-1);
+        }
+
+        public void download(TimingDataConsumer consumer) {
+            var meta = Arrays.copyOf(this.metadata, this.index);
+            this.index = 0;
+            //DownloadStream.INSTANCE.download(this.store, buffer->consumer.accept(meta, buffer));
+        }
+
+        @Override
+        public void free() {
+            super.free0();
+            glDeleteQueries(this.query);
+            this.store.free();
+        }
+    }*/
+}