Major tinkering with improving frametimes

This commit is contained in:
mcrcortex
2025-04-28 21:23:19 +10:00
parent d91d41de79
commit 7a88e1f893
10 changed files with 159 additions and 69 deletions

View File

@@ -4,7 +4,7 @@ import java.lang.invoke.VarHandle;
import java.util.ArrayList;
public class TimingStatistics {
public static double ROLLING_WEIGHT = 0.95;
public static double ROLLING_WEIGHT = 0.975;
private static final ArrayList<TimeSampler> allSamplers = new ArrayList<>();
public static final class TimeSampler {
private boolean running;
@@ -70,10 +70,12 @@ public class TimingStatistics {
TimingStatistics.allSamplers.forEach(TimeSampler::update);
}
public static TimeSampler all = new TimeSampler();
public static TimeSampler setup = new TimeSampler();
public static TimeSampler main = new TimeSampler();
public static TimeSampler dynamic = new TimeSampler();
public static void update() {
updateSamplers();
}

View File

@@ -2,8 +2,12 @@ package me.cortex.voxy.client.core.model;
public class IdNotYetComputedException extends RuntimeException {
public final int id;
public IdNotYetComputedException(int id) {
public final boolean isIdBlockId;
public int auxBitMsk;
public long[] auxData;
public IdNotYetComputedException(int id, boolean isIdBlockId) {
super(null, null, false, false);
this.id = id;
this.isIdBlockId = isIdBlockId;
}
}

View File

@@ -2,6 +2,7 @@ package me.cortex.voxy.client.core.model;
import it.unimi.dsi.fastutil.ints.IntLinkedOpenHashSet;
import me.cortex.voxy.client.TimingStatistics;
import me.cortex.voxy.client.core.gl.GlFramebuffer;
import me.cortex.voxy.client.core.rendering.building.BuiltSection;
import me.cortex.voxy.client.core.rendering.util.RawDownloadStream;
@@ -25,15 +26,13 @@ public class ModelBakerySubsystem {
//Redo to just make it request the block faces with the async texture download stream which
// basically solves all the render stutter due to the baking
private final RawDownloadStream textureDownStream = new RawDownloadStream(8*1024*1024);//8mb downstream
private final ModelStore storage = new ModelStore();
public final ModelFactory factory;
private final IntLinkedOpenHashSet blockIdQueue = new IntLinkedOpenHashSet();
private final ConcurrentLinkedDeque<Integer> blockIdQueue = new ConcurrentLinkedDeque<>();//TODO: replace with custom DS
private final ConcurrentLinkedDeque<Mapper.BiomeEntry> biomeQueue = new ConcurrentLinkedDeque<>();
public ModelBakerySubsystem(Mapper mapper) {
this.factory = new ModelFactory(mapper, this.storage, this.textureDownStream);
this.factory = new ModelFactory(mapper, this.storage);
}
public void tick() {
@@ -45,6 +44,7 @@ public class ModelBakerySubsystem {
}
/*
//There should be a method to access the frame time IIRC, if the user framecap is unlimited lock it to like 60 fps for computation
int BUDGET = 16;//TODO: make this computed based on the remaining free time in a frame (and like div by 2 to reduce overhead) (with a min of 1)
if (!this.blockIdQueue.isEmpty()) {
@@ -64,27 +64,39 @@ public class ModelBakerySubsystem {
for (int j = 0; j < i; j++) {
this.factory.addEntry(est[j]);
}
}*/
long totalBudget = 2_000_000;
//TimingStatistics.modelProcess.start();
long start = System.nanoTime();
VarHandle.fullFence();
{
long budget = Math.min(totalBudget-200_000, totalBudget-(this.factory.resultJobs.size()*20_000L))-200_000;
if (budget > 50_000) {
Integer i = this.blockIdQueue.poll();
while (i != null && (System.nanoTime() - start < budget)) {
this.factory.addEntry(i);
i = this.blockIdQueue.poll();
}
}
}
//Submit is effectively free if nothing is submitted
this.textureDownStream.submit();
this.factory.tick();
//Tick the download stream
this.textureDownStream.tick();
while (!this.factory.resultJobs.isEmpty()) {
this.factory.resultJobs.poll().run();
if (totalBudget<(System.nanoTime()-start))
break;
}
//TimingStatistics.modelProcess.stop();
}
public void shutdown() {
this.factory.free();
this.storage.free();
this.textureDownStream.free();
}
public void requestBlockBake(int blockId) {
synchronized (this.blockIdQueue) {
if (this.blockIdQueue.add(blockId)) {
VarHandle.fullFence();//Ensure memory coherancy
}
}
this.blockIdQueue.add(blockId);
}
public void addBiome(Mapper.BiomeEntry biomeEntry) {
@@ -92,7 +104,7 @@ public class ModelBakerySubsystem {
}
public void addDebugData(List<String> debug) {
debug.add("MQ/IF/MC: " + this.blockIdQueue.size() + "/" + this.factory.getInflightCount() + "/" + this.factory.getBakedCount());//Model bake queue/in flight/model baked count
debug.add(String.format("MQ/IF/MC: %04d, %03d, %04d", this.blockIdQueue.size(), this.factory.getInflightCount(), this.factory.getBakedCount()));//Model bake queue/in flight/model baked count
}
public ModelStore getStore() {
@@ -102,4 +114,8 @@ public class ModelBakerySubsystem {
/**
 * Reports whether the bakery has nothing left to do.
 *
 * @return {@code true} only when the block id queue is empty, the factory
 *         reports zero in-flight bakes and the biome queue is empty
 */
public boolean areQueuesEmpty() {
    // Checks run in the same order as before so the cheaper ones short-circuit first.
    if (!this.blockIdQueue.isEmpty()) {
        return false;
    }
    if (this.factory.getInflightCount() != 0) {
        return false;
    }
    return this.biomeQueue.isEmpty();
}
/**
 * Counts the block models that are still outstanding.
 *
 * @return the number of ids waiting in the bake queue plus the factory's in-flight count
 */
public int getProcessingCount() {
    // NOTE(review): if blockIdQueue is the ConcurrentLinkedDeque, size() walks the
    // whole deque (not constant time) - confirm this is acceptable for callers that poll often.
    final int queued = this.blockIdQueue.size();
    final int inFlight = this.factory.getInflightCount();
    return queued + inFlight;
}
}

View File

@@ -115,15 +115,16 @@ public class ModelFactory {
private final Mapper mapper;
private final ModelStore storage;
private final RawDownloadStream downstream;
private final RawDownloadStream downstream = new RawDownloadStream(8*1024*1024);//8mb downstream
public final Deque<Runnable> resultJobs = new ArrayDeque<>();
//TODO: NOTE!!! is it worth even uploading as a 16x16 texture, since automatic lod selection... doing 8x8 textures might be perfectly ok!!!
// this _quarters_ the memory requirements for the texture atlas!!! WHICH IS HUGE saving
public ModelFactory(Mapper mapper, ModelStore storage, RawDownloadStream downstream) {
public ModelFactory(Mapper mapper, ModelStore storage) {
this.mapper = mapper;
this.storage = storage;
this.downstream = downstream;
this.bakery = new ModelTextureBakery(MODEL_TEXTURE_SIZE, MODEL_TEXTURE_SIZE);
this.metadataCache = new long[1<<16];
@@ -137,7 +138,9 @@ public class ModelFactory {
}
/**
 * Per-frame tick of the factory; forwards to the factory's download stream.
 */
public void tick() {
//Tick the download stream owned by this factory
this.downstream.tick();
}
public boolean addEntry(int blockId) {
if (this.idMappings[blockId] != -1) {
@@ -172,7 +175,7 @@ public class ModelFactory {
}
int TOTAL_FACES_TEXTURE_SIZE = MODEL_TEXTURE_SIZE*MODEL_TEXTURE_SIZE*2*4*6;// since both depth and colour are packed together, 6 faces, 4 bytes per pixel
int allocation = this.downstream.download(TOTAL_FACES_TEXTURE_SIZE, ptr->{
int allocation = this.downstream.download(TOTAL_FACES_TEXTURE_SIZE, ptr -> {
ColourDepthTextureData[] textureData = new ColourDepthTextureData[6];
final int FACE_SIZE = MODEL_TEXTURE_SIZE*MODEL_TEXTURE_SIZE;
for (int face = 0; face < 6; face++) {
@@ -189,18 +192,14 @@ public class ModelFactory {
textureData[face] = new ColourDepthTextureData(colour, depth, MODEL_TEXTURE_SIZE, MODEL_TEXTURE_SIZE);
}
processTextureBakeResult(blockId, blockState, textureData);
this.resultJobs.add(()->processTextureBakeResult(blockId, blockState, textureData));
});
this.bakery.renderFacesToStream(blockState, 123456, isFluid, this.downstream.getBufferId(), allocation);
return true;
}
//TODO: what i need to do is separate out fluid states from blockStates
//TODO: so need a few things, per face sizes and offsets, the sizes should be computed from the pixels and find the minimum bounding pixel
// while the depth is computed from the depth buffer data
//This is
private void processTextureBakeResult(int blockId, BlockState blockState, ColourDepthTextureData[] textureData) {
if (this.idMappings[blockId] != -1) {
@@ -634,7 +633,7 @@ public class ModelFactory {
public int getModelId(int blockId) {
int map = this.idMappings[blockId];
if (map == -1) {
throw new IdNotYetComputedException(blockId);
throw new IdNotYetComputedException(blockId, true);
}
return map;
}
@@ -646,7 +645,7 @@ public class ModelFactory {
public int getFluidClientStateId(int clientBlockStateId) {
int map = this.fluidStateLUT[clientBlockStateId];
if (map == -1) {
throw new IdNotYetComputedException(clientBlockStateId);
throw new IdNotYetComputedException(clientBlockStateId, false);
}
return map;
}
@@ -691,6 +690,7 @@ public class ModelFactory {
}
/**
 * Releases the resources held by this factory: the download stream first, then the texture bakery.
 */
public void free() {
this.downstream.free();
this.bakery.free();
}

View File

@@ -79,7 +79,7 @@ public class RenderService<T extends AbstractSectionRenderer<J, ?>, J extends Vi
this.viewportSelector = new ViewportSelector<>(this.sectionRenderer::createViewport);
this.renderGen = new RenderGenerationService(world, this.modelService, serviceThreadPool,
this.geometryUpdateQueue::push, this.sectionRenderer.getGeometryManager() instanceof IUsesMeshlets,
()->this.geometryUpdateQueue.count()<2000);
()->this.geometryUpdateQueue.count()<1000 && this.modelService.getProcessingCount()< 750);
router.setCallbacks(this.renderGen::enqueueTask, section -> {
section.acquire();
@@ -132,26 +132,36 @@ public class RenderService<T extends AbstractSectionRenderer<J, ?>, J extends Vi
{
TimingStatistics.main.stop();
TimingStatistics.dynamic.start();
long start = System.nanoTime();
VarHandle.fullFence();
//Tick download stream
DownloadStream.INSTANCE.tick();
//Tick upload stream (this is ok to do here as upload ticking is just memory management)
UploadStream.INSTANCE.tick();
this.sectionUpdateQueue.consume(128);
//Cap the number of consumed sections per frame to 40 + 2% of the queue size, cap of 200
//int geoUpdateCap = 20;//Math.max(100, Math.min((int)(0.15*this.geometryUpdateQueue.count()), 260));
this.geometryUpdateQueue.consumeMillis(1);
VarHandle.fullFence();
long updateBudget = Math.max(1_000_000-(System.nanoTime()-start), 0);
VarHandle.fullFence();
if (updateBudget > 50_000) {
//Cap the number of consumed sections per frame to 40 + 2% of the queue size, cap of 200
//int geoUpdateCap = 20;//Math.max(100, Math.min((int)(0.15*this.geometryUpdateQueue.count()), 260));
this.geometryUpdateQueue.consumeNano(updateBudget);
}
this.nodeCleaner.tick(this.traversal.getNodeBuffer());//Probably do this here??
if (this.nodeManager.writeChanges(this.traversal.getNodeBuffer())) {//TODO: maybe move the node buffer out of the traversal class
UploadStream.INSTANCE.commit();
}
this.nodeCleaner.tick(this.traversal.getNodeBuffer());//Probably do this here??
//this needs to go after, due to geometry updates committed by the nodeManager
this.sectionRenderer.getGeometryManager().tick();
//Tick upload stream
UploadStream.INSTANCE.tick();
TimingStatistics.dynamic.stop();
TimingStatistics.main.start();
}

View File

@@ -104,6 +104,7 @@ public class VoxyRenderSystem {
downstream.tick();
}*/
TimingStatistics.all.start();
TimingStatistics.setup.start();
this.renderDistanceTracker.setCenterAndProcess(camera.getBlockPos().getX(), camera.getBlockPos().getZ());
@@ -112,6 +113,7 @@ public class VoxyRenderSystem {
PrintfDebugUtil.tick();
TimingStatistics.setup.stop();
TimingStatistics.all.stop();
}
private static Matrix4f makeProjectionMatrix(float near, float far) {
@@ -140,6 +142,7 @@ public class VoxyRenderSystem {
if (IrisUtil.irisShadowActive()) {
return;
}
TimingStatistics.all.start();
TimingStatistics.main.start();
if (false) {
@@ -207,7 +210,7 @@ public class VoxyRenderSystem {
this.postProcessing.renderPost(projection, RenderSystem.getProjectionMatrix(), boundFB);
glBindFramebuffer(GlConst.GL_FRAMEBUFFER, oldFB);
TimingStatistics.main.stop();
TimingStatistics.all.stop();
}
public void addDebugInfo(List<String> debug) {
@@ -215,8 +218,10 @@ public class VoxyRenderSystem {
this.renderer.addDebugData(debug);
{
TimingStatistics.update();
debug.add("Voxy frame runtime (millis): " + TimingStatistics.setup.pVal() + ", " + TimingStatistics.dynamic.pVal() + ", " + TimingStatistics.main.pVal());
debug.add("Voxy frame runtime (millis): " + TimingStatistics.setup.pVal() + ", " + TimingStatistics.dynamic.pVal() + ", " + TimingStatistics.main.pVal()+ ", " + TimingStatistics.all.pVal());
}
int val = RenderGenerationService.FC.getAndSet(0);
debug.add("FC: " + val);
PrintfDebugUtil.addToOut(debug);
}

View File

@@ -1,6 +1,7 @@
package me.cortex.voxy.client.core.rendering.building;
import me.cortex.voxy.client.core.gl.Capabilities;
import me.cortex.voxy.client.core.model.IdNotYetComputedException;
import me.cortex.voxy.client.core.model.ModelFactory;
import me.cortex.voxy.client.core.model.ModelQueries;
import me.cortex.voxy.client.core.util.Mesher2D;
@@ -1230,9 +1231,14 @@ public class RenderDataFactory45 {
int neighborMsk = this.prepareSectionData();
this.acquireNeighborData(section, neighborMsk);
this.generateYZFaces();
this.generateXFaces();
try {
this.generateYZFaces();
this.generateXFaces();
} catch (IdNotYetComputedException e) {
e.auxBitMsk = neighborMsk;
e.auxData = this.neighboringFaces;
throw e;
}
//TODO:NOTE! when doing face culling of translucent blocks,
// if the connecting type of the translucent block is the same AND the face is full, discard it

View File

@@ -14,6 +14,7 @@ import me.cortex.voxy.common.thread.ServiceSlice;
import me.cortex.voxy.common.thread.ServiceThreadPool;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BooleanSupplier;
import java.util.function.Consumer;
import java.util.function.Supplier;
@@ -23,7 +24,8 @@ public class RenderGenerationService {
private static final class BuildTask {
WorldSection section;
final long position;
boolean hasDoneModelRequest;
boolean hasDoneModelRequestInner;
boolean hasDoneModelRequestOuter;
private BuildTask(long position) {
this.position = position;
}
@@ -52,8 +54,9 @@ public class RenderGenerationService {
this.threads = serviceThreadPool.createService("Section mesh generation service", 100, ()->{
//Thread local instance of the factory
var factory = new RenderDataFactory45(this.world, this.modelBakery.factory, this.emitMeshlets);
IntOpenHashSet seenMissed = new IntOpenHashSet(128);
return new Pair<>(() -> {
this.processJob(factory);
this.processJob(factory, seenMissed);
}, factory::free);
}, taskLimiter);
}
@@ -61,14 +64,28 @@ public class RenderGenerationService {
//NOTE: the biomes are always fully populated/kept up to date
//Asks the Model system to bake all blocks that currently dont have a model
private void computeAndRequestRequiredModels(WorldSection section, int extraId) {
var raw = section.copyData();//TODO: replace with copyDataTo and use a "thread local"/context array to reduce allocation rates
IntOpenHashSet seen = new IntOpenHashSet(128);
seen.add(extraId);
for (long state : raw) {
/**
 * Requests bakes for every block id found in the selected slabs of {@code auxData}
 * that does not yet have a model.
 *
 * @param seenMissedIds ids that have already been requested; an id is only sent to the bakery
 *                      the first time it is added to this set
 * @param bitMsk        bit {@code i} set means slab {@code i} (of 6) of {@code auxData} is scanned
 * @param auxData       six consecutive slabs of 32*32 packed states, indexed {@code slab*32*32 + j}
 */
private void computeAndRequestRequiredModels(IntOpenHashSet seenMissedIds, int bitMsk, long[] auxData) {
    final int slabSize = 32*32;
    final var modelFactory = this.modelBakery.factory;
    for (int slab = 0; slab < 6; slab++) {
        boolean slabSelected = (bitMsk & (1 << slab)) != 0;
        if (!slabSelected) {
            continue;
        }
        final int base = slab * slabSize;
        for (int idx = base; idx < base + slabSize; idx++) {
            int blockId = Mapper.getBlockId(auxData[idx]);
            //Id 0 is never requested (presumably air - confirm), nor are ids that already have a model
            if (blockId == 0 || modelFactory.hasModelForBlockId(blockId)) {
                continue;
            }
            if (seenMissedIds.add(blockId)) {
                this.modelBakery.requestBlockBake(blockId);
            }
        }
    }
}
private void computeAndRequestRequiredModels(IntOpenHashSet seenMissedIds, WorldSection section) {
//Know this is... very much not safe, however it reduces allocation rates and other garbage, am sure its "fine"
final var factory = this.modelBakery.factory;
for (long state : section._unsafeGetRawDataArray()) {
int block = Mapper.getBlockId(state);
if (!this.modelBakery.factory.hasModelForBlockId(block)) {
if (seen.add(block)) {
if (block != 0 && !factory.hasModelForBlockId(block)) {
if (seenMissedIds.add(block)) {
this.modelBakery.requestBlockBake(block);
}
}
@@ -79,8 +96,14 @@ public class RenderGenerationService {
return this.world.acquireIfExists(pos);
}
/**
 * Decides whether a build task for the given section position should be moved to the front of the queue.
 *
 * @param pos packed section position
 * @return {@code true} when the level decoded from {@code pos} is above 2
 */
private static boolean putTaskFirst(long pos) {
    //Level 3 or 4
    var level = WorldEngine.getLevel(pos);
    return level > 2;
}
public static final AtomicInteger FC = new AtomicInteger(0);
//TODO: add a generated render data cache
private void processJob(RenderDataFactory45 factory) {
private void processJob(RenderDataFactory45 factory, IntOpenHashSet seenMissedIds) {
BuildTask task;
synchronized (this.taskQueue) {
task = this.taskQueue.removeFirst();
@@ -105,20 +128,33 @@ public class RenderGenerationService {
try {
mesh = factory.generateMesh(section);
} catch (IdNotYetComputedException e) {
//TODO: maybe move this to _after_ task as been readded to queue??
if (!this.modelBakery.factory.hasModelForBlockId(e.id)) {
this.modelBakery.requestBlockBake(e.id);
if (e.isIdBlockId) {
//TODO: maybe move this to _after_ task as been readded to queue??
if (!this.modelBakery.factory.hasModelForBlockId(e.id)) {
if (seenMissedIds.add(e.id)) {
this.modelBakery.requestBlockBake(e.id);
}
}
}
if (task.hasDoneModelRequest) {
if (task.hasDoneModelRequestInner && task.hasDoneModelRequestOuter) {
FC.addAndGet(1);
try {
Thread.sleep(1);
} catch (InterruptedException ex) {
throw new RuntimeException(ex);
}
} else {
}
if (!task.hasDoneModelRequestInner) {
//The reason for the extra id parameter is that we explicitly add/check against the exception id due to e.g. requesting accross a chunk boarder wont be captured in the request
this.computeAndRequestRequiredModels(section, e.id);
if (e.auxData == null)//the null check this is because for it to be, the inner must already be computed
this.computeAndRequestRequiredModels(seenMissedIds, section);
task.hasDoneModelRequestInner = true;
}
if ((!task.hasDoneModelRequestOuter) && e.auxData != null) {
this.computeAndRequestRequiredModels(seenMissedIds, e.auxBitMsk, e.auxData);
task.hasDoneModelRequestOuter = true;
}
@@ -130,12 +166,14 @@ public class RenderGenerationService {
BuildTask queuedTask;
synchronized (this.taskQueue) {
queuedTask = this.taskQueue.putIfAbsent(section.key, task);
}
if (queuedTask == null) {
queuedTask = task;
}
if (queuedTask == null) {
queuedTask = task;
}
queuedTask.hasDoneModelRequest = true;//Mark (or remark) the section as having chunks requested
if (queuedTask.hasDoneModelRequestInner && queuedTask.hasDoneModelRequestOuter && putTaskFirst(section.key)) {//Force higher priority
this.taskQueue.getAndMoveToFirst(section.key);
}
}
if (queuedTask == task) {//use the == not .equal to see if we need to release a permit
if (this.threads.isAlive()) {//Only execute if were not dead
@@ -145,8 +183,12 @@ public class RenderGenerationService {
//If we did put it in the queue, dont release the section
shouldFreeSection = false;
} else {
//This should no longer be a worry with LRU section cache
//Logger.info("Funkyness happened and multiple tasks for same section where in queue");
//Mark (or remark) the section as having models requested
if (task.hasDoneModelRequestInner)
queuedTask.hasDoneModelRequestInner = true;
if (task.hasDoneModelRequestOuter)
queuedTask.hasDoneModelRequestOuter = true;
//Things went bad, set section to null and ensure section is freed
task.section = null;
@@ -172,7 +214,7 @@ public class RenderGenerationService {
return new BuildTask(key);
});
//Prioritize lower detail builds
if (WorldEngine.getLevel(pos) > 2) {
if (putTaskFirst(pos)) {
this.taskQueue.getAndMoveToFirst(pos);
}
}

View File

@@ -58,6 +58,8 @@ public class RawDownloadStream {
}
public void tick() {
this.submit();
while (!this.frames.isEmpty()) {
//If the first element is not signaled, none of the others will be signaled so break
if (!this.frames.peek().fence.signaled()) {

View File

@@ -1,5 +1,6 @@
package me.cortex.voxy.common.util;
import java.lang.invoke.VarHandle;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
@@ -39,18 +40,20 @@ public class MessageQueue <T> {
return i;
}
public int consumeMillis(int millis) {
public int consumeNano(long budget) {
if (this.count.get() == 0) {
return 0;
}
int i = 0;
VarHandle.fullFence();
long nano = System.nanoTime();
VarHandle.fullFence();
do {
var entry = this.queue.poll();
if (entry == null) break;
i++;
this.consumer.accept(entry);
} while ((System.nanoTime()-nano) < millis*1000_000L);
} while ((System.nanoTime()-nano) < budget);
if (i != 0) {
this.count.addAndGet(-i);
}