This commit is contained in:
mcrcortex
2025-05-07 09:48:21 +10:00
parent ec866fa1b8
commit ff4c1b267e
9 changed files with 23 additions and 399 deletions

View File

@@ -166,7 +166,7 @@ public class RenderService<T extends AbstractSectionRenderer<J, ?>, J extends Vi
} }
this.traversal.doTraversal(viewport, depthBuffer); this.traversal.doTraversal(viewport, depthBuffer);
this.sectionRenderer.buildDrawCalls(viewport, this.traversal.getRenderListBuffer()); this.sectionRenderer.buildDrawCalls(viewport);
this.sectionRenderer.renderTemporal(depthBoundTexture); this.sectionRenderer.renderTemporal(depthBoundTexture);
} }

View File

@@ -1,5 +1,6 @@
package me.cortex.voxy.client.core.rendering; package me.cortex.voxy.client.core.rendering;
import me.cortex.voxy.client.core.gl.GlBuffer;
import net.minecraft.util.math.MathHelper; import net.minecraft.util.math.MathHelper;
import org.joml.*; import org.joml.*;
@@ -90,4 +91,6 @@ public abstract class Viewport <A extends Viewport<A>> {
return (A) this; return (A) this;
} }
public abstract GlBuffer getRenderList();
} }

View File

@@ -43,13 +43,12 @@ public class HierarchicalOcclusionTraverser {
private final GlBuffer nodeBuffer; private final GlBuffer nodeBuffer;
private final GlBuffer uniformBuffer = new GlBuffer(1024).zero(); private final GlBuffer uniformBuffer = new GlBuffer(1024).zero();
private final GlBuffer renderList = new GlBuffer(MAX_QUEUE_SIZE * 4 + 4).zero();//MAX_QUEUE_SIZE sections max to render, TODO: Maybe move to render service or somewhere else
private final GlBuffer statisticsBuffer = new GlBuffer(1024).zero(); private final GlBuffer statisticsBuffer = new GlBuffer(1024).zero();
private int topNodeCount; private int topNodeCount;
private final Int2IntOpenHashMap topNode2idxMapping = new Int2IntOpenHashMap();//Used to store mapping from TLN to array index private final Int2IntOpenHashMap topNode2idxMapping = new Int2IntOpenHashMap();//Used to store mapping from TLN to array index
private final int[] idx2topNodeMapping = new int[100_000];//Used to map idx to TLN id private final int[] idx2topNodeMapping = new int[MAX_QUEUE_SIZE];//Used to map idx to TLN id
private final GlBuffer topNodeIds = new GlBuffer(MAX_QUEUE_SIZE*4).zero(); private final GlBuffer topNodeIds = new GlBuffer(MAX_QUEUE_SIZE*4).zero();
private final GlBuffer queueMetaBuffer = new GlBuffer(4*4*MAX_ITERATIONS).zero(); private final GlBuffer queueMetaBuffer = new GlBuffer(4*4*MAX_ITERATIONS).zero();
private final GlBuffer scratchQueueA = new GlBuffer(MAX_QUEUE_SIZE*4).zero(); private final GlBuffer scratchQueueA = new GlBuffer(MAX_QUEUE_SIZE*4).zero();
@@ -114,7 +113,6 @@ public class HierarchicalOcclusionTraverser {
this.traversal this.traversal
.ubo("SCENE_UNIFORM_BINDING", this.uniformBuffer) .ubo("SCENE_UNIFORM_BINDING", this.uniformBuffer)
.ssbo("REQUEST_QUEUE_BINDING", this.requestBuffer) .ssbo("REQUEST_QUEUE_BINDING", this.requestBuffer)
.ssbo("RENDER_QUEUE_BINDING", this.renderList)
.ssbo("NODE_DATA_BINDING", this.nodeBuffer) .ssbo("NODE_DATA_BINDING", this.nodeBuffer)
.ssbo("NODE_QUEUE_META_BINDING", this.queueMetaBuffer) .ssbo("NODE_QUEUE_META_BINDING", this.queueMetaBuffer)
.ssbo("RENDER_TRACKER_BINDING", this.nodeCleaner.visibilityBuffer) .ssbo("RENDER_TRACKER_BINDING", this.nodeCleaner.visibilityBuffer)
@@ -183,7 +181,7 @@ public class HierarchicalOcclusionTraverser {
setFrustum(viewport, ptr); ptr += 4*4*6; setFrustum(viewport, ptr); ptr += 4*4*6;
MemoryUtil.memPutInt(ptr, (int) (this.renderList.size()/4-1)); ptr += 4; MemoryUtil.memPutInt(ptr, (int) (viewport.getRenderList().size()/4-1)); ptr += 4;
final float screenspaceAreaDecreasingSize = VoxyConfig.CONFIG.subDivisionSize*VoxyConfig.CONFIG.subDivisionSize; final float screenspaceAreaDecreasingSize = VoxyConfig.CONFIG.subDivisionSize*VoxyConfig.CONFIG.subDivisionSize;
@@ -194,12 +192,13 @@ public class HierarchicalOcclusionTraverser {
MemoryUtil.memPutInt(ptr, this.nodeCleaner.visibilityId); ptr += 4; MemoryUtil.memPutInt(ptr, this.nodeCleaner.visibilityId); ptr += 4;
} }
private void bindings() { private void bindings(Viewport<?> viewport) {
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, this.queueMetaBuffer.id); glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, this.queueMetaBuffer.id);
//Bind the hiz buffer //Bind the hiz buffer
glBindTextureUnit(0, this.hiZBuffer.getHizTextureId()); glBindTextureUnit(0, this.hiZBuffer.getHizTextureId());
glBindSampler(0, this.hizSampler); glBindSampler(0, this.hizSampler);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, RENDER_QUEUE_BINDING, viewport.getRenderList().id);
} }
public void doTraversal(Viewport<?> viewport, int depthBuffer) { public void doTraversal(Viewport<?> viewport, int depthBuffer) {
@@ -210,13 +209,17 @@ public class HierarchicalOcclusionTraverser {
//UploadStream.INSTANCE.commit(); //Done inside traversal //UploadStream.INSTANCE.commit(); //Done inside traversal
this.traversal.bind(); this.traversal.bind();
this.bindings(); this.bindings(viewport);
PrintfDebugUtil.bind(); PrintfDebugUtil.bind();
if (RenderStatistics.enabled) { if (RenderStatistics.enabled) {
this.statisticsBuffer.zero(); this.statisticsBuffer.zero();
} }
//Clear the render output counter
nglClearNamedBufferSubData(viewport.getRenderList().id, GL_R32UI, 0, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);
//Traverse
this.traverseInternal(); this.traverseInternal();
this.downloadResetRequestQueue(); this.downloadResetRequestQueue();
@@ -248,10 +251,6 @@ public class HierarchicalOcclusionTraverser {
glPixelStorei(GL_UNPACK_SKIP_IMAGES, 0); glPixelStorei(GL_UNPACK_SKIP_IMAGES, 0);
} }
//Clear the render output counter
nglClearNamedBufferSubData(this.renderList.id, GL_R32UI, 0, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);
int firstDispatchSize = (this.topNodeCount+(1<<LOCAL_WORK_SIZE_BITS)-1)>>LOCAL_WORK_SIZE_BITS; int firstDispatchSize = (this.topNodeCount+(1<<LOCAL_WORK_SIZE_BITS)-1)>>LOCAL_WORK_SIZE_BITS;
/* /*
//prime the queue Todo: maybe move after the traversal? cause then it is more efficient work since it doesnt need to wait for this before starting? //prime the queue Todo: maybe move after the traversal? cause then it is more efficient work since it doesnt need to wait for this before starting?
@@ -309,10 +308,6 @@ public class HierarchicalOcclusionTraverser {
nglClearNamedBufferSubData(this.requestBuffer.id, GL_R32UI, 0, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, 0); nglClearNamedBufferSubData(this.requestBuffer.id, GL_R32UI, 0, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);
} }
/**
 * Returns the traverser-owned {@code renderList} buffer.
 *
 * @return the {@code renderList} field of this traverser
 */
public GlBuffer getRenderListBuffer() {
return this.renderList;
}
private void forwardDownloadResult(long ptr, long size) { private void forwardDownloadResult(long ptr, long size) {
int count = MemoryUtil.memGetInt(ptr);ptr += 8;//its 8 since we need to skip the second value (which is empty) int count = MemoryUtil.memGetInt(ptr);ptr += 8;//its 8 since we need to skip the second value (which is empty)
if (count < 0 || count > 50000) { if (count < 0 || count > 50000) {
@@ -352,7 +347,6 @@ public class HierarchicalOcclusionTraverser {
this.nodeBuffer.free(); this.nodeBuffer.free();
this.uniformBuffer.free(); this.uniformBuffer.free();
this.statisticsBuffer.free(); this.statisticsBuffer.free();
this.renderList.free();
this.queueMetaBuffer.free(); this.queueMetaBuffer.free();
this.topNodeIds.free(); this.topNodeIds.free();
this.scratchQueueA.free(); this.scratchQueueA.free();

View File

@@ -18,7 +18,7 @@ public abstract class AbstractSectionRenderer <T extends Viewport<T>, J extends
} }
public abstract void renderOpaque(T viewport, GlTexture depthBoundTexture); public abstract void renderOpaque(T viewport, GlTexture depthBoundTexture);
public abstract void buildDrawCalls(T viewport, GlBuffer sectionRenderList); public abstract void buildDrawCalls(T viewport);
public abstract void renderTemporal(GlTexture depthBoundTexture); public abstract void renderTemporal(GlTexture depthBoundTexture);
public abstract void renderTranslucent(T viewport, GlTexture depthBoundTexture); public abstract void renderTranslucent(T viewport, GlTexture depthBoundTexture);
public abstract T createViewport(); public abstract T createViewport();

View File

@@ -1,10 +0,0 @@
package me.cortex.voxy.client.core.rendering.section;
import me.cortex.voxy.client.core.rendering.Viewport;
/**
 * Minimal {@link Viewport} specialisation that declares no state of its own.
 */
public class BasicViewport extends Viewport<BasicViewport> {

    /**
     * Deletion hook; intentionally a no-op because this class declares no
     * fields that would need to be released.
     */
    @Override
    protected void delete0() {
        // Nothing to free.
    }
}

View File

@@ -172,24 +172,18 @@ public class MDICSectionRenderer extends AbstractSectionRenderer<MDICViewport, B
} }
@Override @Override
public void buildDrawCalls(MDICViewport viewport, GlBuffer sectionRenderList) { public void buildDrawCalls(MDICViewport viewport) {
if (this.geometryManager.getSectionCount() == 0) return; if (this.geometryManager.getSectionCount() == 0) return;
this.uploadUniformBuffer(viewport); this.uploadUniformBuffer(viewport);
//Can do a sneaky trick, since the sectionRenderList is a list of things to render, it invokes the culler //Can do a sneaky trick, since the sectionRenderList is a list of things to render, it invokes the culler
// which only marks visible sections // which only marks visible sections
//TODO: dont do a copy
// make it so that the viewport contains the original indirectLookupBuffer list!!!
// that way dont need to copy the array
glCopyNamedBufferSubData(sectionRenderList.id, viewport.indirectLookupBuffer.id, 0, 0, sectionRenderList.size());
{//Dispatch prep {//Dispatch prep
this.prepShader.bind(); this.prepShader.bind();
glBindBufferBase(GL_UNIFORM_BUFFER, 0, this.uniform.id); glBindBufferBase(GL_UNIFORM_BUFFER, 0, this.uniform.id);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, this.drawCountCallBuffer.id); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, this.drawCountCallBuffer.id);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, sectionRenderList.id); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, viewport.getRenderList().id);
glDispatchCompute(1,1,1); glDispatchCompute(1,1,1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
} }

View File

@@ -2,9 +2,10 @@ package me.cortex.voxy.client.core.rendering.section;
import me.cortex.voxy.client.core.gl.GlBuffer; import me.cortex.voxy.client.core.gl.GlBuffer;
import me.cortex.voxy.client.core.rendering.Viewport; import me.cortex.voxy.client.core.rendering.Viewport;
import me.cortex.voxy.client.core.rendering.hierachical.HierarchicalOcclusionTraverser;
public class MDICViewport extends Viewport<MDICViewport> { public class MDICViewport extends Viewport<MDICViewport> {
public final GlBuffer indirectLookupBuffer = new GlBuffer(100_000*4+4); public final GlBuffer indirectLookupBuffer = new GlBuffer(HierarchicalOcclusionTraverser.MAX_QUEUE_SIZE *4+4);
public final GlBuffer visibilityBuffer; public final GlBuffer visibilityBuffer;
public MDICViewport(int maxSectionCount) { public MDICViewport(int maxSectionCount) {
@@ -16,4 +17,9 @@ public class MDICViewport extends Viewport<MDICViewport> {
this.visibilityBuffer.free(); this.visibilityBuffer.free();
this.indirectLookupBuffer.free(); this.indirectLookupBuffer.free();
} }
/**
 * Exposes this viewport's {@code indirectLookupBuffer} as its render list.
 *
 * @return the {@code indirectLookupBuffer} owned by this viewport
 */
@Override
public GlBuffer getRenderList() {
return this.indirectLookupBuffer;
}
} }

View File

@@ -1,258 +0,0 @@
package me.cortex.voxy.client.core.util;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Random;
//TODO: redo this so that it works as you are inserting data into it maybe? since it should be much faster??
/**
 * Greedy rectangle mesher over a fixed 32x32 grid of {@code long} cell values.
 *
 * <p>Cells are written with {@link #put(int, int, long)}. {@link #process()} then merges
 * adjacent occupied cells that hold the same value into rectangles and stores them in an
 * internal {@code int} array, three ints per quad: the packed position/extent word (decoded by
 * {@link #getX(int)}, {@link #getZ(int)}, {@link #getH(int)} and {@link #getW(int)}), followed by
 * the low and then the high 32 bits of the cell value.
 *
 * <p>{@link #process()} clears the occupancy bit of every cell it emits, so calling it again
 * without new {@code put} calls yields zero quads. {@link #reset()} clears all state.
 */
public final class Mesher2D {
    /** Maximum value of {@code endX - x} while growing a quad along x (extent is this plus one). */
    private static final int MAX_MERGED_SIZE = 15;//16
    /** log2 of the grid side length (grid is 32x32). */
    private static final int SIZE_BITS = 5;
    /** Mask selecting a single in-grid coordinate. */
    private static final int MSK = (1<<SIZE_BITS) -1;

    /** Cell values, indexed by {@link #getIdx(int, int)}. */
    private final long[] data;
    /** Occupancy bitmap, one bit per cell, 64 cells per word. */
    private final long[] setset;
    /** Output array of the most recent {@link #process()} call; grown on demand. */
    private int[] quadCache;
    /** True until the first {@code put} after construction or {@link #reset()}. */
    private boolean isEmpty = true;
    /** Summary mask: bit {@code w} is set once any cell in {@code setset[w]} has been put. */
    private int setsMsk = 0;

    /** Creates an empty mesher. */
    public Mesher2D() {
        this.data = new long[1<<(SIZE_BITS<<1)];
        this.setset = new long[(1<<(SIZE_BITS<<1))>>6];
        this.quadCache = new int[128];
    }

    /** Maps grid coordinates (each masked to 0..31) to a linear cell index, z-major. */
    private static int getIdx(int x, int z) {
        return ((z&MSK)<<SIZE_BITS)|(x&MSK);
    }

    /**
     * Stores a value in a cell and marks the cell as occupied.
     *
     * @param x    x coordinate; only the low 5 bits are used
     * @param z    z coordinate; only the low 5 bits are used
     * @param data value to store
     * @return this mesher, for chaining
     */
    public Mesher2D put(int x, int z, long data) {
        this.isEmpty = false;
        int idx = getIdx(x, z);
        this.data[idx] = data;
        this.setset[idx>>6] |= 1L<<(idx&0b111111);
        this.setsMsk |= 1<<(idx>>6);
        return this;
    }

    /** Extracts bits 24..31 of a packed quad word: the quad's x origin. */
    public static int getX(int data) {
        return (data>>24)&0xFF;
    }

    /** Extracts bits 16..23 of a packed quad word: the quad's z origin. */
    public static int getZ(int data) {
        return (data>>16)&0xFF;
    }

    /** Extracts bits 8..15 of a packed quad word: the extent along x as packed by {@code process()}. */
    public static int getH(int data) {
        return (data>>8)&0xFF;
    }

    /** Extracts bits 0..7 of a packed quad word: the extent along z as packed by {@code process()}. */
    public static int getW(int data) {
        return data&0xFF;
    }

    /** Packs origin and extents into one int, 8 bits each: x, z, sx, sz from high to low. */
    private static int encodeQuad(int x, int z, int sx, int sz) {
        return ((x&0xFF)<<24)|((z&0xFF)<<16)|((sx&0xFF)<<8)|((sz&0xFF)<<0);
    }

    /** Returns true when the cell is still occupied and holds exactly {@code match}. */
    private boolean canMerge(int x, int z, long match) {
        int id = getIdx(x, z);
        return (this.setset[id>>6]&(1L<<(id&0b111111))) != 0 && this.data[id] == match;
    }

    /**
     * Finds the lowest occupied cell index, searching from the start of the 64-cell word that
     * contains {@code base}.
     *
     * @param base cell index whose word is the first one examined
     * @return the index of the first occupied cell found, or -1 when none remains
     */
    private int nextSetBit(int base) {
        int firstWord = base>>6;
        // Use the summary mask to jump straight to the first word that was ever written.
        int wPos = Integer.numberOfTrailingZeros(this.setsMsk>>>firstWord)+firstWord;
        // numberOfTrailingZeros returns 32 when no summary bit is left, which would step past
        // an equality sentinel; bounding with '<' keeps the scan inside the bitmap.
        while (wPos < this.setset.length) {
            long word = this.setset[wPos];
            if (word != 0) {
                return Long.numberOfTrailingZeros(word) + (wPos<<6);
            }
            wPos++;
        }
        return -1;
    }

    /**
     * Merges all currently occupied cells into quads and writes them to the array returned by
     * {@link #getArray()}.
     *
     * @return the number of quads produced (each occupies three consecutive ints)
     */
    public int process() {
        if (this.isEmpty) {
            return 0;
        }

        int[] quads = this.quadCache;
        int idxCount = 0;
        int counter = 0;

        //TODO: add different strategies/ways to mesh
        // Start from the occupancy bitmap rather than from data[0]: data is not cleared by
        // process(), so a stale non-zero data[0] must not be mistaken for an occupied cell.
        int posId = this.nextSetBit(0);
        while (posId < this.data.length && posId != -1) {
            int idx = posId;
            long data = this.data[idx];

            int x = idx&MSK;
            int z = (idx>>>SIZE_BITS)&MSK;

            boolean ex = x != MSK;
            boolean ez = z != MSK;
            int endX = x;
            int endZ = z;
            // Alternately grow the rectangle by one column and one row until neither can grow.
            while (ex || ez) {
                //Expand in the x direction
                if (ex) {
                    if (endX - x >= MAX_MERGED_SIZE || endX >= MSK) {
                        ex = false;
                    }
                }
                if (ex) {
                    for (int tz = z; tz < endZ+1; tz++) {
                        if (!this.canMerge(endX + 1, tz, data)) {
                            ex = false;
                            break;
                        }
                    }
                }
                if (ex) {
                    endX++;
                }

                //Expand in the z direction
                if (ez) {
                    // NOTE(review): the z limit uses SIZE_BITS (5) whereas x uses
                    // MAX_MERGED_SIZE (15); this looks unintentional - confirm before changing,
                    // as it alters the emitted quads.
                    if (endZ - z >= SIZE_BITS || endZ >= MSK) {
                        ez = false;
                    }
                }
                if (ez) {
                    for (int tx = x; tx < endX+1; tx++) {
                        if (!this.canMerge(tx, endZ + 1, data)) {
                            ez = false;
                            break;
                        }
                    }
                }
                if (ez) {
                    endZ++;
                }
            }

            //Mark the sections as meshed
            for (int mx = x; mx <= endX; mx++) {
                for (int mz = z; mz <= endZ; mz++) {
                    int cid = getIdx(mx, mz);
                    this.setset[cid>>6] &= ~(1L<<(cid&0b111111));
                }
            }

            int encodedQuad = encodeQuad(x, z, endX - x + 1, endZ - z + 1);

            {
                counter++;
                int pIdx = idxCount;
                idxCount += 3;
                // Grow the output array before it can overflow.
                if (quads.length <= idxCount+3) {
                    var newArray = new int[quads.length + 64*3];
                    System.arraycopy(quads, 0, newArray, 0, quads.length);
                    quads = newArray;
                }
                quads[pIdx] = encodedQuad;
                quads[pIdx+1] = (int) data;
                quads[pIdx+2] = (int) (data>>32);
            }
            posId = this.nextSetBit(posId);
        }

        this.quadCache = quads;
        return counter;
    }

    /**
     * Returns the internal output array written by the last {@link #process()} call. The array
     * is not copied and may be longer than the number of ints actually written.
     */
    public int[] getArray() {
        return this.quadCache;
    }

    /** Clears all cell values and occupancy state; a no-op when nothing has been put. */
    public void reset() {
        if (!this.isEmpty) {
            this.isEmpty = true;
            this.setsMsk = 0;
            Arrays.fill(this.setset, 0);
            Arrays.fill(this.data, 0);
        }
    }

    /** Manual check: meshes a 2x2 block in the grid's far corner and prints the quad count. */
    public static void main3(String[] args) {
        var mesh = new Mesher2D();
        mesh.put(30,30, 123);
        mesh.put(31,30, 123);
        mesh.put(30,31, 123);
        mesh.put(31,31, 123);
        int count = mesh.process();
        System.err.println(count);
    }

    /** Manual benchmark: random warm-up passes, then a timed fixed-pattern loop printed to stderr. */
    public static void main2(String[] args) {
        var r = new Random(123451);
        var mesh = new Mesher2D();
        /*
        for (int j = 0; j < 512; j++) {
            mesh.put(r.nextInt(32), r.nextInt(32), r.nextInt(10));
        }
        */
        int cnt = 0;
        for (int i = 0; i < 12000; i++) {
            for (int j = 0; j < 512; j++) {
                mesh.put(r.nextInt(32), r.nextInt(32), r.nextInt(32));
            }
            cnt += mesh.process();
            mesh.reset();
        }
        cnt = 0;
        long start = System.currentTimeMillis();
        for (int i = 0; i < 1000000; i++) {
            for (int x = 0; x < 16; x++) {
                for (int z = 0; z < 15; z++) {
                    mesh.put(x, z, 134);
                }
            }

            mesh.put(31, 31, 134);
            cnt += mesh.process();
            mesh.reset();
        }
        System.err.println(cnt);
        System.err.println(System.currentTimeMillis()-start);
        var dat = mesh.getArray();
        //for (int i = 0; i < cnt; i++) {
        //    var q = dat[i];
        //    System.err.println("X: " + getX(q) + " Z: " + getZ(q) + " W: " + getW(q) + " H: " + getH(q));
        //}
    }

    /** Manual benchmark: prints the mean {@link #process()} time in milliseconds for random grids. */
    public static void main(String[] args) {
        var r = new Random(123451);
        int a = 0;
        //Prime code
        for (int i = 0; i < 100000; i++) {
            var mesh = new Mesher2D();
            for (int j = 0; j < 512; j++) {
                mesh.put(r.nextInt(32), r.nextInt(32), r.nextInt(100));
            }
            var result = mesh.process();
            a += result;
        }

        long total = 0;
        int COUNT = 200000;
        for (int i = 0; i < COUNT; i++) {
            var mesh = new Mesher2D();
            for (int j = 0; j < 512; j++) {
                mesh.put(r.nextInt(32), r.nextInt(32), r.nextInt(100));
            }
            long s = System.nanoTime();
            var result = mesh.process();
            total += System.nanoTime() - s;
            a += result;
        }
        System.out.println(((double) total/COUNT)*(1e-6));
    }
}

View File

@@ -183,111 +183,6 @@ public abstract class ScanMesher2D {
} }
} }
/**
 * Manual micro-benchmark: meshes one fixed pseudo-random 32x32 grid with both a
 * {@code ScanMesher2D} and a {@code Mesher2D}. Each mesher gets 500000 untimed passes
 * first, then 1000000 timed passes whose elapsed time in milliseconds is printed to stdout
 * (scan mesher first, then {@code Mesher2D}).
 */
public static void main5(String[] args) {
    final float DENSITY = 0.5f;
    final int RANGE = 50;

    var r = new Random(0);
    long[] grid = new long[32*32];
    for (int i = 0; i < grid.length; i++) {
        if (r.nextFloat() < DENSITY) {
            grid[i] = r.nextInt(RANGE) + 1;
        } else {
            grid[i] = 0;
        }
    }

    // qc[0] counts emitted quads, qc[1] sums the area they cover.
    int[] qc = new int[2];
    var scanMesher = new ScanMesher2D(){
        @Override
        protected void emitQuad(int x, int z, int length, int width, long data) {
            qc[0]++;
            qc[1]+=length*width;
        }
    };

    // Untimed passes for the scan mesher.
    for (int pass = 0; pass < 500000; pass++) {
        for (int k = 0; k < grid.length; k++) {
            scanMesher.putNext(grid[k]);
        }
        scanMesher.finish();
    }

    // Untimed passes for the grid mesher; only non-zero cells are inserted.
    var gridMesher = new Mesher2D();
    for (int pass = 0; pass < 500000; pass++) {
        gridMesher.reset();
        for (int k = 0; k < grid.length; k++) {
            long v = grid[k];
            if (v != 0) {
                gridMesher.put(k&31, k>>5, v);
            }
        }
        gridMesher.process();
    }

    // Timed run: scan mesher.
    long start = System.nanoTime();
    for (int pass = 0; pass < 1000000; pass++) {
        for (int k = 0; k < grid.length; k++) {
            scanMesher.putNext(grid[k]);
        }
        scanMesher.finish();
    }
    long elapsed = System.nanoTime() - start;
    System.out.println(elapsed*1e-6);

    // Timed run: grid mesher.
    start = System.nanoTime();
    for (int pass = 0; pass < 1000000; pass++) {
        gridMesher.reset();
        for (int k = 0; k < grid.length; k++) {
            long v = grid[k];
            if (v != 0) {
                gridMesher.put(k&31, k>>5, v);
            }
        }
        gridMesher.process();
    }
    elapsed = System.nanoTime() - start;
    System.out.println(elapsed*1e-6);
}
/**
 * Manual comparison harness: endlessly feeds identical pseudo-random 32x32 grids to a
 * {@code ScanMesher2D} and a {@code Mesher2D}. Prints an {@code ERROR} line whenever the area
 * covered by the scan mesher's quads differs from the number of non-zero cells, and
 * accumulates the difference in quad counts between the two meshers. Never returns.
 *
 * <p>NOTE(review): the {@code Mesher2D} instance is not reset between iterations - confirm
 * whether that is intended.
 */
public static void main4(String[] args) {
    float DENSITY = 0.75f;
    int RANGE = 25;

    var r = new Random(0);
    // qc[0] counts emitted quads, qc[1] sums the area they cover.
    int[] qc = new int[2];
    var scanMesher = new ScanMesher2D(){
        @Override
        protected void emitQuad(int x, int z, int length, int width, long data) {
            qc[0]++;
            qc[1]+=length*width;
        }
    };
    var gridMesher = new Mesher2D();

    int total = 0;
    for (;;) {
        //DENSITY = r.nextFloat();
        //RANGE = r.nextInt(500)+1;
        qc[0] = 0;
        qc[1] = 0;

        int filled = 0;
        for (int cell = 0; cell < 32*32; cell++) {
            long val;
            if (r.nextFloat() < DENSITY) {
                val = r.nextInt(RANGE) + 1;
            } else {
                val = 0;
            }
            scanMesher.putNext(val);
            if (val != 0) {
                filled++;
                gridMesher.put(cell&31, cell>>5, val);
            }
        }
        scanMesher.finish();

        if (filled != qc[1]) {
            System.out.println("ERROR: "+filled+", " + qc[1]);
        }

        int gridQuads = gridMesher.process();
        total += gridQuads - qc[0];
        //System.out.println(total);
        //System.out.println(filled+", new: " + qc[0] + " old: " + gridQuads);
    }
}
public static void main2(String[] args) { public static void main2(String[] args) {
long[] sample = new long[32*32]; long[] sample = new long[32*32];