diff --git a/src/main/java/me/cortex/voxy/client/RenderStatistics.java b/src/main/java/me/cortex/voxy/client/RenderStatistics.java new file mode 100644 index 00000000..2516e491 --- /dev/null +++ b/src/main/java/me/cortex/voxy/client/RenderStatistics.java @@ -0,0 +1,22 @@ +package me.cortex.voxy.client; + +import me.cortex.voxy.common.world.WorldEngine; + +/** + * Statically shared debug counters for the LOD renderer. + * + * <p>Each per-layer array has {@code WorldEngine.MAX_LOD_LAYER + 1} slots. + */ +public class RenderStatistics { + /** Gates statistics gathering and the matching debug output lines; also read when the traversal shader is built. */ + public static boolean enabled = true; + + /** Per-LOD-level traversal counters copied back from the traversal shader's statistics buffer. */ + public static final int[] hierarchicalTraversalCounts = new int[WorldEngine.MAX_LOD_LAYER+1]; + /** Per-LOD-level render-enqueue counters copied back from the traversal shader's statistics buffer. */ + public static final int[] hierarchicalRenderSections = new int[WorldEngine.MAX_LOD_LAYER+1]; + /** NOTE(review): presumably per-level visible section counts; confirm against the code that writes it. */ + public static final int[] visibleSections = new int[WorldEngine.MAX_LOD_LAYER+1]; + /** NOTE(review): presumably the number of quads rendered; confirm against the code that writes it. */ + public static int renderedQuadCount = 0; +} diff --git a/src/main/java/me/cortex/voxy/client/core/gl/shader/AutoBindingShader.java b/src/main/java/me/cortex/voxy/client/core/gl/shader/AutoBindingShader.java index 2085a38d..753d0294 100644 --- a/src/main/java/me/cortex/voxy/client/core/gl/shader/AutoBindingShader.java +++ b/src/main/java/me/cortex/voxy/client/core/gl/shader/AutoBindingShader.java @@ -39,6 +39,20 @@ public class AutoBindingShader extends Shader { return GlDebug.name(name, this); } + /** + * Forwards to {@code ssbo(define, buffer)} only when {@code define} is present in this shader's defines. + * + * @param define name of the define to look up + * @param buffer buffer handed to {@code ssbo} when the define exists + * @return the result of {@code ssbo(define, buffer)} when the define exists, otherwise this shader + */ + public AutoBindingShader ssboIf(String define, GlBuffer buffer) { + if (this.defines.containsKey(define)) { + return this.ssbo(define, buffer); + } + return this; + } + + public AutoBindingShader ssbo(int index, GlBuffer binding) { return this.ssbo(index, binding, 0); } @@ -52,6 +66,23 @@ public class AutoBindingShader extends Shader { return this; } + + /** + * Resolves a binding index from the define named {@code define} and forwards to {@code ubo(int, GlBuffer)}. + * + * @param define name of a define whose value is the integer binding index + * @param buffer buffer handed to {@code ubo(int, GlBuffer)} + * @return the result of {@code ubo(int, GlBuffer)} + * @throws NumberFormatException if the define is not set or its value is not an integer + */ + public AutoBindingShader ubo(String define, GlBuffer buffer) { + String index = this.defines.get(define); + if (index == null) { + throw new NumberFormatException("Shader define '" + define + "' is not set; cannot resolve a UBO binding index"); + } + return this.ubo(Integer.parseInt(index), buffer); + } + public AutoBindingShader ubo(int index, GlBuffer buffer) { return this.ubo(index, buffer, 0); } diff --git a/src/main/java/me/cortex/voxy/client/core/gl/shader/Shader.java b/src/main/java/me/cortex/voxy/client/core/gl/shader/Shader.java index 7fc51216..47a59b78 100644 --- 
a/src/main/java/me/cortex/voxy/client/core/gl/shader/Shader.java +++ b/src/main/java/me/cortex/voxy/client/core/gl/shader/Shader.java @@ -84,6 +84,13 @@ public class Shader extends TrackedObject { return this; } + public Builder defineIf(String name, boolean condition, int value) { + if (condition) { + this.defines.put(name, Integer.toString(value)); + } + return this; + } + public Builder define(String name, int value) { this.defines.put(name, Integer.toString(value)); return this; diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java b/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java index d5cecc50..3fd80000 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java @@ -1,6 +1,7 @@ package me.cortex.voxy.client.core.rendering; import io.netty.util.internal.MathUtil; +import me.cortex.voxy.client.RenderStatistics; import me.cortex.voxy.client.core.gl.Capabilities; import me.cortex.voxy.client.core.model.ModelBakerySubsystem; import me.cortex.voxy.client.core.model.ModelStore; @@ -23,6 +24,7 @@ import net.minecraft.client.render.Camera; import java.util.Arrays; import java.util.List; +import java.util.stream.Collectors; import static org.lwjgl.opengl.GL42.*; @@ -157,6 +159,20 @@ public class RenderService, J extends Vi this.renderGen.addDebugData(debug); this.sectionRenderer.addDebug(debug); this.nodeManager.addDebug(debug); + + if (RenderStatistics.enabled) { + debug.add("HTC: [" + Arrays.stream(flipCopy(RenderStatistics.hierarchicalTraversalCounts)).mapToObj(Integer::toString).collect(Collectors.joining(", "))+"]"); + debug.add("HRS: [" + Arrays.stream(flipCopy(RenderStatistics.hierarchicalRenderSections)).mapToObj(Integer::toString).collect(Collectors.joining(", "))+"]"); + } + } + + private static int[] flipCopy(int[] array) { + int[] ret = new int[array.length]; + int i = ret.length; + for (int j : array) { + 
ret[--i] = j; + } + return ret; } public void shutdown() { diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/building/RenderDataFactory45.java b/src/main/java/me/cortex/voxy/client/core/rendering/building/RenderDataFactory45.java index c972ae93..a9af6772 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/building/RenderDataFactory45.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/building/RenderDataFactory45.java @@ -432,12 +432,13 @@ public class RenderDataFactory45 { this.seondaryblockMesher.doAuxiliaryFaceOffset = false; this.blockMesher.axis = axis; this.seondaryblockMesher.axis = axis; - for (int layer = 0; layer < 32; layer++) {//(should be 1->31, then have outer face mesher) + for (int layer = 1; layer < 31; layer++) {//(should be 1->31, then have outer face mesher) this.blockMesher.auxiliaryPosition = layer; this.seondaryblockMesher.auxiliaryPosition = layer; int cSkip = 0; for (int other = 0; other < 32; other++) {//TODO: need to do the faces that border sections int pidx = axis == 0 ? (layer * 32 + other) : (other * 32 + layer); + int skipAmount = axis==0?32*32:32; int msk = this.nonOpaqueMasks[pidx]; @@ -465,24 +466,57 @@ public class RenderDataFactory45 { int idx = index + (pidx * 32); long B = this.sectionData[idx * 2+1]; - + //This is just some garbage hack test thing if (ModelQueries.isTranslucent(B)) { - this.blockMesher.putNext(0); + if (axis != 0) { + this.blockMesher.putNext(0); + this.seondaryblockMesher.putNext(0); + continue; + } + + //Example thing thats just wrong but as example + long A = this.sectionData[idx * 2]; + + long MSK = 0xFFFFL<<26; + + long O = this.sectionData[(idx+skipAmount)*2]; + if ((O&MSK) != (A&MSK)) { + this.blockMesher.putNext((long) (false ? 
0L : 1L) | + A | + (((0xFFL) & 0xFF) << 55) + ); + } else { + this.blockMesher.putNext(0); + } + this.seondaryblockMesher.putNext(0); + /* + O = this.sectionData[(idx-skipAmount)*2]; + if ((O&MSK) != (A&MSK)) { + this.seondaryblockMesher.putNext((long) (true ? 0L : 1L) | + A | + (((0xFFL) & 0xFF) << 55) + ); + }*/ continue; + } else { + //this.blockMesher.putNext(0); + //this.seondaryblockMesher.putNext(0); + //continue; + + + long A = this.sectionData[idx * 2]; + + //Example thing thats just wrong but as example + this.blockMesher.putNext((long) (false ? 0L : 1L) | + A | + (((0xFFL) & 0xFF) << 55) + ); + this.seondaryblockMesher.putNext((long) (true ? 0L : 1L) | + A | + (((0xFFL) & 0xFF) << 55) + ); } - - long A = this.sectionData[idx * 2]; - - //Example thing thats just wrong but as example - this.blockMesher.putNext((long) (false ? 0L : 1L) | - A | - (((0xFFL) & 0xFF) << 55) - ); - this.seondaryblockMesher.putNext((long) (true ? 0L : 1L) | - A | - (((0xFFL) & 0xFF) << 55) - ); } } this.blockMesher.endRow(); diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/HierarchicalOcclusionTraverser.java b/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/HierarchicalOcclusionTraverser.java index 68a813fb..729d68aa 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/HierarchicalOcclusionTraverser.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/HierarchicalOcclusionTraverser.java @@ -1,7 +1,9 @@ package me.cortex.voxy.client.core.rendering.hierachical; +import me.cortex.voxy.client.RenderStatistics; import me.cortex.voxy.client.config.VoxyConfig; import me.cortex.voxy.client.core.gl.GlBuffer; +import me.cortex.voxy.client.core.gl.shader.AutoBindingShader; import me.cortex.voxy.client.core.gl.shader.Shader; import me.cortex.voxy.client.core.gl.shader.ShaderType; import me.cortex.voxy.client.core.rendering.PrintfDebugUtil; @@ -38,15 +40,15 @@ public class HierarchicalOcclusionTraverser { 
private final GlBuffer nodeBuffer; private final GlBuffer uniformBuffer = new GlBuffer(1024).zero(); private final GlBuffer renderList = new GlBuffer(100_000 * 4 + 4).zero();//100k sections max to render, TODO: Maybe move to render service or somewhere else - + private final GlBuffer statisticsBuffer = new GlBuffer(1024).zero(); private final GlBuffer queueMetaBuffer = new GlBuffer(4*4*5).zero(); private final GlBuffer scratchQueueA = new GlBuffer(100_000*4).zero(); private final GlBuffer scratchQueueB = new GlBuffer(100_000*4).zero(); - private static final int LOCAL_WORK_SIZE_BITS = 5; private static final int MAX_ITERATIONS = 5; + private static final int LOCAL_WORK_SIZE_BITS = 5; private static int BINDING_COUNTER = 1; private static final int SCENE_UNIFORM_BINDING = BINDING_COUNTER++; @@ -58,11 +60,12 @@ public class HierarchicalOcclusionTraverser { private static final int NODE_QUEUE_SOURCE_BINDING = BINDING_COUNTER++; private static final int NODE_QUEUE_SINK_BINDING = BINDING_COUNTER++; private static final int RENDER_TRACKER_BINDING = BINDING_COUNTER++; + private static final int STATISTICS_BUFFER_BINDING = BINDING_COUNTER++; private final HiZBuffer hiZBuffer = new HiZBuffer(); private final int hizSampler = glGenSamplers(); - private final Shader traversal = Shader.make(PRINTF_processor) + private final AutoBindingShader traversal = Shader.makeAuto(PRINTF_processor) .defineIf("DEBUG", HIERARCHICAL_SHADER_DEBUG) .define("MAX_ITERATIONS", MAX_ITERATIONS) .define("LOCAL_SIZE_BITS", LOCAL_WORK_SIZE_BITS) @@ -82,6 +85,9 @@ public class HierarchicalOcclusionTraverser { .define("RENDER_TRACKER_BINDING", RENDER_TRACKER_BINDING) + .defineIf("HAS_STATISTICS", RenderStatistics.enabled) + .defineIf("STATISTICS_BUFFER_BINDING", RenderStatistics.enabled, STATISTICS_BUFFER_BINDING) + .add(ShaderType.COMPUTE, "voxy:lod/hierarchical/traversal_dev.comp") .compile(); @@ -99,6 +105,15 @@ public class HierarchicalOcclusionTraverser { glSamplerParameteri(this.hizSampler, 
GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glSamplerParameteri(this.hizSampler, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); glSamplerParameteri(this.hizSampler, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL); + + this.traversal + .ubo("SCENE_UNIFORM_BINDING", this.uniformBuffer) + .ssbo("REQUEST_QUEUE_BINDING", this.requestBuffer) + .ssbo("RENDER_QUEUE_BINDING", this.renderList) + .ssbo("NODE_DATA_BINDING", this.nodeBuffer) + .ssbo("NODE_QUEUE_META_BINDING", this.queueMetaBuffer) + .ssbo("RENDER_TRACKER_BINDING", this.nodeCleaner.visibilityBuffer) + .ssboIf("STATISTICS_BUFFER_BINDING", this.statisticsBuffer); } private void uploadUniform(Viewport viewport) { @@ -127,28 +142,11 @@ public class HierarchicalOcclusionTraverser { //Screen space size for descending MemoryUtil.memPutFloat(ptr, (float) (screenspaceAreaDecreasingSize) /(viewport.width*viewport.height)); ptr += 4; - //VisibilityId MemoryUtil.memPutInt(ptr, this.nodeCleaner.visibilityId); ptr += 4; - - /* - //Very funny and cool thing that is possible - if (MinecraftClient.getInstance().getCurrentFps() < 30) { - VoxyConfig.CONFIG.subDivisionSize = Math.min(VoxyConfig.CONFIG.subDivisionSize + 5, 256); - } - - if (60 < MinecraftClient.getInstance().getCurrentFps()) { - VoxyConfig.CONFIG.subDivisionSize = Math.max(VoxyConfig.CONFIG.subDivisionSize - 1, 32); - }*/ } private void bindings() { - glBindBufferBase(GL_UNIFORM_BUFFER, SCENE_UNIFORM_BINDING, this.uniformBuffer.id); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, REQUEST_QUEUE_BINDING, this.requestBuffer.id); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, RENDER_QUEUE_BINDING, this.renderList.id); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, NODE_DATA_BINDING, this.nodeBuffer.id); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, NODE_QUEUE_META_BINDING, this.queueMetaBuffer.id); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, RENDER_TRACKER_BINDING, this.nodeCleaner.visibilityBuffer.id); glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, this.queueMetaBuffer.id); //Bind the hiz 
buffer @@ -167,12 +165,25 @@ public class HierarchicalOcclusionTraverser { this.bindings(); PrintfDebugUtil.bind(); + if (RenderStatistics.enabled) { + this.statisticsBuffer.zero(); + } this.traverseInternal(this.nodeManager.getTopLevelNodeIds().size()); - this.downloadResetRequestQueue(); + if (RenderStatistics.enabled) { + DownloadStream.INSTANCE.download(this.statisticsBuffer, down->{ + for (int i = 0; i < 5; i++) { + RenderStatistics.hierarchicalTraversalCounts[i] = MemoryUtil.memGetInt(down.address+i*4L); + } + + for (int i = 0; i < 5; i++) { + RenderStatistics.hierarchicalRenderSections[i] = MemoryUtil.memGetInt(down.address+5*4L+i*4L); + } + }); + } //Bind the hiz buffer glBindSampler(0, 0); @@ -299,6 +310,7 @@ public class HierarchicalOcclusionTraverser { this.hiZBuffer.free(); this.nodeBuffer.free(); this.uniformBuffer.free(); + this.statisticsBuffer.free(); this.renderList.free(); this.queueMetaBuffer.free(); this.scratchQueueA.free(); diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/section/MDICSectionRenderer.java b/src/main/java/me/cortex/voxy/client/core/rendering/section/MDICSectionRenderer.java index 030b95c4..33d7c737 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/section/MDICSectionRenderer.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/section/MDICSectionRenderer.java @@ -33,6 +33,7 @@ import static org.lwjgl.opengl.GL45.glCopyNamedBufferSubData; //Uses MDIC to render the sections public class MDICSectionRenderer extends AbstractSectionRenderer { + private static final int TRANSLUCENT_OFFSET = 400_000;//in draw calls private final Shader terrainShader = Shader.make() .defineIf("DEBUG_RENDER", false) .add(ShaderType.VERTEX, "voxy:lod/gl46/quads2.vert") @@ -40,6 +41,7 @@ public class MDICSectionRenderer extends AbstractSectionRenderer>>= eBits; byte light = lightSupplier.supply(i&0xF, (i>>8)&0xF, (i>>4)&0xF); - if (!(bId == 0 && (light == 0))) { - data[i] = Mapper.composeMappingId(light, bId, 
biomes[Integer.compress(i,0b1100_1100_1100)]); - } else { - data[i] = Mapper.AIR; - } + + data[i] = Mapper.composeMappingId(light, bId, biomes[Integer.compress(i,0b1100_1100_1100)]); } } else { if (!(blockContainer.data.storage instanceof EmptyPaletteStorage)) { diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46/cmdgen.comp b/src/main/resources/assets/voxy/shaders/lod/gl46/cmdgen.comp index 0c6e3e9b..ef2b053d 100644 --- a/src/main/resources/assets/voxy/shaders/lod/gl46/cmdgen.comp +++ b/src/main/resources/assets/voxy/shaders/lod/gl46/cmdgen.comp @@ -60,8 +60,8 @@ void main() { //Note! its not with respect to the sectionId // - //Check the occlusion data from last frame - bool shouldRender = visibilityData[gl_GlobalInvocationID.x] == frameId - 1; + //Check the occlusion data from this frame occlusion + bool shouldRender = visibilityData[gl_GlobalInvocationID.x] == frameId; //Clear the occlusion data (not strictly? needed? i think???) //visibilityData[gl_GlobalInvocationID.x] = 0; @@ -100,8 +100,8 @@ void main() { //Translucency count = meta.cntA&0xFFFF; if (count != 0) { - //uint translucentCommandPtr = atomicAdd(translucentDrawCount, 1) + 400000;//FIXME: dont hardcode this offset - //writeCmd(translucentCommandPtr, drawId, ptr, count); + uint translucentCommandPtr = atomicAdd(translucentDrawCount, 1) + TRANSLUCENT_OFFSET;//FIXME: dont hardcode this offset + writeCmd(translucentCommandPtr, drawId, ptr, count); } ptr += count; diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchical/binding_points.glsl b/src/main/resources/assets/voxy/shaders/lod/hierarchical/binding_points.glsl deleted file mode 100644 index 4a34f58d..00000000 --- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/binding_points.glsl +++ /dev/null @@ -1,15 +0,0 @@ -#define SCENE_UNIFORM_INDEX 0 -#define NODE_DATA_INDEX 1 -#define NODE_QUEUE_INDEX 2 -#define REQUEST_QUEUE_INDEX 3 -#define RENDER_QUEUE_INDEX 4 -#define TRANSFORM_ARRAY_INDEX 5 -#define NEXT_NODE_QUEUE_INDEX 
6 - -#ifdef IS_DEBUG -#define DEBUG_RENDER_NODE_INDEX 7 -#endif - -//Samplers -#define HIZ_BINDING_INDEX 0 - diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal.comp b/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal.comp deleted file mode 100644 index c1644289..00000000 --- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal.comp +++ /dev/null @@ -1,186 +0,0 @@ -#version 460 core - -//TODO: increase local size -#define LOCAL_SIZE_BITS 5 -#define LOCAL_SIZE_MSK ((1< -#line 7 - -//The queue contains 3 atomics -// end (the current processing pointer) -// head (the current point that is ok to read from) -// top (An atomic that is only used for writing to) -//The way it works when enqueuing -// top is incremented by x, -// write the data getting enqueued at the starting point specified by the `top` incrmenet -// then increment head strictly _AFTER_ writing to the queue, this ensures that the data is always written and avaible in the queue - -layout(binding = SCENE_UNIFORM_INDEX, std140) uniform SceneUniform { - mat4 VP; - ivec3 camSecPos; - uint screenW; - vec3 camSubSecPos; - uint screenH; - uint requestQueueMaxSize; - uint renderQueueMaxSize; - float decendSSS; -}; - -layout(binding = REQUEST_QUEUE_INDEX, std430) restrict buffer RequestQueue { - uint requestQueueIndex; - uint[] requestQueue; -}; - -layout(binding = RENDER_QUEUE_INDEX, std430) restrict buffer RenderQueue { - uint renderQueueIndex; - uint[] renderQueue; -}; - -layout(binding = NODE_QUEUE_INDEX, std430) restrict buffer NodeQueue { - uint nodeQueueSize; - uint[] nodeQueue; -}; - -layout(binding = NEXT_NODE_QUEUE_INDEX, std430) restrict buffer NextNodeQueue { - uint nextNodeQueueIndex; - uint[] nextNodeQueue; -}; - -#ifdef IS_DEBUG -layout(binding = DEBUG_RENDER_NODE_INDEX, std430) restrict buffer DebugRenderNodeQueue { - uint debugRenderNodeQueueIndex; - uint[] debugRenderNodeQueue; -}; -#endif - -#import - -#import - -//Contains all the 
screenspace computation -#import - -void addRequest(inout UnpackedNode node) { - if (!hasRequested(node)) { - //printf("Request %d %d %d %d", node.nodeId, node.flags, node.meshPtr, node.childPtr); - //TODO: maybe try using only 1 variable and it being <0 being bad - if (requestQueueIndex < requestQueueMaxSize) { - //Mark node as having a request submitted to prevent duplicate submissions - requestQueue[atomicAdd(requestQueueIndex, 1)] = getId(node); - markRequested(node); - } - } -} - -void enqueueChildren(in UnpackedNode node) { - //printf("children"); - uint children = getChildCount(node); - uint ptr = getChildPtr(node); - uint widx = atomicAdd(nextNodeQueueIndex, children); - - for (int i = 0; i < children; i++) { - nextNodeQueue[widx+i] = ptr+i; - } -} - -void enqueueSelfForRender(in UnpackedNode node) { - //printf("render %d@[%d,%d,%d]", node.lodLevel, node.pos.x, node.pos.y, node.pos.z); - if ((!isEmptyMesh(node)) && renderQueueIndex < renderQueueMaxSize) { - renderQueue[atomicAdd(renderQueueIndex, 1)] = getMesh(node); - #ifdef IS_DEBUG - debugRenderNodeQueue[atomicAdd(debugRenderNodeQueueIndex, 1)] = node.nodeId; - #endif - } -} - -//TODO: need to add an empty mesh, as a parent node might not have anything to render but the children do?? -void main() { - if (gl_GlobalInvocationID.x>=nodeQueueSize) { - return; - } - - UnpackedNode node; - //Setup/unpack the node - unpackNode(node, nodeQueue[gl_GlobalInvocationID.x]); - //TODO: check the node is OK first??? maybe? - - //Compute screenspace - setupScreenspace(node); - //printf("Node %d@[%d,%d,%d] - %d - %f", node.lodLevel, node.pos.x, node.pos.y, node.pos.z, node.flags, (size.x*size.y*screenW*screenH)); - - //debugDumpNode(node); - - if (outsideFrustum() || isCulledByHiz()) { - //printf("HizCulled"); - //We are done here, dont do any more, the issue is the shader barriers maybe - // its culled, maybe just mark it as culled? 
- - - //printf("Cull"); - } else { - //It is visible, TODO: maybe do a more detailed hiz test? (or make it so that ) - - //Only decend if not a root node - if (node.lodLevel!=0 && shouldDecend()) { - if (hasChildren(node)) { - //printf("A"); - enqueueChildren(node); - } else { - //printf("B"); - addRequest(node); - //TODO: use self mesh (is error state if it doesnt have one since all leaf nodes should have a mesh) - // Basicly guarenteed to have a mesh, if it doesnt it is very very bad and incorect since its a violation of the graph properties - // that all leaf nodes must contain a mesh - enqueueSelfForRender(node); - } - } else { - if (hasMesh(node)) { - //printf("C"); - enqueueSelfForRender(node); - } else { - //printf("D"); - //!! not ideal, we want to render this mesh but dont have it. If we havent sent a request - // then send a request for a mesh for this node. - addRequest(node); - - //TODO: Decend into children? maybe add a bitflag saying is bad if the immediate children dont have meshes - enqueueChildren(node); - } - } - } -} - - -/* -Persistent threading - -//Thread 0 grabs a batch when empty -void main() { - while (true) { - //Each thread processes an entry on the queue and pushes all children to the queue if it is determined the children need to be added - } -} -*/ - - - - -//If a request is successfully added to the RequestQueue, must update NodeData to mark that the node has been put into the request queue -// to prevent it from being requested every frame and blocking the queue - - -//Once a suitable render section is found, it is put into the RenderQueue, or if its not availbe its put into the RequestQueue -// and its children are rendered instead if it has them avalible - -//NOTE: EXPERIMENT: INSTEAD OF PERSISTENT THREADS -//TODO: since we know the tree depth is worst case 5, we can just do an indirect dispatch 5 times one for each layer -// issues with this approach, barriers and waiting for one to finish before the otehr can be executed -// 
advantages, MUCH SIMPLER, no shader barriers needed really , issue is need a flipflip queue but thats ok, -// also ensures the gpu is full of work capacity -// this might be what i do to start with since its much easier to do -// not sure - - diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal_dev.comp b/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal_dev.comp index 54266a9f..f1a013fe 100644 --- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal_dev.comp +++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal_dev.comp @@ -36,6 +36,13 @@ layout(binding = RENDER_TRACKER_BINDING, std430) restrict writeonly buffer rende uint[] lastRenderFrame; }; +#ifdef HAS_STATISTICS +layout(binding = STATISTICS_BUFFER_BINDING, std430) restrict buffer statisticsBuffer { + uint traversalCounts[5]; + uint renderCounts[5]; +}; +#endif + void addRequest(inout UnpackedNode node) { //printf("Put node decend request"); if (!hasRequested(node)) { @@ -72,12 +79,20 @@ void enqueueSelfForRender(in UnpackedNode node) { #ifdef IS_DEBUG debugRenderNodeQueue[atomicAdd(debugRenderNodeQueueIndex, 1)] = node.nodeId; #endif + + #ifdef HAS_STATISTICS + atomicAdd(renderCounts[node.lodLevel], 1); + #endif } } } void traverse(in UnpackedNode node) { + #ifdef HAS_STATISTICS + atomicAdd(traversalCounts[node.lodLevel], 1); + #endif + //Compute screenspace setupScreenspace(node); //debugDumpNode(node);