Incremental traversal system works

2024-09-15 12:10:32 +10:00
parent 76aaf3824d
commit b9a3d18b56
10 changed files with 207 additions and 35 deletions
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/queue.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/queue.glsl
@@ -0,0 +1,50 @@
+#define SENTINAL_OUT_OF_BOUNDS 0xFFFFFFFFu
+//Selects the nodeQueueMetadata entry of the current pass (queueIdx+1 is the entry of the pass being filled)
+layout(location = NODE_QUEUE_INDEX_BINDING) uniform uint queueIdx;
+//One uvec4 per pass: .w is read as the queue length, .x is bumped next to it in pushNodesInit
+layout(binding = NODE_QUEUE_META_BINDING, std430) restrict buffer NodeQueueMeta {
+    uvec4 nodeQueueMetadata[MAX_ITERATIONS];
+};
+//Node ids consumed by this pass, read through getCurrentNode
+layout(binding = NODE_QUEUE_SOURCE_BINDING, std430) restrict readonly buffer NodeQueueSource {
+    uint nodeQueueSource[];
+};
+//Node ids produced by this pass, written through pushNode
+layout(binding = NODE_QUEUE_SINK_BINDING, std430) restrict writeonly buffer NodeQueueSink {
+    uint nodeQueueSink[];
+};
+
+uint getCurrentNode() {//Node id for this invocation, or SENTINAL_OUT_OF_BOUNDS when past the queue end
+    uint slot = gl_GlobalInvocationID.x;
+    //.w of the current metadata entry is treated as the number of entries in the source queue
+    if (slot < nodeQueueMetadata[queueIdx].w) return nodeQueueSource[slot];
+    return SENTINAL_OUT_OF_BOUNDS;
+}
+
+uint nodePushIndex = -1;//Next nodeQueueSink slot for this invocation, -1 (all bits set) == nothing reserved
+//Reserves nodeCount consecutive slots in the next iteration's queue, call once before any pushNode
+void pushNodesInit(uint nodeCount) {
+    #ifdef DEBUG
+    if (queueIdx >= (MAX_ITERATIONS-1)) {
+        printf("LOG: Traversal tried inserting a node into next iteration, which is outside max iteration bounds. GID: %d, count: %d", gl_GlobalInvocationID.x, nodeCount);
+        nodePushIndex = -1;
+        return;
+    }
+    #endif
+    uint index = atomicAdd(nodeQueueMetadata[queueIdx+1].w, nodeCount);//Previous .w == first reserved slot
+    //Raise .x only by the LOCAL_SIZE sized groups newly needed: ceil((index+nodeCount)/LOCAL_SIZE)-ceil(index/LOCAL_SIZE)
+    // (.x is presumably the work group count used to dispatch the next iteration, TODO confirm against the host code)
+    uint inc = ((index+nodeCount+(LOCAL_SIZE-1))>>LOCAL_SIZE_BITS)-((index+(LOCAL_SIZE-1))>>LOCAL_SIZE_BITS);
+    atomicAdd(nodeQueueMetadata[queueIdx+1].x, inc);//TODO: see if making this conditional on inc != 0 is faster
+    nodePushIndex = index;
+}
+
+void pushNode(uint nodeId) {//Stores nodeId at the slot reserved by pushNodesInit, then moves to the next slot
+    #ifdef DEBUG
+    if (nodePushIndex == -1) {//pushNodesInit leaves -1 behind when it refuses a reservation
+        printf("LOG: Tried pushing node when push node wasnt successful. GID: %d, pushing: %d", gl_GlobalInvocationID.x, nodeId);
+        return;
+    }
+    #endif
+    nodeQueueSink[nodePushIndex] = nodeId; nodePushIndex += 1;
+}
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal.comp
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal.comp
@@ -63,23 +63,6 @@ layout(binding = DEBUG_RENDER_NODE_INDEX, std430) restrict buffer DebugRenderNod
 //Contains all the screenspace computation
 #import <voxy:lod/hierarchical/screenspace.glsl>

-//If a request is successfully added to the RequestQueue, must update NodeData to mark that the node has been put into the request queue
-// to prevent it from being requested every frame and blocking the queue
-
-
-//Once a suitable render section is found, it is put into the RenderQueue, or if its not availbe its put into the RequestQueue
-// and its children are rendered instead if it has them avalible
-
-//NOTE: EXPERIMENT: INSTEAD OF PERSISTENT THREADS
-//TODO: since we know the tree depth is worst case 5, we can just do an indirect dispatch 5 times one for each layer
-// issues with this approach, barriers and waiting for one to finish before the otehr can be executed
-// advantages, MUCH SIMPLER, no shader barriers needed really , issue is need a flipflip queue but thats ok,
-// also ensures the gpu is full of work capacity
-// this might be what i do to start with since its much easier to do
-// not sure
-
-
-
 void addRequest(inout UnpackedNode node) {
    if (!hasRequested(node)) {
        //printf("Request %d %d %d %d", node.nodeId, node.flags, node.meshPtr, node.childPtr);
@@ -172,10 +155,32 @@ void main() {


 /*
+Persistent threading
+
 //Thread 0 grabs a batch when empty
 void main() {
    while (true) {
        //Each thread processes an entry on the queue and pushes all children to the queue if it is determined the children need to be added
    }
 }
-*/
+*/
+
+
+
+
+//If a request is successfully added to the RequestQueue, must update NodeData to mark that the node has been put into the request queue
+// to prevent it from being requested every frame and blocking the queue
+
+
+//Once a suitable render section is found, it is put into the RenderQueue, or if it's not available it's put into the RequestQueue
+// and its children are rendered instead if it has them available
+
+//NOTE: EXPERIMENT: INSTEAD OF PERSISTENT THREADS
+//TODO: since we know the tree depth is worst case 5, we can just do an indirect dispatch 5 times, one for each layer
+// issues with this approach: barriers, and waiting for one dispatch to finish before the other can be executed
+// advantages: MUCH SIMPLER, no shader barriers needed really; the issue is that it needs a flip-flop queue, but that's ok,
+// it also ensures the gpu is full of work capacity
+// this might be what I do to start with since it's much easier to do
+// not sure
+
+
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal_dev.comp
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal_dev.comp
@@ -0,0 +1,16 @@
+#version 460 core
+//LOCAL_SIZE must be defined before the queue.glsl import below, which uses LOCAL_SIZE and LOCAL_SIZE_BITS
+//TODO: increase local size
+#define LOCAL_SIZE_MSK ((1<<LOCAL_SIZE_BITS)-1)
+#define LOCAL_SIZE (1<<LOCAL_SIZE_BITS)
+layout(local_size_x=LOCAL_SIZE) in;//, local_size_y=1
+#import <voxy:lod/hierarchical/queue.glsl>
+
+void main() {//Dev pass: logs the node taken from the source queue and forwards it unchanged to the sink queue
+    uint node = getCurrentNode();
+    if (node == SENTINAL_OUT_OF_BOUNDS) return;//Invocation lies past the end of the queue, nothing to do
+    printf("GID:%d, NODE %d, %d, AA, %d, %d, %d, %d", gl_GlobalInvocationID.x, node, queueIdx, nodeQueueMetadata[queueIdx].x, nodeQueueMetadata[queueIdx].y, nodeQueueMetadata[queueIdx].z, nodeQueueMetadata[queueIdx].w);
+    //Reserve exactly one slot in the next iteration's queue and fill it with the same node
+    pushNodesInit(1);
+    pushNode(node);
+}