Incremental traversal system works
This commit is contained in:
@@ -0,0 +1,50 @@
|
||||
//Value getCurrentNode() returns when this invocation's index is at or past the current queue's node count (all bits set)
#define SENTINAL_OUT_OF_BOUNDS uint(-1)
|
||||
|
||||
//Index into nodeQueueMetadata of the queue read by this pass; pushes are accounted against entry queueIdx+1
layout(location = NODE_QUEUE_INDEX_BINDING) uniform uint queueIdx;
|
||||
|
||||
//Per-iteration queue metadata, one uvec4 per traversal iteration.
// .w is the number of nodes queued for that iteration (bounds check in getCurrentNode(),
//    atomically grown in pushNodesInit()).
// .x is atomically bumped by pushNodesInit() alongside .w.
// NOTE(review): .x/.y/.z look like indirect dispatch dimensions - confirm against the host code.
layout(std430, binding = NODE_QUEUE_META_BINDING) restrict buffer NodeQueueMeta {
    uvec4 nodeQueueMetadata[MAX_ITERATIONS];
};
|
||||
|
||||
//Read-only queue consumed by the current pass; getCurrentNode() indexes it with gl_GlobalInvocationID.x
layout(std430, binding = NODE_QUEUE_SOURCE_BINDING) restrict readonly buffer NodeQueueSource {
    uint nodeQueueSource[];
};
|
||||
|
||||
//Write-only queue filled by pushNode(); slots are reserved beforehand through pushNodesInit()
layout(std430, binding = NODE_QUEUE_SINK_BINDING) restrict writeonly buffer NodeQueueSink {
    uint nodeQueueSink[];
};
|
||||
|
||||
//Fetches the node assigned to this invocation from the source queue.
// Returns SENTINAL_OUT_OF_BOUNDS when the invocation index is at or beyond the
// node count stored in nodeQueueMetadata[queueIdx].w.
uint getCurrentNode() {
    uint slot = gl_GlobalInvocationID.x;
    uint queuedCount = nodeQueueMetadata[queueIdx].w;
    if (slot < queuedCount) {
        return nodeQueueSource[slot];
    }
    return SENTINAL_OUT_OF_BOUNDS;
}
|
||||
|
||||
//Next nodeQueueSink slot pushNode() writes to; assigned by pushNodesInit(). Starts at -1 (all bits set as a uint), which the DEBUG paths treat as "no reservation"
uint nodePushIndex = -1;
|
||||
//Reserves `nodeCount` consecutive slots in the next iteration's queue for this invocation.
//
// The node count (.w) of nodeQueueMetadata[queueIdx+1] is grown atomically and the first
// reserved slot is stored in nodePushIndex, which pushNode() then advances one slot at a time.
// The .x component is grown by the number of LOCAL_SIZE-sized groups the reservation newly
// reaches into, so that across all invocations .x grows by exactly ceil(total / LOCAL_SIZE).
// NOTE(review): .x is presumably the workgroup count of the next indirect dispatch - confirm
// against the host code. If the host seeds .x with a non-zero value the result over-dispatches,
// which getCurrentNode()'s bounds check tolerates.
//
// In DEBUG builds, a push from the last iteration is logged and rejected by leaving
// nodePushIndex at -1.
void pushNodesInit(uint nodeCount) {
    //Debug
#ifdef DEBUG
    if (queueIdx >= (MAX_ITERATIONS-1)) {
        printf("LOG: Traversal tried inserting a node into next iteration, which is outside max iteration bounds. GID: %d, count: %d", gl_GlobalInvocationID.x, nodeCount);
        nodePushIndex = -1;
        return;
    }
#endif

    //atomicAdd returns the count before the add, i.e. the first slot of this reservation
    uint index = atomicAdd(nodeQueueMetadata[queueIdx+1].w, nodeCount);

    //Grow the first metadata value only when the reservation crosses a LOCAL_SIZE threshold.
    // The previous expression ((index+LOCAL_SIZE)>>LOCAL_SIZE_BITS)-(index>>LOCAL_SIZE_BITS)
    // is always 1, so it ignored nodeCount entirely.
    uint roundUp = uint(LOCAL_SIZE - 1);
    uint groupsBefore = (index + roundUp) >> LOCAL_SIZE_BITS;
    uint groupsAfter = (index + nodeCount + roundUp) >> LOCAL_SIZE_BITS;
    uint inc = groupsAfter - groupsBefore;
    atomicAdd(nodeQueueMetadata[queueIdx+1].x, inc);//TODO: see if making this conditional on inc != 0 is faster

    nodePushIndex = index;
}
|
||||
|
||||
//Writes `nodeId` into the next slot reserved by pushNodesInit() and advances the slot cursor.
// In DEBUG builds, a push without a valid reservation (nodePushIndex == -1) is logged and dropped.
void pushNode(uint nodeId) {
#ifdef DEBUG
    if (nodePushIndex == -1) {
        printf("LOG: Tried pushing node when push node wasnt successful. GID: %d, pushing: %d", gl_GlobalInvocationID.x, nodeId);
        return;
    }
#endif
    uint slot = nodePushIndex;
    nodePushIndex = slot + 1;
    nodeQueueSink[slot] = nodeId;
}
|
||||
@@ -63,23 +63,6 @@ layout(binding = DEBUG_RENDER_NODE_INDEX, std430) restrict buffer DebugRenderNod
|
||||
//Contains all the screenspace computation
|
||||
#import <voxy:lod/hierarchical/screenspace.glsl>
|
||||
|
||||
//If a request is successfully added to the RequestQueue, must update NodeData to mark that the node has been put into the request queue
|
||||
// to prevent it from being requested every frame and blocking the queue
|
||||
|
||||
|
||||
//Once a suitable render section is found, it is put into the RenderQueue, or if it's not available it's put into the RequestQueue
|
||||
// and its children are rendered instead if it has them available
|
||||
|
||||
//NOTE: EXPERIMENT: INSTEAD OF PERSISTENT THREADS
|
||||
//TODO: since we know the tree depth is worst case 5, we can just do an indirect dispatch 5 times one for each layer
|
||||
// issues with this approach, barriers and waiting for one to finish before the other can be executed
|
||||
// advantages, MUCH SIMPLER, no shader barriers needed really, issue is need a flip-flop queue but that's ok,
|
||||
// also ensures the gpu is full of work capacity
|
||||
// this might be what i do to start with since its much easier to do
|
||||
// not sure
|
||||
|
||||
|
||||
|
||||
void addRequest(inout UnpackedNode node) {
|
||||
if (!hasRequested(node)) {
|
||||
//printf("Request %d %d %d %d", node.nodeId, node.flags, node.meshPtr, node.childPtr);
|
||||
@@ -172,10 +155,32 @@ void main() {
|
||||
|
||||
|
||||
/*
|
||||
Persistent threading
|
||||
|
||||
//Thread 0 grabs a batch when empty
|
||||
void main() {
|
||||
while (true) {
|
||||
//Each thread processes an entry on the queue and pushes all children to the queue if it is determined the children need to be added
|
||||
}
|
||||
}
|
||||
*/
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
//If a request is successfully added to the RequestQueue, must update NodeData to mark that the node has been put into the request queue
|
||||
// to prevent it from being requested every frame and blocking the queue
|
||||
|
||||
|
||||
//Once a suitable render section is found, it is put into the RenderQueue, or if it's not available it's put into the RequestQueue
|
||||
// and its children are rendered instead if it has them available
|
||||
|
||||
//NOTE: EXPERIMENT: INSTEAD OF PERSISTENT THREADS
|
||||
//TODO: since we know the tree depth is worst case 5, we can just do an indirect dispatch 5 times one for each layer
|
||||
// issues with this approach, barriers and waiting for one to finish before the other can be executed
|
||||
// advantages, MUCH SIMPLER, no shader barriers needed really, issue is need a flip-flop queue but that's ok,
|
||||
// also ensures the gpu is full of work capacity
|
||||
// this might be what i do to start with since its much easier to do
|
||||
// not sure
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
#version 460 core
|
||||
|
||||
//TODO: increase local size
|
||||
//Bit mask selecting the low LOCAL_SIZE_BITS bits (LOCAL_SIZE - 1)
#define LOCAL_SIZE_MSK ((1<<LOCAL_SIZE_BITS)-1)
|
||||
//Workgroup width in invocations (2^LOCAL_SIZE_BITS); used as local_size_x below and by pushNodesInit() in the queue include
#define LOCAL_SIZE (1<<LOCAL_SIZE_BITS)
|
||||
layout(local_size_x=LOCAL_SIZE) in;//, local_size_y=1
|
||||
#import <voxy:lod/hierarchical/queue.glsl>
|
||||
|
||||
//Pass-through test kernel: each invocation that owns a node in the current queue logs it
// together with the current queue's metadata, then re-queues the same node for the next iteration.
void main() {
    uint current = getCurrentNode();
    if (current == SENTINAL_OUT_OF_BOUNDS) {
        //Invocation index is past the end of the queue, nothing to do
        return;
    }

    printf("GID:%d, NODE %d, %d, AA, %d, %d, %d, %d", gl_GlobalInvocationID.x, current, queueIdx, nodeQueueMetadata[queueIdx].x, nodeQueueMetadata[queueIdx].y, nodeQueueMetadata[queueIdx].z, nodeQueueMetadata[queueIdx].w);

    //Reserve exactly one slot, then fill it
    pushNodesInit(1);
    pushNode(current);
}
|
||||
Reference in New Issue
Block a user