|
|
|
|
@@ -1,186 +0,0 @@
|
|
|
|
|
#version 460 core
|
|
|
|
|
|
|
|
|
|
//TODO: increase local size
// The work-group width is expressed as a bit count so it is always a power of two.
#define LOCAL_SIZE_BITS 5
// Number of invocations per work group along x.
#define LOCAL_SIZE (1<<LOCAL_SIZE_BITS)
// Mask selecting the low LOCAL_SIZE_BITS bits (i.e. LOCAL_SIZE - 1).
#define LOCAL_SIZE_MSK (LOCAL_SIZE-1)
layout(local_size_x = LOCAL_SIZE) in; // local_size_y is not specified here
|
|
|
|
|
|
|
|
|
|
#import <voxy:lod/hierarchical/binding_points.glsl>
|
|
|
|
|
#line 7
|
|
|
|
|
|
|
|
|
|
//The queue contains 3 atomics
|
|
|
|
|
// end (the current processing pointer)
|
|
|
|
|
// head (the current point that is ok to read from)
|
|
|
|
|
// top (An atomic that is only used for writing to)
|
|
|
|
|
//The way it works when enqueuing
|
|
|
|
|
// top is incremented by x,
|
|
|
|
|
// write the data getting enqueued at the starting point specified by the `top` increment
|
|
|
|
|
// then increment head strictly _AFTER_ writing to the queue; this ensures that the data is always written and available in the queue
|
|
|
|
|
|
|
|
|
|
// Scene-wide parameters for the traversal pass.
// The block uses std140, so the member order below must not be changed.
layout(std140, binding = SCENE_UNIFORM_INDEX) uniform SceneUniform {
    mat4 VP;
    ivec3 camSecPos;
    uint screenW;
    vec3 camSubSecPos;
    uint screenH;
    // Capacity limit compared against requestQueueIndex before appending a request.
    uint requestQueueMaxSize;
    // Capacity limit compared against renderQueueIndex before appending a mesh.
    uint renderQueueMaxSize;
    float decendSSS;
};
|
|
|
|
|
|
|
|
|
|
// Queue of node ids appended by addRequest().
layout(std430, binding = REQUEST_QUEUE_INDEX) restrict buffer RequestQueue {
    // Append cursor; advanced with atomicAdd when an entry is written.
    uint requestQueueIndex;
    uint requestQueue[];
};
|
|
|
|
|
|
|
|
|
|
// Queue of mesh values appended by enqueueSelfForRender().
layout(std430, binding = RENDER_QUEUE_INDEX) restrict buffer RenderQueue {
    // Append cursor; advanced with atomicAdd when an entry is written.
    uint renderQueueIndex;
    uint renderQueue[];
};
|
|
|
|
|
|
|
|
|
|
// Input queue for this dispatch: main() reads one entry per invocation.
layout(std430, binding = NODE_QUEUE_INDEX) restrict buffer NodeQueue {
    // Number of valid entries; invocations at or beyond this index exit immediately.
    uint nodeQueueSize;
    uint nodeQueue[];
};
|
|
|
|
|
|
|
|
|
|
// Output queue that enqueueChildren() fills with child node pointers.
layout(std430, binding = NEXT_NODE_QUEUE_INDEX) restrict buffer NextNodeQueue {
    // Append cursor; advanced with atomicAdd by the number of children written.
    uint nextNodeQueueIndex;
    uint nextNodeQueue[];
};
|
|
|
|
|
|
|
|
|
|
#ifdef IS_DEBUG
// Debug-only queue recording the nodeId of every node pushed to the render queue.
layout(std430, binding = DEBUG_RENDER_NODE_INDEX) restrict buffer DebugRenderNodeQueue {
    // Append cursor; advanced with atomicAdd when an entry is written.
    uint debugRenderNodeQueueIndex;
    uint debugRenderNodeQueue[];
};
#endif
|
|
|
|
|
|
|
|
|
|
#import <voxy:lod/hierarchical/transform.glsl>
|
|
|
|
|
|
|
|
|
|
#import <voxy:lod/hierarchical/node.glsl>
|
|
|
|
|
|
|
|
|
|
//Contains all the screenspace computation
|
|
|
|
|
#import <voxy:lod/hierarchical/screenspace.glsl>
|
|
|
|
|
|
|
|
|
|
// Appends the id of `node` to the request queue unless a request was already
// submitted for it, then marks the node as requested.
//
// The slot is validated against requestQueueMaxSize using the index returned
// by atomicAdd. Checking the counter with a plain read and incrementing it
// afterwards lets several invocations pass the check at once and write past
// the end of the queue.
void addRequest(inout UnpackedNode node) {
    if (hasRequested(node)) {
        return;
    }
    //printf("Request %d %d %d %d", node.nodeId, node.flags, node.meshPtr, node.childPtr);

    //TODO: maybe try using only 1 variable and it being <0 being bad
    // Cheap early-out once the queue already looks full; the authoritative
    // check is the one on the claimed slot below.
    if (requestQueueIndex >= requestQueueMaxSize) {
        return;
    }

    uint slot = atomicAdd(requestQueueIndex, 1u);
    if (slot < requestQueueMaxSize) {
        requestQueue[slot] = getId(node);
        //Mark node as having a request submitted to prevent duplicate submissions
        markRequested(node);
    } else {
        // Another invocation took the last slot first: undo our claim
        // (adding 0xFFFFFFFF wraps to -1) so the counter ends at most at the
        // queue capacity. The node is left unmarked so it can be requested
        // again on a later pass.
        atomicAdd(requestQueueIndex, 0xFFFFFFFFu);
    }
}
|
|
|
|
|
|
|
|
|
|
// Reserves getChildCount(node) consecutive entries in the next-node queue with
// a single atomicAdd and fills them with consecutive child pointers starting
// at getChildPtr(node).
void enqueueChildren(in UnpackedNode node) {
    //printf("children");
    uint childCount = getChildCount(node);
    uint firstChild = getChildPtr(node);
    // One atomic reserves the whole contiguous range for this node.
    uint base = atomicAdd(nextNodeQueueIndex, childCount);

    for (uint offset = 0u; offset < childCount; ++offset) {
        nextNodeQueue[base + offset] = firstChild + offset;
    }
}
|
|
|
|
|
|
|
|
|
|
// Appends the mesh of `node` to the render queue, skipping nodes whose mesh is
// empty. In IS_DEBUG builds the node id is also recorded in the debug queue.
//
// The slot is validated against renderQueueMaxSize using the index returned by
// atomicAdd. Checking the counter with a plain read and incrementing it
// afterwards lets several invocations pass the check at once and write past
// the end of the queue.
void enqueueSelfForRender(in UnpackedNode node) {
    //printf("render %d@[%d,%d,%d]", node.lodLevel, node.pos.x, node.pos.y, node.pos.z);
    // Cheap early-out; the authoritative capacity check is on the claimed slot.
    if (isEmptyMesh(node) || renderQueueIndex >= renderQueueMaxSize) {
        return;
    }

    uint slot = atomicAdd(renderQueueIndex, 1u);
    if (slot >= renderQueueMaxSize) {
        // Another invocation took the last slot first: undo our claim
        // (adding 0xFFFFFFFF wraps to -1) so the counter ends at most at the
        // queue capacity.
        atomicAdd(renderQueueIndex, 0xFFFFFFFFu);
        return;
    }

    renderQueue[slot] = getMesh(node);
#ifdef IS_DEBUG
    debugRenderNodeQueue[atomicAdd(debugRenderNodeQueueIndex, 1u)] = node.nodeId;
#endif
}
|
|
|
|
|
|
|
|
|
|
//TODO: need to add an empty mesh, as a parent node might not have anything to render but the children do??
|
|
|
|
|
// Entry point: each invocation handles one entry of nodeQueue. A node that is
// not culled is either pushed to the render queue, expanded into the next-node
// queue, and/or has a request submitted for it.
void main() {
    uint queueSlot = gl_GlobalInvocationID.x;
    // Invocations beyond the populated part of the queue have nothing to do.
    if (queueSlot >= nodeQueueSize) {
        return;
    }

    //Setup/unpack the node
    //TODO: check the node is OK first??? maybe?
    UnpackedNode node;
    unpackNode(node, nodeQueue[queueSlot]);

    //Compute screenspace
    setupScreenspace(node);
    //printf("Node %d@[%d,%d,%d] - %d - %f", node.lodLevel, node.pos.x, node.pos.y, node.pos.z, node.flags, (size.x*size.y*screenW*screenH));
    //debugDumpNode(node);

    // Culled nodes produce no output at all.
    // (maybe just mark it as culled? the issue is the shader barriers maybe)
    if (outsideFrustum() || isCulledByHiz()) {
        //printf("HizCulled");
        //printf("Cull");
        return;
    }

    //It is visible, TODO: maybe do a more detailed hiz test?

    // Descending is only considered when node.lodLevel is non-zero.
    bool wantsDescend = node.lodLevel != 0 && shouldDecend();
    if (wantsDescend) {
        if (hasChildren(node)) {
            //printf("A");
            enqueueChildren(node);
            return;
        }
        //printf("B");
        // No children to descend into: submit a request, and render this node in the meantime.
        addRequest(node);
        //TODO: use self mesh (it is an error state if it doesn't have one, since all leaf nodes
        // must contain a mesh; a leaf without a mesh violates the graph properties)
        enqueueSelfForRender(node);
        return;
    }

    if (hasMesh(node)) {
        //printf("C");
        enqueueSelfForRender(node);
        return;
    }

    //printf("D");
    //!! not ideal, we want to render this mesh but dont have it. If we havent sent a request
    // then send a request for a mesh for this node.
    addRequest(node);

    //TODO: Descend into children? maybe add a bitflag saying is bad if the immediate children dont have meshes
    // NOTE(review): unlike the descend path above, this call is not guarded by
    // hasChildren(node) — confirm that it is safe for nodes without children.
    enqueueChildren(node);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
Persistent threading
|
|
|
|
|
|
|
|
|
|
//Thread 0 grabs a batch when empty
|
|
|
|
|
void main() {
|
|
|
|
|
while (true) {
|
|
|
|
|
//Each thread processes an entry on the queue and pushes all children to the queue if it is determined the children need to be added
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//If a request is successfully added to the RequestQueue, must update NodeData to mark that the node has been put into the request queue
|
|
|
|
|
// to prevent it from being requested every frame and blocking the queue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//Once a suitable render section is found, it is put into the RenderQueue, or if it's not available it's put into the RequestQueue
|
|
|
|
|
// and its children are rendered instead if it has them available
|
|
|
|
|
|
|
|
|
|
//NOTE: EXPERIMENT: INSTEAD OF PERSISTENT THREADS
|
|
|
|
|
//TODO: since we know the tree depth is worst case 5, we can just do an indirect dispatch 5 times one for each layer
|
|
|
|
|
// issues with this approach: barriers, and waiting for one dispatch to finish before the other can be executed
|
|
|
|
|
// advantages: MUCH SIMPLER, no shader barriers needed really; the issue is that it needs a flip-flop queue, but that's ok,
|
|
|
|
|
// also ensures the gpu is full of work capacity
|
|
|
|
|
// this might be what i do to start with since its much easier to do
|
|
|
|
|
// not sure
|
|
|
|
|
|
|
|
|
|
|