basic translucency

This commit is contained in:
mcrcortex
2025-04-05 16:55:47 +10:00
parent 9e5e5e654d
commit 7cc92a533d
12 changed files with 188 additions and 267 deletions

View File

@@ -60,8 +60,8 @@ void main() {
//Note! its not with respect to the sectionId
//
//Check the occlusion data from last frame
bool shouldRender = visibilityData[gl_GlobalInvocationID.x] == frameId - 1;
//Check the occlusion data from this frame occlusion
bool shouldRender = visibilityData[gl_GlobalInvocationID.x] == frameId;
//Clear the occlusion data (not strictly? needed? i think???)
//visibilityData[gl_GlobalInvocationID.x] = 0;
@@ -100,8 +100,8 @@ void main() {
//Translucency
count = meta.cntA&0xFFFF;
if (count != 0) {
//uint translucentCommandPtr = atomicAdd(translucentDrawCount, 1) + 400000;//FIXME: dont hardcode this offset
//writeCmd(translucentCommandPtr, drawId, ptr, count);
uint translucentCommandPtr = atomicAdd(translucentDrawCount, 1) + TRANSLUCENT_OFFSET;//FIXME: dont hardcode this offset
writeCmd(translucentCommandPtr, drawId, ptr, count);
}
ptr += count;

View File

@@ -1,15 +0,0 @@
//Named binding indices, one per buffer/uniform block, plus a separate numbering for samplers.
//NOTE(review): presumably consumed through `layout(binding = ...)` qualifiers in the shaders
// that import this file - confirm against the importing shaders.
#define SCENE_UNIFORM_INDEX 0
#define NODE_DATA_INDEX 1
#define NODE_QUEUE_INDEX 2
#define REQUEST_QUEUE_INDEX 3
#define RENDER_QUEUE_INDEX 4
#define TRANSFORM_ARRAY_INDEX 5
#define NEXT_NODE_QUEUE_INDEX 6
//The debug binding only exists when the shader is compiled with IS_DEBUG defined
#ifdef IS_DEBUG
#define DEBUG_RENDER_NODE_INDEX 7
#endif
//Samplers
//NOTE(review): index 0 is reused here; presumably fine because sampler bindings are numbered
// independently of buffer bindings - confirm
#define HIZ_BINDING_INDEX 0

View File

@@ -1,186 +0,0 @@
#version 460 core
//TODO: increase local size
#define LOCAL_SIZE_BITS 5
#define LOCAL_SIZE_MSK ((1<<LOCAL_SIZE_BITS)-1)
#define LOCAL_SIZE (1<<LOCAL_SIZE_BITS)
layout(local_size_x=LOCAL_SIZE) in;//, local_size_y=1
#import <voxy:lod/hierarchical/binding_points.glsl>
#line 7
//The queue contains 3 atomics
// end (the current processing pointer)
// head (the current point that is ok to read from)
// top (An atomic that is only used for writing to)
//The way it works when enqueuing
// top is incremented by x,
// write the data getting enqueued at the starting point specified by the `top` increment
// then increment head strictly _AFTER_ writing to the queue, this ensures that the data is always written and available in the queue
//Scene parameters, supplied as a std140 uniform block bound at SCENE_UNIFORM_INDEX.
//Each 3-component vector is immediately followed by a uint; under std140 rules that uint
// occupies the 4 bytes left over in the vector's 16-byte slot, so no padding is inserted there.
layout(binding = SCENE_UNIFORM_INDEX, std140) uniform SceneUniform {
//NOTE(review): presumably the combined view-projection matrix - not referenced directly in this file, confirm
mat4 VP;
//NOTE(review): presumably the camera position in whole sections - confirm
ivec3 camSecPos;
//NOTE(review): presumably the render target width in pixels - confirm
uint screenW;
//NOTE(review): presumably the camera offset within its section - confirm
vec3 camSubSecPos;
//NOTE(review): presumably the render target height in pixels - confirm
uint screenH;
//Limit compared against requestQueueIndex in addRequest() before a request is appended
uint requestQueueMaxSize;
//Limit compared against renderQueueIndex in enqueueSelfForRender() before a mesh is appended
uint renderQueueMaxSize;
//NOTE(review): presumably the screen-space-size threshold used by the descend test - confirm
float decendSSS;
};
//All queues below share one shape: a single uint counter followed by an unsized uint array.

//Node ids appended by addRequest(); requestQueueIndex is advanced with atomicAdd
layout(binding = REQUEST_QUEUE_INDEX, std430) restrict buffer RequestQueue {
uint requestQueueIndex;
uint[] requestQueue;
};
//Mesh values appended by enqueueSelfForRender(); renderQueueIndex is advanced with atomicAdd
layout(binding = RENDER_QUEUE_INDEX, std430) restrict buffer RenderQueue {
uint renderQueueIndex;
uint[] renderQueue;
};
//Input queue for this dispatch: main() reads nodeQueue[gl_GlobalInvocationID.x] and
// exits early for invocations at or beyond nodeQueueSize
layout(binding = NODE_QUEUE_INDEX, std430) restrict buffer NodeQueue {
uint nodeQueueSize;
uint[] nodeQueue;
};
//Output queue filled by enqueueChildren(); nextNodeQueueIndex is advanced by the child count
//NOTE(review): presumably becomes the NodeQueue of the next dispatch - confirm on the host side
layout(binding = NEXT_NODE_QUEUE_INDEX, std430) restrict buffer NextNodeQueue {
uint nextNodeQueueIndex;
uint[] nextNodeQueue;
};
//Only declared when IS_DEBUG is defined; enqueueSelfForRender() appends node.nodeId here
#ifdef IS_DEBUG
layout(binding = DEBUG_RENDER_NODE_INDEX, std430) restrict buffer DebugRenderNodeQueue {
uint debugRenderNodeQueueIndex;
uint[] debugRenderNodeQueue;
};
#endif
#import <voxy:lod/hierarchical/transform.glsl>
#import <voxy:lod/hierarchical/node.glsl>
//Contains all the screenspace computation
#import <voxy:lod/hierarchical/screenspace.glsl>
//Appends the node's id to the request queue, at most once per node.
// Does nothing when the node is already flagged as requested, or when the
// queue counter has already reached requestQueueMaxSize.
void addRequest(inout UnpackedNode node) {
    //A request was already submitted for this node
    if (hasRequested(node)) {
        return;
    }
    //No room left in the request queue
    //TODO: maybe try using only 1 variable and it being <0 being bad
    if (requestQueueIndex >= requestQueueMaxSize) {
        return;
    }
    //Claim a slot, store the id, then flag the node so it is not submitted again
    uint slot = atomicAdd(requestQueueIndex, 1);
    requestQueue[slot] = getId(node);
    markRequested(node);
}
//Reserves a contiguous range in the next-node queue and fills it with the
// consecutive child indices starting at the node's child pointer.
void enqueueChildren(in UnpackedNode node) {
    uint childCount = getChildCount(node);
    uint firstChild = getChildPtr(node);
    //One atomic reserves space for every child at once
    uint base = atomicAdd(nextNodeQueueIndex, childCount);
    for (uint offset = 0u; offset < childCount; ++offset) {
        nextNodeQueue[base + offset] = firstChild + offset;
    }
}
//Appends the node's mesh to the render queue.
// Skipped when the node's mesh is empty or when the queue counter has
// already reached renderQueueMaxSize.
void enqueueSelfForRender(in UnpackedNode node) {
    if (isEmptyMesh(node) || renderQueueIndex >= renderQueueMaxSize) {
        return;
    }
    uint slot = atomicAdd(renderQueueIndex, 1);
    renderQueue[slot] = getMesh(node);
#ifdef IS_DEBUG
    //Debug builds additionally record which node produced the render entry
    uint debugSlot = atomicAdd(debugRenderNodeQueueIndex, 1);
    debugRenderNodeQueue[debugSlot] = node.nodeId;
#endif
}
//TODO: need to add an empty mesh, as a parent node might not have anything to render but the children do??
//One invocation per entry of the node queue: unpack the node, cull it, then
// either descend into its children, render it, or request missing data.
void main() {
    uint invocation = gl_GlobalInvocationID.x;
    //Invocations past the end of the queue have nothing to process
    if (invocation >= nodeQueueSize) {
        return;
    }

    //Setup/unpack the node
    //TODO: check the node is OK first??? maybe?
    UnpackedNode node;
    unpackNode(node, nodeQueue[invocation]);

    //Compute screenspace
    setupScreenspace(node);

    //Culled nodes produce no output at all
    // (maybe just mark it as culled? the issue is the shader barriers maybe)
    if (outsideFrustum() || isCulledByHiz()) {
        return;
    }

    //It is visible, TODO: maybe do a more detailed hiz test?
    //Nodes at lodLevel 0 never descend; shouldDecend() is only evaluated for the others
    bool wantsDescend = node.lodLevel != 0 && shouldDecend();

    if (wantsDescend) {
        if (hasChildren(node)) {
            enqueueChildren(node);
            return;
        }
        //Wants to descend but has no children yet: request them and draw this node meanwhile
        addRequest(node);
        //TODO: use self mesh (is an error state if it doesn't have one, since all leaf nodes should have a mesh)
        // Basically guaranteed to have a mesh; if it doesn't, that is very bad and incorrect since it
        // violates the graph property that all leaf nodes must contain a mesh
        enqueueSelfForRender(node);
        return;
    }

    if (hasMesh(node)) {
        enqueueSelfForRender(node);
        return;
    }

    //!! not ideal, we want to render this mesh but don't have it. If we haven't sent a request
    // then send a request for a mesh for this node.
    addRequest(node);
    //TODO: Descend into children? maybe add a bitflag saying it is bad if the immediate children don't have meshes
    enqueueChildren(node);
}
/*
Persistent threading
//Thread 0 grabs a batch when empty
void main() {
while (true) {
//Each thread processes an entry on the queue and pushes all children to the queue if it is determined the children need to be added
}
}
*/
//If a request is successfully added to the RequestQueue, must update NodeData to mark that the node has been put into the request queue
// to prevent it from being requested every frame and blocking the queue
//Once a suitable render section is found, it is put into the RenderQueue, or if it's not available it's put into the RequestQueue
// and its children are rendered instead if it has them available
//NOTE: EXPERIMENT: INSTEAD OF PERSISTENT THREADS
//TODO: since we know the tree depth is worst case 5, we can just do an indirect dispatch 5 times, one for each layer
// issues with this approach: barriers, and waiting for one to finish before the other can be executed
// advantages: MUCH SIMPLER, no shader barriers needed really; issue is it needs a flip-flop queue but that's ok,
// also ensures the gpu is full of work capacity
// this might be what i do to start with since it's much easier to do
// not sure

View File

@@ -36,6 +36,13 @@ layout(binding = RENDER_TRACKER_BINDING, std430) restrict writeonly buffer rende
uint[] lastRenderFrame;
};
//Optional counters, only declared when the shader is compiled with HAS_STATISTICS defined.
//Both arrays are indexed by node.lodLevel and incremented with atomicAdd.
//NOTE(review): the arrays hold 5 entries and no bounds check on lodLevel is visible here -
// assumes lodLevel is always in [0, 4], confirm
#ifdef HAS_STATISTICS
layout(binding = STATISTICS_BUFFER_BINDING, std430) restrict buffer statisticsBuffer {
//Incremented once per traverse() call
uint traversalCounts[5];
//Incremented when enqueueSelfForRender() records a node for rendering
uint renderCounts[5];
};
#endif
void addRequest(inout UnpackedNode node) {
//printf("Put node decend request");
if (!hasRequested(node)) {
@@ -72,12 +79,20 @@ void enqueueSelfForRender(in UnpackedNode node) {
#ifdef IS_DEBUG
debugRenderNodeQueue[atomicAdd(debugRenderNodeQueueIndex, 1)] = node.nodeId;
#endif
#ifdef HAS_STATISTICS
atomicAdd(renderCounts[node.lodLevel], 1);
#endif
}
}
}
void traverse(in UnpackedNode node) {
#ifdef HAS_STATISTICS
atomicAdd(traversalCounts[node.lodLevel], 1);
#endif
//Compute screenspace
setupScreenspace(node);
//debugDumpNode(node);