implemented gpu traversal i think?

This commit is contained in:
mcrcortex
2024-09-16 15:46:09 +10:00
parent d5045731ad
commit 0aeb0fbf21
6 changed files with 109 additions and 18 deletions

View File

@@ -63,7 +63,10 @@ public class RenderService<T extends AbstractSectionRenderer<J, ?>, J extends Vi
this.viewportSelector = new ViewportSelector<>(this.sectionRenderer::createViewport);
this.renderGen = new RenderGenerationService(world, this.modelService, serviceThreadPool, this.geometryUpdateQueue::push, this.sectionRenderer.getGeometryManager() instanceof IUsesMeshlets);
router.setCallbacks(this.renderGen::enqueueTask, this.sectionUpdateQueue::push);
router.setCallbacks(this.renderGen::enqueueTask, section -> {
section.acquire();
this.sectionUpdateQueue.push(section);
});
this.traversal = new HierarchicalOcclusionTraverser(this.nodeManager, 512);

View File

@@ -111,7 +111,7 @@ public class HierarchicalOcclusionTraverser {
MemoryUtil.memPutInt(ptr, viewport.height); ptr += 4;
MemoryUtil.memPutInt(ptr, NodeManager.REQUEST_QUEUE_SIZE); ptr += 4;
MemoryUtil.memPutInt(ptr, 1000000); ptr += 4;
MemoryUtil.memPutInt(ptr, (int) (this.renderList.size()/4-1)); ptr += 4;//TODO maybe move this to a #define
//Screen space size for descending
MemoryUtil.memPutFloat(ptr, 64*64); ptr += 4;
@@ -141,10 +141,16 @@ public class HierarchicalOcclusionTraverser {
this.bindings();
PrintfDebugUtil.bind();
this.traverseInternal(1);
this.downloadResetRequestQueue();
//Bind the hiz buffer
glBindSampler(0, 0);
glBindTextureUnit(0, 0);
}
private void traverseInternal(int initialQueueSize) {
@@ -157,6 +163,10 @@ public class HierarchicalOcclusionTraverser {
glPixelStorei(GL_UNPACK_SKIP_IMAGES, 0);
}
//Clear the render output counter
nglClearNamedBufferSubData(this.renderList.id, GL_R32UI, 0, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);
int firstDispatchSize = (initialQueueSize+(1<<LOCAL_WORK_SIZE_BITS)-1)>>LOCAL_WORK_SIZE_BITS;
/*
//prime the queue Todo: maybe move after the traversal? cause then it is more efficient work since it doesnt need to wait for this before starting?

View File

@@ -97,5 +97,5 @@ void markRequested(inout UnpackedNode node) {
}
// Debug helper: prints the fields of an unpacked node (id, lod level, position,
// flags, mesh pointer and child pointer) through printf.
void debugDumpNode(in UnpackedNode node) {
// Commented-out copy of the dump statement below.
//printf("Node %d, %d@[%d,%d,%d], flags: %d, mesh: %d, ChildPtr: %d", node.nodeId, node.lodLevel, node.pos.x, node.pos.y, node.pos.z, node.flags, node.meshPtr, node.childPtr);
printf("Node %d, %d@[%d,%d,%d], flags: %d, mesh: %d, ChildPtr: %d", node.nodeId, node.lodLevel, node.pos.x, node.pos.y, node.pos.z, node.flags, node.meshPtr, node.childPtr);
}

View File

@@ -51,7 +51,7 @@ void pushNode(uint nodeId) {
// SIMPLE_QUEUE(name, <binding index>): declares an std430 `restrict` buffer block
// called name##Struct that holds a uint counter `name##Index` followed by an
// unsized uint array `name`.
// NOTE(review): two #define lines appear back to back and differ only in the
// parameter name (`binding` vs `bindingIndex`). A macro parameter named `binding`
// is also substituted into the `binding =` layout qualifier, which is presumably
// why it was renamed. The first line ends in a line continuation, so as written it
// swallows the second #define - confirm that only the `bindingIndex` form is meant
// to remain.
#define SIMPLE_QUEUE(name, binding) layout(binding = binding, std430) restrict buffer name##Struct { \
#define SIMPLE_QUEUE(name, bindingIndex) layout(binding = bindingIndex, std430) restrict buffer name##Struct { \
uint name##Index; \
uint[] name; \
};

View File

@@ -85,6 +85,6 @@ bool isCulledByHiz() {
//Returns whether we should descend into the node's children or not.
//The decision compares size.x*size.y*screenW*screenH against a threshold.
//NOTE(review): two printf/return pairs are present. As written, the first return
// (against decendSSS) exits the function, so the printf and the minSSS comparison
// after it are unreachable. They look like the before/after lines of one change
// shown together - confirm which threshold is intended.
bool shouldDecend() {
//printf("Screen area %f: %f, %f", (size.x*size.y*float(screenW)*float(screenH)), float(screenW), float(screenH));
return (size.x*size.y*screenW*screenH) > decendSSS;
printf("Screen area %f: %f, %f", (size.x*size.y*float(screenW)*float(screenH)), float(screenW), float(screenH));
return (size.x*size.y*screenW*screenH) > minSSS;
}

View File

@@ -5,6 +5,17 @@
#define LOCAL_SIZE (1<<LOCAL_SIZE_BITS)
layout(local_size_x=LOCAL_SIZE) in;//, local_size_y=1
// Scene-wide parameters for the traversal shader, bound at SCENE_UNIFORM_BINDING
// with std140 layout. Member order is part of the layout contract with whatever
// fills the buffer, so it must not be rearranged.
layout(binding = SCENE_UNIFORM_BINDING, std140) uniform SceneUniform {
// presumably the combined view-projection matrix - TODO confirm
mat4 VP;
// presumably the camera position in whole sections - TODO confirm
ivec3 camSecPos;
// Screen width; placed right after the ivec3 so that under std140 it occupies the
// 4 bytes the three-component vector leaves free in its 16-byte slot.
uint screenW;
// presumably the camera offset inside its section - TODO confirm
vec3 camSubSecPos;
// Screen height; packed after the vec3 in the same way as screenW.
uint screenH;
// Upper bound that requestQueueIndex is compared against before a request is queued.
uint requestQueueMaxSize;
// Upper bound that renderQueueIndex is compared against before a mesh is queued.
uint renderQueueMaxSize;
// Threshold that shouldDecend() compares the size.x*size.y*screenW*screenH product against.
float minSSS;
};
#import <voxy:lod/hierarchical/queue.glsl>
#import <voxy:lod/hierarchical/node.glsl>
#import <voxy:lod/hierarchical/screenspace.glsl>
@@ -12,23 +23,90 @@ layout(local_size_x=LOCAL_SIZE) in;//, local_size_y=1
SIMPLE_QUEUE(requestQueue, REQUEST_QUEUE_BINDING);
SIMPLE_QUEUE(renderQueue, RENDER_QUEUE_BINDING);
/*
layout(binding = REQUEST_QUEUE_INDEX, std430) restrict buffer RequestQueue {
uint requestQueueIndex;
uint[] requestQueue;
};
void addRequest(inout UnpackedNode node) {
printf("Put node decend request");
if (!hasRequested(node)) {
if (requestQueueIndex < requestQueueMaxSize) {
//Mark node as having a request submitted to prevent duplicate submissions
requestQueue[atomicAdd(requestQueueIndex, 1)] = getId(node);
markRequested(node);
}
}
}
layout(binding = RENDER_QUEUE_INDEX, std430) restrict buffer RenderQueue {
uint renderQueueIndex;
uint[] renderQueue;
};*/
// Pushes all children of `node` onto the node queue.
// pushNodesInit() is called with the child count before any pushNode() call.
// Child ids are taken to be consecutive, starting at getChildPtr(node).
void enqueueChildren(in UnpackedNode node) {
    uint childCount = getChildCount(node);
    pushNodesInit(childCount);

    uint firstChild = getChildPtr(node);
    for (uint offset = 0u; offset < childCount; ++offset) {
        pushNode(firstChild + offset);
    }
}
// Appends the node's mesh to the render queue when the node has a non-empty mesh
// and the queue still has room.
//
// The slot is reserved with atomicAdd and then bounds-checked against
// renderQueueMaxSize before anything is written. Testing renderQueueIndex alone is
// not enough: several invocations can all see a value below the limit and then
// each reserve a slot, so the later ones would write past the end of the queue.
// renderQueueIndex itself may still end up above renderQueueMaxSize (as it could
// before), so readers of the counter should clamp it to the limit.
void enqueueSelfForRender(in UnpackedNode node) {
    //printf("render %d@[%d,%d,%d]", node.lodLevel, node.pos.x, node.pos.y, node.pos.z);

    // Cheap early-out only; the authoritative check is on the reserved slot below.
    if (isEmptyMesh(node) || renderQueueIndex >= renderQueueMaxSize) {
        return;
    }

    uint slot = atomicAdd(renderQueueIndex, 1);
    if (slot >= renderQueueMaxSize) {
        // Another invocation took the last slot between the test and the reservation.
        return;
    }

    renderQueue[slot] = getMesh(node);
    #ifdef IS_DEBUG
    debugRenderNodeQueue[atomicAdd(debugRenderNodeQueueIndex, 1)] = node.nodeId;
    #endif
}
// Processes one node of the hierarchy: culls it, descends into its children,
// queues it for rendering and/or files a request for missing data.
// The single-letter printf markers ("A".."D") identify which branch was taken.
void traverse(in UnpackedNode node) {
    // Compute the screenspace data used by the culling and descent tests.
    setupScreenspace(node);
    debugDumpNode(node);

    // Not visible: nothing to queue.
    if (outsideFrustum() || isCulledByHiz()) {
        printf("culled");
        return;
    }

    // It is visible. TODO: maybe do a more detailed hiz test?
    // Descent is only considered for nodes whose lodLevel is non-zero.
    bool wantsDescent = node.lodLevel != 0 && shouldDecend();

    if (wantsDescent) {
        if (hasChildren(node)) {
            printf("A");
            enqueueChildren(node);
            return;
        }

        // We want more detail but the children are not available: ask for them
        // and render this node in the meantime.
        printf("B");
        addRequest(node);
        // TODO: use self mesh. It is an error state if there is none, since all leaf
        //  nodes should have a mesh; a missing mesh here violates the graph property
        //  that every leaf node must contain one.
        enqueueSelfForRender(node);
        return;
    }

    if (hasMesh(node)) {
        printf("C");
        enqueueSelfForRender(node);
        return;
    }

    // Not ideal: we want to render this node's mesh but do not have it, so request it.
    printf("D");
    addRequest(node);
    // TODO: Descend into children? Maybe add a bitflag saying it is bad if the
    //  immediate children don't have meshes.
    enqueueChildren(node);
}
// Shader entry point: obtains a node id from getCurrentNode() and, unless it equals
// SENTINAL_OUT_OF_BOUNDS, unpacks the node and hands it to traverse().
// NOTE(review): the getCurrentNode() call and the sentinel check appear twice below
// (once as `node`, once as `nodeId`), which leaves the braces unbalanced as written.
// They look like the before/after lines of one change shown together - confirm that
// only the `nodeId` pair is meant to remain.
void main() {
uint node = getCurrentNode();
if (node != SENTINAL_OUT_OF_BOUNDS) {
uint nodeId = getCurrentNode();
if (nodeId != SENTINAL_OUT_OF_BOUNDS) {
//Fetch + decode node
UnpackedNode node;
unpackNode(node, nodeId);
traverse(node);
// Disabled debug code: printed queue metadata and pushed the node back onto the queue.
/*
printf("GID:%d, NODE %d, %d, AA, %d, %d, %d, %d", gl_GlobalInvocationID.x, node, queueIdx, nodeQueueMetadata[queueIdx].x, nodeQueueMetadata[queueIdx].y, nodeQueueMetadata[queueIdx].z, nodeQueueMetadata[queueIdx].w);
pushNodesInit(1);
pushNode(node);
*/
}
}