From 0aeb0fbf2165e2262379758bc4b2fa8158d3f1f5 Mon Sep 17 00:00:00 2001 From: mcrcortex <18544518+MCRcortex@users.noreply.github.com> Date: Mon, 16 Sep 2024 15:46:09 +1000 Subject: [PATCH] implemented gpu traversal i think? --- .../client/core/rendering/RenderService.java | 7 +- .../HierarchicalOcclusionTraverser.java | 12 ++- .../voxy/shaders/lod/hierarchical/node.glsl | 2 +- .../voxy/shaders/lod/hierarchical/queue.glsl | 2 +- .../shaders/lod/hierarchical/screenspace.glsl | 4 +- .../lod/hierarchical/traversal_dev.comp | 100 ++++++++++++++++-- 6 files changed, 109 insertions(+), 18 deletions(-) diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java b/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java index d72d16a3..9f8cbaf8 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java @@ -63,7 +63,10 @@ public class RenderService, J extends Vi this.viewportSelector = new ViewportSelector<>(this.sectionRenderer::createViewport); this.renderGen = new RenderGenerationService(world, this.modelService, serviceThreadPool, this.geometryUpdateQueue::push, this.sectionRenderer.getGeometryManager() instanceof IUsesMeshlets); - router.setCallbacks(this.renderGen::enqueueTask, this.sectionUpdateQueue::push); + router.setCallbacks(this.renderGen::enqueueTask, section -> { + section.acquire(); + this.sectionUpdateQueue.push(section); + }); this.traversal = new HierarchicalOcclusionTraverser(this.nodeManager, 512); @@ -72,7 +75,7 @@ public class RenderService, J extends Vi Arrays.stream(world.getMapper().getBiomeEntries()).forEach(this.modelService::addBiome); world.getMapper().setBiomeCallback(this.modelService::addBiome); - + final int H_WIDTH = 1; for (int x = -H_WIDTH; x <= H_WIDTH; x++) { for (int y = -1; y <= 0; y++) { diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/hierachical2/HierarchicalOcclusionTraverser.java b/src/main/java/me/cortex/voxy/client/core/rendering/hierachical2/HierarchicalOcclusionTraverser.java index 27f36162..e1d5afb5 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/hierachical2/HierarchicalOcclusionTraverser.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/hierachical2/HierarchicalOcclusionTraverser.java @@ -111,7 +111,7 @@ public class HierarchicalOcclusionTraverser { MemoryUtil.memPutInt(ptr, viewport.height); ptr += 4; MemoryUtil.memPutInt(ptr, NodeManager.REQUEST_QUEUE_SIZE); ptr += 4; - MemoryUtil.memPutInt(ptr, 1000000); ptr += 4; + MemoryUtil.memPutInt(ptr, (int) (this.renderList.size()/4-1)); ptr += 4;//TODO maybe move this to a #define //Screen space size for descending MemoryUtil.memPutFloat(ptr, 64*64); ptr += 4; @@ -141,10 +141,16 @@ public class HierarchicalOcclusionTraverser { this.bindings(); PrintfDebugUtil.bind(); + this.traverseInternal(1); this.downloadResetRequestQueue(); + + + //Bind the hiz buffer + glBindSampler(0, 0); + glBindTextureUnit(0, 0); } private void traverseInternal(int initialQueueSize) { @@ -157,6 +163,10 @@ public class HierarchicalOcclusionTraverser { glPixelStorei(GL_UNPACK_SKIP_IMAGES, 0); } + //Clear the render output counter + nglClearNamedBufferSubData(this.renderList.id, GL_R32UI, 0, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, 0); + + int firstDispatchSize = (initialQueueSize+(1<>LOCAL_WORK_SIZE_BITS; /* //prime the queue Todo: maybe move after the traversal? cause then it is more efficient work since it doesnt need to wait for this before starting? diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchical/node.glsl b/src/main/resources/assets/voxy/shaders/lod/hierarchical/node.glsl index 5d80c399..372dc798 100644 --- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/node.glsl +++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/node.glsl @@ -97,5 +97,5 @@ void markRequested(inout UnpackedNode node) { } void debugDumpNode(in UnpackedNode node) { - //printf("Node %d, %d@[%d,%d,%d], flags: %d, mesh: %d, ChildPtr: %d", node.nodeId, node.lodLevel, node.pos.x, node.pos.y, node.pos.z, node.flags, node.meshPtr, node.childPtr); + printf("Node %d, %d@[%d,%d,%d], flags: %d, mesh: %d, ChildPtr: %d", node.nodeId, node.lodLevel, node.pos.x, node.pos.y, node.pos.z, node.flags, node.meshPtr, node.childPtr); } \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchical/queue.glsl b/src/main/resources/assets/voxy/shaders/lod/hierarchical/queue.glsl index 7711fbb5..a8724617 100644 --- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/queue.glsl +++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/queue.glsl @@ -51,7 +51,7 @@ void pushNode(uint nodeId) { -#define SIMPLE_QUEUE(name, binding) layout(binding = binding, std430) restrict buffer name##Struct { \ +#define SIMPLE_QUEUE(name, bindingIndex) layout(binding = bindingIndex, std430) restrict buffer name##Struct { \ uint name##Index; \ uint[] name; \ }; \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace.glsl b/src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace.glsl index 50c0a3f9..aabb0d77 100644 --- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace.glsl +++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace.glsl @@ -85,6 +85,6 @@ bool isCulledByHiz() { //Returns if we should decend into its children or not bool shouldDecend() { - //printf("Screen area %f: %f, %f", (size.x*size.y*float(screenW)*float(screenH)), float(screenW), float(screenH)); - return (size.x*size.y*screenW*screenH) > decendSSS; + printf("Screen area %f: %f, %f", (size.x*size.y*float(screenW)*float(screenH)), float(screenW), float(screenH)); + return (size.x*size.y*screenW*screenH) > minSSS; } \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal_dev.comp b/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal_dev.comp index 27d9ce4b..5a53b72e 100644 --- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal_dev.comp +++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal_dev.comp @@ -5,6 +5,17 @@ #define LOCAL_SIZE (1< #import #import @@ -12,23 +23,90 @@ layout(local_size_x=LOCAL_SIZE) in;//, local_size_y=1 SIMPLE_QUEUE(requestQueue, REQUEST_QUEUE_BINDING); SIMPLE_QUEUE(renderQueue, RENDER_QUEUE_BINDING); -/* -layout(binding = REQUEST_QUEUE_INDEX, std430) restrict buffer RequestQueue { - uint requestQueueIndex; - uint[] requestQueue; -}; +void addRequest(inout UnpackedNode node) { + printf("Put node decend request"); + if (!hasRequested(node)) { + if (requestQueueIndex < requestQueueMaxSize) { + //Mark node as having a request submitted to prevent duplicate submissions + requestQueue[atomicAdd(requestQueueIndex, 1)] = getId(node); + markRequested(node); + } + } +} -layout(binding = RENDER_QUEUE_INDEX, std430) restrict buffer RenderQueue { - uint renderQueueIndex; - uint[] renderQueue; -};*/ +void enqueueChildren(in UnpackedNode node) { + uint children = getChildCount(node); + pushNodesInit(children); + uint ptr = getChildPtr(node); + for (int i = 0; i < children; i++) { + pushNode(ptr+i); + } +} +void enqueueSelfForRender(in UnpackedNode node) { + //printf("render %d@[%d,%d,%d]", node.lodLevel, node.pos.x, node.pos.y, node.pos.z); + if ((!isEmptyMesh(node)) && renderQueueIndex < renderQueueMaxSize) { + renderQueue[atomicAdd(renderQueueIndex, 1)] = getMesh(node); + #ifdef IS_DEBUG + debugRenderNodeQueue[atomicAdd(debugRenderNodeQueueIndex, 1)] = node.nodeId; + #endif + } +} + + +void traverse(in UnpackedNode node) { + //Compute screenspace + setupScreenspace(node); + debugDumpNode(node); + + if (outsideFrustum() || isCulledByHiz()) { + printf("culled"); + } else { + //It is visible, TODO: maybe do a more detailed hiz test? (or make it so that ) + + //Only decend if not a root node + if (node.lodLevel!=0 && shouldDecend()) { + if (hasChildren(node)) { + printf("A"); + enqueueChildren(node); + } else { + printf("B"); + addRequest(node); + //TODO: use self mesh (is error state if it doesnt have one since all leaf nodes should have a mesh) + // Basicly guarenteed to have a mesh, if it doesnt it is very very bad and incorect since its a violation of the graph properties + // that all leaf nodes must contain a mesh + enqueueSelfForRender(node); + } + } else { + if (hasMesh(node)) { + printf("C"); + enqueueSelfForRender(node); + } else { + printf("D"); + //!! not ideal, we want to render this mesh but dont have it. If we havent sent a request + // then send a request for a mesh for this node. + addRequest(node); + + //TODO: Decend into children? maybe add a bitflag saying is bad if the immediate children dont have meshes + enqueueChildren(node); + } + } + } +} void main() { - uint node = getCurrentNode(); - if (node != SENTINAL_OUT_OF_BOUNDS) { + uint nodeId = getCurrentNode(); + if (nodeId != SENTINAL_OUT_OF_BOUNDS) { + //Fetch + decode node + UnpackedNode node; + unpackNode(node, nodeId); + + traverse(node); + + /* printf("GID:%d, NODE %d, %d, AA, %d, %d, %d, %d", gl_GlobalInvocationID.x, node, queueIdx, nodeQueueMetadata[queueIdx].x, nodeQueueMetadata[queueIdx].y, nodeQueueMetadata[queueIdx].z, nodeQueueMetadata[queueIdx].w); pushNodesInit(1); pushNode(node); + */ } } \ No newline at end of file