implemented gpu traversal i think?

2024-09-16 15:46:09 +10:00
parent d5045731ad
commit 0aeb0fbf21
6 changed files with 109 additions and 18 deletions
--- a/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java
+++ b/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java
@@ -63,7 +63,10 @@ public class RenderService<T extends AbstractSectionRenderer<J, ?>, J extends Vi
        this.viewportSelector = new ViewportSelector<>(this.sectionRenderer::createViewport);
        this.renderGen = new RenderGenerationService(world, this.modelService, serviceThreadPool, this.geometryUpdateQueue::push, this.sectionRenderer.getGeometryManager() instanceof IUsesMeshlets);
-        router.setCallbacks(this.renderGen::enqueueTask, this.sectionUpdateQueue::push);
+        router.setCallbacks(this.renderGen::enqueueTask, section -> {
            section.acquire();
            this.sectionUpdateQueue.push(section);
        });
        this.traversal = new HierarchicalOcclusionTraverser(this.nodeManager, 512);
--- a/src/main/java/me/cortex/voxy/client/core/rendering/hierachical2/HierarchicalOcclusionTraverser.java
+++ b/src/main/java/me/cortex/voxy/client/core/rendering/hierachical2/HierarchicalOcclusionTraverser.java
@@ -111,7 +111,7 @@ public class HierarchicalOcclusionTraverser {
        MemoryUtil.memPutInt(ptr, viewport.height); ptr += 4;
        MemoryUtil.memPutInt(ptr, NodeManager.REQUEST_QUEUE_SIZE); ptr += 4;
-        MemoryUtil.memPutInt(ptr, 1000000); ptr += 4;
+        MemoryUtil.memPutInt(ptr, (int) (this.renderList.size()/4-1)); ptr += 4;//TODO maybe move this to a #define
        //Screen space size for descending
        MemoryUtil.memPutFloat(ptr, 64*64); ptr += 4;
@@ -141,10 +141,16 @@ public class HierarchicalOcclusionTraverser {
        this.bindings();
        PrintfDebugUtil.bind();
        this.traverseInternal(1);
        this.downloadResetRequestQueue();
        //Bind the hiz buffer
        glBindSampler(0, 0);
        glBindTextureUnit(0, 0);
    }
    private void traverseInternal(int initialQueueSize) {
@@ -157,6 +163,10 @@ public class HierarchicalOcclusionTraverser {
            glPixelStorei(GL_UNPACK_SKIP_IMAGES, 0);
        }
        //Clear the render output counter
        nglClearNamedBufferSubData(this.renderList.id, GL_R32UI, 0, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);
        int firstDispatchSize = (initialQueueSize+(1<<LOCAL_WORK_SIZE_BITS)-1)>>LOCAL_WORK_SIZE_BITS;
        /*
        //prime the queue Todo: maybe move after the traversal? cause then it is more efficient work since it doesnt need to wait for this before starting?
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/node.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/node.glsl
@@ -97,5 +97,5 @@ void markRequested(inout UnpackedNode node) {
 }
 //Debug helper: prints the fields of an unpacked node (nodeId, lodLevel, pos.x/y/z, flags, meshPtr, childPtr)
 // through printf.
 void debugDumpNode(in UnpackedNode node) {
-    //printf("Node %d, %d@[%d,%d,%d], flags: %d, mesh: %d, ChildPtr: %d", node.nodeId, node.lodLevel, node.pos.x, node.pos.y, node.pos.z, node.flags, node.meshPtr, node.childPtr);
+    printf("Node %d, %d@[%d,%d,%d], flags: %d, mesh: %d, ChildPtr: %d", node.nodeId, node.lodLevel, node.pos.x, node.pos.y, node.pos.z, node.flags, node.meshPtr, node.childPtr);
 }
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/queue.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/queue.glsl
@@ -51,7 +51,7 @@ void pushNode(uint nodeId) {
//SIMPLE_QUEUE(name, bindingIndex) declares a std430 buffer block called <name>Struct that holds a uint counter
// <name>Index followed by an unsized uint array <name>.
//The second parameter must not itself be named "binding": macro substitution would then also replace the
// `binding` layout qualifier on the left-hand side of the '=' with the argument text.
-#define SIMPLE_QUEUE(name, binding) layout(binding = binding, std430) restrict buffer name##Struct { \
+#define SIMPLE_QUEUE(name, bindingIndex) layout(binding = bindingIndex, std430) restrict buffer name##Struct { \
    uint name##Index; \
    uint[] name; \
 };
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace.glsl
@@ -85,6 +85,6 @@ bool isCulledByHiz() {
 //Returns whether we should descend into this node's children or not.
 //The decision compares size.x*size.y*screenW*screenH against the minSSS uniform; descent is chosen when
 // the product is strictly greater.
 //NOTE(review): `size` is not declared in this function (presumably it is filled in by the screenspace setup
 // that runs before this call) - confirm it is a screen-relative extent so the product is an area in pixels.
 //NOTE(review): the printf below is live and runs for every invocation that reaches this function.
 bool shouldDecend() {
-    //printf("Screen area %f: %f, %f", (size.x*size.y*float(screenW)*float(screenH)), float(screenW), float(screenH));
+    printf("Screen area %f: %f, %f", (size.x*size.y*float(screenW)*float(screenH)), float(screenW), float(screenH));
-    return (size.x*size.y*screenW*screenH) > decendSSS;
+    return (size.x*size.y*screenW*screenH) > minSSS;
 }
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal_dev.comp
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal_dev.comp
@@ -5,6 +5,17 @@
 #define LOCAL_SIZE (1<<LOCAL_SIZE_BITS)
 layout(local_size_x=LOCAL_SIZE) in;//, local_size_y=1
 //Scene parameters supplied by the host for the traversal pass.
 //The field order must match the order in which the host writes the buffer; the visible host-side code writes
 // the viewport height, the request queue size, the render queue size and the descend threshold in that order.
 //NOTE(review): screenW and screenH directly follow three-component vectors; under std140 a scalar placed
 // there occupies the otherwise unused fourth component slot - presumably intentional packing, confirm
 // against the host-side offsets.
 layout(binding = SCENE_UNIFORM_BINDING, std140) uniform SceneUniform {
    mat4 VP;//presumably the combined view-projection matrix - TODO confirm
    ivec3 camSecPos;//presumably the camera position in whole sections - TODO confirm
    uint screenW;//viewport width, used by shouldDecend
    vec3 camSubSecPos;//presumably the camera offset inside its section - TODO confirm
    uint screenH;//viewport height, used by shouldDecend
    uint requestQueueMaxSize;//bound checked by addRequest before writing to requestQueue
    uint renderQueueMaxSize;//bound checked by enqueueSelfForRender before writing to renderQueue
    float minSSS;//threshold that shouldDecend compares the screen-space product against
 };
 #import <voxy:lod/hierarchical/queue.glsl>
 #import <voxy:lod/hierarchical/node.glsl>
 #import <voxy:lod/hierarchical/screenspace.glsl>
@@ -12,23 +23,90 @@ layout(local_size_x=LOCAL_SIZE) in;//, local_size_y=1
 SIMPLE_QUEUE(requestQueue, REQUEST_QUEUE_BINDING);
 SIMPLE_QUEUE(renderQueue, RENDER_QUEUE_BINDING);
-/*
+void addRequest(inout UnpackedNode node) {
    //Appends this node's id (getId(node)) to requestQueue and marks the node as requested.
    //Nothing is written when hasRequested(node) is already true or when the queue counter has reached
    // requestQueueMaxSize.
    //NOTE(review): the capacity check reads requestQueueIndex in a separate step from the atomicAdd that
    // claims the slot, so invocations that pass the check at the same time could claim slots beyond
    // requestQueueMaxSize - confirm the buffer has slack or that the host clamps the count it reads back.
-layout(binding = REQUEST_QUEUE_INDEX, std430) restrict buffer RequestQueue {
+    printf("Put node decend request");
-    uint requestQueueIndex;
+    if (!hasRequested(node)) {
-    uint[] requestQueue;
+        if (requestQueueIndex < requestQueueMaxSize) {
-};
+            //Mark node as having a request submitted to prevent duplicate submissions
            requestQueue[atomicAdd(requestQueueIndex, 1)] = getId(node);
            markRequested(node);
        }
    }
 }
-layout(binding = RENDER_QUEUE_INDEX, std430) restrict buffer RenderQueue {
+void enqueueChildren(in UnpackedNode node) {
    //Pushes all children of this node via pushNode: the ids pushed are getChildPtr(node)+0 up to
    // getChildPtr(node)+getChildCount(node)-1, i.e. children are addressed as consecutive ids.
    //pushNodesInit is called once with the child count before any pushNode call (presumably it reserves
    // that many queue slots - confirm in queue.glsl).
-    uint renderQueueIndex;
+    uint children = getChildCount(node);
-    uint[] renderQueue;
+    pushNodesInit(children);
-};*/
+    uint ptr = getChildPtr(node);
    for (int i = 0; i < children; i++) {
        pushNode(ptr+i);
    }
 }
 //Appends this node's mesh (getMesh(node)) to renderQueue, provided the mesh is not empty and the queue
 // counter is still below renderQueueMaxSize. When IS_DEBUG is defined the node id is additionally recorded
 // in debugRenderNodeQueue.
 //NOTE(review): the capacity check reads renderQueueIndex in a separate step from the atomicAdd that claims
 // the slot, so invocations that pass the check at the same time could claim slots beyond
 // renderQueueMaxSize - confirm the buffer has slack for this.
 void enqueueSelfForRender(in UnpackedNode node) {
    //printf("render %d@[%d,%d,%d]", node.lodLevel, node.pos.x, node.pos.y, node.pos.z);
    if ((!isEmptyMesh(node)) && renderQueueIndex < renderQueueMaxSize) {
        renderQueue[atomicAdd(renderQueueIndex, 1)] = getMesh(node);
        #ifdef IS_DEBUG
        debugRenderNodeQueue[atomicAdd(debugRenderNodeQueueIndex, 1)] = node.nodeId;
        #endif
    }
 }
 //Processes one node: either culls it, or decides between queueing its children for further traversal,
 // queueing its own mesh for rendering, and filing a request for the node.
 //The single-letter printf tags identify which branch was taken for a node that was not culled:
 //  A: descend wanted, node has children     -> enqueue children
 //  B: descend wanted, node has no children  -> file request, render own mesh
 //  C: no descend, node has a mesh           -> render own mesh
 //  D: no descend, node has no mesh          -> file request, enqueue children
 void traverse(in UnpackedNode node) {
    //Compute screenspace
    setupScreenspace(node);
    debugDumpNode(node);
    if (outsideFrustum() || isCulledByHiz()) {
        //Not visible: nothing is queued or requested for this node
        printf("culled");
    } else {
        //It is visible, TODO: maybe do a more detailed hiz test? (or make it so that )
        //Only decend if not a root node
        //NOTE(review): the code gates descent on lodLevel != 0; confirm that level 0 is what the comment
        // above calls a "root node" rather than the most detailed level
        if (node.lodLevel!=0 && shouldDecend()) {
            if (hasChildren(node)) {
                printf("A");
                enqueueChildren(node);
            } else {
                printf("B");
                addRequest(node);
                //TODO: use self mesh (is error state if it doesnt have one since all leaf nodes should have a mesh)
                // Basicly guarenteed to have a mesh, if it doesnt it is very very bad and incorect since its a violation of the graph properties
                // that all leaf nodes must contain a mesh
                enqueueSelfForRender(node);
            }
        } else {
            if (hasMesh(node)) {
                printf("C");
                enqueueSelfForRender(node);
            } else {
                printf("D");
                //!! not ideal, we want to render this mesh but dont have it. If we havent sent a request
                // then send a request for a mesh for this node.
                addRequest(node);
                //TODO: Decend into children? maybe add a bitflag saying is bad if the immediate children dont have meshes
                //NOTE(review): children are enqueued here without a hasChildren check; presumably this
                // relies on getChildCount returning 0 for a node without children - confirm
                enqueueChildren(node);
            }
        }
    }
 }
 //Compute shader entry point: each invocation obtains a node id from getCurrentNode() and, unless that id is
 // SENTINAL_OUT_OF_BOUNDS, unpacks the node and runs traverse on it.
 void main() {
-    uint node = getCurrentNode();
+    uint nodeId = getCurrentNode();
-    if (node != SENTINAL_OUT_OF_BOUNDS) {
+    if (nodeId != SENTINAL_OUT_OF_BOUNDS) {
        //Fetch + decode node
        UnpackedNode node;
        unpackNode(node, nodeId);
        traverse(node);
        /*
        printf("GID:%d, NODE %d, %d, AA, %d, %d, %d, %d", gl_GlobalInvocationID.x, node, queueIdx, nodeQueueMetadata[queueIdx].x, nodeQueueMetadata[queueIdx].y, nodeQueueMetadata[queueIdx].z, nodeQueueMetadata[queueIdx].w);
        pushNodesInit(1);
        pushNode(node);
        */
    }
 }