Much work much wow

2024-07-10 00:03:59 +09:00
parent e1c7bb18a9
commit a7c6768449
5 changed files with 218 additions and 0 deletions
--- a/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/NodeManager2.java
+++ b/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/NodeManager2.java
@@ -341,5 +341,8 @@ public class NodeManager2 {
    private void pushNode(int node) {

    }
+    public void nodeUpload() {
+        //TODO: empty stub, no upload logic has been written yet
+    }

 }
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/node.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/node.glsl
@@ -0,0 +1,29 @@
+
+//All node access goes through these globals; unpackNode(nodeId) is meant to set them up (its body is not written yet)
+uint lodLevel;
+ivec3 position;
+
+uint childrenPtr;
+uint meshPtr;
+
+uint flags;
+
+void unpackNode(uint nodeId) {
+    //TODO: not implemented yet; meant to set up the node globals declared in this file for `nodeId`
+}
+
+bool hasMesh() {
+    //TODO: not implemented yet; must return a bool before callers can rely on the result
+}
+
+bool hasChildren() {
+    //TODO: not implemented yet; must return a bool before callers can rely on the result
+}
+
+bool hasRequested() {
+    //TODO: not implemented yet; must return a bool before callers can rely on the result
+}
+
+void markRequested() {
+    //TODO: not implemented yet; currently a no-op
+}
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace.glsl
@@ -0,0 +1,33 @@
+
+//All the screenspace computation code: hiz culling + size/screenspace AABB size computation
+// to determine whether a child node should be visited
+// it controls the actions of the traversal logic
+//NOTE: can do a few things here; technically, since at the moment it is split and not using persistent threads,
+// can use mesh shaders to do rasterized occlusion directly with a meshdrawindirect, one per layer
+//Persistent threads might still be viable/usable since the initial lods supplied to the culler are mixed level
+// (basically the minimum guaranteed value, e.g. don't supply a top level lod right in front of the camera, since it is guaranteed never to be rendered at that level)
+// do this based on camera distance computation
+
+//changing the base level/root of the graph for some nodes can be really tricky and incorrect so might not be worth it but it should help
+// substantially for performance (for both persistent threads and incremental)
+
+
+
+
+
+
+
+//Sets up screenspace with the given node id, returns true on success false on failure/should not continue
+//Accesses data that is setup in the main traversal and is just shared to here
+bool setupScreenspace() {
+    //TODO: not implemented yet; must return true on success and false on failure/should not continue
+}
+
+bool isCulledByHiz() {
+    //TODO: not implemented yet; must return a bool before callers can rely on the result
+}
+
+//Returns whether we should descend into this node's children or not
+bool shouldDecend() {
+    //TODO: not implemented yet; must return a bool before callers can rely on the result
+}
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/transform.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/transform.glsl
@@ -0,0 +1,3 @@
+//This provides per scene/viewport/transform access, that is, a node can be attached to a specific scene/viewport/transform, this is so that
+// different nodes/models can have different viewports/scenes/transforms which enables some very cool things like
+// absolutely massive VS2 structures should... just work :tm: - todd howard
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal.glsl
@@ -0,0 +1,150 @@
+#version 460 core
+//TODO: make this better than a single thread
+layout(local_size_x=1, local_size_y=1) in;
+
+//The queue contains 3 atomics
+// end (the current processing pointer)
+// head (the current point that is ok to read from)
+// top (An atomic that is only used for writing to)
+//The way it works when enqueuing
+// top is incremented by x,
+//   write the data getting enqueued at the starting point specified by the `top` increment
+// then increment head strictly _AFTER_ writing to the queue, this ensures that the data is always written and available in the queue
+
+layout(binding = 0, std140) uniform SceneUniform {
+    //TODO: no members declared yet. NOTE(review): GLSL interface blocks appear to need at least one member to compile - confirm
+};
+
+layout(binding = 1, std430) restrict buffer NodeData {//Needs to be readable and writeable for marking data,
+    //(could do an evil violation: make this readonly, then have a writeonly variant, which means that writes might not be visible immediately but will show up by the next frame)
+    //Nodes are 16 bytes big (or 32, can't decide; 16 might _just_ be enough)
+    ivec4[] nodes;
+};
+
+
+layout(binding = 2, std430) restrict buffer Atomics {
+    uint requestQueueIndex;
+    uint requestQueueMaxSize;
+    //NOTE(review): presumably index/max-size pairs for the RequestQueue and RenderQueue buffers below - confirm once the enqueue code exists
+    uint renderQueueIndex;
+    uint renderQueueMaxSize;
+} atomics;
+//Declared writeonly: this shader can write entries to the request queue but cannot read them back
+layout(binding = 3, std430) restrict writeonly buffer RequestQueue {
+    uint[] requestQueue;
+};
+//Declared writeonly: this shader can write entries to the render queue but cannot read them back
+layout(binding = 4, std430) restrict writeonly buffer RenderQueue {
+    uint[] renderQueue;
+};
+
+/*
+layout(binding = 2, std430) restrict buffer QueueData {
+    uint tail;
+    uint head;
+    uint top;
+    uint[] queue;
+} queue;
+*/
+
+#import <voxy:lod/hierarchical/transform.glsl>
+//Import paths use the shader directory name `hierarchical` (the previous `hierarchial` spelling matched no directory)
+#import <voxy:lod/hierarchical/node.glsl>
+
+layout(binding = 0) uniform sampler2DShadow hizDepthSampler;
+
+
+void aqcuireNewBatch() {
+    //TODO: not implemented yet; currently a no-op
+}
+
+//Contains all the screenspace computation
+#import <voxy:lod/hierarchical/screenspace.glsl>
+
+
+
+
+
+//If a request is successfully added to the RequestQueue, must update NodeData to mark that the node has been put into the request queue
+// to prevent it from being requested every frame and blocking the queue
+
+
+//Once a suitable render section is found, it is put into the RenderQueue, or if it's not available it's put into the RequestQueue
+// and its children are rendered instead if it has them available
+
+//NOTE: EXPERIMENT: INSTEAD OF PERSISTENT THREADS
+//TODO: since we know the tree depth is worst case 5, we can just do an indirect dispatch 5 times, one for each layer
+// issues with this approach: barriers and waiting for one to finish before the other can be executed
+// advantages: MUCH SIMPLER, no shader barriers needed really; the issue is it needs a flip-flop queue but that's ok,
+// also ensures the gpu is full of work capacity
+// this might be what I do to start with since it's much easier to do
+// not sure
+
+
+void addRequest() {
+    if (!hasRequested()) {
+        //TODO: request this node (cpu side can figure out what it wants/needs)
+        //NOTE(review): nothing is written to the request queue yet, so this branch currently does nothing
+        //Mark node as having a request submitted to prevent duplicate submissions
+    }
+}
+
+void enqueueSelfForRender() {
+    //TODO: not implemented yet. Draw mesh and stop with this node (good path)
+}
+
+void enqueueChildren() {
+    //TODO: not implemented yet; currently a no-op
+}
+
+//TODO: need to add an empty mesh, as a parent node might not have anything to render but the children do??
+void main() {
+    uint id = 0;
+
+    //Setup/unpack the node
+    unpackNode(id);
+    //TODO: check the node is OK first??? maybe?
+
+    //Compute screenspace; false means failure/should not continue, so stop here
+    if (!setupScreenspace()) {
+        return;
+    }
+
+    if (isCulledByHiz()) {
+        //We are done here, don't do any more, the issue is the shader barriers maybe
+        // it's culled, maybe just mark it as culled?
+    } else {
+        //It is visible, TODO: maybe do a more detailed hiz test?
+        if (shouldDecend()) {
+            if (hasChildren()) {
+                enqueueChildren();
+            } else {
+                addRequest();
+                //TODO: use self mesh. A leaf without a mesh is an error state: it violates
+                // the graph property that all leaf nodes must contain a mesh
+                enqueueSelfForRender();
+            }
+        } else {
+            if (hasMesh()) {
+                enqueueSelfForRender();
+            } else {
+                //!! not ideal, we want to render this mesh but don't have it. If we haven't sent a request
+                // then send a request for a mesh for this node.
+                addRequest();
+
+                //TODO: Descend into children? maybe add a bitflag saying it is bad if the immediate children don't have meshes
+                enqueueChildren();
+            }
+        }
+    }
+}
+
+
+/*
+//Thread 0 grabs a batch when empty
+void main() {
+    while (true) {
+        //Each thread processes an entry on the queue and pushes all children to the queue if it is determined the children need to be added
+    }
+}
+*/