WIP

2024-06-11 23:06:24 +10:00
parent 584028ae7a
commit 25ddb83d22
18 changed files with 882 additions and 158 deletions
--- a/src/main/resources/assets/voxy/shaders/lod/gl46/quads.frag
+++ b/src/main/resources/assets/voxy/shaders/lod/gl46/quads.frag
@@ -1,6 +1,8 @@
 #version 460 core
 layout(binding = 0) uniform sampler2D blockModelAtlas;

+//#define DEBUG_RENDER
+
 //TODO: need to fix when merged quads have discardAlpha set to false but they span multiple tiles
 // however they are not a full block

@@ -10,8 +12,11 @@ layout(location = 2) in flat vec4 tinting;
 layout(location = 3) in flat vec4 addin;
 layout(location = 4) in flat uint flags;
 layout(location = 5) in flat vec4 conditionalTinting;
-//layout(location = 6) in flat vec4 solidColour;

+
+#ifdef DEBUG_RENDER
+layout(location = 6) in flat uint quadDebug;
+#endif
 layout(location = 0) out vec4 outColour;
 void main() {
    vec2 uv = mod(uv, vec2(1.0))*(1.0/(vec2(3.0,2.0)*256.0));
@@ -29,4 +34,14 @@ void main() {
    outColour = (colour * tinting) + addin;

    //outColour = vec4(uv + baseUV, 0, 1);
+
+
+    #ifdef DEBUG_RENDER
+    uint hash = quadDebug*1231421+123141;
+    hash ^= hash>>16;
+    hash = hash*1231421+123141;
+    hash ^= hash>>16;
+    hash = hash * 1827364925 + 123325621;
+    outColour = vec4(float(hash&15u)/15, float((hash>>4)&15u)/15, float((hash>>8)&15u)/15, 1);
+    #endif
 }
--- a/src/main/resources/assets/voxy/shaders/lod/gl46/quads.vert
+++ b/src/main/resources/assets/voxy/shaders/lod/gl46/quads.vert
@@ -1,150 +0,0 @@
-#version 460 core
-#extension GL_ARB_gpu_shader_int64 : enable
-
-#import <voxy:lod/quad_format.glsl>
-#import <voxy:lod/gl46/bindings.glsl>
-#import <voxy:lod/block_model.glsl>
-#line 8
-
-layout(location = 0) out vec2 uv;
-layout(location = 1) out flat vec2 baseUV;
-layout(location = 2) out flat vec4 tinting;
-layout(location = 3) out flat vec4 addin;
-layout(location = 4) out flat uint flags;
-layout(location = 5) out flat vec4 conditionalTinting;
-//layout(location = 6) out flat vec4 solidColour;
-
-uint extractLodLevel() {
-    return uint(gl_BaseInstance)>>27;
-}
-
-//Note the last 2 bits of gl_BaseInstance are unused
-//Gives a relative position of +-255 relative to the player center in its respective lod
-ivec3 extractRelativeLodPos() {
-    return (ivec3(gl_BaseInstance)<<ivec3(5,14,23))>>ivec3(23);
-}
-
-vec4 uint2vec4RGBA(uint colour) {
-    return vec4((uvec4(colour)>>uvec4(24,16,8,0))&uvec4(0xFF))/255.0;
-}
-
-//Gets the face offset with respect to the face direction (e.g. some will be + some will be -)
-float getDepthOffset(uint faceData, uint face) {
-    float offset = extractFaceIndentation(faceData);
-    return offset * (1.0-((int(face)&1)*2.0));
-}
-
-vec2 getFaceSizeOffset(uint faceData, uint corner) {
-    float EPSILON = 0.001f;
-    vec4 faceOffsetsSizes = extractFaceSizes(faceData);
-    //Expand the quads by a very small amount
-    faceOffsetsSizes.xz -= vec2(EPSILON);
-    faceOffsetsSizes.yw += vec2(EPSILON);
-    return mix(faceOffsetsSizes.xz, faceOffsetsSizes.yw-1.0f, bvec2(((corner>>1)&1u)==1, (corner&1u)==1));
-}
-
-//TODO: add a mechanism so that some quads can ignore backface culling
-// this would help alot with stuff like crops as they would look kinda weird i think,
-// same with flowers etc
-void main() {
-    int cornerIdx = gl_VertexID&3;
-    Quad quad = quadData[uint(gl_VertexID)>>2];
-    vec3 innerPos = extractPos(quad);
-    uint face = extractFace(quad);
-    uint modelId = extractStateId(quad);
-    BlockModel model = modelData[modelId];
-    uint faceData = model.faceData[face];
-    bool isTranslucent = modelIsTranslucent(model);
-    bool hasAO = modelHasMipmaps(model);//TODO: replace with per face AO flag
-    bool isShaded = hasAO;//TODO: make this a per face flag
-    //Change the ordering due to backface culling
-    //NOTE: when rendering, backface culling is disabled as we simply dispatch calls for each face
-    // this has the advantage of having "unassigned" geometry, that is geometry where the backface isnt culled
-    //if (face == 0 || (face>>1 != 0 && (face&1)==1)) {
-    //    cornerIdx ^= 1;
-    //}
-
-    uint lodLevel = extractLodLevel();
-    ivec3 lodCorner = ((extractRelativeLodPos()<<lodLevel) - (baseSectionPos&(ivec3((1<<lodLevel)-1))))<<5;
-    vec3 corner = innerPos * (1<<lodLevel) + lodCorner;
-
-    vec2 faceOffset = getFaceSizeOffset(faceData, cornerIdx);
-    ivec2 quadSize = extractSize(quad);
-    vec2 respectiveQuadSize = vec2(quadSize * ivec2((cornerIdx>>1)&1, cornerIdx&1));
-    vec2 size = (respectiveQuadSize + faceOffset) * (1<<lodLevel);
-
-    vec3 offset = vec3(size, (float(face&1u) + getDepthOffset(faceData, face)) * (1<<lodLevel));
-
-    if ((face>>1) == 0) { //Up/down
-        offset = offset.xzy;
-    }
-    //Not needed, here for readability
-    //if ((face>>1) == 1) {//north/south
-    //    offset = offset.xyz;
-    //}
-    if ((face>>1) == 2) { //west/east
-        offset = offset.zxy;
-    }
-
-    gl_Position = MVP * vec4(corner + offset, 1.0);
-
-
-    //Compute the uv coordinates
-    vec2 modelUV = vec2(modelId&0xFFu, (modelId>>8)&0xFFu)*(1.0/(256.0));
-    //TODO: make the face orientated by 2x3 so that division is not a integer div and modulo isnt needed
-    // as these are very slow ops
-    baseUV = modelUV + (vec2(face>>1, face&1u) * (1.0/(vec2(3.0, 2.0)*256.0)));
-    //TODO: add an option to scale the quad size by the lod level so that
-    // e.g. at lod level 2 a face will have 2x2
-    uv = respectiveQuadSize + faceOffset;//Add in the face offset for 0,0 uv
-
-    flags = faceHasAlphaCuttout(faceData);
-
-    //We need to have a conditional override based on if the model size is < a full face + quadSize > 1
-    flags |= uint(any(greaterThan(quadSize, ivec2(1)))) & faceHasAlphaCuttoutOverride(faceData);
-
-    flags |= uint(!modelHasMipmaps(model))<<1;
-
-    //Compute lighting
-    tinting = getLighting(extractLightId(quad));
-
-    //Apply model colour tinting
-    uint tintColour = model.colourTint;
-    if (modelHasBiomeLUT(model)) {
-        tintColour = colourData[tintColour + extractBiomeId(quad)];
-    }
-
-    conditionalTinting = vec4(0);
-    if (tintColour != uint(-1)) {
-        flags |= 1u<<2;
-        conditionalTinting = uint2vec4RGBA(tintColour).yzwx;
-    }
-
-    addin = vec4(0.0);
-    if (!isTranslucent) {
-        tinting.w = 0.0;
-        //Encode the face, the lod level and
-        uint encodedData = 0;
-        encodedData |= face;
-        encodedData |= (lodLevel<<3);
-        encodedData |= uint(hasAO)<<6;
-        addin.w = float(encodedData)/255.0;
-    }
-
-    //Apply face tint
-    if (isShaded) {
-        if ((face>>1) == 1) {
-            tinting.xyz *= 0.8f;
-        } else if ((face>>1) == 2) {
-            tinting.xyz *= 0.6f;
-        } else if (face == 0){
-            tinting.xyz *= 0.5f;
-        } else {
-            //TODO: FIXME: DONT HAVE SOME ARBITARY TINT LIKE THIS
-            tinting.xyz *= 0.95f;
-        }
-    }
-
-
-    //solidColour = vec4(vec3(modelId&0xFu, (modelId>>4)&0xFu, (modelId>>8)&0xFu)*(1f/15f),1f);
-}
--- a/src/main/resources/assets/voxy/shaders/lod/gl46/quads2.vert
+++ b/src/main/resources/assets/voxy/shaders/lod/gl46/quads2.vert
@@ -6,13 +6,18 @@
 #import <voxy:lod/block_model.glsl>
 #line 8

+//#define DEBUG_RENDER
+
 layout(location = 0) out vec2 uv;
 layout(location = 1) out flat vec2 baseUV;
 layout(location = 2) out flat vec4 tinting;
 layout(location = 3) out flat vec4 addin;
 layout(location = 4) out flat uint flags;
 layout(location = 5) out flat vec4 conditionalTinting;
-//layout(location = 6) out flat vec4 solidColour;
+
+#ifdef DEBUG_RENDER
+layout(location = 6) out flat uint quadDebug;
+#endif

 uint extractLodLevel() {
    return uint(gl_BaseInstance)>>27;
@@ -143,4 +148,8 @@ void main() {

    vec3 origin = vec3(((extractRelativeLodPos()<<lodLevel) - (baseSectionPos&(ivec3((1<<lodLevel)-1))))<<5);
    gl_Position = MVP*vec4((cornerPos+swizzelDataAxis(face>>1,vec3(cQuadSize,0)))*(1<<lodLevel)+origin, 1.0);
+
+    #ifdef DEBUG_RENDER
+    quadDebug = lodLevel;
+    #endif
 }
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/Queue.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/Queue.glsl
@@ -0,0 +1,20 @@
+//Use defines and undefines to define the queue, allows for hacky reuse of imports
+#ifndef QUEUE_NAME
+#error QUEUE_NAME is not defined
+#endif
+
+
+
+
+void push(queue, item) {
+    //TODO: unimplemented stub. NOTE(review): the parameters have no types and QUEUE_NAME is not used yet - confirm the intended signature
+}
+
+
+
+
+
+
+
+
+#
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/selector.comp
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/selector.comp
@@ -0,0 +1,262 @@
+#version 460 core
+
+#define WORKGROUP 4 //Used as local_size_x and as the length of workingNodes
+#define MINI_BATCH_SIZE 32 //NOTE(review): presumably one entry per bit of a uint mask - confirm
+//The entire uint is a minibatch (each idx is one); uint(-1) has every bit set
+#define MINI_BATCH_MSK (uint(-1))
+//Workgroup shape is WORKGROUP (x) by 8 (y) invocations
+//Each y index is one of the 8 children (octants) of a node in the octree,
+// multiple x indices are used to fill up the workgroup
+layout(local_size_x=WORKGROUP, local_size_y=8) in;
+
+layout(binding = 1, std430) restrict buffer RequestSectionLoadQueue {
+    uint counter;//NOTE(review): presumably the number of entries written to queue - confirm
+    uint[] queue;//Unsized array, so it has to stay the last member of the block
+} requestQueue;
+
+//SectionNodeData is a uvec4 that contains the position + flags + ptr to own render section data + ptr to children
+layout(binding = 2, std430) restrict readonly buffer SectionNodeData {
+    uvec4[] sectionNodes;//One packed node per element, passed to unpackNode by the callers in this file
+};
+
+layout(binding = 3, std430) restrict buffer ActiveWorkingNodeQueue {
+    uint feedbackStatus;//NOTE(review): not referenced in the visible code yet - confirm purpose
+    uint batchIndex;//Advanced with atomicAdd in process() to claim a batch
+    uint end;
+    uint start;
+    uint maxSize;//Needs to be a multiple of local_size_x
+    uint[] queue;//Unsized array, so it has to stay the last member of the block
+} nodeQueue;
+
+
+struct UnpackedNode {
+    ivec4 position;//x,y,z,detail
+    uint  flags;//16 bits
+    uint  self;//Passed to renderMesh; NOTE(review): presumably the ptr to the node's own render section data - confirm
+    uint  children;//NOTE(review): presumably the ptr to the children, not read anywhere in the visible code yet - confirm
+};
+
+UnpackedNode unpackNode(uvec4 data) {
+    UnpackedNode node;
+    //TODO: unimplemented - data is not decoded yet, so node is returned without any field being set
+    return node;
+}
+
+//NOTE: this is different to nanite in the fact that if a node is not loaded, too bad dont render
+
+shared UnpackedNode workingNodes[WORKGROUP];//One unpacked node per x index of the workgroup
+shared uint miniBatchMsk;
+void loadNode() {
+    if (gl_LocalInvocationIndex == 0) {//Check if we need to (only invocation 0 of the workgroup runs this)
+        batchMsk = 0;//Reset the minibatch; NOTE(review): batchMsk is not declared in this file, presumably miniBatchMsk was meant - confirm
+        if (miniBatchMsk == MINI_BATCH_SIZE) {
+            //TODO: empty branch. NOTE(review): this compares a mask against a count, confirm whether MINI_BATCH_MSK was intended
+        }
+    }
+    barrier();
+    if (gl_LocalInvocationID.y == 0) {//One invocation per x index fills its workingNodes slot
+        //NOTE(review): id is not declared in this file - the node index still has to be computed here
+
+        //Need to make it work in y size 8, but only gl_LocalInvocationId.x == 0
+        workingNodes[gl_LocalInvocationID.x] = unpackNode(sectionNodes[id]);
+    }
+    barrier();//Synchronize, also acts as memory barrier
+}
+
+
+
+//Computes screensize of the node and whether it should render itself or its children
+bool shouldRenderChildren(UnpackedNode node) {
+    //TODO: unimplemented stub, there is no return statement yet
+}
+
+//Process a single node and enqueue child nodes if needed into work queue, enqueue self to render and/or request children to load
+void processNode(uint id) {//Called even if it doesn't have any work (id==-1) to ensure uniform control flow for barriers
+    //NOTE(review): node is not declared here and id is never read yet; a second processNode(uint) is also defined further down this file
+    //Bottom 2 bits are status flags, is air and children loaded
+    // node.flags
+
+    //If the childrenloaded flag is not set, send a request for the children of the node to be loaded
+    // if all the children are loaded but we are not and we need to render, render the children and dispatch
+    // a request to load self
+
+    if (shouldRenderChildren(node)) {
+        //Dont care about
+    } else {
+        //TODO: not implemented yet
+    }
+
+}
+
+
+//The actively scheduled/acquired work slot for this group
+shared uint workingBatchIndex;//Value of nodeQueue.batchIndex before this group's add, i.e. the start of the claimed batch
+shared uint workingBatchOffset;
+void process() {
+    if (gl_LocalInvocationIndex == 0) {//This includes both x and y, so a single invocation per workgroup claims the batch
+        workingBatchIndex = atomicAdd(nodeQueue.batchIndex, uint(MINI_BATCH_SIZE));//BATCH_SIZE was never defined; MINI_BATCH_SIZE is the only batch size declared in this file
+    }//NOTE(review): other invocations need a barrier() before they read workingBatchIndex
+}
+
+
+
+void main() {
+    while (true) {
+        barrier();
+        //TODO: loop body and exit condition are not written yet. NOTE(review): a second main() is defined further down this file
+    }
+}
+
+
+
+
+//When a node is processed:
+// its screen bounding box is computed using a fast trick (e.g. if you are viewing it from a quadrant you already know its bounding points (min/max))
+// frustum cull, check hiz
+// if it passes culling, use the screen size to check whether it must render itself
+// or dispatch its children to render
+//      IF its error is small enough, then render itself, its mesh should always be loaded, if not it's a critical error (except maybe if it's a top level node or something)
+//      if its error is too large,
+//          check that all children are loaded (or empty), if they are not all loaded, enqueue a request for the cpu to load
+//          that node's children
+//              if the load queue is full, don't enqueue it to the queue
+//          then instead of rendering children, render its own mesh since it should always be loaded
+
+//Can also reverse the above slightly and make it so that it checks the children before enqueuing them
+
+
+//The main thing to worry about is whether there is enough work to fill the initial few rounds of this
+// before amplification takes effect
+// can do a thing where it initially just blasts child nodes out until the size is small enough
+
+
+
+// NOTE: since matrix multiplication distributes over addition
+//  can precompute the AABB corners with respect to the matrix
+//  then you can just add a translation vector
+
+
+
+
+
+
+
+//TODO: can do this in another way
+// first compute the sections that should either render self or children
+// then, in a separate job queue, work through it
+
+
+
+
+
+
+
+
+
+
+
+uint getChildCount(UnpackedNode node) {
+    //TODO: unimplemented stub, there is no return statement yet
+}
+
+
+//Checks whether a node should be culled based on hiz/frustum
+bool cullNode(UnpackedNode node) {
+    //TODO: unimplemented stub, there is no return statement yet; processNode(uint idx) does nothing for a node when this returns true
+}
+
+//Should render this node, or recurse to children (true means the children are wanted instead of this node)
+bool shouldRenderChildrenInstead(UnpackedNode node) {
+    //TODO: unimplemented stub, there is no return statement yet
+}
+
+//Does the node have its own mesh loaded
+bool nodeHasSelfMesh(UnpackedNode node) {
+    //TODO: unimplemented stub, there is no return statement yet
+}
+
+//Does the node have its children loaded (note! not child meshes)
+bool nodeHasChildrenLoaded(UnpackedNode node) {
+    //TODO: unimplemented stub, there is no return statement yet
+}
+
+//Are all the children's meshes loaded
+bool nodeHasChildMeshesLoaded(UnpackedNode node) {
+    //TODO: unimplemented stub, there is no return statement yet; not called anywhere in the visible code
+}
+
+void request(uint type, uint idx) {
+    //TODO: unimplemented stub. Per the call sites in processNode(uint idx): type 0 asks for the node's own mesh, type 1 for its children; idx is the node index
+}
+
+void renderMesh(uint idx) {
+    //TODO: unimplemented stub. The callers in this file pass node.self as idx
+}
+
+void enqueueChildren(uint arg, UnpackedNode node) {//Callers pass arg 0 when children are preferred and 1 when they stand in for a missing self mesh
+    uint cnt = getChildCount(node);//Not used yet, nothing is written to a queue so far
+    //TODO: the queue needs 2 counters, the pre and post atomic,
+    // pre is incremented to get index
+    // queue is written to
+    // post is then incremented to signal
+}
+
+void reportCritical(uint type) {
+    //TODO: unimplemented stub. Per the call sites: type 0 = children wanted but neither they nor the self mesh are loaded, type 1 = self mesh wanted but nothing can be rendered
+}
+
+void processNode(uint idx) {
+    UnpackedNode node = unpackNode(sectionNodes[idx]);
+    //Culled nodes (hiz/frustum) produce no work at all
+    if (cullNode(node)) {
+        return;
+    }
+
+    //Screenspace error says the children should be drawn rather than this node
+    if (shouldRenderChildrenInstead(node)) {
+        if (nodeHasChildrenLoaded(node)) {
+            //Children are available, push them onto the work queue
+            enqueueChildren(0, node);
+            return;
+        }
+        if (!nodeHasSelfMesh(node)) {
+            //Critical: neither the children nor our own mesh are loaded
+            reportCritical(0);
+            return;
+        }
+        //Fall back to drawing our own mesh and ask for the children to be loaded
+        renderMesh(node.self);
+        request(1, idx);
+        return;
+    }
+
+    //This node is detailed enough to be drawn itself
+    if (nodeHasSelfMesh(node)) {
+        renderMesh(node.self);
+        return;
+    }
+    //Own mesh is missing: request it, then try to draw the children in its place
+    request(0, idx);
+    if (nodeHasChildrenLoaded(node)) {//Might need to be nodeHasChildMeshesLoaded
+        enqueueChildren(1, node);
+    } else {
+        //Very bad: nothing at all can be rendered for this node
+        reportCritical(1);
+    }
+}
+
+//Pseudo code, one thread, one load
+void main() {
+    while (true) {//NOTE(review): there is no exit condition yet
+        //Try to process a node queue entry
+        uint work = atomicAdd(workingNodeQueuePos, 1);//NOTE(review): workingNodeQueuePos is not declared in this file - confirm where it lives
+        uint idx = work&0xFFFFFFu;//Low 24 bits
+        uint arg = work>>24;//High 8 bits, not used yet
+        if (idx < workingNodeQueueEnd) {//NOTE(review): workingNodeQueueEnd is not declared in this file either
+            //TODO: not implemented yet. NOTE(review): presumably processNode(idx) is meant to be called here - confirm
+
+        } else {
+            //Do other queue work however we still have the work slot allocated
+        }
+    }
+}