Shuffled around shaders

2025-06-06 17:00:25 +10:00
parent d24b719a93
commit 35850082d5
11 changed files with 27 additions and 371 deletions
--- a/src/main/resources/assets/voxy/shaders/lod/block_model.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/block_model.glsl
@@ -1,3 +1,10 @@
+struct BlockModel {
+    uint faceData[6];
+    uint flagsA;
+    uint colourTint;
+    uint _pad[8];
+};
+
 //TODO: FIXME: this isnt actually correct cause depending on the face (i think) it could be 1/64 th of a position off
 // but im going to assume that since we are dealing with huge render distances, this shouldent matter that much
 float extractFaceIndentation(uint faceData) {
@@ -18,13 +25,13 @@ uint faceHasAlphaCuttoutOverride(uint faceData) {
 }

 bool modelHasBiomeLUT(BlockModel model) {
-    return ((model.flagsA)&2) != 0;
+    return ((model.flagsA)&2u) != 0;
 }

 bool modelIsTranslucent(BlockModel model) {
-    return ((model.flagsA)&4) != 0;
+    return ((model.flagsA)&4u) != 0;
 }

 bool modelHasMipmaps(BlockModel model) {
-    return ((model.flagsA)&8) != 0;
+    return ((model.flagsA)&8u) != 0;
 }
--- a/src/main/resources/assets/voxy/shaders/lod/gl46/bindings.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/gl46/bindings.glsl
@@ -7,24 +7,6 @@ layout(binding = 0, std140) uniform SceneUniform {
    vec3 cameraSubPos;
 };

-struct BlockModel {
-    uint faceData[6];
-    uint flagsA;
-    uint colourTint;
-    uint _pad[8];
-};
-
-struct SectionMeta {
-    uint posA;
-    uint posB;
-    uint AABB;
-    uint ptr;
-    uint cntA;
-    uint cntB;
-    uint cntC;
-    uint cntD;
-};
-
 //TODO: see if making the stride 2*4*4 bytes or something cause you get that 16 byte write
 struct DrawCommand {
    uint  count;
--- a/src/main/resources/assets/voxy/shaders/lod/gl46/buildtranslucents.comp
+++ b/src/main/resources/assets/voxy/shaders/lod/gl46/buildtranslucents.comp
@@ -8,8 +8,8 @@ layout(local_size_x = 128) in;
 #define SECTION_METADATA_BUFFER_BINDING 3
 #define INDIRECT_SECTION_LOOKUP_BINDING 4

-#import <voxy:lod/gl46/bindings.glsl>
 #import <voxy:lod/section.glsl>
+#import <voxy:lod/gl46/bindings.glsl>

 /*
    uint  count;
--- a/src/main/resources/assets/voxy/shaders/lod/gl46/cmdgen.comp
+++ b/src/main/resources/assets/voxy/shaders/lod/gl46/cmdgen.comp
@@ -11,8 +11,8 @@ layout(local_size_x = 128) in;
 #define POSITION_SCRATCH_BINDING 6
 #define POSITION_SCRATCH_ACCESS writeonly

-#import <voxy:lod/gl46/bindings.glsl>
 #import <voxy:lod/section.glsl>
+#import <voxy:lod/gl46/bindings.glsl>

 //https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GL_EXT_shader_16bit_storage.txt
 // adds support for uint8_t which can use for compact visibility buffer
--- a/src/main/resources/assets/voxy/shaders/lod/gl46/cull/raster.vert
+++ b/src/main/resources/assets/voxy/shaders/lod/gl46/cull/raster.vert
@@ -6,8 +6,8 @@
 #define VISIBILITY_BUFFER_BINDING 2
 #define INDIRECT_SECTION_LOOKUP_BINDING 3

-#import <voxy:lod/gl46/bindings.glsl>
 #import <voxy:lod/section.glsl>
+#import <voxy:lod/gl46/bindings.glsl>

 flat out uint id;
 flat out uint value;
--- a/src/main/resources/assets/voxy/shaders/lod/gl46/quads2.vert
+++ b/src/main/resources/assets/voxy/shaders/lod/gl46/quads2.vert
@@ -2,7 +2,6 @@
 #extension GL_ARB_gpu_shader_int64 : enable

 #define QUAD_BUFFER_BINDING 1
-#define SECTION_METADATA_BUFFER_BINDING 2
 #define MODEL_BUFFER_BINDING 3
 #define MODEL_COLOUR_BUFFER_BINDING 4
 #define POSITION_SCRATCH_BINDING 5
@@ -10,8 +9,8 @@


 #import <voxy:lod/quad_format.glsl>
-#import <voxy:lod/gl46/bindings.glsl>
 #import <voxy:lod/block_model.glsl>
+#import <voxy:lod/gl46/bindings.glsl>

 //#define DEBUG_RENDER

--- a/src/main/resources/assets/voxy/shaders/lod/gl46/test/raw.vert
+++ b/src/main/resources/assets/voxy/shaders/lod/gl46/test/raw.vert
@@ -9,8 +9,8 @@


 #import <voxy:lod/quad_format.glsl>
-#import <voxy:lod/gl46/bindings.glsl>
 #import <voxy:lod/block_model.glsl>
+#import <voxy:lod/gl46/bindings.glsl>

 layout(location = 6) out flat uint quadDebug;

--- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace2.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace2.glsl
@@ -1,67 +0,0 @@
-
-layout(binding = HIZ_BINDING_INDEX) uniform sampler2DShadow hizDepthSampler;
-
-vec3 minBB;
-vec3 maxBB;
-vec2 size;
-
-
-//Sets up screenspace with the given node id, returns true on success false on failure/should not continue
-//Accesses data that is setup in the main traversal and is just shared to here
-void setupScreenspace(in UnpackedNode node) {
-    //TODO: implment transform support
-    Transform transform = transforms[getTransformIndex(node)];
-
-
-    vec4 base = VP*vec4(vec3(((node.pos<<node.lodLevel)-camSecPos)<<5)-camSubSecPos, 1);
-
-    //TODO: AABB SIZES not just a max cube
-
-    //vec3 minPos = minSize + basePos;
-    //vec3 maxPos = maxSize + basePos;
-
-    minBB = base.xyz/base.w;
-    maxBB = minBB;
-
-    for (int i = 1; i < 8; i++) {
-        //NOTE!: cant this be precomputed and put in an array?? in the scene uniform??
-        vec4 pPoint = (VP*vec4(vec3((i&1)!=0,(i&2)!=0,(i&4)!=0),1))*(32<<node.lodLevel);//Size of section is 32x32x32 (need to change it to a bounding box in the future)
-        pPoint += base;
-        vec3 point = pPoint.xyz/pPoint.w;
-        //TODO: CLIP TO VIEWPORT
-        minBB = min(minBB, point);
-        maxBB = max(maxBB, point);
-    }
-
-    //TODO: MORE ACCURATLY DETERMIN SCREENSPACE AREA, this can be done by computing and adding
-    //  the projected surface area of each face/quad which winding order faces the camera
-    //  (this is just the dot product of 2 projected vectors)
-
-    //can do a funny by not doing the perspective divide except on the output of the area
-
-    //printf("Screenspace MIN: %f, %f, %f  MAX: %f, %f, %f", minBB.x,minBB.y,minBB.z, maxBB.x,maxBB.y,maxBB.z);
-
-    size = maxBB.xy - minBB.xy;
-
-}
-
-//Checks if the node is implicitly culled (outside frustum)
-bool outsideFrustum() {
-    return any(lessThanEqual(maxBB, vec3(-1f, -1f, 0f))) || any(lessThanEqual(vec3(1f, 1f, 1f), minBB));
-}
-
-bool isCulledByHiz() {
-    if (minBB.z < 0) {//Minpoint is behind the camera, its always going to pass
-        return false;
-    }
-    vec2 ssize = size.xy * vec2(ivec2(screenW, screenH));
-    float miplevel = ceil(log2(max(max(ssize.x, ssize.y),1)));
-    vec2 midpoint = (maxBB.xy + minBB.xy)*0.5;
-    return textureLod(hizDepthSampler, vec3(midpoint, minBB.z), miplevel) > 0.0001;
-}
-
-//Returns if we should decend into its children or not
-bool shouldDecend() {
-    //printf("Screen area %f: %f, %f", (size.x*size.y*float(screenW)*float(screenH)), float(screenW), float(screenH));
-    return (size.x*size.y*screenW*screenH) > decendSSS;
-}
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/selectorold.comp
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/selectorold.comp
@@ -1,262 +0,0 @@
-#version 460 core
-
-#define WORKGROUP 4
-#define MINI_BATCH_SIZE 32
-//The entire uint is a minibatch (each idx is one)
-#define MINI_BATCH_MSK (uint(-1))
-
-//Each y dim is a quadrent in the octree
-// multiple x dims to fill up workgroups
-layout(local_size_x=WORKGROUP, local_size_y=8) in;
-
-layout(binding = 1, std430) restrict buffer RequestSectionLoadQueue {
-    uint counter;
-    uint[] queue;
-} requestQueue;
-
-//SectionNodeData is a uvec4 that contains the position + flags + ptr to own render section data + ptr to children
-layout(binding = 2, std430) restrict readonly buffer SectionNodeData {
-    uvec4[] sectionNodes;
-};
-
-layout(binding = 3, std430) restrict buffer ActiveWorkingNodeQueue {
-    uint feedbackStatus;
-    uint batchIndex;
-    uint end;
-    uint start;
-    uint maxSize;//Needs to be a multiple of local_size_x
-    uint[] queue;
-} nodeQueue;
-
-
-struct UnpackedNode {
-    ivec4 position;//x,y,z,detail
-    uint  flags;//16 bits
-    uint  self;
-    uint  children;
-};
-
-UnpackedNode unpackNode(uvec4 data) {
-    UnpackedNode node;
-
-    return node;
-}
-
-//NOTE: this is different to nanite in the fact that if a node is not loaded, too bad dont render
-
-shared UnpackedNode workingNodes[WORKGROUP];
-shared uint miniBatchMsk;
-void loadNode() {
-    if (gl_LocalInvocationIndex == 0) {//Check if we need to
-        batchMsk = 0;//Reset the minibatch
-        if (miniBatchMsk == MINI_BATCH_SIZE) {
-
-        }
-    }
-    barrier();
-    if (gl_LocalInvocationID.y == 0) {
-
-
-        //Need to make it work in y size 8, but only gl_LocalInvocationId.x == 0
-        workingNodes[gl_LocalInvocationID.x] = unpackNode(sectionNodes[id]);
-    }
-    barrier();//Synchonize, also acts as memory barrier
-}
-
-
-
-//Computes screensize of the node and whether it should render itself or its children
-bool shouldRenderChildren(UnpackedNode node) {
-
-}
-
-//Process a single node and enqueue child nodes if needed into work queue, enqueue self to render and/or request children to load
-void processNode(uint id) {//Called even if it doesnt have any work (id==-1) to ensure uniform control flow for barriers
-
-    //Bottom 2 bits are status flags, is air and children loaded
-    // node.flags
-
-    //If the childrenloaded flag is not set, send a request for the children of the node to be loaded
-    // if all the children are loaded but we are not and we need to render, render the children and dispatch
-    // a request to load self
-
-    if (shouldRenderChildren(node)) {
-        //Dont care about
-    } else {
-
-    }
-
-}
-
-
-//The activly schedualed/acquired work slot for this group
-shared uint workingBatchIndex;
-shared uint workingBatchOffset;
-void process() {
-    if (gl_LocalInvocationIndex == 0) {//This includes both x and y
-        workingBatchIndex = atomicAdd(nodeQueue.batchIndex, BATCH_SIZE);
-    }
-}
-
-
-
-void main() {
-    while (true) {
-        barrier();
-
-    }
-}
-
-
-
-
-//when a node is processed,
-// compute its screen bounding box is computed using fast trick (e.g. if your viewing it from a quadrent you already know its bounding points (min/max))
-// frustum cull, check hiz
-// if it passes culling, use the screensize to check wether it must render itself
-// or dispatch its children to render
-//      IF its error is small enough, then render itself, its mesh should always be loaded, if not its a critical error (except maybe if its a top level node or something)
-//      if its error is too large,
-//          check that all children are loaded (or empty), if they are not all loaded, enqueu a request for the cpu to load
-//          that nodes children
-//              if the load queue is full, dont enqueue it to the queue
-//          then instead of rendering children, render its own mesh since it should always be loaded
-
-//Can also reverse the above slightly and make it so that it checks the children before enqueuing them
-
-
-//the main thing to worry about is if there is enough work to fill the inital few rounds of this
-// before amplification takes effect
-// can do a thing where it initally just blasts child nodes out until the size is small enough
-
-
-
-// NOTE: since matrix multiplication distributes over addition
-//  can precompute the AABB corners with respect to the matrix
-//  then you can just add a translation vector
-
-
-
-
-
-
-
-//TODO: can do in another way
-// first compute the sections that should either render self or childs
-// then in as a seperate job queue work though it
-
-
-
-
-
-
-
-
-
-
-
-uint getChildCount(UnpackedNode node) {
-
-}
-
-
-//Checks whether a node should be culled based on hiz/frustum
-bool cullNode(UnpackedNode node) {
-
-}
-
-//Should render this node, or recurse to children
-bool shouldRenderChildrenInstead(UnpackedNode node) {
-
-}
-
-//Does the node have its own mesh loaded
-bool nodeHasSelfMesh(UnpackedNode node) {
-
-}
-
-//Does the node its children loaded (note! not child meshes)
-bool nodeHasChildrenLoaded(UnpackedNode node) {
-
-}
-
-//Are all the childrens meshes loaded
-bool nodeHasChildMeshesLoaded(UnpackedNode node) {
-
-}
-
-void request(uint type, uint idx) {
-
-}
-
-void renderMesh(uint idx) {
-
-}
-
-void enqueueChildren(uint arg, UnpackedNode node) {
-    uint cnt = getChildCount(node);
-    //TODO: the queue needs 2 counters, the pre and post atomic,
-    // pre is incremented to get index
-    // queue is written to
-    // post is then incremented to signal
-}
-
-void reportCritical(uint type) {
-
-}
-
-void processNode(uint idx) {
-    UnpackedNode node = unpackNode(sectionNodes[idx]);
-    if (!cullNode(node)) {
-        //Should we render children instead of ourselves with respect to screenspace error
-        if (shouldRenderChildrenInstead(node)) {
-            if (nodeHasChildrenLoaded(node)) {
-                //Dispatch nodes to queue
-                enqueueChildren(0, node);
-            } else {
-                //Children arnt loaded so either render self mesh or if we cant
-                // abort basicly must request nodes
-                if (nodeHasSelfMesh(node)) {
-                    //Render self and dispatch request to load children
-                    renderMesh(node.self);
-                    request(1, idx);
-                } else {
-                    //Critical issue, no are loaded and self has no mesh
-                    reportCritical(0);
-                }
-            }
-        } else {
-            if (nodeHasSelfMesh(node)) {
-                //render self
-                renderMesh(node.self);
-            } else {
-                //Request that self mesh is loaded
-                request(0, idx);
-
-                //render children instead
-                if (nodeHasChildrenLoaded(node)) {//Might need to be node nodeHasChildMeshesLoaded
-                    enqueueChildren(1, node);
-                } else {
-                    //This is very bad, it means cant render anything
-                    reportCritical(1);
-                }
-            }
-        }
-    }
-}
-
-//Psudo code, one thread, one load
-void main() {
-    while (true) {
-        //Try to process a node queue entry
-        uint work = atomicAdd(workingNodeQueuePos, 1);
-        uint idx = work&0xFFFFFFu;
-        uint arg = work>>24;
-        if (idx < workingNodeQueueEnd) {
-
-
-        } else {
-            //Do other queue work however we still have the work slot allocated
-        }
-    }
-}
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/transform.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/transform.glsl
@@ -1,14 +0,0 @@
-//This provides per scene/viewport/transfrom access, that is, a node can be attached to a specific scene/viewport/transfrom, this is so that
-// different nodes/models can have different viewports/scenes/transfrom which enables some very cool things like
-// absolutly massive VS2 structures should... just work :tm: - todd howard
-
-struct Transform {
-    mat4 transform;
-    ivec4 originPos;
-    ivec4 worldPos;
-};
-
-
-layout(binding = TRANSFORM_ARRAY_INDEX, std140) uniform TransformArray {
-    Transform transforms[32];
-};
--- a/src/main/resources/assets/voxy/shaders/lod/section.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/section.glsl
@@ -1,3 +1,14 @@
+struct SectionMeta {
+    uint posA;
+    uint posB;
+    uint AABB;
+    uint ptr;
+    uint cntA;
+    uint cntB;
+    uint cntC;
+    uint cntD;
+};
+
 uint extractDetail(SectionMeta section) {
    return section.posA>>28;
 }
@@ -5,7 +16,7 @@ uint extractDetail(SectionMeta section) {
 ivec3 extractPosition(SectionMeta section) {
    int y = ((int(section.posA)<<4)>>24);
    int x = (int(section.posB)<<4)>>8;
-    int z = int((section.posA&((1<<20)-1))<<4);
+    int z = int((section.posA&((1u<<20)-1))<<4);
    z |= int(section.posB>>28);
    z <<= 8;
    z >>= 8;