From 35850082d533ca1f778de3e8591b31562e64f67f Mon Sep 17 00:00:00 2001 From: mcrcortex <18544518+MCRcortex@users.noreply.github.com> Date: Fri, 6 Jun 2025 17:00:25 +1000 Subject: [PATCH] Shuffled around shaders --- .../assets/voxy/shaders/lod/block_model.glsl | 13 +- .../voxy/shaders/lod/gl46/bindings.glsl | 18 -- .../shaders/lod/gl46/buildtranslucents.comp | 2 +- .../assets/voxy/shaders/lod/gl46/cmdgen.comp | 2 +- .../voxy/shaders/lod/gl46/cull/raster.vert | 2 +- .../assets/voxy/shaders/lod/gl46/quads2.vert | 3 +- .../voxy/shaders/lod/gl46/test/raw.vert | 2 +- .../lod/hierarchical/screenspace2.glsl | 67 ----- .../shaders/lod/hierarchical/selectorold.comp | 262 ------------------ .../shaders/lod/hierarchical/transform.glsl | 14 - .../assets/voxy/shaders/lod/section.glsl | 13 +- 11 files changed, 27 insertions(+), 371 deletions(-) delete mode 100644 src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace2.glsl delete mode 100644 src/main/resources/assets/voxy/shaders/lod/hierarchical/selectorold.comp delete mode 100644 src/main/resources/assets/voxy/shaders/lod/hierarchical/transform.glsl diff --git a/src/main/resources/assets/voxy/shaders/lod/block_model.glsl b/src/main/resources/assets/voxy/shaders/lod/block_model.glsl index f2454303..c4013b8d 100644 --- a/src/main/resources/assets/voxy/shaders/lod/block_model.glsl +++ b/src/main/resources/assets/voxy/shaders/lod/block_model.glsl @@ -1,3 +1,10 @@ +struct BlockModel { + uint faceData[6]; + uint flagsA; + uint colourTint; + uint _pad[8]; +}; + //TODO: FIXME: this isnt actually correct cause depending on the face (i think) it could be 1/64 th of a position off // but im going to assume that since we are dealing with huge render distances, this shouldent matter that much float extractFaceIndentation(uint faceData) { @@ -18,13 +25,13 @@ uint faceHasAlphaCuttoutOverride(uint faceData) { } bool modelHasBiomeLUT(BlockModel model) { - return ((model.flagsA)&2) != 0; + return ((model.flagsA)&2u) != 0; } bool modelIsTranslucent(BlockModel model) { - return ((model.flagsA)&4) != 0; + return ((model.flagsA)&4u) != 0; } bool modelHasMipmaps(BlockModel model) { - return ((model.flagsA)&8) != 0; + return ((model.flagsA)&8u) != 0; } \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46/bindings.glsl b/src/main/resources/assets/voxy/shaders/lod/gl46/bindings.glsl index 04e5de6f..2a5f50c6 100644 --- a/src/main/resources/assets/voxy/shaders/lod/gl46/bindings.glsl +++ b/src/main/resources/assets/voxy/shaders/lod/gl46/bindings.glsl @@ -7,24 +7,6 @@ layout(binding = 0, std140) uniform SceneUniform { vec3 cameraSubPos; }; -struct BlockModel { - uint faceData[6]; - uint flagsA; - uint colourTint; - uint _pad[8]; -}; - -struct SectionMeta { - uint posA; - uint posB; - uint AABB; - uint ptr; - uint cntA; - uint cntB; - uint cntC; - uint cntD; -}; - //TODO: see if making the stride 2*4*4 bytes or something cause you get that 16 byte write struct DrawCommand { uint count; diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46/buildtranslucents.comp b/src/main/resources/assets/voxy/shaders/lod/gl46/buildtranslucents.comp index 90558fb4..864791ae 100644 --- a/src/main/resources/assets/voxy/shaders/lod/gl46/buildtranslucents.comp +++ b/src/main/resources/assets/voxy/shaders/lod/gl46/buildtranslucents.comp @@ -8,8 +8,8 @@ layout(local_size_x = 128) in; #define SECTION_METADATA_BUFFER_BINDING 3 #define INDIRECT_SECTION_LOOKUP_BINDING 4 -#import #import +#import /* uint count; diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46/cmdgen.comp b/src/main/resources/assets/voxy/shaders/lod/gl46/cmdgen.comp index e049123f..aaa737b4 100644 --- a/src/main/resources/assets/voxy/shaders/lod/gl46/cmdgen.comp +++ b/src/main/resources/assets/voxy/shaders/lod/gl46/cmdgen.comp @@ -11,8 +11,8 @@ layout(local_size_x = 128) in; #define POSITION_SCRATCH_BINDING 6 #define POSITION_SCRATCH_ACCESS writeonly -#import #import +#import //https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GL_EXT_shader_16bit_storage.txt // adds support for uint8_t which can use for compact visibility buffer diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46/cull/raster.vert b/src/main/resources/assets/voxy/shaders/lod/gl46/cull/raster.vert index ed57fda0..bfa0b55a 100644 --- a/src/main/resources/assets/voxy/shaders/lod/gl46/cull/raster.vert +++ b/src/main/resources/assets/voxy/shaders/lod/gl46/cull/raster.vert @@ -6,8 +6,8 @@ #define VISIBILITY_BUFFER_BINDING 2 #define INDIRECT_SECTION_LOOKUP_BINDING 3 -#import #import +#import flat out uint id; flat out uint value; diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46/quads2.vert b/src/main/resources/assets/voxy/shaders/lod/gl46/quads2.vert index 713d57bb..518da095 100644 --- a/src/main/resources/assets/voxy/shaders/lod/gl46/quads2.vert +++ b/src/main/resources/assets/voxy/shaders/lod/gl46/quads2.vert @@ -2,7 +2,6 @@ #extension GL_ARB_gpu_shader_int64 : enable #define QUAD_BUFFER_BINDING 1 -#define SECTION_METADATA_BUFFER_BINDING 2 #define MODEL_BUFFER_BINDING 3 #define MODEL_COLOUR_BUFFER_BINDING 4 #define POSITION_SCRATCH_BINDING 5 @@ -10,8 +9,8 @@ #import -#import #import +#import //#define DEBUG_RENDER diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46/test/raw.vert b/src/main/resources/assets/voxy/shaders/lod/gl46/test/raw.vert index ecca7c37..6f084283 100644 --- a/src/main/resources/assets/voxy/shaders/lod/gl46/test/raw.vert +++ b/src/main/resources/assets/voxy/shaders/lod/gl46/test/raw.vert @@ -9,8 +9,8 @@ #import -#import #import +#import layout(location = 6) out flat uint quadDebug; diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace2.glsl b/src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace2.glsl deleted file mode 100644 index 418b2566..00000000 --- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace2.glsl +++ /dev/null @@ -1,67 +0,0 @@ - -layout(binding = HIZ_BINDING_INDEX) uniform sampler2DShadow hizDepthSampler; - -vec3 minBB; -vec3 maxBB; -vec2 size; - - -//Sets up screenspace with the given node id, returns true on success false on failure/should not continue -//Accesses data that is setup in the main traversal and is just shared to here -void setupScreenspace(in UnpackedNode node) { - //TODO: implment transform support - Transform transform = transforms[getTransformIndex(node)]; - - - vec4 base = VP*vec4(vec3(((node.pos< 0.0001; -} - -//Returns if we should decend into its children or not -bool shouldDecend() { - //printf("Screen area %f: %f, %f", (size.x*size.y*float(screenW)*float(screenH)), float(screenW), float(screenH)); - return (size.x*size.y*screenW*screenH) > decendSSS; -} \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchical/selectorold.comp b/src/main/resources/assets/voxy/shaders/lod/hierarchical/selectorold.comp deleted file mode 100644 index 7bfaaab7..00000000 --- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/selectorold.comp +++ /dev/null @@ -1,262 +0,0 @@ -#version 460 core - -#define WORKGROUP 4 -#define MINI_BATCH_SIZE 32 -//The entire uint is a minibatch (each idx is one) -#define MINI_BATCH_MSK (uint(-1)) - -//Each y dim is a quadrent in the octree -// multiple x dims to fill up workgroups -layout(local_size_x=WORKGROUP, local_size_y=8) in; - -layout(binding = 1, std430) restrict buffer RequestSectionLoadQueue { - uint counter; - uint[] queue; -} requestQueue; - -//SectionNodeData is a uvec4 that contains the position + flags + ptr to own render section data + ptr to children -layout(binding = 2, std430) restrict readonly buffer SectionNodeData { - uvec4[] sectionNodes; -}; - -layout(binding = 3, std430) restrict buffer ActiveWorkingNodeQueue { - uint feedbackStatus; - uint batchIndex; - uint end; - uint start; - uint maxSize;//Needs to be a multiple of local_size_x - uint[] queue; -} nodeQueue; - - -struct UnpackedNode { - ivec4 position;//x,y,z,detail - uint flags;//16 bits - uint self; - uint children; -}; - -UnpackedNode unpackNode(uvec4 data) { - UnpackedNode node; - - return node; -} - -//NOTE: this is different to nanite in the fact that if a node is not loaded, too bad dont render - -shared UnpackedNode workingNodes[WORKGROUP]; -shared uint miniBatchMsk; -void loadNode() { - if (gl_LocalInvocationIndex == 0) {//Check if we need to - batchMsk = 0;//Reset the minibatch - if (miniBatchMsk == MINI_BATCH_SIZE) { - - } - } - barrier(); - if (gl_LocalInvocationID.y == 0) { - - - //Need to make it work in y size 8, but only gl_LocalInvocationId.x == 0 - workingNodes[gl_LocalInvocationID.x] = unpackNode(sectionNodes[id]); - } - barrier();//Synchonize, also acts as memory barrier -} - - - -//Computes screensize of the node and whether it should render itself or its children -bool shouldRenderChildren(UnpackedNode node) { - -} - -//Process a single node and enqueue child nodes if needed into work queue, enqueue self to render and/or request children to load -void processNode(uint id) {//Called even if it doesnt have any work (id==-1) to ensure uniform control flow for barriers - - //Bottom 2 bits are status flags, is air and children loaded - // node.flags - - //If the childrenloaded flag is not set, send a request for the children of the node to be loaded - // if all the children are loaded but we are not and we need to render, render the children and dispatch - // a request to load self - - if (shouldRenderChildren(node)) { - //Dont care about - } else { - - } - -} - - -//The activly schedualed/acquired work slot for this group -shared uint workingBatchIndex; -shared uint workingBatchOffset; -void process() { - if (gl_LocalInvocationIndex == 0) {//This includes both x and y - workingBatchIndex = atomicAdd(nodeQueue.batchIndex, BATCH_SIZE); - } -} - - - -void main() { - while (true) { - barrier(); - - } -} - - - - -//when a node is processed, -// compute its screen bounding box is computed using fast trick (e.g. if your viewing it from a quadrent you already know its bounding points (min/max)) -// frustum cull, check hiz -// if it passes culling, use the screensize to check wether it must render itself -// or dispatch its children to render -// IF its error is small enough, then render itself, its mesh should always be loaded, if not its a critical error (except maybe if its a top level node or something) -// if its error is too large, -// check that all children are loaded (or empty), if they are not all loaded, enqueu a request for the cpu to load -// that nodes children -// if the load queue is full, dont enqueue it to the queue -// then instead of rendering children, render its own mesh since it should always be loaded - -//Can also reverse the above slightly and make it so that it checks the children before enqueuing them - - -//the main thing to worry about is if there is enough work to fill the inital few rounds of this -// before amplification takes effect -// can do a thing where it initally just blasts child nodes out until the size is small enough - - - -// NOTE: since matrix multiplication distributes over addition -// can precompute the AABB corners with respect to the matrix -// then you can just add a translation vector - - - - - - - -//TODO: can do in another way -// first compute the sections that should either render self or childs -// then in as a seperate job queue work though it - - - - - - - - - - - -uint getChildCount(UnpackedNode node) { - -} - - -//Checks whether a node should be culled based on hiz/frustum -bool cullNode(UnpackedNode node) { - -} - -//Should render this node, or recurse to children -bool shouldRenderChildrenInstead(UnpackedNode node) { - -} - -//Does the node have its own mesh loaded -bool nodeHasSelfMesh(UnpackedNode node) { - -} - -//Does the node its children loaded (note! not child meshes) -bool nodeHasChildrenLoaded(UnpackedNode node) { - -} - -//Are all the childrens meshes loaded -bool nodeHasChildMeshesLoaded(UnpackedNode node) { - -} - -void request(uint type, uint idx) { - -} - -void renderMesh(uint idx) { - -} - -void enqueueChildren(uint arg, UnpackedNode node) { - uint cnt = getChildCount(node); - //TODO: the queue needs 2 counters, the pre and post atomic, - // pre is incremented to get index - // queue is written to - // post is then incremented to signal -} - -void reportCritical(uint type) { - -} - -void processNode(uint idx) { - UnpackedNode node = unpackNode(sectionNodes[idx]); - if (!cullNode(node)) { - //Should we render children instead of ourselves with respect to screenspace error - if (shouldRenderChildrenInstead(node)) { - if (nodeHasChildrenLoaded(node)) { - //Dispatch nodes to queue - enqueueChildren(0, node); - } else { - //Children arnt loaded so either render self mesh or if we cant - // abort basicly must request nodes - if (nodeHasSelfMesh(node)) { - //Render self and dispatch request to load children - renderMesh(node.self); - request(1, idx); - } else { - //Critical issue, no are loaded and self has no mesh - reportCritical(0); - } - } - } else { - if (nodeHasSelfMesh(node)) { - //render self - renderMesh(node.self); - } else { - //Request that self mesh is loaded - request(0, idx); - - //render children instead - if (nodeHasChildrenLoaded(node)) {//Might need to be node nodeHasChildMeshesLoaded - enqueueChildren(1, node); - } else { - //This is very bad, it means cant render anything - reportCritical(1); - } - } - } - } -} - -//Psudo code, one thread, one load -void main() { - while (true) { - //Try to process a node queue entry - uint work = atomicAdd(workingNodeQueuePos, 1); - uint idx = work&0xFFFFFFu; - uint arg = work>>24; - if (idx < workingNodeQueueEnd) { - - - } else { - //Do other queue work however we still have the work slot allocated - } - } -} \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchical/transform.glsl b/src/main/resources/assets/voxy/shaders/lod/hierarchical/transform.glsl deleted file mode 100644 index c6b71e57..00000000 --- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/transform.glsl +++ /dev/null @@ -1,14 +0,0 @@ -//This provides per scene/viewport/transfrom access, that is, a node can be attached to a specific scene/viewport/transfrom, this is so that -// different nodes/models can have different viewports/scenes/transfrom which enables some very cool things like -// absolutly massive VS2 structures should... just work :tm: - todd howard - -struct Transform { - mat4 transform; - ivec4 originPos; - ivec4 worldPos; -}; - - -layout(binding = TRANSFORM_ARRAY_INDEX, std140) uniform TransformArray { - Transform transforms[32]; -}; diff --git a/src/main/resources/assets/voxy/shaders/lod/section.glsl b/src/main/resources/assets/voxy/shaders/lod/section.glsl index 1eed3f6b..52bcebeb 100644 --- a/src/main/resources/assets/voxy/shaders/lod/section.glsl +++ b/src/main/resources/assets/voxy/shaders/lod/section.glsl @@ -1,3 +1,14 @@ +struct SectionMeta { + uint posA; + uint posB; + uint AABB; + uint ptr; + uint cntA; + uint cntB; + uint cntC; + uint cntD; +}; + uint extractDetail(SectionMeta section) { return section.posA>>28; } @@ -5,7 +16,7 @@ uint extractDetail(SectionMeta section) { ivec3 extractPosition(SectionMeta section) { int y = ((int(section.posA)<<4)>>24); int x = (int(section.posB)<<4)>>8; - int z = int((section.posA&((1<<20)-1))<<4); + int z = int((section.posA&((1u<<20)-1))<<4); z |= int(section.posB>>28); z <<= 8; z >>= 8;