Shader compiles

2024-07-11 16:50:32 +10:00
parent 716bf097bf
commit 1e855a0ed0
6 changed files with 64 additions and 29 deletions
--- a/src/main/java/me/cortex/voxy/client/core/rendering/Gl46MeshletsFarWorldRenderer.java
+++ b/src/main/java/me/cortex/voxy/client/core/rendering/Gl46MeshletsFarWorldRenderer.java
@@ -44,6 +44,7 @@ import static org.lwjgl.opengl.NVRepresentativeFragmentTest.GL_REPRESENTATIVE_FR
 // the shader can cull the vertices of any quad that has its index over the expected quad count
 // this could potentially result in a fair bit of memory savings (especially if used in normal mc terrain rendering)
 public class Gl46MeshletsFarWorldRenderer extends AbstractFarWorldRenderer<Gl46MeshletViewport, DefaultGeometryManager> {
    private final Shader lodShader = Shader.make()
            .define("QUADS_PER_MESHLET", RenderDataFactory.QUADS_PER_MESHLET)
            .add(ShaderType.VERTEX, "voxy:lod/gl46mesh/quads.vert")
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchial/binding_points.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchial/binding_points.glsl
@@ -0,0 +1,10 @@
 #define SCENE_UNIFORM_INDEX 0
 #define NODE_DATA_INDEX 1
 #define ATOMIC_DATA_INDEX 2
 #define REQUEST_QUEUE_INDEX 3
 #define RENDER_QUEUE_INDEX 4
 #define TRANSFORM_ARRAY_INDEX 5
 //Samplers
 #define HIZ_BINDING_INDEX 0
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchial/node.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchial/node.glsl
@@ -3,7 +3,7 @@ layout(binding = NODE_DATA_INDEX, std430) restrict buffer NodeData {
 //Needs to be read and writeable for marking data,
 //(could do an evil violation, make this readonly, then have a writeonly variant, which means that writing might not be visible but will show up by the next frame)
 //Nodes are 16 bytes big (or 32, can't decide, 16 might _just_ be enough)
-    ivec4[] nodes;
+    uvec4[] nodes;
 };
 //First 2 are joined to be the position
@@ -73,8 +73,12 @@ uint getId(in UnpackedNode node) {
    return node.nodeId;
 }
-uint getChild(in UnpackedNode node) {
+uint getChildCount(in UnpackedNode node) {
-    return node.flags >> 2;
+    return ((node.flags >> 2)&7U)+1;
 }
 uint getTransformIndex(in UnpackedNode node) {
    return (node.flags >> 5)&31u;
 }
 //-----------------------------------
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchial/screenspace.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchial/screenspace.glsl
@@ -21,31 +21,35 @@ vec3 minBB;
 vec3 maxBB;
 vec2 size;
 vec3 projPoint(mat4 mat, vec3 pos) {
    vec4 t = mat * vec4(vec3(pos),1);
    return t.xyz/t.w;
 }
 //Sets up screenspace with the given node id, returns true on success false on failure/should not continue
 //Accesses data that is setup in the main traversal and is just shared to here
 void setupScreenspace(in UnpackedNode node) {
    //TODO: Need to do aabb size for the nodes, it must be an overestimate of all the children
-    mat4 mvp;
+    Transform transform = transforms[getTransformIndex(node)];
-    vec3 basePos;
+    /*
-    vec3 minSize;
+    vec3 point = VP*(((transform.transform*vec4((node.pos<<node.lodLevel) - transform.originPos.xyz, 1))
-    vec3 maxSize;
+                    + (transform.worldPos.xyz-camChunkPos))-camSubChunk);
                    */
-    vec3 minPos = minSize + basePos;
+    vec4 base = VP*vec4(vec3(((node.pos<<node.lodLevel)-camSecPos)<<5)-camSubSecPos, 1);
    vec3 maxPos = maxSize + basePos;
-    minBB = projPoint(mvp, minPos);
+    //TODO: AABB SIZES not just a max cube
    //vec3 minPos = minSize + basePos;
    //vec3 maxPos = maxSize + basePos;
    minBB = base.xyz/base.w;
    maxBB = minBB;
    for (int i = 1; i < 8; i++) {
-        vec3 point = projPoint(mvp, mix(minPos, maxPos, bvec3((i&1)!=0,(i&2)!=0,(i&4)!=0)));
+        //NOTE!: cant this be precomputed and put in an array?? in the scene uniform??
        vec4 pPoint = (VP*vec4(vec3((i&1)!=0,(i&2)!=0,(i&4)!=0)*32,1));//Size of section is 32x32x32 (need to change it to a bounding box in the future)
        pPoint += base;
        vec3 point = pPoint.xyz/pPoint.w;
        minBB = min(minBB, point);
        maxBB = max(maxBB, point);
    }
@@ -54,7 +58,7 @@ void setupScreenspace(in UnpackedNode node) {
 }
 bool isCulledByHiz() {
-    vec2 ssize = size.xy * vec2(ivec2(screensize));
+    vec2 ssize = size.xy * vec2(ivec2(screenW, screenH));
    float miplevel = ceil(log2(max(max(ssize.x, ssize.y),1)));
    vec2 midpoint = (maxBB.xy + minBB.xy)*0.5;
    return textureLod(hizDepthSampler, vec3(midpoint, minBB.z), miplevel) > 0.0001;
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchial/transform.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchial/transform.glsl
@@ -1,3 +1,14 @@
 //This provides per scene/viewport/transform access, that is, a node can be attached to a specific scene/viewport/transform, this is so that
 // different nodes/models can have different viewports/scenes/transforms which enables some very cool things like
 // absolutely massive VS2 structures should... just work :tm: - todd howard
 struct Transform {
    mat4 transform;
    ivec4 originPos;
    ivec4 worldPos;
 };
 layout(binding = TRANSFORM_ARRAY_INDEX, std140) uniform TransformArray {
    Transform transforms[32];
 };
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchial/traversal.comp
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchial/traversal.comp
@@ -1,7 +1,11 @@
 #version 460 core
 //TODO: make this better than a single thread
 layout(local_size_x=1, local_size_y=1) in;
 #import <voxy:lod/hierarchial/binding_points.glsl>
 #line 7
 //The queue contains 3 atomics
 // end (the current processing pointer)
 // head (the current point that is ok to read from)
@@ -11,13 +15,15 @@ layout(local_size_x=1, local_size_y=1) in;
 //   write the data getting enqueued at the starting point specified by the `top` increment
 // then increment head strictly _AFTER_ writing to the queue, this ensures that the data is always written and available in the queue
-layout(binding = 0, std140) uniform SceneUniform {
+layout(binding = SCENE_UNIFORM_INDEX, std140) uniform SceneUniform {
-    uint a;
+    mat4 VP;
    ivec3 camSecPos;
    uint screenW;
    vec3 camSubSecPos;
    uint screenH;
 };
-#define NODE_DATA_INDEX 1
+layout(binding = ATOMIC_DATA_INDEX, std430) restrict buffer Atomics {
 layout(binding = 2, std430) restrict buffer Atomics {
    uint requestQueueIndex;
    uint requestQueueMaxSize;
@@ -25,11 +31,11 @@ layout(binding = 2, std430) restrict buffer Atomics {
    uint renderQueueMaxSize;
 } atomics;
-layout(binding = 3, std430) restrict writeonly buffer RequestQueue {
+layout(binding = REQUEST_QUEUE_INDEX, std430) restrict writeonly buffer RequestQueue {
    uint[] requestQueue;
 };
-layout(binding = 4, std430) restrict writeonly buffer RenderQueue {
+layout(binding = RENDER_QUEUE_INDEX, std430) restrict writeonly buffer RenderQueue {
    uint[] renderQueue;
 };
@@ -41,16 +47,15 @@ layout(binding = 2, std430) restrict buffer QueueData {
    uint[] queue;
 } queue;
 */
-
+#line 1
 #import <voxy:lod/hierarchial/transform.glsl>
-
+#line 1
 #import <voxy:lod/hierarchial/node.glsl>
-
+#line 1
 #define HIZ_BINDING_INDEX 0
 //Contains all the screenspace computation
 #import <voxy:lod/hierarchial/screenspace.glsl>
-
+#line 58
 //If a request is successfully added to the RequestQueue, must update NodeData to mark that the node has been put into the request queue
 // to prevent it from being requested every frame and blocking the queue
@@ -72,7 +77,7 @@ layout(binding = 2, std430) restrict buffer QueueData {
 void addRequest(inout UnpackedNode node) {
    if (!hasRequested(node)) {
        //TODO: maybe try using only 1 variable and it being <0 being bad
-        if (atomics.requestQueueIndex < atomic.requestQueueMaxSize) {
+        if (atomics.requestQueueIndex < atomics.requestQueueMaxSize) {
            //Mark node as having a request submitted to prevent duplicate submissions
            requestQueue[atomicAdd(atomics.requestQueueIndex, 1)] = getId(node);
            markRequested(node);