Shader compiles

2024-07-11 16:50:32 +10:00
parent 716bf097bf
commit 1e855a0ed0
6 changed files with 64 additions and 29 deletions
--- a/src/main/java/me/cortex/voxy/client/core/rendering/Gl46MeshletsFarWorldRenderer.java
+++ b/src/main/java/me/cortex/voxy/client/core/rendering/Gl46MeshletsFarWorldRenderer.java
@@ -44,6 +44,7 @@ import static org.lwjgl.opengl.NVRepresentativeFragmentTest.GL_REPRESENTATIVE_FR
 // the shader can cull the vertices of any quad that has its index over the expected quad count
 // this could potentially result in a fair bit of memory savings (especially if used in normal mc terrain rendering)
 public class Gl46MeshletsFarWorldRenderer extends AbstractFarWorldRenderer<Gl46MeshletViewport, DefaultGeometryManager> {
+
    private final Shader lodShader = Shader.make()
            .define("QUADS_PER_MESHLET", RenderDataFactory.QUADS_PER_MESHLET)
            .add(ShaderType.VERTEX, "voxy:lod/gl46mesh/quads.vert")
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchial/binding_points.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchial/binding_points.glsl
@@ -0,0 +1,10 @@
+#define SCENE_UNIFORM_INDEX 0
+#define NODE_DATA_INDEX 1
+#define ATOMIC_DATA_INDEX 2
+#define REQUEST_QUEUE_INDEX 3
+#define RENDER_QUEUE_INDEX 4
+#define TRANSFORM_ARRAY_INDEX 5
+
+//Samplers
+#define HIZ_BINDING_INDEX 0
+
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchial/node.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchial/node.glsl
@@ -3,7 +3,7 @@ layout(binding = NODE_DATA_INDEX, std430) restrict buffer NodeData {
 //Needs to be read and writeable for marking data,
 //(could do an evil violation, make this readonly, then have a writeonly variant, which means that writing might not be visible but will show up by the next frame)
 //Nodes are 16 bytes big (or 32 cant decide, 16 might _just_ be enough)
-    ivec4[] nodes;
+    uvec4[] nodes;
 };

 //First 2 are joined to be the position
@@ -73,8 +73,12 @@ uint getId(in UnpackedNode node) {
    return node.nodeId;
 }

-uint getChild(in UnpackedNode node) {
-    return node.flags >> 2;
+uint getChildCount(in UnpackedNode node) {
+    return ((node.flags >> 2)&7U)+1;
+}
+
+uint getTransformIndex(in UnpackedNode node) {
+    return (node.flags >> 5)&31u;
 }

 //-----------------------------------
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchial/screenspace.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchial/screenspace.glsl
@@ -21,31 +21,35 @@ vec3 minBB;
 vec3 maxBB;
 vec2 size;

-vec3 projPoint(mat4 mat, vec3 pos) {
-    vec4 t = mat * vec4(vec3(pos),1);
-    return t.xyz/t.w;
-}

 //Sets up screenspace with the given node id, returns true on success false on failure/should not continue
 //Accesses data that is setup in the main traversal and is just shared to here
 void setupScreenspace(in UnpackedNode node) {
    //TODO: Need to do aabb size for the nodes, it must be an overestimate of all the children

-    mat4 mvp;
+    Transform transform = transforms[getTransformIndex(node)];

-    vec3 basePos;
-    vec3 minSize;
-    vec3 maxSize;
+    /*
+    vec3 point = VP*(((transform.transform*vec4((node.pos<<node.lodLevel) - transform.originPos.xyz, 1))
+                    + (transform.worldPos.xyz-camChunkPos))-camSubChunk);
+                    */


-    vec3 minPos = minSize + basePos;
-    vec3 maxPos = maxSize + basePos;
+    vec4 base = VP*vec4(vec3(((node.pos<<node.lodLevel)-camSecPos)<<5)-camSubSecPos, 1);

-    minBB = projPoint(mvp, minPos);
+    //TODO: AABB SIZES not just a max cube
+
+    //vec3 minPos = minSize + basePos;
+    //vec3 maxPos = maxSize + basePos;
+
+    minBB = base.xyz/base.w;
    maxBB = minBB;

    for (int i = 1; i < 8; i++) {
-        vec3 point = projPoint(mvp, mix(minPos, maxPos, bvec3((i&1)!=0,(i&2)!=0,(i&4)!=0)));
+        //NOTE!: cant this be precomputed and put in an array?? in the scene uniform?? (VP*offset does not depend on the node)
+        vec4 pPoint = (VP*vec4(vec3((i&1)!=0,(i&2)!=0,(i&4)!=0)*32,0));//Corner offset is a direction (w=0): base already carries w=1, so w=1 here would make the sum VP*vec4(pos+offset,2) and halve the projected point. Size of section is 32x32x32 (need to change it to a bounding box in the future)
+        pPoint += base;
+        vec3 point = pPoint.xyz/pPoint.w;
        minBB = min(minBB, point);
        maxBB = max(maxBB, point);
    }
@@ -54,7 +58,7 @@ void setupScreenspace(in UnpackedNode node) {
 }

 bool isCulledByHiz() {
-    vec2 ssize = size.xy * vec2(ivec2(screensize));
+    vec2 ssize = size.xy * vec2(ivec2(screenW, screenH));
    float miplevel = ceil(log2(max(max(ssize.x, ssize.y),1)));
    vec2 midpoint = (maxBB.xy + minBB.xy)*0.5;
    return textureLod(hizDepthSampler, vec3(midpoint, minBB.z), miplevel) > 0.0001;
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchial/transform.glsl
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchial/transform.glsl
@@ -1,3 +1,14 @@
 //This provides per scene/viewport/transform access, that is, a node can be attached to a specific scene/viewport/transform, this is so that
 // different nodes/models can have different viewports/scenes/transforms which enables some very cool things like
 // absolutely massive VS2 structures should... just work :tm: - todd howard
+
+struct Transform {
+    mat4 transform;
+    ivec4 originPos;
+    ivec4 worldPos;
+};
+
+
+layout(binding = TRANSFORM_ARRAY_INDEX, std140) uniform TransformArray {
+    Transform transforms[32];
+};
--- a/src/main/resources/assets/voxy/shaders/lod/hierarchial/traversal.comp
+++ b/src/main/resources/assets/voxy/shaders/lod/hierarchial/traversal.comp
@@ -1,7 +1,11 @@
 #version 460 core
+
 //TODO: make this better than a single thread
 layout(local_size_x=1, local_size_y=1) in;

+#import <voxy:lod/hierarchial/binding_points.glsl>
+#line 7
+
 //The queue contains 3 atomics
 // end (the current processing pointer)
 // head (the current point that is ok to read from)
@@ -11,13 +15,15 @@ layout(local_size_x=1, local_size_y=1) in;
 //   write the data getting enqueued at the starting point specified by the `top` increment
 // then increment head strictly _AFTER_ writing to the queue, this ensures that the data is always written and available in the queue

-layout(binding = 0, std140) uniform SceneUniform {
-    uint a;
+layout(binding = SCENE_UNIFORM_INDEX, std140) uniform SceneUniform {
+    mat4 VP;
+    ivec3 camSecPos;
+    uint screenW;
+    vec3 camSubSecPos;
+    uint screenH;
 };

-#define NODE_DATA_INDEX 1
-
-layout(binding = 2, std430) restrict buffer Atomics {
+layout(binding = ATOMIC_DATA_INDEX, std430) restrict buffer Atomics {
    uint requestQueueIndex;
    uint requestQueueMaxSize;

@@ -25,11 +31,11 @@ layout(binding = 2, std430) restrict buffer Atomics {
    uint renderQueueMaxSize;
 } atomics;

-layout(binding = 3, std430) restrict writeonly buffer RequestQueue {
+layout(binding = REQUEST_QUEUE_INDEX, std430) restrict writeonly buffer RequestQueue {
    uint[] requestQueue;
 };

-layout(binding = 4, std430) restrict writeonly buffer RenderQueue {
+layout(binding = RENDER_QUEUE_INDEX, std430) restrict writeonly buffer RenderQueue {
    uint[] renderQueue;
 };

@@ -41,16 +47,15 @@ layout(binding = 2, std430) restrict buffer QueueData {
    uint[] queue;
 } queue;
 */
-
+#line 1
 #import <voxy:lod/hierarchial/transform.glsl>
-
+#line 1
 #import <voxy:lod/hierarchial/node.glsl>
-
-#define HIZ_BINDING_INDEX 0
+#line 1

 //Contains all the screenspace computation
 #import <voxy:lod/hierarchial/screenspace.glsl>
-
+#line 58

 //If a request is successfully added to the RequestQueue, must update NodeData to mark that the node has been put into the request queue
 // to prevent it from being requested every frame and blocking the queue
@@ -72,7 +77,7 @@ layout(binding = 2, std430) restrict buffer QueueData {
 void addRequest(inout UnpackedNode node) {
    if (!hasRequested(node)) {
        //TODO: maybe try using only 1 variable and it being <0 being bad
-        if (atomics.requestQueueIndex < atomic.requestQueueMaxSize) {
+        if (atomics.requestQueueIndex < atomics.requestQueueMaxSize) {
            //Mark node as having a request submitted to prevent duplicate submissions
            requestQueue[atomicAdd(atomics.requestQueueIndex, 1)] = getId(node);
            markRequested(node);