From 716bf097bfe48eafef8da6c1e54f8812a108a763 Mon Sep 17 00:00:00 2001 From: mcrcortex <{ID}+{username}@users.noreply.github.com> Date: Wed, 10 Jul 2024 23:02:00 +1000 Subject: [PATCH] Alot of gpu work --- build.gradle | 9 ++ .../world/service/ServiceThreadPool.java | 72 ++++++++++++++++ .../voxy/shaders/lod/hierarchial/node.glsl | 85 +++++++++++++++++++ .../screenspace.glsl | 41 ++++++++- .../selectorOLDDD.comp | 0 .../transform.glsl | 0 .../traversal.glsl | 62 ++++++-------- .../voxy/shaders/lod/hierarchical/node.glsl | 29 ------- 8 files changed, 230 insertions(+), 68 deletions(-) create mode 100644 src/main/java/me/cortex/voxy/common/world/service/ServiceThreadPool.java create mode 100644 src/main/resources/assets/voxy/shaders/lod/hierarchial/node.glsl rename src/main/resources/assets/voxy/shaders/lod/{hierarchical => hierarchial}/screenspace.glsl (50%) rename src/main/resources/assets/voxy/shaders/lod/{hierarchical => hierarchial}/selectorOLDDD.comp (100%) rename src/main/resources/assets/voxy/shaders/lod/{hierarchical => hierarchial}/transform.glsl (100%) rename src/main/resources/assets/voxy/shaders/lod/{hierarchical => hierarchial}/traversal.glsl (76%) delete mode 100644 src/main/resources/assets/voxy/shaders/lod/hierarchical/node.glsl diff --git a/build.gradle b/build.gradle index ff07cc7b..efbdaafc 100644 --- a/build.gradle +++ b/build.gradle @@ -56,14 +56,23 @@ dependencies { modCompileOnly "maven.modrinth:nvidium:0.2.8-beta" modImplementation("maven.modrinth:cloth-config:13.0.121+fabric") + modImplementation("maven.modrinth:modmenu:11.0.1") + modCompileOnly("maven.modrinth:iris:1.7.3+1.21") + //modRuntimeOnly("maven.modrinth:iris:1.6.17+1.20.4") + modCompileOnly("maven.modrinth:starlight:1.1.3+1.20.4") + //modCompileOnly("maven.modrinth:immersiveportals:v5.1.7-mc1.20.4") + modCompileOnly("maven.modrinth:vivecraft:1.20.4-1.1.6-fabric") + modCompileOnly("maven.modrinth:chunky:1.3.138") modRuntimeOnly("maven.modrinth:chunky:1.3.138") + + modImplementation('io.github.douira:glsl-transformer:2.0.1') } diff --git a/src/main/java/me/cortex/voxy/common/world/service/ServiceThreadPool.java b/src/main/java/me/cortex/voxy/common/world/service/ServiceThreadPool.java new file mode 100644 index 00000000..a90dd762 --- /dev/null +++ b/src/main/java/me/cortex/voxy/common/world/service/ServiceThreadPool.java @@ -0,0 +1,72 @@ +package me.cortex.voxy.common.world.service; + +import me.cortex.voxy.common.world.WorldEngine; +import me.cortex.voxy.common.world.WorldSection; +import net.minecraft.client.MinecraftClient; +import net.minecraft.text.Text; + +import java.util.concurrent.ConcurrentLinkedDeque; +import java.util.concurrent.Semaphore; + +//TODO: +//FIXME: +// FINISHME: +// Use this instead of seperate thread pools, use a single shared pool where tasks are submitted to and worked on + +public class ServiceThreadPool { + private volatile boolean running = true; + private final Thread[] workers; + private final Semaphore jobCounter = new Semaphore(0); + //TODO: have a wrapper to specify extra information about the job for debugging + private final ConcurrentLinkedDeque jobQueue = new ConcurrentLinkedDeque<>(); + + + public ServiceThreadPool(int workers) { + this.workers = new Thread[workers]; + for (int i = 0; i < workers; i++) { + var worker = new Thread(this::worker); + worker.setDaemon(false); + worker.setName("Service worker #" + i); + worker.start(); + this.workers[i] = worker; + } + } + + private void worker() { + while (true) { + this.jobCounter.acquireUninterruptibly(); + if (!this.running) { + break; + } + var job = this.jobQueue.pop(); + try { + job.run(); + } catch (Exception e) { + System.err.println(e); + MinecraftClient.getInstance().executeSync(()-> + MinecraftClient.getInstance().player.sendMessage( + Text.literal( + "Voxy ingester had an exception while executing service job please check logs and report error"))); + } + } + } + + + public void shutdown() { + //Wait for the tasks to finish + while (this.jobCounter.availablePermits() != 0) { + Thread.onSpinWait(); + } + + //Shutdown + this.running = false; + this.jobCounter.release(1000); + + //Wait for thread to join + try { + for (var worker : this.workers) { + worker.join(); + } + } catch (InterruptedException e) {throw new RuntimeException(e);} + } +} diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchial/node.glsl b/src/main/resources/assets/voxy/shaders/lod/hierarchial/node.glsl new file mode 100644 index 00000000..970731df --- /dev/null +++ b/src/main/resources/assets/voxy/shaders/lod/hierarchial/node.glsl @@ -0,0 +1,85 @@ + +layout(binding = NODE_DATA_INDEX, std430) restrict buffer NodeData { +//Needs to be read and writeable for marking data, +//(could do an evil violation, make this readonly, then have a writeonly varient, which means that writing might not be visible but will show up by the next frame) +//Nodes are 16 bytes big (or 32 cant decide, 16 might _just_ be enough) + ivec4[] nodes; +}; + +//First 2 are joined to be the position + + + + +//All node access and setup into global variables +//TODO: maybe make it global vars +struct UnpackedNode { + uint nodeId; + + ivec3 pos; + uint lodLevel; + + uint flags; + + uint meshPtr; + uint childPtr; +}; + +#define NULL_NODE ((1<<25)-1) +#define NULL_MESH ((1<<24)-1) + +void unpackNode(inout UnpackedNode node, uint nodeId) { + node.nodeId = nodeId; + uvec4 compactedNode = nodes[nodeId]; + node.lodLevel = compactedNode.x >> 28; + + { + int y = ((int(compactedNode.x)<<4)>>24); + int x = (int(compactedNode.y)<<4)>>8; + int z = int((int(compactedNode.x)&((1<<20)-1))<<4); + z |= int(compactedNode.y>>28); + z <<= 8; + z >>= 8; + + node.pos = ivec3(x, y, z); + } + + node.meshPtr = compactedNode.z&0xFFFFFFu; + node.childPtr = compactedNode.w&0x1FFFFFFu; + node.flags = (compactedNode.z>>24) | ((compactedNode.w>>23)<<8); +} + +bool hasMesh(in UnpackedNode node) { + return node.meshPtr != NULL_MESH; +} + +bool hasChildren(in UnpackedNode node) { + return node.childPtr != NULL_NODE; +} + +bool isEmpty(in UnpackedNode node) { + return (node.flags&2u) != 0; +} + +bool hasRequested(in UnpackedNode node) { + return (node.flags&1u) != 0u; +} + +uint getMesh(in UnpackedNode node) { + return node.meshPtr; +} + +uint getId(in UnpackedNode node) { + return node.nodeId; +} + +uint getChild(in UnpackedNode node) { + return node.flags >> 2; +} + +//----------------------------------- + +void markRequested(inout UnpackedNode node) { + node.flags |= 1u; + nodes[node.nodeId].z |= 1u<<24; +} \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace.glsl b/src/main/resources/assets/voxy/shaders/lod/hierarchial/screenspace.glsl similarity index 50% rename from src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace.glsl rename to src/main/resources/assets/voxy/shaders/lod/hierarchial/screenspace.glsl index 8cad2d34..ff449c21 100644 --- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/screenspace.glsl +++ b/src/main/resources/assets/voxy/shaders/lod/hierarchial/screenspace.glsl @@ -12,22 +12,55 @@ // substantually for performance (for both persistent threads and incremental) +layout(binding = HIZ_BINDING_INDEX) uniform sampler2DShadow hizDepthSampler; +//TODO: maybe do spher bounds aswell? cause they have different accuracies but are both over estimates (liberals (non conservative xD)) +// so can do && +vec3 minBB; +vec3 maxBB; +vec2 size; - +vec3 projPoint(mat4 mat, vec3 pos) { + vec4 t = mat * vec4(vec3(pos),1); + return t.xyz/t.w; +} //Sets up screenspace with the given node id, returns true on success false on failure/should not continue //Accesses data that is setup in the main traversal and is just shared to here -bool setupScreenspace() { +void setupScreenspace(in UnpackedNode node) { + //TODO: Need to do aabb size for the nodes, it must be an overesimate of all the children + mat4 mvp; + + vec3 basePos; + vec3 minSize; + vec3 maxSize; + + + vec3 minPos = minSize + basePos; + vec3 maxPos = maxSize + basePos; + + minBB = projPoint(mvp, minPos); + maxBB = minBB; + + for (int i = 1; i < 8; i++) { + vec3 point = projPoint(mvp, mix(minPos, maxPos, bvec3((i&1)!=0,(i&2)!=0,(i&4)!=0))); + minBB = min(minBB, point); + maxBB = max(maxBB, point); + } + + size = maxBB.xy - minBB.xy; } bool isCulledByHiz() { - + vec2 ssize = size.xy * vec2(ivec2(screensize)); + float miplevel = ceil(log2(max(max(ssize.x, ssize.y),1))); + vec2 midpoint = (maxBB.xy + minBB.xy)*0.5; + return textureLod(hizDepthSampler, vec3(midpoint, minBB.z), miplevel) > 0.0001; } //Returns if we should decend into its children or not bool shouldDecend() { - + return (size.x*size.y) > (64*64F); } \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchical/selectorOLDDD.comp b/src/main/resources/assets/voxy/shaders/lod/hierarchial/selectorOLDDD.comp similarity index 100% rename from src/main/resources/assets/voxy/shaders/lod/hierarchical/selectorOLDDD.comp rename to src/main/resources/assets/voxy/shaders/lod/hierarchial/selectorOLDDD.comp diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchical/transform.glsl b/src/main/resources/assets/voxy/shaders/lod/hierarchial/transform.glsl similarity index 100% rename from src/main/resources/assets/voxy/shaders/lod/hierarchical/transform.glsl rename to src/main/resources/assets/voxy/shaders/lod/hierarchial/transform.glsl diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal.glsl b/src/main/resources/assets/voxy/shaders/lod/hierarchial/traversal.glsl similarity index 76% rename from src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal.glsl rename to src/main/resources/assets/voxy/shaders/lod/hierarchial/traversal.glsl index d007c1ce..e137dc19 100644 --- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal.glsl +++ b/src/main/resources/assets/voxy/shaders/lod/hierarchial/traversal.glsl @@ -12,15 +12,10 @@ layout(local_size_x=1, local_size_y=1) in; // then increment head strictly _AFTER_ writing to the queue, this ensures that the data is always written and avaible in the queue layout(binding = 0, std140) uniform SceneUniform { - -}; - -layout(binding = 1, std430) restrict buffer NodeData {//Needs to be read and writeable for marking data, - //(could do an evil violation, make this readonly, then have a writeonly varient, which means that writing might not be visible but will show up by the next frame) - //Nodes are 16 bytes big (or 32 cant decide, 16 might _just_ be enough) - ivec4[] nodes; + uint a; }; +#define NODE_DATA_INDEX 1 layout(binding = 2, std430) restrict buffer Atomics { uint requestQueueIndex; @@ -51,20 +46,12 @@ layout(binding = 2, std430) restrict buffer QueueData { #import -layout(binding = 0) uniform sampler2DShadow hizDepthSampler; - - -void aqcuireNewBatch() { - -} +#define HIZ_BINDING_INDEX 0 //Contains all the screenspace computation #import - - - //If a request is successfully added to the RequestQueue, must update NodeData to mark that the node has been put into the request queue // to prevent it from being requested every frame and blocking the queue @@ -81,32 +68,37 @@ void aqcuireNewBatch() { // not sure -void addRequest() { - if (!hasRequested()) { - //TODO: request this node (cpu side can figure out what it wants/needs) - //Mark node as having a request submitted to prevent duplicate submissions +void addRequest(inout UnpackedNode node) { + if (!hasRequested(node)) { + //TODO: maybe try using only 1 variable and it being <0 being bad + if (atomics.requestQueueIndex < atomic.requestQueueMaxSize) { + //Mark node as having a request submitted to prevent duplicate submissions + requestQueue[atomicAdd(atomics.requestQueueIndex, 1)] = getId(node); + markRequested(node); + } } } -void enqueueSelfForRender() { - //TODO: Draw mesh and stop with this node (good path) +void enqueueChildren(in UnpackedNode node) { + } -void enqueueChildren() { - +void enqueueSelfForRender(in UnpackedNode node) { + renderQueue[atomicAdd(atomics.renderQueueIndex, 1)] = getMesh(node); } //TODO: need to add an empty mesh, as a parent node might not have anything to render but the children do?? void main() { - uint id = 0; + UnpackedNode node; //Setup/unpack the node - unpackNode(id); + unpackNode(node, gl_GlobalInvocationID.x); + //TODO: check the node is OK first??? maybe? //Compute screenspace - setupScreenspace(); + setupScreenspace(node); if (isCulledByHiz()) { //We are done here, dont do any more, the issue is the shader barriers maybe @@ -115,25 +107,25 @@ void main() { //It is visible, TODO: maybe do a more detailed hiz test? (or make it so that ) if (shouldDecend()) { - if (hasChildren()) { - enqueueChildren(); + if (hasChildren(node)) { + enqueueChildren(node); } else { - addRequest(); + addRequest(node); //TODO: use self mesh (is error state if it doesnt have one since all leaf nodes should have a mesh) // Basicly guarenteed to have a mesh, if it doesnt it is very very bad and incorect since its a violation of the graph properties // that all leaf nodes must contain a mesh - enqueueSelfForRender(); + enqueueSelfForRender(node); } } else { - if (hasMesh()) { - enqueueSelfForRender(); + if (hasMesh(node)) { + enqueueSelfForRender(node); } else { //!! not ideal, we want to render this mesh but dont have it. If we havent sent a request // then send a request for a mesh for this node. - addRequest(); + addRequest(node); //TODO: Decend into children? maybe add a bitflag saying is bad if the immediate children dont have meshes - enqueueChildren(); + enqueueChildren(node); } } } diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchical/node.glsl b/src/main/resources/assets/voxy/shaders/lod/hierarchical/node.glsl deleted file mode 100644 index 86c145d7..00000000 --- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/node.glsl +++ /dev/null @@ -1,29 +0,0 @@ - -//All node access and setup into global variables -uint lodLevel; -ivec3 position; - -uint childrenPtr; -uint meshPtr; - -uint flags; - -void unpackNode(uint nodeId) { - -} - -bool hasMesh() { - -} - -bool hasChildren() { - -} - -bool hasRequested() { - -} - -void markRequested() { - -} \ No newline at end of file