A lot of GPU work

This commit is contained in:
mcrcortex
2024-07-10 23:02:00 +10:00
parent 8ed121f71e
commit 716bf097bf
8 changed files with 230 additions and 68 deletions

View File

@@ -56,14 +56,23 @@ dependencies {
modCompileOnly "maven.modrinth:nvidium:0.2.8-beta" modCompileOnly "maven.modrinth:nvidium:0.2.8-beta"
modImplementation("maven.modrinth:cloth-config:13.0.121+fabric") modImplementation("maven.modrinth:cloth-config:13.0.121+fabric")
modImplementation("maven.modrinth:modmenu:11.0.1") modImplementation("maven.modrinth:modmenu:11.0.1")
modCompileOnly("maven.modrinth:iris:1.7.3+1.21") modCompileOnly("maven.modrinth:iris:1.7.3+1.21")
//modRuntimeOnly("maven.modrinth:iris:1.6.17+1.20.4") //modRuntimeOnly("maven.modrinth:iris:1.6.17+1.20.4")
modCompileOnly("maven.modrinth:starlight:1.1.3+1.20.4") modCompileOnly("maven.modrinth:starlight:1.1.3+1.20.4")
//modCompileOnly("maven.modrinth:immersiveportals:v5.1.7-mc1.20.4") //modCompileOnly("maven.modrinth:immersiveportals:v5.1.7-mc1.20.4")
modCompileOnly("maven.modrinth:vivecraft:1.20.4-1.1.6-fabric") modCompileOnly("maven.modrinth:vivecraft:1.20.4-1.1.6-fabric")
modCompileOnly("maven.modrinth:chunky:1.3.138") modCompileOnly("maven.modrinth:chunky:1.3.138")
modRuntimeOnly("maven.modrinth:chunky:1.3.138") modRuntimeOnly("maven.modrinth:chunky:1.3.138")
modImplementation('io.github.douira:glsl-transformer:2.0.1')
} }

View File

@@ -0,0 +1,72 @@
package me.cortex.voxy.common.world.service;
import me.cortex.voxy.common.world.WorldEngine;
import me.cortex.voxy.common.world.WorldSection;
import net.minecraft.client.MinecraftClient;
import net.minecraft.text.Text;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.Semaphore;
//TODO:
//FIXME:
// FINISHME:
// Use this instead of separate thread pools: a single shared pool that tasks are submitted to and worked on
/**
 * Fixed-size pool of named, non-daemon worker threads that run {@link Runnable} jobs
 * taken from a shared queue.
 *
 * <p>Each worker blocks on {@code jobCounter} and takes one job from {@code jobQueue}
 * per permit acquired. A job that throws an {@link Exception} does not kill its worker:
 * the stack trace is printed and a chat message is posted to the player, if one exists.
 *
 * <p>NOTE(review): nothing in this class adds to {@code jobQueue} or releases job permits
 * yet; presumably a submit method (enqueue first, then release one permit) is still to
 * come - confirm before relying on this pool.
 */
public class ServiceThreadPool {
    /** Cleared by {@link #shutdown()}; workers exit once it is false and the queue is empty. */
    private volatile boolean running = true;
    private final Thread[] workers;
    /** One permit per queued job, plus one wake-up permit per worker during shutdown. */
    private final Semaphore jobCounter = new Semaphore(0);
    //TODO: have a wrapper to specify extra information about the job for debugging
    private final ConcurrentLinkedDeque<Runnable> jobQueue = new ConcurrentLinkedDeque<>();

    /**
     * Creates the pool and immediately starts all of its worker threads.
     *
     * @param workers number of worker threads to start
     */
    public ServiceThreadPool(int workers) {
        this.workers = new Thread[workers];
        for (int i = 0; i < workers; i++) {
            var worker = new Thread(this::worker);
            worker.setDaemon(false);
            worker.setName("Service worker #" + i);
            worker.start();
            this.workers[i] = worker;
        }
    }

    /** Worker loop: wait for a permit, run one job, repeat until shut down and drained. */
    private void worker() {
        while (true) {
            this.jobCounter.acquireUninterruptibly();
            //poll() rather than pop(): a permit without a matching job (the shutdown wake-up)
            // must not throw NoSuchElementException
            var job = this.jobQueue.poll();
            if (job == null) {
                if (!this.running) {
                    break;
                }
                continue;
            }
            //The job is run even if shutdown has started, so queued work is never dropped
            try {
                job.run();
            } catch (Exception e) {
                this.reportJobFailure(e);
            }
        }
    }

    /** Prints the failure with its stack trace and notifies the player in chat when present. */
    private void reportJobFailure(Exception e) {
        //The chat message tells users to check the logs, so the full trace must be in them
        e.printStackTrace();
        var client = MinecraftClient.getInstance();
        client.executeSync(() -> {
            //There is no player while outside a world (e.g. title screen)
            var player = client.player;
            if (player != null) {
                player.sendMessage(
                        Text.literal(
                                "Voxy ingester had an exception while executing service job please check logs and report error"));
            }
        });
    }

    /**
     * Stops the pool: workers finish every job still queued, then exit, and this method
     * returns once all of them have terminated.
     *
     * @throws RuntimeException if the calling thread is interrupted while joining; the
     *         interrupt flag is restored before throwing
     */
    public void shutdown() {
        //Signal shutdown; running must be cleared before the permits are released so that
        // a worker woken by one of them observes it
        this.running = false;
        //One wake-up permit per worker; workers drain remaining jobs before exiting
        this.jobCounter.release(this.workers.length);

        //Wait for thread to join
        try {
            for (var worker : this.workers) {
                worker.join();
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }
}

View File

@@ -0,0 +1,85 @@
//Storage buffer holding the packed node array, one ivec4 per node.
//NODE_DATA_INDEX must be #defined by the shader that imports this file.
layout(binding = NODE_DATA_INDEX, std430) restrict buffer NodeData {
//Needs to be readable and writable so that nodes can be marked,
//(could do an evil violation: make this readonly, then have a separate writeonly variant, which means that a write might not be visible immediately but will show up by the next frame)
//Nodes are 16 bytes big (or 32, can't decide; 16 might _just_ be enough)
ivec4[] nodes;
};
//First 2 are joined to be the position
//All node access and setup into global variables
//TODO: maybe make it global vars
//Decoded form of one entry of nodes[], filled in by unpackNode()
struct UnpackedNode {
//Index into nodes[] that this node was unpacked from
uint nodeId;
//Sign-extended position decoded from the first two words of the packed node
ivec3 pos;
//Top 4 bits of the first word
uint lodLevel;
//Bit 0: request already submitted (see hasRequested), bit 1: empty (see isEmpty)
uint flags;
//Low 24 bits of the third word, NULL_MESH when there is no mesh
uint meshPtr;
//Low 25 bits of the fourth word, NULL_NODE when there are no children
uint childPtr;
};
//Sentinel childPtr value (all 25 bits set), tested by hasChildren()
#define NULL_NODE ((1<<25)-1)
//Sentinel meshPtr value (all 24 bits set), tested by hasMesh()
#define NULL_MESH ((1<<24)-1)
//Reads nodes[nodeId] and decodes its four packed words into node.
// Word layout as decoded below (x,y,z,w are the 4 components of the packed ivec4):
//  x: [31..28] lodLevel, [27..20] pos.y (signed 8 bit), [19..0] upper 20 bits of pos.z
//  y: [31..28] lower 4 bits of pos.z, [27..4] pos.x (signed 24 bit), [3..0] not read
//  z: [31..24] low 8 bits of flags, [23..0] meshPtr
//  w: [24..0] childPtr, upper bits feed flags bit 8 and up
void unpackNode(inout UnpackedNode node, uint nodeId) {
node.nodeId = nodeId;
uvec4 compactedNode = nodes[nodeId];
node.lodLevel = compactedNode.x >> 28;
{
//Shift left to drop the bits above each field, then arithmetic shift right on a signed int to sign extend
int y = ((int(compactedNode.x)<<4)>>24);
int x = (int(compactedNode.y)<<4)>>8;
//z is split across both words, upper 20 bits come from x and the lower 4 bits from the top of y
int z = int((int(compactedNode.x)&((1<<20)-1))<<4);
z |= int(compactedNode.y>>28);
//Sign extend the assembled 24 bit value
z <<= 8;
z >>= 8;
node.pos = ivec3(x, y, z);
}
node.meshPtr = compactedNode.z&0xFFFFFFu;
node.childPtr = compactedNode.w&0x1FFFFFFu;
//NOTE(review): w>>23 covers bits 31..23 of w, which overlaps bits 24..23 of the 25 bit childPtr above,
// a shift of 25 would take only the bits above childPtr - confirm the intended layout
node.flags = (compactedNode.z>>24) | ((compactedNode.w>>23)<<8);
}
//True when the node's mesh pointer is anything other than the NULL_MESH sentinel
bool hasMesh(in UnpackedNode node) {
    bool meshMissing = (node.meshPtr == NULL_MESH);
    return !meshMissing;
}
//True when the node's child pointer is anything other than the NULL_NODE sentinel
bool hasChildren(in UnpackedNode node) {
    bool childrenMissing = (node.childPtr == NULL_NODE);
    return !childrenMissing;
}
//True when bit 1 of the node flags (the empty marker) is set
bool isEmpty(in UnpackedNode node) {
    //Compare against an unsigned literal, matching hasRequested(), so the test does not
    // depend on implicit int -> uint conversion being available
    return (node.flags & 2u) != 0u;
}
//True when bit 0 of the node flags (request already submitted) is set
bool hasRequested(in UnpackedNode node) {
    uint requestedBit = node.flags & 1u;
    return requestedBit != 0u;
}
//Returns the node's mesh pointer as unpacked (may be NULL_MESH)
uint getMesh(in UnpackedNode node) {
    uint meshPointer = node.meshPtr;
    return meshPointer;
}
//Returns the index in nodes[] that this node was unpacked from
uint getId(in UnpackedNode node) {
    uint id = node.nodeId;
    return id;
}
//Returns the node flags with the two low marker bits (requested, empty) shifted out.
//NOTE(review): unlike getMesh()/getId() this does not return the matching field (node.childPtr),
// while hasChildren() tests childPtr - confirm whether flags>>2 is intended here
uint getChild(in UnpackedNode node) {
return node.flags >> 2;
}
//-----------------------------------
//Marks the node as having a request submitted, both in the unpacked copy and in the node buffer.
// Bit 24 of the third packed word is bit 0 of the unpacked flags (flags low byte = z>>24).
void markRequested(inout UnpackedNode node) {
    node.flags |= 1u;
    //nodes[] is an ivec4 array, so the mask must be a signed int: GLSL has no implicit
    // uint -> int conversion for the assignment
    nodes[node.nodeId].z |= (1 << 24);
}

View File

@@ -12,22 +12,55 @@
// substantually for performance (for both persistent threads and incremental) // substantually for performance (for both persistent threads and incremental)
layout(binding = HIZ_BINDING_INDEX) uniform sampler2DShadow hizDepthSampler;
//TODO: maybe do sphere bounds as well? cause they have different accuracies but are both overestimates (liberal (non conservative xD))
// so can do &&
vec3 minBB;
vec3 maxBB;
vec2 size;
vec3 projPoint(mat4 mat, vec3 pos) {
vec4 t = mat * vec4(vec3(pos),1);
return t.xyz/t.w;
}
//Sets up screenspace with the given node id, returns true on success false on failure/should not continue //Sets up screenspace with the given node id, returns true on success false on failure/should not continue
//Accesses data that is setup in the main traversal and is just shared to here //Accesses data that is setup in the main traversal and is just shared to here
bool setupScreenspace() { void setupScreenspace(in UnpackedNode node) {
//TODO: Need to do aabb size for the nodes, it must be an overestimate of all the children
mat4 mvp;
vec3 basePos;
vec3 minSize;
vec3 maxSize;
vec3 minPos = minSize + basePos;
vec3 maxPos = maxSize + basePos;
minBB = projPoint(mvp, minPos);
maxBB = minBB;
for (int i = 1; i < 8; i++) {
vec3 point = projPoint(mvp, mix(minPos, maxPos, bvec3((i&1)!=0,(i&2)!=0,(i&4)!=0)));
minBB = min(minBB, point);
maxBB = max(maxBB, point);
}
size = maxBB.xy - minBB.xy;
} }
bool isCulledByHiz() { bool isCulledByHiz() {
vec2 ssize = size.xy * vec2(ivec2(screensize));
float miplevel = ceil(log2(max(max(ssize.x, ssize.y),1)));
vec2 midpoint = (maxBB.xy + minBB.xy)*0.5;
return textureLod(hizDepthSampler, vec3(midpoint, minBB.z), miplevel) > 0.0001;
} }
//Returns if we should decend into its children or not //Returns if we should decend into its children or not
bool shouldDecend() { bool shouldDecend() {
return (size.x*size.y) > (64*64F);
} }

View File

@@ -12,15 +12,10 @@ layout(local_size_x=1, local_size_y=1) in;
// then increment head strictly _AFTER_ writing to the queue, this ensures that the data is always written and avaible in the queue // then increment head strictly _AFTER_ writing to the queue, this ensures that the data is always written and avaible in the queue
layout(binding = 0, std140) uniform SceneUniform { layout(binding = 0, std140) uniform SceneUniform {
uint a;
};
layout(binding = 1, std430) restrict buffer NodeData {//Needs to be read and writeable for marking data,
//(could do an evil violation, make this readonly, then have a writeonly varient, which means that writing might not be visible but will show up by the next frame)
//Nodes are 16 bytes big (or 32 cant decide, 16 might _just_ be enough)
ivec4[] nodes;
}; };
#define NODE_DATA_INDEX 1
layout(binding = 2, std430) restrict buffer Atomics { layout(binding = 2, std430) restrict buffer Atomics {
uint requestQueueIndex; uint requestQueueIndex;
@@ -51,20 +46,12 @@ layout(binding = 2, std430) restrict buffer QueueData {
#import <voxy:lod/hierarchial/node.glsl> #import <voxy:lod/hierarchial/node.glsl>
layout(binding = 0) uniform sampler2DShadow hizDepthSampler; #define HIZ_BINDING_INDEX 0
void aqcuireNewBatch() {
}
//Contains all the screenspace computation //Contains all the screenspace computation
#import <voxy:lod/hierarchial/screenspace.glsl> #import <voxy:lod/hierarchial/screenspace.glsl>
//If a request is successfully added to the RequestQueue, must update NodeData to mark that the node has been put into the request queue //If a request is successfully added to the RequestQueue, must update NodeData to mark that the node has been put into the request queue
// to prevent it from being requested every frame and blocking the queue // to prevent it from being requested every frame and blocking the queue
@@ -81,32 +68,37 @@ void aqcuireNewBatch() {
// not sure // not sure
void addRequest() {
if (!hasRequested()) {
//TODO: request this node (cpu side can figure out what it wants/needs)
//Mark node as having a request submitted to prevent duplicate submissions void addRequest(inout UnpackedNode node) {
if (!hasRequested(node)) {
//TODO: maybe try using only 1 variable and it being <0 being bad
if (atomics.requestQueueIndex < atomic.requestQueueMaxSize) {
//Mark node as having a request submitted to prevent duplicate submissions
requestQueue[atomicAdd(atomics.requestQueueIndex, 1)] = getId(node);
markRequested(node);
}
} }
} }
void enqueueSelfForRender() { void enqueueChildren(in UnpackedNode node) {
//TODO: Draw mesh and stop with this node (good path)
} }
void enqueueChildren() { void enqueueSelfForRender(in UnpackedNode node) {
renderQueue[atomicAdd(atomics.renderQueueIndex, 1)] = getMesh(node);
} }
//TODO: need to add an empty mesh, as a parent node might not have anything to render but the children do?? //TODO: need to add an empty mesh, as a parent node might not have anything to render but the children do??
void main() { void main() {
uint id = 0; UnpackedNode node;
//Setup/unpack the node //Setup/unpack the node
unpackNode(id); unpackNode(node, gl_GlobalInvocationID.x);
//TODO: check the node is OK first??? maybe? //TODO: check the node is OK first??? maybe?
//Compute screenspace //Compute screenspace
setupScreenspace(); setupScreenspace(node);
if (isCulledByHiz()) { if (isCulledByHiz()) {
//We are done here, dont do any more, the issue is the shader barriers maybe //We are done here, dont do any more, the issue is the shader barriers maybe
@@ -115,25 +107,25 @@ void main() {
//It is visible, TODO: maybe do a more detailed hiz test? (or make it so that ) //It is visible, TODO: maybe do a more detailed hiz test? (or make it so that )
if (shouldDecend()) { if (shouldDecend()) {
if (hasChildren()) { if (hasChildren(node)) {
enqueueChildren(); enqueueChildren(node);
} else { } else {
addRequest(); addRequest(node);
//TODO: use self mesh (is error state if it doesnt have one since all leaf nodes should have a mesh) //TODO: use self mesh (is error state if it doesnt have one since all leaf nodes should have a mesh)
// Basicly guarenteed to have a mesh, if it doesnt it is very very bad and incorect since its a violation of the graph properties // Basicly guarenteed to have a mesh, if it doesnt it is very very bad and incorect since its a violation of the graph properties
// that all leaf nodes must contain a mesh // that all leaf nodes must contain a mesh
enqueueSelfForRender(); enqueueSelfForRender(node);
} }
} else { } else {
if (hasMesh()) { if (hasMesh(node)) {
enqueueSelfForRender(); enqueueSelfForRender(node);
} else { } else {
//!! not ideal, we want to render this mesh but dont have it. If we havent sent a request //!! not ideal, we want to render this mesh but dont have it. If we havent sent a request
// then send a request for a mesh for this node. // then send a request for a mesh for this node.
addRequest(); addRequest(node);
//TODO: Decend into children? maybe add a bitflag saying is bad if the immediate children dont have meshes //TODO: Decend into children? maybe add a bitflag saying is bad if the immediate children dont have meshes
enqueueChildren(); enqueueChildren(node);
} }
} }
} }

View File

@@ -1,29 +0,0 @@
//All node access and setup into global variables
uint lodLevel;
ivec3 position;
uint childrenPtr;
uint meshPtr;
uint flags;
void unpackNode(uint nodeId) {
}
bool hasMesh() {
}
bool hasChildren() {
}
bool hasRequested() {
}
void markRequested() {
}