Incremental traversal system works
This commit is contained in:
@@ -16,6 +16,11 @@ import net.minecraft.client.world.ClientWorld;
|
|||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
public class Voxy implements ClientModInitializer {
|
public class Voxy implements ClientModInitializer {
|
||||||
|
public static final boolean SHADER_DEBUG;
|
||||||
|
static {
|
||||||
|
SHADER_DEBUG = System.getProperty("voxy.shaderDebug", "false").equals("true");
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onInitializeClient() {
|
public void onInitializeClient() {
|
||||||
ClientCommandRegistrationCallback.EVENT.register((dispatcher, registryAccess) -> {
|
ClientCommandRegistrationCallback.EVENT.register((dispatcher, registryAccess) -> {
|
||||||
|
|||||||
@@ -163,7 +163,7 @@ public class PrintfInjector implements IShaderProcessor {
|
|||||||
|
|
||||||
for (int i = 0; i < types.size(); i++) {
|
for (int i = 0; i < types.size(); i++) {
|
||||||
subCode.append("printfOutputStruct.stream[printfWriteIndex+").append(i+1).append("]=");
|
subCode.append("printfOutputStruct.stream[printfWriteIndex+").append(i+1).append("]=");
|
||||||
if (types.get(i) == 'd' || types.get(i) == 'i') {
|
if (types.get(i) == 'd') {
|
||||||
subCode.append("uint(").append(argVals.get(i)).append(")");
|
subCode.append("uint(").append(argVals.get(i)).append(")");
|
||||||
} else if (types.get(i) == 'f') {
|
} else if (types.get(i) == 'f') {
|
||||||
subCode.append("floatBitsToUint(").append(argVals.get(i)).append(")");
|
subCode.append("floatBitsToUint(").append(argVals.get(i)).append(")");
|
||||||
@@ -207,7 +207,7 @@ public class PrintfInjector implements IShaderProcessor {
|
|||||||
parsePrintfTypes(fmt, types);
|
parsePrintfTypes(fmt, types);
|
||||||
Object[] args = new Object[types.size()];
|
Object[] args = new Object[types.size()];
|
||||||
for (int i = 0; i < types.size(); i++) {
|
for (int i = 0; i < types.size(); i++) {
|
||||||
if (types.get(i) == 'd' || types.get(i) == 'i') {
|
if (types.get(i) == 'd') {
|
||||||
args[i] = MemoryUtil.memGetInt(ptr);
|
args[i] = MemoryUtil.memGetInt(ptr);
|
||||||
ptr += 4;
|
ptr += 4;
|
||||||
cnt++;
|
cnt++;
|
||||||
|
|||||||
@@ -56,6 +56,14 @@ public class Shader extends TrackedObject {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Useful for inline setting (such as debug)
|
||||||
|
public Builder defineIf(String name, boolean condition) {
|
||||||
|
if (condition) {
|
||||||
|
this.defines.put(name, "");
|
||||||
|
}
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
public Builder define(String name, int value) {
|
public Builder define(String name, int value) {
|
||||||
this.defines.put(name, Integer.toString(value));
|
this.defines.put(name, Integer.toString(value));
|
||||||
return this;
|
return this;
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
package me.cortex.voxy.client.core.rendering;
|
package me.cortex.voxy.client.core.rendering;
|
||||||
|
|
||||||
|
import me.cortex.voxy.client.Voxy;
|
||||||
import me.cortex.voxy.client.core.gl.shader.IShaderProcessor;
|
import me.cortex.voxy.client.core.gl.shader.IShaderProcessor;
|
||||||
import me.cortex.voxy.client.core.gl.shader.PrintfInjector;
|
import me.cortex.voxy.client.core.gl.shader.PrintfInjector;
|
||||||
|
|
||||||
@@ -7,7 +8,7 @@ import java.util.ArrayList;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public final class PrintfDebugUtil {
|
public final class PrintfDebugUtil {
|
||||||
public static final boolean ENABLE_PRINTF_DEBUGGING = System.getProperty("voxy.enableShaderDebugPrintf", "false").equals("true");
|
public static final boolean ENABLE_PRINTF_DEBUGGING = System.getProperty("voxy.enableShaderDebugPrintf", "false").equals("true") || Voxy.SHADER_DEBUG;
|
||||||
|
|
||||||
private static final List<String> printfQueue2 = new ArrayList<>();
|
private static final List<String> printfQueue2 = new ArrayList<>();
|
||||||
private static final List<String> printfQueue = new ArrayList<>();
|
private static final List<String> printfQueue = new ArrayList<>();
|
||||||
@@ -46,4 +47,10 @@ public final class PrintfDebugUtil {
|
|||||||
out.addAll(printfQueue2);
|
out.addAll(printfQueue2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void bind() {
|
||||||
|
if (ENABLE_PRINTF_DEBUGGING) {
|
||||||
|
PRINTF_object.bind();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -72,6 +72,7 @@ public class RenderService<T extends AbstractSectionRenderer<J, ?>, J extends Vi
|
|||||||
Arrays.stream(world.getMapper().getBiomeEntries()).forEach(this.modelService::addBiome);
|
Arrays.stream(world.getMapper().getBiomeEntries()).forEach(this.modelService::addBiome);
|
||||||
world.getMapper().setBiomeCallback(this.modelService::addBiome);
|
world.getMapper().setBiomeCallback(this.modelService::addBiome);
|
||||||
|
|
||||||
|
/*
|
||||||
final int H_WIDTH = 1;
|
final int H_WIDTH = 1;
|
||||||
for (int x = -H_WIDTH; x <= H_WIDTH; x++) {
|
for (int x = -H_WIDTH; x <= H_WIDTH; x++) {
|
||||||
for (int y = -1; y <= 0; y++) {
|
for (int y = -1; y <= 0; y++) {
|
||||||
@@ -80,7 +81,7 @@ public class RenderService<T extends AbstractSectionRenderer<J, ?>, J extends Vi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setup(Camera camera) {
|
public void setup(Camera camera) {
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
package me.cortex.voxy.client.core.rendering.hierachical2;
|
package me.cortex.voxy.client.core.rendering.hierachical2;
|
||||||
|
|
||||||
|
import me.cortex.voxy.client.Voxy;
|
||||||
import me.cortex.voxy.client.core.gl.GlBuffer;
|
import me.cortex.voxy.client.core.gl.GlBuffer;
|
||||||
import me.cortex.voxy.client.core.gl.shader.Shader;
|
import me.cortex.voxy.client.core.gl.shader.Shader;
|
||||||
import me.cortex.voxy.client.core.gl.shader.ShaderType;
|
import me.cortex.voxy.client.core.gl.shader.ShaderType;
|
||||||
|
import me.cortex.voxy.client.core.rendering.PrintfDebugUtil;
|
||||||
import me.cortex.voxy.client.core.rendering.util.HiZBuffer;
|
import me.cortex.voxy.client.core.rendering.util.HiZBuffer;
|
||||||
import me.cortex.voxy.client.core.rendering.Viewport;
|
import me.cortex.voxy.client.core.rendering.Viewport;
|
||||||
import me.cortex.voxy.client.core.rendering.util.DownloadStream;
|
import me.cortex.voxy.client.core.rendering.util.DownloadStream;
|
||||||
@@ -10,13 +12,16 @@ import me.cortex.voxy.client.core.rendering.util.UploadStream;
|
|||||||
import org.lwjgl.system.MemoryUtil;
|
import org.lwjgl.system.MemoryUtil;
|
||||||
|
|
||||||
import static me.cortex.voxy.client.core.rendering.PrintfDebugUtil.PRINTF_object;
|
import static me.cortex.voxy.client.core.rendering.PrintfDebugUtil.PRINTF_object;
|
||||||
import static org.lwjgl.opengl.GL11.GL_UNSIGNED_INT;
|
import static org.lwjgl.opengl.GL11.*;
|
||||||
import static org.lwjgl.opengl.GL30.GL_R32UI;
|
import static org.lwjgl.opengl.GL12.GL_UNPACK_IMAGE_HEIGHT;
|
||||||
|
import static org.lwjgl.opengl.GL12.GL_UNPACK_SKIP_IMAGES;
|
||||||
|
import static org.lwjgl.opengl.GL30.*;
|
||||||
import static org.lwjgl.opengl.GL30C.GL_RED_INTEGER;
|
import static org.lwjgl.opengl.GL30C.GL_RED_INTEGER;
|
||||||
import static org.lwjgl.opengl.GL42.glMemoryBarrier;
|
import static org.lwjgl.opengl.GL42.glMemoryBarrier;
|
||||||
import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BARRIER_BIT;
|
import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BARRIER_BIT;
|
||||||
import static org.lwjgl.opengl.GL45.nglClearNamedBufferSubData;
|
import static org.lwjgl.opengl.GL45.*;
|
||||||
|
|
||||||
|
// TODO: swap to persistent gpu threads instead of dispatching MAX_ITERATIONS of compute layers
|
||||||
public class HierarchicalOcclusionTraverser {
|
public class HierarchicalOcclusionTraverser {
|
||||||
private final HierarchicalNodeManager nodeManager;
|
private final HierarchicalNodeManager nodeManager;
|
||||||
|
|
||||||
@@ -27,17 +32,31 @@ public class HierarchicalOcclusionTraverser {
|
|||||||
private final GlBuffer uniformBuffer = new GlBuffer(1024).zero();
|
private final GlBuffer uniformBuffer = new GlBuffer(1024).zero();
|
||||||
private final GlBuffer renderList = new GlBuffer(100_000 * 4 + 4).zero();//100k sections max to render, TODO: Maybe move to render service or somewhere else
|
private final GlBuffer renderList = new GlBuffer(100_000 * 4 + 4).zero();//100k sections max to render, TODO: Maybe move to render service or somewhere else
|
||||||
|
|
||||||
private final GlBuffer scratchBuffer = new GlBuffer(1024).zero();//Scratch utility buffer for small things to get the ordering right and memory overall
|
private final GlBuffer queueMetaBuffer = new GlBuffer(4*4*5).zero();
|
||||||
//Scratch queues for node traversal
|
|
||||||
private final GlBuffer scratchQueueA = new GlBuffer(10_000*4).zero();
|
private final GlBuffer scratchQueueA = new GlBuffer(10_000*4).zero();
|
||||||
private final GlBuffer scratchQueueB = new GlBuffer(10_000*4).zero();
|
private final GlBuffer scratchQueueB = new GlBuffer(10_000*4).zero();
|
||||||
|
|
||||||
|
private static final int LOCAL_WORK_SIZE_BITS = 5;
|
||||||
|
private static final int MAX_ITERATIONS = 5;
|
||||||
|
|
||||||
|
private static final int NODE_QUEUE_INDEX_BINDING = 1;
|
||||||
|
private static final int NODE_QUEUE_META_BINDING = 2;
|
||||||
|
private static final int NODE_QUEUE_SOURCE_BINDING = 3;
|
||||||
|
private static final int NODE_QUEUE_SINK_BINDING = 4;
|
||||||
|
|
||||||
private final HiZBuffer hiZBuffer = new HiZBuffer();
|
private final HiZBuffer hiZBuffer = new HiZBuffer();
|
||||||
|
|
||||||
private final Shader traversal = Shader.make(PRINTF_object)
|
private final Shader traversal = Shader.make(PRINTF_object)
|
||||||
.add(ShaderType.COMPUTE, "voxy:lod/hierarchical/traversal.comp")
|
.defineIf("DEBUG", Voxy.SHADER_DEBUG)
|
||||||
|
.define("MAX_ITERATIONS", MAX_ITERATIONS)
|
||||||
|
.define("LOCAL_SIZE_BITS", LOCAL_WORK_SIZE_BITS)
|
||||||
|
|
||||||
|
.define("NODE_QUEUE_INDEX_BINDING", NODE_QUEUE_INDEX_BINDING)
|
||||||
|
.define("NODE_QUEUE_META_BINDING", NODE_QUEUE_META_BINDING)
|
||||||
|
.define("NODE_QUEUE_SOURCE_BINDING", NODE_QUEUE_SOURCE_BINDING)
|
||||||
|
.define("NODE_QUEUE_SINK_BINDING", NODE_QUEUE_SINK_BINDING)
|
||||||
|
|
||||||
|
.add(ShaderType.COMPUTE, "voxy:lod/hierarchical/traversal_dev.comp")
|
||||||
.compile();
|
.compile();
|
||||||
|
|
||||||
|
|
||||||
@@ -53,7 +72,8 @@ public class HierarchicalOcclusionTraverser {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//Binds the buffers that stay the same for every traversal pass
private void bindings() {
|
private void bindings() {
|
||||||
|
//Expose the queue metadata to the shader as a storage buffer at NODE_QUEUE_META_BINDING
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, NODE_QUEUE_META_BINDING, this.queueMetaBuffer.id);
|
||||||
|
//The same buffer is also the argument source for glDispatchComputeIndirect
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, this.queueMetaBuffer.id);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void doTraversal(Viewport<?> viewport, int depthBuffer) {
|
public void doTraversal(Viewport<?> viewport, int depthBuffer) {
|
||||||
@@ -61,19 +81,78 @@ public class HierarchicalOcclusionTraverser {
|
|||||||
this.hiZBuffer.buildMipChain(depthBuffer, viewport.width, viewport.height);
|
this.hiZBuffer.buildMipChain(depthBuffer, viewport.width, viewport.height);
|
||||||
|
|
||||||
this.uploadUniform(viewport);
|
this.uploadUniform(viewport);
|
||||||
UploadStream.INSTANCE.commit();
|
//UploadStream.INSTANCE.commit(); //Done inside traversal
|
||||||
|
|
||||||
this.traversal.bind();
|
this.traversal.bind();
|
||||||
this.bindings();
|
this.bindings();
|
||||||
|
PrintfDebugUtil.bind();
|
||||||
|
|
||||||
//Use a chain of glDispatchComputeIndirect (5 times) with alternating read/write buffers
|
this.traverseInternal(1);
|
||||||
// TODO: swap to persistent gpu thread instead
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
this.downloadResetRequestQueue();
|
this.downloadResetRequestQueue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void traverseInternal(int initialQueueSize) {
|
||||||
|
{
|
||||||
|
//Fix mesa bug
|
||||||
|
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
|
||||||
|
glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, 0);
|
||||||
|
glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
|
||||||
|
glPixelStorei(GL_UNPACK_SKIP_ROWS, 0);
|
||||||
|
glPixelStorei(GL_UNPACK_SKIP_IMAGES, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int firstDispatchSize = (initialQueueSize+(1<<LOCAL_WORK_SIZE_BITS)-1)>>LOCAL_WORK_SIZE_BITS;
|
||||||
|
/*
|
||||||
|
//prime the queue Todo: maybe move after the traversal? cause then it is more efficient work since it doesnt need to wait for this before starting?
|
||||||
|
glClearNamedBufferData(this.queueMetaBuffer.id, GL_RGBA32UI, GL_RGBA, GL_UNSIGNED_INT, new int[]{0,1,1,0});//Prime the metadata buffer, which also contains
|
||||||
|
|
||||||
|
//Set the first entry
|
||||||
|
glClearNamedBufferSubData(this.queueMetaBuffer.id, GL_RGBA32UI, 0, 16, GL_RGBA, GL_UNSIGNED_INT, new int[]{firstDispatchSize,1,1,initialQueueSize});
|
||||||
|
*/
|
||||||
|
{
|
||||||
|
long ptr = UploadStream.INSTANCE.upload(this.queueMetaBuffer, 0, 16*5);
|
||||||
|
MemoryUtil.memPutInt(ptr + 0, firstDispatchSize);
|
||||||
|
MemoryUtil.memPutInt(ptr + 4, 1);
|
||||||
|
MemoryUtil.memPutInt(ptr + 8, 1);
|
||||||
|
MemoryUtil.memPutInt(ptr + 12, initialQueueSize);
|
||||||
|
for (int i = 1; i < 5; i++) {
|
||||||
|
MemoryUtil.memPutInt(ptr + (i*16)+ 0, 0);
|
||||||
|
MemoryUtil.memPutInt(ptr + (i*16)+ 4, 1);
|
||||||
|
MemoryUtil.memPutInt(ptr + (i*16)+ 8, 1);
|
||||||
|
MemoryUtil.memPutInt(ptr + (i*16)+12, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
UploadStream.INSTANCE.commit();
|
||||||
|
}
|
||||||
|
|
||||||
|
glUniform1ui(NODE_QUEUE_INDEX_BINDING, 0);
|
||||||
|
|
||||||
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, NODE_QUEUE_SOURCE_BINDING, this.scratchQueueA.id);
|
||||||
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, NODE_QUEUE_SINK_BINDING, this.scratchQueueB.id);
|
||||||
|
|
||||||
|
//Dont need to use indirect to dispatch the first iteration
|
||||||
|
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT|GL_COMMAND_BARRIER_BIT);
|
||||||
|
glDispatchCompute(firstDispatchSize, 1,1);
|
||||||
|
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT|GL_COMMAND_BARRIER_BIT);
|
||||||
|
|
||||||
|
//Dispatch max iterations
|
||||||
|
for (int iter = 1; iter < MAX_ITERATIONS; iter++) {
|
||||||
|
glUniform1ui(NODE_QUEUE_INDEX_BINDING, iter);
|
||||||
|
|
||||||
|
//Flipflop buffers
|
||||||
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, NODE_QUEUE_SOURCE_BINDING, ((iter & 1) == 0 ? this.scratchQueueA : this.scratchQueueB).id);
|
||||||
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, NODE_QUEUE_SINK_BINDING, ((iter & 1) == 0 ? this.scratchQueueB : this.scratchQueueA).id);
|
||||||
|
|
||||||
|
//Dispatch and barrier
|
||||||
|
glDispatchComputeIndirect(iter * 4 * 4);
|
||||||
|
|
||||||
|
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private void downloadResetRequestQueue() {
|
private void downloadResetRequestQueue() {
|
||||||
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||||
DownloadStream.INSTANCE.download(this.requestBuffer, this::forwardDownloadResult);
|
DownloadStream.INSTANCE.download(this.requestBuffer, this::forwardDownloadResult);
|
||||||
@@ -108,6 +187,8 @@ public class HierarchicalOcclusionTraverser {
|
|||||||
this.nodeBuffer.free();
|
this.nodeBuffer.free();
|
||||||
this.uniformBuffer.free();
|
this.uniformBuffer.free();
|
||||||
this.renderList.free();
|
this.renderList.free();
|
||||||
this.scratchBuffer.free();
|
this.queueMetaBuffer.free();
|
||||||
|
this.scratchQueueA.free();
|
||||||
|
this.scratchQueueB.free();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,7 +18,6 @@ public class VoxyCommon implements ModInitializer {
|
|||||||
var commit = mod.getMetadata().getCustomValue("commit").getAsString();
|
var commit = mod.getMetadata().getCustomValue("commit").getAsString();
|
||||||
MOD_VERSION = version+"-"+commit;
|
MOD_VERSION = version+"-"+commit;
|
||||||
IS_DEDICATED_SERVER = FabricLoader.getInstance().getEnvironmentType() == EnvType.SERVER;
|
IS_DEDICATED_SERVER = FabricLoader.getInstance().getEnvironmentType() == EnvType.SERVER;
|
||||||
|
|
||||||
Serialization.init();
|
Serialization.init();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,50 @@
|
|||||||
|
//Value returned by getCurrentNode when the invocation has no queue entry to process
#define SENTINAL_OUT_OF_BOUNDS uint(-1)
|
||||||
|
|
||||||
|
//Index of the queue metadata slot used by the current dispatch
layout(location = NODE_QUEUE_INDEX_BINDING) uniform uint queueIdx;
|
||||||
|
|
||||||
|
//One uvec4 per iteration: .x is incremented by pushNodesInit alongside the entry count, .w is the number of queued entries
layout(binding = NODE_QUEUE_META_BINDING, std430) restrict buffer NodeQueueMeta {
|
||||||
|
uvec4 nodeQueueMetadata[MAX_ITERATIONS];
|
||||||
|
};
|
||||||
|
|
||||||
|
//Queue that getCurrentNode reads from
layout(binding = NODE_QUEUE_SOURCE_BINDING, std430) restrict readonly buffer NodeQueueSource {
|
||||||
|
uint[] nodeQueueSource;
|
||||||
|
};
|
||||||
|
|
||||||
|
//Queue that pushNode writes to
layout(binding = NODE_QUEUE_SINK_BINDING, std430) restrict writeonly buffer NodeQueueSink {
|
||||||
|
uint[] nodeQueueSink;
|
||||||
|
};
|
||||||
|
|
||||||
|
//Returns the source queue entry for this invocation, or SENTINAL_OUT_OF_BOUNDS when the
// invocation index is at or past the entry count of the current queue
uint getCurrentNode() {
    uint slot = gl_GlobalInvocationID.x;
    uint queued = nodeQueueMetadata[queueIdx].w;
    return slot < queued ? nodeQueueSource[slot] : SENTINAL_OUT_OF_BOUNDS;
}
|
||||||
|
|
||||||
|
uint nodePushIndex = -1;
|
||||||
|
//Reserves nodeCount consecutive slots in the next iteration's queue and stores the first
// reserved slot in nodePushIndex, ready for pushNode
//NOTE(review): without DEBUG there is no bounds check, so a push during the last iteration
// indexes nodeQueueMetadata[MAX_ITERATIONS] - confirm the last iteration never pushes
void pushNodesInit(uint nodeCount) {
    //Debug
    #ifdef DEBUG
    if (queueIdx >= (MAX_ITERATIONS-1)) {
        printf("LOG: Traversal tried inserting a node into next iteration, which is outside max iteration bounds. GID: %d, count: %d", gl_GlobalInvocationID.x, nodeCount);
        nodePushIndex = -1;
        return;
    }
    #endif

    uint index = atomicAdd(nodeQueueMetadata[queueIdx+1].w, nodeCount);
    //Increment the work group count only when this reservation crosses a work group threshold:
    // groups needed after the reservation minus groups needed before it (both rounded up).
    // Reservations are disjoint and contiguous, so the increments sum to ceil(total/LOCAL_SIZE)
    uint inc = ((index+nodeCount+(LOCAL_SIZE-1))>>LOCAL_SIZE_BITS)-((index+(LOCAL_SIZE-1))>>LOCAL_SIZE_BITS);
    atomicAdd(nodeQueueMetadata[queueIdx+1].x, inc);//TODO: see if making this conditional on inc != 0 is faster
    nodePushIndex = index;
}
|
||||||
|
|
||||||
|
//Writes nodeId into the sink queue at nodePushIndex, then advances nodePushIndex by one
void pushNode(uint nodeId) {
    #ifdef DEBUG
    //pushNodesInit leaves nodePushIndex at -1 when it refused the reservation
    if (nodePushIndex == -1) {
        printf("LOG: Tried pushing node when push node wasnt successful. GID: %d, pushing: %d", gl_GlobalInvocationID.x, nodeId);
        return;
    }
    #endif
    uint slot = nodePushIndex;
    nodePushIndex = slot + 1;
    nodeQueueSink[slot] = nodeId;
}
|
||||||
@@ -63,23 +63,6 @@ layout(binding = DEBUG_RENDER_NODE_INDEX, std430) restrict buffer DebugRenderNod
|
|||||||
//Contains all the screenspace computation
|
//Contains all the screenspace computation
|
||||||
#import <voxy:lod/hierarchical/screenspace.glsl>
|
#import <voxy:lod/hierarchical/screenspace.glsl>
|
||||||
|
|
||||||
//If a request is successfully added to the RequestQueue, must update NodeData to mark that the node has been put into the request queue
|
|
||||||
// to prevent it from being requested every frame and blocking the queue
|
|
||||||
|
|
||||||
|
|
||||||
//Once a suitable render section is found, it is put into the RenderQueue, or if its not availbe its put into the RequestQueue
|
|
||||||
// and its children are rendered instead if it has them avalible
|
|
||||||
|
|
||||||
//NOTE: EXPERIMENT: INSTEAD OF PERSISTENT THREADS
|
|
||||||
//TODO: since we know the tree depth is worst case 5, we can just do an indirect dispatch 5 times one for each layer
|
|
||||||
// issues with this approach, barriers and waiting for one to finish before the otehr can be executed
|
|
||||||
// advantages, MUCH SIMPLER, no shader barriers needed really , issue is need a flipflip queue but thats ok,
|
|
||||||
// also ensures the gpu is full of work capacity
|
|
||||||
// this might be what i do to start with since its much easier to do
|
|
||||||
// not sure
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void addRequest(inout UnpackedNode node) {
|
void addRequest(inout UnpackedNode node) {
|
||||||
if (!hasRequested(node)) {
|
if (!hasRequested(node)) {
|
||||||
//printf("Request %d %d %d %d", node.nodeId, node.flags, node.meshPtr, node.childPtr);
|
//printf("Request %d %d %d %d", node.nodeId, node.flags, node.meshPtr, node.childPtr);
|
||||||
@@ -172,6 +155,8 @@ void main() {
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Persistent threading
|
||||||
|
|
||||||
//Thread 0 grabs a batch when empty
|
//Thread 0 grabs a batch when empty
|
||||||
void main() {
|
void main() {
|
||||||
while (true) {
|
while (true) {
|
||||||
@@ -179,3 +164,23 @@ void main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//If a request is successfully added to the RequestQueue, must update NodeData to mark that the node has been put into the request queue
|
||||||
|
// to prevent it from being requested every frame and blocking the queue
|
||||||
|
|
||||||
|
|
||||||
|
//Once a suitable render section is found, it is put into the RenderQueue, or if it's not available it's put into the RequestQueue
|
||||||
|
// and its children are rendered instead if it has them available
|
||||||
|
|
||||||
|
//NOTE: EXPERIMENT: INSTEAD OF PERSISTENT THREADS
|
||||||
|
//TODO: since we know the tree depth is worst case 5, we can just do an indirect dispatch 5 times one for each layer
|
||||||
|
// issues with this approach, barriers and waiting for one to finish before the other can be executed
|
||||||
|
// advantages, MUCH SIMPLER, no shader barriers needed really, issue is need a flip-flop queue but that's ok,
|
||||||
|
// also ensures the gpu is full of work capacity
|
||||||
|
// this might be what i do to start with since its much easier to do
|
||||||
|
// not sure
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,16 @@
|
|||||||
|
#version 460 core
|
||||||
|
|
||||||
|
//TODO: increase local size
|
||||||
|
#define LOCAL_SIZE_MSK ((1<<LOCAL_SIZE_BITS)-1)
|
||||||
|
#define LOCAL_SIZE (1<<LOCAL_SIZE_BITS)
|
||||||
|
layout(local_size_x=LOCAL_SIZE) in;//, local_size_y=1
|
||||||
|
#import <voxy:lod/hierarchical/queue.glsl>
|
||||||
|
|
||||||
|
//Dev entry point: logs the node handled by this invocation and pushes it unchanged
// into the next iteration's queue
void main() {
    uint node = getCurrentNode();
    if (node == SENTINAL_OUT_OF_BOUNDS) {
        //No queue entry for this invocation
        return;
    }
    printf("GID:%d, NODE %d, %d, AA, %d, %d, %d, %d", gl_GlobalInvocationID.x, node, queueIdx, nodeQueueMetadata[queueIdx].x, nodeQueueMetadata[queueIdx].y, nodeQueueMetadata[queueIdx].z, nodeQueueMetadata[queueIdx].w);
    pushNodesInit(1);
    pushNode(node);
}
|
||||||
Reference in New Issue
Block a user