This commit is contained in:
mcrcortex
2024-07-12 10:47:46 +10:00
parent 707c0b1c85
commit 86f1770af3
12 changed files with 129 additions and 35 deletions

View File

@@ -5,6 +5,7 @@ import me.cortex.voxy.client.Voxy;
import me.cortex.voxy.client.config.VoxyConfig;
import me.cortex.voxy.client.core.rendering.*;
import me.cortex.voxy.client.core.rendering.building.RenderGenerationService;
import me.cortex.voxy.client.core.rendering.Test;
import me.cortex.voxy.client.core.rendering.post.PostProcessing;
import me.cortex.voxy.client.core.util.IrisUtil;
import me.cortex.voxy.client.saver.ContextSelectionSystem;
@@ -56,6 +57,7 @@ public class VoxelCore {
//private final Thread shutdownThread = new Thread(this::shutdown);
private WorldImporter importer;
private Test test;
public VoxelCore(ContextSelectionSystem.Selection worldSelection) {
this.world = worldSelection.createEngine();
var cfg = worldSelection.getConfig();
@@ -113,10 +115,11 @@ public class VoxelCore {
//this.renderer.getModelManager().updateEntry(0, Blocks.GRASS_BLOCK.getDefaultState());
System.out.println("Voxy core initialized");
this.test = new Test();
}
private AbstractFarWorldRenderer<?,?> createRenderBackend() {
if (true) {
if (false) {
System.out.println("Using Gl46MeshletFarWorldRendering");
return new Gl46MeshletsFarWorldRenderer(VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections);
} else {
@@ -196,6 +199,8 @@ public class VoxelCore {
this.renderer.renderFarAwayOpaque(viewport);
this.test.doIt(viewport);
//Compute the SSAO of the rendered terrain
this.postProcessing.computeSSAO(projection, matrices);

View File

@@ -17,7 +17,7 @@ import static org.lwjgl.opengl.GL30C.GL_RED_INTEGER;
import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BUFFER;
import static org.lwjgl.opengl.GL45.nglClearNamedBufferSubData;
public class PrintfInjector {
public class PrintfInjector implements IShaderProcessor {
private final GlBuffer textBuffer;
private final HashMap<String, Integer> printfStringMap = new HashMap<>();
private final HashMap<Integer, String> idToPrintfStringMap = new HashMap<>();
@@ -149,19 +149,20 @@ public class PrintfInjector {
//Inject the printf code
StringBuilder subCode = new StringBuilder();
subCode.append(String.format("{" +
"uint printfWriteIndex = atomicAdd(printfOutputStruct.index,%s);" +
"printfOutputStruct.stream[printfWriteIndex]=%s;", types.size()+1,
"uint printfWriteIndex = atomicAdd(printfOutputStruct.index,%s);" +
"printfOutputStruct.stream[printfWriteIndex]=%s;", types.size()+1,
this.printfStringMap.computeIfAbsent(fmtStr, a->{int id = this.printfStringMap.size();
this.idToPrintfStringMap.put(id, a);
return id;})));
for (int i = 0; i < types.size(); i++) {
subCode.append("printfOutputStruct.stream[printfWriteIndex+").append(i+1).append("]=");
if (types.get(i) == 'd' || types.get(i) == 'i' || types.get(i) == 'u') {
if (types.get(i) == 'd' || types.get(i) == 'i') {
subCode.append("uint(").append(argVals.get(i)).append(")");
}
if (types.get(i) == 'f') {
} else if (types.get(i) == 'f') {
subCode.append("floatBitsToUint(").append(argVals.get(i)).append(")");
} else {
throw new IllegalStateException("Unknown type " + types.get(i));
}
subCode.append(";");
}
@@ -193,7 +194,7 @@ public class PrintfInjector {
parsePrintfTypes(fmt, types);
Object[] args = new Object[types.size()];
for (int i = 0; i < types.size(); i++) {
if (types.get(i) == 'd' || types.get(i) == 'i' || types.get(i) == 'u') {
if (types.get(i) == 'd' || types.get(i) == 'i') {
args[i] = MemoryUtil.memGetInt(ptr);
ptr += 4;
cnt++;
@@ -209,7 +210,7 @@ public class PrintfInjector {
}
public void download() {
DownloadStream.INSTANCE.download(this.textBuffer, 0, this.textBuffer.size(), this::processResult);
DownloadStream.INSTANCE.download(this.textBuffer, this::processResult);
DownloadStream.INSTANCE.commit();
nglClearNamedBufferSubData(this.textBuffer.id, GL_R32UI, 0, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);
}
@@ -217,4 +218,9 @@ public class PrintfInjector {
// Frees textBuffer, the GlBuffer that download() reads the printf results from.
public void free() {
this.textBuffer.free();
}
/**
 * Shader-processor hook: returns {@code source} after passing it through
 * {@code transformInject}. The {@code type} argument is not consulted.
 */
@Override
public String process(ShaderType type, String source) {
return this.transformInject(source);
}
}

View File

@@ -1,20 +1,27 @@
package me.cortex.voxy.client.core.rendering;
import me.cortex.voxy.client.core.gl.shader.PrintfInjector;
import me.cortex.voxy.client.core.gl.shader.Shader;
import me.cortex.voxy.client.core.gl.shader.ShaderType;
import me.cortex.voxy.client.core.rendering.hierarchical.NodeManager2;
import me.cortex.voxy.client.core.rendering.hierarchical.NodeManagerOLD;
import static org.lwjgl.opengl.GL30.glBindBufferBase;
import static org.lwjgl.opengl.GL33.glBindSampler;
import static org.lwjgl.opengl.GL33.glGenSamplers;
import static org.lwjgl.opengl.GL42C.*;
import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BUFFER;
import static org.lwjgl.opengl.GL43C.GL_SHADER_STORAGE_BARRIER_BIT;
import static org.lwjgl.opengl.GL44.GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT;
import static org.lwjgl.opengl.GL45.glBindTextureUnit;
public class HierarchicalOcclusionRenderer {
private PrintfInjector printf = new PrintfInjector(100000, 10, System.out::println);
private final NodeManager2 nodeManager = new NodeManager2(null, null);
private final HiZBuffer hiz = new HiZBuffer();
private final int hizSampler = glGenSamplers();
private Shader hiercarchialShader = Shader.make()
private Shader hierarchicalTraversal = Shader.make(this.printf)
.add(ShaderType.COMPUTE, "voxy:lod/hierarchical/traversal.comp")
.compile();
@@ -27,20 +34,36 @@ public class HierarchicalOcclusionRenderer {
}
public void render(int depthBuffer, int width, int height) {
this.nodeManager.upload();
//Make hiz
this.hiz.buildMipChain(depthBuffer, width, height);
//Node upload phase
//this.nodeManager.uploadPhase();
//Node download phase (pulls from previous frame (should maybe result in lower latency)) also clears and resets the queues
//this.nodeManager.downloadPhase();
//Bind all the resources
this.bind();
//run hierachial selection shader
this.hiercarchialShader.bind();
//barrier
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT|GL_UNIFORM_BARRIER_BIT|GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT|GL_FRAMEBUFFER_BARRIER_BIT);
//Emit enough work to fully populate the gpu
//glDispatchCompute(this.workgroup_dispatch_size_x, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT|GL_COMMAND_BARRIER_BIT|GL_UNIFORM_BARRIER_BIT);
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT);
this.hierarchicalTraversal.bind();
{
//Bind stuff here
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, this.nodeManager.nodeBuffer.id);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, this.nodeManager.requestQueue.id);
//Bind the hiz buffer
glBindSampler(0, this.hizSampler);
glBindTextureUnit(0, this.hiz.getHizTextureId());
}
this.printf.bind();
{
//Dispatch hierarchies
}
this.nodeManager.download();
this.printf.download();
}
// Releases what this renderer holds: the printf injector, the HiZ buffer,
// the node manager, and finally the sampler object created with glGenSamplers.
public void free() {
this.printf.free();
this.hiz.free();
this.nodeManager.free();
glDeleteSamplers(this.hizSampler);
}
}

View File

@@ -0,0 +1,15 @@
package me.cortex.voxy.client.core.rendering;
import static org.lwjgl.opengl.GL30.*;
import static org.lwjgl.opengl.GL30.GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME;
/**
 * Small driver that hands the depth attachment of the framebuffer bound to
 * {@code GL_FRAMEBUFFER} to a {@link HierarchicalOcclusionRenderer}.
 */
public class Test {
    private final HierarchicalOcclusionRenderer hor = new HierarchicalOcclusionRenderer();

    /**
     * Queries the object name of the depth attachment on the framebuffer bound to
     * {@code GL_FRAMEBUFFER} and renders with it at the viewport's dimensions.
     *
     * @param viewport supplies the width and height forwarded to the renderer
     */
    public void doIt(Viewport viewport) {
        int[] depthAttachment = new int[1];
        glGetFramebufferAttachmentParameteriv(
                GL_FRAMEBUFFER,
                GL_DEPTH_ATTACHMENT,
                GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME,
                depthAttachment);

        int depthBuffer = depthAttachment[0];
        this.hor.render(depthBuffer, viewport.width, viewport.height);
    }
}

View File

@@ -1,6 +1,8 @@
package me.cortex.voxy.client.core.rendering.hierarchical;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
import me.cortex.voxy.client.core.gl.GlBuffer;
import me.cortex.voxy.client.core.rendering.building.BuiltSection;
import me.cortex.voxy.client.core.rendering.util.DownloadStream;
import me.cortex.voxy.client.core.rendering.util.MarkedObjectList;
@@ -10,6 +12,11 @@ import org.lwjgl.system.MemoryUtil;
import java.util.Arrays;
import static org.lwjgl.opengl.GL11.GL_UNSIGNED_INT;
import static org.lwjgl.opengl.GL30.GL_R32UI;
import static org.lwjgl.opengl.GL30C.GL_RED_INTEGER;
import static org.lwjgl.opengl.GL45.nglClearNamedBufferSubData;
//TODO:FIXME: TODO, Must fix/have some filtering for section updates based on time or something
// as there can be a cursed situation where an update occurs requiring expensive meshing for a section but then in the same
@@ -91,6 +98,7 @@ public class NodeManager2 {
}
}
public static final int REQUEST_QUEUE_SIZE = 1024;
public static final int MAX_NODE_COUNT = 1<<22;
public static final int NODE_MSK = MAX_NODE_COUNT-1;
@@ -104,11 +112,16 @@ public class NodeManager2 {
private final INodeInteractor interactor;
private final MeshManager meshManager;
public final GlBuffer nodeBuffer;
public final GlBuffer requestQueue;
/**
 * Creates the node manager and allocates its two GPU buffers.
 *
 * @param interactor  stored as-is; when non-null, {@code meshUpdate} is registered as its
 *                    mesh-update callback. {@code null} is tolerated because
 *                    HierarchicalOcclusionRenderer constructs this class with null arguments;
 *                    in that case no callback is registered.
 * @param meshManager stored as-is; not touched during construction
 */
public NodeManager2(INodeInteractor interactor, MeshManager meshManager) {
    this.interactor = interactor;
    this.pos2meshId.defaultReturnValue(NO_NODE);
    // Registering on a null interactor would throw a NullPointerException during
    // construction, so only hook the callback up when there is something to hook it to.
    if (interactor != null) {
        interactor.setMeshUpdateCallback(this::meshUpdate);
    }
    this.meshManager = meshManager;

    // Sized as MAX_NODE_COUNT * 16 (presumably 16 bytes per node slot - confirm against the shader layout).
    this.nodeBuffer = new GlBuffer(MAX_NODE_COUNT*16);
    // REQUEST_QUEUE_SIZE * 4 plus 4 extra bytes; download() clears the first 4 bytes as the counter.
    this.requestQueue = new GlBuffer(REQUEST_QUEUE_SIZE*4+4);
}
public void insertTopLevelNode(long position) {
@@ -335,24 +348,41 @@ public class NodeManager2 {
//The section was empty, so just remove/skip it
}
}
}
private final IntArrayList nodeUpdates = new IntArrayList();
//Invalidates the node and tells it to be pushed to the gpu next slot, NOTE: pushing a node, clears any gpu side flags
// Implementation: appends the node id to nodeUpdates, the list that upload() walks and then clears.
private void pushNode(int node) {
//TODO: update the local struct with the current frame id to prevent it from being put in the queue multiple times
// Until that is done, the same node id can be appended to nodeUpdates more than once.
this.nodeUpdates.add(node);
}
// Stub with an empty body: nothing is written yet.
// NOTE(review): presumably meant to serialise node `id` to the native address `dst` - confirm.
private void writeNode(long dst, int id) {
}
//2 parts upload and download
public void upload() {
for (int i = 0; i < this.nodeUpdates.size(); i++) {
int node = this.nodeUpdates.getInt(i);
//TODO: UPLOAD NODE
private void download() {
}
this.nodeUpdates.clear();
}
// Download half of the upload/download pair: reads back the request queue and resets its counter.
public void download() {
//Download the request queue then clear the counter (first 4 bytes)
//DownloadStream.INSTANCE.download(this.);
// Queue a download of the whole requestQueue buffer; the result is handed to processRequestQueue.
DownloadStream.INSTANCE.download(this.requestQueue, this::processRequestQueue);
DownloadStream.INSTANCE.commit();
// Clear bytes [0, 4) of the request queue as a single R32UI value; the data pointer is 0 (null),
// which the GL spec defines as filling the range with zeros.
nglClearNamedBufferSubData(this.requestQueue.id, GL_R32UI, 0, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);
}
// Frees the two GlBuffers allocated in the constructor: the request queue, then the node buffer.
public void free() {
this.requestQueue.free();
this.nodeBuffer.free();
}
}

View File

@@ -43,6 +43,12 @@ public class DownloadStream {
private long caddr = -1;
private long offset = 0;
/**
 * Pulls the entire buffer from the gpu.
 *
 * <p>Convenience form of {@code download(buffer, offset, size, resultConsumer)} that passes
 * offset {@code 0} and {@code buffer.size()} as the size.
 *
 * @param buffer         the buffer to read back in full
 * @param resultConsumer forwarded unchanged to the four-argument overload
 * @throws IllegalArgumentException from the four-argument overload if the buffer's size
 *                                  exceeds {@code Integer.MAX_VALUE}
 */
public void download(GlBuffer buffer, DownloadResultConsumer resultConsumer) {
    var wholeSize = buffer.size();
    this.download(buffer, 0, wholeSize, resultConsumer);
}
public void download(GlBuffer buffer, long destOffset, long size, DownloadResultConsumer resultConsumer) {
if (size > Integer.MAX_VALUE) {
throw new IllegalArgumentException();

View File

@@ -87,3 +87,7 @@ void markRequested(inout UnpackedNode node) {
node.flags |= 1u;
nodes[node.nodeId].z |= 1u<<24;
}
//Debug helper: emits one printf record with the node's id, lod level, position (x, y, z)
// and its flags, meshPtr and childPtr fields
void debugDumpNode(in UnpackedNode node) {
    //The three position slots are x, y, z in that order (y must not be replaced by a second z)
    printf("Node %d, %d@[%d,%d,%d], flags: %d, mesh: %d, ChildPtr: %d", node.nodeId, node.lodLevel, node.pos.x, node.pos.y, node.pos.z, node.flags, node.meshPtr, node.childPtr);
}

View File

@@ -3,7 +3,7 @@
//TODO: make this better than a single thread
layout(local_size_x=1, local_size_y=1) in;
#import <voxy:lod/hierarchial/binding_points.glsl>
#import <voxy:lod/hierarchical/binding_points.glsl>
#line 7
//The queue contains 3 atomics
@@ -44,13 +44,13 @@ layout(binding = 2, std430) restrict buffer QueueData {
} queue;
*/
#line 1
#import <voxy:lod/hierarchial/transform.glsl>
#import <voxy:lod/hierarchical/transform.glsl>
#line 1
#import <voxy:lod/hierarchial/node.glsl>
#import <voxy:lod/hierarchical/node.glsl>
#line 1
//Contains all the screenspace computation
#import <voxy:lod/hierarchial/screenspace.glsl>
#import <voxy:lod/hierarchical/screenspace.glsl>
#line 58
//If a request is successfully added to the RequestQueue, must update NodeData to mark that the node has been put into the request queue
@@ -71,6 +71,7 @@ layout(binding = 2, std430) restrict buffer QueueData {
void addRequest(inout UnpackedNode node) {
printf("requested");
if (!hasRequested(node)) {
//TODO: maybe try using only 1 variable and it being <0 being bad
if (requestQueueIndex < requestQueueMaxSize) {
@@ -82,10 +83,11 @@ void addRequest(inout UnpackedNode node) {
}
//Placeholder: the body only emits the debug printf "children"; nothing is enqueued here yet
void enqueueChildren(in UnpackedNode node) {
printf("children");
}
void enqueueSelfForRender(in UnpackedNode node) {
printf("render");
if (renderQueueIndex < renderQueueMaxSize) {
renderQueue[atomicAdd(renderQueueIndex, 1)] = getMesh(node);
}
@@ -103,7 +105,10 @@ void main() {
//Compute screenspace
setupScreenspace(node);
debugDumpNode(node);
if (isCulledByHiz()) {
printf("HizCulled");
//We are done here, dont do any more, the issue is the shader barriers maybe
// its culled, maybe just mark it as culled?
} else {