This commit is contained in:
mcrcortex
2024-05-06 09:58:27 +10:00
parent 52baa303dc
commit dba69f9470
14 changed files with 172 additions and 35 deletions

View File

@@ -3,6 +3,7 @@ package me.cortex.voxy.client.config;
import com.google.gson.FieldNamingPolicy;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import me.cortex.voxy.client.core.Capabilities;
import me.cortex.voxy.client.saver.ContextSelectionSystem;
import net.fabricmc.loader.api.FabricLoader;
import org.lwjgl.opengl.GL;
@@ -68,7 +69,6 @@ public class VoxyConfig {
}
public boolean useMeshShaders() {
var cap = GL.getCapabilities();
return this.useMeshShaderIfPossible && cap.GL_NV_mesh_shader && cap.GL_NV_representative_fragment_test;
return this.useMeshShaderIfPossible && Capabilities.INSTANCE.meshShaders;
}
}

View File

@@ -0,0 +1,16 @@
package me.cortex.voxy.client.core;
import org.lwjgl.opengl.GL;
/**
 * Snapshot of the optional OpenGL extensions the renderer depends on.
 *
 * <p>The flags are read once from {@link GL#getCapabilities()} in the constructor and are
 * immutable afterwards.
 *
 * <p>NOTE(review): {@link #INSTANCE} is built during class initialisation, so the first
 * touch of this class presumably has to happen where a GL context is available - confirm.
 */
public class Capabilities {
    /** Shared instance, created when this class is initialised. */
    public static final Capabilities INSTANCE = new Capabilities();

    /** True when both GL_NV_mesh_shader and GL_NV_representative_fragment_test are reported. */
    public final boolean meshShaders;

    /** True when GL_ARB_gpu_shader_int64 or GL_AMD_gpu_shader_int64 is reported. */
    public final boolean INT64_t;

    /** Queries the current GL capabilities and records the extension flags. */
    public Capabilities() {
        var glCaps = GL.getCapabilities();

        boolean hasMeshShader = glCaps.GL_NV_mesh_shader;
        boolean hasRepresentativeFragmentTest = glCaps.GL_NV_representative_fragment_test;
        boolean hasArbInt64 = glCaps.GL_ARB_gpu_shader_int64;
        boolean hasAmdInt64 = glCaps.GL_AMD_gpu_shader_int64;

        this.meshShaders = hasMeshShader && hasRepresentativeFragmentTest;
        this.INT64_t = hasArbInt64 || hasAmdInt64;
    }
}

View File

@@ -68,23 +68,12 @@ public class VoxelCore {
//Trigger the shared index buffer loading
SharedIndexBuffer.INSTANCE.id();
if (true) {
this.renderer = new Gl46MeshletsFarWorldRenderer(VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections);
System.out.println("Using Gl46MeshletFarWorldRendering");
} else {
if (VoxyConfig.CONFIG.useMeshShaders()) {
this.renderer = new NvMeshFarWorldRenderer(VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections);
System.out.println("Using NvMeshFarWorldRenderer");
} else {
this.renderer = new Gl46FarWorldRenderer(VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections);
System.out.println("Using Gl46FarWorldRenderer");
}
}
this.renderer = this.createRenderBackend();
this.viewportSelector = new ViewportSelector<>(this.renderer::createViewport);
System.out.println("Renderer initialized");
this.renderTracker = new RenderTracker(this.world, this.renderer);
this.renderGen = new RenderGenerationService(this.world, this.renderer.getModelManager(), VoxyConfig.CONFIG.renderThreads, this.renderTracker::processBuildResult);
this.renderGen = new RenderGenerationService(this.world, this.renderer.getModelManager(), VoxyConfig.CONFIG.renderThreads, this.renderTracker::processBuildResult, this.renderer.usesMeshlets());
this.world.setDirtyCallback(this.renderTracker::sectionUpdated);
this.renderTracker.setRenderGen(this.renderGen);
System.out.println("Render tracker and generator initialized");
@@ -130,6 +119,20 @@ public class VoxelCore {
System.out.println("Voxy core initialized");
}
/**
 * Creates the far-world renderer backend and logs which implementation was chosen.
 *
 * <p>Every backend is constructed from {@code VoxyConfig.CONFIG.geometryBufferSize} and
 * {@code VoxyConfig.CONFIG.maxSections}.
 *
 * @return the newly constructed renderer
 */
private AbstractFarWorldRenderer<?,?> createRenderBackend() {
    // The meshlet backend is currently forced on; the selection logic below is retained
    // but cannot be reached until this hard-coded condition is replaced.
    if (true) {
        System.out.println("Using Gl46MeshletFarWorldRendering");
        return new Gl46MeshletsFarWorldRenderer(VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections);
    }

    // Plain GL 4.6 path when mesh shaders are disabled or unsupported.
    if (!VoxyConfig.CONFIG.useMeshShaders()) {
        System.out.println("Using Gl46FarWorldRenderer");
        return new Gl46FarWorldRenderer(VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections);
    }

    System.out.println("Using NvMeshFarWorldRenderer");
    return new NvMeshFarWorldRenderer(VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections);
}
public void enqueueIngest(WorldChunk worldChunk) {

View File

@@ -174,4 +174,8 @@ public abstract class AbstractFarWorldRenderer <T extends Viewport, J extends Ab
}
protected abstract T createViewport0();
/**
 * Reports whether this renderer uses meshlets.
 *
 * <p>The base implementation answers {@code false}; it is not final, so a subclass
 * can override it.
 *
 * @return {@code false} by default
 */
public boolean usesMeshlets() {
return false;
}
}

View File

@@ -12,6 +12,7 @@ import org.joml.Vector3f;
import org.lwjgl.system.MemoryUtil;
import static org.lwjgl.opengl.ARBDirectStateAccess.glGetNamedFramebufferAttachmentParameteriv;
import static org.lwjgl.opengl.ARBDirectStateAccess.glTextureParameteri;
import static org.lwjgl.opengl.ARBIndirectParameters.GL_PARAMETER_BUFFER_ARB;
import static org.lwjgl.opengl.GL11.*;
import static org.lwjgl.opengl.GL14C.glBlendFuncSeparate;
@@ -69,6 +70,12 @@ public class Gl46MeshletsFarWorldRenderer extends AbstractFarWorldRenderer<Gl46M
super(new DefaultGeometryManager(alignUp(geometrySize*8L, 8*32), maxSections, 8*32));
this.glDrawIndirect = new GlBuffer(4*(4+5));
this.meshletBuffer = new GlBuffer(4*1000000);//TODO: Make max meshlet count configurable, not just 1 million (even tho thats a max of 126 million quads per frame)
glSamplerParameteri(this.hizSampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST_MIPMAP_NEAREST);
glTextureParameteri(this.hizSampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTextureParameteri(this.hizSampler, GL_TEXTURE_COMPARE_MODE, GL_NONE);
glTextureParameteri(this.hizSampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTextureParameteri(this.hizSampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
protected void bindResources(Gl46MeshletViewport viewport, boolean bindToDrawIndirect, boolean bindToDispatchIndirect, boolean bindHiz) {
@@ -113,8 +120,9 @@ public class Gl46MeshletsFarWorldRenderer extends AbstractFarWorldRenderer<Gl46M
}
innerTranslation.getToAddress(ptr); ptr += 4*3;
MemoryUtil.memPutInt(ptr, viewport.frameId++); ptr += 4;
MemoryUtil.memPutInt(ptr, viewport.width); ptr += 4;
MemoryUtil.memPutInt(ptr, viewport.height); ptr += 4;
//Divided by 2 cause hiz is half the size of the viewport
MemoryUtil.memPutInt(ptr, viewport.width/2); ptr += 4;
MemoryUtil.memPutInt(ptr, viewport.height/2); ptr += 4;
}
@Override
@@ -209,4 +217,9 @@ public class Gl46MeshletsFarWorldRenderer extends AbstractFarWorldRenderer<Gl46M
/**
 * Rounds {@code n} up to the next multiple of {@code alignment}.
 *
 * <p>Implemented with a bit mask, so the result is only a true multiple when
 * {@code alignment} is a power of two.
 *
 * @param n         value to round up
 * @param alignment power-of-two alignment
 * @return the smallest multiple of {@code alignment} that is {@code >= n}
 */
public static long alignUp(long n, long alignment) {
    long lowBits = alignment - 1;
    return (n + lowBits) & ~lowBits;
}
/**
 * Reports that this renderer uses meshlets.
 *
 * @return always {@code true}
 */
@Override
public boolean usesMeshlets() {
return true;
}
}

View File

@@ -1,6 +1,7 @@
package me.cortex.voxy.client.core.rendering.building;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import me.cortex.voxy.client.core.Capabilities;
import me.cortex.voxy.client.core.model.ModelManager;
import me.cortex.voxy.client.core.util.Mesher2D;
import me.cortex.voxy.common.util.MemoryBuffer;
@@ -30,7 +31,7 @@ public class RenderDataFactory {
private final LongArrayList translucentQuadCollector = new LongArrayList();
private final LongArrayList[] directionalQuadCollectors = new LongArrayList[]{new LongArrayList(), new LongArrayList(), new LongArrayList(), new LongArrayList(), new LongArrayList(), new LongArrayList()};
private final boolean generateMeshlets = true;
private final boolean generateMeshlets;
private int minX;
private int minY;
@@ -38,9 +39,10 @@ public class RenderDataFactory {
private int maxX;
private int maxY;
private int maxZ;
public RenderDataFactory(WorldEngine world, ModelManager modelManager) {
public RenderDataFactory(WorldEngine world, ModelManager modelManager, boolean emitMeshlets) {
this.world = world;
this.modelMan = modelManager;
this.generateMeshlets = emitMeshlets;
}
@@ -52,7 +54,7 @@ public class RenderDataFactory {
// since fluid states are explicitly overlays over the base block
// can do funny stuff like double rendering
private static final boolean USE_UINT64 = false;//FIXME: replace with automatic detection of uint64 shader extension support
private static final boolean USE_UINT64 = Capabilities.INSTANCE.INT64_t;
private static final int QUADS_PER_MESHLET = 30;
private static void writePos(long ptr, long pos) {
if (USE_UINT64) {

View File

@@ -30,8 +30,10 @@ public class RenderGenerationService {
private final ModelManager modelManager;
private final Consumer<BuiltSection> resultConsumer;
private final BuiltSectionMeshCache meshCache = new BuiltSectionMeshCache();
private final boolean emitMeshlets;
public RenderGenerationService(WorldEngine world, ModelManager modelManager, int workers, Consumer<BuiltSection> consumer) {
public RenderGenerationService(WorldEngine world, ModelManager modelManager, int workers, Consumer<BuiltSection> consumer, boolean emitMeshlets) {
this.emitMeshlets = emitMeshlets;
this.world = world;
this.modelManager = modelManager;
this.resultConsumer = consumer;
@@ -47,7 +49,7 @@ public class RenderGenerationService {
//TODO: add a generated render data cache
private void renderWorker() {
//Thread local instance of the factory
var factory = new RenderDataFactory(this.world, this.modelManager);
var factory = new RenderDataFactory(this.world, this.modelManager, this.emitMeshlets);
while (this.running) {
this.taskCounter.acquireUninterruptibly();
if (!this.running) break;

View File

@@ -0,0 +1,32 @@
#version 460
//Compute work group size; the trailing "in;" is required for the declaration to be valid GLSL
layout(local_size_x=8) in;

//NEW IDEA: use the depth buffer directly to compute the lod level needed to cover it

//Location in world space up to 2x2x2 block size resolution
//NOTE(review): uint64_t is not core GLSL 4.60; an int64 extension has to be enabled before OctNodeTask is actually used
#define OctNodeTask uint64_t
//First 32 bits are the start of the child
// next 16 bits are split into 8 pairs, each pair specifies the type of the subnode (air/empty, partial, full)

//Tasks are of size uint64_t

void main() {
    //NOTE(review): placeholder loop, it has no exit condition yet so this shader must not be dispatched as-is
    while (true) {
        barrier();
    }
}

View File

@@ -0,0 +1,37 @@
#version 460
#define WAVE_SIZE 64
//Compute work group size; the trailing "in;" is required for the declaration to be valid GLSL
layout(local_size_x=WAVE_SIZE) in;

#define WorkTask
//or make the shape 4x2x4 which has a local size of 32

//The work queue is a circular queue with collision detection and abortion
struct WorkQueueHeader {
    uint queueSizeBits;
    uint start;
    uint end;
    uint _padd;
};

//Task is a uint32,
//The idea is to use persistent threads + octree culling to recursively find bottom level sections that satisfy a pixel density requirement
// given a matrix

void main() {
    //NOTE(review): placeholder loop, it has no body or exit condition yet so this shader must not be dispatched as-is
    while (true) {
    }
}

//Idea: cull/recurse in a manner of 4x4x4 cubes (64 bits), have an existence mask per section so that unnecessary computation isn't done on air subsections
// if a section is fully or partially visible and its aabb does not occupy 1 pixel (or a subset of some specified area/density)
// then enqueue that section as a job
// once a node has reached its tail ending, check if it's loaded or not, if not, request it to be loaded
// note that the cpu side can discard sections if they are superseded by a higher level lod load request etc

View File

@@ -0,0 +1,24 @@
#version 460
//Compute work group size; the trailing "in;" is required for the declaration to be valid GLSL
layout(local_size_x=32) in;

//Works in multiple parts
// first is a downwards traversal from a base level that finds all the bottom level pixel detail AABBs
// task queue is firstly filled with large visible AABBs from rastered occlusion
//
// each node metadata contains the position in 3d space relative to the toplevel node
// an offset into the datapool for the child nodes, and a union between (a bitmask of if a child node is full, empty, or mixed (or unloaded))
// and a pointer to render metadata for meshlets

//The overarching idea is to have meshlets be automatically selected based on the resulting pixel size/density
// after 3d projection, we don't want subpixel triangles and we want to be able to automatically account for the
// perspective warp on the edges of the screen (e.g. high fov == higher density at the center of the screen)
// from this, the gpu can then (if they are not present) request meshlets be added and thereby automatic lod selection
// and dynamic building resulting in possibly O(fast) rendering

void main() {
    //NOTE(review): placeholder loop, it has no exit condition yet so this shader must not be dispatched as-is
    while (true) {
        barrier();
    }
}

View File

@@ -17,7 +17,7 @@ uint extractDetail(PosHeader pos64) {
return uint(pos64>>60);
}
uvec3 extractMin(AABBHeader aabb) {
return uvec3(uint(aabb&0xFF),uint((aabb>>8)&0xFF),uint((aabb>>16)&0xFF));
return uvec3(uint(uint(aabb)&0xFF),uint((uint(aabb)>>8)&0xFF),uint((uint(aabb)>>16)&0xFF));
}
uvec3 extractMax(AABBHeader aabb) {
return uvec3(uint((aabb>>24)&0xFF),uint((aabb>>32)&0xFF),uint((aabb>>40)&0xFF));

View File

@@ -16,14 +16,18 @@ vec3 proj(vec3 pos) {
bool testHiZ(PosHeader secPos, AABBHeader aabb) {
ivec3 section = extractPosition(secPos);
uint detail = extractDetail(secPos);
ivec3 pos = (((section<<detail)-baseSectionPos)<<5);
uvec3 cmin = extractMin(aabb)*(1<<detail);
uvec3 cmax = extractMax(aabb)*(1<<detail);
vec3 pos = vec3(ivec3(((section<<detail)-baseSectionPos)<<5));
vec3 cmin = ivec3(extractMin(aabb)*(1<<detail));
vec3 cmax = ivec3((extractMax(aabb)+1)*(1<<detail));
vec3 minBB = proj(pos);
//TODO:FIXME: either pos,cmin,cmax isnt correct, aswell as the miplevel isnt correct as its sampling at the wrong detail level
vec3 minBB = proj(pos + cmin);//
vec3 maxBB = minBB;
for (int i = 1; i < 8; i++) {
vec3 point = proj(pos+mix(cmin, cmax, bvec3((i&1)!=0,(i&2)!=0,(i&4)!=0)));
vec3 point = proj(pos + mix(cmin, cmax, bvec3((i&1)!=0,(i&2)!=0,(i&4)!=0)));
minBB = min(minBB, point);
maxBB = max(maxBB, point);
}
@@ -32,7 +36,7 @@ bool testHiZ(PosHeader secPos, AABBHeader aabb) {
maxBB = maxBB*0.5+0.5;
vec2 size = (maxBB.xy - minBB.xy) * vec2(screensize);
float miplevel = ceil(log2(max(size.x, size.y)/2));//NOTE: the /2 is cause the mipmaps dont include bottom level depth
float miplevel = ceil(log2(max(size.x, size.y)));//NOTE: the /2 is cause the mipmaps dont include bottom level depth
float a = textureLod(hizSampler,minBB.xy,miplevel).r;
float b = textureLod(hizSampler,vec2(minBB.x,maxBB.y),miplevel).r;
@@ -59,7 +63,7 @@ void main() {
PosHeader pos = geometryPool[meshletId*MESHLET_SIZE];
AABBHeader aabb = geometryPool[meshletId*MESHLET_SIZE+1];
if (true||testHiZ(pos, aabb)) {//If didnt cull, insert it back into the stream
if (testHiZ(pos, aabb)) {//If didnt cull, insert it back into the stream
meshlets[atomicAdd(drawCmd.instanceCount, 1)+fullMeshletCount] = meshletId;
}
}

View File

@@ -19,7 +19,7 @@ void main() {
//vec4 colour = solidColour;
vec4 colour = texture(blockModelAtlas, uv + baseUV, ((flags>>1)&1u)*-4.0);
if ((flags&1u) == 1 && colour.a <= 0.25f) {
discard;
//discard;
}
//Conditional tinting, TODO: FIXME: REPLACE WITH MASK OR SOMETHING, like encode data into the top bit of alpha
@@ -31,12 +31,11 @@ void main() {
//outColour = vec4(uv + baseUV, 0, 1);
/*
uint hash = meshlet*1231421+123141;
hash ^= hash>>16;
hash = hash*1231421+123141;
hash ^= hash>>16;
outColour = vec4(float(hash&15u)/15, float((hash>>4)&15u)/15, float((hash>>8)&15u)/15, 1);
*/
hash = hash * 1827364925 + 123325621;
//outColour = vec4(float(hash&15u)/15, float((hash>>4)&15u)/15, float((hash>>8)&15u)/15, 1);
}

View File

@@ -84,6 +84,7 @@ void main() {
uint lodLevel = extractDetail(meshletPosition);
ivec3 sectionPos = extractPosition(meshletPosition);
//meshlet = (meshlet<<5)|(gl_VertexID>>2);