Size 62 meshlets + major fix for hiz culling

This commit is contained in:
mcrcortex
2024-05-09 21:46:42 +10:00
parent 7a6669fb7d
commit b885d7c1ec
6 changed files with 48 additions and 13 deletions

View File

@@ -5,6 +5,7 @@ import org.lwjgl.opengl.GL20C;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;
import static org.lwjgl.opengl.GL20.glDeleteProgram;
import static org.lwjgl.opengl.GL20.glUseProgram;
@@ -37,12 +38,23 @@ public class Shader extends TrackedObject {
}
public static class Builder {
// Preprocessor defines injected into every shader stage: macro name -> replacement text
// (an empty string is stored for a value-less flag).
private final Map<String, String> defines = new HashMap<>();
// Shader source text keyed by the stage it will be compiled as.
private final Map<ShaderType, String> sources = new HashMap<>();
// NOTE(review): only assigned within the visible lines; presumably applied to sources as they are added — confirm.
private final IShaderProcessor processor;
/**
 * Creates a builder with no defines and no sources registered yet.
 *
 * @param processor shader processor stored on this builder
 */
private Builder(IShaderProcessor processor) {
    this.processor = processor;
}
/**
 * Registers a value-less preprocessor flag (emitted as {@code #define name}).
 *
 * @param name macro name
 * @return this builder, for chaining
 */
public Builder define(String name) {
    defines.put(name, "");
    return this;
}
/**
 * Registers a preprocessor macro with an integer value (emitted as {@code #define name value}).
 *
 * @param name  macro name
 * @param value integer the macro expands to, stored in its decimal string form
 * @return this builder, for chaining
 */
public Builder define(String name, int value) {
    defines.put(name, String.valueOf(value));
    return this;
}
public Builder add(ShaderType type, String id) {
this.addSource(type, ShaderLoader.parse(id));
return this;
@@ -55,7 +67,21 @@ public class Shader extends TrackedObject {
public Shader compile() {
int program = GL20C.glCreateProgram();
int[] shaders = this.sources.entrySet().stream().mapToInt(a->createShader(a.getKey(), a.getValue())).toArray();
int[] shaders = new int[this.sources.size()];
{
String defs = this.defines.entrySet().stream().map(a->"#define " + a.getKey() + " " + a.getValue() + "\n").collect(Collectors.joining());
int i = 0;
for (var entry : this.sources.entrySet()) {
String src = entry.getValue();
//Inject defines
src = src.substring(0, src.indexOf('\n')+1) +
defs
+ src.substring(src.indexOf('\n')+1);
shaders[i++] = createShader(entry.getKey(), src);
}
}
for (int i : shaders) {
GL20C.glAttachShader(program, i);

View File

@@ -3,6 +3,7 @@ package me.cortex.voxy.client.core.rendering;
import me.cortex.voxy.client.core.gl.GlBuffer;
import me.cortex.voxy.client.core.gl.shader.Shader;
import me.cortex.voxy.client.core.gl.shader.ShaderType;
import me.cortex.voxy.client.core.rendering.building.RenderDataFactory;
import me.cortex.voxy.client.core.rendering.util.UploadStream;
import me.cortex.voxy.client.mixin.joml.AccessFrustumIntersection;
import net.minecraft.client.MinecraftClient;
@@ -44,20 +45,24 @@ import static org.lwjgl.opengl.NVRepresentativeFragmentTest.GL_REPRESENTATIVE_FR
// this could potentially result in a fair bit of memory savings (especially if used in normal mc terrain rendering)
public class Gl46MeshletsFarWorldRenderer extends AbstractFarWorldRenderer<Gl46MeshletViewport, DefaultGeometryManager> {
/**
 * Starts a shader builder that already carries the QUADS_PER_MESHLET define,
 * which every shader of this renderer is compiled with.
 */
private static Shader.Builder meshletShaderBuilder() {
    return Shader.make().define("QUADS_PER_MESHLET", RenderDataFactory.QUADS_PER_MESHLET);
}

// Vertex + fragment program built from the quads.vert / quads.frag sources.
private final Shader lodShader = meshletShaderBuilder()
        .add(ShaderType.VERTEX, "voxy:lod/gl46mesh/quads.vert")
        .add(ShaderType.FRAGMENT, "voxy:lod/gl46mesh/quads.frag")
        .compile();

// Vertex + fragment program built from the cull.vert / cull.frag sources.
private final Shader cullShader = meshletShaderBuilder()
        .add(ShaderType.VERTEX, "voxy:lod/gl46mesh/cull.vert")
        .add(ShaderType.FRAGMENT, "voxy:lod/gl46mesh/cull.frag")
        .compile();

// Compute program built from cmdgen.comp.
private final Shader meshletGenerator = meshletShaderBuilder()
        .add(ShaderType.COMPUTE, "voxy:lod/gl46mesh/cmdgen.comp")
        .compile();

// Compute program built from meshletculler.comp.
private final Shader meshletCuller = meshletShaderBuilder()
        .add(ShaderType.COMPUTE, "voxy:lod/gl46mesh/meshletculler.comp")
        .compile();
@@ -67,15 +72,19 @@ public class Gl46MeshletsFarWorldRenderer extends AbstractFarWorldRenderer<Gl46M
private final int hizSampler = glGenSamplers();
/**
 * Sets up the geometry manager, allocates and zeroes the indirect-draw and meshlet buffers,
 * and configures the HiZ sampler object.
 *
 * @param geometrySize geometry capacity; multiplied by 8 and aligned up before being handed to the geometry manager
 * @param maxSections  forwarded unchanged to the geometry manager
 */
public Gl46MeshletsFarWorldRenderer(int geometrySize, int maxSections) {
    // NOTE(review): the two super(...) lines appear to be the before/after forms of one statement in this
    // diff view; only the second (alignment derived from QUADS_PER_MESHLET) belongs in the resulting file.
    // The alignment 8*(QUADS_PER_MESHLET+2) is presumably 8 bytes per entry with two header entries per
    // meshlet — confirm against the geometry layout.
    super(new DefaultGeometryManager(alignUp(geometrySize*8L, 8*32), maxSections, 8*32));
    super(new DefaultGeometryManager(alignUp(geometrySize*8L, 8* (RenderDataFactory.QUADS_PER_MESHLET+2)), maxSections, 8*(RenderDataFactory.QUADS_PER_MESHLET+2)));
    this.glDrawIndirect = new GlBuffer(4*(4+5));
    this.meshletBuffer = new GlBuffer(4*1000000);//TODO: Make max meshlet count configurable, not just 1 million (even though that's a max of 126 million quads per frame)

    // hizSampler is a sampler object (created with glGenSamplers), so every parameter must be set with
    // glSamplerParameteri. glTextureParameteri expects a texture name: given a sampler name it either
    // raises GL_INVALID_OPERATION or changes an unrelated texture that happens to share the id,
    // leaving the sampler's filter/compare/wrap state at its defaults.
    // NOTE(review): where a parameter is set twice below, the pair appears to be the before/after form
    // of one line in this diff view; the later call is the one that takes effect.
    glSamplerParameteri(this.hizSampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST_MIPMAP_NEAREST);
    glSamplerParameteri(this.hizSampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_NEAREST);//This is so that using the shadow sampler works correctly
    glSamplerParameteri(this.hizSampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glSamplerParameteri(this.hizSampler, GL_TEXTURE_COMPARE_MODE, GL_NONE);
    // Depth comparison is required for the sampler to be used through a shadow sampler in the shader.
    glSamplerParameteri(this.hizSampler, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE);
    glSamplerParameteri(this.hizSampler, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL);
    glSamplerParameteri(this.hizSampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glSamplerParameteri(this.hizSampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);

    // Passing 0 (NULL) as the data pointer fills both buffers with zeros.
    nglClearNamedBufferData(this.meshletBuffer.id, GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);
    nglClearNamedBufferData(this.glDrawIndirect.id, GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);
}
protected void bindResources(Gl46MeshletViewport viewport, boolean bindToDrawIndirect, boolean bindToDispatchIndirect, boolean bindHiz) {
@@ -122,7 +131,6 @@ public class Gl46MeshletsFarWorldRenderer extends AbstractFarWorldRenderer<Gl46M
}
innerTranslation.getToAddress(ptr); ptr += 4*3;
MemoryUtil.memPutInt(ptr, viewport.frameId++); ptr += 4;
//Divided by 2 cause hiz is half the size of the viewport
MemoryUtil.memPutInt(ptr, viewport.width); ptr += 4;
MemoryUtil.memPutInt(ptr, viewport.height); ptr += 4;
}

View File

@@ -55,7 +55,7 @@ public class RenderDataFactory {
// can do funny stuff like double rendering
private static final boolean USE_UINT64 = Capabilities.INSTANCE.INT64_t;
// Number of quads per meshlet. Gl46MeshletsFarWorldRenderer reads this constant for its geometry
// alignment and passes it to its shaders as the QUADS_PER_MESHLET define.
// NOTE(review): the two declarations below appear to be the before/after forms of one line in this
// diff view (private/30 -> public/62); only one can exist in the real file.
private static final int QUADS_PER_MESHLET = 30;
public static final int QUADS_PER_MESHLET = 62;
private static void writePos(long ptr, long pos) {
if (USE_UINT64) {
MemoryUtil.memPutLong(ptr, pos);

View File

@@ -46,7 +46,7 @@ struct DispatchIndirect {
};
// Binding 0 is shared: it is declared as hizSampler when BIND_SAMPLER_AS_HIZ is defined,
// otherwise as the block model atlas.
#ifdef BIND_SAMPLER_AS_HIZ
// NOTE(review): the two hizSampler declarations appear to be the before/after forms of one line in
// this diff view; only one can exist in the real shader.
layout(binding = 0) uniform sampler2D hizSampler;
// Shadow sampler: lookups take a reference depth and return the comparison result, not the stored depth.
layout(binding = 0) uniform sampler2DShadow hizSampler;
#else
layout(binding = 0) uniform sampler2D blockModelAtlas;
#endif

View File

@@ -1,5 +1,3 @@
// NOTE(review): QUADS_PER_MESHLET is also supplied from the Java side through Shader.Builder.define(...);
// this hard-coded value looks like the pre-change line of the diff — confirm which one is authoritative.
#define QUADS_PER_MESHLET 30
// Meshlet-facing names are plain aliases of the existing quad accessor and Quad type.
#define extractMeshletStart extractQuadStart
#define PosHeader Quad
#define AABBHeader Quad

View File

@@ -32,19 +32,22 @@ bool testHiZ(PosHeader secPos, AABBHeader aabb) {
maxBB = max(maxBB, point);
}
minBB = minBB*0.5+0.5;
maxBB = maxBB*0.5+0.5;
minBB = clamp(minBB*0.5+0.5, vec3(0), vec3(1));
maxBB = clamp(maxBB*0.5+0.5, vec3(0), vec3(1));
vec2 size = (maxBB.xy - minBB.xy) * vec2(ivec2(screensize));
float miplevel = ceil(log2(max(max(size.x, size.y),1)));
/*
float a = textureLod(hizSampler,minBB.xy,miplevel).r;
float b = textureLod(hizSampler,vec2(minBB.x,maxBB.y),miplevel).r;
float c = textureLod(hizSampler,maxBB.xy,miplevel).r;
float d = textureLod(hizSampler,vec2(maxBB.x,minBB.y),miplevel).r;
float depth = max(max(a,b),max(c,d));
return minBB.z <= depth;
*/
vec2 midpoint = (maxBB.xy + minBB.xy)*0.5;
return textureLod(hizSampler, vec3(midpoint, minBB.z - 0.000000001), miplevel) > 0.0001;
}