Finished mesh implementation; optimized gl46 shader

This commit is contained in:
mcrcortex
2024-03-13 19:25:09 +10:00
parent 283084cfa8
commit af9c45bb51
7 changed files with 447 additions and 8 deletions

View File

@@ -64,8 +64,10 @@ public class VoxelCore {
SharedIndexBuffer.INSTANCE.id();
if (VoxyConfig.CONFIG.useMeshShaders()) {
this.renderer = new NvMeshFarWorldRenderer(VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections);
System.out.println("Using NvMeshFarWorldRenderer");
} else {
this.renderer = new Gl46FarWorldRenderer(VoxyConfig.CONFIG.geometryBufferSize, VoxyConfig.CONFIG.maxSections);
System.out.println("Using Gl46FarWorldRenderer");
}
this.viewportSelector = new ViewportSelector<>(this.renderer::createViewport);
System.out.println("Renderer initialized");

View File

@@ -38,7 +38,7 @@ public class Gl46FarWorldRenderer extends AbstractFarWorldRenderer<Gl46Viewport>
.compile();
// Shader used to draw LoD quads: vertex + fragment stages loaded from the voxy:lod/gl46 resources.
// NOTE(review): two VERTEX sources (quads.vert and quads2.vert) are attached below. This text looks like a
// diff whose +/- markers were stripped, so presumably only one of them is meant to remain - confirm.
private final Shader lodShader = Shader.make()
.add(ShaderType.VERTEX, "voxy:lod/gl46/quads.vert")
.add(ShaderType.VERTEX, "voxy:lod/gl46/quads2.vert")
.add(ShaderType.FRAGMENT, "voxy:lod/gl46/quads.frag")
.compile();

View File

@@ -15,7 +15,9 @@ import org.lwjgl.system.MemoryUtil;
import java.util.List;
import static org.lwjgl.opengl.ARBIndirectParameters.GL_PARAMETER_BUFFER_ARB;
import static org.lwjgl.opengl.ARBIndirectParameters.glMultiDrawElementsIndirectCountARB;
import static org.lwjgl.opengl.GL11.*;
import static org.lwjgl.opengl.GL14C.glBlendFuncSeparate;
import static org.lwjgl.opengl.GL15.GL_ELEMENT_ARRAY_BUFFER;
import static org.lwjgl.opengl.GL15.glBindBuffer;
import static org.lwjgl.opengl.GL30.glBindBufferBase;
@@ -39,6 +41,12 @@ public class NvMeshFarWorldRenderer extends AbstractFarWorldRenderer<NvMeshViewp
.add(ShaderType.FRAGMENT, "voxy:lod/nvmesh/primary.frag")
.compile();
// Shader bound for the translucent draw: task + mesh stages from voxy:lod/nvmesh/translucent.*,
// paired with the voxy:lod/nvmesh/primary.frag fragment stage.
private final Shader translucent = Shader.make()
.add(ShaderType.TASK, "voxy:lod/nvmesh/translucent.task")
.add(ShaderType.MESH, "voxy:lod/nvmesh/translucent.mesh")
.add(ShaderType.FRAGMENT, "voxy:lod/nvmesh/primary.frag")
.compile();
private final Shader cull = Shader.make()
.add(ShaderType.VERTEX, "voxy:lod/nvmesh/cull.vert")
.add(ShaderType.FRAGMENT, "voxy:lod/nvmesh/cull.frag")
@@ -135,7 +143,35 @@ public class NvMeshFarWorldRenderer extends AbstractFarWorldRenderer<NvMeshViewp
if (this.geometry.getSectionCount()==0) {
return;
}
//TODO: make a different task shader for translucent
RenderLayer.getTranslucent().startDrawing();
glBindVertexArray(AbstractFarWorldRenderer.STATIC_VAO);
glDisable(GL_CULL_FACE);
glEnable(GL_BLEND);
//TODO: maybe change this so the alpha isnt applied in the same way or something?? since atm the texture bakery uses a very hacky
// blend equation to make it avoid double applying translucency
glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
glBindSampler(0, this.models.getSamplerId());
glBindTextureUnit(0, this.models.getTextureId());
this.translucent.bind();
this.bindResources(viewport);
glDepthMask(false);
glDrawMeshTasksNV(0, this.geometry.getSectionCount());
glDepthMask(true);
glEnable(GL_CULL_FACE);
glBindVertexArray(0);
glBindSampler(0, 0);
glBindTextureUnit(0, 0);
glDisable(GL_BLEND);
RenderLayer.getTranslucent().endDrawing();
}
@Override

View File

@@ -0,0 +1,146 @@
#version 460 core
#extension GL_ARB_gpu_shader_int64 : enable
#import <voxy:lod/quad_format.glsl>
#import <voxy:lod/gl46/bindings.glsl>
#import <voxy:lod/block_model.glsl>
#line 8
layout(location = 0) out vec2 uv;
layout(location = 1) out flat vec2 baseUV;
layout(location = 2) out flat vec4 tinting;
layout(location = 3) out flat vec4 addin;
layout(location = 4) out flat uint flags;
layout(location = 5) out flat vec4 conditionalTinting;
//layout(location = 6) out flat vec4 solidColour;
//Returns the LoD level, which is stored in the top 5 bits (bits 27..31) of gl_BaseInstance
uint extractLodLevel() {
    uint instanceBits = uint(gl_BaseInstance);
    return instanceBits >> 27;
}
//Unpacks the section position from the low 27 bits of gl_BaseInstance.
// Each axis is a 9 bit field (x = bits 18..26, y = bits 9..17, z = bits 0..8): the field is first shifted up to the
// top of the word, then arithmetically shifted back down by 23 so that it comes out sign extended.
// Gives a position relative to the player center in its respective lod.
//NOTE(review): an earlier comment stated that the last 2 bits of gl_BaseInstance are unused - confirm against the
// code that packs gl_BaseInstance
ivec3 extractRelativeLodPos() {
    ivec3 instanceBits = ivec3(gl_BaseInstance);
    ivec3 alignedToTop = instanceBits << ivec3(5, 14, 23);
    return alignedToTop >> ivec3(23);
}
//Unpacks a 32 bit colour into 4 normalized floats: x = bits 24..31, y = bits 16..23, z = bits 8..15, w = bits 0..7
vec4 uint2vec4RGBA(uint colour) {
    uvec4 channels = uvec4(colour) >> uvec4(24, 16, 8, 0);
    channels = channels & uvec4(0xFF);
    return vec4(channels) / 255.0;
}
//Returns the face bounds as (startA, extentA, startB, extentB).
// The x/z components of extractFaceSizes are treated as the start of each axis and y/w as the end; both are pushed
// outwards by a tiny epsilon and the end is then made relative to the start.
vec4 getFaceSize(uint faceData) {
    const float EPSILON = 0.001f;
    vec4 bounds = extractFaceSizes(faceData);

    //Expand the quad by a very small amount on every side
    vec2 start = bounds.xz - vec2(EPSILON);
    vec2 end = bounds.yw + vec2(EPSILON);

    //Express the end as an extent measured from the start
    vec2 extent = end - start;
    return vec4(start.x, extent.x, start.y, extent.y);
}
//Reorders the components of data to match the axis a face lies on:
// axis 0 (up/down)   -> data.xzy
// axis 2 (west/east) -> data.zxy
// anything else (north/south is axis 1) is returned unchanged
vec3 swizzelDataAxis(uint axis, vec3 data) {
    return (axis == 0u) ? data.xzy
         : (axis == 2u) ? data.zxy
         : data;
}
//TODO: add a mechanism so that some quads can ignore backface culling
// this would help a lot with stuff like crops as they would look kinda weird i think,
// same with flowers etc

//Vertex entry point: emits one corner of one quad.
// gl_VertexID carries the corner index in its low 2 bits and the quad index in the remaining bits.
// gl_BaseInstance carries the lod level and the section position (see extractLodLevel / extractRelativeLodPos).
// Besides gl_Position this writes the interpolated uv and the flat per-quad outputs
// (baseUV, tinting, addin, flags, conditionalTinting).
void main() {
    int cornerIdx = gl_VertexID&3;

    Quad quad = quadData[uint(gl_VertexID)>>2];
    uint face = extractFace(quad);
    uint modelId = extractStateId(quad);
    BlockModel model = modelData[modelId];
    uint faceData = model.faceData[face];
    bool isTranslucent = modelIsTranslucent(model);
    bool hasAO = modelHasMipmaps(model);//TODO: replace with per face AO flag
    bool isShaded = hasAO;//TODO: make this a per face flag

    uint lodLevel = extractLodLevel();

    //The low 2 bytes of the model id pick a cell on a 256x256 grid,
    // the face then picks a sub cell inside it (face>>1 along x in thirds, face&1 along y in halves)
    vec2 modelUV = vec2(modelId&0xFFu, (modelId>>8)&0xFFu)*(1.0/(256.0));
    baseUV = modelUV + (vec2(face>>1, face&1u) * (1.0/(vec2(3.0, 2.0)*256.0)));

    ivec2 quadSize = extractSize(quad);

    { //Generate tinting and flag data
        flags = faceHasAlphaCuttout(faceData);

        //We need to have a conditional override based on if the model size is < a full face + quadSize > 1
        flags |= uint(any(greaterThan(quadSize, ivec2(1)))) & faceHasAlphaCuttoutOverride(faceData);

        //Bit 1 is set when the model has no mipmaps
        flags |= uint(!modelHasMipmaps(model))<<1;

        //Compute lighting
        tinting = getLighting(extractLightId(quad));

        //Apply model colour tinting
        uint tintColour = model.colourTint;
        if (modelHasBiomeLUT(model)) {
            //colourTint is then a base index into colourData, offset by the quads biome id
            tintColour = colourData[tintColour + extractBiomeId(quad)];
        }

        conditionalTinting = vec4(0);
        //A tint of all ones (uint(-1)) means there is no tint to apply
        if (tintColour != uint(-1)) {
            flags |= 1u<<2;
            conditionalTinting = uint2vec4RGBA(tintColour).yzwx;
        }

        addin = vec4(0.0);
        if (!isTranslucent) {
            tinting.w = 0.0;
            //Encode the face (from bit 0), the lod level (from bit 3) and the AO flag (bit 6) into addin.w
            uint encodedData = 0;
            encodedData |= face;
            encodedData |= (lodLevel<<3);
            encodedData |= uint(hasAO)<<6;
            addin.w = float(encodedData)/255.0;
        }

        //Apply face tint
        if (isShaded) {
            //TODO: make branchless, in fact apply ahead of time to the texture itself in ModelManager since that is
            // per face
            if ((face>>1) == 1) {
                tinting.xyz *= 0.8f;
            } else if ((face>>1) == 2) {
                tinting.xyz *= 0.6f;
            } else if (face == 0){
                tinting.xyz *= 0.5f;
            }
        }
    }

    //Size of the (merged) quad, masked per axis so that it is only added for the corners on the far side
    vec4 faceSize = getFaceSize(faceData);
    vec2 cQuadSize = (faceSize.yw + quadSize - 1) * vec2((cornerIdx>>1)&1, cornerIdx&1);
    uv = faceSize.xz + cQuadSize;

    //Start corner of the quad, moved along the face normal axis by the face indentation
    // (odd faces use 1-depthOffset, even faces use depthOffset)
    vec3 cornerPos = extractPos(quad);
    float depthOffset = extractFaceIndentation(faceData);
    cornerPos += swizzelDataAxis(face>>1, vec3(faceSize.xz, mix(depthOffset, 1-depthOffset, float(face&1u))));

    //Section origin, scaled by 32 (<<5), everything inside the section is scaled by the lod level
    vec3 origin = vec3(((extractRelativeLodPos()<<lodLevel) - (baseSectionPos&(ivec3((1<<lodLevel)-1))))<<5);
    gl_Position = MVP*vec4((cornerPos+swizzelDataAxis(face>>1,vec3(cQuadSize,0)))*(1<<lodLevel)+origin, 1.0);
}

View File

@@ -43,12 +43,6 @@ vec4 uint2vec4RGBA(uint colour) {
return vec4((uvec4(colour)>>uvec4(24,16,8,0))&uvec4(0xFF))/255.0;
}
//Gets the face indentation signed by the face direction: unchanged for even face ids, negated for odd ones
float getDepthOffset(uint faceData, uint face) {
    float indentation = extractFaceIndentation(faceData);
    bool oddFace = (int(face) & 1) == 1;
    return oddFace ? -indentation : indentation;
}
vec4 getFaceSize(uint faceData) {
float EPSILON = 0.001f;
vec4 faceOffsetsSizes = extractFaceSizes(faceData);

View File

@@ -0,0 +1,212 @@
#version 460
#extension GL_ARB_shading_language_include : enable
#pragma optionNV(unroll all)
#define UNROLL_LOOP
#extension GL_NV_mesh_shader : require
#extension GL_NV_gpu_shader5 : require
#extension GL_ARB_gpu_shader_int64 : require
#import <voxy:lod/nvmesh/bindings.glsl>
#import <voxy:lod/block_model.glsl>
#import <voxy:lod/quad_format.glsl>
#line 13
layout(local_size_x = 16) in;
layout(triangles, max_vertices=64, max_primitives=32) out;
layout(location=1) out Interpolants {
vec2 uv;
} i_out[];
layout(location=2) perprimitiveNV out PerPrimData {
vec2 baseUV;
vec4 tinting;
vec4 addin;
uint flags;
vec4 conditionalTinting;
} per_prim_out[];
//Writes the 6 indices (2 triangles: 0,1,2 and 2,3,0) for the quad owned by this invocation.
// Invocation i owns indices [6i, 6i+5] and vertices [4i, 4i+3].
void emitIndicies() {
    const uint cornerOrder[6] = uint[6](0u, 1u, 2u, 2u, 3u, 0u);
    uint firstIndex = gl_LocalInvocationID.x * 6;
    uint firstVertex = gl_LocalInvocationID.x << 2;
    for (uint slot = 0u; slot < 6u; slot++) {
        gl_PrimitiveIndicesNV[firstIndex + slot] = firstVertex + cornerOrder[slot];
    }
}
//Splits a packed 32 bit colour into its 4 bytes (highest byte first) and normalizes each to 0..1
vec4 uint2vec4RGBA(uint colour) {
    const uvec4 byteShift = uvec4(24, 16, 8, 0);
    uvec4 bytes = (uvec4(colour) >> byteShift) & uvec4(0xFF);
    return vec4(bytes) / 255.0;
}
//Returns the face bounds as (startA, extentA, startB, extentB).
// x/z of extractFaceSizes are used as the start of each axis and y/w as the end; the quad is grown by a very small
// amount on every side and the end is then rewritten relative to the start.
vec4 getFaceSize(uint faceData) {
    const float EPSILON = 0.001f;
    vec4 raw = extractFaceSizes(faceData);

    vec2 lower = raw.xz - vec2(EPSILON);
    vec2 upper = raw.yw + vec2(EPSILON);
    vec2 span = upper - lower;

    return vec4(lower.x, span.x, lower.y, span.y);
}
//Maps face-local data onto the axis the face lies on.
// Up/down (axis 0) yields data.xzy, west/east (axis 2) yields data.zxy,
// north/south (axis 1) and any other value leave data as it is.
vec3 swizzelDataAxis(uint axis, vec3 data) {
    switch (axis) {
        case 0u:
            return data.xzy;
        case 2u:
            return data.zxy;
        default:
            return data;
    }
}
taskNV in Task {
vec3 origin;//Offset to camera in world space (already multiplied by lod level)
uint baseOffset;//Base offset into the quad data buffer
uint meta;//First 4 bits is lod level, remaining is quadCount
};
//Returns the index into quadData of the quad this invocation handles,
// or uint(-1) when the invocation lies past the quad count stored in meta (bits 4 and up)
uint getQuadIndex() {
    uint quadCount = meta >> 4;
    uint quadInTask = gl_GlobalInvocationID.x;
    return (quadInTask < quadCount) ? (baseOffset + quadInTask) : uint(-1);
}
//Mesh entry point: every invocation turns one quad into 4 vertices and 2 triangles.
// Invocation i writes vertices [4i, 4i+3] and primitives 2i and 2i+1, so a workgroup of 16 invocations fills at most
// the 64 vertices / 32 primitives declared in the output layout.
// Reads origin, baseOffset and meta from the Task block.
void main() {
uint idx = getQuadIndex();
//If this invocation is past the end of the quad list, don't render
if (idx == uint(-1)) {
return;
}
emitIndicies();
//A and B are the two primitive (triangle) slots of this quad, V is its first vertex slot
uint A = gl_LocalInvocationID.x<<1;
uint B = (gl_LocalInvocationID.x<<1)|1u;
uint V = (gl_LocalInvocationID.x<<2);
//Low 4 bits of meta hold the lod level
uint lodLvl = meta&0xf;
float lodScale = (1<<lodLvl);
Quad quad = quadData[idx];
uint face = extractFace(quad);
uint modelId = extractStateId(quad);
BlockModel model = modelData[modelId];
uint faceData = model.faceData[face];
bool isTranslucent = modelIsTranslucent(model);
bool hasAO = modelHasMipmaps(model);//TODO: replace with per face AO flag
bool isShaded = hasAO;//TODO: make this a per face flag
ivec2 quadSize = extractSize(quad);
//Compute the uv coordinates
// the low 2 bytes of the model id pick a cell on a 256x256 grid, the face picks a sub cell inside it
vec2 modelUV = vec2(modelId&0xFFu, (modelId>>8)&0xFFu)*(1.0/(256.0));
vec2 baseUV = modelUV + (vec2(face>>1, face&1u) * (1.0/(vec2(3.0, 2.0)*256.0)));
//Write out baseUV
per_prim_out[A].baseUV = baseUV;
per_prim_out[B].baseUV = baseUV;
uint flags = faceHasAlphaCuttout(faceData);
//We need to have a conditional override based on if the model size is < a full face + quadSize > 1
flags |= uint(any(greaterThan(quadSize, ivec2(1)))) & faceHasAlphaCuttoutOverride(faceData);
//Bit 1 is set when the model has no mipmaps
flags |= uint(!modelHasMipmaps(model))<<1;
//Compute lighting
vec4 tinting = getLighting(extractLightId(quad));
//Apply model colour tinting
uint tintColour = model.colourTint;
if (modelHasBiomeLUT(model)) {
//colourTint is then a base index into colourData, offset by the quads biome id
tintColour = colourData[tintColour + extractBiomeId(quad)];
}
vec4 conditionalTinting = vec4(0);
//A tint of all ones (uint(-1)) means there is no tint to apply
if (tintColour != uint(-1)) {
flags |= 1u<<2;
conditionalTinting = uint2vec4RGBA(tintColour).yzwx;
}
vec4 addin = vec4(0.0);
if (!isTranslucent) {
tinting.w = 0.0;
//Encode the face (from bit 0), the lod level (from bit 3) and the AO flag (bit 6) into addin.w
uint encodedData = 0;
encodedData |= face;
encodedData |= (lodLvl<<3);
encodedData |= uint(hasAO)<<6;
addin.w = float(encodedData)/255.0;
}
//Apply face tint
if (isShaded) {
//TODO: make branchless, in fact apply ahead of time to the texture itself in ModelManager since that is
// per face
if ((face>>1) == 1) {
tinting.xyz *= 0.8f;
} else if ((face>>1) == 2) {
tinting.xyz *= 0.6f;
} else if (face == 0){
tinting.xyz *= 0.5f;
}
}
//Write out everything, duplicated for both triangles of the quad
per_prim_out[A].tinting = tinting;
per_prim_out[A].addin = addin;
per_prim_out[A].flags = flags;
per_prim_out[A].conditionalTinting = conditionalTinting;
per_prim_out[B].tinting = tinting;
per_prim_out[B].addin = addin;
per_prim_out[B].flags = flags;
per_prim_out[B].conditionalTinting = conditionalTinting;
//Per corner uvs, in the order: start, +y, +x+y, +x (matches the vertex order used for gl_Position below)
vec4 faceSize = getFaceSize(faceData);
vec2 cQuadSize = faceSize.yw + quadSize - 1;
vec2 uv0 = faceSize.xz;
i_out[V|0].uv = uv0;
i_out[V|1].uv = uv0 + vec2(0, cQuadSize.y);
i_out[V|2].uv = uv0 + cQuadSize;
i_out[V|3].uv = uv0 + vec2(cQuadSize.x, 0);
//Corner position of quad relative to section corner (in 0->32 scale)
vec3 cornerPos = extractPos(quad);
//Move the quad along the face normal axis by the face indentation (odd faces use 1-depthOffset)
float depthOffset = extractFaceIndentation(faceData);
cornerPos += swizzelDataAxis(face>>1, vec3(faceSize.xz, mix(depthOffset, 1-depthOffset, float(face&1u))));
gl_MeshVerticesNV[V|0].gl_Position = MVP*vec4(cornerPos*lodScale+origin, 1.0);
gl_MeshVerticesNV[V|1].gl_Position = MVP*vec4((cornerPos+swizzelDataAxis(face>>1,vec3(0,cQuadSize.y,0)))*lodScale+origin, 1.0);
gl_MeshVerticesNV[V|2].gl_Position = MVP*vec4((cornerPos+swizzelDataAxis(face>>1,vec3(cQuadSize, 0)))*lodScale+origin, 1.0);
gl_MeshVerticesNV[V|3].gl_Position = MVP*vec4((cornerPos+swizzelDataAxis(face>>1,vec3(cQuadSize.x,0,0)))*lodScale+origin, 1.0);
//Only the first invocation publishes the primitive count of the workgroup
if (gl_LocalInvocationID.x == 0) {
//Remaining quads in workgroup: total quad count minus the 16 quads per preceding workgroup, capped at 32 primitives
gl_PrimitiveCountNV = min(uint(int(meta>>4)-int(gl_WorkGroupID.x<<4))<<1, 32);//2 primitives per quad
}
}

View File

@@ -0,0 +1,49 @@
#version 460
#extension GL_ARB_shading_language_include : enable
#pragma optionNV(unroll all)
#define UNROLL_LOOP
#extension GL_NV_mesh_shader : require
#extension GL_NV_gpu_shader5 : require
#extension GL_ARB_gpu_shader_int64 : require
#import <voxy:lod/nvmesh/bindings.glsl>
#import <voxy:lod/section.glsl>
#line 12
#define MESH_WORKLOAD_PER_INVOCATION 16
layout(local_size_x=1) in;
taskNV out Task {
vec3 origin;//Offset to camera in world space (already multiplied by lod level)
uint baseOffset;//Base offset into the quad data buffer
uint meta;//First 4 bits is lod level, remaining is quadCount
} task;
//Task entry point: one workgroup per section (gl_WorkGroupID.x is the section id).
// Sections whose visibilityData entry does not match frameId launch no mesh work. Otherwise the section origin,
// quad start and packed lod level / quad count are published through the Task block, and one mesh workgroup is
// launched per MESH_WORKLOAD_PER_INVOCATION quads.
void main() {
    uint sectionId = gl_WorkGroupID.x;

    //If it wasnt visible last frame then dont render this frame ** (do temporal coherence)
    if (visibilityData[sectionId] != frameId) {
        gl_TaskCountNV = 0;
        return;
    }

    SectionMeta meta = sectionData[sectionId];
    uint lodLvl = extractDetail(meta);
    ivec3 lodPos = extractPosition(meta);

    //Relative position to camera
    task.origin = vec3(((lodPos<<lodLvl)-baseSectionPos)<<5)-cameraSubPos;
    task.baseOffset = extractQuadStart(meta);

    //The low 16 bits of cntA are used as the quad count of this pass
    //NOTE(review): this line used to be commented "Skip translucency", which reads as if it was carried over from
    // another task shader - confirm that cntA&0xFFFF really is the translucent quad count
    uint cnt = meta.cntA&0xFFFF;

    //Pack the lod level into the low 4 bits and the quad count into the bits above
    task.meta = (lodLvl&0xFu) | (cnt<<4);

    //Round up so that a partially filled last workgroup is still launched
    gl_TaskCountNV = (cnt+MESH_WORKLOAD_PER_INVOCATION-1)/MESH_WORKLOAD_PER_INVOCATION;
}