This commit is contained in:
mcrcortex
2024-04-17 11:22:11 +10:00
parent 6ebc4739b9
commit 52baa303dc
12 changed files with 317 additions and 89 deletions

View File

@@ -10,6 +10,7 @@ layout(binding = 0, std140) uniform SceneUniform {
Frustum frustum;
vec3 cameraSubPos;
uint frameId;
uvec2 screensize;
};
struct BlockModel {
@@ -38,8 +39,17 @@ struct DrawCommand {
int baseVertex;
uint baseInstance;
};
// Work-group counts for an indirect compute dispatch. The preparation shader
// in this change writes y and z to 1 and atomically accumulates x, and its
// comments refer to glDispatchComputeIndirect as the consumer.
// NOTE(review): member order is presumably required to match the
// x/y/z group-count layout that glDispatchComputeIndirect reads - confirm
// before reordering.
struct DispatchIndirect {
uint x;
uint y;
uint z;
};
// Texture unit 0 is exposed under one of two names. A shader that defines
// BIND_SAMPLER_AS_HIZ before importing this file gets `hizSampler` (sampled
// per mip level with textureLod by the meshlet culling shader); every other
// shader gets `blockModelAtlas`.
#ifdef BIND_SAMPLER_AS_HIZ
layout(binding = 0) uniform sampler2D hizSampler;
#else
layout(binding = 0) uniform sampler2D blockModelAtlas;
#endif
#ifndef Quad
#define Quad ivec2
@@ -49,13 +59,12 @@ layout(binding = 1, std430) readonly restrict buffer GeometryBuffer {
};
// GPU-written command buffer shared by the meshlet preparation, culling and
// draw passes.
//   dispatchCmd      - indirect dispatch group counts; the preparation shader
//                      sets y/z to 1 and atomically grows x.
//   fullMeshletCount - number of meshlet ids appended by the preparation
//                      shader; the culling shader uses it as its upper bound
//                      and as the offset of its compacted output.
//   drawCmd          - indirect draw command; instanceCount is incremented
//                      once per meshlet that survives culling.
// NOTE(review): host code presumably addresses these members by byte offset -
// confirm before changing the member order.
layout(binding = 2, std430) restrict buffer DrawBuffer {
DispatchIndirect dispatchCmd;
uint fullMeshletCount;
DrawCommand drawCmd;
};
#ifndef MESHLET_ACCESS
#define MESHLET_ACCESS readonly writeonly
#endif
layout(binding = 3, std430) MESHLET_ACCESS restrict buffer MeshletListData {
layout(binding = 3, std430) restrict buffer MeshletListData {
uint meshlets[];
};

View File

@@ -5,9 +5,9 @@
#import <voxy:lod/quad_format.glsl>
#import <voxy:lod/gl46mesh/bindings.glsl>
#import <voxy:lod/section.glsl>
#define extractMeshletStart extractQuadStart
#import <voxy:lod/gl46mesh/meshlet.glsl>
layout(local_size_x = 64) in;
#define QUADS_PER_MESHLET 30
void emitMeshlets(inout uint mli, inout uint meshletPtr, uint mskedCnt, uint cnt) {
for (;mskedCnt != 0; mskedCnt--,mli++) {
@@ -17,19 +17,19 @@ void emitMeshlets(inout uint mli, inout uint meshletPtr, uint mskedCnt, uint cnt
}
void main() {
//Clear here as it stops the need to dispatch a glClearData instruction
if (gl_GlobalInvocationID.x == 0) {
//Setup the remaining state of the drawElementsIndirect command
drawCmd.count = QUADS_PER_MESHLET*6;
drawCmd.firstIndex = 0;
drawCmd.baseVertex = 0;
drawCmd.baseInstance = 0;
drawCmd.instanceCount = 0;
dispatchCmd.y = 1;
dispatchCmd.z = 1;
}
if (gl_GlobalInvocationID.x >= sectionCount) {
return;
}
//Check the occlusion data from last frame
bool shouldRender = visibilityData[gl_GlobalInvocationID.x] == frameId - 1;
bool shouldRender = visibilityData[gl_GlobalInvocationID.x] == frameId;
if (shouldRender) {
SectionMeta meta = sectionData[gl_GlobalInvocationID.x];
uint detail = extractDetail(meta);
@@ -46,7 +46,12 @@ void main() {
uint e = ((meta.cntD>>16)&0xFFFF) * uint(relative.x<1 );
uint total = a + u + d + s + n + w + e;
uint mli = atomicAdd(drawCmd.instanceCount, total);//meshletListIndex
uint mli = atomicAdd(fullMeshletCount, total);//meshletListIndex
//Need to increment the glDispatchComputeIndirect with respect to the workgroup
uint addWorkAmount = ((mli+total)>>7)-(mli>>7);//the >>7 is cause the workgroup size is 128
addWorkAmount += uint(mli==0); //If we where the first to add to the meshlet counter then we need to add an extra dispatch
// to account for trailing data
atomicAdd(dispatchCmd.x, addWorkAmount);
uint meshletPtr = extractMeshletStart(meta) + (meta.cntA&0xFFFF);

View File

@@ -0,0 +1,46 @@
#define QUADS_PER_MESHLET 30
#define extractMeshletStart extractQuadStart
#define PosHeader Quad
#define AABBHeader Quad
//There are 16 bytes of metadata at the start of the meshlet
#define MESHLET_SIZE (QUADS_PER_MESHLET+2)
#ifdef GL_ARB_gpu_shader_int64
// Unpacks the signed section position from a 64-bit meshlet position header.
// Packed layout (as written by the encoder):
//   ((long)lvl<<60)|((long)(y&0xFF)<<52)|((long)(z&((1<<24)-1))<<28)|((long)(x&((1<<24)-1))<<4)
// i.e. x = bits 4..27 (24 bit), z = bits 28..51 (24 bit), y = bits 52..59 (8 bit).
ivec3 extractPosition(PosHeader pos64) {
    const int MASK_24 = (1<<24)-1;
    int rawX = int(pos64>>4)&MASK_24;
    int rawY = int(pos64>>52)&0xFF;
    int rawZ = int(pos64>>28)&MASK_24;
    //Shift each field up to the top of the int and arithmetic-shift it back down to sign extend it
    int signedX = (rawX<<8)>>8;
    int signedY = (rawY<<24)>>24;
    int signedZ = (rawZ<<8)>>8;
    return ivec3(signedX, signedY, signedZ);
}
// Returns the detail (lod) level stored in the top 4 bits (60..63) of the
// 64-bit meshlet position header.
uint extractDetail(PosHeader pos64) {
    uint level = uint(pos64>>60);
    return level;
}
// Unpacks the minimum corner of the meshlet bounding box from the 64-bit AABB
// header: x = bits 0..7, y = bits 8..15, z = bits 16..23.
uvec3 extractMin(AABBHeader aabb) {
    uint minX = uint(aabb&0xFF);
    uint minY = uint((aabb>>8)&0xFF);
    uint minZ = uint((aabb>>16)&0xFF);
    return uvec3(minX, minY, minZ);
}
// Unpacks the maximum corner of the meshlet bounding box from the 64-bit AABB
// header: x = bits 24..31, y = bits 32..39, z = bits 40..47.
uvec3 extractMax(AABBHeader aabb) {
    uint maxX = uint((aabb>>24)&0xFF);
    uint maxY = uint((aabb>>32)&0xFF);
    uint maxZ = uint((aabb>>40)&0xFF);
    return uvec3(maxX, maxY, maxZ);
}
#else
// Fallback (no 64-bit integer support): unpacks the signed section position
// from a position header held as two 32-bit words. This function reads pos.x
// as the upper word and pos.y as the lower word of the packed value:
//   x = lower word bits 4..27, y = upper word bits 20..27,
//   z = upper word bits 0..19 (high part) joined with lower word bits 28..31.
ivec3 extractPosition(PosHeader pos) {
    int upperWord = int(pos.x);
    int lowerWord = int(pos.y);
    //Sign extend by shifting the field to the top of the int and back down
    int px = (lowerWord<<4)>>8;
    int py = (upperWord<<4)>>24;
    //z straddles both words: 20 bits from the upper word, 4 bits from the lower word
    int zBits = ((upperWord&((1<<20)-1))<<4) | ((lowerWord>>28)&0xF);
    int pz = (zBits<<8)>>8;
    return ivec3(px, py, pz);
}
// Fallback (no 64-bit integer support): returns the detail (lod) level held in
// the top 4 bits of pos.x.
uint extractDetail(PosHeader pos) {
    uint upperWord = uint(pos.x);
    return upperWord>>28;
}
// Fallback (no 64-bit integer support): unpacks the minimum corner of the
// meshlet bounding box from the three low bytes of aabb.x.
// NOTE(review): this reads aabb.x as the word holding bits 0..31, whereas the
// fallback extractPosition/extractDetail read pos.x as the word holding bits
// 32..63 - confirm which word order the two-component Quad actually uses.
uvec3 extractMin(AABBHeader aabb) {
    uint minX = uint(aabb.x&0xFF);
    uint minY = uint((aabb.x>>8)&0xFF);
    uint minZ = uint((aabb.x>>16)&0xFF);
    return uvec3(minX, minY, minZ);
}
// Fallback (no 64-bit integer support): unpacks the maximum corner of the
// meshlet bounding box: x from the top byte of aabb.x, y and z from the two
// low bytes of aabb.y.
// NOTE(review): this reads aabb.x as the word holding bits 0..31, whereas the
// fallback extractPosition/extractDetail read pos.x as the word holding bits
// 32..63 - confirm which word order the two-component Quad actually uses.
uvec3 extractMax(AABBHeader aabb) {
    uint maxX = uint((aabb.x>>24)&0xFF);
    uint maxY = uint(aabb.y&0xFF);
    uint maxZ = uint((aabb.y>>8)&0xFF);
    return uvec3(maxX, maxY, maxZ);
}
#endif

View File

@@ -0,0 +1,65 @@
#version 450
#extension GL_ARB_gpu_shader_int64 : enable
#define MESHLET_ACCESS
#define BIND_SAMPLER_AS_HIZ
#import <voxy:lod/quad_format.glsl>
#import <voxy:lod/gl46mesh/bindings.glsl>
#import <voxy:lod/section.glsl>
#import <voxy:lod/gl46mesh/meshlet.glsl>
layout(local_size_x=128) in;
// Transforms a position by MVP and applies the perspective divide.
// No guard is applied for w <= 0; callers get whatever the division yields.
vec3 proj(vec3 pos) {
    vec4 clip = MVP * vec4(pos, 1);
    vec3 ndc = clip.xyz/clip.w;
    return ndc;
}
// Hierarchical-Z visibility test for a single meshlet.
//
// secPos : packed position/detail header of the section owning the meshlet
// aabb   : packed min/max corners of the meshlet bounding box
// returns: true when the meshlet may be visible (and must be kept), false when
//          every sampled depth is nearer than the nearest point of its box.
//
// The 8 corners of the box are projected, their screen space bounds select a
// mip level of hizSampler in which the box spans roughly one texel, and the
// nearest box depth is compared with the furthest of 4 depth samples.
// NOTE(review): the comparison assumes larger depth == further away - confirm
// against the depth convention used to build the HiZ texture.
bool testHiZ(PosHeader secPos, AABBHeader aabb) {
    ivec3 section = extractPosition(secPos);
    uint detail = extractDetail(secPos);
    ivec3 pos = (((section<<detail)-baseSectionPos)<<5);

    //Convert everything to float BEFORE combining it. Adding the unsigned box
    // corners to the signed position would promote the sum to unsigned, wrapping
    // any negative camera relative coordinate into a huge positive one
    vec3 origin = vec3(pos);
    vec3 cmin = vec3(extractMin(aabb)*(1<<detail));
    vec3 cmax = vec3(extractMax(aabb)*(1<<detail));

    vec3 minBB = vec3(0);
    vec3 maxBB = vec3(0);
    //Walk all 8 corners, corner 0 is origin+cmin (not the section origin itself)
    for (int i = 0; i < 8; i++) {
        vec3 corner = origin + mix(cmin, cmax, bvec3((i&1)!=0,(i&2)!=0,(i&4)!=0));
        vec4 clip = MVP * vec4(corner, 1);
        if (clip.w <= 0.0) {
            //Corner is on or behind the camera plane, the perspective divide would
            // produce mirrored/invalid bounds, so conservatively keep the meshlet
            return true;
        }
        vec3 point = clip.xyz/clip.w;
        if (i == 0) {
            minBB = point;
            maxBB = point;
        } else {
            minBB = min(minBB, point);
            maxBB = max(maxBB, point);
        }
    }

    //Remap from [-1,1] to [0,1]
    minBB = minBB*0.5+0.5;
    maxBB = maxBB*0.5+0.5;

    vec2 size = (maxBB.xy - minBB.xy) * vec2(screensize);
    //NOTE: the *0.5 is cause the mipmaps dont include bottom level depth
    // Clamped to 1 so log2 never sees 0 (undefined) and the level never goes negative
    float miplevel = ceil(log2(max(max(size.x, size.y)*0.5, 1.0)));

    float a = textureLod(hizSampler,minBB.xy,miplevel).r;
    float b = textureLod(hizSampler,vec2(minBB.x,maxBB.y),miplevel).r;
    float c = textureLod(hizSampler,maxBB.xy,miplevel).r;
    float d = textureLod(hizSampler,vec2(maxBB.x,minBB.y),miplevel).r;
    float depth = max(max(a,b),max(c,d));
    return minBB.z <= depth;
}
// Meshlet culling pass: one invocation per meshlet id emitted by the
// preparation pass. Surviving ids are appended to the second half of
// `meshlets` (starting at index fullMeshletCount) and counted in
// drawCmd.instanceCount.
void main() {
    uint invocation = gl_GlobalInvocationID.x;
    uint emittedCount = fullMeshletCount;

    //Invocations past the end of the emitted list have nothing to do
    if (invocation >= emittedCount) {
        return;
    }

    if (invocation == 0) {
        //Setup the state of the drawElementsIndirect command, instanceCount is cleared externally
        drawCmd.count = QUADS_PER_MESHLET*6;
        drawCmd.firstIndex = 0;
        drawCmd.baseVertex = 0;
        //Start at the beginning of the newly emitted meshlet array
        drawCmd.baseInstance = emittedCount;
    }

    uint meshletId = meshlets[invocation];
    //The first two entries of a meshlet are its position and bounding box headers
    uint headerBase = meshletId*MESHLET_SIZE;
    PosHeader pos = geometryPool[headerBase];
    AABBHeader aabb = geometryPool[headerBase+1];

    //NOTE: the leading `true||` short circuits the HiZ test, so every meshlet is currently kept
    bool keep = true||testHiZ(pos, aabb);
    if (keep) {
        //If didnt cull, insert it back into the stream
        uint slot = atomicAdd(drawCmd.instanceCount, 1)+emittedCount;
        meshlets[slot] = meshletId;
    }
}

View File

@@ -1,40 +1,10 @@
#version 450
#extension GL_ARB_gpu_shader_int64 : enable
#define QUADS_PER_MESHLET 30
#define MESHLET_ACCESS readonly
//There are 16 bytes of metadata at the start of the meshlet
#define MESHLET_SIZE (QUADS_PER_MESHLET+2)
#import <voxy:lod/quad_format.glsl>
#import <voxy:lod/gl46mesh/bindings.glsl>
#import <voxy:lod/block_model.glsl>
#define PosHeader Quad
#ifdef GL_ARB_gpu_shader_int64
ivec3 extractPosition(PosHeader pos64) {
//((long)lvl<<60)|((long)(y&0xFF)<<52)|((long)(z&((1<<24)-1))<<28)|((long)(x&((1<<24)-1))<<4);
//return ivec3((pos64<<4)&uint64_t(0xFFFFFFFF),(pos64>>28)&uint64_t(0xFFFFFFFF),(pos64>>24)&uint64_t(0xFFFFFFFF))>>ivec3(8,24,8);
return (ivec3(int(pos64>>4)&((1<<24)-1), int(pos64>>52)&0xFF, int(pos64>>28)&((1<<24)-1))<<ivec3(8,24,8))>>ivec3(8,24,8);
}
uint extractDetail(PosHeader pos64) {
return uint(pos64>>60);
}
#else
ivec3 extractPosition(PosHeader pos) {
int y = ((int(pos.x)<<4)>>24);
int x = (int(pos.y)<<4)>>8;
int z = int((pos.x&((1<<20)-1))<<4);
z |= int(pos.y>>28)&0xF;
z <<= 8;
z >>= 8;
return ivec3(x,y,z);
}
uint extractDetail(PosHeader pos) {
return uint(pos.x)>>28;
}
#endif
#import <voxy:lod/gl46mesh/meshlet.glsl>
layout(location = 6) out flat uint meshlet;
PosHeader meshletPosition;