Prefix sum based translucency
This commit is contained in:
@@ -47,6 +47,11 @@ public class GlBuffer extends TrackedObject {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Clears a sub-range of this buffer by calling {@code nglClearNamedBufferSubData}
 * on {@code this.id} with a null ({@code 0}) data pointer and an R8UI / RED_INTEGER /
 * UNSIGNED_BYTE format description.
 * NOTE(review): per the OpenGL specification a null data pointer fills the range with
 * zeros and offset/size are measured in bytes - confirm against the GL reference.
 *
 * @param offset start of the range to clear, forwarded unchanged to the GL call
 * @param size   length of the range to clear, forwarded unchanged to the GL call
 * @return this buffer, so that calls can be chained
 */
public GlBuffer zeroRange(long offset, long size) {
|
||||
nglClearNamedBufferSubData(this.id, GL_R8UI, offset, size, GL_RED_INTEGER, GL_UNSIGNED_BYTE, 0);
|
||||
return this;
|
||||
}
|
||||
|
||||
public GlBuffer fill(int data) {
|
||||
//Clear unpack values
|
||||
//Fixed in mesa commit a5c3c452
|
||||
|
||||
@@ -32,12 +32,13 @@ import static org.lwjgl.opengl.GL33.glBindSampler;
|
||||
import static org.lwjgl.opengl.GL40C.GL_DRAW_INDIRECT_BUFFER;
|
||||
import static org.lwjgl.opengl.GL43.*;
|
||||
import static org.lwjgl.opengl.GL45.glBindTextureUnit;
|
||||
import static org.lwjgl.opengl.GL45.glClearNamedBufferData;
|
||||
|
||||
//Uses MDIC to render the sections
|
||||
public class MDICSectionRenderer extends AbstractSectionRenderer<MDICViewport, BasicSectionGeometryData> {
|
||||
private static final int TRANSLUCENT_OFFSET = 400_000;//in draw calls
|
||||
private static final int TEMPORAL_OFFSET = 500_000;//in draw calls
|
||||
private static final int STATISTICS_BUFFER_BINDING = 7;
|
||||
private static final int STATISTICS_BUFFER_BINDING = 8;
|
||||
private final Shader terrainShader = Shader.make()
|
||||
.defineIf("DEBUG_RENDER", false)
|
||||
.add(ShaderType.VERTEX, "voxy:lod/gl46/quads2.vert")
|
||||
@@ -45,9 +46,11 @@ public class MDICSectionRenderer extends AbstractSectionRenderer<MDICViewport, B
|
||||
.compile();
|
||||
|
||||
private final Shader commandGenShader = Shader.make()
|
||||
.define("TRANSLUCENT_OFFSET", TRANSLUCENT_OFFSET)
|
||||
.define("TRANSLUCENT_WRITE_BASE", 1024)
|
||||
.define("TEMPORAL_OFFSET", TEMPORAL_OFFSET)
|
||||
|
||||
.define("TRANSLUCENT_DISTANCE_BUFFER_BINDING", 7)
|
||||
|
||||
.defineIf("HAS_STATISTICS", RenderStatistics.enabled)
|
||||
.defineIf("STATISTICS_BUFFER_BINDING", RenderStatistics.enabled, STATISTICS_BUFFER_BINDING)
|
||||
|
||||
@@ -63,9 +66,23 @@ public class MDICSectionRenderer extends AbstractSectionRenderer<MDICViewport, B
|
||||
.add(ShaderType.FRAGMENT, "voxy:lod/gl46/cull/raster.frag")
|
||||
.compile();
|
||||
|
||||
//Compute shader built from voxy:util/prefixsum/inital3.comp.
//IO_BUFFER is defined as 0, the same storage buffer index that distanceCountBuffer
// is bound to right before this shader is dispatched in the translucency sorting step.
private final Shader prefixSumShader = Shader.make()
|
||||
.add(ShaderType.COMPUTE, "voxy:util/prefixsum/inital3.comp")
|
||||
.define("IO_BUFFER", 0)
|
||||
.compile();
|
||||
|
||||
//Compute shader built from voxy:lod/gl46/buildtranslucents.comp.
//TRANSLUCENT_WRITE_BASE and TRANSLUCENT_OFFSET are given the same values that are passed
// to commandGenShader; TRANSLUCENT_DISTANCE_BUFFER_BINDING is 5 here, which is the storage
// buffer index distanceCountBuffer is bound to before this shader is dispatched.
private final Shader translucentGenShader = Shader.make()
|
||||
.add(ShaderType.COMPUTE, "voxy:lod/gl46/buildtranslucents.comp")
|
||||
.define("TRANSLUCENT_WRITE_BASE", 1024)//The size of the prefix sum array
|
||||
.define("TRANSLUCENT_DISTANCE_BUFFER_BINDING", 5)
|
||||
.define("TRANSLUCENT_OFFSET", TRANSLUCENT_OFFSET)
|
||||
|
||||
.compile();
|
||||
|
||||
private final GlBuffer uniform = new GlBuffer(1024).zero();
|
||||
|
||||
//TODO: needs to be in the viewport, since it contains the compute indirect call/values
|
||||
private final GlBuffer distanceCountBuffer = new GlBuffer(1024*4+100_000*4).zero();
|
||||
private final GlBuffer drawCountCallBuffer = new GlBuffer(1024).zero();
|
||||
private final GlBuffer drawCallBuffer = new GlBuffer(5*4*(400_000+100_000+100_000)).zero();//400k draw calls
|
||||
private final GlBuffer positionScratchBuffer = new GlBuffer(8*400000).zero();//400k positions
|
||||
@@ -205,6 +222,7 @@ public class MDICSectionRenderer extends AbstractSectionRenderer<MDICViewport, B
|
||||
|
||||
|
||||
{//Generate the commands
|
||||
this.distanceCountBuffer.zeroRange(0, 1024*4);
|
||||
this.commandGenShader.bind();
|
||||
glBindBufferBase(GL_UNIFORM_BUFFER, 0, this.uniform.id);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, this.drawCallBuffer.id);
|
||||
@@ -213,6 +231,7 @@ public class MDICSectionRenderer extends AbstractSectionRenderer<MDICViewport, B
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, viewport.visibilityBuffer.id);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, viewport.indirectLookupBuffer.id);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, this.positionScratchBuffer.id);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, this.distanceCountBuffer.id);
|
||||
|
||||
if (RenderStatistics.enabled) {
|
||||
this.statisticsBuffer.zero();
|
||||
@@ -238,6 +257,40 @@ public class MDICSectionRenderer extends AbstractSectionRenderer<MDICViewport, B
|
||||
}
|
||||
}
|
||||
|
||||
{//Do translucency sorting
//Step 1: run the prefix sum shader once (a single work group) over distanceCountBuffer,
// which is bound as storage buffer 0 (the IO_BUFFER define of prefixSumShader)
|
||||
this.prefixSumShader.bind();
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, this.distanceCountBuffer.id);
|
||||
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);//NOTE(review): original author was unsure whether this barrier is needed
|
||||
glDispatchCompute(1,1,1);
|
||||
//Barrier between the prefix sum dispatch and the translucent generation dispatch below,
// which binds the same distanceCountBuffer as a storage buffer
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||
//glFinish();
|
||||
//Disabled debug check: downloads the first 1024 ints of distanceCountBuffer and prints
// every adjacent pair where the later value is smaller than the earlier one
/*
|
||||
DownloadStream.INSTANCE.download(this.distanceCountBuffer, 0, 1024*4, (ptr,size)->{
|
||||
int[] a = new int[1024];
|
||||
for (int i = 0; i < 1024; i++) {
|
||||
a[i] = MemoryUtil.memGetInt(ptr+4*i);
|
||||
}
|
||||
for (int i = 0; i < 1023; i++){
|
||||
if (a[i+1]<a[i]) {
|
||||
System.out.println(a[i]+","+a[i+1]);
|
||||
}
|
||||
}
|
||||
});
|
||||
*/
|
||||
|
||||
//Step 2: run the translucent generation shader; the binding indices used here are
// uniform 0, draw calls 1, draw counts 2, section metadata 3, indirect lookup 4 and
// the distance/count buffer 5
this.translucentGenShader.bind();
|
||||
glBindBufferBase(GL_UNIFORM_BUFFER, 0, this.uniform.id);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, this.drawCallBuffer.id);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, this.drawCountCallBuffer.id);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, this.geometryManager.getMetadataBuffer().id);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, viewport.indirectLookupBuffer.id);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, this.distanceCountBuffer.id);
|
||||
|
||||
//The work group counts of the dispatch are read by the GPU from offset 0 of drawCountCallBuffer
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, this.drawCountCallBuffer.id);//This isn't great but it's a nice trick to bound it, even if it's inefficient ;-;
|
||||
glDispatchComputeIndirect(0);
|
||||
//NOTE(review): presumably makes the written draw commands visible to later indirect draws - confirm
glMemoryBarrier(GL_COMMAND_BARRIER_BIT|GL_SHADER_STORAGE_BARRIER_BIT);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -261,10 +314,13 @@ public class MDICSectionRenderer extends AbstractSectionRenderer<MDICViewport, B
|
||||
@Override
|
||||
public void free() {
|
||||
this.uniform.free();
|
||||
this.distanceCountBuffer.free();
|
||||
this.terrainShader.free();
|
||||
this.commandGenShader.free();
|
||||
this.cullShader.free();
|
||||
this.prepShader.free();
|
||||
this.translucentGenShader.free();
|
||||
this.prefixSumShader.free();
|
||||
this.drawCallBuffer.free();
|
||||
this.drawCountCallBuffer.free();
|
||||
this.positionScratchBuffer.free();
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
#version 450
|
||||
#extension GL_ARB_gpu_shader_int64 : enable
|
||||
|
||||
layout(local_size_x = 128) in;
|
||||
|
||||
#define DRAW_BUFFER_BINDING 1
|
||||
#define DRAW_COUNT_BUFFER_BINDING 2
|
||||
#define SECTION_METADATA_BUFFER_BINDING 3
|
||||
#define INDIRECT_SECTION_LOOKUP_BINDING 4
|
||||
|
||||
#import <voxy:lod/gl46/bindings.glsl>
|
||||
#import <voxy:lod/section.glsl>
|
||||
|
||||
/*
|
||||
uint count;
|
||||
uint instanceCount;
|
||||
uint firstIndex;
|
||||
int baseVertex;
|
||||
uint baseInstance;
|
||||
*/
|
||||
|
||||
layout(binding = TRANSLUCENT_DISTANCE_BUFFER_BINDING, std430) restrict buffer TranslucentCommandCount {
|
||||
uint[] translucentCommandData;
|
||||
};
|
||||
|
||||
//Note: if i want reverse indexing i need to use the index buffer offset to offset
|
||||
//Builds a DrawCommand and stores it into cmdBuffer[idx].
// idx       - slot in cmdBuffer that receives the command
// instance  - value written to baseInstance
// offset    - quad offset; shifted left by 2 to form baseVertex
//             (presumably 4 vertices per quad - confirm against the vertex layout)
// quadCount - number of quads; multiplied by 6 to form the index count
//             (presumably 6 indices per quad - confirm against the index buffer)
void writeCmd(uint idx, uint instance, uint offset, uint quadCount) {
|
||||
DrawCommand cmd;
|
||||
cmd.count = quadCount * 6;
|
||||
cmd.instanceCount = 1;
|
||||
cmd.firstIndex = 0;
|
||||
cmd.baseVertex = int(offset)<<2;
|
||||
cmd.baseInstance = instance;
|
||||
cmdBuffer[idx] = cmd;
|
||||
}
|
||||
|
||||
//One invocation per queued translucent draw: looks up the section for the queued draw id,
// recomputes its distance bucket, reserves a slot from that bucket's counter and writes the
// draw command at that slot (offset by TRANSLUCENT_OFFSET).
void main() {
|
||||
//Invocations past the number of queued translucent draws have nothing to do
if (gl_GlobalInvocationID.x >= translucentDrawCount) {
|
||||
return;
|
||||
}
|
||||
//Draw ids are stored after the first TRANSLUCENT_WRITE_BASE entries of translucentCommandData
uint drawId = translucentCommandData[gl_GlobalInvocationID.x+TRANSLUCENT_WRITE_BASE];
|
||||
SectionMeta meta = sectionData[indirectLookup[drawId]];
|
||||
uint detail = extractDetail(meta);
|
||||
|
||||
//Sum of the per-axis absolute offsets from baseSectionPos (at this detail level), scaled back up by detail
uvec3 rel = abs(extractPosition(meta)-(baseSectionPos>>detail));
|
||||
uint dist = (rel.x+rel.y+rel.z)<<detail;
|
||||
//Inverts and clamps the distance: larger distances map to smaller bucket indices
//NOTE(review): a distance of 0 yields TRANSLUCENT_WRITE_BASE itself, which is the index of the
// first draw id slot read above rather than a counter below TRANSLUCENT_WRITE_BASE - confirm
// whether the index should be limited to TRANSLUCENT_WRITE_BASE-1 (the command generation
// shader uses the same expression, so both would need to change together)
dist = TRANSLUCENT_WRITE_BASE-min(dist, TRANSLUCENT_WRITE_BASE);
|
||||
|
||||
//atomicAdd returns the counter value from before the increment, which is used as this draw's slot
uint drawPtr = atomicAdd(translucentCommandData[dist],1)+TRANSLUCENT_OFFSET;
|
||||
//The quad count passed on is the low 16 bits of meta.cntA
writeCmd(drawPtr, drawId, extractQuadStart(meta), meta.cntA&0xFFFF);
|
||||
}
|
||||
@@ -11,10 +11,8 @@ layout(local_size_x = 128) in;
|
||||
#define POSITION_SCRATCH_BINDING 6
|
||||
#define POSITION_SCRATCH_ACCESS writeonly
|
||||
|
||||
#import <voxy:lod/quad_format.glsl>
|
||||
#import <voxy:lod/gl46/bindings.glsl>
|
||||
#import <voxy:lod/section.glsl>
|
||||
#line 11
|
||||
|
||||
//https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GL_EXT_shader_16bit_storage.txt
|
||||
// adds support for uint8_t which can use for compact visibility buffer
|
||||
@@ -35,6 +33,9 @@ layout(binding = STATISTICS_BUFFER_BINDING, std430) restrict buffer statisticsBu
|
||||
uint baseInstance;
|
||||
*/
|
||||
|
||||
layout(binding = TRANSLUCENT_DISTANCE_BUFFER_BINDING, std430) restrict buffer TranslucentCommandCount {
|
||||
uint[] translucentCommandData;
|
||||
};
|
||||
|
||||
//Note: if i want reverse indexing i need to use the index buffer offset to offset
|
||||
void writeCmd(uint idx, uint instance, uint offset, uint quadCount) {
|
||||
@@ -108,8 +109,12 @@ void main() {
|
||||
//Translucency
|
||||
count = meta.cntA&0xFFFF;
|
||||
if (count != 0) {
|
||||
uint translucentCommandPtr = atomicAdd(translucentDrawCount, 1) + TRANSLUCENT_OFFSET;//FIXME: dont hardcode this offset
|
||||
writeCmd(translucentCommandPtr, drawId, ptr, count);
|
||||
uint tp = atomicAdd(translucentDrawCount, 1)+TRANSLUCENT_WRITE_BASE;
|
||||
translucentCommandData[tp] = drawId;
|
||||
uvec3 absRel = abs(relative);
|
||||
uint distToCamera = (absRel.x+absRel.y+absRel.z)<<detail;
|
||||
distToCamera = TRANSLUCENT_WRITE_BASE-min(distToCamera, TRANSLUCENT_WRITE_BASE);
|
||||
atomicAdd(translucentCommandData[distToCamera], 1);
|
||||
#ifdef HAS_STATISTICS
|
||||
totalQuads += count;
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
#version 460
|
||||
|
||||
#extension GL_KHR_shader_subgroup_arithmetic: require
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
|
||||
#define WORK_SIZE 256
|
||||
|
||||
//Does initial parallel prefix sum on batches of WORK_SIZE
|
||||
layout(local_size_x=WORK_SIZE) in;
|
||||
|
||||
layout(binding = IO_BUFFER, std430) restrict buffer InputBuffer {
|
||||
uvec4[] ioCount;
|
||||
};
|
||||
|
||||
shared uint warpPrefixSum[32];//Warps are 32, tricks require full warp
|
||||
|
||||
//Replaces the values in ioCount with their exclusive prefix sum, computed independently per
// work group. Each invocation handles one uvec4 (4 values), so a work group covers WORK_SIZE*4 values.
//NOTE(review): the code assumes 32 wide subgroups (see the disabled asserts and the &31u test below)
void main() {
|
||||
//Clear the shared per-subgroup totals so that entries without a matching subgroup read as 0
warpPrefixSum[gl_SubgroupInvocationID] = 0;
|
||||
barrier();
|
||||
|
||||
//todo
|
||||
//assert(gl_SubgroupSize == 32);
|
||||
//assert(gl_NumSubgroups == (WORK_SIZE>>5));
|
||||
|
||||
uint gid = gl_GlobalInvocationID.x;
|
||||
uvec4 count = uvec4(0);
|
||||
uint sum = 0;
|
||||
//Exclusive prefix sum inside the uvec4: count = (0, x, x+y, x+y+z), sum = x+y+z+w
{
|
||||
uvec4 dat = ioCount[gid];
|
||||
count.yzw = dat.xyz;
|
||||
count.z += count.y;
|
||||
count.w += count.z;
|
||||
sum = count.w + dat.w;
|
||||
}
|
||||
|
||||
subgroupBarrier();//Wait for all threads in the subgroup to get the buffer
|
||||
|
||||
//Add the combined totals of all earlier invocations in this subgroup
count += subgroupExclusiveAdd(sum);
|
||||
|
||||
//Invocation 31 of each group of 32 publishes the subgroup total
// (its exclusive offset count.x plus its own sum)
if ((gl_LocalInvocationID.x&31u)==31) {
|
||||
warpPrefixSum[gl_SubgroupID] = count.x+sum;
|
||||
}
|
||||
|
||||
barrier();
|
||||
|
||||
//Subgroup 0 turns the per-subgroup totals into per-subgroup exclusive offsets
if (gl_SubgroupID == 0) {
|
||||
uint val = warpPrefixSum[gl_SubgroupInvocationID];
|
||||
subgroupBarrier();
|
||||
//Use warp to do entire add in 1 reduction
|
||||
warpPrefixSum[gl_SubgroupInvocationID] = subgroupExclusiveAdd(val);
|
||||
}
|
||||
|
||||
barrier();
|
||||
//Add the computed sum across all threads and warps
|
||||
count += warpPrefixSum[gl_SubgroupID];
|
||||
ioCount[gid] = count;
|
||||
}
|
||||
Reference in New Issue
Block a user