Merge branch 'mc_1215' into mc_1216

This commit is contained in:
mcrcortex
2025-06-18 08:55:06 +10:00
13 changed files with 163 additions and 157 deletions

View File

@@ -6,7 +6,7 @@ layout(binding = 0) uniform sampler2D depthTex;
void main() {
vec4 depths = textureGather(depthTex, uv, 0); // Get depth values from all surrounding texels.
bvec4 cv = lessThanEqual(vec4(0.999999f), depths);
bvec4 cv = lessThanEqual(vec4(0.999999999f), depths);
if (any(cv)) {//Patch holes (its very dodgy but should work :tm:, should clamp it to the first 3 levels)
depths = mix(vec4(0.0f), depths, cv);
}

View File

@@ -93,10 +93,9 @@ void bubbleSortInital(uint vis, uint id) {
bool shouldSortId(uint id) {
UnpackedNode node;
if (unpackNode(node, gl_GlobalInvocationID.x)==uvec4(-1)) {
if (unpackNode(node, id)==uvec4(-1)) {
return false;//Unallocated node
}
if (isEmptyMesh(node) || (!hasMesh(node))) {//|| (!hasChildren(node))
return false;
}
@@ -105,6 +104,9 @@ bool shouldSortId(uint id) {
return false;//Cannot remove geometry from top level node
}
if (hasRequested(node)) {//If a node has a request its not valid to remove
return false;
}
/*THIS IS COMPLETELY WRONG, we need to check if all the children of the parent of the child are leaf nodes
// not this node
@@ -130,7 +132,7 @@ void main() {
// this means that insertion into the local buffer can be accelerated W.R.T global
for (uint i = 0; i < OPS_PER_THREAD; i++) {
//Copy in with warp size batch fetch
uint id = gl_LocalInvocationID.x + (i*WORK_SIZE);
uint id = (gl_LocalInvocationID.x*OPS_PER_THREAD) + i;
initalSort[id] = minVisIds[id]|(1u<<31);//Flag the id as being external
}
barrier();
@@ -158,7 +160,7 @@ void main() {
//Work size batching
for (uint i = 0; i < OPS_PER_THREAD; i++) {
barrier();//Probably unneeded, was just to keep warp coheriancy
uint id = gl_LocalInvocationID.x+(i*WORK_SIZE);
uint id = (gl_LocalInvocationID.x*OPS_PER_THREAD)+i;
uint sid = initalSort[id];
if ((sid&(1u<<31)) != 0) {
//The flag being external was set, meaning we should NOT insert this element

View File

@@ -12,7 +12,7 @@
// substantially for performance (for both persistent threads and incremental)
layout(binding = HIZ_BINDING) uniform sampler2DShadow hizDepthSampler;
layout(binding = HIZ_BINDING) uniform sampler2D hizDepthSampler;
//TODO: maybe do sphere bounds as well? Because they have different accuracies but are both overestimates (liberal (non-conservative xD)),
// so we can do &&
@@ -39,7 +39,6 @@ bool checkPointInView(vec4 point) {
vec3 minBB = vec3(0.0f);
vec3 maxBB = vec3(0.0f);
vec2 size = vec2(0.0f);
bool insideFrustum = false;
float screenSize = 0.0f;
@@ -117,7 +116,8 @@ void setupScreenspace(in UnpackedNode node) {
minBB = min(min(min(p000, p100), min(p001, p101)), min(min(p010, p110), min(p011, p111)));
maxBB = max(max(max(p000, p100), max(p001, p101)), max(max(p010, p110), max(p011, p111)));
size = clamp(maxBB.xy - minBB.xy, vec2(0), vec2(1));
minBB = clamp(minBB, vec3(0), vec3(1));
maxBB = clamp(maxBB, vec3(0), vec3(1));
}
//Checks if the node is implicitly culled (outside frustum)
@@ -128,32 +128,31 @@ bool outsideFrustum() {
}
bool isCulledByHiz() {
vec2 ssize = size * vec2(screenW, screenH);
float miplevel = log2(max(max(ssize.x, ssize.y),1));
ivec2 ssize = ivec2(1)<<ivec2((packedHizSize>>16)&0xFFFF,packedHizSize&0xFFFF);
vec2 size = (maxBB.xy-minBB.xy)*ssize;
float miplevel = log2(max(max(size.x, size.y),1));
//TODO: make a path for if the miplevel would result in the textureSampler sampling a size of 1
miplevel = floor(miplevel)-1;
miplevel = clamp(miplevel, 0, textureQueryLevels(hizDepthSampler)-1);
int ml = int(miplevel);
ssize = max(ivec2(1), ssize>>ml);
ivec2 mxbb = ivec2(maxBB.xy*ssize);
ivec2 mnbb = ivec2(minBB.xy*ssize);
miplevel = ceil(miplevel);
miplevel = clamp(miplevel, 0, 20);
if (miplevel >= 10.0f) {//Level 9 or 10// TODO: FIX THIS JANK SHIT
//return false;
float pointSample = -1.0f;
//float pointSample2 = 0.0f;
for (int x = mnbb.x; x<=mxbb.x; x++) {
for (int y = mnbb.y; y<=mxbb.y; y++) {
float sp = texelFetch(hizDepthSampler, ivec2(x, y), ml).r;
//pointSample2 = max(sp, pointSample2);
//sp = mix(sp, pointSample, 0.9999999f<=sp);
pointSample = max(sp, pointSample);
}
}
//pointSample = mix(pointSample, pointSample2, pointSample<=0.000001f);
vec2 midpoint = (maxBB.xy + minBB.xy)*0.5f;
float testAgainst = minBB.z;
//the *2.0f-1.0f converts from the 0->1 range to -1->1 range that depth is in (not having this causes tighter bounds, but causes culling issues in caves)
testAgainst = testAgainst*2.0f-1.0f;
bool culled = textureLod(hizDepthSampler, clamp(vec3(midpoint, testAgainst), vec3(0), vec3(1)), miplevel) < 0.0001f;
//printf("HiZ sample point: (%f,%f)@%f against %f", midpoint.x, midpoint.y, miplevel, minBB.z);
//if ((culled) && node22.lodLevel == 0) {
// printf("HiZ sample point: (%f,%f)@%f against %f, value %f", midpoint.x, midpoint.y, miplevel, minBB.z, textureLod(hizDepthSampler, vec3(0.5f,0.5f, 0.000000001f), 9.0f));
//}
return culled;
return pointSample<=minBB.z;
}

View File

@@ -10,13 +10,13 @@ layout(local_size_x=LOCAL_SIZE) in;//, local_size_y=1
layout(binding = SCENE_UNIFORM_BINDING, std140) uniform SceneUniform {
mat4 VP;
ivec3 camSecPos;
float screenW;
int packedHizSize;
vec3 camSubSecPos;
float screenH;
float minSSS;
Frustum frustum;
uint renderQueueMaxSize;
float minSSS;
uint frameId;
uint requestQueueSize;
};
#import <voxy:lod/hierarchical/queue.glsl>
@@ -49,9 +49,9 @@ layout(binding = STATISTICS_BUFFER_BINDING, std430) restrict buffer statisticsBu
void addRequest(inout UnpackedNode node) {
//printf("Put node decend request");
if (!hasRequested(node)) {
if (requestQueueIndex.x < REQUEST_QUEUE_SIZE) {
if (requestQueueIndex.x < requestQueueSize) {//Soft limit
uint atomRes = atomicAdd(requestQueueIndex.x, 1);
if (atomRes < REQUEST_QUEUE_SIZE) {
if (atomRes < MAX_REQUEST_QUEUE_SIZE) {//Hard limit
//Mark node as having a request submitted to prevent duplicate submissions
requestQueue[atomRes] = getRawPos(node);
markRequested(node);