Shuffled around shaders
This commit is contained in:
@@ -1,3 +1,10 @@
|
||||
//Per-model record made of 16 uints in total (6 + 1 + 1 + 8).
// faceData   : one packed word per face (6 entries)
// flagsA     : bit flags; masks 2, 4 and 8 are tested by modelHasBiomeLUT,
//              modelIsTranslucent and modelHasMipmaps respectively
// colourTint : single packed word (encoding not shown here)
// _pad       : 8 unused words
struct BlockModel {
|
||||
uint faceData[6];
|
||||
uint flagsA;
|
||||
uint colourTint;
|
||||
uint _pad[8];
|
||||
};
|
||||
|
||||
//TODO: FIXME: this isn't actually correct because, depending on the face (I think), it could be 1/64th of a position off
|
||||
// but I'm going to assume that since we are dealing with huge render distances, this shouldn't matter that much
|
||||
float extractFaceIndentation(uint faceData) {
|
||||
@@ -18,13 +25,13 @@ uint faceHasAlphaCuttoutOverride(uint faceData) {
|
||||
}
|
||||
|
||||
//Returns true when the bit with mask 2 is set in model.flagsA.
//NOTE(review): the two return lines look like the before/after sides of the diff hunk
// (signed literal `2` replaced by unsigned `2u`) - confirm only the second one is live.
bool modelHasBiomeLUT(BlockModel model) {
|
||||
return ((model.flagsA)&2) != 0;
|
||||
return ((model.flagsA)&2u) != 0;
|
||||
}
|
||||
|
||||
//Returns true when the bit with mask 4 is set in model.flagsA.
//NOTE(review): the two return lines look like the before/after sides of the diff hunk
// (signed literal `4` replaced by unsigned `4u`) - confirm only the second one is live.
bool modelIsTranslucent(BlockModel model) {
|
||||
return ((model.flagsA)&4) != 0;
|
||||
return ((model.flagsA)&4u) != 0;
|
||||
}
|
||||
|
||||
//Returns true when the bit with mask 8 is set in model.flagsA.
//NOTE(review): the two return lines look like the before/after sides of the diff hunk
// (signed literal `8` replaced by unsigned `8u`) - confirm only the second one is live.
bool modelHasMipmaps(BlockModel model) {
|
||||
return ((model.flagsA)&8) != 0;
|
||||
return ((model.flagsA)&8u) != 0;
|
||||
}
|
||||
@@ -7,24 +7,6 @@ layout(binding = 0, std140) uniform SceneUniform {
|
||||
vec3 cameraSubPos;
|
||||
};
|
||||
|
||||
//Per-model record made of 16 uints in total (6 + 1 + 1 + 8):
// six per-face words, one flags word, one colour tint word and 8 padding words.
//NOTE(review): this hunk shrinks (-7,24 +7,6), so this copy appears to be on the removed side - confirm.
struct BlockModel {
|
||||
uint faceData[6];
|
||||
uint flagsA;
|
||||
uint colourTint;
|
||||
uint _pad[8];
|
||||
};
|
||||
|
||||
//Per-section metadata record made of eight uints.
// posA, posB : packed words (bit layout is not shown in this hunk)
// AABB       : single packed word
// ptr        : single word
// cntA..cntD : four count words
//NOTE(review): this hunk shrinks (-7,24 +7,6), so this copy appears to be on the removed side - confirm.
struct SectionMeta {
|
||||
uint posA;
|
||||
uint posB;
|
||||
uint AABB;
|
||||
uint ptr;
|
||||
uint cntA;
|
||||
uint cntB;
|
||||
uint cntC;
|
||||
uint cntD;
|
||||
};
|
||||
|
||||
//TODO: see if making the stride 2*4*4 bytes (or similar) helps, since that would give 16-byte writes
|
||||
struct DrawCommand {
|
||||
uint count;
|
||||
|
||||
@@ -8,8 +8,8 @@ layout(local_size_x = 128) in;
|
||||
#define SECTION_METADATA_BUFFER_BINDING 3
|
||||
#define INDIRECT_SECTION_LOOKUP_BINDING 4
|
||||
|
||||
#import <voxy:lod/gl46/bindings.glsl>
|
||||
#import <voxy:lod/section.glsl>
|
||||
#import <voxy:lod/gl46/bindings.glsl>
|
||||
|
||||
/*
|
||||
uint count;
|
||||
|
||||
@@ -11,8 +11,8 @@ layout(local_size_x = 128) in;
|
||||
#define POSITION_SCRATCH_BINDING 6
|
||||
#define POSITION_SCRATCH_ACCESS writeonly
|
||||
|
||||
#import <voxy:lod/gl46/bindings.glsl>
|
||||
#import <voxy:lod/section.glsl>
|
||||
#import <voxy:lod/gl46/bindings.glsl>
|
||||
|
||||
//https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GL_EXT_shader_16bit_storage.txt
|
||||
// adds support for uint8_t which can use for compact visibility buffer
|
||||
|
||||
@@ -6,8 +6,8 @@
|
||||
#define VISIBILITY_BUFFER_BINDING 2
|
||||
#define INDIRECT_SECTION_LOOKUP_BINDING 3
|
||||
|
||||
#import <voxy:lod/gl46/bindings.glsl>
|
||||
#import <voxy:lod/section.glsl>
|
||||
#import <voxy:lod/gl46/bindings.glsl>
|
||||
|
||||
flat out uint id;
|
||||
flat out uint value;
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
#extension GL_ARB_gpu_shader_int64 : enable
|
||||
|
||||
#define QUAD_BUFFER_BINDING 1
|
||||
#define SECTION_METADATA_BUFFER_BINDING 2
|
||||
#define MODEL_BUFFER_BINDING 3
|
||||
#define MODEL_COLOUR_BUFFER_BINDING 4
|
||||
#define POSITION_SCRATCH_BINDING 5
|
||||
@@ -10,8 +9,8 @@
|
||||
|
||||
|
||||
#import <voxy:lod/quad_format.glsl>
|
||||
#import <voxy:lod/gl46/bindings.glsl>
|
||||
#import <voxy:lod/block_model.glsl>
|
||||
#import <voxy:lod/gl46/bindings.glsl>
|
||||
|
||||
//#define DEBUG_RENDER
|
||||
|
||||
|
||||
@@ -9,8 +9,8 @@
|
||||
|
||||
|
||||
#import <voxy:lod/quad_format.glsl>
|
||||
#import <voxy:lod/gl46/bindings.glsl>
|
||||
#import <voxy:lod/block_model.glsl>
|
||||
#import <voxy:lod/gl46/bindings.glsl>
|
||||
|
||||
layout(location = 6) out flat uint quadDebug;
|
||||
|
||||
|
||||
@@ -1,67 +0,0 @@
|
||||
|
||||
layout(binding = HIZ_BINDING_INDEX) uniform sampler2DShadow hizDepthSampler;
|
||||
|
||||
vec3 minBB;
|
||||
vec3 maxBB;
|
||||
vec2 size;
|
||||
|
||||
|
||||
//Sets up screenspace with the given node id, returns true on success false on failure/should not continue
|
||||
//Accesses data that is setup in the main traversal and is just shared to here
|
||||
//Fills the globals minBB, maxBB and size for the given node.
// - `base` is the node's origin corner multiplied by VP (still in homogeneous form).
// - The other 7 cube corners are built from the bits of i (x = bit 0, y = bit 1, z = bit 2),
//   scaled by (32<<node.lodLevel) after multiplication by VP, then offset by `base`.
// - Every corner is perspective-divided and folded into minBB/maxBB with min()/max().
// - size is the xy extent of the resulting box.
//Returns nothing; results are only visible through the globals.
//NOTE(review): `transform` is fetched but not used anywhere in this function.
void setupScreenspace(in UnpackedNode node) {
|
||||
//TODO: implement transform support
|
||||
Transform transform = transforms[getTransformIndex(node)];
|
||||
|
||||
vec4 base = VP*vec4(vec3(((node.pos<<node.lodLevel)-camSecPos)<<5)-camSubSecPos, 1);
|
||||
|
||||
//TODO: AABB SIZES not just a max cube
|
||||
|
||||
//vec3 minPos = minSize + basePos;
|
||||
//vec3 maxPos = maxSize + basePos;
|
||||
|
||||
minBB = base.xyz/base.w;
|
||||
maxBB = minBB;
|
||||
|
||||
for (int i = 1; i < 8; i++) {
|
||||
//NOTE!: can't this be precomputed and put in an array?? in the scene uniform??
|
||||
vec4 pPoint = (VP*vec4(vec3((i&1)!=0,(i&2)!=0,(i&4)!=0),1))*(32<<node.lodLevel);//Size of section is 32x32x32 (need to change it to a bounding box in the future)
|
||||
pPoint += base;
|
||||
vec3 point = pPoint.xyz/pPoint.w;
|
||||
//TODO: CLIP TO VIEWPORT
|
||||
minBB = min(minBB, point);
|
||||
maxBB = max(maxBB, point);
|
||||
}
|
||||
|
||||
//TODO: MORE ACCURATELY DETERMINE SCREENSPACE AREA, this can be done by computing and adding
|
||||
// the projected surface area of each face/quad which winding order faces the camera
|
||||
// (this is just the dot product of 2 projected vectors)
|
||||
|
||||
//can do a funny by not doing the perspective divide except on the output of the area
|
||||
|
||||
//printf("Screenspace MIN: %f, %f, %f MAX: %f, %f, %f", minBB.x,minBB.y,minBB.z, maxBB.x,maxBB.y,maxBB.z);
|
||||
|
||||
size = maxBB.xy - minBB.xy;
|
||||
|
||||
}
|
||||
|
||||
//Checks if the node is implicitly culled (outside frustum)
|
||||
//Returns true when the box in the globals minBB/maxBB lies entirely beyond one bound:
// any component of maxBB is <= (-1, -1, 0), or any component of minBB is >= (1, 1, 1).
//NOTE(review): literals such as `1f` have a float suffix but no decimal point or exponent;
// check the GLSL floating-constant grammar, as some compilers may reject this form.
bool outsideFrustum() {
|
||||
return any(lessThanEqual(maxBB, vec3(-1f, -1f, 0f))) || any(lessThanEqual(vec3(1f, 1f, 1f), minBB));
|
||||
}
|
||||
|
||||
//Tests the box in the globals minBB/maxBB/size against the hizDepthSampler shadow sampler.
// - Returns false immediately when minBB.z < 0.
// - Otherwise scales size by (screenW, screenH), takes the larger axis (clamped to at least 1)
//   and uses ceil(log2(..)) of it as the mip level.
// - Samples at the xy midpoint of the box with minBB.z as the compare reference and
//   returns true when the compare result is greater than 0.0001.
//NOTE(review): midpoint and size are used exactly as setupScreenspace left them, with no
// remapping here - confirm they are in the coordinate range the sampler expects.
bool isCulledByHiz() {
|
||||
if (minBB.z < 0) {//Minpoint is behind the camera, its always going to pass
|
||||
return false;
|
||||
}
|
||||
vec2 ssize = size.xy * vec2(ivec2(screenW, screenH));
|
||||
float miplevel = ceil(log2(max(max(ssize.x, ssize.y),1)));
|
||||
vec2 midpoint = (maxBB.xy + minBB.xy)*0.5;
|
||||
return textureLod(hizDepthSampler, vec3(midpoint, minBB.z), miplevel) > 0.0001;
|
||||
}
|
||||
|
||||
//Returns whether we should descend into the node's children or not
|
||||
//Returns true when size.x*size.y*screenW*screenH exceeds the threshold decendSSS.
// size is the global written by setupScreenspace; screenW, screenH and decendSSS are
// declared outside the visible listing.
bool shouldDecend() {
|
||||
//printf("Screen area %f: %f, %f", (size.x*size.y*float(screenW)*float(screenH)), float(screenW), float(screenH));
|
||||
return (size.x*size.y*screenW*screenH) > decendSSS;
|
||||
}
|
||||
@@ -1,262 +0,0 @@
|
||||
#version 460 core
|
||||
|
||||
#define WORKGROUP 4
|
||||
#define MINI_BATCH_SIZE 32
|
||||
//The entire uint is a minibatch (each idx is one)
|
||||
#define MINI_BATCH_MSK (uint(-1))
|
||||
|
||||
//Each y dim is a quadrant in the octree
|
||||
// multiple x dims to fill up workgroups
|
||||
layout(local_size_x=WORKGROUP, local_size_y=8) in;
|
||||
|
||||
//std430 storage buffer at binding 1, accessed through the instance name requestQueue.
// counter : single uint placed ahead of the array
// queue   : unsized uint array
//NOTE(review): nothing in the visible listing reads or writes requestQueue - usage to be confirmed.
layout(binding = 1, std430) restrict buffer RequestSectionLoadQueue {
|
||||
uint counter;
|
||||
uint[] queue;
|
||||
} requestQueue;
|
||||
|
||||
//SectionNodeData is a uvec4 that contains the position + flags + ptr to own render section data + ptr to children
|
||||
//Read-only std430 storage buffer at binding 2 holding one uvec4 per node.
// Entries are passed to unpackNode() by loadNode and processNode.
layout(binding = 2, std430) restrict readonly buffer SectionNodeData {
|
||||
uvec4[] sectionNodes;
|
||||
};
|
||||
|
||||
//std430 storage buffer at binding 3, accessed through the instance name nodeQueue.
// Five header uints (feedbackStatus, batchIndex, end, start, maxSize) followed by an unsized uint array.
// Only batchIndex is touched in the visible listing (atomicAdd in process()).
layout(binding = 3, std430) restrict buffer ActiveWorkingNodeQueue {
|
||||
uint feedbackStatus;
|
||||
uint batchIndex;
|
||||
uint end;
|
||||
uint start;
|
||||
uint maxSize;//Needs to be a multiple of local_size_x
|
||||
uint[] queue;
|
||||
} nodeQueue;
|
||||
|
||||
|
||||
//Expanded form of one sectionNodes entry, as returned by unpackNode().
// position : x, y, z plus the detail level in the w component
// flags    : status flags (16 bits according to the field comment)
// self     : word handed to renderMesh() by processNode
// children : single word (presumably a reference to the child nodes - confirm)
struct UnpackedNode {
|
||||
ivec4 position;//x,y,z,detail
|
||||
uint flags;//16 bits
|
||||
uint self;
|
||||
uint children;
|
||||
};
|
||||
|
||||
//Intended to turn a packed uvec4 node record into an UnpackedNode.
//NOTE(review): in this listing `data` is never read and no field of `node` is assigned,
// so the returned struct is uninitialised - the decoding still has to be written.
UnpackedNode unpackNode(uvec4 data) {
|
||||
UnpackedNode node;
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
//NOTE: this differs from Nanite in that if a node is not loaded, it is simply not rendered
|
||||
|
||||
shared UnpackedNode workingNodes[WORKGROUP];
|
||||
shared uint miniBatchMsk;
|
||||
//Loads one node per x-lane into the shared workingNodes array.
// 1. Invocation 0 of the workgroup resets a mask and checks miniBatchMsk (the inner branch is empty).
// 2. barrier()
// 3. Invocations with gl_LocalInvocationID.y == 0 store unpackNode(sectionNodes[id])
//    into workingNodes[gl_LocalInvocationID.x].
// 4. barrier()
//NOTE(review): `batchMsk` and `id` are not declared anywhere in the visible listing
// (the shared variable is named miniBatchMsk) - this does not look compilable as shown.
void loadNode() {
|
||||
if (gl_LocalInvocationIndex == 0) {//Check if we need to
|
||||
batchMsk = 0;//Reset the minibatch
|
||||
if (miniBatchMsk == MINI_BATCH_SIZE) {
|
||||
|
||||
}
|
||||
}
|
||||
barrier();
|
||||
if (gl_LocalInvocationID.y == 0) {
|
||||
|
||||
|
||||
//Need to make it work in y size 8, but only gl_LocalInvocationId.x == 0
|
||||
workingNodes[gl_LocalInvocationID.x] = unpackNode(sectionNodes[id]);
|
||||
}
|
||||
barrier();//Synchronize, also acts as memory barrier
|
||||
}
|
||||
|
||||
|
||||
|
||||
//Computes screensize of the node and whether it should render itself or its children
|
||||
//NOTE(review): stub - the body is empty, so this bool function has no return statement yet.
bool shouldRenderChildren(UnpackedNode node) {
|
||||
|
||||
}
|
||||
|
||||
//Process a single node and enqueue child nodes if needed into work queue, enqueue self to render and/or request children to load
|
||||
//Skeleton of the per-node step: branches on shouldRenderChildren(node); both branches are empty.
//NOTE(review): `node` is used but never declared in this function in the visible listing,
// and the parameter `id` is not read.
void processNode(uint id) {//Called even if it doesnt have any work (id==-1) to ensure uniform control flow for barriers
|
||||
|
||||
//Bottom 2 bits are status flags, is air and children loaded
|
||||
// node.flags
|
||||
|
||||
//If the children-loaded flag is not set, send a request for the children of the node to be loaded
|
||||
// if all the children are loaded but we are not and we need to render, render the children and dispatch
|
||||
// a request to load self
|
||||
|
||||
if (shouldRenderChildren(node)) {
|
||||
//Dont care about
|
||||
} else {
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
//The actively scheduled/acquired work slot for this group
|
||||
shared uint workingBatchIndex;
|
||||
shared uint workingBatchOffset;
|
||||
//Invocation 0 of the workgroup atomically adds BATCH_SIZE to nodeQueue.batchIndex and stores
// the value returned by atomicAdd (the pre-add value) in the shared workingBatchIndex.
//NOTE(review): BATCH_SIZE is not defined in the visible listing (only MINI_BATCH_SIZE is),
// and no barrier follows the shared write inside this function.
void process() {
|
||||
if (gl_LocalInvocationIndex == 0) {//This includes both x and y
|
||||
workingBatchIndex = atomicAdd(nodeQueue.batchIndex, BATCH_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//Entry point skeleton: loops forever calling barrier().
//NOTE(review): no break/return is visible, and none of loadNode/processNode/process is called here.
void main() {
|
||||
while (true) {
|
||||
barrier();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
//when a node is processed,
|
||||
// its screen bounding box is computed using a fast trick (e.g. if you're viewing it from a quadrant you already know its bounding points (min/max))
|
||||
// frustum cull, check hiz
|
||||
// if it passes culling, use the screen size to check whether it must render itself
|
||||
// or dispatch its children to render
|
||||
// IF its error is small enough, then render itself, its mesh should always be loaded, if not its a critical error (except maybe if its a top level node or something)
|
||||
// if its error is too large,
|
||||
// check that all children are loaded (or empty), if they are not all loaded, enqueue a request for the cpu to load
|
||||
// that nodes children
|
||||
// if the load queue is full, dont enqueue it to the queue
|
||||
// then instead of rendering children, render its own mesh since it should always be loaded
|
||||
|
||||
//Can also reverse the above slightly and make it so that it checks the children before enqueuing them
|
||||
|
||||
|
||||
//the main thing to worry about is if there is enough work to fill the initial few rounds of this
|
||||
// before amplification takes effect
|
||||
// can do a thing where it initially just blasts child nodes out until the size is small enough
|
||||
|
||||
|
||||
|
||||
// NOTE: since matrix multiplication distributes over addition
|
||||
// can precompute the AABB corners with respect to the matrix
|
||||
// then you can just add a translation vector
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//TODO: can do in another way
|
||||
// first compute the sections that should either render self or childs
|
||||
// then, as a separate job queue, work through it
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//NOTE(review): stub - the body is empty, so this uint function has no return statement yet.
// Called by enqueueChildren.
uint getChildCount(UnpackedNode node) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
//Checks whether a node should be culled based on hiz/frustum
|
||||
//NOTE(review): stub - the body is empty, so this bool function has no return statement yet.
// processNode skips a node entirely when this returns true.
bool cullNode(UnpackedNode node) {
|
||||
|
||||
}
|
||||
|
||||
//Should render this node, or recurse to children
|
||||
//NOTE(review): stub - the body is empty, so this bool function has no return statement yet.
bool shouldRenderChildrenInstead(UnpackedNode node) {
|
||||
|
||||
}
|
||||
|
||||
//Does the node have its own mesh loaded
|
||||
//NOTE(review): stub - the body is empty, so this bool function has no return statement yet.
bool nodeHasSelfMesh(UnpackedNode node) {
|
||||
|
||||
}
|
||||
|
||||
//Does the node have its children loaded (note! not child meshes)
|
||||
//NOTE(review): stub - the body is empty, so this bool function has no return statement yet.
bool nodeHasChildrenLoaded(UnpackedNode node) {
|
||||
|
||||
}
|
||||
|
||||
//Are all the childrens meshes loaded
|
||||
//NOTE(review): stub - the body is empty, so this bool function has no return statement yet.
// Not called anywhere in the visible listing.
bool nodeHasChildMeshesLoaded(UnpackedNode node) {
|
||||
|
||||
}
|
||||
|
||||
//NOTE(review): stub - the body is empty; `type` and `idx` are unused.
// processNode calls this with type 0 (from the branch commented "Request that self mesh is loaded")
// and type 1 (from the branch commented "dispatch request to load children"), passing the node index as idx.
void request(uint type, uint idx) {
|
||||
|
||||
}
|
||||
|
||||
//NOTE(review): stub - the body is empty; `idx` is unused.
// processNode passes node.self as idx.
void renderMesh(uint idx) {
|
||||
|
||||
}
|
||||
|
||||
//Placeholder for pushing a node's children onto the work queue.
// Currently only reads getChildCount(node) into `cnt`; nothing is enqueued and `arg` is unused.
void enqueueChildren(uint arg, UnpackedNode node) {
|
||||
uint cnt = getChildCount(node);
|
||||
//TODO: the queue needs 2 counters, the pre and post atomic,
|
||||
// pre is incremented to get index
|
||||
// queue is written to
|
||||
// post is then incremented to signal
|
||||
}
|
||||
|
||||
//NOTE(review): stub - the body is empty; `type` is unused.
// processNode calls this with 0 (no children loaded and no self mesh, while children were wanted)
// and 1 (no self mesh and no children loaded, while self was wanted).
void reportCritical(uint type) {
|
||||
|
||||
}
|
||||
|
||||
//Decides what to do with node `idx` (an index into sectionNodes).
// Nothing happens when cullNode() returns true. Otherwise:
//  - children wanted (shouldRenderChildrenInstead):
//      children loaded         -> enqueueChildren(0, node)
//      else self mesh present  -> renderMesh(node.self) and request(1, idx)
//      else                    -> reportCritical(0)
//  - self wanted:
//      self mesh present       -> renderMesh(node.self)
//      else                    -> request(0, idx), then
//          children loaded     -> enqueueChildren(1, node)
//          else                -> reportCritical(1)
//NOTE(review): all helpers called here are empty stubs in the visible listing.
void processNode(uint idx) {
|
||||
UnpackedNode node = unpackNode(sectionNodes[idx]);
|
||||
if (!cullNode(node)) {
|
||||
//Should we render children instead of ourselves with respect to screenspace error
|
||||
if (shouldRenderChildrenInstead(node)) {
|
||||
if (nodeHasChildrenLoaded(node)) {
|
||||
//Dispatch nodes to queue
|
||||
enqueueChildren(0, node);
|
||||
} else {
|
||||
//Children aren't loaded so either render self mesh or if we can't
|
||||
// abort, basically must request nodes
|
||||
if (nodeHasSelfMesh(node)) {
|
||||
//Render self and dispatch request to load children
|
||||
renderMesh(node.self);
|
||||
request(1, idx);
|
||||
} else {
|
||||
//Critical issue, no children are loaded and self has no mesh
|
||||
reportCritical(0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (nodeHasSelfMesh(node)) {
|
||||
//render self
|
||||
renderMesh(node.self);
|
||||
} else {
|
||||
//Request that self mesh is loaded
|
||||
request(0, idx);
|
||||
|
||||
//render children instead
|
||||
if (nodeHasChildrenLoaded(node)) {//Might need to be node nodeHasChildMeshesLoaded
|
||||
enqueueChildren(1, node);
|
||||
} else {
|
||||
//This is very bad, it means cant render anything
|
||||
reportCritical(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//Pseudo code, one thread, one load
|
||||
//Entry point sketch: each iteration atomically increments workingNodeQueuePos and splits the
// returned value into idx (low 24 bits) and arg (bits 24 and up), then compares idx with workingNodeQueueEnd.
//NOTE(review): both branches are empty, `arg` is unused, the loop has no exit, and
// workingNodeQueuePos / workingNodeQueueEnd are not declared in the visible listing.
void main() {
|
||||
while (true) {
|
||||
//Try to process a node queue entry
|
||||
uint work = atomicAdd(workingNodeQueuePos, 1);
|
||||
uint idx = work&0xFFFFFFu;
|
||||
uint arg = work>>24;
|
||||
if (idx < workingNodeQueueEnd) {
|
||||
|
||||
|
||||
} else {
|
||||
//Do other queue work however we still have the work slot allocated
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
//This provides per scene/viewport/transform access, that is, a node can be attached to a specific scene/viewport/transform, this is so that
|
||||
// different nodes/models can have different viewports/scenes/transforms which enables some very cool things like
|
||||
// absolutely massive VS2 structures should... just work :tm: - todd howard
|
||||
|
||||
//One entry of the transforms[] uniform array: a 4x4 matrix plus two integer 4-vectors.
//NOTE(review): how originPos and worldPos are interpreted is not shown in the visible listing.
struct Transform {
|
||||
mat4 transform;
|
||||
ivec4 originPos;
|
||||
ivec4 worldPos;
|
||||
};
|
||||
|
||||
|
||||
//std140 uniform block holding a fixed array of 32 Transform entries.
// The binding index comes from the TRANSFORM_ARRAY_INDEX macro, which the including shader must define.
layout(binding = TRANSFORM_ARRAY_INDEX, std140) uniform TransformArray {
|
||||
Transform transforms[32];
|
||||
};
|
||||
@@ -1,3 +1,14 @@
|
||||
//Per-section metadata record made of eight uints.
// posA, posB : packed words; extractDetail reads the top 4 bits of posA and
//              extractPosition decodes both posA and posB
// AABB       : single packed word
// ptr        : single word
// cntA..cntD : four count words
struct SectionMeta {
|
||||
uint posA;
|
||||
uint posB;
|
||||
uint AABB;
|
||||
uint ptr;
|
||||
uint cntA;
|
||||
uint cntB;
|
||||
uint cntC;
|
||||
uint cntD;
|
||||
};
|
||||
|
||||
//Returns the top 4 bits of section.posA (posA shifted right by 28) as the detail value.
uint extractDetail(SectionMeta section) {
|
||||
return section.posA>>28;
|
||||
}
|
||||
@@ -5,7 +16,7 @@ uint extractDetail(SectionMeta section) {
|
||||
ivec3 extractPosition(SectionMeta section) {
|
||||
int y = ((int(section.posA)<<4)>>24);
|
||||
int x = (int(section.posB)<<4)>>8;
|
||||
int z = int((section.posA&((1<<20)-1))<<4);
|
||||
int z = int((section.posA&((1u<<20)-1))<<4);
|
||||
z |= int(section.posB>>28);
|
||||
z <<= 8;
|
||||
z >>= 8;
|
||||
|
||||
Reference in New Issue
Block a user