WIP
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
#version 460 core
|
||||
layout(binding = 0) uniform sampler2D blockModelAtlas;
|
||||
|
||||
//#define DEBUG_RENDER
|
||||
|
||||
//TODO: need to fix when merged quads have discardAlpha set to false but they span multiple tiles
|
||||
// however they are not a full block
|
||||
|
||||
@@ -10,8 +12,11 @@ layout(location = 2) in flat vec4 tinting;
|
||||
layout(location = 3) in flat vec4 addin;
|
||||
layout(location = 4) in flat uint flags;
|
||||
layout(location = 5) in flat vec4 conditionalTinting;
|
||||
//layout(location = 6) in flat vec4 solidColour;
|
||||
|
||||
|
||||
#ifdef DEBUG_RENDER
|
||||
layout(location = 6) in flat uint quadDebug;
|
||||
#endif
|
||||
layout(location = 0) out vec4 outColour;
|
||||
void main() {
|
||||
vec2 uv = mod(uv, vec2(1.0))*(1.0/(vec2(3.0,2.0)*256.0));
|
||||
@@ -29,4 +34,14 @@ void main() {
|
||||
outColour = (colour * tinting) + addin;
|
||||
|
||||
//outColour = vec4(uv + baseUV, 0, 1);
|
||||
|
||||
|
||||
#ifdef DEBUG_RENDER
|
||||
uint hash = quadDebug*1231421+123141;
|
||||
hash ^= hash>>16;
|
||||
hash = hash*1231421+123141;
|
||||
hash ^= hash>>16;
|
||||
hash = hash * 1827364925 + 123325621;
|
||||
outColour = vec4(float(hash&15u)/15, float((hash>>4)&15u)/15, float((hash>>8)&15u)/15, 1);
|
||||
#endif
|
||||
}
|
||||
@@ -1,150 +0,0 @@
|
||||
#version 460 core
|
||||
#extension GL_ARB_gpu_shader_int64 : enable
|
||||
|
||||
#import <voxy:lod/quad_format.glsl>
|
||||
#import <voxy:lod/gl46/bindings.glsl>
|
||||
#import <voxy:lod/block_model.glsl>
|
||||
#line 8
|
||||
|
||||
layout(location = 0) out vec2 uv;
|
||||
layout(location = 1) out flat vec2 baseUV;
|
||||
layout(location = 2) out flat vec4 tinting;
|
||||
layout(location = 3) out flat vec4 addin;
|
||||
layout(location = 4) out flat uint flags;
|
||||
layout(location = 5) out flat vec4 conditionalTinting;
|
||||
//layout(location = 6) out flat vec4 solidColour;
|
||||
|
||||
//Returns the lod level stored in the top 5 bits (27..31) of gl_BaseInstance
uint extractLodLevel() {
    uint packedInstance = uint(gl_BaseInstance);
    return packedInstance >> 27;
}
|
||||
|
||||
//Unpacks the three signed 9 bit position fields of gl_BaseInstance (x: bits 18..26, y: bits 9..17, z: bits 0..8)
//Gives a relative position of +-255 relative to the player center in its respective lod
//NOTE(review): the original comment said the last 2 bits of gl_BaseInstance are unused, however these
// shifts together with the >>27 in extractLodLevel touch all 32 bits - confirm which bits are spare
ivec3 extractRelativeLodPos() {
    int packedInstance = gl_BaseInstance;
    //Shift each field up to the top of the word, then arithmetic shift back down to sign extend it
    int relX = (packedInstance << 5) >> 23;
    int relY = (packedInstance << 14) >> 23;
    int relZ = (packedInstance << 23) >> 23;
    return ivec3(relX, relY, relZ);
}
|
||||
|
||||
//Splits a packed 32 bit colour into its four bytes, most significant byte first,
// and normalizes each byte from 0..255 to 0..1
vec4 uint2vec4RGBA(uint colour) {
    uint byte3 = (colour >> 24) & 0xFFu;
    uint byte2 = (colour >> 16) & 0xFFu;
    uint byte1 = (colour >> 8) & 0xFFu;
    uint byte0 = colour & 0xFFu;
    return vec4(byte3, byte2, byte1, byte0) / 255.0;
}
|
||||
|
||||
//Gets the face indentation signed by the face direction:
// faces with bit 0 clear get +indentation, faces with bit 0 set get -indentation
float getDepthOffset(uint faceData, uint face) {
    float indentation = extractFaceIndentation(faceData);
    float direction = ((int(face) & 1) == 0) ? 1.0 : -1.0;
    return indentation * direction;
}
|
||||
|
||||
//Returns the 2d offset of one quad corner taken from the face size data.
// Bit 1 of corner selects the first component, bit 0 selects the second component;
// a clear bit picks the .xz value, a set bit picks the .yw value minus one
vec2 getFaceSizeOffset(uint faceData, uint corner) {
    const float expand = 0.001f;
    vec4 bounds = extractFaceSizes(faceData);
    //Expand the quads by a very small amount (xz moves down, yw moves up)
    vec2 whenClear = bounds.xz - vec2(expand);
    vec2 whenSet = (bounds.yw + vec2(expand)) - 1.0f;
    bvec2 cornerBits = bvec2((corner & 2u) != 0u, (corner & 1u) != 0u);
    return mix(whenClear, whenSet, cornerBits);
}
|
||||
|
||||
//TODO: add a mechanism so that some quads can ignore backface culling
|
||||
// this would help a lot with stuff like crops, as they would look kind of weird otherwise,
|
||||
// same with flowers etc
|
||||
//Vertex entry point: 4 vertices per quad. Unpacks the quad and its block model, positions the
// requested corner in clip space and fills the per quad outputs (uv, baseUV, flags, tinting, addin)
void main() {
    //Low 2 bits of the vertex id pick the corner, the remaining bits pick the quad
    uint quadIdx = uint(gl_VertexID)>>2;
    int cornerIdx = gl_VertexID&3;
    Quad quad = quadData[quadIdx];

    vec3 innerPos = extractPos(quad);
    uint face = extractFace(quad);
    uint modelId = extractStateId(quad);
    BlockModel model = modelData[modelId];
    uint faceData = model.faceData[face];
    bool isTranslucent = modelIsTranslucent(model);
    bool hasAO = modelHasMipmaps(model);//TODO: replace with per face AO flag
    bool isShaded = hasAO;//TODO: make this a per face flag

    //face>>1 is the axis pair: 0 up/down, 1 north/south, 2 west/east
    uint axis = face>>1;

    //NOTE: the corner order is not flipped per face here; when rendering, backface culling is
    // disabled as calls are simply dispatched for each face, which also allows "unassigned"
    // geometry where the backface isnt culled

    //Section origin for this lod
    uint lodLevel = extractLodLevel();
    int lodScale = 1<<lodLevel;
    ivec3 lodCorner = ((extractRelativeLodPos()<<lodLevel) - (baseSectionPos&(ivec3(lodScale-1))))<<5;
    vec3 corner = innerPos * lodScale + lodCorner;

    //In-plane offset of this corner, zero or the quad size on each axis plus the face inset
    vec2 faceOffset = getFaceSizeOffset(faceData, cornerIdx);
    ivec2 quadSize = extractSize(quad);
    ivec2 cornerSelect = ivec2((cornerIdx>>1)&1, cornerIdx&1);
    vec2 respectiveQuadSize = vec2(quadSize * cornerSelect);
    vec2 size = (respectiveQuadSize + faceOffset) * lodScale;

    //Third component is the depth along the face normal
    float depth = (float(face&1u) + getDepthOffset(faceData, face)) * lodScale;
    vec3 offset = vec3(size, depth);

    //Swizzle the (plane, plane, depth) offset onto the axis of the face
    if (axis == 0) { //Up/down
        offset = offset.xzy;
    } else if (axis == 2) { //west/east
        offset = offset.zxy;
    }
    //north/south (axis == 1) keeps xyz

    gl_Position = MVP * vec4(corner + offset, 1.0);

    //Compute the uv coordinates
    vec2 modelUV = vec2(modelId&0xFFu, (modelId>>8)&0xFFu)*(1.0/(256.0));
    //TODO: make the face orientated by 2x3 so that division is not a integer div and modulo isnt needed
    // as these are very slow ops
    vec2 faceTile = vec2(axis, face&1u);
    baseUV = modelUV + (faceTile * (1.0/(vec2(3.0, 2.0)*256.0)));
    //TODO: add an option to scale the quad size by the lod level so that
    // e.g. at lod level 2 a face will have 2x2
    uv = respectiveQuadSize + faceOffset;//Add in the face offset for 0,0 uv

    //Flag bit 0: alpha cutout
    flags = faceHasAlphaCuttout(faceData);
    //We need to have a conditional override based on if the model size is < a full face + quadSize > 1
    bool spansMultiple = any(greaterThan(quadSize, ivec2(1)));
    flags |= uint(spansMultiple) & faceHasAlphaCuttoutOverride(faceData);
    //Flag bit 1: model has no mipmaps
    flags |= uint(!modelHasMipmaps(model))<<1;

    //Compute lighting
    tinting = getLighting(extractLightId(quad));

    //Apply model colour tinting, looked up per biome when the model has a biome LUT
    uint tintColour = model.colourTint;
    if (modelHasBiomeLUT(model)) {
        tintColour = colourData[tintColour + extractBiomeId(quad)];
    }

    //Flag bit 2: a tint colour is present (uint(-1) means no tint)
    if (tintColour != uint(-1)) {
        flags |= 1u<<2;
        conditionalTinting = uint2vec4RGBA(tintColour).yzwx;
    } else {
        conditionalTinting = vec4(0);
    }

    addin = vec4(0.0);
    if (!isTranslucent) {
        tinting.w = 0.0;
        //Encode the face (bits 0..2), the lod level (from bit 3) and the AO flag (bit 6)
        uint encodedData = face | (lodLevel<<3) | (uint(hasAO)<<6);
        addin.w = float(encodedData)/255.0;
    }

    //Apply face tint
    if (isShaded) {
        float shade;
        if (axis == 1) {
            shade = 0.8f;
        } else if (axis == 2) {
            shade = 0.6f;
        } else if (face == 0) {
            shade = 0.5f;
        } else {
            //TODO: FIXME: DONT HAVE SOME ARBITARY TINT LIKE THIS
            shade = 0.95f;
        }
        tinting.xyz *= shade;
    }
}
|
||||
@@ -6,13 +6,18 @@
|
||||
#import <voxy:lod/block_model.glsl>
|
||||
#line 8
|
||||
|
||||
//#define DEBUG_RENDER
|
||||
|
||||
layout(location = 0) out vec2 uv;
|
||||
layout(location = 1) out flat vec2 baseUV;
|
||||
layout(location = 2) out flat vec4 tinting;
|
||||
layout(location = 3) out flat vec4 addin;
|
||||
layout(location = 4) out flat uint flags;
|
||||
layout(location = 5) out flat vec4 conditionalTinting;
|
||||
//layout(location = 6) out flat vec4 solidColour;
|
||||
|
||||
#ifdef DEBUG_RENDER
|
||||
layout(location = 6) out flat uint quadDebug;
|
||||
#endif
|
||||
|
||||
uint extractLodLevel() {
|
||||
return uint(gl_BaseInstance)>>27;
|
||||
@@ -143,4 +148,8 @@ void main() {
|
||||
|
||||
vec3 origin = vec3(((extractRelativeLodPos()<<lodLevel) - (baseSectionPos&(ivec3((1<<lodLevel)-1))))<<5);
|
||||
gl_Position = MVP*vec4((cornerPos+swizzelDataAxis(face>>1,vec3(cQuadSize,0)))*(1<<lodLevel)+origin, 1.0);
|
||||
|
||||
#ifdef DEBUG_RENDER
|
||||
quadDebug = lodLevel;
|
||||
#endif
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
//Use defines and undefines to define the queue, allows for hacky reuse of imports
|
||||
#ifndef QUEUE_NAME
|
||||
#error QUEUE_NAME is not defined
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
//Pushes an item onto the queue selected through the QUEUE_NAME define.
//NOTE(review): unfinished stub - the parameters have no types (not valid GLSL as written) and the
// body is empty. The intended types cannot be determined from this file - TODO confirm
void push(queue, item) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#
|
||||
@@ -0,0 +1,262 @@
|
||||
#version 460 core
|
||||
|
||||
#define WORKGROUP 4
|
||||
#define MINI_BATCH_SIZE 32
|
||||
//The entire uint is a minibatch (each idx is one)
|
||||
#define MINI_BATCH_MSK (uint(-1))
|
||||
|
||||
//Each y dim is an octant (one of the 8 children) in the octree
|
||||
// multiple x dims to fill up workgroups
|
||||
layout(local_size_x=WORKGROUP, local_size_y=8) in;
|
||||
|
||||
//Queue of section load requests: a counter followed by the unsized request array
layout(binding = 1, std430) restrict buffer RequestSectionLoadQueue {
    uint counter;
    uint queue[];
} requestQueue;
|
||||
|
||||
//Read only node storage, one uvec4 per node.
//Each uvec4 packs the position + flags + ptr to own render section data + ptr to children
layout(binding = 2, std430) restrict readonly buffer SectionNodeData {
    uvec4 sectionNodes[];
};
|
||||
|
||||
//Work queue of nodes being traversed: five header words followed by the unsized queue array
layout(binding = 3, std430) restrict buffer ActiveWorkingNodeQueue {
    uint feedbackStatus;
    uint batchIndex;
    uint end;
    uint start;
    uint maxSize;   //Needs to be a multiple of local_size_x
    uint queue[];
} nodeQueue;
|
||||
|
||||
|
||||
//Expanded form of one packed uvec4 node entry
struct UnpackedNode {
    ivec4 position; //x,y,z,detail
    uint flags;     //16 bits
    uint self;      //ptr to own render section data
    uint children;  //ptr to children
};
|
||||
|
||||
//Expands a packed uvec4 node entry into an UnpackedNode.
//TODO: the unpacking itself is not implemented yet and `data` is ignored. Until it is, every field
// is zero initialized so callers read defined values instead of an uninitialized struct
UnpackedNode unpackNode(uvec4 data) {
    UnpackedNode node = UnpackedNode(ivec4(0), 0u, 0u, 0u);

    return node;
}
|
||||
|
||||
//NOTE: this differs from nanite in that if a node is not loaded, it is simply not rendered
|
||||
|
||||
shared UnpackedNode workingNodes[WORKGROUP];
|
||||
shared uint miniBatchMsk;
|
||||
//Fills the shared workingNodes slot of each x lane. Invocation 0 of the group handles the mini
// batch mask first, then every invocation with y == 0 unpacks one node into workingNodes[x].
// Both barriers are reached by every invocation of the group.
void loadNode() {
    if (gl_LocalInvocationIndex == 0) {//Check if we need to
        //Was written to `batchMsk`, which is not declared anywhere; the shared mask is miniBatchMsk
        miniBatchMsk = 0;//Reset the minibatch
        //NOTE(review): this compares the mask against MINI_BATCH_SIZE right after the reset above
        // and the branch is empty - presumably the refill logic is still to be written
        if (miniBatchMsk == MINI_BATCH_SIZE) {

        }
    }
    barrier();
    if (gl_LocalInvocationID.y == 0) {
        //Need to make it work in y size 8, but only gl_LocalInvocationId.x == 0
        //NOTE(review): `id` is not declared in this function - TODO decide where the node id comes from
        workingNodes[gl_LocalInvocationID.x] = unpackNode(sectionNodes[id]);
    }
    barrier();//Synchronize, also acts as memory barrier
}
|
||||
|
||||
|
||||
|
||||
//Computes screensize of the node and whether it should render itself or its children
//TODO: not implemented yet. Returns false as an explicit placeholder, since falling off the end of
// a non-void function leaves the result undefined
bool shouldRenderChildren(UnpackedNode node) {
    return false;
}
|
||||
|
||||
//Process a single node and enqueue child nodes if needed into work queue, enqueue self to render and/or request children to load
//NOTE(review): unfinished stub - `node` is read below but is never declared in this function, and
// both branches of the if are empty. A second `void processNode(uint idx)` is defined further down
// in this file; presumably only one of the two definitions is meant to survive - TODO confirm
|
||||
void processNode(uint id) {//Called even if it doesnt have any work (id==-1) to ensure uniform control flow for barriers
|
||||
|
||||
//Bottom 2 bits are status flags, is air and children loaded
|
||||
// node.flags
|
||||
|
||||
//If the childrenloaded flag is not set, send a request for the children of the node to be loaded
|
||||
// if all the children are loaded but we are not and we need to render, render the children and dispatch
|
||||
// a request to load self
|
||||
|
||||
if (shouldRenderChildren(node)) {
|
||||
//Dont care about
|
||||
} else {
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
//The actively scheduled/acquired work slot for this group
|
||||
shared uint workingBatchIndex;
|
||||
shared uint workingBatchOffset;
|
||||
//Acquires the next batch of work for this work group: invocation 0 atomically advances
// nodeQueue.batchIndex and stores the previous value in the shared workingBatchIndex
void process() {
    if (gl_LocalInvocationIndex == 0) {//This includes both x and y
        //Was BATCH_SIZE, which is not defined anywhere in this file
        //NOTE(review): MINI_BATCH_SIZE is the only batch size macro defined here - confirm it is the intended stride
        workingBatchIndex = atomicAdd(nodeQueue.batchIndex, MINI_BATCH_SIZE);
    }
}
|
||||
|
||||
|
||||
|
||||
//Compute entry point skeleton: loops forever, hitting a barrier on every iteration
void main() {
    for (;;) {
        barrier();
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
//when a node is processed,
|
||||
// its screen bounding box is computed using a fast trick (e.g. if you're viewing it from a known quadrant you already know its bounding points (min/max))
|
||||
// frustum cull, check hiz
|
||||
// if it passes culling, use the screensize to check whether it must render itself
|
||||
// or dispatch its children to render
|
||||
// IF its error is small enough, then render itself, its mesh should always be loaded, if not its a critical error (except maybe if its a top level node or something)
|
||||
// if its error is too large,
|
||||
// check that all children are loaded (or empty), if they are not all loaded, enqueue a request for the cpu to load
|
||||
// that nodes children
|
||||
// if the load queue is full, dont enqueue it to the queue
|
||||
// then instead of rendering children, render its own mesh since it should always be loaded
|
||||
|
||||
//Can also reverse the above slightly and make it so that it checks the children before enqueuing them
|
||||
|
||||
|
||||
//the main thing to worry about is if there is enough work to fill the initial few rounds of this
|
||||
// before amplification takes effect
|
||||
// can do a thing where it initially just blasts child nodes out until the size is small enough
|
||||
|
||||
|
||||
|
||||
// NOTE: since matrix multiplication distributes over addition
|
||||
// can precompute the AABB corners with respect to the matrix
|
||||
// then you can just add a translation vector
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//TODO: can do in another way
|
||||
// first compute the sections that should either render self or childs
|
||||
// then, as a separate job, work through that queue
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//Number of children of the node.
//TODO: not implemented yet. Returns 0 as an explicit placeholder, since falling off the end of
// a non-void function leaves the result undefined
uint getChildCount(UnpackedNode node) {
    return 0u;
}
|
||||
|
||||
|
||||
//Checks whether a node should be culled based on hiz/frustum
//TODO: not implemented yet. Returns false (never cull) as an explicit placeholder, since falling
// off the end of a non-void function leaves the result undefined
bool cullNode(UnpackedNode node) {
    return false;
}
|
||||
|
||||
//Should render this node, or recurse to children
//TODO: not implemented yet. Returns false (render this node) as an explicit placeholder, since
// falling off the end of a non-void function leaves the result undefined
bool shouldRenderChildrenInstead(UnpackedNode node) {
    return false;
}
|
||||
|
||||
//Does the node have its own mesh loaded
//TODO: not implemented yet. Returns false as an explicit placeholder, since falling off the end
// of a non-void function leaves the result undefined
bool nodeHasSelfMesh(UnpackedNode node) {
    return false;
}
|
||||
|
||||
//Does the node have its children loaded (note! not child meshes)
//TODO: not implemented yet. Returns false as an explicit placeholder, since falling off the end
// of a non-void function leaves the result undefined
bool nodeHasChildrenLoaded(UnpackedNode node) {
    return false;
}
|
||||
|
||||
//Are all the children's meshes loaded
//TODO: not implemented yet. Returns false as an explicit placeholder, since falling off the end
// of a non-void function leaves the result undefined
bool nodeHasChildMeshesLoaded(UnpackedNode node) {
    return false;
}
|
||||
|
||||
//Issues a request of the given type for node idx.
//TODO: empty stub, nothing is written yet
void request(uint type, uint idx) {
}
|
||||
|
||||
//Emits the mesh at idx for rendering.
//TODO: empty stub, nothing is emitted yet
void renderMesh(uint idx) {
}
|
||||
|
||||
//Enqueues the children of node into the work queue.
//Only the child count is fetched so far, nothing is enqueued yet
void enqueueChildren(uint arg, UnpackedNode node) {
    uint childCount = getChildCount(node);
    //TODO: the queue needs 2 counters, the pre and post atomic:
    // 1. pre is incremented to get the index
    // 2. the queue is written to
    // 3. post is then incremented to signal
}
|
||||
|
||||
//Reports a critical traversal error of the given type.
//TODO: empty stub, nothing is reported yet
void reportCritical(uint type) {
}
|
||||
|
||||
//Decides what to do with node idx: nothing when culled, otherwise render its own mesh,
// enqueue its children, request missing data, or report a critical error when nothing can be drawn
void processNode(uint idx) {
    UnpackedNode node = unpackNode(sectionNodes[idx]);
    if (cullNode(node)) {
        return;
    }

    //Should we render children instead of ourselves with respect to screenspace error
    if (shouldRenderChildrenInstead(node)) {
        if (nodeHasChildrenLoaded(node)) {
            //Dispatch nodes to queue
            enqueueChildren(0, node);
            return;
        }

        //Children arent loaded, so fall back to the self mesh
        if (!nodeHasSelfMesh(node)) {
            //Critical issue, no children are loaded and self has no mesh
            reportCritical(0);
            return;
        }

        //Render self and dispatch request to load children
        renderMesh(node.self);
        request(1, idx);
        return;
    }

    //This node is the one that should be drawn
    if (nodeHasSelfMesh(node)) {
        renderMesh(node.self);
        return;
    }

    //Request that self mesh is loaded
    request(0, idx);

    //render children instead
    if (nodeHasChildrenLoaded(node)) {//Might need to be nodeHasChildMeshesLoaded
        enqueueChildren(1, node);
    } else {
        //This is very bad, it means cant render anything
        reportCritical(1);
    }
}
|
||||
|
||||
//Pseudo code, one thread, one load
//Loops forever, claiming one work slot per iteration; the low 24 bits of the claimed value
// are used as the index and the high 8 bits as the arg
void main() {
    for (;;) {
        //Try to process a node queue entry
        uint work = atomicAdd(workingNodeQueuePos, 1);
        uint idx = work&0xFFFFFFu;
        uint arg = work>>24;
        if (idx >= workingNodeQueueEnd) {
            //Do other queue work however we still have the work slot allocated
        } else {
            //TODO: the entry is within the queue, processing it is not written yet
        }
    }
}
|
||||
Reference in New Issue
Block a user