implemented gpu traversal i think?
This commit is contained in:
@@ -63,7 +63,10 @@ public class RenderService<T extends AbstractSectionRenderer<J, ?>, J extends Vi
|
|||||||
this.viewportSelector = new ViewportSelector<>(this.sectionRenderer::createViewport);
|
this.viewportSelector = new ViewportSelector<>(this.sectionRenderer::createViewport);
|
||||||
this.renderGen = new RenderGenerationService(world, this.modelService, serviceThreadPool, this.geometryUpdateQueue::push, this.sectionRenderer.getGeometryManager() instanceof IUsesMeshlets);
|
this.renderGen = new RenderGenerationService(world, this.modelService, serviceThreadPool, this.geometryUpdateQueue::push, this.sectionRenderer.getGeometryManager() instanceof IUsesMeshlets);
|
||||||
|
|
||||||
router.setCallbacks(this.renderGen::enqueueTask, this.sectionUpdateQueue::push);
|
router.setCallbacks(this.renderGen::enqueueTask, section -> {
|
||||||
|
section.acquire();
|
||||||
|
this.sectionUpdateQueue.push(section);
|
||||||
|
});
|
||||||
|
|
||||||
this.traversal = new HierarchicalOcclusionTraverser(this.nodeManager, 512);
|
this.traversal = new HierarchicalOcclusionTraverser(this.nodeManager, 512);
|
||||||
|
|
||||||
|
|||||||
@@ -111,7 +111,7 @@ public class HierarchicalOcclusionTraverser {
|
|||||||
MemoryUtil.memPutInt(ptr, viewport.height); ptr += 4;
|
MemoryUtil.memPutInt(ptr, viewport.height); ptr += 4;
|
||||||
|
|
||||||
MemoryUtil.memPutInt(ptr, NodeManager.REQUEST_QUEUE_SIZE); ptr += 4;
|
MemoryUtil.memPutInt(ptr, NodeManager.REQUEST_QUEUE_SIZE); ptr += 4;
|
||||||
MemoryUtil.memPutInt(ptr, 1000000); ptr += 4;
|
MemoryUtil.memPutInt(ptr, (int) (this.renderList.size()/4-1)); ptr += 4;//TODO maybe move this to a #define
|
||||||
|
|
||||||
//Screen space size for descending
|
//Screen space size for descending
|
||||||
MemoryUtil.memPutFloat(ptr, 64*64); ptr += 4;
|
MemoryUtil.memPutFloat(ptr, 64*64); ptr += 4;
|
||||||
@@ -141,10 +141,16 @@ public class HierarchicalOcclusionTraverser {
|
|||||||
this.bindings();
|
this.bindings();
|
||||||
PrintfDebugUtil.bind();
|
PrintfDebugUtil.bind();
|
||||||
|
|
||||||
|
|
||||||
this.traverseInternal(1);
|
this.traverseInternal(1);
|
||||||
|
|
||||||
|
|
||||||
this.downloadResetRequestQueue();
|
this.downloadResetRequestQueue();
|
||||||
|
|
||||||
|
|
||||||
|
//Bind the hiz buffer
|
||||||
|
glBindSampler(0, 0);
|
||||||
|
glBindTextureUnit(0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void traverseInternal(int initialQueueSize) {
|
private void traverseInternal(int initialQueueSize) {
|
||||||
@@ -157,6 +163,10 @@ public class HierarchicalOcclusionTraverser {
|
|||||||
glPixelStorei(GL_UNPACK_SKIP_IMAGES, 0);
|
glPixelStorei(GL_UNPACK_SKIP_IMAGES, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Clear the render output counter
|
||||||
|
nglClearNamedBufferSubData(this.renderList.id, GL_R32UI, 0, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);
|
||||||
|
|
||||||
|
|
||||||
int firstDispatchSize = (initialQueueSize+(1<<LOCAL_WORK_SIZE_BITS)-1)>>LOCAL_WORK_SIZE_BITS;
|
int firstDispatchSize = (initialQueueSize+(1<<LOCAL_WORK_SIZE_BITS)-1)>>LOCAL_WORK_SIZE_BITS;
|
||||||
/*
|
/*
|
||||||
//prime the queue Todo: maybe move after the traversal? cause then it is more efficient work since it doesnt need to wait for this before starting?
|
//prime the queue Todo: maybe move after the traversal? cause then it is more efficient work since it doesnt need to wait for this before starting?
|
||||||
|
|||||||
@@ -97,5 +97,5 @@ void markRequested(inout UnpackedNode node) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void debugDumpNode(in UnpackedNode node) {
|
void debugDumpNode(in UnpackedNode node) {
|
||||||
//printf("Node %d, %d@[%d,%d,%d], flags: %d, mesh: %d, ChildPtr: %d", node.nodeId, node.lodLevel, node.pos.x, node.pos.y, node.pos.z, node.flags, node.meshPtr, node.childPtr);
|
printf("Node %d, %d@[%d,%d,%d], flags: %d, mesh: %d, ChildPtr: %d", node.nodeId, node.lodLevel, node.pos.x, node.pos.y, node.pos.z, node.flags, node.meshPtr, node.childPtr);
|
||||||
}
|
}
|
||||||
@@ -51,7 +51,7 @@ void pushNode(uint nodeId) {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define SIMPLE_QUEUE(name, binding) layout(binding = binding, std430) restrict buffer name##Struct { \
|
#define SIMPLE_QUEUE(name, bindingIndex) layout(binding = bindingIndex, std430) restrict buffer name##Struct { \
|
||||||
uint name##Index; \
|
uint name##Index; \
|
||||||
uint[] name; \
|
uint[] name; \
|
||||||
};
|
};
|
||||||
@@ -85,6 +85,6 @@ bool isCulledByHiz() {
|
|||||||
|
|
||||||
//Returns if we should decend into its children or not
|
//Returns if we should decend into its children or not
|
||||||
bool shouldDecend() {
|
bool shouldDecend() {
|
||||||
//printf("Screen area %f: %f, %f", (size.x*size.y*float(screenW)*float(screenH)), float(screenW), float(screenH));
|
printf("Screen area %f: %f, %f", (size.x*size.y*float(screenW)*float(screenH)), float(screenW), float(screenH));
|
||||||
return (size.x*size.y*screenW*screenH) > decendSSS;
|
return (size.x*size.y*screenW*screenH) > minSSS;
|
||||||
}
|
}
|
||||||
@@ -5,6 +5,17 @@
|
|||||||
#define LOCAL_SIZE (1<<LOCAL_SIZE_BITS)
|
#define LOCAL_SIZE (1<<LOCAL_SIZE_BITS)
|
||||||
layout(local_size_x=LOCAL_SIZE) in;//, local_size_y=1
|
layout(local_size_x=LOCAL_SIZE) in;//, local_size_y=1
|
||||||
|
|
||||||
|
layout(binding = SCENE_UNIFORM_BINDING, std140) uniform SceneUniform { // Scene parameters read by the traversal shader; field order must match the host-side writes
|
||||||
|
mat4 VP; // NOTE(review): presumably the combined view-projection matrix — confirm against the host-side upload
|
||||||
|
ivec3 camSecPos; // NOTE(review): name suggests the camera position in whole sections — confirm
|
||||||
|
uint screenW; // std140: a scalar directly after a 3-component vector lands in the 4 bytes left over in that vector's 16-byte slot
|
||||||
|
vec3 camSubSecPos; // NOTE(review): name suggests the camera offset inside its section — confirm
|
||||||
|
uint screenH; // Packed like screenW: fills the 4 bytes after the preceding vec3
|
||||||
|
uint requestQueueMaxSize; // Capacity compared against requestQueueIndex in addRequest
|
||||||
|
uint renderQueueMaxSize; // Capacity compared against renderQueueIndex in enqueueSelfForRender; the host side appears to write renderList.size()/4-1 here
|
||||||
|
float minSSS; // Screen-space area threshold used by shouldDecend; the host side appears to write 64*64 here
|
||||||
|
};
|
||||||
|
|
||||||
#import <voxy:lod/hierarchical/queue.glsl>
|
#import <voxy:lod/hierarchical/queue.glsl>
|
||||||
#import <voxy:lod/hierarchical/node.glsl>
|
#import <voxy:lod/hierarchical/node.glsl>
|
||||||
#import <voxy:lod/hierarchical/screenspace.glsl>
|
#import <voxy:lod/hierarchical/screenspace.glsl>
|
||||||
@@ -12,23 +23,90 @@ layout(local_size_x=LOCAL_SIZE) in;//, local_size_y=1
|
|||||||
SIMPLE_QUEUE(requestQueue, REQUEST_QUEUE_BINDING);
|
SIMPLE_QUEUE(requestQueue, REQUEST_QUEUE_BINDING);
|
||||||
SIMPLE_QUEUE(renderQueue, RENDER_QUEUE_BINDING);
|
SIMPLE_QUEUE(renderQueue, RENDER_QUEUE_BINDING);
|
||||||
|
|
||||||
/*
|
void addRequest(inout UnpackedNode node) { // Writes this node's id into requestQueue and marks the node, unless hasRequested(node) is already true or the queue is full
|
||||||
layout(binding = REQUEST_QUEUE_INDEX, std430) restrict buffer RequestQueue {
|
printf("Put node decend request"); // NOTE(review): runs on every call, before the duplicate and capacity checks — presumably temporary debug output
|
||||||
uint requestQueueIndex;
|
if (!hasRequested(node)) { // Nothing is queued for a node that hasRequested() reports as already requested
|
||||||
uint[] requestQueue;
|
if (requestQueueIndex < requestQueueMaxSize) { // NOTE(review): this is a plain read, separate from the atomicAdd below; concurrent invocations can all pass it and then write past requestQueueMaxSize — confirm the buffer has slack or bound the atomicAdd result
|
||||||
};
|
//Append the node id to the request queue, then mark the node as requested to prevent duplicate submissions
|
||||||
|
requestQueue[atomicAdd(requestQueueIndex, 1)] = getId(node); // atomicAdd returns the pre-increment value, which is used as the write slot
|
||||||
|
markRequested(node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
layout(binding = RENDER_QUEUE_INDEX, std430) restrict buffer RenderQueue {
|
void enqueueChildren(in UnpackedNode node) { // Calls pushNode for each child id, getChildPtr(node) + 0 .. getChildCount(node) - 1
|
||||||
uint renderQueueIndex;
|
uint children = getChildCount(node);
|
||||||
uint[] renderQueue;
|
pushNodesInit(children); // NOTE(review): presumably reserves queue space for the pushNode calls below — confirm in queue.glsl
|
||||||
};*/
|
uint ptr = getChildPtr(node); // Children are addressed as consecutive ids starting here (see ptr+i below)
|
||||||
|
for (int i = 0; i < children; i++) { // NOTE(review): signed i is compared with unsigned children, relying on implicit int-to-uint conversion
|
||||||
|
pushNode(ptr+i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void enqueueSelfForRender(in UnpackedNode node) { // Appends getMesh(node) to renderQueue when the mesh is not empty and the queue index is below renderQueueMaxSize
|
||||||
|
//printf("render %d@[%d,%d,%d]", node.lodLevel, node.pos.x, node.pos.y, node.pos.z);
|
||||||
|
if ((!isEmptyMesh(node)) && renderQueueIndex < renderQueueMaxSize) { // NOTE(review): the capacity test is a plain read, separate from the atomicAdd below; concurrent invocations can overshoot renderQueueMaxSize — confirm the buffer has slack
|
||||||
|
renderQueue[atomicAdd(renderQueueIndex, 1)] = getMesh(node); // atomicAdd returns the pre-increment value, which is used as the write slot
|
||||||
|
#ifdef IS_DEBUG
|
||||||
|
debugRenderNodeQueue[atomicAdd(debugRenderNodeQueueIndex, 1)] = node.nodeId; // With IS_DEBUG defined, also record which node produced the render entry
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void traverse(in UnpackedNode node) { // Decides for one node whether it is culled, descended into, rendered itself, and/or requested
|
||||||
|
//Compute screenspace data for this node; the argument-less tests below presumably read what this sets up
|
||||||
|
setupScreenspace(node);
|
||||||
|
debugDumpNode(node);
|
||||||
|
|
||||||
|
if (outsideFrustum() || isCulledByHiz()) { // Culled nodes produce no queue writes at all
|
||||||
|
printf("culled");
|
||||||
|
} else {
|
||||||
|
//It is visible, TODO: maybe do a more detailed hiz test? (or make it so that ... — original note is unfinished)
|
||||||
|
|
||||||
|
//Only descend when lodLevel is non-zero and shouldDecend() passes. NOTE(review): the original comment said "not a root node" — confirm that lodLevel 0 is what was meant
|
||||||
|
if (node.lodLevel!=0 && shouldDecend()) {
|
||||||
|
if (hasChildren(node)) {
|
||||||
|
printf("A");
|
||||||
|
enqueueChildren(node);
|
||||||
|
} else {
|
||||||
|
printf("B");
|
||||||
|
addRequest(node); // No children available yet: request them and render this node itself in the meantime
|
||||||
|
//TODO: use self mesh (it is an error state if it doesn't have one, since all leaf nodes should have a mesh)
|
||||||
|
// Basically guaranteed to have a mesh; if it doesn't, that is very bad and incorrect, since it violates the graph property
|
||||||
|
// that all leaf nodes must contain a mesh
|
||||||
|
enqueueSelfForRender(node);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (hasMesh(node)) {
|
||||||
|
printf("C");
|
||||||
|
enqueueSelfForRender(node);
|
||||||
|
} else {
|
||||||
|
printf("D");
|
||||||
|
//!! Not ideal: we want to render this mesh but don't have it. If we haven't sent a request,
|
||||||
|
// then send a request for a mesh for this node.
|
||||||
|
addRequest(node);
|
||||||
|
|
||||||
|
//TODO: Descend into children? Maybe add a bitflag saying it is bad if the immediate children don't have meshes. NOTE(review): this branch is also reached when lodLevel == 0, and there is no hasChildren check before the call below — confirm getChildCount is safe here
|
||||||
|
enqueueChildren(node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
uint node = getCurrentNode();
|
uint nodeId = getCurrentNode();
|
||||||
if (node != SENTINAL_OUT_OF_BOUNDS) {
|
if (nodeId != SENTINAL_OUT_OF_BOUNDS) {
|
||||||
|
//Fetch + decode node
|
||||||
|
UnpackedNode node;
|
||||||
|
unpackNode(node, nodeId);
|
||||||
|
|
||||||
|
traverse(node);
|
||||||
|
|
||||||
|
/*
|
||||||
printf("GID:%d, NODE %d, %d, AA, %d, %d, %d, %d", gl_GlobalInvocationID.x, node, queueIdx, nodeQueueMetadata[queueIdx].x, nodeQueueMetadata[queueIdx].y, nodeQueueMetadata[queueIdx].z, nodeQueueMetadata[queueIdx].w);
|
printf("GID:%d, NODE %d, %d, AA, %d, %d, %d, %d", gl_GlobalInvocationID.x, node, queueIdx, nodeQueueMetadata[queueIdx].x, nodeQueueMetadata[queueIdx].y, nodeQueueMetadata[queueIdx].z, nodeQueueMetadata[queueIdx].w);
|
||||||
pushNodesInit(1);
|
pushNodesInit(1);
|
||||||
pushNode(node);
|
pushNode(node);
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user