diff --git a/src/main/java/me/cortex/voxy/client/core/VoxyRenderSystem.java b/src/main/java/me/cortex/voxy/client/core/VoxyRenderSystem.java index 60d935dd..a866648b 100644 --- a/src/main/java/me/cortex/voxy/client/core/VoxyRenderSystem.java +++ b/src/main/java/me/cortex/voxy/client/core/VoxyRenderSystem.java @@ -48,35 +48,40 @@ public class VoxyRenderSystem { //Keep the world loaded, NOTE: this is done FIRST, to keep and ensure that even if the rest of loading takes more // than timeout, we keep the world acquired world.acquireRef(); + try { + //wait for opengl to be finished, this should hopefully ensure all memory allocations are free + glFinish(); + glFinish(); - //wait for opengl to be finished, this should hopefully ensure all memory allocations are free - glFinish();glFinish(); + //Trigger the shared index buffer loading + SharedIndexBuffer.INSTANCE.id(); + Capabilities.init();//Ensure clinit is called - //Trigger the shared index buffer loading - SharedIndexBuffer.INSTANCE.id(); - Capabilities.init();//Ensure clinit is called + this.worldIn = world; + this.renderer = new RenderService(world, threadPool); + this.postProcessing = new PostProcessing(); + int minSec = MinecraftClient.getInstance().world.getBottomSectionCoord() >> 5; + int maxSec = (MinecraftClient.getInstance().world.getTopSectionCoord() - 1) >> 5; - this.worldIn = world; - this.renderer = new RenderService(world, threadPool); - this.postProcessing = new PostProcessing(); - int minSec = MinecraftClient.getInstance().world.getBottomSectionCoord()>>5; - int maxSec = (MinecraftClient.getInstance().world.getTopSectionCoord()-1)>>5; + //Do some very cheeky stuff for MiB + if (false) { + minSec = -8; + maxSec = 7; + } - //Do some very cheeky stuff for MiB - if (false) { - minSec = -8; - maxSec = 7; + this.renderDistanceTracker = new RenderDistanceTracker(20, + minSec, + maxSec, + this.renderer::addTopLevelNode, + this.renderer::removeTopLevelNode); + + this.renderDistanceTracker.setRenderDistance(VoxyConfig.CONFIG.sectionRenderDistance); + + this.chunkBoundRenderer = new ChunkBoundRenderer(); + } catch (RuntimeException e) { + world.releaseRef();//If something goes wrong, we must release the world first + throw e; } - - this.renderDistanceTracker = new RenderDistanceTracker(20, - minSec, - maxSec, - this.renderer::addTopLevelNode, - this.renderer::removeTopLevelNode); - - this.renderDistanceTracker.setRenderDistance(VoxyConfig.CONFIG.sectionRenderDistance); - - this.chunkBoundRenderer = new ChunkBoundRenderer(); } public void setRenderDistance(int renderDistance) { @@ -84,40 +89,6 @@ public class VoxyRenderSystem { } - //private static final ModelTextureBakery mtb = new ModelTextureBakery(16, 16); - //private static final RawDownloadStream downstream = new RawDownloadStream(1<<20); - public void renderSetup(Frustum frustum, Camera camera) { - TimingStatistics.resetSamplers(); - - /* - if (false) { - int allocation = downstream.download(2 * 4 * 6 * 16 * 16, ptr -> { - ColourDepthTextureData[] textureData = new ColourDepthTextureData[6]; - final int FACE_SIZE = 16 * 16; - for (int face = 0; face < 6; face++) { - long faceDataPtr = ptr + (FACE_SIZE * 4) * face * 2; - int[] colour = new int[FACE_SIZE]; - int[] depth = new int[FACE_SIZE]; - - //Copy out colour - for (int i = 0; i < FACE_SIZE; i++) { - //De-interpolate results - colour[i] = MemoryUtil.memGetInt(faceDataPtr + (i * 4 * 2)); - depth[i] = MemoryUtil.memGetInt(faceDataPtr + (i * 4 * 2) + 4); - } - - textureData[face] = new ColourDepthTextureData(colour, depth, 16, 16); - } - if (textureData[0].colour()[0] == 0) { - int a = 0; - } - }); - mtb.renderFacesToStream(Blocks.AIR.getDefaultState(), 123456, false, downstream.getBufferId(), allocation); - downstream.submit(); - downstream.tick(); - }*/ - } - private void autoBalanceSubDivSize() { //only increase quality while there are very few mesh queues, this stops, // e.g. while flying and is rendering alot of low quality chunks @@ -160,6 +131,9 @@ public class VoxyRenderSystem { if (IrisUtil.irisShadowActive()) { return; } + TimingStatistics.resetSamplers(); + + //Do some very cheeky stuff for MiB if (false) { int sector = (((int)Math.floor(cameraX)>>4)+512)>>10; diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java b/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java index b6fc2339..8df7988b 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/RenderService.java @@ -56,7 +56,7 @@ public class RenderService, J extends Vi geometryCapacity = Math.min(geometryCapacity, limit); } - //geometryCapacity = 1<<24; + //geometryCapacity = 1<<28; //geometryCapacity = 1<<30;//1GB test return geometryCapacity; } @@ -84,7 +84,7 @@ public class RenderService, J extends Vi this.nodeManager = new AsyncNodeManager(1<<21, this.geometryData, this.renderGen); this.nodeCleaner = new NodeCleaner(this.nodeManager); - this.traversal = new HierarchicalOcclusionTraverser(this.nodeManager, this.nodeCleaner); + this.traversal = new HierarchicalOcclusionTraverser(this.nodeManager, this.nodeCleaner, this.renderGen); world.setDirtyCallback(this.nodeManager::worldEvent); @@ -106,6 +106,18 @@ public class RenderService, J extends Vi this.modelService.tick(budget); } + private boolean frexStillHasWork() { + if (!VoxyClient.isFrexActive()) { + return false; + } + //If frex is running we must tick everything to ensure correctness + UploadStream.INSTANCE.tick(); + //Done here as is allows less gl state resetup + this.modelService.tick(100_000_000); + glFinish(); + return this.nodeManager.hasWork() || this.renderGen.getTaskCount()!=0 || !this.modelService.areQueuesEmpty(); + } + public void renderFarAwayOpaque(J viewport, GlTexture depthBoundTexture) { //LightMapHelper.tickLightmap(); @@ -121,6 +133,13 @@ public class RenderService, J extends Vi this.sectionRenderer.renderOpaque(viewport, depthBoundTexture); TimingStatistics.G.stop(); + { + int depthBuffer = glGetFramebufferAttachmentParameteri(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME); + + //Compute the mip chain + viewport.hiZBuffer.buildMipChain(depthBuffer, viewport.width, viewport.height); + } + do { //NOTE: need to do the upload and download tick here, after the section renderer renders the world, to ensure "stable" // sections @@ -157,23 +176,11 @@ public class RenderService, J extends Vi glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT); - int depthBuffer = glGetFramebufferAttachmentParameteri(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME); - //if (depthBuffer == 0) { - // depthBuffer = glGetFramebufferAttachmentParameteri(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME); - //} - TimingStatistics.I.start(); - this.traversal.doTraversal(viewport, depthBuffer); + this.traversal.doTraversal(viewport); TimingStatistics.I.stop(); - - if (VoxyClient.isFrexActive()) {//If frex is running we must tick everything to ensure correctness - UploadStream.INSTANCE.tick(); - //Done here as is allows less gl state resetup - this.tickModelService(100_000_000); - glFinish(); - } - } while (VoxyClient.isFrexActive() && (this.nodeManager.hasWork() || this.renderGen.getTaskCount()!=0 || !this.modelService.areQueuesEmpty())); + } while (this.frexStillHasWork()); TimingStatistics.H.start(); diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/AsyncNodeManager.java b/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/AsyncNodeManager.java index 344fa886..daaaa499 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/AsyncNodeManager.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/AsyncNodeManager.java @@ -179,6 +179,7 @@ public class AsyncNodeManager { private void run() { if (this.workCounter.get() <= 0) { + //TODO: here, instead of parking, we can do more work on other sub-tasks such as filtering the mesh build queue LockSupport.park(); if (this.workCounter.get() <= 0 || !this.running) {//No work return; @@ -753,7 +754,7 @@ public class AsyncNodeManager { } public boolean hasWork() { - return this.workCounter.get()!=0 && RESULT_HANDLE.get(this) != null; + return this.workCounter.get()!=0 || RESULT_HANDLE.get(this) != null; } public void worldEvent(WorldSection section, int flags) { diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/HierarchicalOcclusionTraverser.java b/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/HierarchicalOcclusionTraverser.java index cc203ad7..35038fb8 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/HierarchicalOcclusionTraverser.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/HierarchicalOcclusionTraverser.java @@ -7,6 +7,7 @@ import me.cortex.voxy.client.core.gl.GlBuffer; import me.cortex.voxy.client.core.gl.shader.AutoBindingShader; import me.cortex.voxy.client.core.gl.shader.Shader; import me.cortex.voxy.client.core.gl.shader.ShaderType; +import me.cortex.voxy.client.core.rendering.building.RenderGenerationService; import me.cortex.voxy.client.core.rendering.util.PrintfDebugUtil; import me.cortex.voxy.client.core.rendering.util.HiZBuffer; import me.cortex.voxy.client.core.rendering.Viewport; @@ -30,7 +31,7 @@ import static org.lwjgl.opengl.GL45.*; public class HierarchicalOcclusionTraverser { public static final boolean HIERARCHICAL_SHADER_DEBUG = System.getProperty("voxy.hierarchicalShaderDebug", "false").equals("true"); - public static final int REQUEST_QUEUE_SIZE = 50; + public static final int MAX_REQUEST_QUEUE_SIZE = 50; public static final int MAX_QUEUE_SIZE = 200_000; @@ -39,6 +40,7 @@ public class HierarchicalOcclusionTraverser { private final AsyncNodeManager nodeManager; private final NodeCleaner nodeCleaner; + private final RenderGenerationService meshGen; private final GlBuffer requestBuffer; @@ -73,7 +75,7 @@ public class HierarchicalOcclusionTraverser { .defineIf("DEBUG", HIERARCHICAL_SHADER_DEBUG) .define("MAX_ITERATIONS", MAX_ITERATIONS) .define("LOCAL_SIZE_BITS", LOCAL_WORK_SIZE_BITS) - .define("REQUEST_QUEUE_SIZE", REQUEST_QUEUE_SIZE) + .define("MAX_REQUEST_QUEUE_SIZE", MAX_REQUEST_QUEUE_SIZE) .define("HIZ_BINDING", 0) @@ -96,19 +98,18 @@ public class HierarchicalOcclusionTraverser { .compile(); - public HierarchicalOcclusionTraverser(AsyncNodeManager nodeManager, NodeCleaner nodeCleaner) { + public HierarchicalOcclusionTraverser(AsyncNodeManager nodeManager, NodeCleaner nodeCleaner, RenderGenerationService meshGen) { this.nodeCleaner = nodeCleaner; this.nodeManager = nodeManager; - this.requestBuffer = new GlBuffer(REQUEST_QUEUE_SIZE*8L+8).zero(); + this.meshGen = meshGen; + this.requestBuffer = new GlBuffer(MAX_REQUEST_QUEUE_SIZE*8L+8).zero(); this.nodeBuffer = new GlBuffer(nodeManager.maxNodeCount*16L).fill(-1); - glSamplerParameteri(this.hizSampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_NEAREST); - glSamplerParameteri(this.hizSampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glSamplerParameteri(this.hizSampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST_MIPMAP_NEAREST); + glSamplerParameteri(this.hizSampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glSamplerParameteri(this.hizSampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glSamplerParameteri(this.hizSampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glSamplerParameteri(this.hizSampler, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); - glSamplerParameteri(this.hizSampler, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL); this.traversal .ubo("SCENE_UNIFORM_BINDING", this.uniformBuffer) @@ -175,23 +176,31 @@ public class HierarchicalOcclusionTraverser { viewport.section.getToAddress(ptr); ptr += 4*3; - MemoryUtil.memPutFloat(ptr, viewport.width); ptr += 4; + //MemoryUtil.memPutFloat(ptr, viewport.width); ptr += 4; + MemoryUtil.memPutInt(ptr, viewport.hiZBuffer.getPackedLevels()); ptr += 4; viewport.innerTranslation.getToAddress(ptr); ptr += 4*3; - MemoryUtil.memPutFloat(ptr, viewport.height); ptr += 4; - - setFrustum(viewport, ptr); ptr += 4*4*6; - - MemoryUtil.memPutInt(ptr, (int) (viewport.getRenderList().size()/4-1)); ptr += 4; - + //MemoryUtil.memPutFloat(ptr, viewport.height); ptr += 4; final float screenspaceAreaDecreasingSize = VoxyConfig.CONFIG.subDivisionSize*VoxyConfig.CONFIG.subDivisionSize; //Screen space size for descending MemoryUtil.memPutFloat(ptr, (float) (screenspaceAreaDecreasingSize) /(viewport.width*viewport.height)); ptr += 4; + setFrustum(viewport, ptr); ptr += 4*4*6; + + MemoryUtil.memPutInt(ptr, (int) (viewport.getRenderList().size()/4-1)); ptr += 4; + //VisibilityId MemoryUtil.memPutInt(ptr, this.nodeCleaner.visibilityId); ptr += 4; + + { + final double TARGET_COUNT = 4000;//TODO: make this configurable, or at least dynamically computed based on throughput rate of mesh gen + double iFillness = Math.max(0, (TARGET_COUNT - this.meshGen.getTaskCount()) / TARGET_COUNT); + iFillness = Math.pow(iFillness, 2); + final int requestSize = (int) Math.ceil(iFillness * MAX_REQUEST_QUEUE_SIZE); + MemoryUtil.memPutInt(ptr, Math.max(0, Math.min(MAX_REQUEST_QUEUE_SIZE, requestSize)));ptr += 4; + } } private void bindings(Viewport viewport) { @@ -203,10 +212,7 @@ public class HierarchicalOcclusionTraverser { glBindBufferBase(GL_SHADER_STORAGE_BUFFER, RENDER_QUEUE_BINDING, viewport.getRenderList().id); } - public void doTraversal(Viewport viewport, int depthBuffer) { - //Compute the mip chain - viewport.hiZBuffer.buildMipChain(depthBuffer, viewport.width, viewport.height); - + public void doTraversal(Viewport viewport) { this.uploadUniform(viewport); //UploadStream.INSTANCE.commit(); //Done inside traversal diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/NodeManager.java b/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/NodeManager.java index 1e560753..37a27d7c 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/NodeManager.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/hierachical/NodeManager.java @@ -1213,7 +1213,8 @@ public class NodeManager { if (!this.nodeData.isNodeGeometryInFlight(nodeId)) { if (!this.watcher.watch(pos, WorldEngine.UPDATE_TYPE_BLOCK_BIT)) { - Logger.info("Node: " + nodeId + " at pos: " + WorldEngine.pprintPos(pos) + " got update request, but geometry was already being watched"); + //Logger.info("Node: " + nodeId + " at pos: " + WorldEngine.pprintPos(pos) + " got update request, but geometry was already being watched"); + this.invalidateNode(nodeId);//Who knows why but just invalidate the data just to keep in sync } else { this.nodeData.markNodeGeometryInFlight(nodeId); } diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/section/MDICSectionRenderer.java b/src/main/java/me/cortex/voxy/client/core/rendering/section/MDICSectionRenderer.java index e1a68ed1..34b89a58 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/section/MDICSectionRenderer.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/section/MDICSectionRenderer.java @@ -172,6 +172,7 @@ public class MDICSectionRenderer extends AbstractSectionRenderer>16)&0xFFFF,packedHizSize&0xFFFF); + vec2 size = (maxBB.xy-minBB.xy)*ssize; + float miplevel = log2(max(max(size.x, size.y),1)); - //TODO: make a path for if the miplevel would result in the textureSampler sampling a size of 1 + miplevel = floor(miplevel)-1; + miplevel = clamp(miplevel, 0, textureQueryLevels(hizDepthSampler)-1); + int ml = int(miplevel); + ssize = max(ivec2(1), ssize>>ml); + ivec2 mxbb = ivec2(maxBB.xy*ssize); + ivec2 mnbb = ivec2(minBB.xy*ssize); - miplevel = ceil(miplevel); - miplevel = clamp(miplevel, 0, 20); - - if (miplevel >= 10.0f) {//Level 9 or 10// TODO: FIX THIS JANK SHIT - //return false; + float pointSample = -1.0f; + //float pointSample2 = 0.0f; + for (int x = mnbb.x; x<=mxbb.x; x++) { + for (int y = mnbb.y; y<=mxbb.y; y++) { + float sp = texelFetch(hizDepthSampler, ivec2(x, y), ml).r; + //pointSample2 = max(sp, pointSample2); + //sp = mix(sp, pointSample, 0.9999999f<=sp); + pointSample = max(sp, pointSample); + } } + //pointSample = mix(pointSample, pointSample2, pointSample<=0.000001f); - vec2 midpoint = (maxBB.xy + minBB.xy)*0.5f; - - float testAgainst = minBB.z; - //the *2.0f-1.0f converts from the 0->1 range to -1->1 range that depth is in (not having this causes tighter bounds, but causes culling issues in caves) - testAgainst = testAgainst*2.0f-1.0f; - - bool culled = textureLod(hizDepthSampler, clamp(vec3(midpoint, testAgainst), vec3(0), vec3(1)), miplevel) < 0.0001f; - - //printf("HiZ sample point: (%f,%f)@%f against %f", midpoint.x, midpoint.y, miplevel, minBB.z); - //if ((culled) && node22.lodLevel == 0) { - // printf("HiZ sample point: (%f,%f)@%f against %f, value %f", midpoint.x, midpoint.y, miplevel, minBB.z, textureLod(hizDepthSampler, vec3(0.5f,0.5f, 0.000000001f), 9.0f)); - //} - return culled; + return pointSample<=minBB.z; } diff --git a/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal_dev.comp b/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal_dev.comp index 642a339e..db1faa01 100644 --- a/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal_dev.comp +++ b/src/main/resources/assets/voxy/shaders/lod/hierarchical/traversal_dev.comp @@ -10,13 +10,13 @@ layout(local_size_x=LOCAL_SIZE) in;//, local_size_y=1 layout(binding = SCENE_UNIFORM_BINDING, std140) uniform SceneUniform { mat4 VP; ivec3 camSecPos; - float screenW; + int packedHizSize; vec3 camSubSecPos; - float screenH; + float minSSS; Frustum frustum; uint renderQueueMaxSize; - float minSSS; uint frameId; + uint requestQueueSize; }; #import @@ -49,9 +49,9 @@ layout(binding = STATISTICS_BUFFER_BINDING, std430) restrict buffer statisticsBu void addRequest(inout UnpackedNode node) { //printf("Put node decend request"); if (!hasRequested(node)) { - if (requestQueueIndex.x < REQUEST_QUEUE_SIZE) { + if (requestQueueIndex.x < requestQueueSize) {//Soft limit uint atomRes = atomicAdd(requestQueueIndex.x, 1); - if (atomRes < REQUEST_QUEUE_SIZE) { + if (atomRes < MAX_REQUEST_QUEUE_SIZE) {//Hard limit //Mark node as having a request submitted to prevent duplicate submissions requestQueue[atomRes] = getRawPos(node); markRequested(node);