Added GPU compute memcpy + CPU-side timing statistics
This commit is contained in:
@@ -75,6 +75,17 @@ public class TimingStatistics {
|
|||||||
public static TimeSampler dynamic = new TimeSampler();
|
public static TimeSampler dynamic = new TimeSampler();
|
||||||
public static TimeSampler postDynamic = new TimeSampler();
|
public static TimeSampler postDynamic = new TimeSampler();
|
||||||
|
|
||||||
|
public static TimeSampler A = new TimeSampler();
|
||||||
|
public static TimeSampler B = new TimeSampler();
|
||||||
|
public static TimeSampler C = new TimeSampler();
|
||||||
|
public static TimeSampler D = new TimeSampler();
|
||||||
|
|
||||||
|
public static TimeSampler E = new TimeSampler();
|
||||||
|
public static TimeSampler F = new TimeSampler();
|
||||||
|
public static TimeSampler G = new TimeSampler();
|
||||||
|
public static TimeSampler H = new TimeSampler();
|
||||||
|
public static TimeSampler I = new TimeSampler();
|
||||||
|
|
||||||
|
|
||||||
public static void update() {
|
public static void update() {
|
||||||
updateSamplers();
|
updateSamplers();
|
||||||
|
|||||||
@@ -188,20 +188,32 @@ public class VoxyRenderSystem {
|
|||||||
throw new IllegalStateException("Cannot use the default framebuffer as cannot source from it");
|
throw new IllegalStateException("Cannot use the default framebuffer as cannot source from it");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TimingStatistics.E.start();
|
||||||
this.chunkBoundRenderer.render(viewport);
|
this.chunkBoundRenderer.render(viewport);
|
||||||
|
TimingStatistics.E.stop();
|
||||||
|
|
||||||
|
TimingStatistics.F.start();
|
||||||
this.postProcessing.setup(target.textureWidth, target.textureHeight, boundFB);
|
this.postProcessing.setup(target.textureWidth, target.textureHeight, boundFB);
|
||||||
|
TimingStatistics.F.stop();
|
||||||
|
|
||||||
this.renderer.renderFarAwayOpaque(viewport, this.chunkBoundRenderer.getDepthBoundTexture(), startTime);
|
this.renderer.renderFarAwayOpaque(viewport, this.chunkBoundRenderer.getDepthBoundTexture(), startTime);
|
||||||
|
|
||||||
|
|
||||||
|
TimingStatistics.F.start();
|
||||||
//Compute the SSAO of the rendered terrain, TODO: fix it breaking depth or breaking _something_ am not sure what
|
//Compute the SSAO of the rendered terrain, TODO: fix it breaking depth or breaking _something_ am not sure what
|
||||||
this.postProcessing.computeSSAO(viewport.MVP);
|
this.postProcessing.computeSSAO(viewport.MVP);
|
||||||
|
TimingStatistics.F.stop();
|
||||||
|
|
||||||
|
TimingStatistics.G.start();
|
||||||
//We can render the translucent directly after as it is the furthest translucent objects
|
//We can render the translucent directly after as it is the furthest translucent objects
|
||||||
this.renderer.renderFarAwayTranslucent(viewport, this.chunkBoundRenderer.getDepthBoundTexture());
|
this.renderer.renderFarAwayTranslucent(viewport, this.chunkBoundRenderer.getDepthBoundTexture());
|
||||||
|
TimingStatistics.G.stop();
|
||||||
|
|
||||||
|
|
||||||
|
TimingStatistics.F.start();
|
||||||
this.postProcessing.renderPost(projection, matrices.projection(), boundFB);
|
this.postProcessing.renderPost(projection, matrices.projection(), boundFB);
|
||||||
|
TimingStatistics.F.stop();
|
||||||
|
|
||||||
TimingStatistics.main.stop();
|
TimingStatistics.main.stop();
|
||||||
TimingStatistics.postDynamic.start();
|
TimingStatistics.postDynamic.start();
|
||||||
|
|
||||||
@@ -245,6 +257,8 @@ public class VoxyRenderSystem {
|
|||||||
{
|
{
|
||||||
TimingStatistics.update();
|
TimingStatistics.update();
|
||||||
debug.add("Voxy frame runtime (millis): " + TimingStatistics.dynamic.pVal() + ", " + TimingStatistics.main.pVal()+ ", " + TimingStatistics.postDynamic.pVal()+ ", " + TimingStatistics.all.pVal());
|
debug.add("Voxy frame runtime (millis): " + TimingStatistics.dynamic.pVal() + ", " + TimingStatistics.main.pVal()+ ", " + TimingStatistics.postDynamic.pVal()+ ", " + TimingStatistics.all.pVal());
|
||||||
|
debug.add("Extra time: " + TimingStatistics.A.pVal() + ", " + TimingStatistics.B.pVal() + ", " + TimingStatistics.C.pVal() + ", " + TimingStatistics.D.pVal());
|
||||||
|
debug.add("Extra 2 time: " + TimingStatistics.E.pVal() + ", " + TimingStatistics.F.pVal() + ", " + TimingStatistics.G.pVal() + ", " + TimingStatistics.H.pVal() + ", " + TimingStatistics.I.pVal());
|
||||||
}
|
}
|
||||||
PrintfDebugUtil.addToOut(debug);
|
PrintfDebugUtil.addToOut(debug);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -112,7 +112,9 @@ public class RenderService<T extends AbstractSectionRenderer<J, Q>, J extends Vi
|
|||||||
// the section renderer is as it might have different backends, but they all accept a buffer containing the section list
|
// the section renderer is as it might have different backends, but they all accept a buffer containing the section list
|
||||||
|
|
||||||
|
|
||||||
|
TimingStatistics.G.start();
|
||||||
this.sectionRenderer.renderOpaque(viewport, depthBoundTexture);
|
this.sectionRenderer.renderOpaque(viewport, depthBoundTexture);
|
||||||
|
TimingStatistics.G.stop();
|
||||||
|
|
||||||
//NOTE: need to do the upload and download tick here, after the section renderer renders the world, to ensure "stable"
|
//NOTE: need to do the upload and download tick here, after the section renderer renders the world, to ensure "stable"
|
||||||
// sections
|
// sections
|
||||||
@@ -140,8 +142,10 @@ public class RenderService<T extends AbstractSectionRenderer<J, Q>, J extends Vi
|
|||||||
}*/
|
}*/
|
||||||
|
|
||||||
|
|
||||||
|
TimingStatistics.D.start();
|
||||||
//Tick download stream
|
//Tick download stream
|
||||||
DownloadStream.INSTANCE.tick();
|
DownloadStream.INSTANCE.tick();
|
||||||
|
TimingStatistics.D.stop();
|
||||||
|
|
||||||
this.nodeManager.tick(this.traversal.getNodeBuffer(), this.nodeCleaner);
|
this.nodeManager.tick(this.traversal.getNodeBuffer(), this.nodeCleaner);
|
||||||
//glFlush();
|
//glFlush();
|
||||||
@@ -158,10 +162,17 @@ public class RenderService<T extends AbstractSectionRenderer<J, Q>, J extends Vi
|
|||||||
if (depthBuffer == 0) {
|
if (depthBuffer == 0) {
|
||||||
depthBuffer = glGetFramebufferAttachmentParameteri(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME);
|
depthBuffer = glGetFramebufferAttachmentParameteri(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME);
|
||||||
}
|
}
|
||||||
|
TimingStatistics.I.start();
|
||||||
this.traversal.doTraversal(viewport, depthBuffer);
|
this.traversal.doTraversal(viewport, depthBuffer);
|
||||||
|
TimingStatistics.I.stop();
|
||||||
|
|
||||||
|
TimingStatistics.H.start();
|
||||||
this.sectionRenderer.buildDrawCalls(viewport);
|
this.sectionRenderer.buildDrawCalls(viewport);
|
||||||
|
TimingStatistics.H.stop();
|
||||||
|
|
||||||
|
TimingStatistics.G.start();
|
||||||
this.sectionRenderer.renderTemporal(depthBoundTexture);
|
this.sectionRenderer.renderTemporal(depthBoundTexture);
|
||||||
|
TimingStatistics.G.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void renderFarAwayTranslucent(J viewport, GlTexture depthBoundTexture) {
|
public void renderFarAwayTranslucent(J viewport, GlTexture depthBoundTexture) {
|
||||||
@@ -172,6 +183,7 @@ public class RenderService<T extends AbstractSectionRenderer<J, Q>, J extends Vi
|
|||||||
this.modelService.addDebugData(debug);
|
this.modelService.addDebugData(debug);
|
||||||
this.renderGen.addDebugData(debug);
|
this.renderGen.addDebugData(debug);
|
||||||
this.sectionRenderer.addDebug(debug);
|
this.sectionRenderer.addDebug(debug);
|
||||||
|
this.nodeManager.addDebug(debug);
|
||||||
|
|
||||||
if (RenderStatistics.enabled) {
|
if (RenderStatistics.enabled) {
|
||||||
debug.add("HTC: [" + Arrays.stream(flipCopy(RenderStatistics.hierarchicalTraversalCounts)).mapToObj(Integer::toString).collect(Collectors.joining(", "))+"]");
|
debug.add("HTC: [" + Arrays.stream(flipCopy(RenderStatistics.hierarchicalTraversalCounts)).mapToObj(Integer::toString).collect(Collectors.joining(", "))+"]");
|
||||||
|
|||||||
@@ -1,10 +1,8 @@
|
|||||||
package me.cortex.voxy.client.core.rendering.hierachical;
|
package me.cortex.voxy.client.core.rendering.hierachical;
|
||||||
|
|
||||||
import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
|
import it.unimi.dsi.fastutil.ints.*;
|
||||||
import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
|
|
||||||
import it.unimi.dsi.fastutil.ints.IntConsumer;
|
|
||||||
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
|
|
||||||
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
|
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
|
||||||
|
import me.cortex.voxy.client.TimingStatistics;
|
||||||
import me.cortex.voxy.client.core.gl.GlBuffer;
|
import me.cortex.voxy.client.core.gl.GlBuffer;
|
||||||
import me.cortex.voxy.client.core.gl.shader.Shader;
|
import me.cortex.voxy.client.core.gl.shader.Shader;
|
||||||
import me.cortex.voxy.client.core.gl.shader.ShaderType;
|
import me.cortex.voxy.client.core.gl.shader.ShaderType;
|
||||||
@@ -15,12 +13,14 @@ import me.cortex.voxy.client.core.rendering.section.geometry.BasicSectionGeometr
|
|||||||
import me.cortex.voxy.client.core.rendering.section.geometry.IGeometryData;
|
import me.cortex.voxy.client.core.rendering.section.geometry.IGeometryData;
|
||||||
import me.cortex.voxy.client.core.rendering.util.UploadStream;
|
import me.cortex.voxy.client.core.rendering.util.UploadStream;
|
||||||
import me.cortex.voxy.common.Logger;
|
import me.cortex.voxy.common.Logger;
|
||||||
|
import me.cortex.voxy.common.util.AllocationArena;
|
||||||
import me.cortex.voxy.common.util.MemoryBuffer;
|
import me.cortex.voxy.common.util.MemoryBuffer;
|
||||||
import me.cortex.voxy.common.world.WorldSection;
|
import me.cortex.voxy.common.world.WorldSection;
|
||||||
import org.lwjgl.system.MemoryUtil;
|
import org.lwjgl.system.MemoryUtil;
|
||||||
|
|
||||||
import java.lang.invoke.MethodHandles;
|
import java.lang.invoke.MethodHandles;
|
||||||
import java.lang.invoke.VarHandle;
|
import java.lang.invoke.VarHandle;
|
||||||
|
import java.util.List;
|
||||||
import java.util.concurrent.ConcurrentLinkedDeque;
|
import java.util.concurrent.ConcurrentLinkedDeque;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.concurrent.locks.LockSupport;
|
import java.util.concurrent.locks.LockSupport;
|
||||||
@@ -31,7 +31,6 @@ import static org.lwjgl.opengl.GL30C.glUniform1ui;
|
|||||||
import static org.lwjgl.opengl.GL42C.GL_UNIFORM_BARRIER_BIT;
|
import static org.lwjgl.opengl.GL42C.GL_UNIFORM_BARRIER_BIT;
|
||||||
import static org.lwjgl.opengl.GL42C.glMemoryBarrier;
|
import static org.lwjgl.opengl.GL42C.glMemoryBarrier;
|
||||||
import static org.lwjgl.opengl.GL43C.*;
|
import static org.lwjgl.opengl.GL43C.*;
|
||||||
import static org.lwjgl.opengl.GL44.GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT;
|
|
||||||
|
|
||||||
//TODO: create an "async upload stream", that is, the upload stream is a raw mapped buffer pointer that can be written to
|
//TODO: create an "async upload stream", that is, the upload stream is a raw mapped buffer pointer that can be written to
|
||||||
// which is then synced to the gpu on "render thread sync",
|
// which is then synced to the gpu on "render thread sync",
|
||||||
@@ -68,9 +67,6 @@ public class AsyncNodeManager {
|
|||||||
private volatile SyncResults resultCache1 = new SyncResults();
|
private volatile SyncResults resultCache1 = new SyncResults();
|
||||||
private volatile SyncResults resultCache2 = new SyncResults();
|
private volatile SyncResults resultCache2 = new SyncResults();
|
||||||
|
|
||||||
//Yes. this is stupid. yes. it is a large amount of runtime. Is it profiler bias, probably
|
|
||||||
private final ConcurrentLinkedDeque<MemoryBuffer> buffersToFreeQueue = new ConcurrentLinkedDeque<>();
|
|
||||||
|
|
||||||
|
|
||||||
//locals for during iteration
|
//locals for during iteration
|
||||||
private final IntOpenHashSet tlnIdChange = new IntOpenHashSet();//"Encoded" add/remove id, first bit indicates if its add or remove, 1 is add
|
private final IntOpenHashSet tlnIdChange = new IntOpenHashSet();//"Encoded" add/remove id, first bit indicates if its add or remove, 1 is add
|
||||||
@@ -156,15 +152,14 @@ public class AsyncNodeManager {
|
|||||||
.add(ShaderType.COMPUTE, "voxy:util/scatter.comp")
|
.add(ShaderType.COMPUTE, "voxy:util/scatter.comp")
|
||||||
.compile();
|
.compile();
|
||||||
|
|
||||||
private void run() {
|
private final Shader multiMemcpy = Shader.make()
|
||||||
while (true) {
|
.define("INPUT_HEADER_BUFFER_BINDING", 0)
|
||||||
var buffer = this.buffersToFreeQueue.poll();
|
.define("INPUT_DATA_BUFFER_BINDING", 1)
|
||||||
if (buffer == null) {
|
.define("OUTPUT_BUFFER_BINDING", 2)
|
||||||
break;
|
.add(ShaderType.COMPUTE, "voxy:util/memcpy.comp")
|
||||||
}
|
.compile();
|
||||||
buffer.free();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
private void run() {
|
||||||
if (this.workCounter.get() <= 0) {
|
if (this.workCounter.get() <= 0) {
|
||||||
LockSupport.park();
|
LockSupport.park();
|
||||||
if (this.workCounter.get() <= 0 || !this.running) {//No work
|
if (this.workCounter.get() <= 0 || !this.running) {//No work
|
||||||
@@ -229,7 +224,7 @@ public class AsyncNodeManager {
|
|||||||
job.release();
|
job.release();
|
||||||
} while (true);
|
} while (true);
|
||||||
|
|
||||||
final int UPLOAD_LIMIT = 200;
|
final int UPLOAD_LIMIT = 500;
|
||||||
for (int limit = 0; limit < UPLOAD_LIMIT/2; limit++) //Limit uploading, TODO: limit this by frame sync count, not here
|
for (int limit = 0; limit < UPLOAD_LIMIT/2; limit++) //Limit uploading, TODO: limit this by frame sync count, not here
|
||||||
{
|
{
|
||||||
var job = this.geometryUpdateQueue.poll();
|
var job = this.geometryUpdateQueue.poll();
|
||||||
@@ -357,8 +352,16 @@ public class AsyncNodeManager {
|
|||||||
results.tlnDelta.addAll(this.tlnIdChange);
|
results.tlnDelta.addAll(this.tlnIdChange);
|
||||||
this.tlnIdChange.clear();
|
this.tlnIdChange.clear();
|
||||||
|
|
||||||
results.geometryUploads.putAll(this.geometryManager.getUploads());
|
if (!this.geometryManager.getUploads().isEmpty()){//Put in new data into sync set
|
||||||
this.geometryManager.getUploads().clear();//Put in new data into sync set
|
var iter = this.geometryManager.getUploads().int2ObjectEntrySet().fastIterator();
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
var val = iter.next();
|
||||||
|
results.geometryUpload.upload(val.getIntKey(), val.getValue());
|
||||||
|
val.getValue().free();
|
||||||
|
}
|
||||||
|
this.geometryManager.getUploads().clear();
|
||||||
|
}
|
||||||
|
|
||||||
this.geometryManager.getHeapRemovals().clear();//We dont do removals on new data (as there is "none")
|
this.geometryManager.getHeapRemovals().clear();//We dont do removals on new data (as there is "none")
|
||||||
results.cleanerOperations.addAll(this.cleanerIdResetClear); this.cleanerIdResetClear.clear();
|
results.cleanerOperations.addAll(this.cleanerIdResetClear); this.cleanerIdResetClear.clear();
|
||||||
} else {
|
} else {
|
||||||
@@ -390,10 +393,7 @@ public class AsyncNodeManager {
|
|||||||
var rem = this.geometryManager.getHeapRemovals();
|
var rem = this.geometryManager.getHeapRemovals();
|
||||||
var iter = rem.intIterator();
|
var iter = rem.intIterator();
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
var buffer = results.geometryUploads.remove(iter.nextInt());
|
results.geometryUpload.remove(iter.nextInt());
|
||||||
if (buffer != null) {
|
|
||||||
buffer.free();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
rem.clear();
|
rem.clear();
|
||||||
}
|
}
|
||||||
@@ -403,10 +403,8 @@ public class AsyncNodeManager {
|
|||||||
var iter = add.int2ObjectEntrySet().fastIterator();
|
var iter = add.int2ObjectEntrySet().fastIterator();
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
var val = iter.next();
|
var val = iter.next();
|
||||||
var prevBuffer = results.geometryUploads.put(val.getIntKey(), val.getValue());
|
results.geometryUpload.upload(val.getIntKey(), val.getValue());
|
||||||
if (prevBuffer != null) {
|
val.getValue().free();
|
||||||
prevBuffer.free();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
add.clear();
|
add.clear();
|
||||||
}
|
}
|
||||||
@@ -450,7 +448,7 @@ public class AsyncNodeManager {
|
|||||||
results.usedGeometry = this.geometryManager.getGeometryUsedBytes();
|
results.usedGeometry = this.geometryManager.getGeometryUsedBytes();
|
||||||
results.currentMaxNodeId = this.manager.getCurrentMaxNodeId();
|
results.currentMaxNodeId = this.manager.getCurrentMaxNodeId();
|
||||||
|
|
||||||
this.needsWaitForSync |= results.geometryUploads.size() > UPLOAD_LIMIT;//Max of 200 uploads per frame :(
|
this.needsWaitForSync |= results.geometryUpload.currentElemCopyAmount*8L > 4L<<20;//4mb limit per frame
|
||||||
|
|
||||||
if (!RESULT_HANDLE.compareAndSet(this, null, results)) {
|
if (!RESULT_HANDLE.compareAndSet(this, null, results)) {
|
||||||
throw new IllegalArgumentException("Should always have null");
|
throw new IllegalArgumentException("Should always have null");
|
||||||
@@ -484,20 +482,35 @@ public class AsyncNodeManager {
|
|||||||
|
|
||||||
store.setSectionCount(results.geometrySectionCount);
|
store.setSectionCount(results.geometrySectionCount);
|
||||||
|
|
||||||
//Do geometry uploads
|
var upload = results.geometryUpload;
|
||||||
if (!results.geometryUploads.isEmpty()) {
|
if (!upload.dataUploadPoints.isEmpty()) {
|
||||||
var iter = results.geometryUploads.int2ObjectEntrySet().fastIterator();
|
TimingStatistics.A.start();
|
||||||
while (iter.hasNext()) {
|
|
||||||
var val = iter.next();
|
int copies = upload.dataUploadPoints.size();
|
||||||
var buffer = val.getValue();
|
int scratchSize = (int) upload.arena.getSize() * 8;
|
||||||
UploadStream.INSTANCE.upload(store.getGeometryBuffer(), Integer.toUnsignedLong(val.getIntKey()) * 8L, buffer);
|
long ptr = UploadStream.INSTANCE.rawUploadAddress(scratchSize + copies * 16);
|
||||||
//Put the queue into the buffer queue to free... yes this is stupid that need todo this...
|
MemoryUtil.memCopy(upload.scratchHeaderBuffer.address, UploadStream.INSTANCE.getBaseAddress() + ptr, copies * 16L);
|
||||||
this.buffersToFreeQueue.add(buffer);//buffer.free();//Free the buffer was uploading
|
MemoryUtil.memCopy(upload.scratchDataBuffer.address, UploadStream.INSTANCE.getBaseAddress() + ptr + copies * 16L, scratchSize);
|
||||||
|
UploadStream.INSTANCE.commit();//Commit the buffer
|
||||||
|
|
||||||
|
this.multiMemcpy.bind();
|
||||||
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, UploadStream.INSTANCE.getRawBufferId(), ptr, copies*16L);
|
||||||
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, UploadStream.INSTANCE.getRawBufferId(), ptr+copies*16L, scratchSize);
|
||||||
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, ((BasicSectionGeometryData) this.geometryData).getGeometryBuffer().id);
|
||||||
|
|
||||||
|
if (copies > 500) {
|
||||||
|
Logger.warn("Large amount of copies, lag will probably happen: " + copies);
|
||||||
}
|
}
|
||||||
UploadStream.INSTANCE.commit();
|
|
||||||
|
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||||
|
glDispatchCompute(copies, 1, 1);//Execute the copies
|
||||||
|
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||||
|
|
||||||
|
TimingStatistics.A.stop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TimingStatistics.B.start();
|
||||||
if (!results.scatterWriteLocationMap.isEmpty()) {//Scatter write
|
if (!results.scatterWriteLocationMap.isEmpty()) {//Scatter write
|
||||||
int count = results.scatterWriteLocationMap.size();//Number of writes, not chunks or uvec4 count
|
int count = results.scatterWriteLocationMap.size();//Number of writes, not chunks or uvec4 count
|
||||||
int chunks = (count+3)/4;
|
int chunks = (count+3)/4;
|
||||||
@@ -512,14 +525,17 @@ public class AsyncNodeManager {
|
|||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, nodeBuffer.id);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, nodeBuffer.id);
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, ((BasicSectionGeometryData) this.geometryData).getMetadataBuffer().id);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, ((BasicSectionGeometryData) this.geometryData).getMetadataBuffer().id);
|
||||||
glUniform1ui(0, count);
|
glUniform1ui(0, count);
|
||||||
glMemoryBarrier(GL_UNIFORM_BARRIER_BIT|GL_SHADER_STORAGE_BARRIER_BIT|GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
|
glMemoryBarrier(GL_UNIFORM_BARRIER_BIT|GL_SHADER_STORAGE_BARRIER_BIT);
|
||||||
glDispatchCompute((count+127)/128, 1, 1);
|
glDispatchCompute((count+127)/128, 1, 1);
|
||||||
glMemoryBarrier(GL_UNIFORM_BARRIER_BIT|GL_SHADER_STORAGE_BARRIER_BIT);
|
glMemoryBarrier(GL_UNIFORM_BARRIER_BIT|GL_SHADER_STORAGE_BARRIER_BIT);
|
||||||
}
|
}
|
||||||
|
TimingStatistics.B.stop();
|
||||||
|
|
||||||
|
TimingStatistics.C.start();
|
||||||
if (!results.cleanerOperations.isEmpty()) {
|
if (!results.cleanerOperations.isEmpty()) {
|
||||||
cleaner.updateIds(results.cleanerOperations);
|
cleaner.updateIds(results.cleanerOperations);
|
||||||
}
|
}
|
||||||
|
TimingStatistics.C.stop();
|
||||||
|
|
||||||
this.currentMaxNodeId = results.currentMaxNodeId;
|
this.currentMaxNodeId = results.currentMaxNodeId;
|
||||||
this.usedGeometryAmount = results.usedGeometry;
|
this.usedGeometryAmount = results.usedGeometry;
|
||||||
@@ -671,30 +687,28 @@ public class AsyncNodeManager {
|
|||||||
|
|
||||||
if (RESULT_HANDLE.get(this) != null) {
|
if (RESULT_HANDLE.get(this) != null) {
|
||||||
var result = (SyncResults)RESULT_HANDLE.getAndSet(this, null);
|
var result = (SyncResults)RESULT_HANDLE.getAndSet(this, null);
|
||||||
result.geometryUploads.forEach((a,b)->b.free());
|
result.geometryUpload.free();
|
||||||
result.scatterWriteBuffer.free();
|
result.scatterWriteBuffer.free();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (RESULT_CACHE_1_HANDLE.get(this) != null) {//Clear cache 1
|
if (RESULT_CACHE_1_HANDLE.get(this) != null) {//Clear cache 1
|
||||||
var result = (SyncResults)RESULT_CACHE_1_HANDLE.getAndSet(this, null);
|
var result = (SyncResults)RESULT_CACHE_1_HANDLE.getAndSet(this, null);
|
||||||
|
result.geometryUpload.free();
|
||||||
result.scatterWriteBuffer.free();
|
result.scatterWriteBuffer.free();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (RESULT_CACHE_2_HANDLE.get(this) != null) {//Clear cache 2
|
if (RESULT_CACHE_2_HANDLE.get(this) != null) {//Clear cache 2
|
||||||
var result = (SyncResults)RESULT_CACHE_2_HANDLE.getAndSet(this, null);
|
var result = (SyncResults)RESULT_CACHE_2_HANDLE.getAndSet(this, null);
|
||||||
|
result.geometryUpload.free();
|
||||||
result.scatterWriteBuffer.free();
|
result.scatterWriteBuffer.free();
|
||||||
}
|
}
|
||||||
|
|
||||||
this.scatterWrite.free();
|
this.scatterWrite.free();
|
||||||
|
this.multiMemcpy.free();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addDebug(List<String> debug) {
|
||||||
while (true) {
|
debug.add("UC/GC: " + (this.getUsedGeometryCapacity()/(1<<20))+"/"+(this.getGeometryCapacity()/(1<<20)));
|
||||||
var buffer = this.buffersToFreeQueue.poll();
|
|
||||||
if (buffer == null) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
buffer.free();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//Results object, which is to be synced between the render thread and worker thread
|
//Results object, which is to be synced between the render thread and worker thread
|
||||||
@@ -714,27 +728,25 @@ public class AsyncNodeManager {
|
|||||||
//Deltas for geometry store
|
//Deltas for geometry store
|
||||||
private int geometrySectionCount;
|
private int geometrySectionCount;
|
||||||
private long usedGeometry;
|
private long usedGeometry;
|
||||||
private final Int2ObjectOpenHashMap<MemoryBuffer> geometryUploads = new Int2ObjectOpenHashMap<>();
|
private final ComputeMemoryCopy geometryUpload = new ComputeMemoryCopy();
|
||||||
|
|
||||||
|
|
||||||
//Scatter writes for both geometry and node metadata
|
//Scatter writes for both geometry and node metadata
|
||||||
private MemoryBuffer scatterWriteBuffer = new MemoryBuffer(8192*2);
|
private MemoryBuffer scatterWriteBuffer = new MemoryBuffer(8192*2);
|
||||||
private final Int2IntOpenHashMap scatterWriteLocationMap = new Int2IntOpenHashMap(1024);
|
private final Int2IntOpenHashMap scatterWriteLocationMap = new Int2IntOpenHashMap(1024);
|
||||||
|
{this.scatterWriteLocationMap.defaultReturnValue(-1);}
|
||||||
|
|
||||||
//Cleaner operations
|
//Cleaner operations
|
||||||
private final IntOpenHashSet cleanerOperations = new IntOpenHashSet();
|
private final IntOpenHashSet cleanerOperations = new IntOpenHashSet();
|
||||||
|
|
||||||
public SyncResults() {
|
|
||||||
this.scatterWriteLocationMap.defaultReturnValue(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void reset() {
|
public void reset() {
|
||||||
this.cleanerOperations.clear();
|
this.cleanerOperations.clear();
|
||||||
this.scatterWriteLocationMap.clear();
|
this.scatterWriteLocationMap.clear();
|
||||||
this.currentMaxNodeId = 0;
|
this.currentMaxNodeId = 0;
|
||||||
this.tlnDelta.clear();
|
this.tlnDelta.clear();
|
||||||
this.geometrySectionCount = 0;
|
this.geometrySectionCount = 0;
|
||||||
this.geometryUploads.clear();
|
this.usedGeometry = 0;
|
||||||
|
this.geometryUpload.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
//Get or create a scatter write address for the given location
|
//Get or create a scatter write address for the given location
|
||||||
@@ -775,4 +787,142 @@ public class AsyncNodeManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static class ComputeMemoryCopy {
|
||||||
|
public int currentElemCopyAmount;
|
||||||
|
private MemoryBuffer scratchHeaderBuffer = new MemoryBuffer(1<<16);
|
||||||
|
private MemoryBuffer scratchDataBuffer = new MemoryBuffer(1<<20);
|
||||||
|
|
||||||
|
private final AllocationArena arena = new AllocationArena();
|
||||||
|
private final Int2IntOpenHashMap dataUploadPoints = new Int2IntOpenHashMap();//Points to the header index
|
||||||
|
{this.dataUploadPoints.defaultReturnValue(-1);}
|
||||||
|
|
||||||
|
|
||||||
|
public void remove(int point) {
|
||||||
|
int header = this.dataUploadPoints.remove(point);
|
||||||
|
if (header == -1) {//No upload for point
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int size = MemoryUtil.memGetInt(this.scratchHeaderBuffer.address + header*16L + 8L);
|
||||||
|
this.currentElemCopyAmount -= size;
|
||||||
|
//Free the old memory addr from arena
|
||||||
|
if (this.arena.free(MemoryUtil.memGetInt(this.scratchHeaderBuffer.address + header*16L)) != size) {
|
||||||
|
throw new IllegalStateException("Freed memory not same size as expected");
|
||||||
|
}
|
||||||
|
if (MemoryUtil.memGetInt(this.scratchHeaderBuffer.address + header*16L + 4L) != point) {
|
||||||
|
throw new IllegalStateException("Destination not the same as point");
|
||||||
|
}
|
||||||
|
|
||||||
|
//If we were the end upload header, return as we dont need to shuffle
|
||||||
|
if (header == this.dataUploadPoints.size()) {
|
||||||
|
long A = this.scratchHeaderBuffer.address + header*16L;
|
||||||
|
//Zero the memory, for consistancy
|
||||||
|
MemoryUtil.memPutLong(A, 0);
|
||||||
|
MemoryUtil.memPutLong(A+8, 0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Else: we need to move the ending upload header from the end to where the freed point was
|
||||||
|
int endingPoint = MemoryUtil.memGetInt(this.scratchHeaderBuffer.address + this.dataUploadPoints.size()*16L + 4);
|
||||||
|
if (this.dataUploadPoints.get(endingPoint) != this.dataUploadPoints.size()) {
|
||||||
|
throw new IllegalStateException("ending header not pointing at end point");
|
||||||
|
}
|
||||||
|
|
||||||
|
//Move the end header to the old header location
|
||||||
|
long A = this.scratchHeaderBuffer.address + this.dataUploadPoints.size()*16L;
|
||||||
|
long B = this.scratchHeaderBuffer.address + header*16L;
|
||||||
|
MemoryUtil.memPutLong(B, MemoryUtil.memGetLong(A)); MemoryUtil.memPutLong(A, 0);
|
||||||
|
MemoryUtil.memPutLong(B+8, MemoryUtil.memGetLong(A+8)); MemoryUtil.memPutLong(A+8, 0);
|
||||||
|
|
||||||
|
//Update the map
|
||||||
|
this.dataUploadPoints.put(endingPoint, header);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void upload(int point, MemoryBuffer data) {
|
||||||
|
if ((data.size%8)!=0) throw new IllegalStateException("Data must be of size multiple 8");
|
||||||
|
int elemSize = (int) (data.size / 8);
|
||||||
|
int header = this.dataUploadPoints.get(point);
|
||||||
|
if (header != -1) {
|
||||||
|
//If we already have a header location, we just need to reallocate the data
|
||||||
|
long headerPtr = this.scratchHeaderBuffer.address + header*16L;
|
||||||
|
if (MemoryUtil.memGetInt(headerPtr+4L) != point) {
|
||||||
|
throw new IllegalStateException("Existing destination not the point");
|
||||||
|
}
|
||||||
|
int pSize = MemoryUtil.memGetInt(headerPtr+8L);//Previous size
|
||||||
|
if (pSize == elemSize) {
|
||||||
|
//The data we are replacing is the same size, so just overwrite it, this is the easiest
|
||||||
|
data.cpyTo(this.scratchDataBuffer.address+MemoryUtil.memGetInt(headerPtr)*8L);
|
||||||
|
} else {
|
||||||
|
//Dealloc
|
||||||
|
if (this.arena.free(MemoryUtil.memGetInt(headerPtr)) != pSize) {
|
||||||
|
throw new IllegalStateException("Freed allocation not size as expected");
|
||||||
|
}
|
||||||
|
|
||||||
|
this.currentElemCopyAmount -= pSize;
|
||||||
|
this.currentElemCopyAmount += elemSize;
|
||||||
|
|
||||||
|
int alloc = this.allocScratchDataPos(elemSize);//New allocation position
|
||||||
|
//Copy data into position
|
||||||
|
data.cpyTo(this.scratchDataBuffer.address+alloc*8L);
|
||||||
|
|
||||||
|
//Update the header
|
||||||
|
MemoryUtil.memPutInt(headerPtr, alloc);
|
||||||
|
MemoryUtil.memPutInt(headerPtr+8, elemSize);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
//We need to create and allocate a new header for the upload
|
||||||
|
header = this.dataUploadPoints.size();
|
||||||
|
this.dataUploadPoints.put(point, header);
|
||||||
|
|
||||||
|
if (this.scratchHeaderBuffer.size<=header*16L) {
|
||||||
|
//We must resize the header buffer
|
||||||
|
long newSize = Math.max(this.scratchHeaderBuffer.size*2, header*16L);
|
||||||
|
Logger.info("Resizing scratch header buffer to: " + newSize);
|
||||||
|
var newScratch = new MemoryBuffer(newSize);
|
||||||
|
this.scratchHeaderBuffer.cpyTo(newScratch.address);
|
||||||
|
this.scratchHeaderBuffer.free();
|
||||||
|
this.scratchHeaderBuffer = newScratch;
|
||||||
|
}
|
||||||
|
|
||||||
|
long headerPtr = this.scratchHeaderBuffer.address + header*16L;//Header resize has happened so this is a stable address
|
||||||
|
|
||||||
|
this.currentElemCopyAmount += elemSize;
|
||||||
|
|
||||||
|
int alloc = this.allocScratchDataPos(elemSize);//New allocation position
|
||||||
|
//Copy data into position
|
||||||
|
data.cpyTo(this.scratchDataBuffer.address+alloc*8L);
|
||||||
|
|
||||||
|
//Set header data
|
||||||
|
MemoryUtil.memPutInt(headerPtr, alloc);
|
||||||
|
MemoryUtil.memPutInt(headerPtr+4, point);
|
||||||
|
MemoryUtil.memPutInt(headerPtr+8, elemSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//This is done here as it enables easily doing scratch data resizing
|
||||||
|
private int allocScratchDataPos(int size) {
|
||||||
|
int pos = (int) this.arena.alloc(size);
|
||||||
|
if (this.scratchDataBuffer.size <= (pos+size)*8L) {
|
||||||
|
//We must resize :cri:
|
||||||
|
long newSize = Math.max(this.scratchDataBuffer.size*2, (pos+size)*8L);
|
||||||
|
Logger.info("Resizing scratch data buffer to: " + newSize);
|
||||||
|
var newScratch = new MemoryBuffer(newSize);
|
||||||
|
this.scratchDataBuffer.cpyTo(newScratch.address);
|
||||||
|
this.scratchDataBuffer.free();
|
||||||
|
this.scratchDataBuffer = newScratch;
|
||||||
|
}
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void reset() {
|
||||||
|
this.currentElemCopyAmount = 0;
|
||||||
|
this.dataUploadPoints.clear();
|
||||||
|
this.arena.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void free() {
|
||||||
|
this.scratchHeaderBuffer.free(); this.scratchHeaderBuffer = null;
|
||||||
|
this.scratchDataBuffer.free(); this.scratchDataBuffer = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
28
src/main/resources/assets/voxy/shaders/util/memcpy.comp
Normal file
28
src/main/resources/assets/voxy/shaders/util/memcpy.comp
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
#version 460 core
#define WORK_SIZE 256
layout(local_size_x=WORK_SIZE) in;

//One record per copy job: x = first source element, y = first destination element, z = element count
// (w is not read here; NOTE: can probably make it a uvec2?)
layout(binding = INPUT_HEADER_BUFFER_BINDING, std430) restrict readonly buffer InputHeaderBuffer {
    uvec4[] dataCopyHeader;
};

//Packed source elements, indexed by the x component of a job record
layout(binding = INPUT_DATA_BUFFER_BINDING, std430) restrict readonly buffer InputDataBuffer {
    uvec2[] dataInBuffer;
};

//Destination elements, indexed by the y component of a job record
layout(binding = OUTPUT_BUFFER_BINDING, std430) restrict writeonly buffer OutputBuffer {
    uvec2[] outputBuffer;
};

//Each work group services exactly one job record; its invocations walk the job's elements
// in steps of WORK_SIZE, starting at their own local index
void main() {
    uvec4 header = dataCopyHeader[gl_WorkGroupID.x];
    uint srcBase = header.x;
    uint dstBase = header.y;
    uint elemCount = header.z;

    uint elem = gl_LocalInvocationID.x;
    while (elem < elemCount) {
        outputBuffer[dstBase + elem] = dataInBuffer[srcBase + elem];
        elem += WORK_SIZE;
    }
}
|
||||||
Reference in New Issue
Block a user