diff --git a/src/main/java/me/cortex/voxy/client/core/gl/GlPersistentMappedBuffer.java b/src/main/java/me/cortex/voxy/client/core/gl/GlPersistentMappedBuffer.java index a5f76ee0..2b7d2ab5 100644 --- a/src/main/java/me/cortex/voxy/client/core/gl/GlPersistentMappedBuffer.java +++ b/src/main/java/me/cortex/voxy/client/core/gl/GlPersistentMappedBuffer.java @@ -14,7 +14,7 @@ public class GlPersistentMappedBuffer extends TrackedObject { this.id = glCreateBuffers(); this.size = size; glNamedBufferStorage(this.id, size, GL_CLIENT_STORAGE_BIT|GL_MAP_PERSISTENT_BIT|(flags&(GL_MAP_COHERENT_BIT|GL_MAP_WRITE_BIT|GL_MAP_READ_BIT))); - this.addr = nglMapNamedBufferRange(this.id, 0, size, flags|GL_MAP_PERSISTENT_BIT); + this.addr = nglMapNamedBufferRange(this.id, 0, size, (flags&(GL_MAP_WRITE_BIT|GL_MAP_READ_BIT|GL_MAP_UNSYNCHRONIZED_BIT|GL_MAP_FLUSH_EXPLICIT_BIT))|GL_MAP_PERSISTENT_BIT); } @Override diff --git a/src/main/java/me/cortex/voxy/client/core/gl/shader/GenericsProcessor.java b/src/main/java/me/cortex/voxy/client/core/gl/shader/GenericsProcessor.java new file mode 100644 index 00000000..c81367ed --- /dev/null +++ b/src/main/java/me/cortex/voxy/client/core/gl/shader/GenericsProcessor.java @@ -0,0 +1,12 @@ +package me.cortex.voxy.client.core.gl.shader; + +import java.util.regex.Pattern; + +public class GenericsProcessor implements IShaderProcessor { + private static final Pattern GENERIC_DEFINE = Pattern.compile("#defineGen (?[A-Za-z0-9]+)<(?[A-Za-z0-9]*)>"); + private static final Pattern GENERIC_USE = Pattern.compile("(?[A-Za-z0-9]+)<(?[A-Za-z0-9]*)>"); + @Override + public String process(ShaderType type, String source) { + return null; + } +} diff --git a/src/main/java/me/cortex/voxy/client/core/gl/shader/Shader.java b/src/main/java/me/cortex/voxy/client/core/gl/shader/Shader.java index 63d215b5..5e9a069d 100644 --- a/src/main/java/me/cortex/voxy/client/core/gl/shader/Shader.java +++ b/src/main/java/me/cortex/voxy/client/core/gl/shader/Shader.java @@ -3,8 +3,7 @@ 
package me.cortex.voxy.client.core.gl.shader; import me.cortex.voxy.common.util.TrackedObject; import org.lwjgl.opengl.GL20C; -import java.util.HashMap; -import java.util.Map; +import java.util.*; import java.util.stream.Collectors; import static org.lwjgl.opengl.GL20.glDeleteProgram; @@ -16,8 +15,15 @@ public class Shader extends TrackedObject { id = program; } - public static Builder make(IShaderProcessor processor) { - return new Builder(processor); + public static Builder make(IShaderProcessor... processors) { + List<IShaderProcessor> aa = new ArrayList<>(List.of(processors)); + Collections.reverse(aa); + IShaderProcessor applicator = (type,source)->source; + for (IShaderProcessor processor : aa) {//iterate the reversed list: each step wraps the previous applicator, so this makes the processors run in the order they were passed + IShaderProcessor finalApplicator = applicator; + applicator = (type, source) -> finalApplicator.process(type, processor.process(type, source)); + } + return new Builder(applicator); } public static Builder make() { diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/HierarchicalOcclusionRenderer.java b/src/main/java/me/cortex/voxy/client/core/rendering/HierarchicalOcclusionRenderer.java new file mode 100644 index 00000000..d42dc9be --- /dev/null +++ b/src/main/java/me/cortex/voxy/client/core/rendering/HierarchicalOcclusionRenderer.java @@ -0,0 +1,50 @@ +package me.cortex.voxy.client.core.rendering; + +import me.cortex.voxy.client.core.gl.GlBuffer; +import me.cortex.voxy.client.core.gl.shader.Shader; +import me.cortex.voxy.client.core.gl.shader.ShaderType; +import me.cortex.voxy.client.core.rendering.hierarchical.NodeManager; +import me.cortex.voxy.common.util.HierarchicalBitSet; + +import static org.lwjgl.opengl.GL42C.*; +import static org.lwjgl.opengl.GL43C.GL_SHADER_STORAGE_BARRIER_BIT; +import static org.lwjgl.opengl.GL43C.glDispatchCompute; +import static org.lwjgl.opengl.GL44.GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT; + +public class HierarchicalOcclusionRenderer { + private final int workgroup_dispatch_size_x;//The number of workgroups required to saturate the
gpu efficiently + private final NodeManager nodeManager = new NodeManager(null); + private final HiZBuffer hiz = new HiZBuffer(); + + + private Shader hiercarchialShader = Shader.make() + .add(ShaderType.COMPUTE, "voxy:lod/hierarchical/selector.comp") + .compile(); + + public HierarchicalOcclusionRenderer(int workgroup_size) { + this.workgroup_dispatch_size_x = workgroup_size; + + } + + private void bind() { + + } + + public void render(int depthBuffer, int width, int height) { + //Make hiz + this.hiz.buildMipChain(depthBuffer, width, height); + //Node upload phase + this.nodeManager.uploadPhase(); + //Node download phase (pulls from previous frame (should maybe result in lower latency)) also clears and resets the queues + this.nodeManager.downloadPhase(); + //Bind all the resources + this.bind(); + //run hierachial selection shader + this.hiercarchialShader.bind(); + //barrier + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT|GL_UNIFORM_BARRIER_BIT|GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT|GL_FRAMEBUFFER_BARRIER_BIT); + //Emit enough work to fully populate the gpu + glDispatchCompute(this.workgroup_dispatch_size_x, 1, 1); + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT|GL_COMMAND_BARRIER_BIT|GL_UNIFORM_BARRIER_BIT); + } +} diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/INodeInteractor.java b/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/INodeInteractor.java new file mode 100644 index 00000000..51e78aec --- /dev/null +++ b/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/INodeInteractor.java @@ -0,0 +1,15 @@ +package me.cortex.voxy.client.core.rendering.hierarchical; + +import me.cortex.voxy.client.core.rendering.building.BuiltSection; + +import java.util.function.Consumer; + +//Interface for node manager to interact with the outside world +public interface INodeInteractor { + void watchUpdates(long pos);//marks pos as watching for updates, i.e. 
any LoD updates will trigger a callback + void unwatchUpdates(long pos);//Unmarks a position for updates + + void requestMesh(long pos);//Explicitly requests a mesh at a position, run the callback + + void setMeshUpdateCallback(Consumer mesh); +} diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/ITrimInterface.java b/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/ITrimInterface.java new file mode 100644 index 00000000..f69118a1 --- /dev/null +++ b/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/ITrimInterface.java @@ -0,0 +1,12 @@ +package me.cortex.voxy.client.core.rendering.hierarchical; + +public interface ITrimInterface { + //Last recorded/known use time of a nodes mesh, returns -1 if node doesnt have a mesh + int lastUsedTime(int node); + + //Returns an integer with the bottom 24 bits being the ptr top 8 bits being count or something + int getChildren(int node); + + //Returns a size of the nodes mesh, -1 if the node doesnt have a mesh + int getNodeSize(int node); +} diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/NodeLoadSystem.java b/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/NodeLoadSystem.java new file mode 100644 index 00000000..e344a4ba --- /dev/null +++ b/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/NodeLoadSystem.java @@ -0,0 +1,5 @@ +package me.cortex.voxy.client.core.rendering.hierarchical; + +//Uses a persistently mapped coherient buffer with off thread polling to pull in requests +public class NodeLoadSystem { +} diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/NodeManager.java b/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/NodeManager.java new file mode 100644 index 00000000..04ec929e --- /dev/null +++ b/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/NodeManager.java @@ -0,0 +1,219 @@ +package me.cortex.voxy.client.core.rendering.hierarchical; + +import 
it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap; +import me.cortex.voxy.client.core.gl.GlBuffer; +import me.cortex.voxy.client.core.rendering.building.BuiltSection; +import me.cortex.voxy.client.core.rendering.util.DownloadStream; +import me.cortex.voxy.common.util.HierarchicalBitSet; +import org.lwjgl.system.MemoryUtil; + +import static org.lwjgl.opengl.GL11.GL_UNSIGNED_INT; +import static org.lwjgl.opengl.GL30.GL_R32UI; +import static org.lwjgl.opengl.GL30C.GL_RED_INTEGER; +import static org.lwjgl.opengl.GL45.nglClearNamedBufferSubData; + +public class NodeManager { + public static final int MAX_NODE_COUNT = 1<<22; + public static final int MAX_REQUESTS = 1024; + private final HierarchicalBitSet bitSet = new HierarchicalBitSet(MAX_NODE_COUNT); + private final GlBuffer nodeBuffer = new GlBuffer(MAX_NODE_COUNT*16);//Node size is 16 bytes + + //TODO: maybe make this a coherent persistent mapped read buffer, instead of download synced buffer copy thing + + //a request payload is a single uint, first 8 bits are flags followed by 24 bit node identifier + // (e.g. 
load child nodes, load child nodes + meshs, load self meshes ) + private final int REQUEST_QUEUE_SIZE = 4 + MAX_REQUESTS * 4;//TODO: add a priority system + private final GlBuffer requestQueue = new GlBuffer(4 + MAX_REQUESTS * 4); + + //Buffer containing the index of the root nodes + private final GlBuffer roots = new GlBuffer(1024*4); + + + + //500mb TODO: SEE IF CAN SHRINK IT BY EITHER NOT NEEDING AS MUCH SPACE or reducing max node count + private final long[] localNodes = new long[MAX_NODE_COUNT * 3];//1.5x the size of the gpu copy to store extra metadata + //LocalNodes have an up value pointing to the parent, enabling full traversal + + private final INodeInteractor interactor; + + public NodeManager(INodeInteractor interactor) { + this.interactor = interactor; + this.pos2meshId.defaultReturnValue(NO_NODE); + } + + //Returns true if it has its own mesh loaded + private static boolean nodeHasMeshLoaded(long metaA, long metaB) { + return false; + } + + private static final int REQUEST_SELF = 0; + private static final int REQUEST_CHILDREN = 1; + //A node can be loaded in the tree but have no mesh associated with it + // this is so that higher level nodes dont waste mesh space + + + //The reason that nodes have both child and own mesh pointers + // is so that on an edge of the screen or when moving, nodes arnt constantly being swapped back and forth + // it basicly acts as an inline cache :tm: however it does present some painpoints + // especially in managing the graph + + //It might be easier to have nodes strictly either point to child nodes or meshes + // if a parent needs to be rendered instead of the child, request for node change to self + // while this will generate a shitton more requests it should be alot easier to manage graph wise + // can probably add a caching service via a compute shader that ingests a request list + // sees if the requested nodes are already cached, if so swap them in, otherwise dispatch a request + // to cpu + + private void 
processRequestQueue(long ptr, long size) { + int count = MemoryUtil.memGetInt(ptr); ptr += 4; + for (int i = 0; i < count; i++) { + int request = MemoryUtil.memGetInt(ptr + i*4L); + int args = (request>>>24)&0xFF;//flags live in the top 8 bits; shift them down so the low type bits can be masked below + int nodeId = request&(0xFFFFFF); + + long pos = this.localNodes[nodeId*3]; + long metaA = this.localNodes[nodeId*3 + 1]; + long metaB = this.localNodes[nodeId*3 + 2]; + + int type = args&0b11;//2 bits for future request types such as parent and ensure stable (i.e. both parent and child loaded) + if (type == REQUEST_SELF) { + //Requires own mesh loaded (it can have 2 different priorites, it can fallback to using its children to render if they are loaded) + // else it is critical priority + if (nodeHasMeshLoaded(metaA, metaB)) { + throw new IllegalStateException("Node requested a mesh load, but mesh is already loaded: " + pos); + } + + //watch the mesh and request it + this.interactor.watchUpdates(pos); + this.interactor.requestMesh(pos); + + } else if (type == REQUEST_CHILDREN) { + //Node requires children to be loaded NOTE: when this is the case, it doesnt just mean the nodes, + // it means the meshes aswell, + // meshes may be unloaded later + + //when this case is hit it means that the child nodes arnt even loaded, so it becomes a bit more complex + // basicly, need to request all child nodes be loaded in a batch + // then in the upload tick need to do update many things + + } else { + throw new IllegalArgumentException("Unknown update type: " + type + " @pos:" + pos); + } + + } + } + + + public void uploadPhase() { + //All uploads + + //Have a set of upload tasks for nodes, + // this could include updating the mesh ptr + // or child ptr or uploading new nodes + // NOTE: when uploading a set of new nodes (must be clustered as children) + // have to update parent + // same when removing a set of children + + //Note: child node upload tasks need to all be complete before they can be uploaded + + + //The way the graph works and can be cut is that all the
leaf nodes _must_ at all times contain a mesh + // this is critical to prevent "cracks"/no geometry being rendered + // when the render mesh buffer is "full" (or even just periodicly), trimming of the tree must occur to keep + // size within reason + //Note tho that there is a feedback delay and such so geometry buffer should probably be trimmed when it reaches + // 80-90% capacity so that new geometry can still be uploaded without being blocked on geometry clearing + // it becomes a critical error if the geometry buffer becomes full while the tree is fully trimmed + //NOTE: while trimming the tree, need to also trim the parents down i.e. the top level should really not have its mesh + // loaded while it isnt really ever used + // however as long as the rule that all leaf nodes have a mesh loaded is held then there should never be + // any geometry holes + } + + + //Download and upload point, called once per frame + public void downloadPhase() { + DownloadStream.INSTANCE.download(this.requestQueue, 0, REQUEST_QUEUE_SIZE, this::processRequestQueue); + DownloadStream.INSTANCE.commit(); + //Clear the queue counter, TODO: maybe do it some other way to batch clears + nglClearNamedBufferSubData(this.requestQueue.id, GL_R32UI, 0, 4, GL_RED_INTEGER, GL_UNSIGNED_INT, 0); + //TODO: compute cleanup here of loaded nodes, and what needs to be uploaded + // i.e. 
if there is more upload stuff than there is free memory, cull nodes in the tree + // to fit upload points, can also create errors if all nodes in the tree are requested but no memory to put + } + + + + + + //Inserts a top level node into the graph, it has geometry and no children loaded as it is a leaf node + public void insertTopLevelNode(long position) { + + } + + //Removes a top level node from the graph, doing so also removes all child nodes and associate geometry + // the allocated slots when removing nodes are stored and roped off until it is guarenteed that all requests have + // passed + public void removeTopLevelNode(long position) { + + } + + + + //Tracking for nodes that specifically need meshes, if a node doesnt have or doesnt need a mesh node, it is not in the map + // the map should be identical to the currently watched set of sections + //NOTE: that if the id is negative its part of a mesh request + private final Long2IntOpenHashMap pos2meshId = new Long2IntOpenHashMap(); + private static final int NO_NODE = -1; + + //Need to make this system attatched with a batched worker system, since a mesh update can be a few things + // it can be a mesh update of a tracked render section, in this case we must ensure that it is still tracked and hasnt been removed bla bla bla + // if its still valid and tracked then upload it and update the node aswell ensuring sync bla bla bla + // if it was part of a request, then we need to first check that the request still exists and hasnt been discarded B) probably upload it immediatly still + // B) set the request with that section to have been, well, uploaded and the mesh set, (note if the mesh was updated while a request was inprogress/other requests not fufilled, need to remove the old and replace with the updated) + // if all the meshes in the request are satisfied, upload the request nodes and update its parent + // NOTE! 
batch requests where this is needed are only strictly required when children are requested in order to guarentee that all + // propertiy of leaf nodes must have meshes remains + //(TODO: see when sync with main thread should be, in the renderer or here since the updates are dispatched offthread) + // Note that the geometry buffer should have idk 20% free? that way meshes can always be inserted (same for the node buffer ig) maybe 10%? idk need to experiement + // if the buffer goes over this threshold, the tree/graph culler must start culling last/least used nodes somehow + // it should be an error if the geometry or node buffer fills up but there are no nodes/meshes to cull/remove + public void meshUpdate(BuiltSection mesh) { + int id = this.pos2meshId.get(mesh.position); + if (id == NO_NODE) { + //The built mesh section is no longer needed, discard it + // TODO: could probably?? cache the mesh in ram that way if its requested? it can be immediatly fetched while a newer mesh is built?? + mesh.free(); + return; + } + if ((id&(1<<31))!=0) { + //The mesh is part of a batched request + id = id^(1<<31); + + } else { + //The mesh is an update for an existing node + //this.localNodes[id*3] + } + } + + + //A node has a position (64 bit) + // a ptr to its own mesh (24 bit) + // a ptr to children nodes (24 bit) + // flags (16 bit) + // Total of 128 bits (16 bytes) + + //First 2 flag bits are a requested dispatch type (0 meaning no request and the 3 remaining states for different request types) + // this ensures that over multiple frames the same node is not requested + + //Bits exist for whether or not the children have meshes loaded or if the parents have meshes loaded + // the idea is to keep +-1 lod meshes loaded into vram to enable seemless transitioning + // the only critical state is that if a mesh wants to be rendered it should be able to be rendered + + //Basicly, there are multiple things, it depends on the screensize error + // if a node is close to needing its 
children loaded but they arnt, then request it but with a lower priority + // if a node must need its children then request at a high prioirty + // if a node doesnt have a mesh but all its children do than dispatch a medium priority to have its own mesh loaded + // but then just use the child meshes for rendering + +} diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/NodeManager2.java b/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/NodeManager2.java new file mode 100644 index 00000000..e6249b58 --- /dev/null +++ b/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/NodeManager2.java @@ -0,0 +1,103 @@ +package me.cortex.voxy.client.core.rendering.hierarchical; + +import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap; +import me.cortex.voxy.client.core.rendering.building.BuiltSection; +import me.cortex.voxy.client.core.rendering.util.MarkedObjectList; + +public class NodeManager2 { + //A request for making a new child nodes + private static final class LeafRequest { + //LoD position identifier + public long position; + + //Node id of the node the leaf request is for, note! While there is a leaf request, the node should not be unloaded or removed + public int nodeId; + + //The mask of what child nodes are required + public byte requiredChildMask; + + //The mask of currently supplied child node data + public byte currentChildMask; + + //Reset/clear the request so that it may be reused + public void clear() { + + } + } + + public static final int MAX_NODE_COUNT = 1<<22; + + //Local data layout + // first long is position (todo! 
might not be needed) + // next long contains mesh position ig/id + private final long[] localNodeData = new long[MAX_NODE_COUNT * 3]; + + private final INodeInteractor interactor; + + public NodeManager2(INodeInteractor interactor) { + this.interactor = interactor; + this.pos2meshId.defaultReturnValue(NO_NODE); + this.interactor.setMeshUpdateCallback(this::meshUpdate); + } + + public void insertTopLevelNode(long position) { + + } + + public void removeTopLevelNode(long position) { + + } + + //Returns the mesh offset/id for the given node or -1 if it doesnt exist + private int getMeshForNode(int node) { + return -1; + } + + + //Tracking for nodes that specifically need meshes, if a node doesnt have or doesnt need a mesh node, it is not in the map + // the map should be identical to the currently watched set of sections + //NOTE: that if the id is negative its part of a mesh request + private final Long2IntOpenHashMap pos2meshId = new Long2IntOpenHashMap(); + private static final int NO_NODE = -1; + + //The request queue should be like some array that can reuse objects to prevent gc nightmare + like a bitset to find an avalible free slot + // hashmap might work bar the gc overhead + private final MarkedObjectList leafRequests = new MarkedObjectList<>(LeafRequest[]::new, LeafRequest::new); + + + private void meshUpdate(BuiltSection mesh) { + int id = this.pos2meshId.get(mesh.position); + if (id == NO_NODE) { + //The built mesh section is no longer needed, discard it + // TODO: could probably?? cache the mesh in ram that way if its requested? it can be immediatly fetched while a newer mesh is built?? 
+ mesh.free(); + return; + } + if ((id&(1<<31))!=0) { + //The mesh is part of a batched request + id = id^(1<<31);//Basically abs it + + //There are a few cases for this branch + // the section could be replacing an existing mesh that is part of the request (due to an update) + // the section mesh could be new to the request + // in this case the section mesh could be the last entry needed to satisfy the request + // in which case! we must either A) mark the request as ready to be uploaded + // and then uploaded after all the mesh updates are processed, or upload it immediately + + //The lower 3 bits of the id specify the quadrant (8 pos) of the node in the request + LeafRequest request = this.leafRequests.get(id>>3); + + + } else { + //The mesh is an update for an existing node + + int prevMesh = this.getMeshForNode(id); + if (prevMesh != -1) { + //Node has a mesh attached, remove and replace it + } else { + //Node didnt have a mesh attached, so just set the current mesh + } + } + } + +} diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/TreeTrimmer.java b/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/TreeTrimmer.java new file mode 100644 index 00000000..e213ad1e --- /dev/null +++ b/src/main/java/me/cortex/voxy/client/core/rendering/hierarchical/TreeTrimmer.java @@ -0,0 +1,23 @@ +package me.cortex.voxy.client.core.rendering.hierarchical; + +//System to determine what nodes to remove from the hericial tree while retaining the property that all +// leaf nodes should have meshes +//This system is critical to prevent the geometry buffer from growing to large or for too many nodes to fill up +// the node system +public class TreeTrimmer { + //Used to interact with the outside world + private final ITrimInterface trimInterface; + + public TreeTrimmer(ITrimInterface trimInterface) { + this.trimInterface = trimInterface; + } + + public void computeTrimPoints() { + //Do a bfs to find ending points to trim needs to be based on 
some, last used, metric + + //First stratagy is to compute a bfs and or generate a list of nodes sorted by last use time + // the thing is that if we cull a mesh, it cannot be a leaf node + // if it is a leaf node its parent node must have a mesh loaded + + } +} diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/util/DownloadStream.java b/src/main/java/me/cortex/voxy/client/core/rendering/util/DownloadStream.java index 52d40d75..7611271b 100644 --- a/src/main/java/me/cortex/voxy/client/core/rendering/util/DownloadStream.java +++ b/src/main/java/me/cortex/voxy/client/core/rendering/util/DownloadStream.java @@ -84,7 +84,7 @@ public class DownloadStream { for (var entry : this.downloadList) { glCopyNamedBufferSubData(entry.target.id, this.downloadBuffer.id, entry.targetOffset, entry.downloadStreamOffset, entry.size); } - thisFrameDownloadList.addAll(this.downloadList); + this.thisFrameDownloadList.addAll(this.downloadList); this.downloadList.clear(); this.caddr = -1; diff --git a/src/main/java/me/cortex/voxy/client/core/rendering/util/MarkedObjectList.java b/src/main/java/me/cortex/voxy/client/core/rendering/util/MarkedObjectList.java new file mode 100644 index 00000000..e4a20c79 --- /dev/null +++ b/src/main/java/me/cortex/voxy/client/core/rendering/util/MarkedObjectList.java @@ -0,0 +1,51 @@ +package me.cortex.voxy.client.core.rendering.util; + +import it.unimi.dsi.fastutil.ints.Int2ObjectFunction; +import me.cortex.voxy.common.util.HierarchicalBitSet; + +import java.util.function.Supplier; + +public class MarkedObjectList<T> { + private static final float GROWTH_FACTOR = 0.75f; + + private final Int2ObjectFunction<T[]> arrayGenerator; + private final Supplier<T> nullSupplier; + private final HierarchicalBitSet bitSet = new HierarchicalBitSet(1<<(6*4));//use the full 64^4 capacity; a limit of -1 makes allocateNext() return -2 (limit reached) on every call + private T[] objects;//Should maybe make a getter function instead + + public MarkedObjectList(Int2ObjectFunction<T[]> arrayGenerator, Supplier<T> nullSupplier) { + this.arrayGenerator = arrayGenerator; + this.nullSupplier =
nullSupplier; + this.objects = this.arrayGenerator.apply(16); + } + + public int allocate() { + //Gets an unused id for some entry in objects, if its null fill it + int id = this.bitSet.allocateNext(); + if (this.objects.length <= id) { + //Resize and copy over the objects array + int newLen = this.objects.length + (int)Math.ceil(this.objects.length*GROWTH_FACTOR); + T[] newArr = this.arrayGenerator.apply(newLen); + System.arraycopy(this.objects, 0, newArr, 0, this.objects.length); + this.objects = newArr; + } + if (this.objects[id] == null) { + this.objects[id] = this.nullSupplier.get(); + } + return id; + } + + public void release(int id) { + if (!this.bitSet.free(id)) { + throw new IllegalArgumentException("Index " + id + " was already released"); + } + } + + public T get(int index) { + //Make the checking that index is allocated optional, as it might cause overhead due to multiple cacheline misses + if (!this.bitSet.isSet(index)) { + throw new IllegalArgumentException("Index " + index + " is not allocated"); + } + return this.objects[index]; + } +} diff --git a/src/main/java/me/cortex/voxy/common/util/HierarchicalBitSet.java b/src/main/java/me/cortex/voxy/common/util/HierarchicalBitSet.java new file mode 100644 index 00000000..319b2df4 --- /dev/null +++ b/src/main/java/me/cortex/voxy/common/util/HierarchicalBitSet.java @@ -0,0 +1,72 @@ +package me.cortex.voxy.common.util; + +public class HierarchicalBitSet { + private final int limit; + private int cnt; + private long A = 0; + private final long[] B = new long[64]; + private final long[] C = new long[64*64]; + private final long[] D = new long[64*64*64]; + public HierarchicalBitSet(int limit) {//Fixed size of 64^4 + this.limit = limit; + if (limit > (1<<(6*4))) { + throw new IllegalArgumentException("Limit greater than capacity"); + } + } + + public int allocateNext() { + if (this.A==-1) { + return -1; + } + if (this.cnt+1>this.limit) { + return -2;//Limit reached + } + int idx = 
Long.numberOfTrailingZeros(~this.A); + long bp = this.B[idx]; + idx = Long.numberOfTrailingZeros(~bp) + 64*idx; + long cp = this.C[idx]; + idx = Long.numberOfTrailingZeros(~cp) + 64*idx; + long dp = this.D[idx]; + idx = Long.numberOfTrailingZeros(~dp) + 64*idx; + dp |= 1L<<(idx&0x3f); + this.D[idx>>6] = dp; + int ret = idx; + if (dp==-1) { + idx >>= 6; + cp |= 1L<<(idx&0x3f); + this.C[idx>>6] = cp; + if (cp==-1) { + idx >>= 6; + bp |= 1L<<(idx&0x3f); + this.B[idx>>6] = bp; + if (bp==-1) { + this.A |= 1L<<(idx>>6);//idx is a bit index into B here; A holds one bit per B word, so mark the word index (the same bit free() clears) + } + } + } + this.cnt++; + return ret; + } + + public boolean free(int idx) { + long v = this.D[idx>>6]; + boolean wasSet = (v&(1L<<(idx&0x3f)))!=0; + this.cnt -= wasSet?1:0; + this.D[idx>>6] = v&~(1L<<(idx&0x3f)); + idx >>= 6; + this.C[idx>>6] &= ~(1L<<(idx&0x3f)); + idx >>= 6; + this.B[idx>>6] &= ~(1L<<(idx&0x3f)); + idx >>= 6; + this.A &= ~(1L<<(idx&0x3f)); + return wasSet; + } + + public int getCount() { + return this.cnt; + } + + public boolean isSet(int idx) { + return (this.D[idx>>6]&(1L<<(idx&0x3f)))!=0; + } +} diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46/quads.frag b/src/main/resources/assets/voxy/shaders/lod/gl46/quads.frag index 7732974e..3ad24c2f 100644 --- a/src/main/resources/assets/voxy/shaders/lod/gl46/quads.frag +++ b/src/main/resources/assets/voxy/shaders/lod/gl46/quads.frag @@ -1,6 +1,8 @@ #version 460 core layout(binding = 0) uniform sampler2D blockModelAtlas; +//#define DEBUG_RENDER + //TODO: need to fix when merged quads have discardAlpha set to false but they span multiple tiles // however they are not a full block @@ -10,8 +12,11 @@ layout(location = 2) in flat vec4 tinting; layout(location = 3) in flat vec4 addin; layout(location = 4) in flat uint flags; layout(location = 5) in flat vec4 conditionalTinting; -//layout(location = 6) in flat vec4 solidColour; + +#ifdef DEBUG_RENDER +layout(location = 6) in flat uint quadDebug; +#endif layout(location = 0) out vec4 outColour; void main() { vec2 uv = mod(uv,
vec2(1.0))*(1.0/(vec2(3.0,2.0)*256.0)); @@ -29,4 +34,14 @@ void main() { outColour = (colour * tinting) + addin; //outColour = vec4(uv + baseUV, 0, 1); + + + #ifdef DEBUG_RENDER + uint hash = quadDebug*1231421+123141; + hash ^= hash>>16; + hash = hash*1231421+123141; + hash ^= hash>>16; + hash = hash * 1827364925 + 123325621; + outColour = vec4(float(hash&15u)/15, float((hash>>4)&15u)/15, float((hash>>8)&15u)/15, 1); + #endif } \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46/quads.vert b/src/main/resources/assets/voxy/shaders/lod/gl46/quads.vert deleted file mode 100644 index e87e4b95..00000000 --- a/src/main/resources/assets/voxy/shaders/lod/gl46/quads.vert +++ /dev/null @@ -1,150 +0,0 @@ -#version 460 core -#extension GL_ARB_gpu_shader_int64 : enable - -#import -#import -#import -#line 8 - -layout(location = 0) out vec2 uv; -layout(location = 1) out flat vec2 baseUV; -layout(location = 2) out flat vec4 tinting; -layout(location = 3) out flat vec4 addin; -layout(location = 4) out flat uint flags; -layout(location = 5) out flat vec4 conditionalTinting; -//layout(location = 6) out flat vec4 solidColour; - -uint extractLodLevel() { - return uint(gl_BaseInstance)>>27; -} - -//Note the last 2 bits of gl_BaseInstance are unused -//Gives a relative position of +-255 relative to the player center in its respective lod -ivec3 extractRelativeLodPos() { - return (ivec3(gl_BaseInstance)<>ivec3(23); -} - -vec4 uint2vec4RGBA(uint colour) { - return vec4((uvec4(colour)>>uvec4(24,16,8,0))&uvec4(0xFF))/255.0; -} - -//Gets the face offset with respect to the face direction (e.g. 
some will be + some will be -) -float getDepthOffset(uint faceData, uint face) { - float offset = extractFaceIndentation(faceData); - return offset * (1.0-((int(face)&1)*2.0)); -} - -vec2 getFaceSizeOffset(uint faceData, uint corner) { - float EPSILON = 0.001f; - vec4 faceOffsetsSizes = extractFaceSizes(faceData); - //Expand the quads by a very small amount - faceOffsetsSizes.xz -= vec2(EPSILON); - faceOffsetsSizes.yw += vec2(EPSILON); - return mix(faceOffsetsSizes.xz, faceOffsetsSizes.yw-1.0f, bvec2(((corner>>1)&1u)==1, (corner&1u)==1)); -} - -//TODO: add a mechanism so that some quads can ignore backface culling -// this would help alot with stuff like crops as they would look kinda weird i think, -// same with flowers etc -void main() { - int cornerIdx = gl_VertexID&3; - Quad quad = quadData[uint(gl_VertexID)>>2]; - vec3 innerPos = extractPos(quad); - uint face = extractFace(quad); - uint modelId = extractStateId(quad); - BlockModel model = modelData[modelId]; - uint faceData = model.faceData[face]; - bool isTranslucent = modelIsTranslucent(model); - bool hasAO = modelHasMipmaps(model);//TODO: replace with per face AO flag - bool isShaded = hasAO;//TODO: make this a per face flag - //Change the ordering due to backface culling - //NOTE: when rendering, backface culling is disabled as we simply dispatch calls for each face - // this has the advantage of having "unassigned" geometry, that is geometry where the backface isnt culled - //if (face == 0 || (face>>1 != 0 && (face&1)==1)) { - // cornerIdx ^= 1; - //} - - uint lodLevel = extractLodLevel(); - ivec3 lodCorner = ((extractRelativeLodPos()<>1)&1, cornerIdx&1)); - vec2 size = (respectiveQuadSize + faceOffset) * (1<>1) == 0) { //Up/down - offset = offset.xzy; - } - //Not needed, here for readability - //if ((face>>1) == 1) {//north/south - // offset = offset.xyz; - //} - if ((face>>1) == 2) { //west/east - offset = offset.zxy; - } - - gl_Position = MVP * vec4(corner + offset, 1.0); - - - //Compute the uv 
coordinates - vec2 modelUV = vec2(modelId&0xFFu, (modelId>>8)&0xFFu)*(1.0/(256.0)); - //TODO: make the face orientated by 2x3 so that division is not a integer div and modulo isnt needed - // as these are very slow ops - baseUV = modelUV + (vec2(face>>1, face&1u) * (1.0/(vec2(3.0, 2.0)*256.0))); - //TODO: add an option to scale the quad size by the lod level so that - // e.g. at lod level 2 a face will have 2x2 - uv = respectiveQuadSize + faceOffset;//Add in the face offset for 0,0 uv - - flags = faceHasAlphaCuttout(faceData); - - //We need to have a conditional override based on if the model size is < a full face + quadSize > 1 - flags |= uint(any(greaterThan(quadSize, ivec2(1)))) & faceHasAlphaCuttoutOverride(faceData); - - flags |= uint(!modelHasMipmaps(model))<<1; - - //Compute lighting - tinting = getLighting(extractLightId(quad)); - - //Apply model colour tinting - uint tintColour = model.colourTint; - if (modelHasBiomeLUT(model)) { - tintColour = colourData[tintColour + extractBiomeId(quad)]; - } - - conditionalTinting = vec4(0); - if (tintColour != uint(-1)) { - flags |= 1u<<2; - conditionalTinting = uint2vec4RGBA(tintColour).yzwx; - } - - addin = vec4(0.0); - if (!isTranslucent) { - tinting.w = 0.0; - //Encode the face, the lod level and - uint encodedData = 0; - encodedData |= face; - encodedData |= (lodLevel<<3); - encodedData |= uint(hasAO)<<6; - addin.w = float(encodedData)/255.0; - } - - //Apply face tint - if (isShaded) { - if ((face>>1) == 1) { - tinting.xyz *= 0.8f; - } else if ((face>>1) == 2) { - tinting.xyz *= 0.6f; - } else if (face == 0){ - tinting.xyz *= 0.5f; - } else { - //TODO: FIXME: DONT HAVE SOME ARBITARY TINT LIKE THIS - tinting.xyz *= 0.95f; - } - } - - - //solidColour = vec4(vec3(modelId&0xFu, (modelId>>4)&0xFu, (modelId>>8)&0xFu)*(1f/15f),1f); -} \ No newline at end of file diff --git a/src/main/resources/assets/voxy/shaders/lod/gl46/quads2.vert b/src/main/resources/assets/voxy/shaders/lod/gl46/quads2.vert index 8a79fd48..5ad88094 
100644 --- a/src/main/resources/assets/voxy/shaders/lod/gl46/quads2.vert +++ b/src/main/resources/assets/voxy/shaders/lod/gl46/quads2.vert @@ -6,13 +6,18 @@ #import #line 8 +//#define DEBUG_RENDER + layout(location = 0) out vec2 uv; layout(location = 1) out flat vec2 baseUV; layout(location = 2) out flat vec4 tinting; layout(location = 3) out flat vec4 addin; layout(location = 4) out flat uint flags; layout(location = 5) out flat vec4 conditionalTinting; -//layout(location = 6) out flat vec4 solidColour; + +#ifdef DEBUG_RENDER +layout(location = 6) out flat uint quadDebug; +#endif uint extractLodLevel() { return uint(gl_BaseInstance)>>27; @@ -143,4 +148,8 @@ void main() { vec3 origin = vec3(((extractRelativeLodPos()<>1,vec3(cQuadSize,0)))*(1<>24; + if (idx < workingNodeQueueEnd) { + + + } else { + //Do other queue work however we still have the work slot allocated + } + } +} \ No newline at end of file