Compute shader buffer transfer converter

This commit is contained in:
mcrcortex
2024-08-01 12:44:09 +10:00
parent 004bd1e751
commit 6fdcde856b
8 changed files with 121 additions and 54 deletions

View File

@@ -19,6 +19,11 @@ public class GlFramebuffer extends TrackedObject {
return this; return this;
} }
/**
 * Attaches a renderbuffer to this framebuffer.
 *
 * @param attachment attachment point, forwarded unchanged to {@code glNamedFramebufferRenderbuffer}
 * @param buffer     renderbuffer whose {@code id} is attached
 * @return this framebuffer, so calls can be chained
 */
public GlFramebuffer bind(int attachment, GlRenderBuffer buffer) {
    final int renderbufferId = buffer.id;
    glNamedFramebufferRenderbuffer(this.id, attachment, GL_RENDERBUFFER, renderbufferId);
    return this;
}
@Override @Override
public void free() { public void free() {
super.free0(); super.free0();

View File

@@ -0,0 +1,21 @@
package me.cortex.voxy.client.core.gl;
import me.cortex.voxy.common.util.TrackedObject;
import static org.lwjgl.opengl.GL11C.*;
import static org.lwjgl.opengl.GL45C.*;
/**
 * Wrapper around an OpenGL renderbuffer object created through the
 * direct-state-access entry points.
 */
public class GlRenderBuffer extends TrackedObject {
    /** OpenGL name of the renderbuffer. */
    public final int id;

    /**
     * Creates a renderbuffer and allocates its storage.
     *
     * @param format internal format passed to {@code glNamedRenderbufferStorage}
     * @param width  storage width passed to {@code glNamedRenderbufferStorage}
     * @param height storage height passed to {@code glNamedRenderbufferStorage}
     */
    public GlRenderBuffer(int format, int width, int height) {
        final int handle = glCreateRenderbuffers();
        glNamedRenderbufferStorage(handle, format, width, height);
        this.id = handle;
    }

    /** Runs the tracked-object release ({@code free0}) and then deletes the renderbuffer. */
    @Override
    public void free() {
        super.free0();
        glDeleteRenderbuffers(this.id);
    }
}

View File

@@ -11,6 +11,7 @@ import static org.lwjgl.opengl.GL45C.*;
public class GlTexture extends TrackedObject { public class GlTexture extends TrackedObject {
public final int id; public final int id;
private final int type; private final int type;
private int format;
public GlTexture() { public GlTexture() {
this(GL_TEXTURE_2D); this(GL_TEXTURE_2D);
} }
@@ -20,7 +21,17 @@ public class GlTexture extends TrackedObject {
this.type = type; this.type = type;
} }
/**
 * Internal constructor that selects how the texture name is obtained.
 *
 * @param type        texture target; also passed to {@code glCreateTextures} when that path is taken
 * @param useGenTypes when {@code true} the name comes from {@code glGenTextures()},
 *                    otherwise from {@code glCreateTextures(type)}
 */
private GlTexture(int type, boolean useGenTypes) {
    this.id = useGenTypes ? glGenTextures() : glCreateTextures(type);
    this.type = type;
}
public GlTexture store(int format, int levels, int width, int height) { public GlTexture store(int format, int levels, int width, int height) {
this.format = format;
if (this.type == GL_TEXTURE_2D) { if (this.type == GL_TEXTURE_2D) {
glTextureStorage2D(this.id, levels, format, width, height); glTextureStorage2D(this.id, levels, format, width, height);
} else { } else {
@@ -29,21 +40,15 @@ public class GlTexture extends TrackedObject {
return this; return this;
} }
/**
 * Creates a texture view onto this texture, covering mip level 0 and layer 0
 * with the same target and internal format as this texture.
 *
 * <p>The view is a separate {@code GlTexture} and must be freed by the caller.
 *
 * @return the newly created view
 * @throws IllegalStateException if {@link #store(int, int, int, int)} has not been called,
 *                               so no internal format is known for the view
 */
public GlTexture createView() {
    // format is only assigned in store(); 0 is not a valid GL internal format, and
    // handing it to glTextureView would only surface later as a GL error.
    if (this.format == 0) {
        throw new IllegalStateException("Cannot create a view of texture " + this.id + " before store() has been called");
    }
    // glTextureView needs a name from glGenTextures, hence the useGenTypes=true constructor path.
    var view = new GlTexture(this.type, true);
    glTextureView(view.id, this.type, this.id, this.format, 0, 1, 0, 1);
    // Record the format on the view as well so that a view of a view keeps working.
    view.format = this.format;
    return view;
}
@Override @Override
public void free() { public void free() {
super.free0(); super.free0();
glDeleteTextures(this.id); glDeleteTextures(this.id);
} }
/**
 * Returns the internal format of mip level 0 of an immutable-storage texture.
 *
 * <p>{@code GL_TEXTURE_IMMUTABLE_FORMAT} is only a true/false flag saying whether the
 * texture has immutable storage; it never contains the format itself. It is therefore
 * used purely as a check here, and the format is read with
 * {@code glGetTextureLevelParameteri(..., GL_TEXTURE_INTERNAL_FORMAT)}.
 *
 * @param texture OpenGL texture name to query
 * @return the value of {@code GL_TEXTURE_INTERNAL_FORMAT} for level 0
 * @throws IllegalStateException if {@code texture} is not a texture name or the
 *                               texture does not have immutable storage
 */
public static int getRawTextureType(int texture) {
    if (!glIsTexture(texture)) {
        throw new IllegalStateException("Not texture");
    }
    boolean immutable = glGetTextureParameteri(texture, GL_TEXTURE_IMMUTABLE_FORMAT) != 0;
    if (!immutable) {
        throw new IllegalStateException("Texture: " + texture + " is not immutable");
    }
    return glGetTextureLevelParameteri(texture, 0, GL_TEXTURE_INTERNAL_FORMAT);
}
} }

View File

@@ -18,13 +18,14 @@ public class ModelBakerySubsystem {
public final ModelFactory factory; public final ModelFactory factory;
private final IntLinkedOpenHashSet blockIdQueue = new IntLinkedOpenHashSet(); private final IntLinkedOpenHashSet blockIdQueue = new IntLinkedOpenHashSet();
public ModelBakerySubsystem(Mapper mapper) { public ModelBakerySubsystem(Mapper mapper) {
this.factory = new ModelFactory(mapper, this.storage, this.textureDownStream); this.factory = new ModelFactory(mapper, this.storage, this.textureDownStream);
} }
public void tick() { public void tick() {
//There should be a method to access the frame time IIRC, if the user framecap is unlimited lock it to like 60 fps for computation //There should be a method to access the frame time IIRC, if the user framecap is unlimited lock it to like 60 fps for computation
int BUDGET = 50;//TODO: make this computed based on the remaining free time in a frame (and like div by 2 to reduce overhead) (with a min of 1) int BUDGET = 10;//TODO: make this computed based on the remaining free time in a frame (and like div by 2 to reduce overhead) (with a min of 1)
for (int i = 0; i < BUDGET && !this.blockIdQueue.isEmpty(); i++) { for (int i = 0; i < BUDGET && !this.blockIdQueue.isEmpty(); i++) {
int blockId = -1; int blockId = -1;

View File

@@ -185,16 +185,11 @@ public class ModelFactory {
int[] colour = new int[FACE_SIZE]; int[] colour = new int[FACE_SIZE];
int[] depth = new int[FACE_SIZE]; int[] depth = new int[FACE_SIZE];
//TODO: see if there is a memory intrinsic to help here
//Copy out colour //Copy out colour
for (int i = 0; i < FACE_SIZE; i++) { for (int i = 0; i < FACE_SIZE; i++) {
//De-interpolate results
colour[i] = MemoryUtil.memGetInt(faceDataPtr+ (i*4)); colour[i] = MemoryUtil.memGetInt(faceDataPtr+ (i*4));
} depth[i] = MemoryUtil.memGetInt(faceDataPtr+ (i*4)+4);
//Shift ptr and copy out depth
faceDataPtr += FACE_SIZE*4;
for (int i = 0; i < FACE_SIZE; i++) {
depth[i] = MemoryUtil.memGetInt(faceDataPtr+ (i*4));
} }
textureData[face] = new ColourDepthTextureData(colour, depth, MODEL_TEXTURE_SIZE, MODEL_TEXTURE_SIZE); textureData[face] = new ColourDepthTextureData(colour, depth, MODEL_TEXTURE_SIZE, MODEL_TEXTURE_SIZE);

View File

@@ -1,11 +1,8 @@
package me.cortex.voxy.client.core.model; package me.cortex.voxy.client.core.model;
import com.mojang.blaze3d.platform.GlConst;
import com.mojang.blaze3d.platform.GlStateManager; import com.mojang.blaze3d.platform.GlStateManager;
import com.mojang.blaze3d.systems.RenderSystem;
import com.mojang.blaze3d.systems.VertexSorter;
import me.cortex.voxy.client.core.gl.GlBuffer;
import me.cortex.voxy.client.core.gl.GlFramebuffer; import me.cortex.voxy.client.core.gl.GlFramebuffer;
import me.cortex.voxy.client.core.gl.GlRenderBuffer;
import me.cortex.voxy.client.core.gl.GlTexture; import me.cortex.voxy.client.core.gl.GlTexture;
import me.cortex.voxy.client.core.gl.shader.Shader; import me.cortex.voxy.client.core.gl.shader.Shader;
import me.cortex.voxy.client.core.gl.shader.ShaderType; import me.cortex.voxy.client.core.gl.shader.ShaderType;
@@ -30,14 +27,14 @@ import net.minecraft.world.biome.ColorResolver;
import net.minecraft.world.chunk.light.LightingProvider; import net.minecraft.world.chunk.light.LightingProvider;
import org.jetbrains.annotations.Nullable; import org.jetbrains.annotations.Nullable;
import org.joml.Matrix4f; import org.joml.Matrix4f;
import org.lwjgl.opengl.GL11;
import org.lwjgl.opengl.GL11C; import org.lwjgl.opengl.GL11C;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import static org.lwjgl.opengl.ARBDirectStateAccess.glGetTextureImage; import static org.lwjgl.opengl.ARBDirectStateAccess.glGetTextureImage;
import static org.lwjgl.opengl.ARBImaging.GL_FUNC_ADD; import static org.lwjgl.opengl.ARBDirectStateAccess.glTextureParameteri;
import static org.lwjgl.opengl.ARBImaging.glBlendEquation;
import static org.lwjgl.opengl.ARBShaderImageLoadStore.GL_FRAMEBUFFER_BARRIER_BIT; import static org.lwjgl.opengl.ARBShaderImageLoadStore.GL_FRAMEBUFFER_BARRIER_BIT;
import static org.lwjgl.opengl.ARBShaderImageLoadStore.glMemoryBarrier; import static org.lwjgl.opengl.ARBShaderImageLoadStore.glMemoryBarrier;
import static org.lwjgl.opengl.GL11C.GL_TEXTURE_2D; import static org.lwjgl.opengl.GL11C.GL_TEXTURE_2D;
@@ -46,7 +43,7 @@ import static org.lwjgl.opengl.GL15C.glBindBuffer;
import static org.lwjgl.opengl.GL20C.glUniformMatrix4fv; import static org.lwjgl.opengl.GL20C.glUniformMatrix4fv;
import static org.lwjgl.opengl.GL21C.GL_PIXEL_PACK_BUFFER; import static org.lwjgl.opengl.GL21C.GL_PIXEL_PACK_BUFFER;
import static org.lwjgl.opengl.GL30.*; import static org.lwjgl.opengl.GL30.*;
import static org.lwjgl.opengl.GL43.glCopyImageSubData; import static org.lwjgl.opengl.GL43.*;
//Builds a texture for each face of a model //Builds a texture for each face of a model
public class ModelTextureBakery { public class ModelTextureBakery {
@@ -54,6 +51,7 @@ public class ModelTextureBakery {
private final int height; private final int height;
private final GlTexture colourTex; private final GlTexture colourTex;
private final GlTexture depthTex; private final GlTexture depthTex;
private final GlTexture depthTexView;
private final GlFramebuffer framebuffer; private final GlFramebuffer framebuffer;
private final GlStateCapture glState = GlStateCapture.make() private final GlStateCapture glState = GlStateCapture.make()
.addCapability(GL_DEPTH_TEST) .addCapability(GL_DEPTH_TEST)
@@ -62,6 +60,7 @@ public class ModelTextureBakery {
.addCapability(GL_CULL_FACE) .addCapability(GL_CULL_FACE)
.addTexture(GL_TEXTURE0) .addTexture(GL_TEXTURE0)
.addTexture(GL_TEXTURE1) .addTexture(GL_TEXTURE1)
.addTexture(GL_TEXTURE2)
.build() .build()
; ;
private final Shader rasterShader = Shader.make() private final Shader rasterShader = Shader.make()
@@ -69,10 +68,9 @@ public class ModelTextureBakery {
.add(ShaderType.FRAGMENT, "voxy:bakery/position_tex.fsh") .add(ShaderType.FRAGMENT, "voxy:bakery/position_tex.fsh")
.compile(); .compile();
private static final List<MatrixStack> FACE_VIEWS = new ArrayList<>(); private final Shader copyOutShader;
//A truly terrible hackfix for nvidia drivers murderizing performance with PBO readout with a depth texture private static final List<MatrixStack> FACE_VIEWS = new ArrayList<>();
private static final GlTexture TEMPORARY = new GlTexture().store(GL_R32UI, 1, ModelFactory.MODEL_TEXTURE_SIZE, ModelFactory.MODEL_TEXTURE_SIZE);
public ModelTextureBakery(int width, int height) { public ModelTextureBakery(int width, int height) {
@@ -82,8 +80,19 @@ public class ModelTextureBakery {
this.height = height; this.height = height;
this.colourTex = new GlTexture().store(GL_RGBA8, 1, width, height); this.colourTex = new GlTexture().store(GL_RGBA8, 1, width, height);
this.depthTex = new GlTexture().store(GL_DEPTH24_STENCIL8, 1, width, height); this.depthTex = new GlTexture().store(GL_DEPTH24_STENCIL8, 1, width, height);
this.depthTexView = this.depthTex.createView();
this.framebuffer = new GlFramebuffer().bind(GL_COLOR_ATTACHMENT0, this.colourTex).bind(GL_DEPTH_STENCIL_ATTACHMENT, this.depthTex).verify(); this.framebuffer = new GlFramebuffer().bind(GL_COLOR_ATTACHMENT0, this.colourTex).bind(GL_DEPTH_STENCIL_ATTACHMENT, this.depthTex).verify();
glTextureParameteri(this.depthTex.id, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT);
glTextureParameteri(this.depthTexView.id, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX);
this.copyOutShader = Shader.make()
.define("WIDTH", width)
.define("HEIGHT", height)
.add(ShaderType.COMPUTE, "voxy:bakery/buffercopy.comp")
.compile();
//This is done to help make debugging easier //This is done to help make debugging easier
FACE_VIEWS.clear(); FACE_VIEWS.clear();
AddViews(); AddViews();
@@ -175,15 +184,13 @@ public class ModelTextureBakery {
glStencilFunc(GL_ALWAYS, 1, 0xFF); glStencilFunc(GL_ALWAYS, 1, 0xFF);
glStencilMask(0xFF); glStencilMask(0xFF);
this.rasterShader.bind();
glActiveTexture(GL_TEXTURE0);
int texId = MinecraftClient.getInstance().getTextureManager().getTexture(Identifier.of("minecraft", "textures/atlas/blocks.png")).getGlId(); int texId = MinecraftClient.getInstance().getTextureManager().getTexture(Identifier.of("minecraft", "textures/atlas/blocks.png")).getGlId();
GlUniform.uniform1(0, 0);
final int TEXTURE_SIZE = this.width*this.height *4;//NOTE! assume here that both depth and colour are 4 bytes in size final int TEXTURE_SIZE = this.width*this.height *4;//NOTE! assume here that both depth and colour are 4 bytes in size
for (int i = 0; i < FACE_VIEWS.size(); i++) { for (int i = 0; i < FACE_VIEWS.size(); i++) {
int faceOffset = streamBaseOffset + TEXTURE_SIZE*i*2; int faceOffset = streamBaseOffset + TEXTURE_SIZE*i*2;
captureViewToStream(state, model, entityModel, FACE_VIEWS.get(i), randomValue, i, renderFluid, texId, projection, streamBuffer, faceOffset, faceOffset + TEXTURE_SIZE); captureViewToStream(state, model, entityModel, FACE_VIEWS.get(i), randomValue, i, renderFluid, texId, projection, streamBuffer, faceOffset);
} }
renderLayer.endDrawing(); renderLayer.endDrawing();
@@ -199,7 +206,11 @@ public class ModelTextureBakery {
} }
private final BufferAllocator allocator = new BufferAllocator(786432); private final BufferAllocator allocator = new BufferAllocator(786432);
private void captureViewToStream(BlockState state, BakedModel model, BakedBlockEntityModel blockEntityModel, MatrixStack stack, long randomValue, int face, boolean renderFluid, int textureId, Matrix4f projection, int streamBuffer, int streamColourOffset, int streamDepthOffset) { private void captureViewToStream(BlockState state, BakedModel model, BakedBlockEntityModel blockEntityModel, MatrixStack stack, long randomValue, int face, boolean renderFluid, int textureId, Matrix4f projection, int streamBuffer, int streamOffset) {
this.rasterShader.bind();
glActiveTexture(GL_TEXTURE0);
GlUniform.uniform1(0, 0);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
float[] mat = new float[4*4]; float[] mat = new float[4*4];
new Matrix4f(projection).mul(stack.peek().getPositionMatrix()).get(mat); new Matrix4f(projection).mul(stack.peek().getPositionMatrix()).get(mat);
@@ -292,27 +303,24 @@ public class ModelTextureBakery {
glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
GlStateManager._pixelStore(GL_PACK_ROW_LENGTH, 0); this.emitToStream(streamBuffer, streamOffset);
GlStateManager._pixelStore(GL_PACK_SKIP_PIXELS, 0); }
GlStateManager._pixelStore(GL_PACK_SKIP_ROWS, 0);
GlStateManager._pixelStore(GL_PACK_ALIGNMENT, 4);
glBindBuffer(GL_PIXEL_PACK_BUFFER, streamBuffer); private void emitToStream(int streamBuffer, int streamOffset) {
{//Copy colour if (streamOffset%4 != 0) {
glGetTextureImage(this.colourTex.id, 0, GL_RGBA, GL_UNSIGNED_BYTE, this.width*this.height*4, streamColourOffset); throw new IllegalArgumentException();
} }
this.copyOutShader.bind();
glActiveTexture(GL_TEXTURE0);
GL11C.glBindTexture(GL11.GL_TEXTURE_2D, this.colourTex.id);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL11.GL_TEXTURE_2D, this.depthTex.id);
glActiveTexture(GL_TEXTURE2);
glBindTexture(GL11.GL_TEXTURE_2D, this.depthTexView.id);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, streamBuffer);
glUniform1ui(4, streamOffset/4);
//TODO: fixme!! only do this dodgy double copy if the driver is nvidia glDispatchCompute(1,1,1);
{//Copy depth
//First copy to the temporary buffer
glCopyImageSubData(textureId, GL_TEXTURE_2D, 0,0,0,0,
TEMPORARY.id, GL_TEXTURE_2D, 0,0,0,0,
ModelFactory.MODEL_TEXTURE_SIZE, ModelFactory.MODEL_TEXTURE_SIZE, 1);
//Then download
glGetTextureImage(TEMPORARY.id, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, this.width*this.height*4, streamDepthOffset);
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
} }
private static void renderQuads(BufferBuilder builder, BlockState state, BakedModel model, MatrixStack stack, long randomValue) { private static void renderQuads(BufferBuilder builder, BlockState state, BakedModel model, MatrixStack stack, long randomValue) {
@@ -329,8 +337,10 @@ public class ModelTextureBakery {
public void free() { public void free() {
this.framebuffer.free(); this.framebuffer.free();
this.colourTex.free(); this.colourTex.free();
this.depthTexView.free();
this.depthTex.free(); this.depthTex.free();
this.rasterShader.free(); this.rasterShader.free();
this.copyOutShader.free();
this.allocator.close(); this.allocator.close();
} }
} }

View File

@@ -94,7 +94,8 @@ public class TextureUtils {
//https://registry.khronos.org/OpenGL-Refpages/gl4/html/glDepthRange.xhtml //https://registry.khronos.org/OpenGL-Refpages/gl4/html/glDepthRange.xhtml
// due to this and the unsigned bullshit, i believe the depth value needs to get multiplied by 2 // due to this and the unsigned bullshit, i believe the depth value needs to get multiplied by 2
depthF *= 2; //Shouldent be needed due to the compute bake copy
//depthF *= 2;
if (depthF > 1.00001f) { if (depthF > 1.00001f) {
System.err.println("Warning: Depth greater than 1"); System.err.println("Warning: Depth greater than 1");
depthF = 1.0f; depthF = 1.0f;
@@ -180,6 +181,7 @@ public class TextureUtils {
return weightedAverageColor(weightedAverageColor(one, two), weightedAverageColor(three, four)); return weightedAverageColor(weightedAverageColor(one, two), weightedAverageColor(three, four));
} }
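//TODO: FIXME: this reads the colour as ABGR although the texture format is RGBA. The bake copy shader packs R into the lowest byte and A into the highest, which is ABGR when viewed as an int; confirm that this is the intended layout.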
private static int weightedAverageColor(int one, int two) { private static int weightedAverageColor(int one, int two) {
int alphaOne = ColorHelper.Abgr.getAlpha(one); int alphaOne = ColorHelper.Abgr.getAlpha(one);
int alphaTwo = ColorHelper.Abgr.getAlpha(two); int alphaTwo = ColorHelper.Abgr.getAlpha(two);

View File

@@ -0,0 +1,28 @@
#version 450

// Bakery copy-out pass: packs the baked colour texture and the depth/stencil texture
// into a shader storage buffer as two uints per texel:
//   outBuffer[i*2 + 0] = colour, R in bits 0-7, G in 8-15, B in 16-23, A in 24-31
//   outBuffer[i*2 + 1] = (24-bit depth << 8) | stencil
// WIDTH and HEIGHT are injected by the host as preprocessor defines.
// NOTE(review): the host dispatches a single work group, so WIDTH*HEIGHT must stay within
// the driver's maximum work-group invocation count - confirm before raising the bake size.
layout(local_size_x = WIDTH, local_size_y = HEIGHT) in;

layout(binding = 0) uniform sampler2D colourTexIn;
layout(binding = 1) uniform sampler2D depthTexIn;
layout(binding = 2) uniform usampler2D stencilTexIn;

layout(binding = 3, std430) writeonly restrict buffer OutBuffer {
    uint[] outBuffer;
};

// Start of this face's data in outBuffer, measured in uints (not bytes).
layout(location=4) uniform uint bufferOffset;

void main() {
    ivec2 point = ivec2(gl_GlobalInvocationID.xy);

    // Row-major texel index: the row stride is the texture WIDTH. Using HEIGHT here
    // only happens to work while the bake target is square.
    uint texelIndex = gl_GlobalInvocationID.x + (gl_GlobalInvocationID.y*WIDTH);
    uint writeIndex = (texelIndex*2) + bufferOffset;

    // packUnorm4x8 clamps to [0,1], rounds to the nearest 8-bit value and places the first
    // component in the least significant byte. Truncating float*255 could land one step low.
    outBuffer[writeIndex] = packUnorm4x8(texelFetch(colourTexIn, point, 0));

    float depth = clamp(texelFetch(depthTexIn, point, 0).r, 0.0, 1.0);
    uint stencil = texelFetch(stencilTexIn, point, 0).r;
    // Round rather than truncate so the 24-bit depth value is not off by one.
    uint value = uint(round(depth * float((1<<24)-1)))<<8;
    value |= stencil;
    outBuffer[writeIndex+1] = value;
}