Attempted micro-optimization of memcpy writer

This commit is contained in:
mcrcortex
2025-07-20 18:42:11 +10:00
parent bdcdae791e
commit 0514528a4c

View File

@@ -22,7 +22,10 @@ void main() {
uint dst = job.y;
uint siz = job.z;
for (uint i = gl_LocalInvocationID.x; i < siz; i+=WORK_SIZE) {
outputBuffer[dst+i] = dataInBuffer[src+i];
uint workPerThread = (siz+255)>>8;
uint start = gl_LocalInvocationID.x*workPerThread+src;
uint diff = dst-src;
for (uint i = start; i < min(start+workPerThread,siz+src); i++) {
outputBuffer[i+diff] = dataInBuffer[i];
}
}