attempted micro optimization of memcpy writer
This commit is contained in:
@@ -22,7 +22,10 @@ void main() {
|
|||||||
uint dst = job.y;
|
uint dst = job.y;
|
||||||
uint siz = job.z;
|
uint siz = job.z;
|
||||||
|
|
||||||
for (uint i = gl_LocalInvocationID.x; i < siz; i+=WORK_SIZE) {
|
uint workPerThread = (siz+255)>>8;
|
||||||
outputBuffer[dst+i] = dataInBuffer[src+i];
|
uint start = gl_LocalInvocationID.x*workPerThread+src;
|
||||||
|
uint diff = dst-src;
|
||||||
|
for (uint i = start; i < min(start+workPerThread,siz+src); i++) {
|
||||||
|
outputBuffer[i+diff] = dataInBuffer[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user