// Invocation count used to split a workgroup into a lower and an upper half.
// NOTE(review): nothing in this file configures the dispatch size; presumably
// the caller launches workgroups of exactly this many invocations -- confirm.
const uint WORKGROUP_SIZE = 1024;

// Running totals that invocation 0 of each workgroup adds its tallies into.
struct GlobalCounts {
    atomicUint firstHalfCount;
    atomicUint secondHalfCount;
};
layout(metal, binding = 0) buffer ssbo {
    GlobalCounts globalCounts;
};

// Workgroup-local tallies: element 0 is bumped by invocations whose x index is
// below WORKGROUP_SIZE / 2, element 1 by all remaining invocations.
workgroup atomicUint localCounts[2];

// Counts how many invocations of the workgroup fall into each half, then has
// invocation 0 publish both tallies to the global counters.
void main() {
    uint localX = sk_LocalInvocationID.x;
    bool isLeader = (localX == 0);

    // Invocation 0 zeroes both workgroup tallies.
    if (isLeader) {
        atomicStore(localCounts[0], 0);
        atomicStore(localCounts[1], 0);
    }

    // Barrier so that no invocation increments a tally before it has been reset.
    workgroupBarrier();

    // Choose the tally for this invocation: 0 for the lower half, 1 otherwise.
    uint slot = 1;
    if (localX < (WORKGROUP_SIZE / 2)) {
        slot = 0;
    }
    atomicAdd(localCounts[slot], 1);

    // Barrier so that every increment has landed before the tallies are read.
    workgroupBarrier();

    // Invocation 0 folds the workgroup tallies into the global counters.
    if (isLeader) {
        uint firstTally = atomicLoad(localCounts[0]);
        atomicAdd(globalCounts.firstHalfCount, firstTally);

        uint secondTally = atomicLoad(localCounts[1]);
        atomicAdd(globalCounts.secondHalfCount, secondTally);
    }
}