1#include <metal_stdlib> 2#include <simd/simd.h> 3using namespace metal; 4struct GlobalCounts { 5 atomic_uint firstHalfCount; 6 atomic_uint secondHalfCount; 7}; 8struct Inputs { 9 uint3 sk_LocalInvocationID; 10}; 11struct ssbo { 12 GlobalCounts globalCounts; 13}; 14struct Globals { 15 device ssbo* _anonInterface0; 16}; 17struct Threadgroups { 18 array<atomic_uint, 2> localCounts; 19}; 20kernel void computeMain(uint3 sk_LocalInvocationID [[thread_position_in_threadgroup]], device ssbo& _anonInterface0 [[buffer(0)]]) { 21 Globals _globals{&_anonInterface0}; 22 (void)_globals; 23 threadgroup Threadgroups _threadgroups{{}}; 24 (void)_threadgroups; 25 Inputs _in = { sk_LocalInvocationID }; 26 if (_in.sk_LocalInvocationID.x == 0u) { 27 atomic_store_explicit(&_threadgroups.localCounts[0], 0u, memory_order_relaxed); 28 atomic_store_explicit(&_threadgroups.localCounts[1], 0u, memory_order_relaxed); 29 } 30 threadgroup_barrier(mem_flags::mem_threadgroup); 31 uint idx = uint(_in.sk_LocalInvocationID.x < 512u ? 0 : 1); 32 atomic_fetch_add_explicit(&_threadgroups.localCounts[idx], 1u, memory_order_relaxed); 33 threadgroup_barrier(mem_flags::mem_threadgroup); 34 if (_in.sk_LocalInvocationID.x == 0u) { 35 atomic_fetch_add_explicit(&_globals._anonInterface0->globalCounts.firstHalfCount, atomic_load_explicit(&_threadgroups.localCounts[0], memory_order_relaxed), memory_order_relaxed); 36 atomic_fetch_add_explicit(&_globals._anonInterface0->globalCounts.secondHalfCount, atomic_load_explicit(&_threadgroups.localCounts[1], memory_order_relaxed), memory_order_relaxed); 37 } 38 return; 39} 40