1#pragma clang diagnostic ignored "-Wmissing-prototypes" 2 3#include <metal_stdlib> 4#include <simd/simd.h> 5 6using namespace metal; 7 8struct SSBO 9{ 10 float FragColor; 11}; 12 13constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); 14 15template<typename T> 16inline T spvSubgroupBroadcast(T value, ushort lane) 17{ 18 return simd_broadcast(value, lane); 19} 20 21template<> 22inline bool spvSubgroupBroadcast(bool value, ushort lane) 23{ 24 return !!simd_broadcast((ushort)value, lane); 25} 26 27template<uint N> 28inline vec<bool, N> spvSubgroupBroadcast(vec<bool, N> value, ushort lane) 29{ 30 return (vec<bool, N>)simd_broadcast((vec<ushort, N>)value, lane); 31} 32 33template<typename T> 34inline T spvSubgroupBroadcastFirst(T value) 35{ 36 return simd_broadcast_first(value); 37} 38 39template<> 40inline bool spvSubgroupBroadcastFirst(bool value) 41{ 42 return !!simd_broadcast_first((ushort)value); 43} 44 45template<uint N> 46inline vec<bool, N> spvSubgroupBroadcastFirst(vec<bool, N> value) 47{ 48 return (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value); 49} 50 51inline uint4 spvSubgroupBallot(bool value) 52{ 53 return uint4((simd_vote::vote_t)simd_ballot(value), 0, 0, 0); 54} 55 56inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) 57{ 58 return !!extract_bits(ballot[bit / 32], bit % 32, 1); 59} 60 61inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize) 62{ 63 uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0)); 64 ballot &= mask; 65 return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); 66} 67 68inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize) 69{ 70 uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0)); 71 ballot &= mask; 72 return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); 73} 74 75inline uint spvPopCount4(uint4 ballot) 76{ 77 return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); 78} 79 80inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize) 81{ 82 uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0)); 83 return spvPopCount4(ballot & mask); 84} 85 86inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) 87{ 88 uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0)); 89 return spvPopCount4(ballot & mask); 90} 91 92inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) 93{ 94 uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint2(0)); 95 return spvPopCount4(ballot & mask); 96} 97 98template<typename T> 99inline bool spvSubgroupAllEqual(T value) 100{ 101 return simd_all(all(value == simd_broadcast_first(value))); 102} 103 104template<> 105inline bool spvSubgroupAllEqual(bool value) 106{ 107 return simd_all(value) || !simd_any(value); 108} 109 110template<uint N> 111inline bool spvSubgroupAllEqual(vec<bool, N> value) 112{ 113 return simd_all(all(value == (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value))); 114} 115 116template<typename T> 117inline T spvSubgroupShuffle(T value, ushort lane) 118{ 119 return simd_shuffle(value, lane); 120} 121 122template<> 123inline bool spvSubgroupShuffle(bool value, ushort lane) 124{ 125 return !!simd_shuffle((ushort)value, lane); 126} 127 128template<uint N> 129inline vec<bool, N> spvSubgroupShuffle(vec<bool, N> value, ushort lane) 130{ 131 return (vec<bool, N>)simd_shuffle((vec<ushort, N>)value, lane); 132} 133 134template<typename T> 135inline T spvSubgroupShuffleXor(T value, ushort mask) 136{ 137 return simd_shuffle_xor(value, mask); 138} 139 140template<> 141inline bool spvSubgroupShuffleXor(bool value, ushort mask) 142{ 143 return !!simd_shuffle_xor((ushort)value, mask); 144} 145 146template<uint N> 147inline vec<bool, N> spvSubgroupShuffleXor(vec<bool, N> value, ushort mask) 148{ 149 return (vec<bool, N>)simd_shuffle_xor((vec<ushort, N>)value, mask); 150} 151 152template<typename T> 153inline T spvSubgroupShuffleUp(T value, ushort delta) 154{ 155 return simd_shuffle_up(value, delta); 156} 157 158template<> 159inline bool spvSubgroupShuffleUp(bool value, ushort delta) 160{ 161 return !!simd_shuffle_up((ushort)value, delta); 162} 163 164template<uint N> 165inline vec<bool, N> spvSubgroupShuffleUp(vec<bool, N> value, ushort delta) 166{ 167 return (vec<bool, N>)simd_shuffle_up((vec<ushort, N>)value, delta); 168} 169 170template<typename T> 171inline T spvSubgroupShuffleDown(T value, ushort delta) 172{ 173 return simd_shuffle_down(value, delta); 174} 175 176template<> 177inline bool spvSubgroupShuffleDown(bool value, ushort delta) 178{ 179 return !!simd_shuffle_down((ushort)value, delta); 180} 181 182template<uint N> 183inline vec<bool, N> spvSubgroupShuffleDown(vec<bool, N> value, ushort delta) 184{ 185 return (vec<bool, N>)simd_shuffle_down((vec<ushort, N>)value, delta); 186} 187 188template<typename T> 189inline T spvQuadBroadcast(T value, uint lane) 190{ 191 return quad_broadcast(value, lane); 192} 193 194template<> 195inline bool spvQuadBroadcast(bool value, uint lane) 196{ 197 return !!quad_broadcast((ushort)value, lane); 198} 199 200template<uint N> 201inline vec<bool, N> spvQuadBroadcast(vec<bool, N> value, uint lane) 202{ 203 return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane); 204} 205 206template<typename T> 207inline T spvQuadSwap(T value, uint dir) 208{ 209 return quad_shuffle_xor(value, dir + 1); 210} 211 212template<> 213inline bool spvQuadSwap(bool value, uint dir) 214{ 215 return !!quad_shuffle_xor((ushort)value, dir + 1); 216} 217 218template<uint N> 219inline vec<bool, N> spvQuadSwap(vec<bool, N> value, uint dir) 220{ 221 return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, dir + 1); 222} 223 224kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]]) 225{ 226 uint4 gl_SubgroupEqMask = uint4(1 << gl_SubgroupInvocationID, uint3(0)); 227 uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID, gl_SubgroupSize - gl_SubgroupInvocationID), uint3(0)); 228 uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID + 1, gl_SubgroupSize - gl_SubgroupInvocationID - 1), uint3(0)); 229 uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0)); 230 uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint3(0)); 231 _9.FragColor = float(gl_NumSubgroups); 232 _9.FragColor = float(gl_SubgroupID); 233 _9.FragColor = float(gl_SubgroupSize); 234 _9.FragColor = float(gl_SubgroupInvocationID); 235 simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); 236 simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); 237 simdgroup_barrier(mem_flags::mem_device); 238 simdgroup_barrier(mem_flags::mem_threadgroup); 239 simdgroup_barrier(mem_flags::mem_texture); 240 bool _39 = simd_is_first(); 241 bool elected = _39; 242 _9.FragColor = float4(gl_SubgroupEqMask).x; 243 _9.FragColor = float4(gl_SubgroupGeMask).x; 244 _9.FragColor = float4(gl_SubgroupGtMask).x; 245 _9.FragColor = float4(gl_SubgroupLeMask).x; 246 _9.FragColor = float4(gl_SubgroupLtMask).x; 247 float4 broadcasted = spvSubgroupBroadcast(float4(10.0), 8u); 248 bool2 broadcasted_bool = spvSubgroupBroadcast(bool2(true), 8u); 249 float3 first = spvSubgroupBroadcastFirst(float3(20.0)); 250 bool4 first_bool = spvSubgroupBroadcastFirst(bool4(false)); 251 uint4 ballot_value = spvSubgroupBallot(true); 252 bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID); 253 bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u); 254 uint bit_count = spvSubgroupBallotBitCount(ballot_value, gl_SubgroupSize); 255 uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID); 256 uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID); 257 uint lsb = spvSubgroupBallotFindLSB(ballot_value, gl_SubgroupSize); 258 uint msb = spvSubgroupBallotFindMSB(ballot_value, gl_SubgroupSize); 259 uint shuffled = spvSubgroupShuffle(10u, 8u); 260 bool shuffled_bool = spvSubgroupShuffle(true, 9u); 261 uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u); 262 bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u); 263 uint shuffled_up = spvSubgroupShuffleUp(20u, 4u); 264 bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u); 265 uint shuffled_down = spvSubgroupShuffleDown(20u, 4u); 266 bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u); 267 bool has_all = simd_all(true); 268 bool has_any = simd_any(true); 269 bool has_equal = spvSubgroupAllEqual(0); 270 has_equal = spvSubgroupAllEqual(true); 271 has_equal = spvSubgroupAllEqual(float3(0.0, 1.0, 2.0)); 272 has_equal = spvSubgroupAllEqual(bool4(true, true, false, true)); 273 float4 added = simd_sum(float4(20.0)); 274 int4 iadded = simd_sum(int4(20)); 275 float4 multiplied = simd_product(float4(20.0)); 276 int4 imultiplied = simd_product(int4(20)); 277 float4 lo = simd_min(float4(20.0)); 278 float4 hi = simd_max(float4(20.0)); 279 int4 slo = simd_min(int4(20)); 280 int4 shi = simd_max(int4(20)); 281 uint4 ulo = simd_min(uint4(20u)); 282 uint4 uhi = simd_max(uint4(20u)); 283 uint4 anded = simd_and(ballot_value); 284 uint4 ored = simd_or(ballot_value); 285 uint4 xored = simd_xor(ballot_value); 286 added = simd_prefix_inclusive_sum(added); 287 iadded = simd_prefix_inclusive_sum(iadded); 288 multiplied = simd_prefix_inclusive_product(multiplied); 289 imultiplied = simd_prefix_inclusive_product(imultiplied); 290 added = simd_prefix_exclusive_sum(multiplied); 291 multiplied = simd_prefix_exclusive_product(multiplied); 292 iadded = simd_prefix_exclusive_sum(imultiplied); 293 imultiplied = simd_prefix_exclusive_product(imultiplied); 294 added = quad_sum(added); 295 multiplied = quad_product(multiplied); 296 iadded = quad_sum(iadded); 297 imultiplied = quad_product(imultiplied); 298 lo = quad_min(lo); 299 hi = quad_max(hi); 300 ulo = quad_min(ulo); 301 uhi = quad_max(uhi); 302 slo = quad_min(slo); 303 shi = quad_max(shi); 304 anded = quad_and(anded); 305 ored = quad_or(ored); 306 xored = quad_xor(xored); 307 float4 swap_horiz = spvQuadSwap(float4(20.0), 0u); 308 bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u); 309 float4 swap_vertical = spvQuadSwap(float4(20.0), 1u); 310 bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u); 311 float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u); 312 bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u); 313 float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u); 314 bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u); 315} 316 317