• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#pragma clang diagnostic ignored "-Wmissing-prototypes"
2
3#include <metal_stdlib>
4#include <simd/simd.h>
5
6using namespace metal;
7
8struct SSBO
9{
10    float FragColor;
11};
12
13constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
14
15template<typename T>
16inline T spvSubgroupBroadcast(T value, ushort lane)
17{
18    return simd_broadcast(value, lane);
19}
20
21template<>
22inline bool spvSubgroupBroadcast(bool value, ushort lane)
23{
24    return !!simd_broadcast((ushort)value, lane);
25}
26
27template<uint N>
28inline vec<bool, N> spvSubgroupBroadcast(vec<bool, N> value, ushort lane)
29{
30    return (vec<bool, N>)simd_broadcast((vec<ushort, N>)value, lane);
31}
32
33template<typename T>
34inline T spvSubgroupBroadcastFirst(T value)
35{
36    return simd_broadcast_first(value);
37}
38
39template<>
40inline bool spvSubgroupBroadcastFirst(bool value)
41{
42    return !!simd_broadcast_first((ushort)value);
43}
44
45template<uint N>
46inline vec<bool, N> spvSubgroupBroadcastFirst(vec<bool, N> value)
47{
48    return (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value);
49}
50
51inline uint4 spvSubgroupBallot(bool value)
52{
53    return uint4((simd_vote::vote_t)simd_ballot(value), 0, 0, 0);
54}
55
56inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)
57{
58    return !!extract_bits(ballot[bit / 32], bit % 32, 1);
59}
60
61inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)
62{
63    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));
64    ballot &= mask;
65    return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);
66}
67
68inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)
69{
70    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));
71    ballot &= mask;
72    return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0);
73}
74
75inline uint spvPopCount4(uint4 ballot)
76{
77    return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);
78}
79
80inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)
81{
82    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));
83    return spvPopCount4(ballot & mask);
84}
85
86inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
87{
88    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0));
89    return spvPopCount4(ballot & mask);
90}
91
92inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
93{
94    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint2(0));
95    return spvPopCount4(ballot & mask);
96}
97
98template<typename T>
99inline bool spvSubgroupAllEqual(T value)
100{
101    return simd_all(all(value == simd_broadcast_first(value)));
102}
103
104template<>
105inline bool spvSubgroupAllEqual(bool value)
106{
107    return simd_all(value) || !simd_any(value);
108}
109
110template<uint N>
111inline bool spvSubgroupAllEqual(vec<bool, N> value)
112{
113    return simd_all(all(value == (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value)));
114}
115
116template<typename T>
117inline T spvSubgroupShuffle(T value, ushort lane)
118{
119    return simd_shuffle(value, lane);
120}
121
122template<>
123inline bool spvSubgroupShuffle(bool value, ushort lane)
124{
125    return !!simd_shuffle((ushort)value, lane);
126}
127
128template<uint N>
129inline vec<bool, N> spvSubgroupShuffle(vec<bool, N> value, ushort lane)
130{
131    return (vec<bool, N>)simd_shuffle((vec<ushort, N>)value, lane);
132}
133
134template<typename T>
135inline T spvSubgroupShuffleXor(T value, ushort mask)
136{
137    return simd_shuffle_xor(value, mask);
138}
139
140template<>
141inline bool spvSubgroupShuffleXor(bool value, ushort mask)
142{
143    return !!simd_shuffle_xor((ushort)value, mask);
144}
145
146template<uint N>
147inline vec<bool, N> spvSubgroupShuffleXor(vec<bool, N> value, ushort mask)
148{
149    return (vec<bool, N>)simd_shuffle_xor((vec<ushort, N>)value, mask);
150}
151
152template<typename T>
153inline T spvSubgroupShuffleUp(T value, ushort delta)
154{
155    return simd_shuffle_up(value, delta);
156}
157
158template<>
159inline bool spvSubgroupShuffleUp(bool value, ushort delta)
160{
161    return !!simd_shuffle_up((ushort)value, delta);
162}
163
164template<uint N>
165inline vec<bool, N> spvSubgroupShuffleUp(vec<bool, N> value, ushort delta)
166{
167    return (vec<bool, N>)simd_shuffle_up((vec<ushort, N>)value, delta);
168}
169
170template<typename T>
171inline T spvSubgroupShuffleDown(T value, ushort delta)
172{
173    return simd_shuffle_down(value, delta);
174}
175
176template<>
177inline bool spvSubgroupShuffleDown(bool value, ushort delta)
178{
179    return !!simd_shuffle_down((ushort)value, delta);
180}
181
182template<uint N>
183inline vec<bool, N> spvSubgroupShuffleDown(vec<bool, N> value, ushort delta)
184{
185    return (vec<bool, N>)simd_shuffle_down((vec<ushort, N>)value, delta);
186}
187
188template<typename T>
189inline T spvQuadBroadcast(T value, uint lane)
190{
191    return quad_broadcast(value, lane);
192}
193
194template<>
195inline bool spvQuadBroadcast(bool value, uint lane)
196{
197    return !!quad_broadcast((ushort)value, lane);
198}
199
200template<uint N>
201inline vec<bool, N> spvQuadBroadcast(vec<bool, N> value, uint lane)
202{
203    return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);
204}
205
206template<typename T>
207inline T spvQuadSwap(T value, uint dir)
208{
209    return quad_shuffle_xor(value, dir + 1);
210}
211
212template<>
213inline bool spvQuadSwap(bool value, uint dir)
214{
215    return !!quad_shuffle_xor((ushort)value, dir + 1);
216}
217
218template<uint N>
219inline vec<bool, N> spvQuadSwap(vec<bool, N> value, uint dir)
220{
221    return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, dir + 1);
222}
223
224kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]])
225{
226    uint4 gl_SubgroupEqMask = uint4(1 << gl_SubgroupInvocationID, uint3(0));
227    uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID, gl_SubgroupSize - gl_SubgroupInvocationID), uint3(0));
228    uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID + 1, gl_SubgroupSize - gl_SubgroupInvocationID - 1), uint3(0));
229    uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0));
230    uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint3(0));
231    _9.FragColor = float(gl_NumSubgroups);
232    _9.FragColor = float(gl_SubgroupID);
233    _9.FragColor = float(gl_SubgroupSize);
234    _9.FragColor = float(gl_SubgroupInvocationID);
235    simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
236    simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture);
237    simdgroup_barrier(mem_flags::mem_device);
238    simdgroup_barrier(mem_flags::mem_threadgroup);
239    simdgroup_barrier(mem_flags::mem_texture);
240    bool _39 = simd_is_first();
241    bool elected = _39;
242    _9.FragColor = float4(gl_SubgroupEqMask).x;
243    _9.FragColor = float4(gl_SubgroupGeMask).x;
244    _9.FragColor = float4(gl_SubgroupGtMask).x;
245    _9.FragColor = float4(gl_SubgroupLeMask).x;
246    _9.FragColor = float4(gl_SubgroupLtMask).x;
247    float4 broadcasted = spvSubgroupBroadcast(float4(10.0), 8u);
248    bool2 broadcasted_bool = spvSubgroupBroadcast(bool2(true), 8u);
249    float3 first = spvSubgroupBroadcastFirst(float3(20.0));
250    bool4 first_bool = spvSubgroupBroadcastFirst(bool4(false));
251    uint4 ballot_value = spvSubgroupBallot(true);
252    bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID);
253    bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u);
254    uint bit_count = spvSubgroupBallotBitCount(ballot_value, gl_SubgroupSize);
255    uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
256    uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
257    uint lsb = spvSubgroupBallotFindLSB(ballot_value, gl_SubgroupSize);
258    uint msb = spvSubgroupBallotFindMSB(ballot_value, gl_SubgroupSize);
259    uint shuffled = spvSubgroupShuffle(10u, 8u);
260    bool shuffled_bool = spvSubgroupShuffle(true, 9u);
261    uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u);
262    bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u);
263    uint shuffled_up = spvSubgroupShuffleUp(20u, 4u);
264    bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u);
265    uint shuffled_down = spvSubgroupShuffleDown(20u, 4u);
266    bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u);
267    bool has_all = simd_all(true);
268    bool has_any = simd_any(true);
269    bool has_equal = spvSubgroupAllEqual(0);
270    has_equal = spvSubgroupAllEqual(true);
271    has_equal = spvSubgroupAllEqual(float3(0.0, 1.0, 2.0));
272    has_equal = spvSubgroupAllEqual(bool4(true, true, false, true));
273    float4 added = simd_sum(float4(20.0));
274    int4 iadded = simd_sum(int4(20));
275    float4 multiplied = simd_product(float4(20.0));
276    int4 imultiplied = simd_product(int4(20));
277    float4 lo = simd_min(float4(20.0));
278    float4 hi = simd_max(float4(20.0));
279    int4 slo = simd_min(int4(20));
280    int4 shi = simd_max(int4(20));
281    uint4 ulo = simd_min(uint4(20u));
282    uint4 uhi = simd_max(uint4(20u));
283    uint4 anded = simd_and(ballot_value);
284    uint4 ored = simd_or(ballot_value);
285    uint4 xored = simd_xor(ballot_value);
286    added = simd_prefix_inclusive_sum(added);
287    iadded = simd_prefix_inclusive_sum(iadded);
288    multiplied = simd_prefix_inclusive_product(multiplied);
289    imultiplied = simd_prefix_inclusive_product(imultiplied);
290    added = simd_prefix_exclusive_sum(multiplied);
291    multiplied = simd_prefix_exclusive_product(multiplied);
292    iadded = simd_prefix_exclusive_sum(imultiplied);
293    imultiplied = simd_prefix_exclusive_product(imultiplied);
294    added = quad_sum(added);
295    multiplied = quad_product(multiplied);
296    iadded = quad_sum(iadded);
297    imultiplied = quad_product(imultiplied);
298    lo = quad_min(lo);
299    hi = quad_max(hi);
300    ulo = quad_min(ulo);
301    uhi = quad_max(uhi);
302    slo = quad_min(slo);
303    shi = quad_max(shi);
304    anded = quad_and(anded);
305    ored = quad_or(ored);
306    xored = quad_xor(xored);
307    float4 swap_horiz = spvQuadSwap(float4(20.0), 0u);
308    bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u);
309    float4 swap_vertical = spvQuadSwap(float4(20.0), 1u);
310    bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u);
311    float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u);
312    bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u);
313    float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u);
314    bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u);
315}
316
317