• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // GENERATED FILE - DO NOT EDIT.
2 // Generated by gen_mtl_internal_shaders.py
3 //
4 // Copyright 2020 The ANGLE Project Authors. All rights reserved.
5 // Use of this source code is governed by a BSD-style license that can be
6 // found in the LICENSE file.
7 //
8 
9 // C++ string version of combined Metal default shaders.
10 
11 static char gDefaultMetallibSrc[] = R"(
12 # 1 "temp_master_source.metal"
13 # 1 "<built-in>" 1
14 # 1 "<built-in>" 3
15 # 483 "<built-in>" 3
16 # 1 "<command line>" 1
17 # 1 "<built-in>" 2
18 # 1 "temp_master_source.metal" 2
19 # 1 "./blit.metal" 1
20 
21 
22 
23 
24 
25 
26 
27 # 1 "./common.h" 1
28 # 13 "./common.h"
29 # include <simd/simd.h>
30 # include <metal_stdlib>
31 
32 
33 # 1 "./constants.h" 1
34 # 11 "./constants.h"
35 namespace rx
36 {
37 namespace mtl_shader
38 {
39 
40 enum
41 {
42     kTextureType2D = 0,
43     kTextureType2DMultisample = 1,
44     kTextureType2DArray = 2,
45     kTextureTypeCube = 3,
46     kTextureType3D = 4,
47     kTextureTypeCount = 5,
48 };
49 
50 
51 
52 
53 }
54 }
55 # 18 "./common.h" 2
56 
57 
58 
59 
60 
61 
62 
63 using namespace metal;
64 
65 
66 
67 constant uint32_t kNumColorOutputs [[function_constant(0)]];
68 constant bool kColorOutputAvailable0 = kNumColorOutputs > 0;
69 constant bool kColorOutputAvailable1 = kNumColorOutputs > 1;
70 constant bool kColorOutputAvailable2 = kNumColorOutputs > 2;
71 constant bool kColorOutputAvailable3 = kNumColorOutputs > 3;
72 constant bool kColorOutputAvailable4 = kNumColorOutputs > 4;
73 constant bool kColorOutputAvailable5 = kNumColorOutputs > 5;
74 constant bool kColorOutputAvailable6 = kNumColorOutputs > 6;
75 constant bool kColorOutputAvailable7 = kNumColorOutputs > 7;
76 
77 namespace rx
78 {
79 namespace mtl_shader
80 {
81 
82 
83 constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)};
84 
85 template <typename T>
86 struct MultipleColorOutputs
87 {
88     vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]];
89     vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]];
90     vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]];
91     vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]];
92     vec<T, 4> color4 [[color(4), function_constant(kColorOutputAvailable4)]];
93     vec<T, 4> color5 [[color(5), function_constant(kColorOutputAvailable5)]];
94     vec<T, 4> color6 [[color(6), function_constant(kColorOutputAvailable6)]];
95     vec<T, 4> color7 [[color(7), function_constant(kColorOutputAvailable7)]];
96 };
97 # 69 "./common.h"
98 template <typename T>
99 static inline MultipleColorOutputs<T> toMultipleColorOutputs(vec<T, 4> color)
100 {
101     MultipleColorOutputs<T> re;
102 
103     do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0);
104     do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0);
105     do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0);
106     do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0);
107     do { if (kColorOutputAvailable4) { re.color4 = color; } } while (0);
108     do { if (kColorOutputAvailable5) { re.color5 = color; } } while (0);
109     do { if (kColorOutputAvailable6) { re.color6 = color; } } while (0);
110     do { if (kColorOutputAvailable7) { re.color7 = color; } } while (0);
111 
112     return re;
113 }
114 
115 static inline float3 cubeTexcoords(float2 texcoords, int face)
116 {
117     texcoords = 2.0 * texcoords - 1.0;
118     switch (face)
119     {
120         case 0:
121             return float3(1.0, -texcoords.y, -texcoords.x);
122         case 1:
123             return float3(-1.0, -texcoords.y, texcoords.x);
124         case 2:
125             return float3(texcoords.x, 1.0, texcoords.y);
126         case 3:
127             return float3(texcoords.x, -1.0, -texcoords.y);
128         case 4:
129             return float3(texcoords.x, -texcoords.y, 1.0);
130         case 5:
131             return float3(-texcoords.x, -texcoords.y, -1.0);
132     }
133     return float3(texcoords, 0);
134 }
135 
136 template <typename T>
137 static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords)
138 {
139     uint samples = srcTexture.get_num_samples();
140 
141     vec<T, 4> output(0);
142 
143     for (uint sample = 0; sample < samples; ++sample)
144     {
145         output += srcTexture.read(coords, sample);
146     }
147 
148     output = output / samples;
149 
150     return output;
151 }
152 
153 static inline float4 sRGBtoLinear(float4 color)
154 {
155     float3 linear1 = color.rgb / 12.92;
156     float3 linear2 = powr((color.rgb + float3(0.055)) / 1.055, 2.4);
157     float3 factor = float3(color.rgb <= float3(0.04045));
158     float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a);
159 
160     return linear;
161 }
162 
163 static inline float linearToSRGB(float color)
164 {
165     if (color <= 0.0f)
166         return 0.0f;
167     if (color < 0.0031308f)
168         return 12.92f * color;
169     if (color < 1.0f)
170         return 1.055f * powr(color, 0.41666f) - 0.055f;
171     return 1.0f;
172 }
173 
174 static inline float4 linearToSRGB(float4 color)
175 {
176     return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a);
177 }
178 
179 template <typename Short>
180 static inline Short bytesToShort(constant uchar *input, uint offset)
181 {
182     Short inputLo = input[offset];
183     Short inputHi = input[offset + 1];
184 
185     return inputLo | (inputHi << 8);
186 }
187 
188 template <typename Int>
189 static inline Int bytesToInt(constant uchar *input, uint offset)
190 {
191     Int input0 = input[offset];
192     Int input1 = input[offset + 1];
193     Int input2 = input[offset + 2];
194     Int input3 = input[offset + 3];
195 
196     return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24);
197 }
198 
199 template <typename Short>
200 static inline void shortToBytes(Short val, uint offset, device uchar *output)
201 {
202     ushort valUnsigned = as_type<ushort>(val);
203     output[offset] = valUnsigned & 0xff;
204     output[offset + 1] = (valUnsigned >> 8) & 0xff;
205 }
206 
207 template <typename Int>
208 static inline void intToBytes(Int val, uint offset, device uchar *output)
209 {
210     uint valUnsigned = as_type<uint>(val);
211     output[offset] = valUnsigned & 0xff;
212     output[offset + 1] = (valUnsigned >> 8) & 0xff;
213     output[offset + 2] = (valUnsigned >> 16) & 0xff;
214     output[offset + 3] = (valUnsigned >> 24) & 0xff;
215 }
216 
217 static inline void floatToBytes(float val, uint offset, device uchar *output)
218 {
219     intToBytes(as_type<uint>(val), offset, output);
220 }
221 
222 static inline void int24bitToBytes(uint val, uint offset, device uchar *output)
223 {
224     output[offset] = val & 0xff;
225     output[offset + 1] = (val >> 8) & 0xff;
226     output[offset + 2] = (val >> 16) & 0xff;
227 }
228 
229 template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
230 static inline T getShiftedData(T input)
231 {
232     static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
233                   "T must have at least as many bits as inputBitCount + inputBitStart.");
234     const T mask = (1 << inputBitCount) - 1;
235     return (input >> inputBitStart) & mask;
236 }
237 
238 template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
239 static inline T shiftData(T input)
240 {
241     static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
242                   "T must have at least as many bits as inputBitCount + inputBitStart.");
243     const T mask = (1 << inputBitCount) - 1;
244     return (input & mask) << inputBitStart;
245 }
246 
247 template <unsigned int inputBitCount, typename T>
248 static inline float normalizedToFloat(T input)
249 {
250     static_assert(inputBitCount <= (sizeof(T) * 8),
251                   "T must have more bits than or same bits as inputBitCount.");
252     static_assert(inputBitCount <= 23, "Only single precision is supported");
253 
254     constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1);
255     return input * inverseMax;
256 }
257 
258 template <typename T>
259 static inline float normalizedToFloat(T input)
260 {
261     return normalizedToFloat<sizeof(T) * 8, T>(input);
262 }
263 
264 template <>
265 inline float normalizedToFloat(short input)
266 {
267     constexpr float inverseMax = 1.0f / 0x7fff;
268     return static_cast<float>(input) * inverseMax;
269 }
270 
271 template <>
272 inline float normalizedToFloat(int input)
273 {
274     constexpr float inverseMax = 1.0f / 0x7fffffff;
275     return static_cast<float>(input) * inverseMax;
276 }
277 
278 template <>
279 inline float normalizedToFloat(uint input)
280 {
281     constexpr float inverseMax = 1.0f / 0xffffffff;
282     return static_cast<float>(input) * inverseMax;
283 }
284 
285 template <unsigned int outputBitCount, typename T>
286 static inline T floatToNormalized(float input)
287 {
288     static_assert(outputBitCount <= (sizeof(T) * 8),
289                   "T must have more bits than or same bits as inputBitCount.");
290     static_assert(outputBitCount <= 23, "Only single precision is supported");
291 
292     return static_cast<T>(((1 << outputBitCount) - 1) * input + 0.5f);
293 }
294 
295 template <typename T>
296 static inline T floatToNormalized(float input)
297 {
298     return floatToNormalized<sizeof(T) * 8, T>(input);
299 }
300 
301 }
302 }
303 # 9 "./blit.metal" 2
304 
305 using namespace rx::mtl_shader;
306 
307 
308 constant bool kPremultiplyAlpha [[function_constant(1)]];
309 constant bool kUnmultiplyAlpha [[function_constant(2)]];
310 constant bool kTransformLinearToSrgb [[function_constant(3)]];
311 constant int kSourceTextureType [[function_constant(4)]];
312 constant int kSourceTexture2Type [[function_constant(5)]];
313 
314 constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D;
315 constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray;
316 constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample;
317 constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube;
318 constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D;
319 
320 constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D;
321 constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray;
322 constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample;
323 constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube;
324 
325 struct BlitParams
326 {
327 
328     float4 srcTexCoords;
329     int srcLevel;
330     int srcLayer;
331     bool dstLuminance;
332     uint8_t padding[7];
333 };
334 
335 struct BlitVSOut
336 {
337     float4 position [[position]];
338     float2 texCoords [[center_no_perspective, user(locn1)]];
339 };
340 
341 vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]])
342 {
343     BlitVSOut output;
344     output.position.xy = select(float2(-1.0f), float2(1.0f), bool2(vid & uint2(2, 1)));
345     output.position.zw = float2(0.0, 1.0);
346     output.texCoords = select(options.srcTexCoords.xy, options.srcTexCoords.zw, bool2(vid & uint2(2, 1)));
347 
348     return output;
349 }
350 
351 template <typename SrcTexture2d>
352 static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords)
353 {
354     uint2 dimens(srcTexture.get_width(), srcTexture.get_height());
355     uint2 coords = uint2(texCoords * float2(dimens));
356 
357     return coords;
358 }
359 
360 template <typename T>
361 static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords)
362 {
363     uint2 coords = getImageCoords(srcTexture, texCoords);
364     return resolveTextureMS(srcTexture, coords);
365 }
366 
367 template <typename T>
368 static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture,
369                                             sampler textureSampler,
370                                             float2 texCoords,
371                                             constant BlitParams &options)
372 {
373     uint depth = srcTexture.get_depth(options.srcLevel);
374     float zCoord = (float(options.srcLayer) + 0.5) / float(depth);
375 
376     return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel));
377 }
378 # 101 "./blit.metal"
379 template <typename T>
380 static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
381 {
382     vec<T, 4> output;
383 
384     switch (kSourceTextureType)
385     {
386         case kTextureType2D:
387             output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel));
388             break;
389         case kTextureType2DArray:
390             output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer,
391                                               level(options.srcLevel));
392             break;
393         case kTextureType2DMultisample:
394             output = blitSampleTextureMS(srcTexture2dMS, input.texCoords);
395             break;
396         case kTextureTypeCube:
397             output = srcTextureCube.sample(textureSampler,
398                                            cubeTexcoords(input.texCoords, options.srcLayer),
399                                            level(options.srcLevel));
400             break;
401         case kTextureType3D:
402             output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options);
403             break;
404     }
405 
406     if (kTransformLinearToSrgb) {
407         output.x = linearToSRGB(output.x);
408         output.y = linearToSRGB(output.y);
409         output.z = linearToSRGB(output.z);
410     }
411     if (kUnmultiplyAlpha)
412     {
413         if (output.a != 0.0)
414         {
415             output.xyz /= output.a;
416         }
417     }
418     if (kPremultiplyAlpha)
419     {
420         output.xyz *= output.a;
421     }
422 
423     if (options.dstLuminance)
424     {
425         output.g = output.b = output.r;
426     }
427 
428     return output;
429 }
430 
431 template <typename T>
432 static inline MultipleColorOutputs<T> blitFS(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
433 {
434     vec<T, 4> output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
435 
436     return toMultipleColorOutputs(output);
437 }
438 
439 fragment MultipleColorOutputs<float> blitFloatFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
440 {
441     return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
442 }
443 fragment MultipleColorOutputs<int> blitIntFS(BlitVSOut input [[stage_in]], texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<int> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
444 {
445     return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
446 }
447 fragment MultipleColorOutputs<uint> blitUIntFS(BlitVSOut input [[stage_in]], texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<uint> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
448 {
449     return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
450 }
451 
452 fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
453 {
454     float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
455     uint4 output = uint4(inputColor * float4(255.0));
456 
457     return toMultipleColorOutputs(output);
458 }
459 
460 
461 struct FragmentDepthOut
462 {
463     float depth [[depth(any)]];
464 };
465 
466 static inline float sampleDepth(
467     texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]],
468     texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]],
469     texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]],
470     texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]],
471     float2 texCoords,
472     constant BlitParams &options)
473 {
474     float4 output;
475 
476     constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);
477 
478     switch (kSourceTextureType)
479     {
480         case kTextureType2D:
481             output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel));
482             break;
483         case kTextureType2DArray:
484             output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer,
485                                               level(options.srcLevel));
486             break;
487         case kTextureType2DMultisample:
488 
489             output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
490             break;
491         case kTextureTypeCube:
492             output =
493                 srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer),
494                                       level(options.srcLevel));
495             break;
496     }
497 
498     return output.r;
499 }
500 
501 fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]],
502                                       texture2d<float> srcTexture2d
503                                       [[texture(0), function_constant(kSourceTextureType2D)]],
504                                       texture2d_array<float> srcTexture2dArray
505                                       [[texture(0), function_constant(kSourceTextureType2DArray)]],
506                                       texture2d_ms<float> srcTexture2dMS
507                                       [[texture(0), function_constant(kSourceTextureType2DMS)]],
508                                       texturecube<float> srcTextureCube
509                                       [[texture(0), function_constant(kSourceTextureTypeCube)]],
510                                       constant BlitParams &options [[buffer(0)]])
511 {
512     FragmentDepthOut re;
513 
514     re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
515                            input.texCoords, options);
516 
517     return re;
518 }
519 
520 static inline uint32_t sampleStencil(
521     texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]],
522     texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]],
523     texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]],
524     texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]],
525     float2 texCoords,
526     int srcLevel,
527     int srcLayer)
528 {
529     uint4 output;
530     constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);
531 
532     switch (kSourceTexture2Type)
533     {
534         case kTextureType2D:
535             output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel));
536             break;
537         case kTextureType2DArray:
538             output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel));
539             break;
540         case kTextureType2DMultisample:
541 
542             output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
543             break;
544         case kTextureTypeCube:
545             output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer),
546                                            level(srcLevel));
547             break;
548     }
549 
550     return output.r;
551 }
552 
553 
554 struct BlitStencilToBufferParams
555 {
556     float2 srcStartTexCoords;
557     float2 srcTexCoordSteps;
558     int srcLevel;
559     int srcLayer;
560 
561     uint2 dstSize;
562     uint dstBufferRowPitch;
563 
564     bool resolveMS;
565 };
566 
567 kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]],
568                                   texture2d<uint32_t> srcTexture2d
569                                   [[texture(1), function_constant(kSourceTexture2Type2D)]],
570                                   texture2d_array<uint32_t> srcTexture2dArray
571                                   [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
572                                   texture2d_ms<uint32_t> srcTexture2dMS
573                                   [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
574                                   texturecube<uint32_t> srcTextureCube
575                                   [[texture(1), function_constant(kSourceTexture2TypeCube)]],
576                                   constant BlitStencilToBufferParams &options [[buffer(0)]],
577                                   device uchar *buffer [[buffer(1)]])
578 {
579     if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y)
580     {
581         return;
582     }
583 
584     float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps;
585 
586     if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS)
587     {
588         uint samples = srcTexture2dMS.get_num_samples();
589         uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords);
590         uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x;
591 
592         for (uint sample = 0; sample < samples; ++sample)
593         {
594             uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r;
595             buffer[bufferOffset + sample] = static_cast<uchar>(stencilPerSample);
596         }
597     }
598     else
599     {
600         uint32_t stencil =
601             sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
602                           srcTexCoords, options.srcLevel, options.srcLayer);
603 
604         buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast<uchar>(stencil);
605     }
606 }
607 
608 
609 #if __METAL_VERSION__ >= 210
610 
611 struct FragmentStencilOut
612 {
613     uint32_t stencil [[stencil]];
614 };
615 
616 struct FragmentDepthStencilOut
617 {
618     float depth [[depth(any)]];
619     uint32_t stencil [[stencil]];
620 };
621 
622 fragment FragmentStencilOut blitStencilFS(
623     BlitVSOut input [[stage_in]],
624     texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]],
625     texture2d_array<uint32_t> srcTexture2dArray
626     [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
627     texture2d_ms<uint32_t> srcTexture2dMS
628     [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
629     texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]],
630     constant BlitParams &options [[buffer(0)]])
631 {
632     FragmentStencilOut re;
633 
634     re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
635                                input.texCoords, options.srcLevel, options.srcLayer);
636 
637     return re;
638 }
639 
640 fragment FragmentDepthStencilOut blitDepthStencilFS(
641     BlitVSOut input [[stage_in]],
642 
643     texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]],
644     texture2d_array<float> srcDepthTexture2dArray
645     [[texture(0), function_constant(kSourceTextureType2DArray)]],
646     texture2d_ms<float> srcDepthTexture2dMS
647     [[texture(0), function_constant(kSourceTextureType2DMS)]],
648     texturecube<float> srcDepthTextureCube
649     [[texture(0), function_constant(kSourceTextureTypeCube)]],
650 
651 
652     texture2d<uint32_t> srcStencilTexture2d
653     [[texture(1), function_constant(kSourceTexture2Type2D)]],
654     texture2d_array<uint32_t> srcStencilTexture2dArray
655     [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
656     texture2d_ms<uint32_t> srcStencilTexture2dMS
657     [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
658     texturecube<uint32_t> srcStencilTextureCube
659     [[texture(1), function_constant(kSourceTexture2TypeCube)]],
660 
661     constant BlitParams &options [[buffer(0)]])
662 {
663     FragmentDepthStencilOut re;
664 
665     re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS,
666                            srcDepthTextureCube, input.texCoords, options);
667     re.stencil =
668         sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS,
669                       srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer);
670     return re;
671 }
672 #endif
673 # 2 "temp_master_source.metal" 2
674 # 1 "./clear.metal" 1
675 # 10 "./clear.metal"
676 using namespace rx::mtl_shader;
677 
678 struct ClearParams
679 {
680     float4 clearColor;
681     float clearDepth;
682 };
683 
684 vertex float4 clearVS(unsigned int vid [[ vertex_id ]],
685                       constant ClearParams &clearParams [[buffer(0)]])
686 {
687     return float4(gCorners[vid], clearParams.clearDepth, 1.0);
688 }
689 
690 fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]])
691 {
692     return toMultipleColorOutputs(clearParams.clearColor);
693 }
694 
695 fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]])
696 {
697     return toMultipleColorOutputs(as_type<int4>(clearParams.clearColor));
698 }
699 
700 fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]])
701 {
702     return toMultipleColorOutputs(as_type<uint4>(clearParams.clearColor));
703 }
704 # 3 "temp_master_source.metal" 2
705 # 1 "./gen_indices.metal" 1
706 
707 
708 
709 
710 
711 
712 
713 
714 using namespace rx::mtl_shader;
715 
716 
717 constant bool kSourceBufferAligned[[function_constant(100)]];
718 constant bool kSourceIndexIsU8[[function_constant(200)]];
719 constant bool kSourceIndexIsU16[[function_constant(300)]];
720 constant bool kSourceIndexIsU32[[function_constant(400)]];
721 constant bool kSourceBufferUnaligned = !kSourceBufferAligned;
722 constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned;
723 constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned;
724 constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned;
725 
726 struct IndexConversionParams
727 {
728     uint32_t srcOffset;
729     uint32_t indexCount;
730     bool primitiveRestartEnabled;
731 };
732 
733 
734 
735 inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx)
736 {
737     return inputAligned[offset / 2 + idx];
738 }
739 inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx)
740 {
741     return inputAligned[offset / 4 + idx];
742 }
743 inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx)
744 {
745     return input[offset + idx];
746 }
747 inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx)
748 {
749     ushort inputLo = input[offset + 2 * idx];
750     ushort inputHi = input[offset + 2 * idx + 1];
751 
752     return inputLo | (inputHi << 8);
753 }
754 inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx)
755 {
756     uint input0 = input[offset + 4 * idx];
757     uint input1 = input[offset + 4 * idx + 1];
758     uint input2 = input[offset + 4 * idx + 2];
759     uint input3 = input[offset + 4 * idx + 3];
760 
761     return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24);
762 }
763 
764 kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]],
765                                 constant IndexConversionParams &options [[buffer(0)]],
766                                 constant uchar *input [[buffer(1)]],
767                                 device ushort *output [[buffer(2)]])
768 {
769     if (idx >= options.indexCount) { return; };
770 
771     uchar value = getIndexAligned(input, options.srcOffset, idx);
772 
773     if (options.primitiveRestartEnabled && value == 0xff)
774     {
775         output[idx] = 0xffff;
776     }
777     else
778     {
779         output[idx] = value;
780     }
781 }
782 
783 kernel void convertIndexU16(uint idx [[thread_position_in_grid]],
784                             constant IndexConversionParams &options [[buffer(0)]],
785                             constant uchar *input
786                             [[buffer(1), function_constant(kSourceBufferUnaligned)]],
787                             constant ushort *inputAligned
788                             [[buffer(1), function_constant(kSourceBufferAligned)]],
789                             device ushort *output [[buffer(2)]])
790 {
791     if (idx >= options.indexCount) { return; };
792 
793     ushort value;
794     if (kSourceBufferAligned)
795     {
796         value = getIndexAligned(inputAligned, options.srcOffset, idx);
797     }
798     else
799     {
800         value = getIndexUnalignedU16(input, options.srcOffset, idx);
801     }
802     output[idx] = value;
803 }
804 
805 kernel void convertIndexU32(uint idx [[thread_position_in_grid]],
806                             constant IndexConversionParams &options [[buffer(0)]],
807                             constant uchar *input
808                             [[buffer(1), function_constant(kSourceBufferUnaligned)]],
809                             constant uint *inputAligned
810                             [[buffer(1), function_constant(kSourceBufferAligned)]],
811                             device uint *output [[buffer(2)]])
812 {
813     if (idx >= options.indexCount) { return; };
814 
815     uint value;
816     if (kSourceBufferAligned)
817     {
818         value = getIndexAligned(inputAligned, options.srcOffset, idx);
819     }
820     else
821     {
822         value = getIndexUnalignedU32(input, options.srcOffset, idx);
823     }
824     output[idx] = value;
825 }
826 
827 struct IndexFromArrayParams
828 {
829     uint firstVertex;
830 
831     uint vertexCount;
832 };
833 
834 
835 kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]],
836                                       constant IndexFromArrayParams &options [[buffer(0)]],
837                                       device uint *output [[buffer(2)]])
838 {
839     if (idx >= options.vertexCount) { return; };
840 
841     uint vertexIdx = options.firstVertex + 2 + idx;
842 
843 
844 
845     output[3 * idx ] = vertexIdx - 1;
846     output[3 * idx + 1] = vertexIdx;
847     output[3 * idx + 2] = options.firstVertex;
848 }
849 
850 inline uint getIndexU32(uint offset,
851                         uint idx,
852                         constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]],
853                         constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]],
854                         constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]])
855 {
856     if (kUseSourceBufferU8)
857     {
858         if (kSourceIndexIsU16)
859         {
860             return getIndexUnalignedU16(inputU8, offset, idx);
861         }
862         else if (kSourceIndexIsU32)
863         {
864             return getIndexUnalignedU32(inputU8, offset, idx);
865         }
866         return getIndexAligned(inputU8, offset, idx);
867     }
868     else if (kUseSourceBufferU16)
869     {
870         return getIndexAligned(inputU16, offset, idx);
871     }
872     else if (kUseSourceBufferU32)
873     {
874         return getIndexAligned(inputU32, offset, idx);
875     }
876     return 0;
877 }
878 
879 
880 
881 
882 kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]],
883                                          constant IndexConversionParams &options [[buffer(0)]],
884                                          constant uchar *inputU8
885                                          [[buffer(1), function_constant(kUseSourceBufferU8)]],
886                                          constant ushort *inputU16
887                                          [[buffer(1), function_constant(kUseSourceBufferU16)]],
888                                          constant uint *inputU32
889                                          [[buffer(1), function_constant(kUseSourceBufferU32)]],
890                                          device uint *output [[buffer(2)]])
891 {
892     if (idx >= options.indexCount) { return; };
893 
894     uint elemIdx = 2 + idx;
895 
896     output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32);
897     output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32);
898     output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32);
899 }
900 
901 
902 kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]],
903                                         constant IndexFromArrayParams &options [[buffer(0)]],
904                                         device uint *output [[buffer(2)]])
905 {
906     uint totalIndices = options.vertexCount + 1;
907     if (idx >= totalIndices) { return; };
908 
909     output[idx] = options.firstVertex + idx % options.vertexCount;
910 }
911 
912 
913 
914 kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]],
915                                            constant IndexConversionParams &options [[buffer(0)]],
916                                            constant uchar *inputU8
917                                            [[buffer(1), function_constant(kUseSourceBufferU8)]],
918                                            constant ushort *inputU16
919                                            [[buffer(1), function_constant(kUseSourceBufferU16)]],
920                                            constant uint *inputU32
921                                            [[buffer(1), function_constant(kUseSourceBufferU32)]],
922                                            device uint *output [[buffer(2)]])
923 {
924     uint totalTargetIndices = options.indexCount + 1;
925     if (idx >= totalTargetIndices) { return; };
926 
927     output[idx] =
928         getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32);
929 }
930 # 4 "temp_master_source.metal" 2
931 # 1 "./gen_mipmap.metal" 1
932 
933 
934 
935 
936 
937 
938 
939 
940 using namespace rx::mtl_shader;
941 # 31 "./gen_mipmap.metal"
942 struct GenMipParams
943 {
944     uint srcLevel;
945     uint numMipLevelsToGen;
946     bool sRGB;
947 };
948 
949 
950 
951 kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
952                               ushort3 gIndices [[thread_position_in_grid]],
953                               texture3d<float> srcTexture [[texture(0)]],
954                               texture3d<float, access::write> dstMip1 [[texture(1)]],
955                               texture3d<float, access::write> dstMip2 [[texture(2)]],
956                               texture3d<float, access::write> dstMip3 [[texture(3)]],
957                               texture3d<float, access::write> dstMip4 [[texture(4)]],
958                               constant GenMipParams &options [[buffer(0)]])
959 {
960     ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth());
961     bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z;
962 
963     constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
964 
965 
966 
967 
968     threadgroup float sR[(8 * 8 * 8)];
969     threadgroup float sG[(8 * 8 * 8)];
970     threadgroup float sB[(8 * 8 * 8)];
971     threadgroup float sA[(8 * 8 * 8)];
972 
973 
974     float4 texel1;
975     if (validThread)
976     {
977         float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize);
978         texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));
979 
980 
981         dstMip1.write(texel1, gIndices);
982     }
983     else
984     {
985 
986         lIndex = 0xffffffff;
987     }
988 
989     if (options.numMipLevelsToGen == 1)
990     {
991         return;
992     }
993 
994 
995 
996 
997     if (options.sRGB)
998     {
999         texel1 = linearToSRGB(texel1);
1000     }
1001     sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1002 
1003     threadgroup_barrier(mem_flags::mem_threadgroup);
1004 
1005 
1006     if ((lIndex & 0x49) == 0)
1007     {
1008         bool3 atEdge = gIndices == (mipSize - ushort3(1));
1009 
1010 
1011 
1012         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
1013 
1014         float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
1015 
1016         float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)]));
1017 
1018         float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
1019 
1020 
1021         float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)]));
1022 
1023 
1024         float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)]));
1025 
1026 
1027         float4 texel8 =
1028             (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)]));
1029 
1030 
1031         texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
1032 
1033         dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1);
1034 
1035 
1036         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1037     }
1038 
1039     if (options.numMipLevelsToGen == 2)
1040     {
1041         return;
1042     }
1043 
1044 
1045     threadgroup_barrier(mem_flags::mem_threadgroup);
1046 
1047 
1048     if ((lIndex & 0xdb) == 0)
1049     {
1050         mipSize = max(mipSize >> 1, ushort3(1));
1051         bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1));
1052 
1053 
1054 
1055         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
1056 
1057         float4 texel3 =
1058             (atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)]));
1059 
1060         float4 texel4 =
1061             (atEdge.z) ? (texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))]));
1062 
1063         float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
1064 
1065 
1066         float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)]));
1067 
1068 
1069         float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)]));
1070 
1071 
1072 
1073         float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)]));
1074 
1075 
1076 
1077         texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
1078 
1079         dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2);
1080 
1081 
1082         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1083     }
1084 
1085     if (options.numMipLevelsToGen == 3)
1086     {
1087         return;
1088     }
1089 
1090 
1091     threadgroup_barrier(mem_flags::mem_threadgroup);
1092 
1093 
1094     if ((lIndex & 0x1ff) == 0)
1095     {
1096         mipSize = max(mipSize >> 1, ushort3(1));
1097         bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1));
1098 
1099 
1100 
1101         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
1102 
1103         float4 texel3 =
1104             (atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)]));
1105 
1106         float4 texel4 =
1107             (atEdge.z) ? (texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))]));
1108 
1109         float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
1110 
1111 
1112         float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)]));
1113 
1114 
1115         float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)]));
1116 
1117 
1118 
1119         float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)]));
1120 
1121 
1122 
1123         texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
1124 
1125         dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
1126     }
1127 }
1128 
1129 kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
1130                               ushort2 gIndices [[thread_position_in_grid]],
1131                               texture2d<float> srcTexture [[texture(0)]],
1132                               texture2d<float, access::write> dstMip1 [[texture(1)]],
1133                               texture2d<float, access::write> dstMip2 [[texture(2)]],
1134                               texture2d<float, access::write> dstMip3 [[texture(3)]],
1135                               texture2d<float, access::write> dstMip4 [[texture(4)]],
1136                               constant GenMipParams &options [[buffer(0)]])
1137 {
1138     uint firstMipLevel = options.srcLevel + 1;
1139     ushort2 mipSize =
1140         ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
1141     bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y;
1142 
1143     constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
1144 
1145 
1146 
1147 
1148     threadgroup float sR[(8 * 8)];
1149     threadgroup float sG[(8 * 8)];
1150     threadgroup float sB[(8 * 8)];
1151     threadgroup float sA[(8 * 8)];
1152 
1153 
1154     float4 texel1;
1155     if (validThread)
1156     {
1157         float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize);
1158         texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));
1159 
1160 
1161         dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices);
1162     }
1163     else
1164     {
1165 
1166         lIndex = 0xffffffff;
1167     }
1168 
1169     if (options.numMipLevelsToGen == 1)
1170     {
1171         return;
1172     }
1173 
1174 
1175 
1176 
1177     sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1178 
1179     threadgroup_barrier(mem_flags::mem_threadgroup);
1180 
1181 
1182     if ((lIndex & 0x09) == 0)
1183     {
1184         bool2 atEdge = gIndices == (mipSize - ushort2(1));
1185 
1186 
1187 
1188         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
1189 
1190         float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
1191 
1192         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
1193 
1194 
1195         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1196 
1197         dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1);
1198 
1199 
1200         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1201     }
1202 
1203     if (options.numMipLevelsToGen == 2)
1204     {
1205         return;
1206     }
1207 
1208 
1209     threadgroup_barrier(mem_flags::mem_threadgroup);
1210 
1211 
1212     if ((lIndex & 0x1b) == 0)
1213     {
1214         mipSize = max(mipSize >> 1, ushort2(1));
1215         bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1));
1216 
1217 
1218 
1219         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
1220 
1221         float4 texel3 =
1222             (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8]));
1223 
1224         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
1225 
1226 
1227         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1228 
1229         dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2);
1230 
1231 
1232         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1233     }
1234 
1235     if (options.numMipLevelsToGen == 3)
1236     {
1237         return;
1238     }
1239 
1240 
1241     threadgroup_barrier(mem_flags::mem_threadgroup);
1242 
1243 
1244     if ((lIndex & 0x3f) == 0)
1245     {
1246         mipSize = max(mipSize >> 1, ushort2(1));
1247         bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1));
1248 
1249 
1250 
1251         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
1252 
1253         float4 texel3 =
1254             (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8]));
1255 
1256         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
1257 
1258 
1259         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1260 
1261         dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
1262     }
1263 }
1264 
1265 template <typename TextureTypeR, typename TextureTypeW>
1266 static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps(
1267     uint lIndex,
1268     ushort3 gIndices,
1269     TextureTypeR srcTexture,
1270     TextureTypeW dstMip2,
1271     TextureTypeW dstMip3,
1272     TextureTypeW dstMip4,
1273     ushort2 mip1Size,
1274     float4 mip1Texel,
1275     threadgroup float *sR,
1276     threadgroup float *sG,
1277     threadgroup float *sB,
1278     threadgroup float *sA,
1279     constant GenMipParams &options)
1280 {
1281     ushort2 mipSize = mip1Size;
1282     float4 texel1 = mip1Texel;
1283 
1284 
1285 
1286 
1287     sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1288 
1289     threadgroup_barrier(mem_flags::mem_threadgroup);
1290 
1291 
1292     if ((lIndex & 0x09) == 0)
1293     {
1294         bool2 atEdge = gIndices.xy == (mipSize - ushort2(1));
1295 
1296 
1297 
1298         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
1299 
1300         float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
1301 
1302         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
1303 
1304 
1305         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1306 
1307         dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z);
1308 
1309 
1310         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1311     }
1312 
1313     if (options.numMipLevelsToGen == 2)
1314     {
1315         return;
1316     }
1317 
1318 
1319     threadgroup_barrier(mem_flags::mem_threadgroup);
1320 
1321 
1322     if ((lIndex & 0x1b) == 0)
1323     {
1324         mipSize = max(mipSize >> 1, ushort2(1));
1325         bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1));
1326 
1327 
1328 
1329         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
1330 
1331         float4 texel3 =
1332             (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8]));
1333 
1334         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
1335 
1336 
1337         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1338 
1339         dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z);
1340 
1341 
1342         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1343     }
1344 
1345     if (options.numMipLevelsToGen == 3)
1346     {
1347         return;
1348     }
1349 
1350 
1351     threadgroup_barrier(mem_flags::mem_threadgroup);
1352 
1353 
1354     if ((lIndex & 0x3f) == 0)
1355     {
1356         mipSize = max(mipSize >> 1, ushort2(1));
1357         bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1));
1358 
1359 
1360 
1361         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
1362 
1363         float4 texel3 =
1364             (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8]));
1365 
1366         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
1367 
1368 
1369         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1370 
1371         dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z);
1372     }
1373 }
1374 
1375 kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]],
1376                                 ushort3 gIndices [[thread_position_in_grid]],
1377                                 texturecube<float> srcTexture [[texture(0)]],
1378                                 texturecube<float, access::write> dstMip1 [[texture(1)]],
1379                                 texturecube<float, access::write> dstMip2 [[texture(2)]],
1380                                 texturecube<float, access::write> dstMip3 [[texture(3)]],
1381                                 texturecube<float, access::write> dstMip4 [[texture(4)]],
1382                                 constant GenMipParams &options [[buffer(0)]])
1383 {
1384     uint firstMipLevel = options.srcLevel + 1;
1385     ushort2 mip1Size =
1386         ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
1387     bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;
1388 
1389     constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
1390 
1391 
1392     float4 mip1Texel;
1393     if (validThread)
1394     {
1395         float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
1396         mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)),
1397                                       level(options.srcLevel));
1398 
1399 
1400         dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
1401     }
1402     else
1403     {
1404 
1405         lIndex = 0xffffffff;
1406     }
1407 
1408     if (options.numMipLevelsToGen == 1)
1409     {
1410         return;
1411     }
1412 
1413 
1414     threadgroup float sR[(8 * 8)];
1415     threadgroup float sG[(8 * 8)];
1416     threadgroup float sB[(8 * 8)];
1417     threadgroup float sA[(8 * 8)];
1418 
1419     generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
1420                                            mip1Size, mip1Texel, sR, sG, sB, sA, options);
1421 }
1422 
1423 kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]],
1424                                    ushort3 gIndices [[thread_position_in_grid]],
1425                                    texture2d_array<float> srcTexture [[texture(0)]],
1426                                    texture2d_array<float, access::write> dstMip1 [[texture(1)]],
1427                                    texture2d_array<float, access::write> dstMip2 [[texture(2)]],
1428                                    texture2d_array<float, access::write> dstMip3 [[texture(3)]],
1429                                    texture2d_array<float, access::write> dstMip4 [[texture(4)]],
1430                                    constant GenMipParams &options [[buffer(0)]])
1431 {
1432     uint firstMipLevel = options.srcLevel + 1;
1433     ushort2 mip1Size =
1434         ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
1435     bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;
1436 
1437     constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
1438 
1439 
1440     float4 mip1Texel;
1441     if (validThread)
1442     {
1443         float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
1444         mip1Texel =
1445             srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel));
1446 
1447 
1448         dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
1449     }
1450     else
1451     {
1452 
1453         lIndex = 0xffffffff;
1454     }
1455 
1456     if (options.numMipLevelsToGen == 1)
1457     {
1458         return;
1459     }
1460 
1461 
1462     threadgroup float sR[(8 * 8)];
1463     threadgroup float sG[(8 * 8)];
1464     threadgroup float sB[(8 * 8)];
1465     threadgroup float sA[(8 * 8)];
1466 
1467     generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
1468                                            mip1Size, mip1Texel, sR, sG, sB, sA, options);
1469 }
1470 # 5 "temp_master_source.metal" 2
1471 # 1 "./copy_buffer.metal" 1
1472 # 12 "./copy_buffer.metal"
1473 #include <metal_pack>
1474 
1475 
1476 # 1 "./format_autogen.h" 1
1477 
1478 
1479 
1480 namespace rx
1481 {
1482 namespace mtl_shader
1483 {
1484 
1485 namespace FormatID
1486 {
1487 enum
1488 {
1489     NONE,
1490     D16_UNORM,
1491     D24_UNORM_S8_UINT,
1492     D24_UNORM_X8_UINT,
1493     D32_FLOAT,
1494     D32_FLOAT_S8X24_UINT,
1495     D32_UNORM,
1496     S8_UINT,
1497     A16_FLOAT,
1498     A1R5G5B5_UNORM,
1499     A2R10G10B10_SINT_VERTEX,
1500     A2R10G10B10_SNORM_VERTEX,
1501     A2R10G10B10_SSCALED_VERTEX,
1502     A2R10G10B10_UINT_VERTEX,
1503     A2R10G10B10_UNORM_VERTEX,
1504     A2R10G10B10_USCALED_VERTEX,
1505     A32_FLOAT,
1506     A8_UNORM,
1507     ASTC_10x10_SRGB_BLOCK,
1508     ASTC_10x10_UNORM_BLOCK,
1509     ASTC_10x5_SRGB_BLOCK,
1510     ASTC_10x5_UNORM_BLOCK,
1511     ASTC_10x6_SRGB_BLOCK,
1512     ASTC_10x6_UNORM_BLOCK,
1513     ASTC_10x8_SRGB_BLOCK,
1514     ASTC_10x8_UNORM_BLOCK,
1515     ASTC_12x10_SRGB_BLOCK,
1516     ASTC_12x10_UNORM_BLOCK,
1517     ASTC_12x12_SRGB_BLOCK,
1518     ASTC_12x12_UNORM_BLOCK,
1519     ASTC_3x3x3_UNORM_BLOCK,
1520     ASTC_3x3x3_UNORM_SRGB_BLOCK,
1521     ASTC_4x3x3_UNORM_BLOCK,
1522     ASTC_4x3x3_UNORM_SRGB_BLOCK,
1523     ASTC_4x4_SRGB_BLOCK,
1524     ASTC_4x4_UNORM_BLOCK,
1525     ASTC_4x4x3_UNORM_BLOCK,
1526     ASTC_4x4x3_UNORM_SRGB_BLOCK,
1527     ASTC_4x4x4_UNORM_BLOCK,
1528     ASTC_4x4x4_UNORM_SRGB_BLOCK,
1529     ASTC_5x4_SRGB_BLOCK,
1530     ASTC_5x4_UNORM_BLOCK,
1531     ASTC_5x4x4_UNORM_BLOCK,
1532     ASTC_5x4x4_UNORM_SRGB_BLOCK,
1533     ASTC_5x5_SRGB_BLOCK,
1534     ASTC_5x5_UNORM_BLOCK,
1535     ASTC_5x5x4_UNORM_BLOCK,
1536     ASTC_5x5x4_UNORM_SRGB_BLOCK,
1537     ASTC_5x5x5_UNORM_BLOCK,
1538     ASTC_5x5x5_UNORM_SRGB_BLOCK,
1539     ASTC_6x5_SRGB_BLOCK,
1540     ASTC_6x5_UNORM_BLOCK,
1541     ASTC_6x5x5_UNORM_BLOCK,
1542     ASTC_6x5x5_UNORM_SRGB_BLOCK,
1543     ASTC_6x6_SRGB_BLOCK,
1544     ASTC_6x6_UNORM_BLOCK,
1545     ASTC_6x6x5_UNORM_BLOCK,
1546     ASTC_6x6x5_UNORM_SRGB_BLOCK,
1547     ASTC_6x6x6_UNORM_BLOCK,
1548     ASTC_6x6x6_UNORM_SRGB_BLOCK,
1549     ASTC_8x5_SRGB_BLOCK,
1550     ASTC_8x5_UNORM_BLOCK,
1551     ASTC_8x6_SRGB_BLOCK,
1552     ASTC_8x6_UNORM_BLOCK,
1553     ASTC_8x8_SRGB_BLOCK,
1554     ASTC_8x8_UNORM_BLOCK,
1555     B10G10R10A2_UNORM,
1556     B4G4R4A4_UNORM,
1557     B5G5R5A1_UNORM,
1558     B5G6R5_UNORM,
1559     B8G8R8A8_TYPELESS,
1560     B8G8R8A8_TYPELESS_SRGB,
1561     B8G8R8A8_UNORM,
1562     B8G8R8A8_UNORM_SRGB,
1563     B8G8R8X8_UNORM,
1564     B8G8R8X8_UNORM_SRGB,
1565     BC1_RGBA_UNORM_BLOCK,
1566     BC1_RGBA_UNORM_SRGB_BLOCK,
1567     BC1_RGB_UNORM_BLOCK,
1568     BC1_RGB_UNORM_SRGB_BLOCK,
1569     BC2_RGBA_UNORM_BLOCK,
1570     BC2_RGBA_UNORM_SRGB_BLOCK,
1571     BC3_RGBA_UNORM_BLOCK,
1572     BC3_RGBA_UNORM_SRGB_BLOCK,
1573     BC4_RED_SNORM_BLOCK,
1574     BC4_RED_UNORM_BLOCK,
1575     BC5_RG_SNORM_BLOCK,
1576     BC5_RG_UNORM_BLOCK,
1577     BC6H_RGB_SFLOAT_BLOCK,
1578     BC6H_RGB_UFLOAT_BLOCK,
1579     BC7_RGBA_UNORM_BLOCK,
1580     BC7_RGBA_UNORM_SRGB_BLOCK,
1581     EAC_R11G11_SNORM_BLOCK,
1582     EAC_R11G11_UNORM_BLOCK,
1583     EAC_R11_SNORM_BLOCK,
1584     EAC_R11_UNORM_BLOCK,
1585     ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK,
1586     ETC1_R8G8B8_UNORM_BLOCK,
1587     ETC2_R8G8B8A1_SRGB_BLOCK,
1588     ETC2_R8G8B8A1_UNORM_BLOCK,
1589     ETC2_R8G8B8A8_SRGB_BLOCK,
1590     ETC2_R8G8B8A8_UNORM_BLOCK,
1591     ETC2_R8G8B8_SRGB_BLOCK,
1592     ETC2_R8G8B8_UNORM_BLOCK,
1593     G8_B8R8_2PLANE_420_UNORM,
1594     G8_B8_R8_3PLANE_420_UNORM,
1595     L16A16_FLOAT,
1596     L16_FLOAT,
1597     L32A32_FLOAT,
1598     L32_FLOAT,
1599     L8A8_UNORM,
1600     L8_UNORM,
1601     PALETTE4_R4G4B4A4_UNORM,
1602     PALETTE4_R5G5B5A1_UNORM,
1603     PALETTE4_R5G6B5_UNORM,
1604     PALETTE4_R8G8B8A8_UNORM,
1605     PALETTE4_R8G8B8_UNORM,
1606     PALETTE8_R4G4B4A4_UNORM,
1607     PALETTE8_R5G5B5A1_UNORM,
1608     PALETTE8_R5G6B5_UNORM,
1609     PALETTE8_R8G8B8A8_UNORM,
1610     PALETTE8_R8G8B8_UNORM,
1611     PVRTC1_RGBA_2BPP_UNORM_BLOCK,
1612     PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK,
1613     PVRTC1_RGBA_4BPP_UNORM_BLOCK,
1614     PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK,
1615     PVRTC1_RGB_2BPP_UNORM_BLOCK,
1616     PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK,
1617     PVRTC1_RGB_4BPP_UNORM_BLOCK,
1618     PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK,
1619     R10G10B10A2_SINT,
1620     R10G10B10A2_SNORM,
1621     R10G10B10A2_SSCALED,
1622     R10G10B10A2_UINT,
1623     R10G10B10A2_UNORM,
1624     R10G10B10A2_USCALED,
1625     R10G10B10X2_UNORM,
1626     R11G11B10_FLOAT,
1627     R16G16B16A16_FLOAT,
1628     R16G16B16A16_SINT,
1629     R16G16B16A16_SNORM,
1630     R16G16B16A16_SSCALED,
1631     R16G16B16A16_UINT,
1632     R16G16B16A16_UNORM,
1633     R16G16B16A16_USCALED,
1634     R16G16B16_FLOAT,
1635     R16G16B16_SINT,
1636     R16G16B16_SNORM,
1637     R16G16B16_SSCALED,
1638     R16G16B16_UINT,
1639     R16G16B16_UNORM,
1640     R16G16B16_USCALED,
1641     R16G16_FLOAT,
1642     R16G16_SINT,
1643     R16G16_SNORM,
1644     R16G16_SSCALED,
1645     R16G16_UINT,
1646     R16G16_UNORM,
1647     R16G16_USCALED,
1648     R16_FLOAT,
1649     R16_SINT,
1650     R16_SNORM,
1651     R16_SSCALED,
1652     R16_UINT,
1653     R16_UNORM,
1654     R16_USCALED,
1655     R32G32B32A32_FIXED,
1656     R32G32B32A32_FLOAT,
1657     R32G32B32A32_SINT,
1658     R32G32B32A32_SNORM,
1659     R32G32B32A32_SSCALED,
1660     R32G32B32A32_UINT,
1661     R32G32B32A32_UNORM,
1662     R32G32B32A32_USCALED,
1663     R32G32B32_FIXED,
1664     R32G32B32_FLOAT,
1665     R32G32B32_SINT,
1666     R32G32B32_SNORM,
1667     R32G32B32_SSCALED,
1668     R32G32B32_UINT,
1669     R32G32B32_UNORM,
1670     R32G32B32_USCALED,
1671     R32G32_FIXED,
1672     R32G32_FLOAT,
1673     R32G32_SINT,
1674     R32G32_SNORM,
1675     R32G32_SSCALED,
1676     R32G32_UINT,
1677     R32G32_UNORM,
1678     R32G32_USCALED,
1679     R32_FIXED,
1680     R32_FLOAT,
1681     R32_SINT,
1682     R32_SNORM,
1683     R32_SSCALED,
1684     R32_UINT,
1685     R32_UNORM,
1686     R32_USCALED,
1687     R4G4B4A4_UNORM,
1688     R5G5B5A1_UNORM,
1689     R5G6B5_UNORM,
1690     R8G8B8A8_SINT,
1691     R8G8B8A8_SNORM,
1692     R8G8B8A8_SSCALED,
1693     R8G8B8A8_TYPELESS,
1694     R8G8B8A8_TYPELESS_SRGB,
1695     R8G8B8A8_UINT,
1696     R8G8B8A8_UNORM,
1697     R8G8B8A8_UNORM_SRGB,
1698     R8G8B8A8_USCALED,
1699     R8G8B8X8_UNORM,
1700     R8G8B8X8_UNORM_SRGB,
1701     R8G8B8_SINT,
1702     R8G8B8_SNORM,
1703     R8G8B8_SSCALED,
1704     R8G8B8_UINT,
1705     R8G8B8_UNORM,
1706     R8G8B8_UNORM_SRGB,
1707     R8G8B8_USCALED,
1708     R8G8_SINT,
1709     R8G8_SNORM,
1710     R8G8_SSCALED,
1711     R8G8_UINT,
1712     R8G8_UNORM,
1713     R8G8_UNORM_SRGB,
1714     R8G8_USCALED,
1715     R8_SINT,
1716     R8_SNORM,
1717     R8_SSCALED,
1718     R8_UINT,
1719     R8_UNORM,
1720     R8_UNORM_SRGB,
1721     R8_USCALED,
1722     R9G9B9E5_SHAREDEXP,
1723     X2R10G10B10_SINT_VERTEX,
1724     X2R10G10B10_SNORM_VERTEX,
1725     X2R10G10B10_SSCALED_VERTEX,
1726     X2R10G10B10_UINT_VERTEX,
1727     X2R10G10B10_UNORM_VERTEX,
1728     X2R10G10B10_USCALED_VERTEX,
1729     EXTERNAL0,
1730     EXTERNAL1,
1731     EXTERNAL2,
1732     EXTERNAL3,
1733     EXTERNAL4,
1734     EXTERNAL5,
1735     EXTERNAL6,
1736     EXTERNAL7
1737 };
1738 
1739 }
1740 
1741 }
1742 }
1743 # 16 "./copy_buffer.metal" 2
1744 
1745 using namespace rx::mtl_shader;
1746 
1747 constant int kCopyFormatType [[function_constant(10)]];
1748 
1749 
1750 constant int kCopyTextureType [[function_constant(20)]];
1751 constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D;
1752 constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray;
1753 constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample;
1754 constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube;
1755 constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D;
1756 
1757 struct CopyPixelParams
1758 {
1759     uint3 copySize;
1760     uint3 textureOffset;
1761 
1762     uint bufferStartOffset;
1763     uint pixelSize;
1764     uint bufferRowPitch;
1765     uint bufferDepthPitch;
1766 };
1767 
1768 struct WritePixelParams
1769 {
1770     uint2 copySize;
1771     uint2 textureOffset;
1772 
1773     uint bufferStartOffset;
1774 
1775     uint pixelSize;
1776     uint bufferRowPitch;
1777 
1778     uint textureLevel;
1779     uint textureLayer;
1780 
1781     bool reverseTextureRowOrder;
1782 };
1783 # 120 "./copy_buffer.metal"
1784 template <typename T>
1785 static inline void textureWrite(ushort3 gIndices,
1786                                 constant CopyPixelParams &options,
1787                                 vec<T, 4> color,
1788                                 texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
1789 {
1790     uint3 writeIndices = options.textureOffset + uint3(gIndices);
1791     switch (kCopyTextureType)
1792     {
1793         case kTextureType2D:
1794             dstTexture2d.write(color, writeIndices.xy);
1795             break;
1796         case kTextureType2DArray:
1797             dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z);
1798             break;
1799         case kTextureType3D:
1800             dstTexture3d.write(color, writeIndices);
1801             break;
1802         case kTextureTypeCube:
1803             dstTextureCube.write(color, writeIndices.xy, writeIndices.z);
1804             break;
1805     }
1806 }
1807 
1808 
1809 template <typename T>
1810 static inline vec<T, 4> textureRead(ushort2 gIndices,
1811                                     constant WritePixelParams &options,
1812                                     texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]])
1813 {
1814     vec<T, 4> color;
1815     uint2 coords = uint2(gIndices);
1816     if (options.reverseTextureRowOrder)
1817     {
1818         coords.y = options.copySize.y - 1 - gIndices.y;
1819     }
1820     coords += options.textureOffset;
1821     switch (kCopyTextureType)
1822     {
1823         case kTextureType2D:
1824             color = srcTexture2d.read(coords.xy, options.textureLevel);
1825             break;
1826         case kTextureType2DArray:
1827             color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel);
1828             break;
1829         case kTextureType2DMultisample:
1830             color = resolveTextureMS(srcTexture2dMS, coords.xy);
1831             break;
1832         case kTextureType3D:
1833             color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel);
1834             break;
1835         case kTextureTypeCube:
1836             color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel);
1837             break;
1838     }
1839     return color;
1840 }
1841 # 215 "./copy_buffer.metal"
1842 static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer)
1843 {
1844     float4 color;
1845     ushort src = bytesToShort<ushort>(buffer, bufferOffset);
1846 
1847     color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src));
1848     color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src));
1849     color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src));
1850     color.a = 1.0;
1851     return color;
1852 }
1853 static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
1854 {
1855     ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) |
1856                  shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) |
1857                  shiftData<5, 0>(floatToNormalized<5, ushort>(color.b));
1858 
1859     shortToBytes(dst, bufferOffset, buffer);
1860 }
1861 
1862 
1863 static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer)
1864 {
1865     float4 color;
1866     ushort src = bytesToShort<ushort>(buffer, bufferOffset);
1867 
1868     color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src));
1869     color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src));
1870     color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src));
1871     color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src));
1872     return color;
1873 }
1874 static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
1875 {
1876     ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) |
1877                  shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) |
1878                  shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) |
1879                  shiftData<4, 0>(floatToNormalized<4, ushort>(color.a));
1880     ;
1881 
1882     shortToBytes(dst, bufferOffset, buffer);
1883 }
1884 
1885 
1886 static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer)
1887 {
1888     float4 color;
1889     ushort src = bytesToShort<ushort>(buffer, bufferOffset);
1890 
1891     color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src));
1892     color.g = normalizedToFloat<5>(getShiftedData<5, 6>(src));
1893     color.b = normalizedToFloat<5>(getShiftedData<5, 1>(src));
1894     color.a = normalizedToFloat<1>(getShiftedData<1, 0>(src));
1895     return color;
1896 }
1897 static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
1898 {
1899     ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) |
1900                  shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) |
1901                  shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) |
1902                  shiftData<1, 0>(floatToNormalized<1, ushort>(color.a));
1903     ;
1904 
1905     shortToBytes(dst, bufferOffset, buffer);
1906 }
1907 
1908 
1909 static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer)
1910 {
1911     int4 color;
1912     int src = bytesToInt<int>(buffer, bufferOffset);
1913 
1914     constexpr int3 rgbSignMask(0x200);
1915     constexpr int3 negativeMask(0xfffffc00);
1916     constexpr int alphaSignMask = 0x2;
1917     constexpr int alphaNegMask = 0xfffffffc;
1918 
1919     color.r = getShiftedData<10, 0>(src);
1920     color.g = getShiftedData<10, 10>(src);
1921     color.b = getShiftedData<10, 20>(src);
1922 
1923     int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9;
1924     color.rgb = (isRgbNegative * negativeMask) | color.rgb;
1925 
1926     color.a = getShiftedData<2, 30>(src);
1927     int isAlphaNegative = color.a & alphaSignMask >> 1;
1928     color.a = (isAlphaNegative * alphaNegMask) | color.a;
1929     return color;
1930 }
1931 
1932 static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer)
1933 {
1934     uint4 color;
1935     uint src = bytesToInt<uint>(buffer, bufferOffset);
1936 
1937     color.r = getShiftedData<10, 0>(src);
1938     color.g = getShiftedData<10, 10>(src);
1939     color.b = getShiftedData<10, 20>(src);
1940     color.a = getShiftedData<2, 30>(src);
1941     return color;
1942 }
1943 
1944 
1945 static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
1946 {
1947     float4 color;
1948     uint src = bytesToInt<uint>(buffer, bufferOffset);
1949 
1950     if (isSRGB)
1951     {
1952         color = unpack_unorm4x8_srgb_to_float(src);
1953     }
1954     else
1955     {
1956         color = unpack_unorm4x8_to_float(src);
1957     }
1958     return color;
1959 }
1960 static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
1961 {
1962     uint dst;
1963 
1964     if (isSRGB)
1965     {
1966         dst = pack_float_to_srgb_unorm4x8(color);
1967     }
1968     else
1969     {
1970         dst = pack_float_to_unorm4x8(color);
1971     }
1972 
1973     intToBytes(dst, bufferOffset, buffer);
1974 }
1975 
1976 static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
1977 {
1978     float4 color;
1979     color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
1980     color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
1981     color.b = normalizedToFloat<uchar>(buffer[bufferOffset + 2]);
1982     color.a = 1.0;
1983 
1984     if (isSRGB)
1985     {
1986         color = sRGBtoLinear(color);
1987     }
1988     return color;
1989 }
1990 static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
1991 {
1992     color.a = 1.0;
1993     uint dst;
1994 
1995     if (isSRGB)
1996     {
1997         dst = pack_float_to_srgb_unorm4x8(color);
1998     }
1999     else
2000     {
2001         dst = pack_float_to_unorm4x8(color);
2002     }
2003     int24bitToBytes(dst, bufferOffset, buffer);
2004 }
2005 
2006 
2007 static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer)
2008 {
2009     float4 color;
2010     uint src = bytesToInt<uint>(buffer, bufferOffset);
2011 
2012     color = unpack_snorm4x8_to_float(src);
2013 
2014     return color;
2015 }
2016 static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2017 {
2018     uint dst = pack_float_to_snorm4x8(color);
2019 
2020     intToBytes(dst, bufferOffset, buffer);
2021 }
2022 
2023 
2024 static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer)
2025 {
2026     float4 color;
2027     color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
2028     color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
2029     color.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]);
2030     color.a = 1.0;
2031 
2032     return color;
2033 }
2034 static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2035 {
2036     uint dst = pack_float_to_snorm4x8(color);
2037 
2038     int24bitToBytes(dst, bufferOffset, buffer);
2039 }
2040 
2041 
2042 static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer)
2043 {
2044     return readR8G8B8A8(bufferOffset, buffer, false);
2045 }
2046 static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2047 {
2048     return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false);
2049 }
2050 
2051 static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
2052 {
2053     return readR8G8B8A8(bufferOffset, buffer, true);
2054 }
2055 static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2056 {
2057     return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true);
2058 }
2059 
2060 
2061 static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer)
2062 {
2063     return readR8G8B8A8(bufferOffset, buffer, false).bgra;
2064 }
2065 static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2066 {
2067     color.rgba = color.bgra;
2068     return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false);
2069 }
2070 
2071 static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
2072 {
2073     return readR8G8B8A8(bufferOffset, buffer, true).bgra;
2074 }
2075 static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2076 {
2077     color.rgba = color.bgra;
2078     return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true);
2079 }
2080 
2081 
2082 static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer)
2083 {
2084     return readR8G8B8(bufferOffset, buffer, false);
2085 }
2086 static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2087 {
2088     return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false);
2089 }
2090 
2091 static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
2092 {
2093     return readR8G8B8(bufferOffset, buffer, true);
2094 }
2095 static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2096 {
2097     return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true);
2098 }
2099 
2100 
2101 static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer)
2102 {
2103     float4 color;
2104     color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
2105     color.a = 1.0;
2106     return color;
2107 }
2108 static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2109 {
2110     buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2111 }
2112 
2113 
2114 static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2115 {
2116     buffer[bufferOffset] = floatToNormalized<uchar>(color.a);
2117 }
2118 
2119 
2120 static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer)
2121 {
2122     float4 color;
2123     color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
2124     color.a = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
2125     return color;
2126 }
2127 static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2128 {
2129     buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2130     buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.a);
2131 }
2132 
2133 
2134 static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer)
2135 {
2136     float4 color;
2137     color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
2138     color.g = color.b = 0.0;
2139     color.a = 1.0;
2140     return color;
2141 }
2142 static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2143 {
2144     buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2145 }
2146 
2147 static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer)
2148 {
2149     float4 color;
2150     color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
2151     color.g = color.b = 0.0;
2152     color.a = 1.0;
2153     return color;
2154 }
2155 static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2156 {
2157     buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r));
2158 }
2159 
2160 
2161 static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer)
2162 {
2163     int4 color;
2164     color.r = as_type<char>(buffer[bufferOffset]);
2165     color.g = color.b = 0;
2166     color.a = 1;
2167     return color;
2168 }
2169 static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2170 {
2171     buffer[bufferOffset] = static_cast<uchar>(color.r);
2172 }
2173 
2174 
2175 static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer)
2176 {
2177     uint4 color;
2178     color.r = as_type<uchar>(buffer[bufferOffset]);
2179     color.g = color.b = 0;
2180     color.a = 1;
2181     return color;
2182 }
2183 static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2184 {
2185     buffer[bufferOffset] = static_cast<uchar>(color.r);
2186 }
2187 
2188 
2189 static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer)
2190 {
2191     float4 color;
2192     color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
2193     color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
2194     color.b = 0.0;
2195     color.a = 1.0;
2196     return color;
2197 }
2198 static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2199 {
2200     buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2201     buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.g);
2202 }
2203 
2204 static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer)
2205 {
2206     float4 color;
2207     color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
2208     color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
2209     color.b = 0.0;
2210     color.a = 1.0;
2211     return color;
2212 }
2213 static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2214 {
2215     buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r));
2216     buffer[bufferOffset + 1] = as_type<uchar>(floatToNormalized<7, char>(color.g));
2217 }
2218 
2219 
2220 static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer)
2221 {
2222     int4 color;
2223     color.r = as_type<char>(buffer[bufferOffset]);
2224     color.g = as_type<char>(buffer[bufferOffset + 1]);
2225     color.b = 0;
2226     color.a = 1;
2227     return color;
2228 }
2229 static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2230 {
2231     buffer[bufferOffset] = static_cast<uchar>(color.r);
2232     buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2233 }
2234 
2235 
2236 static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer)
2237 {
2238     uint4 color;
2239     color.r = as_type<uchar>(buffer[bufferOffset]);
2240     color.g = as_type<uchar>(buffer[bufferOffset + 1]);
2241     color.b = 0;
2242     color.a = 1;
2243     return color;
2244 }
2245 static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2246 {
2247     buffer[bufferOffset] = static_cast<uchar>(color.r);
2248     buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2249 }
2250 
2251 
2252 static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer)
2253 {
2254     int4 color;
2255     color.r = as_type<char>(buffer[bufferOffset]);
2256     color.g = as_type<char>(buffer[bufferOffset + 1]);
2257     color.b = as_type<char>(buffer[bufferOffset + 2]);
2258     color.a = 1;
2259     return color;
2260 }
2261 
2262 
2263 static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer)
2264 {
2265     uint4 color;
2266     color.r = as_type<uchar>(buffer[bufferOffset]);
2267     color.g = as_type<uchar>(buffer[bufferOffset + 1]);
2268     color.b = as_type<uchar>(buffer[bufferOffset + 2]);
2269     color.a = 1;
2270     return color;
2271 }
2272 
2273 
2274 static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer)
2275 {
2276     int4 color;
2277     color.r = as_type<char>(buffer[bufferOffset]);
2278     color.g = as_type<char>(buffer[bufferOffset + 1]);
2279     color.b = as_type<char>(buffer[bufferOffset + 2]);
2280     color.a = as_type<char>(buffer[bufferOffset + 3]);
2281     return color;
2282 }
2283 static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2284 {
2285     buffer[bufferOffset] = static_cast<uchar>(color.r);
2286     buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2287     buffer[bufferOffset + 2] = static_cast<uchar>(color.b);
2288     buffer[bufferOffset + 3] = static_cast<uchar>(color.a);
2289 }
2290 
2291 
2292 static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer)
2293 {
2294     uint4 color;
2295     color.r = as_type<uchar>(buffer[bufferOffset]);
2296     color.g = as_type<uchar>(buffer[bufferOffset + 1]);
2297     color.b = as_type<uchar>(buffer[bufferOffset + 2]);
2298     color.a = as_type<uchar>(buffer[bufferOffset + 3]);
2299     return color;
2300 }
2301 static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2302 {
2303     buffer[bufferOffset] = static_cast<uchar>(color.r);
2304     buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2305     buffer[bufferOffset + 2] = static_cast<uchar>(color.b);
2306     buffer[bufferOffset + 3] = static_cast<uchar>(color.a);
2307 }
2308 
2309 
2310 static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer)
2311 {
2312     float4 color;
2313     color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2314     color.g = color.b = 0.0;
2315     color.a = 1.0;
2316     return color;
2317 }
2318 static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2319 {
2320     shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2321 }
2322 
2323 template <typename ShortType>
2324 static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer)
2325 {
2326     float4 color;
2327     color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2328     color.g = color.b = 0.0;
2329     color.a = 1.0;
2330     return color;
2331 }
2332 
2333 
2334 
2335 
2336 static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer)
2337 {
2338     int4 color;
2339     color.r = bytesToShort<short>(buffer, bufferOffset);
2340     color.g = color.b = 0;
2341     color.a = 1;
2342     return color;
2343 }
2344 static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2345 {
2346     shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
2347 }
2348 
2349 
2350 static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer)
2351 {
2352     uint4 color;
2353     color.r = bytesToShort<ushort>(buffer, bufferOffset);
2354     color.g = color.b = 0;
2355     color.a = 1;
2356     return color;
2357 }
2358 static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2359 {
2360     shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
2361 }
2362 
2363 
2364 static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer)
2365 {
2366     float4 color;
2367     color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2368     color.rgb = 0.0;
2369     return color;
2370 }
2371 static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2372 {
2373     shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset, buffer);
2374 }
2375 
2376 
2377 static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer)
2378 {
2379     float4 color;
2380     color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2381     color.a = 1.0;
2382     return color;
2383 }
2384 static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2385 {
2386     shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2387 }
2388 
2389 
2390 static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
2391 {
2392     float4 color;
2393     color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2394     color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2395     return color;
2396 }
2397 static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2398 {
2399     shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2400     shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 2, buffer);
2401 }
2402 
2403 
2404 static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer)
2405 {
2406     float4 color;
2407     color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2408     color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2409     color.b = 0.0;
2410     color.a = 1.0;
2411     return color;
2412 }
2413 static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2414 {
2415     shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2416     shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
2417 }
2418 
2419 
2420 template <typename ShortType>
2421 static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer)
2422 {
2423     float4 color;
2424     color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2425     color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
2426     color.b = 0.0;
2427     color.a = 1.0;
2428     return color;
2429 }
2430 
2431 
2432 
2433 
2434 static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer)
2435 {
2436     int4 color;
2437     color.r = bytesToShort<short>(buffer, bufferOffset);
2438     color.g = bytesToShort<short>(buffer, bufferOffset + 2);
2439     color.b = 0;
2440     color.a = 1;
2441     return color;
2442 }
2443 static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2444 {
2445     shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
2446     shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer);
2447 }
2448 
2449 
2450 static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer)
2451 {
2452     uint4 color;
2453     color.r = bytesToShort<ushort>(buffer, bufferOffset);
2454     color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
2455     color.b = 0;
2456     color.a = 1;
2457     return color;
2458 }
2459 static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2460 {
2461     shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
2462     shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer);
2463 }
2464 
2465 
2466 static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer)
2467 {
2468     float4 color;
2469     color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2470     color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2471     color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
2472     color.a = 1.0;
2473     return color;
2474 }
2475 
2476 
2477 template <typename ShortType>
2478 static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer)
2479 {
2480     float4 color;
2481     color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2482     color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
2483     color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
2484     color.a = 1.0;
2485     return color;
2486 }
2487 
2488 
2489 
2490 static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer)
2491 {
2492     int4 color;
2493     color.r = bytesToShort<short>(buffer, bufferOffset);
2494     color.g = bytesToShort<short>(buffer, bufferOffset + 2);
2495     color.b = bytesToShort<short>(buffer, bufferOffset + 4);
2496     color.a = 1;
2497     return color;
2498 }
2499 
2500 
2501 static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer)
2502 {
2503     uint4 color;
2504     color.r = bytesToShort<ushort>(buffer, bufferOffset);
2505     color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
2506     color.b = bytesToShort<ushort>(buffer, bufferOffset + 4);
2507     color.a = 1;
2508     return color;
2509 }
2510 
2511 
2512 static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
2513 {
2514     float4 color;
2515     color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2516     color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2517     color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
2518     color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6));
2519     return color;
2520 }
2521 static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2522 {
2523     shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2524     shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
2525     shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer);
2526     shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer);
2527 }
2528 
2529 
2530 template <typename ShortType>
2531 static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer)
2532 {
2533     float4 color;
2534     color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2535     color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
2536     color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
2537     color.a = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6));
2538     return color;
2539 }
2540 
2541 
2542 
2543 
2544 static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer)
2545 {
2546     int4 color;
2547     color.r = bytesToShort<short>(buffer, bufferOffset);
2548     color.g = bytesToShort<short>(buffer, bufferOffset + 2);
2549     color.b = bytesToShort<short>(buffer, bufferOffset + 4);
2550     color.a = bytesToShort<short>(buffer, bufferOffset + 6);
2551     return color;
2552 }
2553 static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2554 {
2555     shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
2556     shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer);
2557     shortToBytes(static_cast<short>(color.b), bufferOffset + 4, buffer);
2558     shortToBytes(static_cast<short>(color.a), bufferOffset + 6, buffer);
2559 }
2560 
2561 
2562 static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer)
2563 {
2564     uint4 color;
2565     color.r = bytesToShort<ushort>(buffer, bufferOffset);
2566     color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
2567     color.b = bytesToShort<ushort>(buffer, bufferOffset + 4);
2568     color.a = bytesToShort<ushort>(buffer, bufferOffset + 6);
2569     return color;
2570 }
2571 static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2572 {
2573     shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
2574     shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer);
2575     shortToBytes(static_cast<ushort>(color.b), bufferOffset + 4, buffer);
2576     shortToBytes(static_cast<ushort>(color.a), bufferOffset + 6, buffer);
2577 }
2578 
2579 
2580 static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer)
2581 {
2582     float4 color;
2583     color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2584     color.g = color.b = 0.0;
2585     color.a = 1.0;
2586     return color;
2587 }
2588 static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2589 {
2590     intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2591 }
2592 
2593 
2594 template <typename IntType>
2595 static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer)
2596 {
2597     float4 color;
2598     color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2599     color.g = color.b = 0.0;
2600     color.a = 1.0;
2601     return color;
2602 }
2603 
2604 
2605 
2606 
2607 static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer)
2608 {
2609     float4 color;
2610     color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2611     color.rgb = 0.0;
2612     return color;
2613 }
2614 static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2615 {
2616     intToBytes(as_type<uint>(color.a), bufferOffset, buffer);
2617 }
2618 
2619 
2620 static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer)
2621 {
2622     float4 color;
2623     color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2624     color.a = 1.0;
2625     return color;
2626 }
2627 static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2628 {
2629     intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2630 }
2631 
2632 
2633 static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer)
2634 {
2635     int4 color;
2636     color.r = bytesToInt<int>(buffer, bufferOffset);
2637     color.g = color.b = 0;
2638     color.a = 1;
2639     return color;
2640 }
2641 static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2642 {
2643     intToBytes(color.r, bufferOffset, buffer);
2644 }
2645 
2646 
2647 static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer)
2648 {
2649     float4 color;
2650     constexpr float kDivisor = 1.0f / (1 << 16);
2651     color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2652     color.g = color.b = 0.0;
2653     color.a = 1.0;
2654     return color;
2655 }
2656 
2657 
2658 static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer)
2659 {
2660     uint4 color;
2661     color.r = bytesToInt<uint>(buffer, bufferOffset);
2662     color.g = color.b = 0;
2663     color.a = 1;
2664     return color;
2665 }
2666 static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2667 {
2668     intToBytes(color.r, bufferOffset, buffer);
2669 }
2670 
2671 
2672 static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
2673 {
2674     float4 color;
2675     color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2676     color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2677     return color;
2678 }
2679 static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2680 {
2681     intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2682     intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer);
2683 }
2684 
2685 
2686 static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer)
2687 {
2688     float4 color;
2689     color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2690     color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2691     color.b = 0.0;
2692     color.a = 1.0;
2693     return color;
2694 }
2695 static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2696 {
2697     intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2698     intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
2699 }
2700 
2701 
2702 template <typename IntType>
2703 static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer)
2704 {
2705     float4 color;
2706     color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2707     color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2708     color.b = 0.0;
2709     color.a = 1.0;
2710     return color;
2711 }
2712 
2713 
2714 
2715 
2716 static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer)
2717 {
2718     int4 color;
2719     color.r = bytesToInt<int>(buffer, bufferOffset);
2720     color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2721     color.b = 0;
2722     color.a = 1;
2723     return color;
2724 }
2725 static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2726 {
2727     intToBytes(color.r, bufferOffset, buffer);
2728     intToBytes(color.g, bufferOffset + 4, buffer);
2729 }
2730 
2731 
2732 static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer)
2733 {
2734     float4 color;
2735     constexpr float kDivisor = 1.0f / (1 << 16);
2736     color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2737     color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2738     color.b = 0.0;
2739     color.a = 1.0;
2740     return color;
2741 }
2742 
2743 
2744 static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer)
2745 {
2746     uint4 color;
2747     color.r = bytesToInt<uint>(buffer, bufferOffset);
2748     color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2749     color.b = 0;
2750     color.a = 1;
2751     return color;
2752 }
2753 static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2754 {
2755     intToBytes(color.r, bufferOffset, buffer);
2756     intToBytes(color.g, bufferOffset + 4, buffer);
2757 }
2758 
2759 
2760 static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer)
2761 {
2762     float4 color;
2763     color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2764     color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2765     color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
2766     color.a = 1.0;
2767     return color;
2768 }
2769 
2770 
2771 template <typename IntType>
2772 static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer)
2773 {
2774     float4 color;
2775     color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2776     color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2777     color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
2778     color.a = 1.0;
2779     return color;
2780 }
2781 
2782 
2783 
2784 
2785 static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer)
2786 {
2787     int4 color;
2788     color.r = bytesToInt<int>(buffer, bufferOffset);
2789     color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2790     color.b = bytesToInt<int>(buffer, bufferOffset + 8);
2791     color.a = 1;
2792     return color;
2793 }
2794 
2795 
2796 static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer)
2797 {
2798     float4 color;
2799     constexpr float kDivisor = 1.0f / (1 << 16);
2800     color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2801     color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2802     color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
2803     color.a = 1.0;
2804     return color;
2805 }
2806 
2807 
2808 static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer)
2809 {
2810     uint4 color;
2811     color.r = bytesToInt<uint>(buffer, bufferOffset);
2812     color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2813     color.b = bytesToInt<uint>(buffer, bufferOffset + 8);
2814     color.a = 1;
2815     return color;
2816 }
2817 
2818 
2819 static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
2820 {
2821     float4 color;
2822     color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2823     color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2824     color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
2825     color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12));
2826     return color;
2827 }
2828 static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2829 {
2830     intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2831     intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
2832     intToBytes(as_type<uint>(color.b), bufferOffset + 8, buffer);
2833     intToBytes(as_type<uint>(color.a), bufferOffset + 12, buffer);
2834 }
2835 
2836 
2837 template <typename IntType>
2838 static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer)
2839 {
2840     float4 color;
2841     color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2842     color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2843     color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
2844     color.a = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12));
2845     return color;
2846 }
2847 
2848 
2849 
2850 
2851 static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer)
2852 {
2853     int4 color;
2854     color.r = bytesToInt<int>(buffer, bufferOffset);
2855     color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2856     color.b = bytesToInt<int>(buffer, bufferOffset + 8);
2857     color.a = bytesToInt<int>(buffer, bufferOffset + 12);
2858     return color;
2859 }
2860 static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2861 {
2862     intToBytes(color.r, bufferOffset, buffer);
2863     intToBytes(color.g, bufferOffset + 4, buffer);
2864     intToBytes(color.b, bufferOffset + 8, buffer);
2865     intToBytes(color.a, bufferOffset + 12, buffer);
2866 }
2867 
2868 static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer)
2869 {
2870     float4 color;
2871     constexpr float kDivisor = 1.0f / (1 << 16);
2872     color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2873     color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2874     color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
2875     color.a = bytesToInt<int>(buffer, bufferOffset + 12) * kDivisor;
2876     return color;
2877 }
2878 
2879 
2880 static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer)
2881 {
2882     uint4 color;
2883     color.r = bytesToInt<uint>(buffer, bufferOffset);
2884     color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2885     color.b = bytesToInt<uint>(buffer, bufferOffset + 8);
2886     color.a = bytesToInt<uint>(buffer, bufferOffset + 12);
2887     return color;
2888 }
2889 static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2890 {
2891     intToBytes(color.r, bufferOffset, buffer);
2892     intToBytes(color.g, bufferOffset + 4, buffer);
2893     intToBytes(color.b, bufferOffset + 8, buffer);
2894     intToBytes(color.a, bufferOffset + 12, buffer);
2895 }
2896 # 1292 "./copy_buffer.metal"
2897 static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8_SINT(bufferOffset, buffer); } static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8_UINT(bufferOffset, buffer); } static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_UINT(bufferOffset, buffer); }
2898 static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16_SINT(bufferOffset, buffer); } static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16_UINT(bufferOffset, buffer); } static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_UINT(bufferOffset, buffer); }
2899 static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32_SINT(bufferOffset, buffer); } static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32_UINT(bufferOffset, buffer); } static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_UINT(bufferOffset, buffer); }
2900 
2901 static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); }
2902 
2903 
2904 kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2905 {
2906     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2907 # 1336 "./copy_buffer.metal"
2908     uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2909 
2910     switch (kCopyFormatType)
2911     {
2912         case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2913     }
2914 
2915 
2916 }
2917 
2918 kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2919 {
2920     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2921 # 1364 "./copy_buffer.metal"
2922     uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2923 
2924     switch (kCopyFormatType)
2925     {
2926         case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2927     }
2928 
2929 
2930 }
2931 
2932 kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2933 {
2934     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2935 # 1392 "./copy_buffer.metal"
2936     uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2937 
2938     switch (kCopyFormatType)
2939     {
2940         case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2941     }
2942 
2943 
2944 }
2945 
2946 
2947 kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
2948 {
2949     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
2950 # 1439 "./copy_buffer.metal"
2951     uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2952 
2953     switch (kCopyFormatType)
2954     {
2955         case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break;
2956     }
2957 
2958 
2959 }
2960 
2961 kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<int, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<int, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
2962 {
2963     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
2964 # 1464 "./copy_buffer.metal"
2965     uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2966 
2967     switch (kCopyFormatType)
2968     {
2969         case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break;
2970     }
2971 
2972 
2973 }
2974 
2975 kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<uint, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<uint, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
2976 {
2977     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
2978 # 1489 "./copy_buffer.metal"
2979     uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2980 
2981     switch (kCopyFormatType)
2982     {
2983         case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break;
2984     }
2985 
2986 
2987 }
2988 
2989 
2990 struct CopyVertexParams
2991 {
2992     uint srcBufferStartOffset;
2993     uint srcStride;
2994     uint srcComponentBytes;
2995     uint srcComponents;
2996 
2997 
2998 
2999     uchar4 srcDefaultAlphaData;
3000 
3001     uint dstBufferStartOffset;
3002     uint dstStride;
3003     uint dstComponents;
3004 
3005     uint vertexCount;
3006 };
3007 # 1539 "./copy_buffer.metal"
3008 template <typename IntType>
3009 static inline void writeFloatVertex(constant CopyVertexParams &options,
3010                                     uint idx,
3011                                     vec<IntType, 4> data,
3012                                     device uchar *dst)
3013 {
3014     uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
3015 
3016     for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
3017     {
3018         floatToBytes(static_cast<float>(data[component]), dstOffset, dst);
3019     }
3020 }
3021 
3022 template <>
3023 inline void writeFloatVertex(constant CopyVertexParams &options,
3024                              uint idx,
3025                              vec<float, 4> data,
3026                              device uchar *dst)
3027 {
3028     uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
3029 
3030     for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
3031     {
3032         floatToBytes(data[component], dstOffset, dst);
3033     }
3034 }
3035 
3036 
3037 static inline void convertToFloatVertexFormat(uint index,
3038                                               constant CopyVertexParams &options,
3039                                               constant uchar *srcBuffer,
3040                                               device uchar *dstBuffer)
3041 {
3042 # 1585 "./copy_buffer.metal"
3043     uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index;
3044 # 1594 "./copy_buffer.metal"
3045     switch (kCopyFormatType)
3046     {
3047         case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
3048     }
3049 
3050 
3051 }
3052 
3053 
3054 kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]],
3055                                          constant CopyVertexParams &options [[buffer(0)]],
3056                                          constant uchar *srcBuffer [[buffer(1)]],
3057                                          device uchar *dstBuffer [[buffer(2)]])
3058 {
3059     if (index >= options.vertexCount) { return; };
3060     convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
3061 }
3062 
3063 
3064 vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]],
3065                                          constant CopyVertexParams &options [[buffer(0)]],
3066                                          constant uchar *srcBuffer [[buffer(1)]],
3067                                          device uchar *dstBuffer [[buffer(2)]])
3068 {
3069     convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
3070 }
3071 
3072 
3073 static inline void expandVertexFormatComponents(uint index,
3074                                                 constant CopyVertexParams &options,
3075                                                 constant uchar *srcBuffer,
3076                                                 device uchar *dstBuffer)
3077 {
3078     uint srcOffset = options.srcBufferStartOffset + options.srcStride * index;
3079     uint dstOffset = options.dstBufferStartOffset + options.dstStride * index;
3080 
3081     uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u);
3082     uint component;
3083     for (component = 0; component < options.srcComponents; ++component,
3084         srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes)
3085     {
3086         for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3087         {
3088             dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte];
3089         }
3090     }
3091 
3092     for (; component < dstComponentsBeforeAlpha;
3093          ++component, dstOffset += options.srcComponentBytes)
3094     {
3095         for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3096         {
3097             dstBuffer[dstOffset + byte] = 0;
3098         }
3099     }
3100 
3101     if (component < options.dstComponents)
3102     {
3103 
3104         for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3105         {
3106             dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte];
3107         }
3108     }
3109 }
3110 
3111 
3112 kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]],
3113                                            constant CopyVertexParams &options [[buffer(0)]],
3114                                            constant uchar *srcBuffer [[buffer(1)]],
3115                                            device uchar *dstBuffer [[buffer(2)]])
3116 {
3117     if (index >= options.vertexCount) { return; };
3118 
3119     expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
3120 }
3121 
3122 
3123 vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]],
3124                                            constant CopyVertexParams &options [[buffer(0)]],
3125                                            constant uchar *srcBuffer [[buffer(1)]],
3126                                            device uchar *dstBuffer [[buffer(2)]])
3127 {
3128     expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
3129 }
3130 
3131 
3132 kernel void linearizeBlocks(ushort2 position [[thread_position_in_grid]],
3133                             constant uint2 *dimensions [[buffer(0)]],
3134                             constant uint2 *srcBuffer [[buffer(1)]],
3135                             device uint2 *dstBuffer [[buffer(2)]])
3136 {
3137     if (any(uint2(position) >= *dimensions))
3138     {
3139         return;
3140     }
3141     uint2 t = uint2(position);
3142     t = (t | (t << 8)) & 0x00FF00FF;
3143     t = (t | (t << 4)) & 0x0F0F0F0F;
3144     t = (t | (t << 2)) & 0x33333333;
3145     t = (t | (t << 1)) & 0x55555555;
3146     dstBuffer[position.y * (*dimensions).x + position.x] = srcBuffer[(t.x << 1) | t.y];
3147 }
3148 
3149 
3150 kernel void saturateDepth(uint2 position [[thread_position_in_grid]],
3151                           constant uint3 *dimensions [[buffer(0)]],
3152                           device float *srcBuffer [[buffer(1)]],
3153                           device float *dstBuffer [[buffer(2)]])
3154 {
3155     if (any(position >= (*dimensions).xy))
3156     {
3157         return;
3158     }
3159     const uint srcOffset = position.y * (*dimensions).z + position.x;
3160     const uint dstOffset = position.y * (*dimensions).x + position.x;
3161     dstBuffer[dstOffset] = saturate(srcBuffer[srcOffset]);
3162 }
3163 # 6 "temp_master_source.metal" 2
3164 # 1 "./visibility.metal" 1
3165 
3166 
3167 
3168 
3169 
3170 
3171 
3172 
3173 constant bool kCombineWithExistingResult [[function_constant(1000)]];
3174 
3175 
3176 
3177 struct CombineVisibilityResultOptions
3178 {
3179 
3180     uint startOffset;
3181 
3182     uint numOffsets;
3183 };
3184 
3185 kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]],
3186                                     constant CombineVisibilityResultOptions &options [[buffer(0)]],
3187                                     constant ushort4 *renderpassVisibilityResult [[buffer(1)]],
3188                                     device ushort4 *finalResults [[buffer(2)]])
3189 {
3190     if (idx > 0)
3191     {
3192 
3193 
3194 
3195         return;
3196     }
3197     ushort4 finalResult16x4;
3198 
3199     if (kCombineWithExistingResult)
3200     {
3201         finalResult16x4 = finalResults[0];
3202     }
3203     else
3204     {
3205         finalResult16x4 = ushort4(0, 0, 0, 0);
3206     }
3207 
3208     for (uint i = 0; i < options.numOffsets; ++i)
3209     {
3210         uint offset = options.startOffset + i;
3211         ushort4 renderpassResult = renderpassVisibilityResult[offset];
3212 
3213 
3214         finalResult16x4 = finalResult16x4 | renderpassResult;
3215     }
3216     finalResults[0] = finalResult16x4;
3217 }
3218 # 7 "temp_master_source.metal" 2
3219 # 1 "./rewrite_indices.metal" 1
3220 # 11 "./rewrite_indices.metal"
3221 # 1 "./rewrite_indices_shared.h" 1
3222 # 12 "./rewrite_indices.metal" 2
3223 using namespace metal;
3224 
3225 constant uint fixIndexBufferKey [[ function_constant(2000) ]];
3226 constant bool indexBufferIsUint16 = (((fixIndexBufferKey >> 0U) & 0x03U) == 2U);
3227 constant bool indexBufferIsUint32 = (((fixIndexBufferKey >> 0U) & 0x03U) == 3U);
3228 constant bool outIndexBufferIsUint16 = (((fixIndexBufferKey >> 2U) & 0x03U) == 2U);
3229 constant bool outIndexBufferIsUint32 = (((fixIndexBufferKey >> 2U) & 0x03U) == 3U);
3230 constant bool doPrimRestart = (fixIndexBufferKey & 0x00100U);
3231 constant uint fixIndexBufferMode = (fixIndexBufferKey >> 4U) & 0x0FU;
3232 
3233 
3234 static inline uint readIdx(
3235                            const device ushort *indexBufferUint16,
3236                            const device uint *indexBufferUint32,
3237                            const uint restartIndex,
3238                            const uint indexCount,
3239                            uint idx,
3240                            thread bool &foundRestart,
3241                            thread uint &indexThatRestartedFirst
3242                            )
3243 {
3244     uint inIndex = idx;
3245     if(inIndex < indexCount)
3246     {
3247         if(indexBufferIsUint16)
3248         {
3249             inIndex = indexBufferUint16[inIndex];
3250         }
3251         else if(indexBufferIsUint32)
3252         {
3253             inIndex = indexBufferUint32[inIndex];
3254         }
3255     }
3256     else
3257     {
3258         foundRestart = true;
3259         indexThatRestartedFirst = idx;
3260     }
3261     if(doPrimRestart && !foundRestart && inIndex == restartIndex)
3262     {
3263         foundRestart = true;
3264         indexThatRestartedFirst = idx;
3265     }
3266     return inIndex;
3267 }
3268 
3269 static inline void outputPrimitive(
3270                                    const device ushort *indexBufferUint16,
3271                                    const device uint *indexBufferUint32,
3272                                    device ushort *outIndexBufferUint16,
3273                                    device uint *outIndexBufferUint32,
3274                                    const uint restartIndex,
3275                                    const uint indexCount,
3276                                    thread uint &baseIndex,
3277                                    uint onIndex,
3278                                    thread uint &onOutIndex
3279                                    )
3280 {
3281     if(baseIndex > onIndex) return;
3282     bool foundRestart = false;
3283     uint indexThatRestartedFirst = 0;
3284 # 86 "./rewrite_indices.metal"
3285     switch(fixIndexBufferMode)
3286     {
3287         case 0x00U:
3288         {
3289             auto tmpIndex = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex, foundRestart, indexThatRestartedFirst);
3290             if(foundRestart)
3291             {
3292                 baseIndex = indexThatRestartedFirst + 1;
3293                 return;
3294             }
3295 
3296             ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex; } onOutIndex++; });
3297         }
3298         break;
3299         case 0x01U:
3300         {
3301             auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
3302             auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
3303             if(foundRestart)
3304             {
3305                 baseIndex = indexThatRestartedFirst + 1;
3306                 return;
3307             }
3308             if((onIndex - baseIndex) & 1) return;
3309 
3310             if(fixIndexBufferKey & 0x00200U)
3311             {
3312                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3313                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3314             }
3315             else
3316             {
3317                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3318                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3319             }
3320         }
3321         break;
3322         case 0x03U:
3323         {
3324             auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
3325             auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
3326             if(foundRestart)
3327             {
3328                 baseIndex = indexThatRestartedFirst + 1;
3329                 return;
3330             }
3331 
3332             if(fixIndexBufferKey & 0x00200U)
3333             {
3334                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3335                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3336             }
3337             else
3338             {
3339                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3340                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3341             }
3342         }
3343         break;
3344         case 0x04U:
3345         {
3346             auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
3347             auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
3348             auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst);
3349             if(foundRestart)
3350             {
3351                 baseIndex = indexThatRestartedFirst + 1;
3352                 return;
3353             }
3354             if(((onIndex - baseIndex) % 3) != 0) return;
3355 
3356             if(fixIndexBufferKey & 0x00200U)
3357             {
3358                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3359                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3360                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3361             }
3362             else
3363             {
3364                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3365                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3366                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3367             }
3368         }
3369         break;
3370         case 0x05U:
3371         {
3372             uint isOdd = ((onIndex - baseIndex) & 1);
3373             auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0 + isOdd, foundRestart, indexThatRestartedFirst);
3374             auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1 - isOdd, foundRestart, indexThatRestartedFirst);
3375             auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst);
3376             if(foundRestart)
3377             {
3378                 baseIndex = indexThatRestartedFirst + 1;
3379                 return;
3380             }
3381 
3382             if(fixIndexBufferKey & 0x00200U)
3383             {
3384                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3385                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3386                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3387             }
3388             else
3389             {
3390 
3391                 if(isOdd)
3392                 {
3393                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3394                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3395                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3396                 }
3397                 else
3398                 {
3399                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3400                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3401                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3402                 }
3403             }
3404 
3405             assert(onOutIndex <= (onIndex + 1) * 3);
3406             assert(onOutIndex <= (indexCount - 2) * 3);
3407         }
3408         break;
3409 
3410     }
3411 
3412 
3413 }
3414 
3415 kernel void fixIndexBuffer(
3416                            const device ushort *indexBufferUint16 [[ buffer(0), function_constant(indexBufferIsUint16) ]],
3417                            const device uint *indexBufferUint32 [[ buffer(0), function_constant(indexBufferIsUint32) ]],
3418                            device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]],
3419                            device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]],
3420                            constant uint &indexCount [[ buffer(2) ]],
3421                            constant uint &primCount [[ buffer(3) ]],
3422                            uint prim [[thread_position_in_grid]])
3423 {
3424     constexpr uint restartIndex = 0xFFFFFFFF;
3425     uint baseIndex = 0;
3426     uint onIndex = onIndex;
3427     uint onOutIndex = onOutIndex;
3428     if(prim < primCount)
3429     {
3430         switch(fixIndexBufferMode)
3431         {
3432             case 0x00U:
3433                 onIndex = prim;
3434                 onOutIndex = prim;
3435                 break;
3436             case 0x01U:
3437                 onIndex = prim * 2;
3438                 onOutIndex = prim * 2;
3439                 break;
3440             case 0x03U:
3441                 onIndex = prim;
3442                 onOutIndex = prim * 2;
3443                 break;
3444             case 0x04U:
3445                 onIndex = prim * 3;
3446                 onOutIndex = prim * 3;
3447                 break;
3448             case 0x05U:
3449                 onIndex = prim;
3450                 onOutIndex = prim * 3;
3451                 break;
3452         }
3453         outputPrimitive(indexBufferUint16, indexBufferUint32, outIndexBufferUint16, outIndexBufferUint32, restartIndex, indexCount, baseIndex, onIndex, onOutIndex);
3454     }
3455 }
3456 
3457 
3458 
3459 static inline void generatePrimitive(
3460                                    device ushort *outIndexBufferUint16,
3461                                    device uint *outIndexBufferUint32,
3462                                    const uint firstVertex,
3463                                    const uint indexCount,
3464                                    thread uint &baseIndex,
3465                                    uint onIndex,
3466                                    uint primCount,
3467                                    thread uint &onOutIndex
3468                                    )
3469 {
3470     if(baseIndex > onIndex) return;
3471 # 284 "./rewrite_indices.metal"
3472     switch(fixIndexBufferMode)
3473     {
3474         case 0x00U:
3475         {
3476             ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = onIndex + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = onIndex + firstVertex; } onOutIndex++; });
3477         }
3478         break;
3479         case 0x01U:
3480         {
3481             auto tmpIndex0 = onIndex + 0;
3482             auto tmpIndex1 = onIndex + 1;
3483             if(fixIndexBufferKey & 0x00200U)
3484             {
3485                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3486                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3487             }
3488             else
3489             {
3490                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3491                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3492             }
3493         }
3494         break;
3495         case 0x02U:
3496         {
3497             auto tmpIndex0 = onIndex + 0;
3498             auto tmpIndex1 = (onIndex + 1) % primCount;
3499             if(fixIndexBufferKey & 0x00200U)
3500             {
3501                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3502                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3503             }
3504             else
3505             {
3506                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3507                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3508             }
3509         }
3510         break;
3511         case 0x03U:
3512         {
3513             auto tmpIndex0 = onIndex + 0;
3514             auto tmpIndex1 = onIndex + 1;
3515             if(fixIndexBufferKey & 0x00200U)
3516             {
3517                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3518                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3519             }
3520             else
3521             {
3522                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3523                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3524             }
3525         }
3526         break;
3527         case 0x04U:
3528         {
3529             auto tmpIndex0 = onIndex + 0;
3530             auto tmpIndex1 = onIndex + 1;
3531             auto tmpIndex2 = onIndex + 2;
3532             if(fixIndexBufferKey & 0x00200U)
3533             {
3534                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3535                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3536                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3537             }
3538             else
3539             {
3540                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3541                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3542                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3543             }
3544         }
3545         break;
3546         case 0x05U:
3547         {
3548             uint isOdd = ((onIndex - baseIndex) & 1);
3549             auto tmpIndex0 = onIndex + 0 + isOdd;
3550             auto tmpIndex1 = onIndex + 1 - isOdd;
3551             auto tmpIndex2 = onIndex + 2;
3552             if(fixIndexBufferKey & 0x00200U)
3553             {
3554                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3555                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3556                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3557             }
3558             else
3559             {
3560                 if(isOdd)
3561                 {
3562                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3563                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3564                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3565                 }
3566                 else
3567                 {
3568                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3569                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3570                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3571                 }
3572             }
3573 
3574             assert(onOutIndex <= (onIndex + 1) * 3);
3575             assert(onOutIndex <= (indexCount - 2) * 3);
3576             break;
3577         }
3578         case 0x06U:
3579         {
3580             auto tmpIndex0 = 0;
3581             auto tmpIndex1 = onIndex + 1;
3582             auto tmpIndex2 = onIndex + 2;
3583 
3584             if(fixIndexBufferKey & 0x00200U)
3585             {
3586                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3587                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3588                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3589             }
3590             else
3591             {
3592                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3593                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3594                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3595             }
3596         }
3597         break;
3598 
3599     }
3600 
3601 }
3602 
3603 
3604 
3605 kernel void genIndexBuffer(
3606                            device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]],
3607                            device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]],
3608                            constant uint &indexCount [[ buffer(2) ]],
3609                            constant uint &primCount [[ buffer(3) ]],
3610                            constant uint &firstVertex [[ buffer(4) ]],
3611                            uint prim [[thread_position_in_grid]])
3612 {
3613     uint baseIndex = 0;
3614     uint onIndex = onIndex;
3615     uint onOutIndex = onOutIndex;
3616     if(prim < primCount)
3617     {
3618         switch(fixIndexBufferMode)
3619         {
3620             case 0x00U:
3621                 onIndex = prim;
3622                 onOutIndex = prim;
3623                 break;
3624             case 0x01U:
3625                 onIndex = prim * 2;
3626                 onOutIndex = prim * 2;
3627                 break;
3628             case 0x03U:
3629                 onIndex = prim;
3630                 onOutIndex = prim * 2;
3631                 break;
3632             case 0x02U:
3633                 onIndex = prim;
3634                 onOutIndex = prim * 2;
3635                 break;
3636             case 0x04U:
3637                 onIndex = prim * 3;
3638                 onOutIndex = prim * 3;
3639                 break;
3640             case 0x05U:
3641                 onIndex = prim;
3642                 onOutIndex = prim * 3;
3643                 break;
3644             case 0x06U:
3645                 onIndex = prim;
3646                 onOutIndex = prim * 3;
3647                 break;
3648         }
3649         generatePrimitive(outIndexBufferUint16, outIndexBufferUint32, firstVertex, indexCount, baseIndex, onIndex, primCount, onOutIndex);
3650     }
3651 }
3652 # 8 "temp_master_source.metal" 2
3653 
3654 
3655 )";
3656