• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// GENERATED FILE - DO NOT EDIT.
2// Generated by gen_mtl_internal_shaders.py
3//
4// Copyright 2020 The ANGLE Project Authors. All rights reserved.
5// Use of this source code is governed by a BSD-style license that can be
6// found in the LICENSE file.
7//
8
9// Combined Metal default shaders.
10
11# 1 "temp_master_source.metal"
12# 1 "<built-in>" 1
13# 1 "<built-in>" 3
14# 483 "<built-in>" 3
15# 1 "<command line>" 1
16# 1 "<built-in>" 2
17# 1 "temp_master_source.metal" 2
18# 1 "./blit.metal" 1
19
20
21
22
23
24
25
26# 1 "./common.h" 1
27# 13 "./common.h"
28# include <simd/simd.h>
29# include <metal_stdlib>
30
31
32# 1 "./constants.h" 1
33# 11 "./constants.h"
34namespace rx
35{
36namespace mtl_shader
37{
38
39enum
40{
41    kTextureType2D = 0,
42    kTextureType2DMultisample = 1,
43    kTextureType2DArray = 2,
44    kTextureTypeCube = 3,
45    kTextureType3D = 4,
46    kTextureTypeCount = 5,
47};
48
49
50
51
52}
53}
54# 18 "./common.h" 2
55
56
57
58
59
60
61
62using namespace metal;
63
64
65
66constant uint32_t kNumColorOutputs [[function_constant(0)]];
67constant bool kColorOutputAvailable0 = kNumColorOutputs > 0;
68constant bool kColorOutputAvailable1 = kNumColorOutputs > 1;
69constant bool kColorOutputAvailable2 = kNumColorOutputs > 2;
70constant bool kColorOutputAvailable3 = kNumColorOutputs > 3;
71constant bool kColorOutputAvailable4 = kNumColorOutputs > 4;
72constant bool kColorOutputAvailable5 = kNumColorOutputs > 5;
73constant bool kColorOutputAvailable6 = kNumColorOutputs > 6;
74constant bool kColorOutputAvailable7 = kNumColorOutputs > 7;
75
76namespace rx
77{
78namespace mtl_shader
79{
80
81
82constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)};
83
84template <typename T>
85struct MultipleColorOutputs
86{
87    vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]];
88    vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]];
89    vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]];
90    vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]];
91    vec<T, 4> color4 [[color(4), function_constant(kColorOutputAvailable4)]];
92    vec<T, 4> color5 [[color(5), function_constant(kColorOutputAvailable5)]];
93    vec<T, 4> color6 [[color(6), function_constant(kColorOutputAvailable6)]];
94    vec<T, 4> color7 [[color(7), function_constant(kColorOutputAvailable7)]];
95};
96# 69 "./common.h"
97template <typename T>
98static inline MultipleColorOutputs<T> toMultipleColorOutputs(vec<T, 4> color)
99{
100    MultipleColorOutputs<T> re;
101
102    do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0);
103    do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0);
104    do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0);
105    do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0);
106    do { if (kColorOutputAvailable4) { re.color4 = color; } } while (0);
107    do { if (kColorOutputAvailable5) { re.color5 = color; } } while (0);
108    do { if (kColorOutputAvailable6) { re.color6 = color; } } while (0);
109    do { if (kColorOutputAvailable7) { re.color7 = color; } } while (0);
110
111    return re;
112}
113
114static inline float3 cubeTexcoords(float2 texcoords, int face)
115{
116    texcoords = 2.0 * texcoords - 1.0;
117    switch (face)
118    {
119        case 0:
120            return float3(1.0, -texcoords.y, -texcoords.x);
121        case 1:
122            return float3(-1.0, -texcoords.y, texcoords.x);
123        case 2:
124            return float3(texcoords.x, 1.0, texcoords.y);
125        case 3:
126            return float3(texcoords.x, -1.0, -texcoords.y);
127        case 4:
128            return float3(texcoords.x, -texcoords.y, 1.0);
129        case 5:
130            return float3(-texcoords.x, -texcoords.y, -1.0);
131    }
132    return float3(texcoords, 0);
133}
134
135template <typename T>
136static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords)
137{
138    uint samples = srcTexture.get_num_samples();
139
140    vec<T, 4> output(0);
141
142    for (uint sample = 0; sample < samples; ++sample)
143    {
144        output += srcTexture.read(coords, sample);
145    }
146
147    output = output / samples;
148
149    return output;
150}
151
152static inline float4 sRGBtoLinear(float4 color)
153{
154    float3 linear1 = color.rgb / 12.92;
155    float3 linear2 = powr((color.rgb + float3(0.055)) / 1.055, 2.4);
156    float3 factor = float3(color.rgb <= float3(0.04045));
157    float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a);
158
159    return linear;
160}
161
162static inline float linearToSRGB(float color)
163{
164    if (color <= 0.0f)
165        return 0.0f;
166    if (color < 0.0031308f)
167        return 12.92f * color;
168    if (color < 1.0f)
169        return 1.055f * powr(color, 0.41666f) - 0.055f;
170    return 1.0f;
171}
172
173static inline float4 linearToSRGB(float4 color)
174{
175    return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a);
176}
177
178template <typename Short>
179static inline Short bytesToShort(constant uchar *input, uint offset)
180{
181    Short inputLo = input[offset];
182    Short inputHi = input[offset + 1];
183
184    return inputLo | (inputHi << 8);
185}
186
187template <typename Int>
188static inline Int bytesToInt(constant uchar *input, uint offset)
189{
190    Int input0 = input[offset];
191    Int input1 = input[offset + 1];
192    Int input2 = input[offset + 2];
193    Int input3 = input[offset + 3];
194
195    return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24);
196}
197
198template <typename Short>
199static inline void shortToBytes(Short val, uint offset, device uchar *output)
200{
201    ushort valUnsigned = as_type<ushort>(val);
202    output[offset] = valUnsigned & 0xff;
203    output[offset + 1] = (valUnsigned >> 8) & 0xff;
204}
205
206template <typename Int>
207static inline void intToBytes(Int val, uint offset, device uchar *output)
208{
209    uint valUnsigned = as_type<uint>(val);
210    output[offset] = valUnsigned & 0xff;
211    output[offset + 1] = (valUnsigned >> 8) & 0xff;
212    output[offset + 2] = (valUnsigned >> 16) & 0xff;
213    output[offset + 3] = (valUnsigned >> 24) & 0xff;
214}
215
216static inline void floatToBytes(float val, uint offset, device uchar *output)
217{
218    intToBytes(as_type<uint>(val), offset, output);
219}
220
221static inline void int24bitToBytes(uint val, uint offset, device uchar *output)
222{
223    output[offset] = val & 0xff;
224    output[offset + 1] = (val >> 8) & 0xff;
225    output[offset + 2] = (val >> 16) & 0xff;
226}
227
228template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
229static inline T getShiftedData(T input)
230{
231    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
232                  "T must have at least as many bits as inputBitCount + inputBitStart.");
233    const T mask = (1 << inputBitCount) - 1;
234    return (input >> inputBitStart) & mask;
235}
236
237template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
238static inline T shiftData(T input)
239{
240    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
241                  "T must have at least as many bits as inputBitCount + inputBitStart.");
242    const T mask = (1 << inputBitCount) - 1;
243    return (input & mask) << inputBitStart;
244}
245
246template <unsigned int inputBitCount, typename T>
247static inline float normalizedToFloat(T input)
248{
249    static_assert(inputBitCount <= (sizeof(T) * 8),
250                  "T must have more bits than or same bits as inputBitCount.");
251    static_assert(inputBitCount <= 23, "Only single precision is supported");
252
253    constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1);
254    return input * inverseMax;
255}
256
257template <typename T>
258static inline float normalizedToFloat(T input)
259{
260    return normalizedToFloat<sizeof(T) * 8, T>(input);
261}
262
263template <>
264inline float normalizedToFloat(short input)
265{
266    constexpr float inverseMax = 1.0f / 0x7fff;
267    return static_cast<float>(input) * inverseMax;
268}
269
270template <>
271inline float normalizedToFloat(int input)
272{
273    constexpr float inverseMax = 1.0f / 0x7fffffff;
274    return static_cast<float>(input) * inverseMax;
275}
276
277template <>
278inline float normalizedToFloat(uint input)
279{
280    constexpr float inverseMax = 1.0f / 0xffffffff;
281    return static_cast<float>(input) * inverseMax;
282}
283
284template <unsigned int outputBitCount, typename T>
285static inline T floatToNormalized(float input)
286{
287    static_assert(outputBitCount <= (sizeof(T) * 8),
288                  "T must have more bits than or same bits as inputBitCount.");
289    static_assert(outputBitCount <= 23, "Only single precision is supported");
290
291    return static_cast<T>(((1 << outputBitCount) - 1) * input + 0.5f);
292}
293
294template <typename T>
295static inline T floatToNormalized(float input)
296{
297    return floatToNormalized<sizeof(T) * 8, T>(input);
298}
299
300}
301}
302# 9 "./blit.metal" 2
303
304using namespace rx::mtl_shader;
305
306
307constant bool kPremultiplyAlpha [[function_constant(1)]];
308constant bool kUnmultiplyAlpha [[function_constant(2)]];
309constant bool kTransformLinearToSrgb [[function_constant(3)]];
310constant int kSourceTextureType [[function_constant(4)]];
311constant int kSourceTexture2Type [[function_constant(5)]];
312
313constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D;
314constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray;
315constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample;
316constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube;
317constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D;
318
319constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D;
320constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray;
321constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample;
322constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube;
323
324struct BlitParams
325{
326
327    float4 srcTexCoords;
328    int srcLevel;
329    int srcLayer;
330    bool dstLuminance;
331    uint8_t padding[7];
332};
333
334struct BlitVSOut
335{
336    float4 position [[position]];
337    float2 texCoords [[center_no_perspective, user(locn1)]];
338};
339
340vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]])
341{
342    BlitVSOut output;
343    output.position.xy = select(float2(-1.0f), float2(1.0f), bool2(vid & uint2(2, 1)));
344    output.position.zw = float2(0.0, 1.0);
345    output.texCoords = select(options.srcTexCoords.xy, options.srcTexCoords.zw, bool2(vid & uint2(2, 1)));
346
347    return output;
348}
349
350template <typename SrcTexture2d>
351static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords)
352{
353    uint2 dimens(srcTexture.get_width(), srcTexture.get_height());
354    uint2 coords = uint2(texCoords * float2(dimens));
355
356    return coords;
357}
358
359template <typename T>
360static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords)
361{
362    uint2 coords = getImageCoords(srcTexture, texCoords);
363    return resolveTextureMS(srcTexture, coords);
364}
365
366template <typename T>
367static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture,
368                                            sampler textureSampler,
369                                            float2 texCoords,
370                                            constant BlitParams &options)
371{
372    uint depth = srcTexture.get_depth(options.srcLevel);
373    float zCoord = (float(options.srcLayer) + 0.5) / float(depth);
374
375    return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel));
376}
377# 101 "./blit.metal"
378template <typename T>
379static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
380{
381    vec<T, 4> output;
382
383    switch (kSourceTextureType)
384    {
385        case kTextureType2D:
386            output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel));
387            break;
388        case kTextureType2DArray:
389            output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer,
390                                              level(options.srcLevel));
391            break;
392        case kTextureType2DMultisample:
393            output = blitSampleTextureMS(srcTexture2dMS, input.texCoords);
394            break;
395        case kTextureTypeCube:
396            output = srcTextureCube.sample(textureSampler,
397                                           cubeTexcoords(input.texCoords, options.srcLayer),
398                                           level(options.srcLevel));
399            break;
400        case kTextureType3D:
401            output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options);
402            break;
403    }
404
405    if (kTransformLinearToSrgb) {
406        output.x = linearToSRGB(output.x);
407        output.y = linearToSRGB(output.y);
408        output.z = linearToSRGB(output.z);
409    }
410    if (kUnmultiplyAlpha)
411    {
412        if (output.a != 0.0)
413        {
414            output.xyz /= output.a;
415        }
416    }
417    if (kPremultiplyAlpha)
418    {
419        output.xyz *= output.a;
420    }
421
422    if (options.dstLuminance)
423    {
424        output.g = output.b = output.r;
425    }
426
427    return output;
428}
429
430template <typename T>
431static inline MultipleColorOutputs<T> blitFS(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
432{
433    vec<T, 4> output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
434
435    return toMultipleColorOutputs(output);
436}
437
438fragment MultipleColorOutputs<float> blitFloatFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
439{
440    return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
441}
442fragment MultipleColorOutputs<int> blitIntFS(BlitVSOut input [[stage_in]], texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<int> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
443{
444    return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
445}
446fragment MultipleColorOutputs<uint> blitUIntFS(BlitVSOut input [[stage_in]], texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<uint> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
447{
448    return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
449}
450
451fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
452{
453    float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
454    uint4 output = uint4(inputColor * float4(255.0));
455
456    return toMultipleColorOutputs(output);
457}
458
459
460struct FragmentDepthOut
461{
462    float depth [[depth(any)]];
463};
464
465static inline float sampleDepth(
466    texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]],
467    texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]],
468    texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]],
469    texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]],
470    float2 texCoords,
471    constant BlitParams &options)
472{
473    float4 output;
474
475    constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);
476
477    switch (kSourceTextureType)
478    {
479        case kTextureType2D:
480            output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel));
481            break;
482        case kTextureType2DArray:
483            output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer,
484                                              level(options.srcLevel));
485            break;
486        case kTextureType2DMultisample:
487
488            output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
489            break;
490        case kTextureTypeCube:
491            output =
492                srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer),
493                                      level(options.srcLevel));
494            break;
495    }
496
497    return output.r;
498}
499
500fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]],
501                                      texture2d<float> srcTexture2d
502                                      [[texture(0), function_constant(kSourceTextureType2D)]],
503                                      texture2d_array<float> srcTexture2dArray
504                                      [[texture(0), function_constant(kSourceTextureType2DArray)]],
505                                      texture2d_ms<float> srcTexture2dMS
506                                      [[texture(0), function_constant(kSourceTextureType2DMS)]],
507                                      texturecube<float> srcTextureCube
508                                      [[texture(0), function_constant(kSourceTextureTypeCube)]],
509                                      constant BlitParams &options [[buffer(0)]])
510{
511    FragmentDepthOut re;
512
513    re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
514                           input.texCoords, options);
515
516    return re;
517}
518
519static inline uint32_t sampleStencil(
520    texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]],
521    texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]],
522    texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]],
523    texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]],
524    float2 texCoords,
525    int srcLevel,
526    int srcLayer)
527{
528    uint4 output;
529    constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);
530
531    switch (kSourceTexture2Type)
532    {
533        case kTextureType2D:
534            output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel));
535            break;
536        case kTextureType2DArray:
537            output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel));
538            break;
539        case kTextureType2DMultisample:
540
541            output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
542            break;
543        case kTextureTypeCube:
544            output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer),
545                                           level(srcLevel));
546            break;
547    }
548
549    return output.r;
550}
551
552
553struct BlitStencilToBufferParams
554{
555    float2 srcStartTexCoords;
556    float2 srcTexCoordSteps;
557    int srcLevel;
558    int srcLayer;
559
560    uint2 dstSize;
561    uint dstBufferRowPitch;
562
563    bool resolveMS;
564};
565
566kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]],
567                                  texture2d<uint32_t> srcTexture2d
568                                  [[texture(1), function_constant(kSourceTexture2Type2D)]],
569                                  texture2d_array<uint32_t> srcTexture2dArray
570                                  [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
571                                  texture2d_ms<uint32_t> srcTexture2dMS
572                                  [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
573                                  texturecube<uint32_t> srcTextureCube
574                                  [[texture(1), function_constant(kSourceTexture2TypeCube)]],
575                                  constant BlitStencilToBufferParams &options [[buffer(0)]],
576                                  device uchar *buffer [[buffer(1)]])
577{
578    if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y)
579    {
580        return;
581    }
582
583    float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps;
584
585    if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS)
586    {
587        uint samples = srcTexture2dMS.get_num_samples();
588        uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords);
589        uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x;
590
591        for (uint sample = 0; sample < samples; ++sample)
592        {
593            uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r;
594            buffer[bufferOffset + sample] = static_cast<uchar>(stencilPerSample);
595        }
596    }
597    else
598    {
599        uint32_t stencil =
600            sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
601                          srcTexCoords, options.srcLevel, options.srcLayer);
602
603        buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast<uchar>(stencil);
604    }
605}
606
607
608#if __METAL_VERSION__ >= 210
609
610struct FragmentStencilOut
611{
612    uint32_t stencil [[stencil]];
613};
614
615struct FragmentDepthStencilOut
616{
617    float depth [[depth(any)]];
618    uint32_t stencil [[stencil]];
619};
620
621fragment FragmentStencilOut blitStencilFS(
622    BlitVSOut input [[stage_in]],
623    texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]],
624    texture2d_array<uint32_t> srcTexture2dArray
625    [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
626    texture2d_ms<uint32_t> srcTexture2dMS
627    [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
628    texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]],
629    constant BlitParams &options [[buffer(0)]])
630{
631    FragmentStencilOut re;
632
633    re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
634                               input.texCoords, options.srcLevel, options.srcLayer);
635
636    return re;
637}
638
639fragment FragmentDepthStencilOut blitDepthStencilFS(
640    BlitVSOut input [[stage_in]],
641
642    texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]],
643    texture2d_array<float> srcDepthTexture2dArray
644    [[texture(0), function_constant(kSourceTextureType2DArray)]],
645    texture2d_ms<float> srcDepthTexture2dMS
646    [[texture(0), function_constant(kSourceTextureType2DMS)]],
647    texturecube<float> srcDepthTextureCube
648    [[texture(0), function_constant(kSourceTextureTypeCube)]],
649
650
651    texture2d<uint32_t> srcStencilTexture2d
652    [[texture(1), function_constant(kSourceTexture2Type2D)]],
653    texture2d_array<uint32_t> srcStencilTexture2dArray
654    [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
655    texture2d_ms<uint32_t> srcStencilTexture2dMS
656    [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
657    texturecube<uint32_t> srcStencilTextureCube
658    [[texture(1), function_constant(kSourceTexture2TypeCube)]],
659
660    constant BlitParams &options [[buffer(0)]])
661{
662    FragmentDepthStencilOut re;
663
664    re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS,
665                           srcDepthTextureCube, input.texCoords, options);
666    re.stencil =
667        sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS,
668                      srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer);
669    return re;
670}
671#endif
672# 2 "temp_master_source.metal" 2
673# 1 "./clear.metal" 1
674# 10 "./clear.metal"
675using namespace rx::mtl_shader;
676
677struct ClearParams
678{
679    float4 clearColor;
680    float clearDepth;
681};
682
683vertex float4 clearVS(unsigned int vid [[ vertex_id ]],
684                      constant ClearParams &clearParams [[buffer(0)]])
685{
686    return float4(gCorners[vid], clearParams.clearDepth, 1.0);
687}
688
689fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]])
690{
691    return toMultipleColorOutputs(clearParams.clearColor);
692}
693
694fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]])
695{
696    return toMultipleColorOutputs(as_type<int4>(clearParams.clearColor));
697}
698
699fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]])
700{
701    return toMultipleColorOutputs(as_type<uint4>(clearParams.clearColor));
702}
703# 3 "temp_master_source.metal" 2
704# 1 "./gen_indices.metal" 1
705
706
707
708
709
710
711
712
713using namespace rx::mtl_shader;
714
715
716constant bool kSourceBufferAligned[[function_constant(100)]];
717constant bool kSourceIndexIsU8[[function_constant(200)]];
718constant bool kSourceIndexIsU16[[function_constant(300)]];
719constant bool kSourceIndexIsU32[[function_constant(400)]];
720constant bool kSourceBufferUnaligned = !kSourceBufferAligned;
721constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned;
722constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned;
723constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned;
724
725struct IndexConversionParams
726{
727    uint32_t srcOffset;
728    uint32_t indexCount;
729    bool primitiveRestartEnabled;
730};
731
732
733
734inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx)
735{
736    return inputAligned[offset / 2 + idx];
737}
738inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx)
739{
740    return inputAligned[offset / 4 + idx];
741}
742inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx)
743{
744    return input[offset + idx];
745}
746inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx)
747{
748    ushort inputLo = input[offset + 2 * idx];
749    ushort inputHi = input[offset + 2 * idx + 1];
750
751    return inputLo | (inputHi << 8);
752}
753inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx)
754{
755    uint input0 = input[offset + 4 * idx];
756    uint input1 = input[offset + 4 * idx + 1];
757    uint input2 = input[offset + 4 * idx + 2];
758    uint input3 = input[offset + 4 * idx + 3];
759
760    return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24);
761}
762
763kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]],
764                                constant IndexConversionParams &options [[buffer(0)]],
765                                constant uchar *input [[buffer(1)]],
766                                device ushort *output [[buffer(2)]])
767{
768    if (idx >= options.indexCount) { return; };
769
770    uchar value = getIndexAligned(input, options.srcOffset, idx);
771
772    if (options.primitiveRestartEnabled && value == 0xff)
773    {
774        output[idx] = 0xffff;
775    }
776    else
777    {
778        output[idx] = value;
779    }
780}
781
782kernel void convertIndexU16(uint idx [[thread_position_in_grid]],
783                            constant IndexConversionParams &options [[buffer(0)]],
784                            constant uchar *input
785                            [[buffer(1), function_constant(kSourceBufferUnaligned)]],
786                            constant ushort *inputAligned
787                            [[buffer(1), function_constant(kSourceBufferAligned)]],
788                            device ushort *output [[buffer(2)]])
789{
790    if (idx >= options.indexCount) { return; };
791
792    ushort value;
793    if (kSourceBufferAligned)
794    {
795        value = getIndexAligned(inputAligned, options.srcOffset, idx);
796    }
797    else
798    {
799        value = getIndexUnalignedU16(input, options.srcOffset, idx);
800    }
801    output[idx] = value;
802}
803
804kernel void convertIndexU32(uint idx [[thread_position_in_grid]],
805                            constant IndexConversionParams &options [[buffer(0)]],
806                            constant uchar *input
807                            [[buffer(1), function_constant(kSourceBufferUnaligned)]],
808                            constant uint *inputAligned
809                            [[buffer(1), function_constant(kSourceBufferAligned)]],
810                            device uint *output [[buffer(2)]])
811{
812    if (idx >= options.indexCount) { return; };
813
814    uint value;
815    if (kSourceBufferAligned)
816    {
817        value = getIndexAligned(inputAligned, options.srcOffset, idx);
818    }
819    else
820    {
821        value = getIndexUnalignedU32(input, options.srcOffset, idx);
822    }
823    output[idx] = value;
824}
825
826struct IndexFromArrayParams
827{
828    uint firstVertex;
829
830    uint vertexCount;
831};
832
833
834kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]],
835                                      constant IndexFromArrayParams &options [[buffer(0)]],
836                                      device uint *output [[buffer(2)]])
837{
838    if (idx >= options.vertexCount) { return; };
839
840    uint vertexIdx = options.firstVertex + 2 + idx;
841
842
843
844    output[3 * idx ] = vertexIdx - 1;
845    output[3 * idx + 1] = vertexIdx;
846    output[3 * idx + 2] = options.firstVertex;
847}
848
849inline uint getIndexU32(uint offset,
850                        uint idx,
851                        constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]],
852                        constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]],
853                        constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]])
854{
855    if (kUseSourceBufferU8)
856    {
857        if (kSourceIndexIsU16)
858        {
859            return getIndexUnalignedU16(inputU8, offset, idx);
860        }
861        else if (kSourceIndexIsU32)
862        {
863            return getIndexUnalignedU32(inputU8, offset, idx);
864        }
865        return getIndexAligned(inputU8, offset, idx);
866    }
867    else if (kUseSourceBufferU16)
868    {
869        return getIndexAligned(inputU16, offset, idx);
870    }
871    else if (kUseSourceBufferU32)
872    {
873        return getIndexAligned(inputU32, offset, idx);
874    }
875    return 0;
876}
877
878
879
880
881kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]],
882                                         constant IndexConversionParams &options [[buffer(0)]],
883                                         constant uchar *inputU8
884                                         [[buffer(1), function_constant(kUseSourceBufferU8)]],
885                                         constant ushort *inputU16
886                                         [[buffer(1), function_constant(kUseSourceBufferU16)]],
887                                         constant uint *inputU32
888                                         [[buffer(1), function_constant(kUseSourceBufferU32)]],
889                                         device uint *output [[buffer(2)]])
890{
891    if (idx >= options.indexCount) { return; };
892
893    uint elemIdx = 2 + idx;
894
895    output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32);
896    output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32);
897    output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32);
898}
899
900
901kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]],
902                                        constant IndexFromArrayParams &options [[buffer(0)]],
903                                        device uint *output [[buffer(2)]])
904{
905    uint totalIndices = options.vertexCount + 1;
906    if (idx >= totalIndices) { return; };
907
908    output[idx] = options.firstVertex + idx % options.vertexCount;
909}
910
911
912
913kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]],
914                                           constant IndexConversionParams &options [[buffer(0)]],
915                                           constant uchar *inputU8
916                                           [[buffer(1), function_constant(kUseSourceBufferU8)]],
917                                           constant ushort *inputU16
918                                           [[buffer(1), function_constant(kUseSourceBufferU16)]],
919                                           constant uint *inputU32
920                                           [[buffer(1), function_constant(kUseSourceBufferU32)]],
921                                           device uint *output [[buffer(2)]])
922{
923    uint totalTargetIndices = options.indexCount + 1;
924    if (idx >= totalTargetIndices) { return; };
925
926    output[idx] =
927        getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32);
928}
929# 4 "temp_master_source.metal" 2
930# 1 "./gen_mipmap.metal" 1
931
932
933
934
935
936
937
938
939using namespace rx::mtl_shader;
940# 31 "./gen_mipmap.metal"
941struct GenMipParams
942{
943    uint srcLevel;
944    uint numMipLevelsToGen;
945    bool sRGB;
946};
947
948
949
950kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
951                              ushort3 gIndices [[thread_position_in_grid]],
952                              texture3d<float> srcTexture [[texture(0)]],
953                              texture3d<float, access::write> dstMip1 [[texture(1)]],
954                              texture3d<float, access::write> dstMip2 [[texture(2)]],
955                              texture3d<float, access::write> dstMip3 [[texture(3)]],
956                              texture3d<float, access::write> dstMip4 [[texture(4)]],
957                              constant GenMipParams &options [[buffer(0)]])
958{
959    ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth());
960    bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z;
961
962    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
963
964
965
966
967    threadgroup float sR[(8 * 8 * 8)];
968    threadgroup float sG[(8 * 8 * 8)];
969    threadgroup float sB[(8 * 8 * 8)];
970    threadgroup float sA[(8 * 8 * 8)];
971
972
973    float4 texel1;
974    if (validThread)
975    {
976        float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize);
977        texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));
978
979
980        dstMip1.write(texel1, gIndices);
981    }
982    else
983    {
984
985        lIndex = 0xffffffff;
986    }
987
988    if (options.numMipLevelsToGen == 1)
989    {
990        return;
991    }
992
993
994
995
996    if (options.sRGB)
997    {
998        texel1 = linearToSRGB(texel1);
999    }
1000    sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1001
1002    threadgroup_barrier(mem_flags::mem_threadgroup);
1003
1004
1005    if ((lIndex & 0x49) == 0)
1006    {
1007        bool3 atEdge = gIndices == (mipSize - ushort3(1));
1008
1009
1010
1011        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
1012
1013        float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
1014
1015        float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)]));
1016
1017        float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
1018
1019
1020        float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)]));
1021
1022
1023        float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)]));
1024
1025
1026        float4 texel8 =
1027            (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)]));
1028
1029
1030        texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
1031
1032        dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1);
1033
1034
1035        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1036    }
1037
1038    if (options.numMipLevelsToGen == 2)
1039    {
1040        return;
1041    }
1042
1043
1044    threadgroup_barrier(mem_flags::mem_threadgroup);
1045
1046
1047    if ((lIndex & 0xdb) == 0)
1048    {
1049        mipSize = max(mipSize >> 1, ushort3(1));
1050        bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1));
1051
1052
1053
1054        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
1055
1056        float4 texel3 =
1057            (atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)]));
1058
1059        float4 texel4 =
1060            (atEdge.z) ? (texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))]));
1061
1062        float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
1063
1064
1065        float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)]));
1066
1067
1068        float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)]));
1069
1070
1071
1072        float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)]));
1073
1074
1075
1076        texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
1077
1078        dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2);
1079
1080
1081        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1082    }
1083
1084    if (options.numMipLevelsToGen == 3)
1085    {
1086        return;
1087    }
1088
1089
1090    threadgroup_barrier(mem_flags::mem_threadgroup);
1091
1092
1093    if ((lIndex & 0x1ff) == 0)
1094    {
1095        mipSize = max(mipSize >> 1, ushort3(1));
1096        bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1));
1097
1098
1099
1100        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
1101
1102        float4 texel3 =
1103            (atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)]));
1104
1105        float4 texel4 =
1106            (atEdge.z) ? (texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))]));
1107
1108        float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
1109
1110
1111        float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)]));
1112
1113
1114        float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)]));
1115
1116
1117
1118        float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)]));
1119
1120
1121
1122        texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
1123
1124        dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
1125    }
1126}
1127
1128kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
1129                              ushort2 gIndices [[thread_position_in_grid]],
1130                              texture2d<float> srcTexture [[texture(0)]],
1131                              texture2d<float, access::write> dstMip1 [[texture(1)]],
1132                              texture2d<float, access::write> dstMip2 [[texture(2)]],
1133                              texture2d<float, access::write> dstMip3 [[texture(3)]],
1134                              texture2d<float, access::write> dstMip4 [[texture(4)]],
1135                              constant GenMipParams &options [[buffer(0)]])
1136{
1137    uint firstMipLevel = options.srcLevel + 1;
1138    ushort2 mipSize =
1139        ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
1140    bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y;
1141
1142    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
1143
1144
1145
1146
1147    threadgroup float sR[(8 * 8)];
1148    threadgroup float sG[(8 * 8)];
1149    threadgroup float sB[(8 * 8)];
1150    threadgroup float sA[(8 * 8)];
1151
1152
1153    float4 texel1;
1154    if (validThread)
1155    {
1156        float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize);
1157        texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));
1158
1159
1160        dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices);
1161    }
1162    else
1163    {
1164
1165        lIndex = 0xffffffff;
1166    }
1167
1168    if (options.numMipLevelsToGen == 1)
1169    {
1170        return;
1171    }
1172
1173
1174
1175
1176    sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1177
1178    threadgroup_barrier(mem_flags::mem_threadgroup);
1179
1180
1181    if ((lIndex & 0x09) == 0)
1182    {
1183        bool2 atEdge = gIndices == (mipSize - ushort2(1));
1184
1185
1186
1187        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
1188
1189        float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
1190
1191        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
1192
1193
1194        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1195
1196        dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1);
1197
1198
1199        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1200    }
1201
1202    if (options.numMipLevelsToGen == 2)
1203    {
1204        return;
1205    }
1206
1207
1208    threadgroup_barrier(mem_flags::mem_threadgroup);
1209
1210
1211    if ((lIndex & 0x1b) == 0)
1212    {
1213        mipSize = max(mipSize >> 1, ushort2(1));
1214        bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1));
1215
1216
1217
1218        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
1219
1220        float4 texel3 =
1221            (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8]));
1222
1223        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
1224
1225
1226        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1227
1228        dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2);
1229
1230
1231        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1232    }
1233
1234    if (options.numMipLevelsToGen == 3)
1235    {
1236        return;
1237    }
1238
1239
1240    threadgroup_barrier(mem_flags::mem_threadgroup);
1241
1242
1243    if ((lIndex & 0x3f) == 0)
1244    {
1245        mipSize = max(mipSize >> 1, ushort2(1));
1246        bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1));
1247
1248
1249
1250        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
1251
1252        float4 texel3 =
1253            (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8]));
1254
1255        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
1256
1257
1258        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1259
1260        dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
1261    }
1262}
1263
1264template <typename TextureTypeR, typename TextureTypeW>
1265static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps(
1266    uint lIndex,
1267    ushort3 gIndices,
1268    TextureTypeR srcTexture,
1269    TextureTypeW dstMip2,
1270    TextureTypeW dstMip3,
1271    TextureTypeW dstMip4,
1272    ushort2 mip1Size,
1273    float4 mip1Texel,
1274    threadgroup float *sR,
1275    threadgroup float *sG,
1276    threadgroup float *sB,
1277    threadgroup float *sA,
1278    constant GenMipParams &options)
1279{
1280    ushort2 mipSize = mip1Size;
1281    float4 texel1 = mip1Texel;
1282
1283
1284
1285
1286    sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1287
1288    threadgroup_barrier(mem_flags::mem_threadgroup);
1289
1290
1291    if ((lIndex & 0x09) == 0)
1292    {
1293        bool2 atEdge = gIndices.xy == (mipSize - ushort2(1));
1294
1295
1296
1297        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
1298
1299        float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
1300
1301        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
1302
1303
1304        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1305
1306        dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z);
1307
1308
1309        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1310    }
1311
1312    if (options.numMipLevelsToGen == 2)
1313    {
1314        return;
1315    }
1316
1317
1318    threadgroup_barrier(mem_flags::mem_threadgroup);
1319
1320
1321    if ((lIndex & 0x1b) == 0)
1322    {
1323        mipSize = max(mipSize >> 1, ushort2(1));
1324        bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1));
1325
1326
1327
1328        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
1329
1330        float4 texel3 =
1331            (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8]));
1332
1333        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
1334
1335
1336        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1337
1338        dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z);
1339
1340
1341        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1342    }
1343
1344    if (options.numMipLevelsToGen == 3)
1345    {
1346        return;
1347    }
1348
1349
1350    threadgroup_barrier(mem_flags::mem_threadgroup);
1351
1352
1353    if ((lIndex & 0x3f) == 0)
1354    {
1355        mipSize = max(mipSize >> 1, ushort2(1));
1356        bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1));
1357
1358
1359
1360        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
1361
1362        float4 texel3 =
1363            (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8]));
1364
1365        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
1366
1367
1368        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1369
1370        dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z);
1371    }
1372}
1373
1374kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]],
1375                                ushort3 gIndices [[thread_position_in_grid]],
1376                                texturecube<float> srcTexture [[texture(0)]],
1377                                texturecube<float, access::write> dstMip1 [[texture(1)]],
1378                                texturecube<float, access::write> dstMip2 [[texture(2)]],
1379                                texturecube<float, access::write> dstMip3 [[texture(3)]],
1380                                texturecube<float, access::write> dstMip4 [[texture(4)]],
1381                                constant GenMipParams &options [[buffer(0)]])
1382{
1383    uint firstMipLevel = options.srcLevel + 1;
1384    ushort2 mip1Size =
1385        ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
1386    bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;
1387
1388    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
1389
1390
1391    float4 mip1Texel;
1392    if (validThread)
1393    {
1394        float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
1395        mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)),
1396                                      level(options.srcLevel));
1397
1398
1399        dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
1400    }
1401    else
1402    {
1403
1404        lIndex = 0xffffffff;
1405    }
1406
1407    if (options.numMipLevelsToGen == 1)
1408    {
1409        return;
1410    }
1411
1412
1413    threadgroup float sR[(8 * 8)];
1414    threadgroup float sG[(8 * 8)];
1415    threadgroup float sB[(8 * 8)];
1416    threadgroup float sA[(8 * 8)];
1417
1418    generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
1419                                           mip1Size, mip1Texel, sR, sG, sB, sA, options);
1420}
1421
1422kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]],
1423                                   ushort3 gIndices [[thread_position_in_grid]],
1424                                   texture2d_array<float> srcTexture [[texture(0)]],
1425                                   texture2d_array<float, access::write> dstMip1 [[texture(1)]],
1426                                   texture2d_array<float, access::write> dstMip2 [[texture(2)]],
1427                                   texture2d_array<float, access::write> dstMip3 [[texture(3)]],
1428                                   texture2d_array<float, access::write> dstMip4 [[texture(4)]],
1429                                   constant GenMipParams &options [[buffer(0)]])
1430{
1431    uint firstMipLevel = options.srcLevel + 1;
1432    ushort2 mip1Size =
1433        ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
1434    bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;
1435
1436    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
1437
1438
1439    float4 mip1Texel;
1440    if (validThread)
1441    {
1442        float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
1443        mip1Texel =
1444            srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel));
1445
1446
1447        dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
1448    }
1449    else
1450    {
1451
1452        lIndex = 0xffffffff;
1453    }
1454
1455    if (options.numMipLevelsToGen == 1)
1456    {
1457        return;
1458    }
1459
1460
1461    threadgroup float sR[(8 * 8)];
1462    threadgroup float sG[(8 * 8)];
1463    threadgroup float sB[(8 * 8)];
1464    threadgroup float sA[(8 * 8)];
1465
1466    generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
1467                                           mip1Size, mip1Texel, sR, sG, sB, sA, options);
1468}
1469# 5 "temp_master_source.metal" 2
1470# 1 "./copy_buffer.metal" 1
1471# 12 "./copy_buffer.metal"
1472#include <metal_pack>
1473
1474
1475# 1 "./format_autogen.h" 1
1476
1477
1478
1479namespace rx
1480{
1481namespace mtl_shader
1482{
1483
1484namespace FormatID
1485{
1486enum
1487{
1488    NONE,
1489    D16_UNORM,
1490    D24_UNORM_S8_UINT,
1491    D24_UNORM_X8_UINT,
1492    D32_FLOAT,
1493    D32_FLOAT_S8X24_UINT,
1494    D32_UNORM,
1495    S8_UINT,
1496    A16_FLOAT,
1497    A1R5G5B5_UNORM,
1498    A2R10G10B10_SINT_VERTEX,
1499    A2R10G10B10_SNORM_VERTEX,
1500    A2R10G10B10_SSCALED_VERTEX,
1501    A2R10G10B10_UINT_VERTEX,
1502    A2R10G10B10_UNORM_VERTEX,
1503    A2R10G10B10_USCALED_VERTEX,
1504    A32_FLOAT,
1505    A8_UNORM,
1506    ASTC_10x10_SRGB_BLOCK,
1507    ASTC_10x10_UNORM_BLOCK,
1508    ASTC_10x5_SRGB_BLOCK,
1509    ASTC_10x5_UNORM_BLOCK,
1510    ASTC_10x6_SRGB_BLOCK,
1511    ASTC_10x6_UNORM_BLOCK,
1512    ASTC_10x8_SRGB_BLOCK,
1513    ASTC_10x8_UNORM_BLOCK,
1514    ASTC_12x10_SRGB_BLOCK,
1515    ASTC_12x10_UNORM_BLOCK,
1516    ASTC_12x12_SRGB_BLOCK,
1517    ASTC_12x12_UNORM_BLOCK,
1518    ASTC_3x3x3_UNORM_BLOCK,
1519    ASTC_3x3x3_UNORM_SRGB_BLOCK,
1520    ASTC_4x3x3_UNORM_BLOCK,
1521    ASTC_4x3x3_UNORM_SRGB_BLOCK,
1522    ASTC_4x4_SRGB_BLOCK,
1523    ASTC_4x4_UNORM_BLOCK,
1524    ASTC_4x4x3_UNORM_BLOCK,
1525    ASTC_4x4x3_UNORM_SRGB_BLOCK,
1526    ASTC_4x4x4_UNORM_BLOCK,
1527    ASTC_4x4x4_UNORM_SRGB_BLOCK,
1528    ASTC_5x4_SRGB_BLOCK,
1529    ASTC_5x4_UNORM_BLOCK,
1530    ASTC_5x4x4_UNORM_BLOCK,
1531    ASTC_5x4x4_UNORM_SRGB_BLOCK,
1532    ASTC_5x5_SRGB_BLOCK,
1533    ASTC_5x5_UNORM_BLOCK,
1534    ASTC_5x5x4_UNORM_BLOCK,
1535    ASTC_5x5x4_UNORM_SRGB_BLOCK,
1536    ASTC_5x5x5_UNORM_BLOCK,
1537    ASTC_5x5x5_UNORM_SRGB_BLOCK,
1538    ASTC_6x5_SRGB_BLOCK,
1539    ASTC_6x5_UNORM_BLOCK,
1540    ASTC_6x5x5_UNORM_BLOCK,
1541    ASTC_6x5x5_UNORM_SRGB_BLOCK,
1542    ASTC_6x6_SRGB_BLOCK,
1543    ASTC_6x6_UNORM_BLOCK,
1544    ASTC_6x6x5_UNORM_BLOCK,
1545    ASTC_6x6x5_UNORM_SRGB_BLOCK,
1546    ASTC_6x6x6_UNORM_BLOCK,
1547    ASTC_6x6x6_UNORM_SRGB_BLOCK,
1548    ASTC_8x5_SRGB_BLOCK,
1549    ASTC_8x5_UNORM_BLOCK,
1550    ASTC_8x6_SRGB_BLOCK,
1551    ASTC_8x6_UNORM_BLOCK,
1552    ASTC_8x8_SRGB_BLOCK,
1553    ASTC_8x8_UNORM_BLOCK,
1554    B10G10R10A2_UNORM,
1555    B4G4R4A4_UNORM,
1556    B5G5R5A1_UNORM,
1557    B5G6R5_UNORM,
1558    B8G8R8A8_TYPELESS,
1559    B8G8R8A8_TYPELESS_SRGB,
1560    B8G8R8A8_UNORM,
1561    B8G8R8A8_UNORM_SRGB,
1562    B8G8R8X8_UNORM,
1563    B8G8R8X8_UNORM_SRGB,
1564    BC1_RGBA_UNORM_BLOCK,
1565    BC1_RGBA_UNORM_SRGB_BLOCK,
1566    BC1_RGB_UNORM_BLOCK,
1567    BC1_RGB_UNORM_SRGB_BLOCK,
1568    BC2_RGBA_UNORM_BLOCK,
1569    BC2_RGBA_UNORM_SRGB_BLOCK,
1570    BC3_RGBA_UNORM_BLOCK,
1571    BC3_RGBA_UNORM_SRGB_BLOCK,
1572    BC4_RED_SNORM_BLOCK,
1573    BC4_RED_UNORM_BLOCK,
1574    BC5_RG_SNORM_BLOCK,
1575    BC5_RG_UNORM_BLOCK,
1576    BC6H_RGB_SFLOAT_BLOCK,
1577    BC6H_RGB_UFLOAT_BLOCK,
1578    BC7_RGBA_UNORM_BLOCK,
1579    BC7_RGBA_UNORM_SRGB_BLOCK,
1580    EAC_R11G11_SNORM_BLOCK,
1581    EAC_R11G11_UNORM_BLOCK,
1582    EAC_R11_SNORM_BLOCK,
1583    EAC_R11_UNORM_BLOCK,
1584    ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK,
1585    ETC1_R8G8B8_UNORM_BLOCK,
1586    ETC2_R8G8B8A1_SRGB_BLOCK,
1587    ETC2_R8G8B8A1_UNORM_BLOCK,
1588    ETC2_R8G8B8A8_SRGB_BLOCK,
1589    ETC2_R8G8B8A8_UNORM_BLOCK,
1590    ETC2_R8G8B8_SRGB_BLOCK,
1591    ETC2_R8G8B8_UNORM_BLOCK,
1592    G8_B8R8_2PLANE_420_UNORM,
1593    G8_B8_R8_3PLANE_420_UNORM,
1594    L16A16_FLOAT,
1595    L16_FLOAT,
1596    L32A32_FLOAT,
1597    L32_FLOAT,
1598    L8A8_UNORM,
1599    L8_UNORM,
1600    PALETTE4_R4G4B4A4_UNORM,
1601    PALETTE4_R5G5B5A1_UNORM,
1602    PALETTE4_R5G6B5_UNORM,
1603    PALETTE4_R8G8B8A8_UNORM,
1604    PALETTE4_R8G8B8_UNORM,
1605    PALETTE8_R4G4B4A4_UNORM,
1606    PALETTE8_R5G5B5A1_UNORM,
1607    PALETTE8_R5G6B5_UNORM,
1608    PALETTE8_R8G8B8A8_UNORM,
1609    PALETTE8_R8G8B8_UNORM,
1610    PVRTC1_RGBA_2BPP_UNORM_BLOCK,
1611    PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK,
1612    PVRTC1_RGBA_4BPP_UNORM_BLOCK,
1613    PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK,
1614    PVRTC1_RGB_2BPP_UNORM_BLOCK,
1615    PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK,
1616    PVRTC1_RGB_4BPP_UNORM_BLOCK,
1617    PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK,
1618    R10G10B10A2_SINT,
1619    R10G10B10A2_SNORM,
1620    R10G10B10A2_SSCALED,
1621    R10G10B10A2_UINT,
1622    R10G10B10A2_UNORM,
1623    R10G10B10A2_USCALED,
1624    R10G10B10X2_UNORM,
1625    R11G11B10_FLOAT,
1626    R16G16B16A16_FLOAT,
1627    R16G16B16A16_SINT,
1628    R16G16B16A16_SNORM,
1629    R16G16B16A16_SSCALED,
1630    R16G16B16A16_UINT,
1631    R16G16B16A16_UNORM,
1632    R16G16B16A16_USCALED,
1633    R16G16B16_FLOAT,
1634    R16G16B16_SINT,
1635    R16G16B16_SNORM,
1636    R16G16B16_SSCALED,
1637    R16G16B16_UINT,
1638    R16G16B16_UNORM,
1639    R16G16B16_USCALED,
1640    R16G16_FLOAT,
1641    R16G16_SINT,
1642    R16G16_SNORM,
1643    R16G16_SSCALED,
1644    R16G16_UINT,
1645    R16G16_UNORM,
1646    R16G16_USCALED,
1647    R16_FLOAT,
1648    R16_SINT,
1649    R16_SNORM,
1650    R16_SSCALED,
1651    R16_UINT,
1652    R16_UNORM,
1653    R16_USCALED,
1654    R32G32B32A32_FIXED,
1655    R32G32B32A32_FLOAT,
1656    R32G32B32A32_SINT,
1657    R32G32B32A32_SNORM,
1658    R32G32B32A32_SSCALED,
1659    R32G32B32A32_UINT,
1660    R32G32B32A32_UNORM,
1661    R32G32B32A32_USCALED,
1662    R32G32B32_FIXED,
1663    R32G32B32_FLOAT,
1664    R32G32B32_SINT,
1665    R32G32B32_SNORM,
1666    R32G32B32_SSCALED,
1667    R32G32B32_UINT,
1668    R32G32B32_UNORM,
1669    R32G32B32_USCALED,
1670    R32G32_FIXED,
1671    R32G32_FLOAT,
1672    R32G32_SINT,
1673    R32G32_SNORM,
1674    R32G32_SSCALED,
1675    R32G32_UINT,
1676    R32G32_UNORM,
1677    R32G32_USCALED,
1678    R32_FIXED,
1679    R32_FLOAT,
1680    R32_SINT,
1681    R32_SNORM,
1682    R32_SSCALED,
1683    R32_UINT,
1684    R32_UNORM,
1685    R32_USCALED,
1686    R4G4B4A4_UNORM,
1687    R5G5B5A1_UNORM,
1688    R5G6B5_UNORM,
1689    R8G8B8A8_SINT,
1690    R8G8B8A8_SNORM,
1691    R8G8B8A8_SSCALED,
1692    R8G8B8A8_TYPELESS,
1693    R8G8B8A8_TYPELESS_SRGB,
1694    R8G8B8A8_UINT,
1695    R8G8B8A8_UNORM,
1696    R8G8B8A8_UNORM_SRGB,
1697    R8G8B8A8_USCALED,
1698    R8G8B8X8_UNORM,
1699    R8G8B8X8_UNORM_SRGB,
1700    R8G8B8_SINT,
1701    R8G8B8_SNORM,
1702    R8G8B8_SSCALED,
1703    R8G8B8_UINT,
1704    R8G8B8_UNORM,
1705    R8G8B8_UNORM_SRGB,
1706    R8G8B8_USCALED,
1707    R8G8_SINT,
1708    R8G8_SNORM,
1709    R8G8_SSCALED,
1710    R8G8_UINT,
1711    R8G8_UNORM,
1712    R8G8_UNORM_SRGB,
1713    R8G8_USCALED,
1714    R8_SINT,
1715    R8_SNORM,
1716    R8_SSCALED,
1717    R8_UINT,
1718    R8_UNORM,
1719    R8_UNORM_SRGB,
1720    R8_USCALED,
1721    R9G9B9E5_SHAREDEXP,
1722    X2R10G10B10_SINT_VERTEX,
1723    X2R10G10B10_SNORM_VERTEX,
1724    X2R10G10B10_SSCALED_VERTEX,
1725    X2R10G10B10_UINT_VERTEX,
1726    X2R10G10B10_UNORM_VERTEX,
1727    X2R10G10B10_USCALED_VERTEX,
1728    EXTERNAL0,
1729    EXTERNAL1,
1730    EXTERNAL2,
1731    EXTERNAL3,
1732    EXTERNAL4,
1733    EXTERNAL5,
1734    EXTERNAL6,
1735    EXTERNAL7
1736};
1737
1738}
1739
1740}
1741}
1742# 16 "./copy_buffer.metal" 2
1743
1744using namespace rx::mtl_shader;
1745
1746constant int kCopyFormatType [[function_constant(10)]];
1747
1748
1749constant int kCopyTextureType [[function_constant(20)]];
1750constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D;
1751constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray;
1752constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample;
1753constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube;
1754constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D;
1755
1756struct CopyPixelParams
1757{
1758    uint3 copySize;
1759    uint3 textureOffset;
1760
1761    uint bufferStartOffset;
1762    uint pixelSize;
1763    uint bufferRowPitch;
1764    uint bufferDepthPitch;
1765};
1766
1767struct WritePixelParams
1768{
1769    uint2 copySize;
1770    uint2 textureOffset;
1771
1772    uint bufferStartOffset;
1773
1774    uint pixelSize;
1775    uint bufferRowPitch;
1776
1777    uint textureLevel;
1778    uint textureLayer;
1779
1780    bool reverseTextureRowOrder;
1781};
1782# 120 "./copy_buffer.metal"
1783template <typename T>
1784static inline void textureWrite(ushort3 gIndices,
1785                                constant CopyPixelParams &options,
1786                                vec<T, 4> color,
1787                                texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
1788{
1789    uint3 writeIndices = options.textureOffset + uint3(gIndices);
1790    switch (kCopyTextureType)
1791    {
1792        case kTextureType2D:
1793            dstTexture2d.write(color, writeIndices.xy);
1794            break;
1795        case kTextureType2DArray:
1796            dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z);
1797            break;
1798        case kTextureType3D:
1799            dstTexture3d.write(color, writeIndices);
1800            break;
1801        case kTextureTypeCube:
1802            dstTextureCube.write(color, writeIndices.xy, writeIndices.z);
1803            break;
1804    }
1805}
1806
1807
1808template <typename T>
1809static inline vec<T, 4> textureRead(ushort2 gIndices,
1810                                    constant WritePixelParams &options,
1811                                    texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]])
1812{
1813    vec<T, 4> color;
1814    uint2 coords = uint2(gIndices);
1815    if (options.reverseTextureRowOrder)
1816    {
1817        coords.y = options.copySize.y - 1 - gIndices.y;
1818    }
1819    coords += options.textureOffset;
1820    switch (kCopyTextureType)
1821    {
1822        case kTextureType2D:
1823            color = srcTexture2d.read(coords.xy, options.textureLevel);
1824            break;
1825        case kTextureType2DArray:
1826            color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel);
1827            break;
1828        case kTextureType2DMultisample:
1829            color = resolveTextureMS(srcTexture2dMS, coords.xy);
1830            break;
1831        case kTextureType3D:
1832            color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel);
1833            break;
1834        case kTextureTypeCube:
1835            color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel);
1836            break;
1837    }
1838    return color;
1839}
1840# 215 "./copy_buffer.metal"
1841static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer)
1842{
1843    float4 color;
1844    ushort src = bytesToShort<ushort>(buffer, bufferOffset);
1845
1846    color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src));
1847    color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src));
1848    color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src));
1849    color.a = 1.0;
1850    return color;
1851}
1852static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
1853{
1854    ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) |
1855                 shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) |
1856                 shiftData<5, 0>(floatToNormalized<5, ushort>(color.b));
1857
1858    shortToBytes(dst, bufferOffset, buffer);
1859}
1860
1861
1862static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer)
1863{
1864    float4 color;
1865    ushort src = bytesToShort<ushort>(buffer, bufferOffset);
1866
1867    color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src));
1868    color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src));
1869    color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src));
1870    color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src));
1871    return color;
1872}
1873static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
1874{
1875    ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) |
1876                 shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) |
1877                 shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) |
1878                 shiftData<4, 0>(floatToNormalized<4, ushort>(color.a));
1879    ;
1880
1881    shortToBytes(dst, bufferOffset, buffer);
1882}
1883
1884
1885static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer)
1886{
1887    float4 color;
1888    ushort src = bytesToShort<ushort>(buffer, bufferOffset);
1889
1890    color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src));
1891    color.g = normalizedToFloat<5>(getShiftedData<5, 6>(src));
1892    color.b = normalizedToFloat<5>(getShiftedData<5, 1>(src));
1893    color.a = normalizedToFloat<1>(getShiftedData<1, 0>(src));
1894    return color;
1895}
1896static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
1897{
1898    ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) |
1899                 shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) |
1900                 shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) |
1901                 shiftData<1, 0>(floatToNormalized<1, ushort>(color.a));
1902    ;
1903
1904    shortToBytes(dst, bufferOffset, buffer);
1905}
1906
1907
1908static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer)
1909{
1910    int4 color;
1911    int src = bytesToInt<int>(buffer, bufferOffset);
1912
1913    constexpr int3 rgbSignMask(0x200);
1914    constexpr int3 negativeMask(0xfffffc00);
1915    constexpr int alphaSignMask = 0x2;
1916    constexpr int alphaNegMask = 0xfffffffc;
1917
1918    color.r = getShiftedData<10, 0>(src);
1919    color.g = getShiftedData<10, 10>(src);
1920    color.b = getShiftedData<10, 20>(src);
1921
1922    int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9;
1923    color.rgb = (isRgbNegative * negativeMask) | color.rgb;
1924
1925    color.a = getShiftedData<2, 30>(src);
1926    int isAlphaNegative = color.a & alphaSignMask >> 1;
1927    color.a = (isAlphaNegative * alphaNegMask) | color.a;
1928    return color;
1929}
1930
1931static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer)
1932{
1933    uint4 color;
1934    uint src = bytesToInt<uint>(buffer, bufferOffset);
1935
1936    color.r = getShiftedData<10, 0>(src);
1937    color.g = getShiftedData<10, 10>(src);
1938    color.b = getShiftedData<10, 20>(src);
1939    color.a = getShiftedData<2, 30>(src);
1940    return color;
1941}
1942
1943
1944static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
1945{
1946    float4 color;
1947    uint src = bytesToInt<uint>(buffer, bufferOffset);
1948
1949    if (isSRGB)
1950    {
1951        color = unpack_unorm4x8_srgb_to_float(src);
1952    }
1953    else
1954    {
1955        color = unpack_unorm4x8_to_float(src);
1956    }
1957    return color;
1958}
1959static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
1960{
1961    uint dst;
1962
1963    if (isSRGB)
1964    {
1965        dst = pack_float_to_srgb_unorm4x8(color);
1966    }
1967    else
1968    {
1969        dst = pack_float_to_unorm4x8(color);
1970    }
1971
1972    intToBytes(dst, bufferOffset, buffer);
1973}
1974
1975static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
1976{
1977    float4 color;
1978    color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
1979    color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
1980    color.b = normalizedToFloat<uchar>(buffer[bufferOffset + 2]);
1981    color.a = 1.0;
1982
1983    if (isSRGB)
1984    {
1985        color = sRGBtoLinear(color);
1986    }
1987    return color;
1988}
1989static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
1990{
1991    color.a = 1.0;
1992    uint dst;
1993
1994    if (isSRGB)
1995    {
1996        dst = pack_float_to_srgb_unorm4x8(color);
1997    }
1998    else
1999    {
2000        dst = pack_float_to_unorm4x8(color);
2001    }
2002    int24bitToBytes(dst, bufferOffset, buffer);
2003}
2004
2005
2006static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer)
2007{
2008    float4 color;
2009    uint src = bytesToInt<uint>(buffer, bufferOffset);
2010
2011    color = unpack_snorm4x8_to_float(src);
2012
2013    return color;
2014}
2015static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2016{
2017    uint dst = pack_float_to_snorm4x8(color);
2018
2019    intToBytes(dst, bufferOffset, buffer);
2020}
2021
2022
2023static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer)
2024{
2025    float4 color;
2026    color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
2027    color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
2028    color.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]);
2029    color.a = 1.0;
2030
2031    return color;
2032}
2033static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2034{
2035    uint dst = pack_float_to_snorm4x8(color);
2036
2037    int24bitToBytes(dst, bufferOffset, buffer);
2038}
2039
2040
2041static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer)
2042{
2043    return readR8G8B8A8(bufferOffset, buffer, false);
2044}
2045static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2046{
2047    return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false);
2048}
2049
2050static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
2051{
2052    return readR8G8B8A8(bufferOffset, buffer, true);
2053}
2054static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2055{
2056    return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true);
2057}
2058
2059
2060static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer)
2061{
2062    return readR8G8B8A8(bufferOffset, buffer, false).bgra;
2063}
2064static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2065{
2066    color.rgba = color.bgra;
2067    return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false);
2068}
2069
2070static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
2071{
2072    return readR8G8B8A8(bufferOffset, buffer, true).bgra;
2073}
2074static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2075{
2076    color.rgba = color.bgra;
2077    return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true);
2078}
2079
2080
2081static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer)
2082{
2083    return readR8G8B8(bufferOffset, buffer, false);
2084}
2085static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2086{
2087    return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false);
2088}
2089
2090static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
2091{
2092    return readR8G8B8(bufferOffset, buffer, true);
2093}
2094static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2095{
2096    return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true);
2097}
2098
2099
2100static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer)
2101{
2102    float4 color;
2103    color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
2104    color.a = 1.0;
2105    return color;
2106}
2107static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2108{
2109    buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2110}
2111
2112
2113static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2114{
2115    buffer[bufferOffset] = floatToNormalized<uchar>(color.a);
2116}
2117
2118
2119static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer)
2120{
2121    float4 color;
2122    color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
2123    color.a = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
2124    return color;
2125}
2126static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2127{
2128    buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2129    buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.a);
2130}
2131
2132
2133static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer)
2134{
2135    float4 color;
2136    color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
2137    color.g = color.b = 0.0;
2138    color.a = 1.0;
2139    return color;
2140}
2141static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2142{
2143    buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2144}
2145
2146static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer)
2147{
2148    float4 color;
2149    color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
2150    color.g = color.b = 0.0;
2151    color.a = 1.0;
2152    return color;
2153}
2154static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2155{
2156    buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r));
2157}
2158
2159
2160static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer)
2161{
2162    int4 color;
2163    color.r = as_type<char>(buffer[bufferOffset]);
2164    color.g = color.b = 0;
2165    color.a = 1;
2166    return color;
2167}
2168static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2169{
2170    buffer[bufferOffset] = static_cast<uchar>(color.r);
2171}
2172
2173
2174static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer)
2175{
2176    uint4 color;
2177    color.r = as_type<uchar>(buffer[bufferOffset]);
2178    color.g = color.b = 0;
2179    color.a = 1;
2180    return color;
2181}
2182static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2183{
2184    buffer[bufferOffset] = static_cast<uchar>(color.r);
2185}
2186
2187
2188static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer)
2189{
2190    float4 color;
2191    color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
2192    color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
2193    color.b = 0.0;
2194    color.a = 1.0;
2195    return color;
2196}
2197static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2198{
2199    buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2200    buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.g);
2201}
2202
2203static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer)
2204{
2205    float4 color;
2206    color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
2207    color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
2208    color.b = 0.0;
2209    color.a = 1.0;
2210    return color;
2211}
2212static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2213{
2214    buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r));
2215    buffer[bufferOffset + 1] = as_type<uchar>(floatToNormalized<7, char>(color.g));
2216}
2217
2218
2219static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer)
2220{
2221    int4 color;
2222    color.r = as_type<char>(buffer[bufferOffset]);
2223    color.g = as_type<char>(buffer[bufferOffset + 1]);
2224    color.b = 0;
2225    color.a = 1;
2226    return color;
2227}
2228static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2229{
2230    buffer[bufferOffset] = static_cast<uchar>(color.r);
2231    buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2232}
2233
2234
2235static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer)
2236{
2237    uint4 color;
2238    color.r = as_type<uchar>(buffer[bufferOffset]);
2239    color.g = as_type<uchar>(buffer[bufferOffset + 1]);
2240    color.b = 0;
2241    color.a = 1;
2242    return color;
2243}
2244static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2245{
2246    buffer[bufferOffset] = static_cast<uchar>(color.r);
2247    buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2248}
2249
2250
2251static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer)
2252{
2253    int4 color;
2254    color.r = as_type<char>(buffer[bufferOffset]);
2255    color.g = as_type<char>(buffer[bufferOffset + 1]);
2256    color.b = as_type<char>(buffer[bufferOffset + 2]);
2257    color.a = 1;
2258    return color;
2259}
2260
2261
2262static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer)
2263{
2264    uint4 color;
2265    color.r = as_type<uchar>(buffer[bufferOffset]);
2266    color.g = as_type<uchar>(buffer[bufferOffset + 1]);
2267    color.b = as_type<uchar>(buffer[bufferOffset + 2]);
2268    color.a = 1;
2269    return color;
2270}
2271
2272
2273static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer)
2274{
2275    int4 color;
2276    color.r = as_type<char>(buffer[bufferOffset]);
2277    color.g = as_type<char>(buffer[bufferOffset + 1]);
2278    color.b = as_type<char>(buffer[bufferOffset + 2]);
2279    color.a = as_type<char>(buffer[bufferOffset + 3]);
2280    return color;
2281}
2282static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2283{
2284    buffer[bufferOffset] = static_cast<uchar>(color.r);
2285    buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2286    buffer[bufferOffset + 2] = static_cast<uchar>(color.b);
2287    buffer[bufferOffset + 3] = static_cast<uchar>(color.a);
2288}
2289
2290
2291static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer)
2292{
2293    uint4 color;
2294    color.r = as_type<uchar>(buffer[bufferOffset]);
2295    color.g = as_type<uchar>(buffer[bufferOffset + 1]);
2296    color.b = as_type<uchar>(buffer[bufferOffset + 2]);
2297    color.a = as_type<uchar>(buffer[bufferOffset + 3]);
2298    return color;
2299}
2300static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2301{
2302    buffer[bufferOffset] = static_cast<uchar>(color.r);
2303    buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2304    buffer[bufferOffset + 2] = static_cast<uchar>(color.b);
2305    buffer[bufferOffset + 3] = static_cast<uchar>(color.a);
2306}
2307
2308
2309static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer)
2310{
2311    float4 color;
2312    color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2313    color.g = color.b = 0.0;
2314    color.a = 1.0;
2315    return color;
2316}
2317static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2318{
2319    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2320}
2321
2322template <typename ShortType>
2323static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer)
2324{
2325    float4 color;
2326    color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2327    color.g = color.b = 0.0;
2328    color.a = 1.0;
2329    return color;
2330}
2331
2332
2333
2334
2335static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer)
2336{
2337    int4 color;
2338    color.r = bytesToShort<short>(buffer, bufferOffset);
2339    color.g = color.b = 0;
2340    color.a = 1;
2341    return color;
2342}
2343static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2344{
2345    shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
2346}
2347
2348
2349static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer)
2350{
2351    uint4 color;
2352    color.r = bytesToShort<ushort>(buffer, bufferOffset);
2353    color.g = color.b = 0;
2354    color.a = 1;
2355    return color;
2356}
2357static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2358{
2359    shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
2360}
2361
2362
2363static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer)
2364{
2365    float4 color;
2366    color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2367    color.rgb = 0.0;
2368    return color;
2369}
2370static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2371{
2372    shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset, buffer);
2373}
2374
2375
2376static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer)
2377{
2378    float4 color;
2379    color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2380    color.a = 1.0;
2381    return color;
2382}
2383static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2384{
2385    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2386}
2387
2388
2389static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
2390{
2391    float4 color;
2392    color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2393    color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2394    return color;
2395}
2396static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2397{
2398    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2399    shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 2, buffer);
2400}
2401
2402
2403static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer)
2404{
2405    float4 color;
2406    color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2407    color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2408    color.b = 0.0;
2409    color.a = 1.0;
2410    return color;
2411}
2412static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2413{
2414    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2415    shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
2416}
2417
2418
2419template <typename ShortType>
2420static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer)
2421{
2422    float4 color;
2423    color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2424    color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
2425    color.b = 0.0;
2426    color.a = 1.0;
2427    return color;
2428}
2429
2430
2431
2432
2433static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer)
2434{
2435    int4 color;
2436    color.r = bytesToShort<short>(buffer, bufferOffset);
2437    color.g = bytesToShort<short>(buffer, bufferOffset + 2);
2438    color.b = 0;
2439    color.a = 1;
2440    return color;
2441}
2442static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2443{
2444    shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
2445    shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer);
2446}
2447
2448
2449static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer)
2450{
2451    uint4 color;
2452    color.r = bytesToShort<ushort>(buffer, bufferOffset);
2453    color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
2454    color.b = 0;
2455    color.a = 1;
2456    return color;
2457}
2458static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2459{
2460    shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
2461    shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer);
2462}
2463
2464
2465static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer)
2466{
2467    float4 color;
2468    color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2469    color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2470    color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
2471    color.a = 1.0;
2472    return color;
2473}
2474
2475
2476template <typename ShortType>
2477static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer)
2478{
2479    float4 color;
2480    color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2481    color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
2482    color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
2483    color.a = 1.0;
2484    return color;
2485}
2486
2487
2488
2489static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer)
2490{
2491    int4 color;
2492    color.r = bytesToShort<short>(buffer, bufferOffset);
2493    color.g = bytesToShort<short>(buffer, bufferOffset + 2);
2494    color.b = bytesToShort<short>(buffer, bufferOffset + 4);
2495    color.a = 1;
2496    return color;
2497}
2498
2499
2500static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer)
2501{
2502    uint4 color;
2503    color.r = bytesToShort<ushort>(buffer, bufferOffset);
2504    color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
2505    color.b = bytesToShort<ushort>(buffer, bufferOffset + 4);
2506    color.a = 1;
2507    return color;
2508}
2509
2510
2511static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
2512{
2513    float4 color;
2514    color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2515    color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2516    color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
2517    color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6));
2518    return color;
2519}
2520static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2521{
2522    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2523    shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
2524    shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer);
2525    shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer);
2526}
2527
2528
2529template <typename ShortType>
2530static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer)
2531{
2532    float4 color;
2533    color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2534    color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
2535    color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
2536    color.a = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6));
2537    return color;
2538}
2539
2540
2541
2542
2543static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer)
2544{
2545    int4 color;
2546    color.r = bytesToShort<short>(buffer, bufferOffset);
2547    color.g = bytesToShort<short>(buffer, bufferOffset + 2);
2548    color.b = bytesToShort<short>(buffer, bufferOffset + 4);
2549    color.a = bytesToShort<short>(buffer, bufferOffset + 6);
2550    return color;
2551}
2552static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2553{
2554    shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
2555    shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer);
2556    shortToBytes(static_cast<short>(color.b), bufferOffset + 4, buffer);
2557    shortToBytes(static_cast<short>(color.a), bufferOffset + 6, buffer);
2558}
2559
2560
2561static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer)
2562{
2563    uint4 color;
2564    color.r = bytesToShort<ushort>(buffer, bufferOffset);
2565    color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
2566    color.b = bytesToShort<ushort>(buffer, bufferOffset + 4);
2567    color.a = bytesToShort<ushort>(buffer, bufferOffset + 6);
2568    return color;
2569}
2570static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2571{
2572    shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
2573    shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer);
2574    shortToBytes(static_cast<ushort>(color.b), bufferOffset + 4, buffer);
2575    shortToBytes(static_cast<ushort>(color.a), bufferOffset + 6, buffer);
2576}
2577
2578
2579static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer)
2580{
2581    float4 color;
2582    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2583    color.g = color.b = 0.0;
2584    color.a = 1.0;
2585    return color;
2586}
2587static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2588{
2589    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2590}
2591
2592
2593template <typename IntType>
2594static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer)
2595{
2596    float4 color;
2597    color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2598    color.g = color.b = 0.0;
2599    color.a = 1.0;
2600    return color;
2601}
2602
2603
2604
2605
2606static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer)
2607{
2608    float4 color;
2609    color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2610    color.rgb = 0.0;
2611    return color;
2612}
2613static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2614{
2615    intToBytes(as_type<uint>(color.a), bufferOffset, buffer);
2616}
2617
2618
2619static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer)
2620{
2621    float4 color;
2622    color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2623    color.a = 1.0;
2624    return color;
2625}
2626static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2627{
2628    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2629}
2630
2631
2632static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer)
2633{
2634    int4 color;
2635    color.r = bytesToInt<int>(buffer, bufferOffset);
2636    color.g = color.b = 0;
2637    color.a = 1;
2638    return color;
2639}
2640static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2641{
2642    intToBytes(color.r, bufferOffset, buffer);
2643}
2644
2645
2646static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer)
2647{
2648    float4 color;
2649    constexpr float kDivisor = 1.0f / (1 << 16);
2650    color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2651    color.g = color.b = 0.0;
2652    color.a = 1.0;
2653    return color;
2654}
2655
2656
2657static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer)
2658{
2659    uint4 color;
2660    color.r = bytesToInt<uint>(buffer, bufferOffset);
2661    color.g = color.b = 0;
2662    color.a = 1;
2663    return color;
2664}
2665static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2666{
2667    intToBytes(color.r, bufferOffset, buffer);
2668}
2669
2670
2671static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
2672{
2673    float4 color;
2674    color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2675    color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2676    return color;
2677}
2678static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2679{
2680    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2681    intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer);
2682}
2683
2684
2685static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer)
2686{
2687    float4 color;
2688    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2689    color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2690    color.b = 0.0;
2691    color.a = 1.0;
2692    return color;
2693}
2694static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2695{
2696    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2697    intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
2698}
2699
2700
2701template <typename IntType>
2702static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer)
2703{
2704    float4 color;
2705    color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2706    color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2707    color.b = 0.0;
2708    color.a = 1.0;
2709    return color;
2710}
2711
2712
2713
2714
2715static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer)
2716{
2717    int4 color;
2718    color.r = bytesToInt<int>(buffer, bufferOffset);
2719    color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2720    color.b = 0;
2721    color.a = 1;
2722    return color;
2723}
2724static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2725{
2726    intToBytes(color.r, bufferOffset, buffer);
2727    intToBytes(color.g, bufferOffset + 4, buffer);
2728}
2729
2730
2731static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer)
2732{
2733    float4 color;
2734    constexpr float kDivisor = 1.0f / (1 << 16);
2735    color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2736    color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2737    color.b = 0.0;
2738    color.a = 1.0;
2739    return color;
2740}
2741
2742
2743static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer)
2744{
2745    uint4 color;
2746    color.r = bytesToInt<uint>(buffer, bufferOffset);
2747    color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2748    color.b = 0;
2749    color.a = 1;
2750    return color;
2751}
2752static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2753{
2754    intToBytes(color.r, bufferOffset, buffer);
2755    intToBytes(color.g, bufferOffset + 4, buffer);
2756}
2757
2758
2759static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer)
2760{
2761    float4 color;
2762    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2763    color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2764    color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
2765    color.a = 1.0;
2766    return color;
2767}
2768
2769
2770template <typename IntType>
2771static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer)
2772{
2773    float4 color;
2774    color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2775    color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2776    color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
2777    color.a = 1.0;
2778    return color;
2779}
2780
2781
2782
2783
2784static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer)
2785{
2786    int4 color;
2787    color.r = bytesToInt<int>(buffer, bufferOffset);
2788    color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2789    color.b = bytesToInt<int>(buffer, bufferOffset + 8);
2790    color.a = 1;
2791    return color;
2792}
2793
2794
2795static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer)
2796{
2797    float4 color;
2798    constexpr float kDivisor = 1.0f / (1 << 16);
2799    color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2800    color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2801    color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
2802    color.a = 1.0;
2803    return color;
2804}
2805
2806
2807static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer)
2808{
2809    uint4 color;
2810    color.r = bytesToInt<uint>(buffer, bufferOffset);
2811    color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2812    color.b = bytesToInt<uint>(buffer, bufferOffset + 8);
2813    color.a = 1;
2814    return color;
2815}
2816
2817
2818static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
2819{
2820    float4 color;
2821    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2822    color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2823    color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
2824    color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12));
2825    return color;
2826}
2827static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2828{
2829    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2830    intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
2831    intToBytes(as_type<uint>(color.b), bufferOffset + 8, buffer);
2832    intToBytes(as_type<uint>(color.a), bufferOffset + 12, buffer);
2833}
2834
2835
2836template <typename IntType>
2837static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer)
2838{
2839    float4 color;
2840    color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2841    color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2842    color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
2843    color.a = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12));
2844    return color;
2845}
2846
2847
2848
2849
2850static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer)
2851{
2852    int4 color;
2853    color.r = bytesToInt<int>(buffer, bufferOffset);
2854    color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2855    color.b = bytesToInt<int>(buffer, bufferOffset + 8);
2856    color.a = bytesToInt<int>(buffer, bufferOffset + 12);
2857    return color;
2858}
2859static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2860{
2861    intToBytes(color.r, bufferOffset, buffer);
2862    intToBytes(color.g, bufferOffset + 4, buffer);
2863    intToBytes(color.b, bufferOffset + 8, buffer);
2864    intToBytes(color.a, bufferOffset + 12, buffer);
2865}
2866
2867static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer)
2868{
2869    float4 color;
2870    constexpr float kDivisor = 1.0f / (1 << 16);
2871    color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2872    color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2873    color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
2874    color.a = bytesToInt<int>(buffer, bufferOffset + 12) * kDivisor;
2875    return color;
2876}
2877
2878
2879static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer)
2880{
2881    uint4 color;
2882    color.r = bytesToInt<uint>(buffer, bufferOffset);
2883    color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2884    color.b = bytesToInt<uint>(buffer, bufferOffset + 8);
2885    color.a = bytesToInt<uint>(buffer, bufferOffset + 12);
2886    return color;
2887}
2888static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2889{
2890    intToBytes(color.r, bufferOffset, buffer);
2891    intToBytes(color.g, bufferOffset + 4, buffer);
2892    intToBytes(color.b, bufferOffset + 8, buffer);
2893    intToBytes(color.a, bufferOffset + 12, buffer);
2894}
2895# 1292 "./copy_buffer.metal"
2896static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8_SINT(bufferOffset, buffer); } static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8_UINT(bufferOffset, buffer); } static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_UINT(bufferOffset, buffer); }
2897static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16_SINT(bufferOffset, buffer); } static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16_UINT(bufferOffset, buffer); } static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_UINT(bufferOffset, buffer); }
2898static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32_SINT(bufferOffset, buffer); } static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32_UINT(bufferOffset, buffer); } static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_UINT(bufferOffset, buffer); }
2899
2900static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); }
2901
2902
2903kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2904{
2905    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2906# 1336 "./copy_buffer.metal"
2907    uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2908
2909    switch (kCopyFormatType)
2910    {
2911        case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2912    }
2913
2914
2915}
2916
2917kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2918{
2919    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2920# 1364 "./copy_buffer.metal"
2921    uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2922
2923    switch (kCopyFormatType)
2924    {
2925        case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2926    }
2927
2928
2929}
2930
2931kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2932{
2933    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2934# 1392 "./copy_buffer.metal"
2935    uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2936
2937    switch (kCopyFormatType)
2938    {
2939        case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2940    }
2941
2942
2943}
2944
2945
2946kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
2947{
2948    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
2949# 1439 "./copy_buffer.metal"
2950    uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2951
2952    switch (kCopyFormatType)
2953    {
2954        case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break;
2955    }
2956
2957
2958}
2959
2960kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<int, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<int, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
2961{
2962    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
2963# 1464 "./copy_buffer.metal"
2964    uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2965
2966    switch (kCopyFormatType)
2967    {
2968        case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break;
2969    }
2970
2971
2972}
2973
2974kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<uint, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<uint, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
2975{
2976    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
2977# 1489 "./copy_buffer.metal"
2978    uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2979
2980    switch (kCopyFormatType)
2981    {
2982        case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break;
2983    }
2984
2985
2986}
2987
2988
2989struct CopyVertexParams
2990{
2991    uint srcBufferStartOffset;
2992    uint srcStride;
2993    uint srcComponentBytes;
2994    uint srcComponents;
2995
2996
2997
2998    uchar4 srcDefaultAlphaData;
2999
3000    uint dstBufferStartOffset;
3001    uint dstStride;
3002    uint dstComponents;
3003
3004    uint vertexCount;
3005};
3006# 1539 "./copy_buffer.metal"
3007template <typename IntType>
3008static inline void writeFloatVertex(constant CopyVertexParams &options,
3009                                    uint idx,
3010                                    vec<IntType, 4> data,
3011                                    device uchar *dst)
3012{
3013    uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
3014
3015    for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
3016    {
3017        floatToBytes(static_cast<float>(data[component]), dstOffset, dst);
3018    }
3019}
3020
3021template <>
3022inline void writeFloatVertex(constant CopyVertexParams &options,
3023                             uint idx,
3024                             vec<float, 4> data,
3025                             device uchar *dst)
3026{
3027    uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
3028
3029    for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
3030    {
3031        floatToBytes(data[component], dstOffset, dst);
3032    }
3033}
3034
3035
3036static inline void convertToFloatVertexFormat(uint index,
3037                                              constant CopyVertexParams &options,
3038                                              constant uchar *srcBuffer,
3039                                              device uchar *dstBuffer)
3040{
3041# 1585 "./copy_buffer.metal"
3042    uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index;
3043# 1594 "./copy_buffer.metal"
3044    switch (kCopyFormatType)
3045    {
3046        case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
3047    }
3048
3049
3050}
3051
3052
3053kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]],
3054                                         constant CopyVertexParams &options [[buffer(0)]],
3055                                         constant uchar *srcBuffer [[buffer(1)]],
3056                                         device uchar *dstBuffer [[buffer(2)]])
3057{
3058    if (index >= options.vertexCount) { return; };
3059    convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
3060}
3061
3062
3063vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]],
3064                                         constant CopyVertexParams &options [[buffer(0)]],
3065                                         constant uchar *srcBuffer [[buffer(1)]],
3066                                         device uchar *dstBuffer [[buffer(2)]])
3067{
3068    convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
3069}
3070
3071
3072static inline void expandVertexFormatComponents(uint index,
3073                                                constant CopyVertexParams &options,
3074                                                constant uchar *srcBuffer,
3075                                                device uchar *dstBuffer)
3076{
3077    uint srcOffset = options.srcBufferStartOffset + options.srcStride * index;
3078    uint dstOffset = options.dstBufferStartOffset + options.dstStride * index;
3079
3080    uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u);
3081    uint component;
3082    for (component = 0; component < options.srcComponents; ++component,
3083        srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes)
3084    {
3085        for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3086        {
3087            dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte];
3088        }
3089    }
3090
3091    for (; component < dstComponentsBeforeAlpha;
3092         ++component, dstOffset += options.srcComponentBytes)
3093    {
3094        for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3095        {
3096            dstBuffer[dstOffset + byte] = 0;
3097        }
3098    }
3099
3100    if (component < options.dstComponents)
3101    {
3102
3103        for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3104        {
3105            dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte];
3106        }
3107    }
3108}
3109
3110
3111kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]],
3112                                           constant CopyVertexParams &options [[buffer(0)]],
3113                                           constant uchar *srcBuffer [[buffer(1)]],
3114                                           device uchar *dstBuffer [[buffer(2)]])
3115{
3116    if (index >= options.vertexCount) { return; };
3117
3118    expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
3119}
3120
3121
3122vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]],
3123                                           constant CopyVertexParams &options [[buffer(0)]],
3124                                           constant uchar *srcBuffer [[buffer(1)]],
3125                                           device uchar *dstBuffer [[buffer(2)]])
3126{
3127    expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
3128}
3129
3130
3131kernel void linearizeBlocks(ushort2 position [[thread_position_in_grid]],
3132                            constant uint2 *dimensions [[buffer(0)]],
3133                            constant uint2 *srcBuffer [[buffer(1)]],
3134                            device uint2 *dstBuffer [[buffer(2)]])
3135{
3136    if (any(uint2(position) >= *dimensions))
3137    {
3138        return;
3139    }
3140    uint2 t = uint2(position);
3141    t = (t | (t << 8)) & 0x00FF00FF;
3142    t = (t | (t << 4)) & 0x0F0F0F0F;
3143    t = (t | (t << 2)) & 0x33333333;
3144    t = (t | (t << 1)) & 0x55555555;
3145    dstBuffer[position.y * (*dimensions).x + position.x] = srcBuffer[(t.x << 1) | t.y];
3146}
3147
3148
3149kernel void saturateDepth(uint2 position [[thread_position_in_grid]],
3150                          constant uint3 *dimensions [[buffer(0)]],
3151                          device float *srcBuffer [[buffer(1)]],
3152                          device float *dstBuffer [[buffer(2)]])
3153{
3154    if (any(position >= (*dimensions).xy))
3155    {
3156        return;
3157    }
3158    const uint srcOffset = position.y * (*dimensions).z + position.x;
3159    const uint dstOffset = position.y * (*dimensions).x + position.x;
3160    dstBuffer[dstOffset] = saturate(srcBuffer[srcOffset]);
3161}
3162# 6 "temp_master_source.metal" 2
3163# 1 "./visibility.metal" 1
3164
3165
3166
3167
3168
3169
3170
3171
3172constant bool kCombineWithExistingResult [[function_constant(1000)]];
3173
3174
3175
3176struct CombineVisibilityResultOptions
3177{
3178
3179    uint startOffset;
3180
3181    uint numOffsets;
3182};
3183
3184kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]],
3185                                    constant CombineVisibilityResultOptions &options [[buffer(0)]],
3186                                    constant ushort4 *renderpassVisibilityResult [[buffer(1)]],
3187                                    device ushort4 *finalResults [[buffer(2)]])
3188{
3189    if (idx > 0)
3190    {
3191
3192
3193
3194        return;
3195    }
3196    ushort4 finalResult16x4;
3197
3198    if (kCombineWithExistingResult)
3199    {
3200        finalResult16x4 = finalResults[0];
3201    }
3202    else
3203    {
3204        finalResult16x4 = ushort4(0, 0, 0, 0);
3205    }
3206
3207    for (uint i = 0; i < options.numOffsets; ++i)
3208    {
3209        uint offset = options.startOffset + i;
3210        ushort4 renderpassResult = renderpassVisibilityResult[offset];
3211
3212
3213        finalResult16x4 = finalResult16x4 | renderpassResult;
3214    }
3215    finalResults[0] = finalResult16x4;
3216}
3217# 7 "temp_master_source.metal" 2
3218# 1 "./rewrite_indices.metal" 1
3219# 11 "./rewrite_indices.metal"
3220# 1 "./rewrite_indices_shared.h" 1
3221# 12 "./rewrite_indices.metal" 2
3222using namespace metal;
3223
3224constant uint fixIndexBufferKey [[ function_constant(2000) ]];
3225constant bool indexBufferIsUint16 = (((fixIndexBufferKey >> 0U) & 0x03U) == 2U);
3226constant bool indexBufferIsUint32 = (((fixIndexBufferKey >> 0U) & 0x03U) == 3U);
3227constant bool outIndexBufferIsUint16 = (((fixIndexBufferKey >> 2U) & 0x03U) == 2U);
3228constant bool outIndexBufferIsUint32 = (((fixIndexBufferKey >> 2U) & 0x03U) == 3U);
3229constant bool doPrimRestart = (fixIndexBufferKey & 0x00100U);
3230constant uint fixIndexBufferMode = (fixIndexBufferKey >> 4U) & 0x0FU;
3231
3232
3233static inline uint readIdx(
3234                           const device ushort *indexBufferUint16,
3235                           const device uint *indexBufferUint32,
3236                           const uint restartIndex,
3237                           const uint indexCount,
3238                           uint idx,
3239                           thread bool &foundRestart,
3240                           thread uint &indexThatRestartedFirst
3241                           )
3242{
3243    uint inIndex = idx;
3244    if(inIndex < indexCount)
3245    {
3246        if(indexBufferIsUint16)
3247        {
3248            inIndex = indexBufferUint16[inIndex];
3249        }
3250        else if(indexBufferIsUint32)
3251        {
3252            inIndex = indexBufferUint32[inIndex];
3253        }
3254    }
3255    else
3256    {
3257        foundRestart = true;
3258        indexThatRestartedFirst = idx;
3259    }
3260    if(doPrimRestart && !foundRestart && inIndex == restartIndex)
3261    {
3262        foundRestart = true;
3263        indexThatRestartedFirst = idx;
3264    }
3265    return inIndex;
3266}
3267
3268static inline void outputPrimitive(
3269                                   const device ushort *indexBufferUint16,
3270                                   const device uint *indexBufferUint32,
3271                                   device ushort *outIndexBufferUint16,
3272                                   device uint *outIndexBufferUint32,
3273                                   const uint restartIndex,
3274                                   const uint indexCount,
3275                                   thread uint &baseIndex,
3276                                   uint onIndex,
3277                                   thread uint &onOutIndex
3278                                   )
3279{
3280    if(baseIndex > onIndex) return;
3281    bool foundRestart = false;
3282    uint indexThatRestartedFirst = 0;
3283# 86 "./rewrite_indices.metal"
3284    switch(fixIndexBufferMode)
3285    {
3286        case 0x00U:
3287        {
3288            auto tmpIndex = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex, foundRestart, indexThatRestartedFirst);
3289            if(foundRestart)
3290            {
3291                baseIndex = indexThatRestartedFirst + 1;
3292                return;
3293            }
3294
3295            ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex; } onOutIndex++; });
3296        }
3297        break;
3298        case 0x01U:
3299        {
3300            auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
3301            auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
3302            if(foundRestart)
3303            {
3304                baseIndex = indexThatRestartedFirst + 1;
3305                return;
3306            }
3307            if((onIndex - baseIndex) & 1) return;
3308
3309            if(fixIndexBufferKey & 0x00200U)
3310            {
3311                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3312                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3313            }
3314            else
3315            {
3316                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3317                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3318            }
3319        }
3320        break;
3321        case 0x03U:
3322        {
3323            auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
3324            auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
3325            if(foundRestart)
3326            {
3327                baseIndex = indexThatRestartedFirst + 1;
3328                return;
3329            }
3330
3331            if(fixIndexBufferKey & 0x00200U)
3332            {
3333                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3334                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3335            }
3336            else
3337            {
3338                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3339                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3340            }
3341        }
3342        break;
3343        case 0x04U:
3344        {
3345            auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
3346            auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
3347            auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst);
3348            if(foundRestart)
3349            {
3350                baseIndex = indexThatRestartedFirst + 1;
3351                return;
3352            }
3353            if(((onIndex - baseIndex) % 3) != 0) return;
3354
3355            if(fixIndexBufferKey & 0x00200U)
3356            {
3357                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3358                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3359                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3360            }
3361            else
3362            {
3363                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3364                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3365                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3366            }
3367        }
3368        break;
3369        case 0x05U:
3370        {
3371            uint isOdd = ((onIndex - baseIndex) & 1);
3372            auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0 + isOdd, foundRestart, indexThatRestartedFirst);
3373            auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1 - isOdd, foundRestart, indexThatRestartedFirst);
3374            auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst);
3375            if(foundRestart)
3376            {
3377                baseIndex = indexThatRestartedFirst + 1;
3378                return;
3379            }
3380
3381            if(fixIndexBufferKey & 0x00200U)
3382            {
3383                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3384                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3385                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3386            }
3387            else
3388            {
3389
3390                if(isOdd)
3391                {
3392                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3393                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3394                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3395                }
3396                else
3397                {
3398                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3399                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3400                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3401                }
3402            }
3403
3404            assert(onOutIndex <= (onIndex + 1) * 3);
3405            assert(onOutIndex <= (indexCount - 2) * 3);
3406        }
3407        break;
3408
3409    }
3410
3411
3412}
3413
3414kernel void fixIndexBuffer(
3415                           const device ushort *indexBufferUint16 [[ buffer(0), function_constant(indexBufferIsUint16) ]],
3416                           const device uint *indexBufferUint32 [[ buffer(0), function_constant(indexBufferIsUint32) ]],
3417                           device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]],
3418                           device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]],
3419                           constant uint &indexCount [[ buffer(2) ]],
3420                           constant uint &primCount [[ buffer(3) ]],
3421                           uint prim [[thread_position_in_grid]])
3422{
3423    constexpr uint restartIndex = 0xFFFFFFFF;
3424    uint baseIndex = 0;
3425    uint onIndex = onIndex;
3426    uint onOutIndex = onOutIndex;
3427    if(prim < primCount)
3428    {
3429        switch(fixIndexBufferMode)
3430        {
3431            case 0x00U:
3432                onIndex = prim;
3433                onOutIndex = prim;
3434                break;
3435            case 0x01U:
3436                onIndex = prim * 2;
3437                onOutIndex = prim * 2;
3438                break;
3439            case 0x03U:
3440                onIndex = prim;
3441                onOutIndex = prim * 2;
3442                break;
3443            case 0x04U:
3444                onIndex = prim * 3;
3445                onOutIndex = prim * 3;
3446                break;
3447            case 0x05U:
3448                onIndex = prim;
3449                onOutIndex = prim * 3;
3450                break;
3451        }
3452        outputPrimitive(indexBufferUint16, indexBufferUint32, outIndexBufferUint16, outIndexBufferUint32, restartIndex, indexCount, baseIndex, onIndex, onOutIndex);
3453    }
3454}
3455
3456
3457
3458static inline void generatePrimitive(
3459                                   device ushort *outIndexBufferUint16,
3460                                   device uint *outIndexBufferUint32,
3461                                   const uint firstVertex,
3462                                   const uint indexCount,
3463                                   thread uint &baseIndex,
3464                                   uint onIndex,
3465                                   uint primCount,
3466                                   thread uint &onOutIndex
3467                                   )
3468{
3469    if(baseIndex > onIndex) return;
3470# 284 "./rewrite_indices.metal"
3471    switch(fixIndexBufferMode)
3472    {
3473        case 0x00U:
3474        {
3475            ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = onIndex + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = onIndex + firstVertex; } onOutIndex++; });
3476        }
3477        break;
3478        case 0x01U:
3479        {
3480            auto tmpIndex0 = onIndex + 0;
3481            auto tmpIndex1 = onIndex + 1;
3482            if(fixIndexBufferKey & 0x00200U)
3483            {
3484                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3485                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3486            }
3487            else
3488            {
3489                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3490                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3491            }
3492        }
3493        break;
3494        case 0x02U:
3495        {
3496            auto tmpIndex0 = onIndex + 0;
3497            auto tmpIndex1 = (onIndex + 1) % primCount;
3498            if(fixIndexBufferKey & 0x00200U)
3499            {
3500                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3501                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3502            }
3503            else
3504            {
3505                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3506                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3507            }
3508        }
3509        break;
3510        case 0x03U:
3511        {
3512            auto tmpIndex0 = onIndex + 0;
3513            auto tmpIndex1 = onIndex + 1;
3514            if(fixIndexBufferKey & 0x00200U)
3515            {
3516                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3517                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3518            }
3519            else
3520            {
3521                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3522                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3523            }
3524        }
3525        break;
3526        case 0x04U:
3527        {
3528            auto tmpIndex0 = onIndex + 0;
3529            auto tmpIndex1 = onIndex + 1;
3530            auto tmpIndex2 = onIndex + 2;
3531            if(fixIndexBufferKey & 0x00200U)
3532            {
3533                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3534                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3535                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3536            }
3537            else
3538            {
3539                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3540                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3541                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3542            }
3543        }
3544        break;
3545        case 0x05U:
3546        {
3547            uint isOdd = ((onIndex - baseIndex) & 1);
3548            auto tmpIndex0 = onIndex + 0 + isOdd;
3549            auto tmpIndex1 = onIndex + 1 - isOdd;
3550            auto tmpIndex2 = onIndex + 2;
3551            if(fixIndexBufferKey & 0x00200U)
3552            {
3553                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3554                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3555                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3556            }
3557            else
3558            {
3559                if(isOdd)
3560                {
3561                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3562                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3563                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3564                }
3565                else
3566                {
3567                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3568                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3569                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3570                }
3571            }
3572
3573            assert(onOutIndex <= (onIndex + 1) * 3);
3574            assert(onOutIndex <= (indexCount - 2) * 3);
3575            break;
3576        }
3577        case 0x06U:
3578        {
3579            auto tmpIndex0 = 0;
3580            auto tmpIndex1 = onIndex + 1;
3581            auto tmpIndex2 = onIndex + 2;
3582
3583            if(fixIndexBufferKey & 0x00200U)
3584            {
3585                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3586                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3587                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3588            }
3589            else
3590            {
3591                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3592                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3593                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3594            }
3595        }
3596        break;
3597
3598    }
3599
3600}
3601
3602
3603
3604kernel void genIndexBuffer(
3605                           device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]],
3606                           device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]],
3607                           constant uint &indexCount [[ buffer(2) ]],
3608                           constant uint &primCount [[ buffer(3) ]],
3609                           constant uint &firstVertex [[ buffer(4) ]],
3610                           uint prim [[thread_position_in_grid]])
3611{
3612    uint baseIndex = 0;
3613    uint onIndex = onIndex;
3614    uint onOutIndex = onOutIndex;
3615    if(prim < primCount)
3616    {
3617        switch(fixIndexBufferMode)
3618        {
3619            case 0x00U:
3620                onIndex = prim;
3621                onOutIndex = prim;
3622                break;
3623            case 0x01U:
3624                onIndex = prim * 2;
3625                onOutIndex = prim * 2;
3626                break;
3627            case 0x03U:
3628                onIndex = prim;
3629                onOutIndex = prim * 2;
3630                break;
3631            case 0x02U:
3632                onIndex = prim;
3633                onOutIndex = prim * 2;
3634                break;
3635            case 0x04U:
3636                onIndex = prim * 3;
3637                onOutIndex = prim * 3;
3638                break;
3639            case 0x05U:
3640                onIndex = prim;
3641                onOutIndex = prim * 3;
3642                break;
3643            case 0x06U:
3644                onIndex = prim;
3645                onOutIndex = prim * 3;
3646                break;
3647        }
3648        generatePrimitive(outIndexBufferUint16, outIndexBufferUint32, firstVertex, indexCount, baseIndex, onIndex, primCount, onOutIndex);
3649    }
3650}
3651# 8 "temp_master_source.metal" 2
3652
3653
3654