• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// GENERATED FILE - DO NOT EDIT.
2// Generated by gen_mtl_internal_shaders.py
3//
4// Copyright 2020 The ANGLE Project Authors. All rights reserved.
5// Use of this source code is governed by a BSD-style license that can be
6// found in the LICENSE file.
7//
8
9// C++ string version of combined Metal default shaders.
10
11
12
13static char gDefaultMetallibSrc[] = R"(
14# 1 "temp_master_source.metal"
15# 1 "<built-in>" 1
16# 1 "<built-in>" 3
17# 433 "<built-in>" 3
18# 1 "<command line>" 1
19# 1 "<built-in>" 2
20# 1 "temp_master_source.metal" 2
21# 1 "./blit.metal" 1
22
23
24
25
26
27
28
29# 1 "./common.h" 1
30# 13 "./common.h"
31# include <simd/simd.h>
32# include <metal_stdlib>
33
34
35# 1 "./constants.h" 1
36# 11 "./constants.h"
37namespace rx
38{
39namespace mtl_shader
40{
41
42enum
43{
44    kTextureType2D = 0,
45    kTextureType2DMultisample = 1,
46    kTextureType2DArray = 2,
47    kTextureTypeCube = 3,
48    kTextureType3D = 4,
49    kTextureTypeCount = 5,
50};
51
52
53
54
55}
56}
57# 18 "./common.h" 2
58
59
60
61
62
63
64
65using namespace metal;
66
67
68
69constant uint32_t kNumColorOutputs [[function_constant(0)]];
70constant bool kColorOutputAvailable0 = kNumColorOutputs > 0;
71constant bool kColorOutputAvailable1 = kNumColorOutputs > 1;
72constant bool kColorOutputAvailable2 = kNumColorOutputs > 2;
73constant bool kColorOutputAvailable3 = kNumColorOutputs > 3;
74
75namespace rx
76{
77namespace mtl_shader
78{
79
80
81constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)};
82
83template <typename T>
84struct MultipleColorOutputs
85{
86    vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]];
87    vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]];
88    vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]];
89    vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]];
90};
91# 61 "./common.h"
92template <typename T>
93static inline MultipleColorOutputs<T> toMultipleColorOutputs(vec<T, 4> color)
94{
95    MultipleColorOutputs<T> re;
96
97    do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0);
98    do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0);
99    do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0);
100    do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0);
101
102    return re;
103}
104
105static inline float3 cubeTexcoords(float2 texcoords, int face)
106{
107    texcoords = 2.0 * texcoords - 1.0;
108    switch (face)
109    {
110        case 0:
111            return float3(1.0, -texcoords.y, -texcoords.x);
112        case 1:
113            return float3(-1.0, -texcoords.y, texcoords.x);
114        case 2:
115            return float3(texcoords.x, 1.0, texcoords.y);
116        case 3:
117            return float3(texcoords.x, -1.0, -texcoords.y);
118        case 4:
119            return float3(texcoords.x, -texcoords.y, 1.0);
120        case 5:
121            return float3(-texcoords.x, -texcoords.y, -1.0);
122    }
123    return float3(texcoords, 0);
124}
125
126template <typename T>
127static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords)
128{
129    uint samples = srcTexture.get_num_samples();
130
131    vec<T, 4> output(0);
132
133    for (uint sample = 0; sample < samples; ++sample)
134    {
135        output += srcTexture.read(coords, sample);
136    }
137
138    output = output / samples;
139
140    return output;
141}
142
143static inline float4 sRGBtoLinear(float4 color)
144{
145    float3 linear1 = color.rgb / 12.92;
146    float3 linear2 = pow((color.rgb + float3(0.055)) / 1.055, 2.4);
147    float3 factor = float3(color.rgb <= float3(0.04045));
148    float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a);
149
150    return linear;
151}
152
153static inline float linearToSRGB(float color)
154{
155    if (color <= 0.0f)
156        return 0.0f;
157    else if (color < 0.0031308f)
158        return 12.92f * color;
159    else if (color < 1.0f)
160        return 1.055f * pow(color, 0.41666f) - 0.055f;
161    else
162        return 1.0f;
163}
164
165static inline float4 linearToSRGB(float4 color)
166{
167    return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a);
168}
169
170template <typename Short>
171static inline Short bytesToShort(constant uchar *input, uint offset)
172{
173    Short inputLo = input[offset];
174    Short inputHi = input[offset + 1];
175
176    return inputLo | (inputHi << 8);
177}
178
179template <typename Int>
180static inline Int bytesToInt(constant uchar *input, uint offset)
181{
182    Int input0 = input[offset];
183    Int input1 = input[offset + 1];
184    Int input2 = input[offset + 2];
185    Int input3 = input[offset + 3];
186
187    return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24);
188}
189
190template <typename Short>
191static inline void shortToBytes(Short val, uint offset, device uchar *output)
192{
193    ushort valUnsigned = as_type<ushort>(val);
194    output[offset] = valUnsigned & 0xff;
195    output[offset + 1] = (valUnsigned >> 8) & 0xff;
196}
197
198template <typename Int>
199static inline void intToBytes(Int val, uint offset, device uchar *output)
200{
201    uint valUnsigned = as_type<uint>(val);
202    output[offset] = valUnsigned & 0xff;
203    output[offset + 1] = (valUnsigned >> 8) & 0xff;
204    output[offset + 2] = (valUnsigned >> 16) & 0xff;
205    output[offset + 3] = (valUnsigned >> 24) & 0xff;
206}
207
208static inline void floatToBytes(float val, uint offset, device uchar *output)
209{
210    intToBytes(as_type<uint>(val), offset, output);
211}
212
213static inline void int24bitToBytes(uint val, uint offset, device uchar *output)
214{
215    output[offset] = val & 0xff;
216    output[offset + 1] = (val >> 8) & 0xff;
217    output[offset + 2] = (val >> 16) & 0xff;
218}
219
220template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
221static inline T getShiftedData(T input)
222{
223    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
224                  "T must have at least as many bits as inputBitCount + inputBitStart.");
225    const T mask = (1 << inputBitCount) - 1;
226    return (input >> inputBitStart) & mask;
227}
228
229template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
230static inline T shiftData(T input)
231{
232    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
233                  "T must have at least as many bits as inputBitCount + inputBitStart.");
234    const T mask = (1 << inputBitCount) - 1;
235    return (input & mask) << inputBitStart;
236}
237
238template <unsigned int inputBitCount, typename T>
239static inline float normalizedToFloat(T input)
240{
241    static_assert(inputBitCount <= (sizeof(T) * 8),
242                  "T must have more bits than or same bits as inputBitCount.");
243    static_assert(inputBitCount <= 23, "Only single precision is supported");
244
245    constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1);
246    return input * inverseMax;
247}
248
249template <typename T>
250static inline float normalizedToFloat(T input)
251{
252    return normalizedToFloat<sizeof(T) * 8, T>(input);
253}
254
255template <>
256inline float normalizedToFloat(short input)
257{
258    constexpr float inverseMax = 1.0f / 0x7fff;
259    return static_cast<float>(input) * inverseMax;
260}
261
262template <>
263inline float normalizedToFloat(int input)
264{
265    constexpr float inverseMax = 1.0f / 0x7fffffff;
266    return static_cast<float>(input) * inverseMax;
267}
268
269template <>
270inline float normalizedToFloat(uint input)
271{
272    constexpr float inverseMax = 1.0f / 0xffffffff;
273    return static_cast<float>(input) * inverseMax;
274}
275
276template <unsigned int outputBitCount, typename T>
277static inline T floatToNormalized(float input)
278{
279    static_assert(outputBitCount <= (sizeof(T) * 8),
280                  "T must have more bits than or same bits as inputBitCount.");
281    static_assert(outputBitCount <= 23, "Only single precision is supported");
282
283    return static_cast<T>(((1 << outputBitCount) - 1) * input + 0.5f);
284}
285
286template <typename T>
287static inline T floatToNormalized(float input)
288{
289    return floatToNormalized<sizeof(T) * 8, T>(input);
290}
291
292}
293}
294# 9 "./blit.metal" 2
295
296using namespace rx::mtl_shader;
297
298
299constant bool kPremultiplyAlpha [[function_constant(1)]];
300constant bool kUnmultiplyAlpha [[function_constant(2)]];
301constant int kSourceTextureType [[function_constant(3)]];
302constant int kSourceTexture2Type [[function_constant(4)]];
303
304constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D;
305constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray;
306constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample;
307constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube;
308constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D;
309
310constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D;
311constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray;
312constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample;
313constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube;
314
315struct BlitParams
316{
317
318    float2 srcTexCoords[3];
319    int srcLevel;
320    int srcLayer;
321    bool dstFlipViewportX;
322    bool dstFlipViewportY;
323    bool dstLuminance;
324    uint8_t padding[13];
325};
326
327struct BlitVSOut
328{
329    float4 position [[position]];
330    float2 texCoords [[user(locn1)]];
331};
332
333vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]])
334{
335    BlitVSOut output;
336    output.position = float4(gCorners[vid], 0.0, 1.0);
337    output.texCoords = options.srcTexCoords[vid];
338
339    if (options.dstFlipViewportX)
340    {
341        output.position.x = -output.position.x;
342    }
343    if (!options.dstFlipViewportY)
344    {
345
346
347        output.position.y = -output.position.y;
348    }
349
350    return output;
351}
352
353template <typename SrcTexture2d>
354static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords)
355{
356    uint2 dimens(srcTexture.get_width(), srcTexture.get_height());
357    uint2 coords = uint2(texCoords * float2(dimens));
358
359    return coords;
360}
361
362template <typename T>
363static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords)
364{
365    uint2 coords = getImageCoords(srcTexture, texCoords);
366    return resolveTextureMS(srcTexture, coords);
367}
368
369template <typename T>
370static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture,
371                                            sampler textureSampler,
372                                            float2 texCoords,
373                                            constant BlitParams &options)
374{
375    uint depth = srcTexture.get_depth(options.srcLevel);
376    float zCoord = (float(options.srcLayer) + 0.5) / float(depth);
377
378    return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel));
379}
380# 112 "./blit.metal"
381template <typename T>
382static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
383{
384    vec<T, 4> output;
385
386    switch (kSourceTextureType)
387    {
388        case kTextureType2D:
389            output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel));
390            break;
391        case kTextureType2DArray:
392            output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer,
393                                              level(options.srcLevel));
394            break;
395        case kTextureType2DMultisample:
396            output = blitSampleTextureMS(srcTexture2dMS, input.texCoords);
397            break;
398        case kTextureTypeCube:
399            output = srcTextureCube.sample(textureSampler,
400                                           cubeTexcoords(input.texCoords, options.srcLayer),
401                                           level(options.srcLevel));
402            break;
403        case kTextureType3D:
404            output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options);
405            break;
406    }
407
408    if (kPremultiplyAlpha)
409    {
410        output.xyz *= output.a;
411    }
412    else if (kUnmultiplyAlpha)
413    {
414        if (output.a != 0.0)
415        {
416            output.xyz /= output.a;
417        }
418    }
419
420    if (options.dstLuminance)
421    {
422        output.g = output.b = output.r;
423    }
424
425    return output;
426}
427
428template <typename T>
429static inline MultipleColorOutputs<T> blitFS(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
430{
431    vec<T, 4> output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
432
433    return toMultipleColorOutputs(output);
434}
435
436fragment MultipleColorOutputs<float> blitFloatFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
437{
438    return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
439}
440fragment MultipleColorOutputs<int> blitIntFS(BlitVSOut input [[stage_in]], texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<int> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
441{
442    return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
443}
444fragment MultipleColorOutputs<uint> blitUIntFS(BlitVSOut input [[stage_in]], texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<uint> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
445{
446    return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
447}
448
449fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
450{
451    float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
452    uint4 output = uint4(inputColor * float4(255.0));
453
454    return toMultipleColorOutputs(output);
455}
456
457
458struct FragmentDepthOut
459{
460    float depth [[depth(any)]];
461};
462
463static inline float sampleDepth(
464    texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]],
465    texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]],
466    texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]],
467    texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]],
468    float2 texCoords,
469    constant BlitParams &options)
470{
471    float4 output;
472
473    constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);
474
475    switch (kSourceTextureType)
476    {
477        case kTextureType2D:
478            output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel));
479            break;
480        case kTextureType2DArray:
481            output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer,
482                                              level(options.srcLevel));
483            break;
484        case kTextureType2DMultisample:
485
486            output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
487            break;
488        case kTextureTypeCube:
489            output =
490                srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer),
491                                      level(options.srcLevel));
492            break;
493    }
494
495    return output.r;
496}
497
498fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]],
499                                      texture2d<float> srcTexture2d
500                                      [[texture(0), function_constant(kSourceTextureType2D)]],
501                                      texture2d_array<float> srcTexture2dArray
502                                      [[texture(0), function_constant(kSourceTextureType2DArray)]],
503                                      texture2d_ms<float> srcTexture2dMS
504                                      [[texture(0), function_constant(kSourceTextureType2DMS)]],
505                                      texturecube<float> srcTextureCube
506                                      [[texture(0), function_constant(kSourceTextureTypeCube)]],
507                                      constant BlitParams &options [[buffer(0)]])
508{
509    FragmentDepthOut re;
510
511    re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
512                           input.texCoords, options);
513
514    return re;
515}
516
517static inline uint32_t sampleStencil(
518    texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]],
519    texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]],
520    texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]],
521    texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]],
522    float2 texCoords,
523    int srcLevel,
524    int srcLayer)
525{
526    uint4 output;
527    constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);
528
529    switch (kSourceTexture2Type)
530    {
531        case kTextureType2D:
532            output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel));
533            break;
534        case kTextureType2DArray:
535            output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel));
536            break;
537        case kTextureType2DMultisample:
538
539            output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
540            break;
541        case kTextureTypeCube:
542            output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer),
543                                           level(srcLevel));
544            break;
545    }
546
547    return output.r;
548}
549
550
551struct BlitStencilToBufferParams
552{
553    float2 srcStartTexCoords;
554    float2 srcTexCoordSteps;
555    int srcLevel;
556    int srcLayer;
557
558    uint2 dstSize;
559    uint dstBufferRowPitch;
560
561    bool resolveMS;
562};
563
564kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]],
565                                  texture2d<uint32_t> srcTexture2d
566                                  [[texture(1), function_constant(kSourceTexture2Type2D)]],
567                                  texture2d_array<uint32_t> srcTexture2dArray
568                                  [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
569                                  texture2d_ms<uint32_t> srcTexture2dMS
570                                  [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
571                                  texturecube<uint32_t> srcTextureCube
572                                  [[texture(1), function_constant(kSourceTexture2TypeCube)]],
573                                  constant BlitStencilToBufferParams &options [[buffer(0)]],
574                                  device uchar *buffer [[buffer(1)]])
575{
576    if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y)
577    {
578        return;
579    }
580
581    float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps;
582
583    if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS)
584    {
585        uint samples = srcTexture2dMS.get_num_samples();
586        uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords);
587        uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x;
588
589        for (uint sample = 0; sample < samples; ++sample)
590        {
591            uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r;
592            buffer[bufferOffset + sample] = static_cast<uchar>(stencilPerSample);
593        }
594    }
595    else
596    {
597        uint32_t stencil =
598            sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
599                          srcTexCoords, options.srcLevel, options.srcLayer);
600
601        buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast<uchar>(stencil);
602    }
603}
604
605
606#if __METAL_VERSION__ >= 210
607
608struct FragmentStencilOut
609{
610    uint32_t stencil [[stencil]];
611};
612
613struct FragmentDepthStencilOut
614{
615    float depth [[depth(any)]];
616    uint32_t stencil [[stencil]];
617};
618
619fragment FragmentStencilOut blitStencilFS(
620    BlitVSOut input [[stage_in]],
621    texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]],
622    texture2d_array<uint32_t> srcTexture2dArray
623    [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
624    texture2d_ms<uint32_t> srcTexture2dMS
625    [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
626    texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]],
627    constant BlitParams &options [[buffer(0)]])
628{
629    FragmentStencilOut re;
630
631    re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
632                               input.texCoords, options.srcLevel, options.srcLayer);
633
634    return re;
635}
636
637fragment FragmentDepthStencilOut blitDepthStencilFS(
638    BlitVSOut input [[stage_in]],
639
640    texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]],
641    texture2d_array<float> srcDepthTexture2dArray
642    [[texture(0), function_constant(kSourceTextureType2DArray)]],
643    texture2d_ms<float> srcDepthTexture2dMS
644    [[texture(0), function_constant(kSourceTextureType2DMS)]],
645    texturecube<float> srcDepthTextureCube
646    [[texture(0), function_constant(kSourceTextureTypeCube)]],
647
648
649    texture2d<uint32_t> srcStencilTexture2d
650    [[texture(1), function_constant(kSourceTexture2Type2D)]],
651    texture2d_array<uint32_t> srcStencilTexture2dArray
652    [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
653    texture2d_ms<uint32_t> srcStencilTexture2dMS
654    [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
655    texturecube<uint32_t> srcStencilTextureCube
656    [[texture(1), function_constant(kSourceTexture2TypeCube)]],
657
658    constant BlitParams &options [[buffer(0)]])
659{
660    FragmentDepthStencilOut re;
661
662    re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS,
663                           srcDepthTextureCube, input.texCoords, options);
664    re.stencil =
665        sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS,
666                      srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer);
667    return re;
668}
669#endif
670# 2 "temp_master_source.metal" 2
671# 1 "./clear.metal" 1
672# 10 "./clear.metal"
673using namespace rx::mtl_shader;
674
675struct ClearParams
676{
677    float4 clearColor;
678    float clearDepth;
679};
680
681vertex float4 clearVS(unsigned int vid [[ vertex_id ]],
682                      constant ClearParams &clearParams [[buffer(0)]])
683{
684    return float4(gCorners[vid], clearParams.clearDepth, 1.0);
685}
686
687fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]])
688{
689    return toMultipleColorOutputs(clearParams.clearColor);
690}
691
692fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]])
693{
694    return toMultipleColorOutputs(as_type<int4>(clearParams.clearColor));
695}
696
697fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]])
698{
699    return toMultipleColorOutputs(as_type<uint4>(clearParams.clearColor));
700}
701# 3 "temp_master_source.metal" 2
702# 1 "./gen_indices.metal" 1
703
704
705
706
707
708
709
710
711using namespace rx::mtl_shader;
712
713
714constant bool kSourceBufferAligned[[function_constant(100)]];
715constant bool kSourceIndexIsU8[[function_constant(200)]];
716constant bool kSourceIndexIsU16[[function_constant(300)]];
717constant bool kSourceIndexIsU32[[function_constant(400)]];
718constant bool kSourceBufferUnaligned = !kSourceBufferAligned;
719constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned;
720constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned;
721constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned;
722
723struct IndexConversionParams
724{
725    uint32_t srcOffset;
726    uint32_t indexCount;
727    bool primitiveRestartEnabled;
728};
729
730
731
732inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx)
733{
734    return inputAligned[offset / 2 + idx];
735}
736inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx)
737{
738    return inputAligned[offset / 4 + idx];
739}
740inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx)
741{
742    return input[offset + idx];
743}
744inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx)
745{
746    ushort inputLo = input[offset + 2 * idx];
747    ushort inputHi = input[offset + 2 * idx + 1];
748
749    return inputLo | (inputHi << 8);
750}
751inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx)
752{
753    uint input0 = input[offset + 4 * idx];
754    uint input1 = input[offset + 4 * idx + 1];
755    uint input2 = input[offset + 4 * idx + 2];
756    uint input3 = input[offset + 4 * idx + 3];
757
758    return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24);
759}
760
761kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]],
762                                constant IndexConversionParams &options [[buffer(0)]],
763                                constant uchar *input [[buffer(1)]],
764                                device ushort *output [[buffer(2)]])
765{
766    if (idx >= options.indexCount) { return; };
767
768    uchar value = getIndexAligned(input, options.srcOffset, idx);
769
770    if (options.primitiveRestartEnabled && value == 0xff)
771    {
772        output[idx] = 0xffff;
773    }
774    else
775    {
776        output[idx] = value;
777    }
778}
779
780kernel void convertIndexU16(uint idx [[thread_position_in_grid]],
781                            constant IndexConversionParams &options [[buffer(0)]],
782                            constant uchar *input
783                            [[buffer(1), function_constant(kSourceBufferUnaligned)]],
784                            constant ushort *inputAligned
785                            [[buffer(1), function_constant(kSourceBufferAligned)]],
786                            device ushort *output [[buffer(2)]])
787{
788    if (idx >= options.indexCount) { return; };
789
790    ushort value;
791    if (kSourceBufferAligned)
792    {
793        value = getIndexAligned(inputAligned, options.srcOffset, idx);
794    }
795    else
796    {
797        value = getIndexUnalignedU16(input, options.srcOffset, idx);
798    }
799    output[idx] = value;
800}
801
802kernel void convertIndexU32(uint idx [[thread_position_in_grid]],
803                            constant IndexConversionParams &options [[buffer(0)]],
804                            constant uchar *input
805                            [[buffer(1), function_constant(kSourceBufferUnaligned)]],
806                            constant uint *inputAligned
807                            [[buffer(1), function_constant(kSourceBufferAligned)]],
808                            device uint *output [[buffer(2)]])
809{
810    if (idx >= options.indexCount) { return; };
811
812    uint value;
813    if (kSourceBufferAligned)
814    {
815        value = getIndexAligned(inputAligned, options.srcOffset, idx);
816    }
817    else
818    {
819        value = getIndexUnalignedU32(input, options.srcOffset, idx);
820    }
821    output[idx] = value;
822}
823
824struct IndexFromArrayParams
825{
826    uint firstVertex;
827
828    uint vertexCount;
829};
830
831
832kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]],
833                                      constant IndexFromArrayParams &options [[buffer(0)]],
834                                      device uint *output [[buffer(2)]])
835{
836    if (idx >= options.vertexCount) { return; };
837
838    uint vertexIdx = options.firstVertex + 2 + idx;
839
840
841
842    output[3 * idx ] = vertexIdx - 1;
843    output[3 * idx + 1] = vertexIdx;
844    output[3 * idx + 2] = options.firstVertex;
845}
846
847inline uint getIndexU32(uint offset,
848                        uint idx,
849                        constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]],
850                        constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]],
851                        constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]])
852{
853    if (kUseSourceBufferU8)
854    {
855        if (kSourceIndexIsU16)
856        {
857            return getIndexUnalignedU16(inputU8, offset, idx);
858        }
859        else if (kSourceIndexIsU32)
860        {
861            return getIndexUnalignedU32(inputU8, offset, idx);
862        }
863        return getIndexAligned(inputU8, offset, idx);
864    }
865    else if (kUseSourceBufferU16)
866    {
867        return getIndexAligned(inputU16, offset, idx);
868    }
869    else if (kUseSourceBufferU32)
870    {
871        return getIndexAligned(inputU32, offset, idx);
872    }
873    return 0;
874}
875
876
877
878
879kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]],
880                                         constant IndexConversionParams &options [[buffer(0)]],
881                                         constant uchar *inputU8
882                                         [[buffer(1), function_constant(kUseSourceBufferU8)]],
883                                         constant ushort *inputU16
884                                         [[buffer(1), function_constant(kUseSourceBufferU16)]],
885                                         constant uint *inputU32
886                                         [[buffer(1), function_constant(kUseSourceBufferU32)]],
887                                         device uint *output [[buffer(2)]])
888{
889    if (idx >= options.indexCount) { return; };
890
891    uint elemIdx = 2 + idx;
892
893    output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32);
894    output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32);
895    output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32);
896}
897
898
899kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]],
900                                        constant IndexFromArrayParams &options [[buffer(0)]],
901                                        device uint *output [[buffer(2)]])
902{
903    uint totalIndices = options.vertexCount + 1;
904    if (idx >= totalIndices) { return; };
905
906    output[idx] = options.firstVertex + idx % options.vertexCount;
907}
908
909
910
911kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]],
912                                           constant IndexConversionParams &options [[buffer(0)]],
913                                           constant uchar *inputU8
914                                           [[buffer(1), function_constant(kUseSourceBufferU8)]],
915                                           constant ushort *inputU16
916                                           [[buffer(1), function_constant(kUseSourceBufferU16)]],
917                                           constant uint *inputU32
918                                           [[buffer(1), function_constant(kUseSourceBufferU32)]],
919                                           device uint *output [[buffer(2)]])
920{
921    uint totalTargetIndices = options.indexCount + 1;
922    if (idx >= totalTargetIndices) { return; };
923
924    output[idx] =
925        getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32);
926}
927# 4 "temp_master_source.metal" 2
928# 1 "./gen_mipmap.metal" 1
929
930
931
932
933
934
935
936
937using namespace rx::mtl_shader;
938# 31 "./gen_mipmap.metal"
939struct GenMipParams
940{
941    uint srcLevel;
942    uint numMipLevelsToGen;
943    bool sRGB;
944};
945
946
947
948kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
949                              ushort3 gIndices [[thread_position_in_grid]],
950                              texture3d<float> srcTexture [[texture(0)]],
951                              texture3d<float, access::write> dstMip1 [[texture(1)]],
952                              texture3d<float, access::write> dstMip2 [[texture(2)]],
953                              texture3d<float, access::write> dstMip3 [[texture(3)]],
954                              texture3d<float, access::write> dstMip4 [[texture(4)]],
955                              constant GenMipParams &options [[buffer(0)]])
956{
957    ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth());
958    bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z;
959
960    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
961
962
963
964
965    threadgroup float sR[(8 * 8 * 8)];
966    threadgroup float sG[(8 * 8 * 8)];
967    threadgroup float sB[(8 * 8 * 8)];
968    threadgroup float sA[(8 * 8 * 8)];
969
970
971    float4 texel1;
972    if (validThread)
973    {
974        float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize);
975        texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));
976
977
978        dstMip1.write(texel1, gIndices);
979    }
980    else
981    {
982
983        lIndex = 0xffffffff;
984    }
985
986    if (options.numMipLevelsToGen == 1)
987    {
988        return;
989    }
990
991
992
993
994    if (options.sRGB)
995    {
996        texel1 = linearToSRGB(texel1);
997    }
998    sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
999
1000    threadgroup_barrier(mem_flags::mem_threadgroup);
1001
1002
1003    if ((lIndex & 0x49) == 0)
1004    {
1005        bool3 atEdge = gIndices == (mipSize - ushort3(1));
1006
1007
1008
1009        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
1010
1011        float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
1012
1013        float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)]));
1014
1015        float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
1016
1017
1018        float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)]));
1019
1020
1021        float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)]));
1022
1023
1024        float4 texel8 =
1025            (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)]));
1026
1027
1028        texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
1029
1030        dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1);
1031
1032
1033        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1034    }
1035
1036    if (options.numMipLevelsToGen == 2)
1037    {
1038        return;
1039    }
1040
1041
1042    threadgroup_barrier(mem_flags::mem_threadgroup);
1043
1044
1045    if ((lIndex & 0xdb) == 0)
1046    {
1047        mipSize = max(mipSize >> 1, ushort3(1));
1048        bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1));
1049
1050
1051
1052        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
1053
1054        float4 texel3 =
1055            (atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)]));
1056
1057        float4 texel4 =
1058            (atEdge.z) ? (texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))]));
1059
1060        float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
1061
1062
1063        float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)]));
1064
1065
1066        float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)]));
1067
1068
1069
1070        float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)]));
1071
1072
1073
1074        texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
1075
1076        dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2);
1077
1078
1079        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1080    }
1081
1082    if (options.numMipLevelsToGen == 3)
1083    {
1084        return;
1085    }
1086
1087
1088    threadgroup_barrier(mem_flags::mem_threadgroup);
1089
1090
1091    if ((lIndex & 0x1ff) == 0)
1092    {
1093        mipSize = max(mipSize >> 1, ushort3(1));
1094        bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1));
1095
1096
1097
1098        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
1099
1100        float4 texel3 =
1101            (atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)]));
1102
1103        float4 texel4 =
1104            (atEdge.z) ? (texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))]));
1105
1106        float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
1107
1108
1109        float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)]));
1110
1111
1112        float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)]));
1113
1114
1115
1116        float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)]));
1117
1118
1119
1120        texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
1121
1122        dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
1123    }
1124}
1125
1126kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
1127                              ushort2 gIndices [[thread_position_in_grid]],
1128                              texture2d<float> srcTexture [[texture(0)]],
1129                              texture2d<float, access::write> dstMip1 [[texture(1)]],
1130                              texture2d<float, access::write> dstMip2 [[texture(2)]],
1131                              texture2d<float, access::write> dstMip3 [[texture(3)]],
1132                              texture2d<float, access::write> dstMip4 [[texture(4)]],
1133                              constant GenMipParams &options [[buffer(0)]])
1134{
1135    uint firstMipLevel = options.srcLevel + 1;
1136    ushort2 mipSize =
1137        ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
1138    bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y;
1139
1140    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
1141
1142
1143
1144
1145    threadgroup float sR[(8 * 8)];
1146    threadgroup float sG[(8 * 8)];
1147    threadgroup float sB[(8 * 8)];
1148    threadgroup float sA[(8 * 8)];
1149
1150
1151    float4 texel1;
1152    if (validThread)
1153    {
1154        float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize);
1155        texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));
1156
1157
1158        dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices);
1159    }
1160    else
1161    {
1162
1163        lIndex = 0xffffffff;
1164    }
1165
1166    if (options.numMipLevelsToGen == 1)
1167    {
1168        return;
1169    }
1170
1171
1172
1173
1174    sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1175
1176    threadgroup_barrier(mem_flags::mem_threadgroup);
1177
1178
1179    if ((lIndex & 0x09) == 0)
1180    {
1181        bool2 atEdge = gIndices == (mipSize - ushort2(1));
1182
1183
1184
1185        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
1186
1187        float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
1188
1189        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
1190
1191
1192        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1193
1194        dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1);
1195
1196
1197        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1198    }
1199
1200    if (options.numMipLevelsToGen == 2)
1201    {
1202        return;
1203    }
1204
1205
1206    threadgroup_barrier(mem_flags::mem_threadgroup);
1207
1208
1209    if ((lIndex & 0x1b) == 0)
1210    {
1211        mipSize = max(mipSize >> 1, ushort2(1));
1212        bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1));
1213
1214
1215
1216        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
1217
1218        float4 texel3 =
1219            (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8]));
1220
1221        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
1222
1223
1224        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1225
1226        dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2);
1227
1228
1229        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1230    }
1231
1232    if (options.numMipLevelsToGen == 3)
1233    {
1234        return;
1235    }
1236
1237
1238    threadgroup_barrier(mem_flags::mem_threadgroup);
1239
1240
1241    if ((lIndex & 0x3f) == 0)
1242    {
1243        mipSize = max(mipSize >> 1, ushort2(1));
1244        bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1));
1245
1246
1247
1248        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
1249
1250        float4 texel3 =
1251            (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8]));
1252
1253        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
1254
1255
1256        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1257
1258        dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
1259    }
1260}
1261
1262template <typename TextureTypeR, typename TextureTypeW>
1263static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps(
1264    uint lIndex,
1265    ushort3 gIndices,
1266    TextureTypeR srcTexture,
1267    TextureTypeW dstMip2,
1268    TextureTypeW dstMip3,
1269    TextureTypeW dstMip4,
1270    ushort2 mip1Size,
1271    float4 mip1Texel,
1272    threadgroup float *sR,
1273    threadgroup float *sG,
1274    threadgroup float *sB,
1275    threadgroup float *sA,
1276    constant GenMipParams &options)
1277{
1278    ushort2 mipSize = mip1Size;
1279    float4 texel1 = mip1Texel;
1280
1281
1282
1283
1284    sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1285
1286    threadgroup_barrier(mem_flags::mem_threadgroup);
1287
1288
1289    if ((lIndex & 0x09) == 0)
1290    {
1291        bool2 atEdge = gIndices.xy == (mipSize - ushort2(1));
1292
1293
1294
1295        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
1296
1297        float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
1298
1299        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
1300
1301
1302        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1303
1304        dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z);
1305
1306
1307        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1308    }
1309
1310    if (options.numMipLevelsToGen == 2)
1311    {
1312        return;
1313    }
1314
1315
1316    threadgroup_barrier(mem_flags::mem_threadgroup);
1317
1318
1319    if ((lIndex & 0x1b) == 0)
1320    {
1321        mipSize = max(mipSize >> 1, ushort2(1));
1322        bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1));
1323
1324
1325
1326        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
1327
1328        float4 texel3 =
1329            (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8]));
1330
1331        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
1332
1333
1334        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1335
1336        dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z);
1337
1338
1339        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1340    }
1341
1342    if (options.numMipLevelsToGen == 3)
1343    {
1344        return;
1345    }
1346
1347
1348    threadgroup_barrier(mem_flags::mem_threadgroup);
1349
1350
1351    if ((lIndex & 0x3f) == 0)
1352    {
1353        mipSize = max(mipSize >> 1, ushort2(1));
1354        bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1));
1355
1356
1357
1358        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
1359
1360        float4 texel3 =
1361            (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8]));
1362
1363        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
1364
1365
1366        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1367
1368        dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z);
1369    }
1370}
1371
1372kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]],
1373                                ushort3 gIndices [[thread_position_in_grid]],
1374                                texturecube<float> srcTexture [[texture(0)]],
1375                                texturecube<float, access::write> dstMip1 [[texture(1)]],
1376                                texturecube<float, access::write> dstMip2 [[texture(2)]],
1377                                texturecube<float, access::write> dstMip3 [[texture(3)]],
1378                                texturecube<float, access::write> dstMip4 [[texture(4)]],
1379                                constant GenMipParams &options [[buffer(0)]])
1380{
1381    uint firstMipLevel = options.srcLevel + 1;
1382    ushort2 mip1Size =
1383        ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
1384    bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;
1385
1386    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
1387
1388
1389    float4 mip1Texel;
1390    if (validThread)
1391    {
1392        float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
1393        mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)),
1394                                      level(options.srcLevel));
1395
1396
1397        dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
1398    }
1399    else
1400    {
1401
1402        lIndex = 0xffffffff;
1403    }
1404
1405    if (options.numMipLevelsToGen == 1)
1406    {
1407        return;
1408    }
1409
1410
1411    threadgroup float sR[(8 * 8)];
1412    threadgroup float sG[(8 * 8)];
1413    threadgroup float sB[(8 * 8)];
1414    threadgroup float sA[(8 * 8)];
1415
1416    generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
1417                                           mip1Size, mip1Texel, sR, sG, sB, sA, options);
1418}
1419
1420kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]],
1421                                   ushort3 gIndices [[thread_position_in_grid]],
1422                                   texture2d_array<float> srcTexture [[texture(0)]],
1423                                   texture2d_array<float, access::write> dstMip1 [[texture(1)]],
1424                                   texture2d_array<float, access::write> dstMip2 [[texture(2)]],
1425                                   texture2d_array<float, access::write> dstMip3 [[texture(3)]],
1426                                   texture2d_array<float, access::write> dstMip4 [[texture(4)]],
1427                                   constant GenMipParams &options [[buffer(0)]])
1428{
1429    uint firstMipLevel = options.srcLevel + 1;
1430    ushort2 mip1Size =
1431        ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
1432    bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;
1433
1434    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
1435
1436
1437    float4 mip1Texel;
1438    if (validThread)
1439    {
1440        float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
1441        mip1Texel =
1442            srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel));
1443
1444
1445        dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
1446    }
1447    else
1448    {
1449
1450        lIndex = 0xffffffff;
1451    }
1452
1453    if (options.numMipLevelsToGen == 1)
1454    {
1455        return;
1456    }
1457
1458
1459    threadgroup float sR[(8 * 8)];
1460    threadgroup float sG[(8 * 8)];
1461    threadgroup float sB[(8 * 8)];
1462    threadgroup float sA[(8 * 8)];
1463
1464    generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
1465                                           mip1Size, mip1Texel, sR, sG, sB, sA, options);
1466}
1467# 5 "temp_master_source.metal" 2
1468# 1 "./copy_buffer.metal" 1
1469# 12 "./copy_buffer.metal"
1470#include <metal_pack>
1471
1472
1473# 1 "./format_autogen.h" 1
1474# 11 "./format_autogen.h"
1475namespace rx
1476{
1477namespace mtl_shader
1478{
1479
1480namespace FormatID
1481{
1482enum
1483{
1484    NONE,
1485    D16_UNORM,
1486    D24_UNORM_S8_UINT,
1487    D24_UNORM_X8_UINT,
1488    D32_FLOAT,
1489    D32_FLOAT_S8X24_UINT,
1490    D32_UNORM,
1491    S8_UINT,
1492    A16_FLOAT,
1493    A1R5G5B5_UNORM,
1494    A2R10G10B10_SINT_VERTEX,
1495    A2R10G10B10_SNORM_VERTEX,
1496    A2R10G10B10_SSCALED_VERTEX,
1497    A2R10G10B10_UINT_VERTEX,
1498    A2R10G10B10_UNORM_VERTEX,
1499    A2R10G10B10_USCALED_VERTEX,
1500    A32_FLOAT,
1501    A8_UNORM,
1502    ASTC_10x10_SRGB_BLOCK,
1503    ASTC_10x10_UNORM_BLOCK,
1504    ASTC_10x5_SRGB_BLOCK,
1505    ASTC_10x5_UNORM_BLOCK,
1506    ASTC_10x6_SRGB_BLOCK,
1507    ASTC_10x6_UNORM_BLOCK,
1508    ASTC_10x8_SRGB_BLOCK,
1509    ASTC_10x8_UNORM_BLOCK,
1510    ASTC_12x10_SRGB_BLOCK,
1511    ASTC_12x10_UNORM_BLOCK,
1512    ASTC_12x12_SRGB_BLOCK,
1513    ASTC_12x12_UNORM_BLOCK,
1514    ASTC_3x3x3_UNORM_BLOCK,
1515    ASTC_3x3x3_UNORM_SRGB_BLOCK,
1516    ASTC_4x3x3_UNORM_BLOCK,
1517    ASTC_4x3x3_UNORM_SRGB_BLOCK,
1518    ASTC_4x4_SRGB_BLOCK,
1519    ASTC_4x4_UNORM_BLOCK,
1520    ASTC_4x4x3_UNORM_BLOCK,
1521    ASTC_4x4x3_UNORM_SRGB_BLOCK,
1522    ASTC_4x4x4_UNORM_BLOCK,
1523    ASTC_4x4x4_UNORM_SRGB_BLOCK,
1524    ASTC_5x4_SRGB_BLOCK,
1525    ASTC_5x4_UNORM_BLOCK,
1526    ASTC_5x4x4_UNORM_BLOCK,
1527    ASTC_5x4x4_UNORM_SRGB_BLOCK,
1528    ASTC_5x5_SRGB_BLOCK,
1529    ASTC_5x5_UNORM_BLOCK,
1530    ASTC_5x5x4_UNORM_BLOCK,
1531    ASTC_5x5x4_UNORM_SRGB_BLOCK,
1532    ASTC_5x5x5_UNORM_BLOCK,
1533    ASTC_5x5x5_UNORM_SRGB_BLOCK,
1534    ASTC_6x5_SRGB_BLOCK,
1535    ASTC_6x5_UNORM_BLOCK,
1536    ASTC_6x5x5_UNORM_BLOCK,
1537    ASTC_6x5x5_UNORM_SRGB_BLOCK,
1538    ASTC_6x6_SRGB_BLOCK,
1539    ASTC_6x6_UNORM_BLOCK,
1540    ASTC_6x6x5_UNORM_BLOCK,
1541    ASTC_6x6x5_UNORM_SRGB_BLOCK,
1542    ASTC_6x6x6_UNORM_BLOCK,
1543    ASTC_6x6x6_UNORM_SRGB_BLOCK,
1544    ASTC_8x5_SRGB_BLOCK,
1545    ASTC_8x5_UNORM_BLOCK,
1546    ASTC_8x6_SRGB_BLOCK,
1547    ASTC_8x6_UNORM_BLOCK,
1548    ASTC_8x8_SRGB_BLOCK,
1549    ASTC_8x8_UNORM_BLOCK,
1550    B10G10R10A2_UNORM,
1551    B4G4R4A4_UNORM,
1552    B5G5R5A1_UNORM,
1553    B5G6R5_UNORM,
1554    B8G8R8A8_TYPELESS,
1555    B8G8R8A8_TYPELESS_SRGB,
1556    B8G8R8A8_UNORM,
1557    B8G8R8A8_UNORM_SRGB,
1558    B8G8R8X8_UNORM,
1559    BC1_RGBA_UNORM_BLOCK,
1560    BC1_RGBA_UNORM_SRGB_BLOCK,
1561    BC1_RGB_UNORM_BLOCK,
1562    BC1_RGB_UNORM_SRGB_BLOCK,
1563    BC2_RGBA_UNORM_BLOCK,
1564    BC2_RGBA_UNORM_SRGB_BLOCK,
1565    BC3_RGBA_UNORM_BLOCK,
1566    BC3_RGBA_UNORM_SRGB_BLOCK,
1567    BC4_RED_SNORM_BLOCK,
1568    BC4_RED_UNORM_BLOCK,
1569    BC5_RG_SNORM_BLOCK,
1570    BC5_RG_UNORM_BLOCK,
1571    BC6H_RGB_SFLOAT_BLOCK,
1572    BC6H_RGB_UFLOAT_BLOCK,
1573    BC7_RGBA_UNORM_BLOCK,
1574    BC7_RGBA_UNORM_SRGB_BLOCK,
1575    EAC_R11G11_SNORM_BLOCK,
1576    EAC_R11G11_UNORM_BLOCK,
1577    EAC_R11_SNORM_BLOCK,
1578    EAC_R11_UNORM_BLOCK,
1579    ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK,
1580    ETC1_R8G8B8_UNORM_BLOCK,
1581    ETC2_R8G8B8A1_SRGB_BLOCK,
1582    ETC2_R8G8B8A1_UNORM_BLOCK,
1583    ETC2_R8G8B8A8_SRGB_BLOCK,
1584    ETC2_R8G8B8A8_UNORM_BLOCK,
1585    ETC2_R8G8B8_SRGB_BLOCK,
1586    ETC2_R8G8B8_UNORM_BLOCK,
1587    G8_B8R8_2PLANE_420_UNORM,
1588    G8_B8_R8_3PLANE_420_UNORM,
1589    L16A16_FLOAT,
1590    L16_FLOAT,
1591    L32A32_FLOAT,
1592    L32_FLOAT,
1593    L8A8_UNORM,
1594    L8_UNORM,
1595    PVRTC1_RGBA_2BPP_UNORM_BLOCK,
1596    PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK,
1597    PVRTC1_RGBA_4BPP_UNORM_BLOCK,
1598    PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK,
1599    PVRTC1_RGB_2BPP_UNORM_BLOCK,
1600    PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK,
1601    PVRTC1_RGB_4BPP_UNORM_BLOCK,
1602    PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK,
1603    R10G10B10A2_SINT,
1604    R10G10B10A2_SNORM,
1605    R10G10B10A2_SSCALED,
1606    R10G10B10A2_UINT,
1607    R10G10B10A2_UNORM,
1608    R10G10B10A2_USCALED,
1609    R10G10B10X2_UNORM,
1610    R11G11B10_FLOAT,
1611    R16G16B16A16_FLOAT,
1612    R16G16B16A16_SINT,
1613    R16G16B16A16_SNORM,
1614    R16G16B16A16_SSCALED,
1615    R16G16B16A16_UINT,
1616    R16G16B16A16_UNORM,
1617    R16G16B16A16_USCALED,
1618    R16G16B16_FLOAT,
1619    R16G16B16_SINT,
1620    R16G16B16_SNORM,
1621    R16G16B16_SSCALED,
1622    R16G16B16_UINT,
1623    R16G16B16_UNORM,
1624    R16G16B16_USCALED,
1625    R16G16_FLOAT,
1626    R16G16_SINT,
1627    R16G16_SNORM,
1628    R16G16_SSCALED,
1629    R16G16_UINT,
1630    R16G16_UNORM,
1631    R16G16_USCALED,
1632    R16_FLOAT,
1633    R16_SINT,
1634    R16_SNORM,
1635    R16_SSCALED,
1636    R16_UINT,
1637    R16_UNORM,
1638    R16_USCALED,
1639    R32G32B32A32_FIXED,
1640    R32G32B32A32_FLOAT,
1641    R32G32B32A32_SINT,
1642    R32G32B32A32_SNORM,
1643    R32G32B32A32_SSCALED,
1644    R32G32B32A32_UINT,
1645    R32G32B32A32_UNORM,
1646    R32G32B32A32_USCALED,
1647    R32G32B32_FIXED,
1648    R32G32B32_FLOAT,
1649    R32G32B32_SINT,
1650    R32G32B32_SNORM,
1651    R32G32B32_SSCALED,
1652    R32G32B32_UINT,
1653    R32G32B32_UNORM,
1654    R32G32B32_USCALED,
1655    R32G32_FIXED,
1656    R32G32_FLOAT,
1657    R32G32_SINT,
1658    R32G32_SNORM,
1659    R32G32_SSCALED,
1660    R32G32_UINT,
1661    R32G32_UNORM,
1662    R32G32_USCALED,
1663    R32_FIXED,
1664    R32_FLOAT,
1665    R32_SINT,
1666    R32_SNORM,
1667    R32_SSCALED,
1668    R32_UINT,
1669    R32_UNORM,
1670    R32_USCALED,
1671    R4G4B4A4_UNORM,
1672    R5G5B5A1_UNORM,
1673    R5G6B5_UNORM,
1674    R8G8B8A8_SINT,
1675    R8G8B8A8_SNORM,
1676    R8G8B8A8_SSCALED,
1677    R8G8B8A8_TYPELESS,
1678    R8G8B8A8_TYPELESS_SRGB,
1679    R8G8B8A8_UINT,
1680    R8G8B8A8_UNORM,
1681    R8G8B8A8_UNORM_SRGB,
1682    R8G8B8A8_USCALED,
1683    R8G8B8X8_UNORM,
1684    R8G8B8_SINT,
1685    R8G8B8_SNORM,
1686    R8G8B8_SSCALED,
1687    R8G8B8_UINT,
1688    R8G8B8_UNORM,
1689    R8G8B8_UNORM_SRGB,
1690    R8G8B8_USCALED,
1691    R8G8_SINT,
1692    R8G8_SNORM,
1693    R8G8_SSCALED,
1694    R8G8_UINT,
1695    R8G8_UNORM,
1696    R8G8_UNORM_SRGB,
1697    R8G8_USCALED,
1698    R8_SINT,
1699    R8_SNORM,
1700    R8_SSCALED,
1701    R8_UINT,
1702    R8_UNORM,
1703    R8_UNORM_SRGB,
1704    R8_USCALED,
1705    R9G9B9E5_SHAREDEXP,
1706    X2R10G10B10_SINT_VERTEX,
1707    X2R10G10B10_SNORM_VERTEX,
1708    X2R10G10B10_SSCALED_VERTEX,
1709    X2R10G10B10_UINT_VERTEX,
1710    X2R10G10B10_UNORM_VERTEX,
1711    X2R10G10B10_USCALED_VERTEX
1712};
1713
1714}
1715
1716}
1717}
1718# 16 "./copy_buffer.metal" 2
1719
1720using namespace rx::mtl_shader;
1721
1722constant int kCopyFormatType [[function_constant(10)]];
1723
1724
1725constant int kCopyTextureType [[function_constant(20)]];
1726constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D;
1727constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray;
1728constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample;
1729constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube;
1730constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D;
1731
1732struct CopyPixelParams
1733{
1734    uint3 copySize;
1735    uint3 textureOffset;
1736
1737    uint bufferStartOffset;
1738    uint pixelSize;
1739    uint bufferRowPitch;
1740    uint bufferDepthPitch;
1741};
1742
1743struct WritePixelParams
1744{
1745    uint2 copySize;
1746    uint2 textureOffset;
1747
1748    uint bufferStartOffset;
1749
1750    uint pixelSize;
1751    uint bufferRowPitch;
1752
1753    uint textureLevel;
1754    uint textureLayer;
1755
1756    bool reverseTextureRowOrder;
1757};
1758# 120 "./copy_buffer.metal"
1759template <typename T>
1760static inline void textureWrite(ushort3 gIndices,
1761                                constant CopyPixelParams &options,
1762                                vec<T, 4> color,
1763                                texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
1764{
1765    uint3 writeIndices = options.textureOffset + uint3(gIndices);
1766    switch (kCopyTextureType)
1767    {
1768        case kTextureType2D:
1769            dstTexture2d.write(color, writeIndices.xy);
1770            break;
1771        case kTextureType2DArray:
1772            dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z);
1773            break;
1774        case kTextureType3D:
1775            dstTexture3d.write(color, writeIndices);
1776            break;
1777        case kTextureTypeCube:
1778            dstTextureCube.write(color, writeIndices.xy, writeIndices.z);
1779            break;
1780    }
1781}
1782
1783
1784template <typename T>
1785static inline vec<T, 4> textureRead(ushort2 gIndices,
1786                                    constant WritePixelParams &options,
1787                                    texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]])
1788{
1789    vec<T, 4> color;
1790    uint2 coords = uint2(gIndices);
1791    if (options.reverseTextureRowOrder)
1792    {
1793        coords.y = options.copySize.y - 1 - gIndices.y;
1794    }
1795    coords += options.textureOffset;
1796    switch (kCopyTextureType)
1797    {
1798        case kTextureType2D:
1799            color = srcTexture2d.read(coords.xy, options.textureLevel);
1800            break;
1801        case kTextureType2DArray:
1802            color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel);
1803            break;
1804        case kTextureType2DMultisample:
1805            color = resolveTextureMS(srcTexture2dMS, coords.xy);
1806            break;
1807        case kTextureType3D:
1808            color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel);
1809            break;
1810        case kTextureTypeCube:
1811            color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel);
1812            break;
1813    }
1814    return color;
1815}
1816# 215 "./copy_buffer.metal"
1817static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer)
1818{
1819    float4 color;
1820    ushort src = bytesToShort<ushort>(buffer, bufferOffset);
1821
1822    color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src));
1823    color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src));
1824    color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src));
1825    color.a = 1.0;
1826    return color;
1827}
1828static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
1829{
1830    ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) |
1831                 shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) |
1832                 shiftData<5, 0>(floatToNormalized<5, ushort>(color.b));
1833
1834    shortToBytes(dst, bufferOffset, buffer);
1835}
1836
1837
1838static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer)
1839{
1840    float4 color;
1841    ushort src = bytesToShort<ushort>(buffer, bufferOffset);
1842
1843    color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src));
1844    color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src));
1845    color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src));
1846    color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src));
1847    return color;
1848}
1849static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
1850{
1851    ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) |
1852                 shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) |
1853                 shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) |
1854                 shiftData<4, 0>(floatToNormalized<4, ushort>(color.a));
1855    ;
1856
1857    shortToBytes(dst, bufferOffset, buffer);
1858}
1859
1860
1861static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer)
1862{
1863    float4 color;
1864    ushort src = bytesToShort<ushort>(buffer, bufferOffset);
1865
1866    color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src));
1867    color.g = normalizedToFloat<5>(getShiftedData<5, 6>(src));
1868    color.b = normalizedToFloat<5>(getShiftedData<5, 1>(src));
1869    color.a = normalizedToFloat<1>(getShiftedData<1, 0>(src));
1870    return color;
1871}
1872static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
1873{
1874    ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) |
1875                 shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) |
1876                 shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) |
1877                 shiftData<1, 0>(floatToNormalized<1, ushort>(color.a));
1878    ;
1879
1880    shortToBytes(dst, bufferOffset, buffer);
1881}
1882
1883
1884static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer)
1885{
1886    int4 color;
1887    int src = bytesToInt<int>(buffer, bufferOffset);
1888
1889    constexpr int3 rgbSignMask(0x200);
1890    constexpr int3 negativeMask(0xfffffc00);
1891    constexpr int alphaSignMask = 0x2;
1892    constexpr int alphaNegMask = 0xfffffffc;
1893
1894    color.r = getShiftedData<10, 0>(src);
1895    color.g = getShiftedData<10, 10>(src);
1896    color.b = getShiftedData<10, 20>(src);
1897
1898    int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9;
1899    color.rgb = (isRgbNegative * negativeMask) | color.rgb;
1900
1901    color.a = getShiftedData<2, 30>(src);
1902    int isAlphaNegative = color.a & alphaSignMask >> 1;
1903    color.a = (isAlphaNegative * alphaNegMask) | color.a;
1904    return color;
1905}
1906
1907static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer)
1908{
1909    uint4 color;
1910    uint src = bytesToInt<uint>(buffer, bufferOffset);
1911
1912    color.r = getShiftedData<10, 0>(src);
1913    color.g = getShiftedData<10, 10>(src);
1914    color.b = getShiftedData<10, 20>(src);
1915    color.a = getShiftedData<2, 30>(src);
1916    return color;
1917}
1918
1919
1920static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
1921{
1922    float4 color;
1923    uint src = bytesToInt<uint>(buffer, bufferOffset);
1924
1925    if (isSRGB)
1926    {
1927        color = unpack_unorm4x8_srgb_to_float(src);
1928    }
1929    else
1930    {
1931        color = unpack_unorm4x8_to_float(src);
1932    }
1933    return color;
1934}
1935static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
1936{
1937    uint dst;
1938
1939    if (isSRGB)
1940    {
1941        dst = pack_float_to_srgb_unorm4x8(color);
1942    }
1943    else
1944    {
1945        dst = pack_float_to_unorm4x8(color);
1946    }
1947
1948    intToBytes(dst, bufferOffset, buffer);
1949}
1950
1951static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
1952{
1953    float4 color;
1954    color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
1955    color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
1956    color.b = normalizedToFloat<uchar>(buffer[bufferOffset + 2]);
1957    color.a = 1.0;
1958
1959    if (isSRGB)
1960    {
1961        color = sRGBtoLinear(color);
1962    }
1963    return color;
1964}
1965static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
1966{
1967    color.a = 1.0;
1968    uint dst;
1969
1970    if (isSRGB)
1971    {
1972        dst = pack_float_to_srgb_unorm4x8(color);
1973    }
1974    else
1975    {
1976        dst = pack_float_to_unorm4x8(color);
1977    }
1978    int24bitToBytes(dst, bufferOffset, buffer);
1979}
1980
1981
1982static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer)
1983{
1984    float4 color;
1985    uint src = bytesToInt<uint>(buffer, bufferOffset);
1986
1987    color = unpack_snorm4x8_to_float(src);
1988
1989    return color;
1990}
1991static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
1992{
1993    uint dst = pack_float_to_snorm4x8(color);
1994
1995    intToBytes(dst, bufferOffset, buffer);
1996}
1997
1998
1999static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer)
2000{
2001    float4 color;
2002    color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
2003    color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
2004    color.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]);
2005    color.a = 1.0;
2006
2007    return color;
2008}
2009static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2010{
2011    uint dst = pack_float_to_snorm4x8(color);
2012
2013    int24bitToBytes(dst, bufferOffset, buffer);
2014}
2015
2016
2017static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer)
2018{
2019    return readR8G8B8A8(bufferOffset, buffer, false);
2020}
2021static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2022{
2023    return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false);
2024}
2025
2026static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
2027{
2028    return readR8G8B8A8(bufferOffset, buffer, true);
2029}
2030static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2031{
2032    return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true);
2033}
2034
2035
2036static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer)
2037{
2038    return readR8G8B8A8(bufferOffset, buffer, false).bgra;
2039}
2040static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2041{
2042    color.rgba = color.bgra;
2043    return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false);
2044}
2045
2046static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
2047{
2048    return readR8G8B8A8(bufferOffset, buffer, true).bgra;
2049}
2050static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2051{
2052    color.rgba = color.bgra;
2053    return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true);
2054}
2055
2056
2057static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer)
2058{
2059    return readR8G8B8(bufferOffset, buffer, false);
2060}
2061static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2062{
2063    return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false);
2064}
2065
2066static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
2067{
2068    return readR8G8B8(bufferOffset, buffer, true);
2069}
2070static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2071{
2072    return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true);
2073}
2074
2075
2076static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer)
2077{
2078    float4 color;
2079    color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
2080    color.a = 1.0;
2081    return color;
2082}
2083static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2084{
2085    buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2086}
2087
2088
2089static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2090{
2091    buffer[bufferOffset] = floatToNormalized<uchar>(color.a);
2092}
2093
2094
2095static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer)
2096{
2097    float4 color;
2098    color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
2099    color.a = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
2100    return color;
2101}
2102static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2103{
2104    buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2105    buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.a);
2106}
2107
2108
2109static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer)
2110{
2111    float4 color;
2112    color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
2113    color.g = color.b = 0.0;
2114    color.a = 1.0;
2115    return color;
2116}
2117static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2118{
2119    buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2120}
2121
2122static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer)
2123{
2124    float4 color;
2125    color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
2126    color.g = color.b = 0.0;
2127    color.a = 1.0;
2128    return color;
2129}
2130static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2131{
2132    buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r));
2133}
2134
2135
2136static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer)
2137{
2138    int4 color;
2139    color.r = as_type<char>(buffer[bufferOffset]);
2140    color.g = color.b = 0;
2141    color.a = 1;
2142    return color;
2143}
2144static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2145{
2146    buffer[bufferOffset] = static_cast<uchar>(color.r);
2147}
2148
2149
2150static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer)
2151{
2152    uint4 color;
2153    color.r = as_type<uchar>(buffer[bufferOffset]);
2154    color.g = color.b = 0;
2155    color.a = 1;
2156    return color;
2157}
2158static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2159{
2160    buffer[bufferOffset] = static_cast<uchar>(color.r);
2161}
2162
2163
2164static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer)
2165{
2166    float4 color;
2167    color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
2168    color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
2169    color.b = 0.0;
2170    color.a = 1.0;
2171    return color;
2172}
2173static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2174{
2175    buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2176    buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.g);
2177}
2178
2179static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer)
2180{
2181    float4 color;
2182    color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
2183    color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
2184    color.b = 0.0;
2185    color.a = 1.0;
2186    return color;
2187}
2188static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2189{
2190    buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r));
2191    buffer[bufferOffset + 1] = as_type<uchar>(floatToNormalized<7, char>(color.g));
2192}
2193
2194
2195static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer)
2196{
2197    int4 color;
2198    color.r = as_type<char>(buffer[bufferOffset]);
2199    color.g = as_type<char>(buffer[bufferOffset + 1]);
2200    color.b = 0;
2201    color.a = 1;
2202    return color;
2203}
2204static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2205{
2206    buffer[bufferOffset] = static_cast<uchar>(color.r);
2207    buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2208}
2209
2210
2211static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer)
2212{
2213    uint4 color;
2214    color.r = as_type<uchar>(buffer[bufferOffset]);
2215    color.g = as_type<uchar>(buffer[bufferOffset + 1]);
2216    color.b = 0;
2217    color.a = 1;
2218    return color;
2219}
2220static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2221{
2222    buffer[bufferOffset] = static_cast<uchar>(color.r);
2223    buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2224}
2225
2226
2227static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer)
2228{
2229    int4 color;
2230    color.r = as_type<char>(buffer[bufferOffset]);
2231    color.g = as_type<char>(buffer[bufferOffset + 1]);
2232    color.b = as_type<char>(buffer[bufferOffset + 2]);
2233    color.a = 1;
2234    return color;
2235}
2236
2237
2238static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer)
2239{
2240    uint4 color;
2241    color.r = as_type<uchar>(buffer[bufferOffset]);
2242    color.g = as_type<uchar>(buffer[bufferOffset + 1]);
2243    color.b = as_type<uchar>(buffer[bufferOffset + 2]);
2244    color.a = 1;
2245    return color;
2246}
2247
2248
2249static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer)
2250{
2251    int4 color;
2252    color.r = as_type<char>(buffer[bufferOffset]);
2253    color.g = as_type<char>(buffer[bufferOffset + 1]);
2254    color.b = as_type<char>(buffer[bufferOffset + 2]);
2255    color.a = as_type<char>(buffer[bufferOffset + 3]);
2256    return color;
2257}
2258static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2259{
2260    buffer[bufferOffset] = static_cast<uchar>(color.r);
2261    buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2262    buffer[bufferOffset + 2] = static_cast<uchar>(color.b);
2263    buffer[bufferOffset + 3] = static_cast<uchar>(color.a);
2264}
2265
2266
2267static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer)
2268{
2269    uint4 color;
2270    color.r = as_type<uchar>(buffer[bufferOffset]);
2271    color.g = as_type<uchar>(buffer[bufferOffset + 1]);
2272    color.b = as_type<uchar>(buffer[bufferOffset + 2]);
2273    color.a = as_type<uchar>(buffer[bufferOffset + 3]);
2274    return color;
2275}
2276static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2277{
2278    buffer[bufferOffset] = static_cast<uchar>(color.r);
2279    buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2280    buffer[bufferOffset + 2] = static_cast<uchar>(color.b);
2281    buffer[bufferOffset + 3] = static_cast<uchar>(color.a);
2282}
2283
2284
2285static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer)
2286{
2287    float4 color;
2288    color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2289    color.g = color.b = 0.0;
2290    color.a = 1.0;
2291    return color;
2292}
2293static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2294{
2295    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2296}
2297
2298template <typename ShortType>
2299static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer)
2300{
2301    float4 color;
2302    color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2303    color.g = color.b = 0.0;
2304    color.a = 1.0;
2305    return color;
2306}
2307
2308
2309
2310
2311static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer)
2312{
2313    int4 color;
2314    color.r = bytesToShort<short>(buffer, bufferOffset);
2315    color.g = color.b = 0;
2316    color.a = 1;
2317    return color;
2318}
2319static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2320{
2321    shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
2322}
2323
2324
2325static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer)
2326{
2327    uint4 color;
2328    color.r = bytesToShort<ushort>(buffer, bufferOffset);
2329    color.g = color.b = 0;
2330    color.a = 1;
2331    return color;
2332}
2333static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2334{
2335    shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
2336}
2337
2338
2339static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer)
2340{
2341    float4 color;
2342    color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2343    color.rgb = 0.0;
2344    return color;
2345}
2346static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2347{
2348    shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset, buffer);
2349}
2350
2351
2352static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer)
2353{
2354    float4 color;
2355    color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2356    color.a = 1.0;
2357    return color;
2358}
2359static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2360{
2361    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2362}
2363
2364
2365static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
2366{
2367    float4 color;
2368    color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2369    color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2370    return color;
2371}
2372static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2373{
2374    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2375    shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 2, buffer);
2376}
2377
2378
2379static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer)
2380{
2381    float4 color;
2382    color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2383    color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2384    color.b = 0.0;
2385    color.a = 1.0;
2386    return color;
2387}
2388static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2389{
2390    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2391    shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
2392}
2393
2394
2395template <typename ShortType>
2396static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer)
2397{
2398    float4 color;
2399    color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2400    color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
2401    color.b = 0.0;
2402    color.a = 1.0;
2403    return color;
2404}
2405
2406
2407
2408
2409static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer)
2410{
2411    int4 color;
2412    color.r = bytesToShort<short>(buffer, bufferOffset);
2413    color.g = bytesToShort<short>(buffer, bufferOffset + 2);
2414    color.b = 0;
2415    color.a = 1;
2416    return color;
2417}
2418static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2419{
2420    shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
2421    shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer);
2422}
2423
2424
2425static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer)
2426{
2427    uint4 color;
2428    color.r = bytesToShort<ushort>(buffer, bufferOffset);
2429    color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
2430    color.b = 0;
2431    color.a = 1;
2432    return color;
2433}
2434static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2435{
2436    shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
2437    shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer);
2438}
2439
2440
2441static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer)
2442{
2443    float4 color;
2444    color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2445    color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2446    color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
2447    color.a = 1.0;
2448    return color;
2449}
2450
2451
2452template <typename ShortType>
2453static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer)
2454{
2455    float4 color;
2456    color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2457    color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
2458    color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
2459    color.a = 1.0;
2460    return color;
2461}
2462
2463
2464
2465static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer)
2466{
2467    int4 color;
2468    color.r = bytesToShort<short>(buffer, bufferOffset);
2469    color.g = bytesToShort<short>(buffer, bufferOffset + 2);
2470    color.b = bytesToShort<short>(buffer, bufferOffset + 4);
2471    color.a = 1;
2472    return color;
2473}
2474
2475
2476static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer)
2477{
2478    uint4 color;
2479    color.r = bytesToShort<ushort>(buffer, bufferOffset);
2480    color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
2481    color.b = bytesToShort<ushort>(buffer, bufferOffset + 4);
2482    color.a = 1;
2483    return color;
2484}
2485
2486
2487static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
2488{
2489    float4 color;
2490    color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2491    color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2492    color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
2493    color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6));
2494    return color;
2495}
2496static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2497{
2498    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2499    shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
2500    shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer);
2501    shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer);
2502}
2503
2504
2505template <typename ShortType>
2506static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer)
2507{
2508    float4 color;
2509    color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2510    color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
2511    color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
2512    color.a = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6));
2513    return color;
2514}
2515
2516
2517
2518
2519static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer)
2520{
2521    int4 color;
2522    color.r = bytesToShort<short>(buffer, bufferOffset);
2523    color.g = bytesToShort<short>(buffer, bufferOffset + 2);
2524    color.b = bytesToShort<short>(buffer, bufferOffset + 4);
2525    color.a = bytesToShort<short>(buffer, bufferOffset + 6);
2526    return color;
2527}
2528static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2529{
2530    shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
2531    shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer);
2532    shortToBytes(static_cast<short>(color.b), bufferOffset + 4, buffer);
2533    shortToBytes(static_cast<short>(color.a), bufferOffset + 6, buffer);
2534}
2535
2536
2537static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer)
2538{
2539    uint4 color;
2540    color.r = bytesToShort<ushort>(buffer, bufferOffset);
2541    color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
2542    color.b = bytesToShort<ushort>(buffer, bufferOffset + 4);
2543    color.a = bytesToShort<ushort>(buffer, bufferOffset + 6);
2544    return color;
2545}
2546static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2547{
2548    shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
2549    shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer);
2550    shortToBytes(static_cast<ushort>(color.b), bufferOffset + 4, buffer);
2551    shortToBytes(static_cast<ushort>(color.a), bufferOffset + 6, buffer);
2552}
2553
2554
2555static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer)
2556{
2557    float4 color;
2558    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2559    color.g = color.b = 0.0;
2560    color.a = 1.0;
2561    return color;
2562}
2563static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2564{
2565    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2566}
2567
2568
2569template <typename IntType>
2570static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer)
2571{
2572    float4 color;
2573    color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2574    color.g = color.b = 0.0;
2575    color.a = 1.0;
2576    return color;
2577}
2578
2579
2580
2581
2582static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer)
2583{
2584    float4 color;
2585    color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2586    color.rgb = 0.0;
2587    return color;
2588}
2589static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2590{
2591    intToBytes(as_type<uint>(color.a), bufferOffset, buffer);
2592}
2593
2594
2595static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer)
2596{
2597    float4 color;
2598    color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2599    color.a = 1.0;
2600    return color;
2601}
2602static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2603{
2604    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2605}
2606
2607
2608static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer)
2609{
2610    int4 color;
2611    color.r = bytesToInt<int>(buffer, bufferOffset);
2612    color.g = color.b = 0;
2613    color.a = 1;
2614    return color;
2615}
2616static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2617{
2618    intToBytes(color.r, bufferOffset, buffer);
2619}
2620
2621
2622static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer)
2623{
2624    float4 color;
2625    constexpr float kDivisor = 1.0f / (1 << 16);
2626    color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2627    color.g = color.b = 0.0;
2628    color.a = 1.0;
2629    return color;
2630}
2631
2632
2633static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer)
2634{
2635    uint4 color;
2636    color.r = bytesToInt<uint>(buffer, bufferOffset);
2637    color.g = color.b = 0;
2638    color.a = 1;
2639    return color;
2640}
2641static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2642{
2643    intToBytes(color.r, bufferOffset, buffer);
2644}
2645
2646
2647static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
2648{
2649    float4 color;
2650    color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2651    color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2652    return color;
2653}
2654static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2655{
2656    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2657    intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer);
2658}
2659
2660
2661static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer)
2662{
2663    float4 color;
2664    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2665    color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2666    color.b = 0.0;
2667    color.a = 1.0;
2668    return color;
2669}
2670static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2671{
2672    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2673    intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
2674}
2675
2676
2677template <typename IntType>
2678static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer)
2679{
2680    float4 color;
2681    color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2682    color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2683    color.b = 0.0;
2684    color.a = 1.0;
2685    return color;
2686}
2687
2688
2689
2690
2691static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer)
2692{
2693    int4 color;
2694    color.r = bytesToInt<int>(buffer, bufferOffset);
2695    color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2696    color.b = 0;
2697    color.a = 1;
2698    return color;
2699}
2700static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2701{
2702    intToBytes(color.r, bufferOffset, buffer);
2703    intToBytes(color.g, bufferOffset + 4, buffer);
2704}
2705
2706
2707static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer)
2708{
2709    float4 color;
2710    constexpr float kDivisor = 1.0f / (1 << 16);
2711    color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2712    color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2713    color.b = 0.0;
2714    color.a = 1.0;
2715    return color;
2716}
2717
2718
2719static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer)
2720{
2721    uint4 color;
2722    color.r = bytesToInt<uint>(buffer, bufferOffset);
2723    color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2724    color.b = 0;
2725    color.a = 1;
2726    return color;
2727}
2728static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2729{
2730    intToBytes(color.r, bufferOffset, buffer);
2731    intToBytes(color.g, bufferOffset + 4, buffer);
2732}
2733
2734
2735static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer)
2736{
2737    float4 color;
2738    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2739    color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2740    color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
2741    color.a = 1.0;
2742    return color;
2743}
2744
2745
2746template <typename IntType>
2747static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer)
2748{
2749    float4 color;
2750    color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2751    color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2752    color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
2753    color.a = 1.0;
2754    return color;
2755}
2756
2757
2758
2759
2760static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer)
2761{
2762    int4 color;
2763    color.r = bytesToInt<int>(buffer, bufferOffset);
2764    color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2765    color.b = bytesToInt<int>(buffer, bufferOffset + 8);
2766    color.a = 1;
2767    return color;
2768}
2769
2770
2771static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer)
2772{
2773    float4 color;
2774    constexpr float kDivisor = 1.0f / (1 << 16);
2775    color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2776    color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2777    color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
2778    color.a = 1.0;
2779    return color;
2780}
2781
2782
2783static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer)
2784{
2785    uint4 color;
2786    color.r = bytesToInt<uint>(buffer, bufferOffset);
2787    color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2788    color.b = bytesToInt<uint>(buffer, bufferOffset + 8);
2789    color.a = 1;
2790    return color;
2791}
2792
2793
2794static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
2795{
2796    float4 color;
2797    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2798    color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2799    color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
2800    color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12));
2801    return color;
2802}
2803static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2804{
2805    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2806    intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
2807    intToBytes(as_type<uint>(color.b), bufferOffset + 8, buffer);
2808    intToBytes(as_type<uint>(color.a), bufferOffset + 12, buffer);
2809}
2810
2811
2812template <typename IntType>
2813static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer)
2814{
2815    float4 color;
2816    color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2817    color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2818    color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
2819    color.a = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12));
2820    return color;
2821}
2822
2823
2824
2825
2826static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer)
2827{
2828    int4 color;
2829    color.r = bytesToInt<int>(buffer, bufferOffset);
2830    color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2831    color.b = bytesToInt<int>(buffer, bufferOffset + 8);
2832    color.a = bytesToInt<int>(buffer, bufferOffset + 12);
2833    return color;
2834}
2835static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2836{
2837    intToBytes(color.r, bufferOffset, buffer);
2838    intToBytes(color.g, bufferOffset + 4, buffer);
2839    intToBytes(color.b, bufferOffset + 8, buffer);
2840    intToBytes(color.a, bufferOffset + 12, buffer);
2841}
2842
2843static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer)
2844{
2845    float4 color;
2846    constexpr float kDivisor = 1.0f / (1 << 16);
2847    color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2848    color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2849    color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
2850    color.a = bytesToInt<int>(buffer, bufferOffset + 12) * kDivisor;
2851    return color;
2852}
2853
2854
2855static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer)
2856{
2857    uint4 color;
2858    color.r = bytesToInt<uint>(buffer, bufferOffset);
2859    color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2860    color.b = bytesToInt<uint>(buffer, bufferOffset + 8);
2861    color.a = bytesToInt<uint>(buffer, bufferOffset + 12);
2862    return color;
2863}
2864static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2865{
2866    intToBytes(color.r, bufferOffset, buffer);
2867    intToBytes(color.g, bufferOffset + 4, buffer);
2868    intToBytes(color.b, bufferOffset + 8, buffer);
2869    intToBytes(color.a, bufferOffset + 12, buffer);
2870}
2871# 1292 "./copy_buffer.metal"
2872static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8_SINT(bufferOffset, buffer); } static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8_UINT(bufferOffset, buffer); } static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_UINT(bufferOffset, buffer); }
2873static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16_SINT(bufferOffset, buffer); } static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16_UINT(bufferOffset, buffer); } static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_UINT(bufferOffset, buffer); }
2874static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32_SINT(bufferOffset, buffer); } static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32_UINT(bufferOffset, buffer); } static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_UINT(bufferOffset, buffer); }
2875
2876static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); }
2877
2878
2879kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2880{
2881    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2882# 1336 "./copy_buffer.metal"
2883    uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2884
2885    switch (kCopyFormatType)
2886    {
2887        case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2888    }
2889
2890
2891}
2892
2893kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2894{
2895    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2896# 1364 "./copy_buffer.metal"
2897    uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2898
2899    switch (kCopyFormatType)
2900    {
2901        case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2902    }
2903
2904
2905}
2906
2907kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2908{
2909    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2910# 1392 "./copy_buffer.metal"
2911    uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2912
2913    switch (kCopyFormatType)
2914    {
2915        case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2916    }
2917
2918
2919}
2920
2921
2922kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
2923{
2924    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
2925# 1439 "./copy_buffer.metal"
2926    uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2927
2928    switch (kCopyFormatType)
2929    {
2930        case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break;
2931    }
2932
2933
2934}
2935
2936kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<int, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<int, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
2937{
2938    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
2939# 1464 "./copy_buffer.metal"
2940    uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2941
2942    switch (kCopyFormatType)
2943    {
2944        case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break;
2945    }
2946
2947
2948}
2949
2950kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<uint, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<uint, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
2951{
2952    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
2953# 1489 "./copy_buffer.metal"
2954    uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2955
2956    switch (kCopyFormatType)
2957    {
2958        case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break;
2959    }
2960
2961
2962}
2963
2964
2965struct CopyVertexParams
2966{
2967    uint srcBufferStartOffset;
2968    uint srcStride;
2969    uint srcComponentBytes;
2970    uint srcComponents;
2971
2972
2973
2974    uchar4 srcDefaultAlphaData;
2975
2976    uint dstBufferStartOffset;
2977    uint dstStride;
2978    uint dstComponents;
2979
2980    uint vertexCount;
2981};
2982# 1539 "./copy_buffer.metal"
2983template <typename IntType>
2984static inline void writeFloatVertex(constant CopyVertexParams &options,
2985                                    uint idx,
2986                                    vec<IntType, 4> data,
2987                                    device uchar *dst)
2988{
2989    uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
2990
2991    for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
2992    {
2993        floatToBytes(static_cast<float>(data[component]), dstOffset, dst);
2994    }
2995}
2996
2997template <>
2998inline void writeFloatVertex(constant CopyVertexParams &options,
2999                             uint idx,
3000                             vec<float, 4> data,
3001                             device uchar *dst)
3002{
3003    uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
3004
3005    for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
3006    {
3007        floatToBytes(data[component], dstOffset, dst);
3008    }
3009}
3010
3011
3012static inline void convertToFloatVertexFormat(uint index,
3013                                              constant CopyVertexParams &options,
3014                                              constant uchar *srcBuffer,
3015                                              device uchar *dstBuffer)
3016{
3017# 1585 "./copy_buffer.metal"
3018    uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index;
3019# 1594 "./copy_buffer.metal"
3020    switch (kCopyFormatType)
3021    {
3022        case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
3023    }
3024
3025
3026}
3027
3028
3029kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]],
3030                                         constant CopyVertexParams &options [[buffer(0)]],
3031                                         constant uchar *srcBuffer [[buffer(1)]],
3032                                         device uchar *dstBuffer [[buffer(2)]])
3033{
3034    if (index >= options.vertexCount) { return; };
3035    convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
3036}
3037
3038
3039vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]],
3040                                         constant CopyVertexParams &options [[buffer(0)]],
3041                                         constant uchar *srcBuffer [[buffer(1)]],
3042                                         device uchar *dstBuffer [[buffer(2)]])
3043{
3044    convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
3045}
3046
3047
3048static inline void expandVertexFormatComponents(uint index,
3049                                                constant CopyVertexParams &options,
3050                                                constant uchar *srcBuffer,
3051                                                device uchar *dstBuffer)
3052{
3053    uint srcOffset = options.srcBufferStartOffset + options.srcStride * index;
3054    uint dstOffset = options.dstBufferStartOffset + options.dstStride * index;
3055
3056    uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u);
3057    uint component;
3058    for (component = 0; component < options.srcComponents; ++component,
3059        srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes)
3060    {
3061        for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3062        {
3063            dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte];
3064        }
3065    }
3066
3067    for (; component < dstComponentsBeforeAlpha;
3068         ++component, dstOffset += options.srcComponentBytes)
3069    {
3070        for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3071        {
3072            dstBuffer[dstOffset + byte] = 0;
3073        }
3074    }
3075
3076    if (component < options.dstComponents)
3077    {
3078
3079        for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3080        {
3081            dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte];
3082        }
3083    }
3084}
3085
3086
3087kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]],
3088                                           constant CopyVertexParams &options [[buffer(0)]],
3089                                           constant uchar *srcBuffer [[buffer(1)]],
3090                                           device uchar *dstBuffer [[buffer(2)]])
3091{
3092    if (index >= options.vertexCount) { return; };
3093
3094    expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
3095}
3096
3097
3098vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]],
3099                                           constant CopyVertexParams &options [[buffer(0)]],
3100                                           constant uchar *srcBuffer [[buffer(1)]],
3101                                           device uchar *dstBuffer [[buffer(2)]])
3102{
3103    expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
3104}
3105# 6 "temp_master_source.metal" 2
3106# 1 "./visibility.metal" 1
3107
3108
3109
3110
3111
3112
3113
3114
3115constant bool kCombineWithExistingResult [[function_constant(1000)]];
3116
3117
3118
3119struct CombineVisibilityResultOptions
3120{
3121
3122    uint startOffset;
3123
3124    uint numOffsets;
3125};
3126
3127kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]],
3128                                    constant CombineVisibilityResultOptions &options [[buffer(0)]],
3129                                    constant ushort4 *renderpassVisibilityResult [[buffer(1)]],
3130                                    device ushort4 *finalResults [[buffer(2)]])
3131{
3132    if (idx > 0)
3133    {
3134
3135
3136
3137        return;
3138    }
3139    ushort4 finalResult16x4;
3140
3141    if (kCombineWithExistingResult)
3142    {
3143        finalResult16x4 = finalResults[0];
3144    }
3145    else
3146    {
3147        finalResult16x4 = ushort4(0, 0, 0, 0);
3148    }
3149
3150    for (uint i = 0; i < options.numOffsets; ++i)
3151    {
3152        uint offset = options.startOffset + i;
3153        ushort4 renderpassResult = renderpassVisibilityResult[offset];
3154
3155
3156        finalResult16x4 = finalResult16x4 | renderpassResult;
3157    }
3158    finalResults[0] = finalResult16x4;
3159}
3160# 7 "temp_master_source.metal" 2
3161# 1 "./rewrite_indices.metal" 1
3162# 11 "./rewrite_indices.metal"
3163# 1 "./rewrite_indices_shared.h" 1
3164# 12 "./rewrite_indices.metal" 2
3165using namespace metal;
3166
3167constant uint fixIndexBufferKey [[ function_constant(2000) ]];
3168constant bool indexBufferIsUint16 = (((fixIndexBufferKey >> 0U) & 0x03U) == 2U);
3169constant bool indexBufferIsUint32 = (((fixIndexBufferKey >> 0U) & 0x03U) == 3U);
3170constant bool outIndexBufferIsUint16 = (((fixIndexBufferKey >> 2U) & 0x03U) == 2U);
3171constant bool outIndexBufferIsUint32 = (((fixIndexBufferKey >> 2U) & 0x03U) == 3U);
3172constant bool doPrimRestart = (fixIndexBufferKey & 0x00100U);
3173constant uint fixIndexBufferMode = (fixIndexBufferKey >> 4U) & 0x0FU;
3174
3175
3176static inline uint readIdx(
3177                           const device ushort *indexBufferUint16,
3178                           const device uint *indexBufferUint32,
3179                           const uint restartIndex,
3180                           const uint indexCount,
3181                           uint idx,
3182                           thread bool &foundRestart,
3183                           thread uint &indexThatRestartedFirst
3184                           )
3185{
3186    uint inIndex = idx;
3187    if(inIndex < indexCount)
3188    {
3189        if(indexBufferIsUint16)
3190        {
3191            inIndex = indexBufferUint16[inIndex];
3192        }
3193        else if(indexBufferIsUint32)
3194        {
3195            inIndex = indexBufferUint32[inIndex];
3196        }
3197    }
3198    else
3199    {
3200        foundRestart = true;
3201        indexThatRestartedFirst = idx;
3202    }
3203    if(doPrimRestart && !foundRestart && inIndex == restartIndex)
3204    {
3205        foundRestart = true;
3206        indexThatRestartedFirst = idx;
3207    }
3208    return inIndex;
3209}
3210
3211static inline void outputPrimitive(
3212                                   const device ushort *indexBufferUint16,
3213                                   const device uint *indexBufferUint32,
3214                                   device ushort *outIndexBufferUint16,
3215                                   device uint *outIndexBufferUint32,
3216                                   const uint restartIndex,
3217                                   const uint indexCount,
3218                                   thread uint &baseIndex,
3219                                   uint onIndex,
3220                                   thread uint &onOutIndex
3221                                   )
3222{
3223    if(baseIndex > onIndex) return;
3224    bool foundRestart = false;
3225    uint indexThatRestartedFirst = 0;
3226# 86 "./rewrite_indices.metal"
3227    switch(fixIndexBufferMode)
3228    {
3229        case 0x00U:
3230        {
3231            auto tmpIndex = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex, foundRestart, indexThatRestartedFirst);
3232            if(foundRestart)
3233            {
3234                baseIndex = indexThatRestartedFirst + 1;
3235                return;
3236            }
3237
3238            ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex; } onOutIndex++; });
3239        }
3240        break;
3241        case 0x01U:
3242        {
3243            auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
3244            auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
3245            if(foundRestart)
3246            {
3247                baseIndex = indexThatRestartedFirst + 1;
3248                return;
3249            }
3250            if((onIndex - baseIndex) & 1) return;
3251
3252            if(fixIndexBufferKey & 0x00200U)
3253            {
3254                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3255                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3256            }
3257            else
3258            {
3259                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3260                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3261            }
3262        }
3263        break;
3264        case 0x03U:
3265        {
3266            auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
3267            auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
3268            if(foundRestart)
3269            {
3270                baseIndex = indexThatRestartedFirst + 1;
3271                return;
3272            }
3273
3274            if(fixIndexBufferKey & 0x00200U)
3275            {
3276                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3277                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3278            }
3279            else
3280            {
3281                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3282                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3283            }
3284        }
3285        break;
3286        case 0x04U:
3287        {
3288            auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
3289            auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
3290            auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst);
3291            if(foundRestart)
3292            {
3293                baseIndex = indexThatRestartedFirst + 1;
3294                return;
3295            }
3296            if(((onIndex - baseIndex) % 3) != 0) return;
3297
3298            if(fixIndexBufferKey & 0x00200U)
3299            {
3300                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3301                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3302                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3303            }
3304            else
3305            {
3306                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3307                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3308                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3309            }
3310        }
3311        break;
3312        case 0x05U:
3313        {
3314            uint isOdd = ((onIndex - baseIndex) & 1);
3315            auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0 + isOdd, foundRestart, indexThatRestartedFirst);
3316            auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1 - isOdd, foundRestart, indexThatRestartedFirst);
3317            auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst);
3318            if(foundRestart)
3319            {
3320                baseIndex = indexThatRestartedFirst + 1;
3321                return;
3322            }
3323
3324            if(fixIndexBufferKey & 0x00200U)
3325            {
3326                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3327                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3328                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3329            }
3330            else
3331            {
3332
3333                if(isOdd)
3334                {
3335                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3336                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3337                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3338                }
3339                else
3340                {
3341                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3342                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3343                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3344                }
3345            }
3346
3347            assert(onOutIndex <= (onIndex + 1) * 3);
3348            assert(onOutIndex <= (indexCount - 2) * 3);
3349        }
3350        break;
3351
3352    }
3353
3354
3355}
3356
3357kernel void fixIndexBuffer(
3358                           const device ushort *indexBufferUint16 [[ buffer(0), function_constant(indexBufferIsUint16) ]],
3359                           const device uint *indexBufferUint32 [[ buffer(0), function_constant(indexBufferIsUint32) ]],
3360                           device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]],
3361                           device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]],
3362                           constant uint &indexCount [[ buffer(2) ]],
3363                           constant uint &primCount [[ buffer(3) ]],
3364                           uint prim [[thread_position_in_grid]])
3365{
3366    constexpr uint restartIndex = 0xFFFFFFFF;
3367    uint baseIndex = 0;
3368    uint onIndex = onIndex;
3369    uint onOutIndex = onOutIndex;
3370    if(prim < primCount)
3371    {
3372        switch(fixIndexBufferMode)
3373        {
3374            case 0x00U:
3375                onIndex = prim;
3376                onOutIndex = prim;
3377                break;
3378            case 0x01U:
3379                onIndex = prim * 2;
3380                onOutIndex = prim * 2;
3381                break;
3382            case 0x03U:
3383                onIndex = prim;
3384                onOutIndex = prim * 2;
3385                break;
3386            case 0x04U:
3387                onIndex = prim * 3;
3388                onOutIndex = prim * 3;
3389                break;
3390            case 0x05U:
3391                onIndex = prim;
3392                onOutIndex = prim * 3;
3393                break;
3394        }
3395        outputPrimitive(indexBufferUint16, indexBufferUint32, outIndexBufferUint16, outIndexBufferUint32, restartIndex, indexCount, baseIndex, onIndex, onOutIndex);
3396    }
3397}
3398
3399
3400
3401static inline void generatePrimitive(
3402                                   device ushort *outIndexBufferUint16,
3403                                   device uint *outIndexBufferUint32,
3404                                   const uint firstVertex,
3405                                   const uint indexCount,
3406                                   thread uint &baseIndex,
3407                                   uint onIndex,
3408                                   uint primCount,
3409                                   thread uint &onOutIndex
3410                                   )
3411{
3412    if(baseIndex > onIndex) return;
3413# 284 "./rewrite_indices.metal"
3414    switch(fixIndexBufferMode)
3415    {
3416        case 0x00U:
3417        {
3418            ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = onIndex + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = onIndex + firstVertex; } onOutIndex++; });
3419        }
3420        break;
3421        case 0x01U:
3422        {
3423            auto tmpIndex0 = onIndex + 0;
3424            auto tmpIndex1 = onIndex + 1;
3425            if(fixIndexBufferKey & 0x00200U)
3426            {
3427                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3428                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3429            }
3430            else
3431            {
3432                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3433                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3434            }
3435        }
3436        break;
3437        case 0x02U:
3438        {
3439            auto tmpIndex0 = onIndex + 0;
3440            auto tmpIndex1 = (onIndex + 1) % primCount;
3441            if(fixIndexBufferKey & 0x00200U)
3442            {
3443                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3444                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3445            }
3446            else
3447            {
3448                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3449                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3450            }
3451        }
3452        break;
3453        case 0x03U:
3454        {
3455            auto tmpIndex0 = onIndex + 0;
3456            auto tmpIndex1 = onIndex + 1;
3457            if(fixIndexBufferKey & 0x00200U)
3458            {
3459                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3460                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3461            }
3462            else
3463            {
3464                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3465                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3466            }
3467        }
3468        break;
3469        case 0x04U:
3470        {
3471            auto tmpIndex0 = onIndex + 0;
3472            auto tmpIndex1 = onIndex + 1;
3473            auto tmpIndex2 = onIndex + 2;
3474            if(fixIndexBufferKey & 0x00200U)
3475            {
3476                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3477                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3478                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3479            }
3480            else
3481            {
3482                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3483                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3484                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3485            }
3486        }
3487        break;
3488        case 0x05U:
3489        {
3490            uint isOdd = ((onIndex - baseIndex) & 1);
3491            auto tmpIndex0 = onIndex + 0 + isOdd;
3492            auto tmpIndex1 = onIndex + 1 - isOdd;
3493            auto tmpIndex2 = onIndex + 2;
3494            if(fixIndexBufferKey & 0x00200U)
3495            {
3496                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3497                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3498                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3499            }
3500            else
3501            {
3502                if(isOdd)
3503                {
3504                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3505                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3506                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3507                }
3508                else
3509                {
3510                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3511                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3512                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3513                }
3514            }
3515
3516            assert(onOutIndex <= (onIndex + 1) * 3);
3517            assert(onOutIndex <= (indexCount - 2) * 3);
3518            break;
3519        }
3520        case 0x06U:
3521        {
3522            auto tmpIndex0 = 0;
3523            auto tmpIndex1 = onIndex + 1;
3524            auto tmpIndex2 = onIndex + 2;
3525
3526            if(fixIndexBufferKey & 0x00200U)
3527            {
3528                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3529                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3530                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3531            }
3532            else
3533            {
3534                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3535                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3536                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3537            }
3538        }
3539        break;
3540
3541    }
3542
3543}
3544
3545
3546
3547kernel void genIndexBuffer(
3548                           device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]],
3549                           device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]],
3550                           constant uint &indexCount [[ buffer(2) ]],
3551                           constant uint &primCount [[ buffer(3) ]],
3552                           constant uint &firstVertex [[ buffer(4) ]],
3553                           uint prim [[thread_position_in_grid]])
3554{
3555    uint baseIndex = 0;
3556    uint onIndex = onIndex;
3557    uint onOutIndex = onOutIndex;
3558    if(prim < primCount)
3559    {
3560        switch(fixIndexBufferMode)
3561        {
3562            case 0x00U:
3563                onIndex = prim;
3564                onOutIndex = prim;
3565                break;
3566            case 0x01U:
3567                onIndex = prim * 2;
3568                onOutIndex = prim * 2;
3569                break;
3570            case 0x03U:
3571                onIndex = prim;
3572                onOutIndex = prim * 2;
3573                break;
3574            case 0x02U:
3575                onIndex = prim;
3576                onOutIndex = prim * 2;
3577                break;
3578            case 0x04U:
3579                onIndex = prim * 3;
3580                onOutIndex = prim * 3;
3581                break;
3582            case 0x05U:
3583                onIndex = prim;
3584                onOutIndex = prim * 3;
3585                break;
3586            case 0x06U:
3587                onIndex = prim;
3588                onOutIndex = prim * 3;
3589                break;
3590        }
3591        generatePrimitive(outIndexBufferUint16, outIndexBufferUint32, firstVertex, indexCount, baseIndex, onIndex, primCount, onOutIndex);
3592    }
3593}
3594# 8 "temp_master_source.metal" 2
3595
3596
3597)";
3598