1// GENERATED FILE - DO NOT EDIT. 2// Generated by gen_mtl_internal_shaders.py 3// 4// Copyright 2020 The ANGLE Project Authors. All rights reserved. 5// Use of this source code is governed by a BSD-style license that can be 6// found in the LICENSE file. 7// 8 9// Combined Metal default shaders. 10 11# include <simd/simd.h> 12# include <metal_stdlib> 13namespace rx 14{ 15namespace mtl_shader 16{ 17enum 18{ 19 kTextureType2D = 0, 20 kTextureType2DMultisample = 1, 21 kTextureType2DArray = 2, 22 kTextureTypeCube = 3, 23 kTextureType3D = 4, 24 kTextureTypeCount = 5, 25}; 26} 27} 28using namespace metal; 29constant uint32_t kNumColorOutputs [[function_constant(0)]]; 30constant bool kColorOutputAvailable0 = kNumColorOutputs > 0; 31constant bool kColorOutputAvailable1 = kNumColorOutputs > 1; 32constant bool kColorOutputAvailable2 = kNumColorOutputs > 2; 33constant bool kColorOutputAvailable3 = kNumColorOutputs > 3; 34constant bool kColorOutputAvailable4 = kNumColorOutputs > 4; 35constant bool kColorOutputAvailable5 = kNumColorOutputs > 5; 36constant bool kColorOutputAvailable6 = kNumColorOutputs > 6; 37constant bool kColorOutputAvailable7 = kNumColorOutputs > 7; 38namespace rx 39{ 40namespace mtl_shader 41{ 42constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)}; 43template <typename T> 44struct MultipleColorOutputs 45{ 46 vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]]; 47 vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]]; 48 vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]]; 49 vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]]; 50 vec<T, 4> color4 [[color(4), function_constant(kColorOutputAvailable4)]]; 51 vec<T, 4> color5 [[color(5), function_constant(kColorOutputAvailable5)]]; 52 vec<T, 4> color6 [[color(6), function_constant(kColorOutputAvailable6)]]; 53 vec<T, 4> color7 [[color(7), function_constant(kColorOutputAvailable7)]]; 54}; 55template <typename T> 56static inline MultipleColorOutputs<T> toMultipleColorOutputs(vec<T, 4> color) 57{ 58 MultipleColorOutputs<T> re; 59 do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0); 60 do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0); 61 do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0); 62 do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0); 63 do { if (kColorOutputAvailable4) { re.color4 = color; } } while (0); 64 do { if (kColorOutputAvailable5) { re.color5 = color; } } while (0); 65 do { if (kColorOutputAvailable6) { re.color6 = color; } } while (0); 66 do { if (kColorOutputAvailable7) { re.color7 = color; } } while (0); 67 return re; 68} 69static inline float3 cubeTexcoords(float2 texcoords, int face) 70{ 71 texcoords = 2.0 * texcoords - 1.0; 72 switch (face) 73 { 74 case 0: 75 return float3(1.0, -texcoords.y, -texcoords.x); 76 case 1: 77 return float3(-1.0, -texcoords.y, texcoords.x); 78 case 2: 79 return float3(texcoords.x, 1.0, texcoords.y); 80 case 3: 81 return float3(texcoords.x, -1.0, -texcoords.y); 82 case 4: 83 return float3(texcoords.x, -texcoords.y, 1.0); 84 case 5: 85 return float3(-texcoords.x, -texcoords.y, -1.0); 86 } 87 return float3(texcoords, 0); 88} 89template <typename T> 90static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords) 91{ 92 uint samples = srcTexture.get_num_samples(); 93 vec<T, 4> output(0); 94 for (uint sample = 0; sample < samples; ++sample) 95 { 96 output += srcTexture.read(coords, sample); 97 } 98 output = output / samples; 99 return output; 100} 101static inline float4 sRGBtoLinear(float4 color) 102{ 103 float3 linear1 = color.rgb / 12.92; 104 float3 linear2 = powr((color.rgb + float3(0.055)) / 1.055, 2.4); 105 float3 factor = float3(color.rgb <= float3(0.04045)); 106 float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a); 107 return linear; 108} 109static inline float linearToSRGB(float color) 110{ 111 if (color <= 0.0f) 112 return 0.0f; 113 if (color < 0.0031308f) 114 return 12.92f * color; 115 if (color < 1.0f) 116 return 1.055f * powr(color, 0.41666f) - 0.055f; 117 return 1.0f; 118} 119static inline float4 linearToSRGB(float4 color) 120{ 121 return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a); 122} 123template <typename Short> 124static inline Short bytesToShort(constant uchar *input, uint offset) 125{ 126 Short inputLo = input[offset]; 127 Short inputHi = input[offset + 1]; 128 return inputLo | (inputHi << 8); 129} 130template <typename Int> 131static inline Int bytesToInt(constant uchar *input, uint offset) 132{ 133 Int input0 = input[offset]; 134 Int input1 = input[offset + 1]; 135 Int input2 = input[offset + 2]; 136 Int input3 = input[offset + 3]; 137 return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); 138} 139template <typename Short> 140static inline void shortToBytes(Short val, uint offset, device uchar *output) 141{ 142 ushort valUnsigned = as_type<ushort>(val); 143 output[offset] = valUnsigned & 0xff; 144 output[offset + 1] = (valUnsigned >> 8) & 0xff; 145} 146template <typename Int> 147static inline void intToBytes(Int val, uint offset, device uchar *output) 148{ 149 uint valUnsigned = as_type<uint>(val); 150 output[offset] = valUnsigned & 0xff; 151 output[offset + 1] = (valUnsigned >> 8) & 0xff; 152 output[offset + 2] = (valUnsigned >> 16) & 0xff; 153 output[offset + 3] = (valUnsigned >> 24) & 0xff; 154} 155static inline void floatToBytes(float val, uint offset, device uchar *output) 156{ 157 intToBytes(as_type<uint>(val), offset, output); 158} 159static inline void int24bitToBytes(uint val, uint offset, device uchar *output) 160{ 161 output[offset] = val & 0xff; 162 output[offset + 1] = (val >> 8) & 0xff; 163 output[offset + 2] = (val >> 16) & 0xff; 164} 165template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> 166static inline T getShiftedData(T input) 167{ 168 static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), 169 "T must have at least as many bits as inputBitCount + inputBitStart."); 170 const T mask = (1 << inputBitCount) - 1; 171 return (input >> inputBitStart) & mask; 172} 173template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> 174static inline T shiftData(T input) 175{ 176 static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), 177 "T must have at least as many bits as inputBitCount + inputBitStart."); 178 const T mask = (1 << inputBitCount) - 1; 179 return (input & mask) << inputBitStart; 180} 181template <unsigned int inputBitCount, typename T> 182static inline float normalizedToFloat(T input) 183{ 184 static_assert(inputBitCount <= (sizeof(T) * 8), 185 "T must have more bits than or same bits as inputBitCount."); 186 static_assert(inputBitCount <= 23, "Only single precision is supported"); 187 constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1); 188 return input * inverseMax; 189} 190template <typename T> 191static inline float normalizedToFloat(T input) 192{ 193 return normalizedToFloat<sizeof(T) * 8, T>(input); 194} 195template <> 196inline float normalizedToFloat(short input) 197{ 198 constexpr float inverseMax = 1.0f / 0x7fff; 199 return static_cast<float>(input) * inverseMax; 200} 201template <> 202inline float normalizedToFloat(int input) 203{ 204 constexpr float inverseMax = 1.0f / 0x7fffffff; 205 return static_cast<float>(input) * inverseMax; 206} 207template <> 208inline float normalizedToFloat(uint input) 209{ 210 constexpr float inverseMax = 1.0f / 0xffffffff; 211 return static_cast<float>(input) * inverseMax; 212} 213template <unsigned int outputBitCount, typename T> 214static inline T floatToNormalized(float input) 215{ 216 static_assert(outputBitCount <= (sizeof(T) * 8), 217 "T must have more bits than or same bits as inputBitCount."); 218 static_assert(outputBitCount > (metal::is_unsigned<T>::value ? 0 : 1), 219 "outputBitCount must be at least 1 not counting the sign bit."); 220 constexpr unsigned int bits = 221 metal::is_unsigned<T>::value ? outputBitCount : outputBitCount - 1; 222 static_assert(bits <= 23, "Only single precision is supported"); 223 return static_cast<T>(metal::round(((1 << bits) - 1) * input)); 224} 225template <typename T> 226static inline T floatToNormalized(float input) 227{ 228 return floatToNormalized<sizeof(T) * 8, T>(input); 229} 230} 231} 232 233using namespace rx::mtl_shader; 234constant bool kPremultiplyAlpha [[function_constant(1)]]; 235constant bool kUnmultiplyAlpha [[function_constant(2)]]; 236constant bool kTransformLinearToSrgb [[function_constant(3)]]; 237constant int kSourceTextureType [[function_constant(4)]]; 238constant int kSourceTexture2Type [[function_constant(5)]]; 239constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D; 240constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray; 241constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample; 242constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube; 243constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D; 244constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D; 245constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray; 246constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample; 247constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube; 248struct BlitParams 249{ 250 float4 srcTexCoords; 251 int srcLevel; 252 int srcLayer; 253 bool dstLuminance; 254 uint8_t padding[7]; 255}; 256struct BlitVSOut 257{ 258 float4 position [[position]]; 259 float2 texCoords [[center_no_perspective, user(locn1)]]; 260}; 261vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]]) 262{ 263 BlitVSOut output; 264 output.position.xy = select(float2(-1.0f), float2(1.0f), bool2(vid & uint2(2, 1))); 265 output.position.zw = float2(0.0, 1.0); 266 output.texCoords = select(options.srcTexCoords.xy, options.srcTexCoords.zw, bool2(vid & uint2(2, 1))); 267 return output; 268} 269template <typename SrcTexture2d> 270static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords) 271{ 272 uint2 dimens(srcTexture.get_width(), srcTexture.get_height()); 273 uint2 coords = uint2(texCoords * float2(dimens)); 274 return coords; 275} 276template <typename T> 277static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords) 278{ 279 uint2 coords = getImageCoords(srcTexture, texCoords); 280 return resolveTextureMS(srcTexture, coords); 281} 282template <typename T> 283static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture, 284 sampler textureSampler, 285 float2 texCoords, 286 constant BlitParams &options) 287{ 288 uint depth = srcTexture.get_depth(options.srcLevel); 289 float zCoord = (float(options.srcLayer) + 0.5) / float(depth); 290 return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel)); 291} 292template <typename T> 293static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 294{ 295 vec<T, 4> output; 296 switch (kSourceTextureType) 297 { 298 case kTextureType2D: 299 output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel)); 300 break; 301 case kTextureType2DArray: 302 output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer, 303 level(options.srcLevel)); 304 break; 305 case kTextureType2DMultisample: 306 output = blitSampleTextureMS(srcTexture2dMS, input.texCoords); 307 break; 308 case kTextureTypeCube: 309 output = srcTextureCube.sample(textureSampler, 310 cubeTexcoords(input.texCoords, options.srcLayer), 311 level(options.srcLevel)); 312 break; 313 case kTextureType3D: 314 output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options); 315 break; 316 } 317 if (kTransformLinearToSrgb) { 318 output.x = linearToSRGB(output.x); 319 output.y = linearToSRGB(output.y); 320 output.z = linearToSRGB(output.z); 321 } 322 if (kUnmultiplyAlpha) 323 { 324 if (output.a != 0.0) 325 { 326 output.xyz /= output.a; 327 } 328 } 329 if (kPremultiplyAlpha) 330 { 331 output.xyz *= output.a; 332 } 333 if (options.dstLuminance) 334 { 335 output.g = output.b = output.r; 336 } 337 return output; 338} 339template <typename T> 340static inline MultipleColorOutputs<T> blitFS(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 341{ 342 vec<T, 4> output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 343 return toMultipleColorOutputs(output); 344} 345fragment MultipleColorOutputs<float> blitFloatFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 346{ 347 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 348} 349fragment MultipleColorOutputs<int> blitIntFS(BlitVSOut input [[stage_in]], texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<int> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 350{ 351 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 352} 353fragment MultipleColorOutputs<uint> blitUIntFS(BlitVSOut input [[stage_in]], texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<uint> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 354{ 355 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 356} 357fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 358{ 359 float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 360 uint4 output = uint4(inputColor * float4(255.0)); 361 return toMultipleColorOutputs(output); 362} 363struct FragmentDepthOut 364{ 365 float depth [[depth(any)]]; 366}; 367static inline float sampleDepth( 368 texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]], 369 texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]], 370 texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]], 371 texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]], 372 float2 texCoords, 373 constant BlitParams &options) 374{ 375 float4 output; 376 constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); 377 switch (kSourceTextureType) 378 { 379 case kTextureType2D: 380 output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel)); 381 break; 382 case kTextureType2DArray: 383 output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer, 384 level(options.srcLevel)); 385 break; 386 case kTextureType2DMultisample: 387 output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); 388 break; 389 case kTextureTypeCube: 390 output = 391 srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer), 392 level(options.srcLevel)); 393 break; 394 } 395 return output.r; 396} 397fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]], 398 texture2d<float> srcTexture2d 399 [[texture(0), function_constant(kSourceTextureType2D)]], 400 texture2d_array<float> srcTexture2dArray 401 [[texture(0), function_constant(kSourceTextureType2DArray)]], 402 texture2d_ms<float> srcTexture2dMS 403 [[texture(0), function_constant(kSourceTextureType2DMS)]], 404 texturecube<float> srcTextureCube 405 [[texture(0), function_constant(kSourceTextureTypeCube)]], 406 constant BlitParams &options [[buffer(0)]]) 407{ 408 FragmentDepthOut re; 409 re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 410 input.texCoords, options); 411 return re; 412} 413static inline uint32_t sampleStencil( 414 texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]], 415 texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]], 416 texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]], 417 texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]], 418 float2 texCoords, 419 int srcLevel, 420 int srcLayer) 421{ 422 uint4 output; 423 constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); 424 switch (kSourceTexture2Type) 425 { 426 case kTextureType2D: 427 output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel)); 428 break; 429 case kTextureType2DArray: 430 output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel)); 431 break; 432 case kTextureType2DMultisample: 433 output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); 434 break; 435 case kTextureTypeCube: 436 output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer), 437 level(srcLevel)); 438 break; 439 } 440 return output.r; 441} 442struct BlitStencilToBufferParams 443{ 444 float2 srcStartTexCoords; 445 float2 srcTexCoordSteps; 446 int srcLevel; 447 int srcLayer; 448 uint2 dstSize; 449 uint dstBufferRowPitch; 450 bool resolveMS; 451}; 452kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]], 453 texture2d<uint32_t> srcTexture2d 454 [[texture(1), function_constant(kSourceTexture2Type2D)]], 455 texture2d_array<uint32_t> srcTexture2dArray 456 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 457 texture2d_ms<uint32_t> srcTexture2dMS 458 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 459 texturecube<uint32_t> srcTextureCube 460 [[texture(1), function_constant(kSourceTexture2TypeCube)]], 461 constant BlitStencilToBufferParams &options [[buffer(0)]], 462 device uchar *buffer [[buffer(1)]]) 463{ 464 if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y) 465 { 466 return; 467 } 468 float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps; 469 if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS) 470 { 471 uint samples = srcTexture2dMS.get_num_samples(); 472 uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords); 473 uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x; 474 for (uint sample = 0; sample < samples; ++sample) 475 { 476 uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r; 477 buffer[bufferOffset + sample] = static_cast<uchar>(stencilPerSample); 478 } 479 } 480 else 481 { 482 uint32_t stencil = 483 sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 484 srcTexCoords, options.srcLevel, options.srcLayer); 485 buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast<uchar>(stencil); 486 } 487} 488#if __METAL_VERSION__ >= 210 489struct FragmentStencilOut 490{ 491 uint32_t stencil [[stencil]]; 492}; 493struct FragmentDepthStencilOut 494{ 495 float depth [[depth(any)]]; 496 uint32_t stencil [[stencil]]; 497}; 498fragment FragmentStencilOut blitStencilFS( 499 BlitVSOut input [[stage_in]], 500 texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]], 501 texture2d_array<uint32_t> srcTexture2dArray 502 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 503 texture2d_ms<uint32_t> srcTexture2dMS 504 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 505 texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]], 506 constant BlitParams &options [[buffer(0)]]) 507{ 508 FragmentStencilOut re; 509 re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 510 input.texCoords, options.srcLevel, options.srcLayer); 511 return re; 512} 513fragment FragmentDepthStencilOut blitDepthStencilFS( 514 BlitVSOut input [[stage_in]], 515 texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], 516 texture2d_array<float> srcDepthTexture2dArray 517 [[texture(0), function_constant(kSourceTextureType2DArray)]], 518 texture2d_ms<float> srcDepthTexture2dMS 519 [[texture(0), function_constant(kSourceTextureType2DMS)]], 520 texturecube<float> srcDepthTextureCube 521 [[texture(0), function_constant(kSourceTextureTypeCube)]], 522 texture2d<uint32_t> srcStencilTexture2d 523 [[texture(1), function_constant(kSourceTexture2Type2D)]], 524 texture2d_array<uint32_t> srcStencilTexture2dArray 525 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 526 texture2d_ms<uint32_t> srcStencilTexture2dMS 527 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 528 texturecube<uint32_t> srcStencilTextureCube 529 [[texture(1), function_constant(kSourceTexture2TypeCube)]], 530 constant BlitParams &options [[buffer(0)]]) 531{ 532 FragmentDepthStencilOut re; 533 re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS, 534 srcDepthTextureCube, input.texCoords, options); 535 re.stencil = 536 sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS, 537 srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer); 538 return re; 539} 540#endif 541using namespace rx::mtl_shader; 542struct ClearParams 543{ 544 float4 clearColor; 545 float clearDepth; 546}; 547vertex float4 clearVS(unsigned int vid [[ vertex_id ]], 548 constant ClearParams &clearParams [[buffer(0)]]) 549{ 550 return float4(gCorners[vid], clearParams.clearDepth, 1.0); 551} 552fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]]) 553{ 554 return toMultipleColorOutputs(clearParams.clearColor); 555} 556fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]]) 557{ 558 return toMultipleColorOutputs(as_type<int4>(clearParams.clearColor)); 559} 560fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]]) 561{ 562 return toMultipleColorOutputs(as_type<uint4>(clearParams.clearColor)); 563} 564using namespace rx::mtl_shader; 565constant bool kSourceBufferAligned[[function_constant(100)]]; 566constant bool kSourceIndexIsU8[[function_constant(200)]]; 567constant bool kSourceIndexIsU16[[function_constant(300)]]; 568constant bool kSourceIndexIsU32[[function_constant(400)]]; 569constant bool kSourceBufferUnaligned = !kSourceBufferAligned; 570constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned; 571constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned; 572constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned; 573struct IndexConversionParams 574{ 575 uint32_t srcOffset; 576 uint32_t indexCount; 577 bool primitiveRestartEnabled; 578}; 579inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx) 580{ 581 return inputAligned[offset / 2 + idx]; 582} 583inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx) 584{ 585 return inputAligned[offset / 4 + idx]; 586} 587inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx) 588{ 589 return input[offset + idx]; 590} 591inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx) 592{ 593 ushort inputLo = input[offset + 2 * idx]; 594 ushort inputHi = input[offset + 2 * idx + 1]; 595 return inputLo | (inputHi << 8); 596} 597inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx) 598{ 599 uint input0 = input[offset + 4 * idx]; 600 uint input1 = input[offset + 4 * idx + 1]; 601 uint input2 = input[offset + 4 * idx + 2]; 602 uint input3 = input[offset + 4 * idx + 3]; 603 return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); 604} 605kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]], 606 constant IndexConversionParams &options [[buffer(0)]], 607 constant uchar *input [[buffer(1)]], 608 device ushort *output [[buffer(2)]]) 609{ 610 if (idx >= options.indexCount) { return; }; 611 uchar value = getIndexAligned(input, options.srcOffset, idx); 612 if (options.primitiveRestartEnabled && value == 0xff) 613 { 614 output[idx] = 0xffff; 615 } 616 else 617 { 618 output[idx] = value; 619 } 620} 621kernel void convertIndexU16(uint idx [[thread_position_in_grid]], 622 constant IndexConversionParams &options [[buffer(0)]], 623 constant uchar *input 624 [[buffer(1), function_constant(kSourceBufferUnaligned)]], 625 constant ushort *inputAligned 626 [[buffer(1), function_constant(kSourceBufferAligned)]], 627 device ushort *output [[buffer(2)]]) 628{ 629 if (idx >= options.indexCount) { return; }; 630 ushort value; 631 if (kSourceBufferAligned) 632 { 633 value = getIndexAligned(inputAligned, options.srcOffset, idx); 634 } 635 else 636 { 637 value = getIndexUnalignedU16(input, options.srcOffset, idx); 638 } 639 output[idx] = value; 640} 641kernel void convertIndexU32(uint idx [[thread_position_in_grid]], 642 constant IndexConversionParams &options [[buffer(0)]], 643 constant uchar *input 644 [[buffer(1), function_constant(kSourceBufferUnaligned)]], 645 constant uint *inputAligned 646 [[buffer(1), function_constant(kSourceBufferAligned)]], 647 device uint *output [[buffer(2)]]) 648{ 649 if (idx >= options.indexCount) { return; }; 650 uint value; 651 if (kSourceBufferAligned) 652 { 653 value = getIndexAligned(inputAligned, options.srcOffset, idx); 654 } 655 else 656 { 657 value = getIndexUnalignedU32(input, options.srcOffset, idx); 658 } 659 output[idx] = value; 660} 661struct IndexFromArrayParams 662{ 663 uint firstVertex; 664 uint vertexCount; 665}; 666kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]], 667 constant IndexFromArrayParams &options [[buffer(0)]], 668 device uint *output [[buffer(2)]]) 669{ 670 if (idx >= options.vertexCount) { return; }; 671 uint vertexIdx = options.firstVertex + 2 + idx; 672 output[3 * idx ] = vertexIdx - 1; 673 output[3 * idx + 1] = vertexIdx; 674 output[3 * idx + 2] = options.firstVertex; 675} 676inline uint getIndexU32(uint offset, 677 uint idx, 678 constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]], 679 constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]], 680 constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]]) 681{ 682 if (kUseSourceBufferU8) 683 { 684 if (kSourceIndexIsU16) 685 { 686 return getIndexUnalignedU16(inputU8, offset, idx); 687 } 688 else if (kSourceIndexIsU32) 689 { 690 return getIndexUnalignedU32(inputU8, offset, idx); 691 } 692 return getIndexAligned(inputU8, offset, idx); 693 } 694 else if (kUseSourceBufferU16) 695 { 696 return getIndexAligned(inputU16, offset, idx); 697 } 698 else if (kUseSourceBufferU32) 699 { 700 return getIndexAligned(inputU32, offset, idx); 701 } 702 return 0; 703} 704kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]], 705 constant IndexConversionParams &options [[buffer(0)]], 706 constant uchar *inputU8 707 [[buffer(1), function_constant(kUseSourceBufferU8)]], 708 constant ushort *inputU16 709 [[buffer(1), function_constant(kUseSourceBufferU16)]], 710 constant uint *inputU32 711 [[buffer(1), function_constant(kUseSourceBufferU32)]], 712 device uint *output [[buffer(2)]]) 713{ 714 if (idx >= options.indexCount) { return; }; 715 uint elemIdx = 2 + idx; 716 output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32); 717 output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32); 718 output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32); 719} 720kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]], 721 constant IndexFromArrayParams &options [[buffer(0)]], 722 device uint *output [[buffer(2)]]) 723{ 724 uint totalIndices = options.vertexCount + 1; 725 if (idx >= totalIndices) { return; }; 726 output[idx] = options.firstVertex + idx % options.vertexCount; 727} 728kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]], 729 constant IndexConversionParams &options [[buffer(0)]], 730 constant uchar *inputU8 731 [[buffer(1), function_constant(kUseSourceBufferU8)]], 732 constant ushort *inputU16 733 [[buffer(1), function_constant(kUseSourceBufferU16)]], 734 constant uint *inputU32 735 [[buffer(1), function_constant(kUseSourceBufferU32)]], 736 device uint *output [[buffer(2)]]) 737{ 738 uint totalTargetIndices = options.indexCount + 1; 739 if (idx >= totalTargetIndices) { return; }; 740 output[idx] = 741 getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32); 742} 743using namespace rx::mtl_shader; 744struct GenMipParams 745{ 746 uint srcLevel; 747 uint numMipLevelsToGen; 748 bool sRGB; 749}; 750kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]], 751 ushort3 gIndices [[thread_position_in_grid]], 752 texture3d<float> srcTexture [[texture(0)]], 753 texture3d<float, access::write> dstMip1 [[texture(1)]], 754 texture3d<float, access::write> dstMip2 [[texture(2)]], 755 texture3d<float, access::write> dstMip3 [[texture(3)]], 756 texture3d<float, access::write> dstMip4 [[texture(4)]], 757 constant GenMipParams &options [[buffer(0)]]) 758{ 759 ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth()); 760 bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z; 761 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 762 threadgroup float sR[(8 * 8 * 8)]; 763 threadgroup float sG[(8 * 8 * 8)]; 764 threadgroup float sB[(8 * 8 * 8)]; 765 threadgroup float sA[(8 * 8 * 8)]; 766 float4 texel1; 767 if (validThread) 768 { 769 float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize); 770 texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); 771 dstMip1.write(texel1, gIndices); 772 } 773 else 774 { 775 lIndex = 0xffffffff; 776 } 777 if (options.numMipLevelsToGen == 1) 778 { 779 return; 780 } 781 if (options.sRGB) 782 { 783 texel1 = linearToSRGB(texel1); 784 } 785 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 786 threadgroup_barrier(mem_flags::mem_threadgroup); 787 if ((lIndex & 0x49) == 0) 788 { 789 bool3 atEdge = gIndices == (mipSize - ushort3(1)); 790 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 791 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 792 float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)])); 793 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 794 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)])); 795 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)])); 796 float4 texel8 = 797 (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)])); 798 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 799 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); 800 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 801 } 802 if (options.numMipLevelsToGen == 2) 803 { 804 return; 805 } 806 threadgroup_barrier(mem_flags::mem_threadgroup); 807 if ((lIndex & 0xdb) == 0) 808 { 809 mipSize = max(mipSize >> 1, ushort3(1)); 810 bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1)); 811 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 812 float4 texel3 = 813 (atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)])); 814 float4 texel4 = 815 (atEdge.z) ? (texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))])); 816 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 817 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)])); 818 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)])); 819 float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)])); 820 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 821 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); 822 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 823 } 824 if (options.numMipLevelsToGen == 3) 825 { 826 return; 827 } 828 threadgroup_barrier(mem_flags::mem_threadgroup); 829 if ((lIndex & 0x1ff) == 0) 830 { 831 mipSize = max(mipSize >> 1, ushort3(1)); 832 bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1)); 833 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 834 float4 texel3 = 835 (atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)])); 836 float4 texel4 = 837 (atEdge.z) ? (texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))])); 838 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 839 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)])); 840 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)])); 841 float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)])); 842 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 843 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); 844 } 845} 846kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]], 847 ushort2 gIndices [[thread_position_in_grid]], 848 texture2d<float> srcTexture [[texture(0)]], 849 texture2d<float, access::write> dstMip1 [[texture(1)]], 850 texture2d<float, access::write> dstMip2 [[texture(2)]], 851 texture2d<float, access::write> dstMip3 [[texture(3)]], 852 texture2d<float, access::write> dstMip4 [[texture(4)]], 853 constant GenMipParams &options [[buffer(0)]]) 854{ 855 uint firstMipLevel = options.srcLevel + 1; 856 ushort2 mipSize = 857 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 858 bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y; 859 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 860 threadgroup float sR[(8 * 8)]; 861 threadgroup float sG[(8 * 8)]; 862 threadgroup float sB[(8 * 8)]; 863 threadgroup float sA[(8 * 8)]; 864 float4 texel1; 865 if (validThread) 866 { 867 float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize); 868 texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); 869 dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices); 870 } 871 else 872 { 873 lIndex = 0xffffffff; 874 } 875 if (options.numMipLevelsToGen == 1) 876 { 877 return; 878 } 879 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 880 threadgroup_barrier(mem_flags::mem_threadgroup); 881 if ((lIndex & 0x09) == 0) 882 { 883 bool2 atEdge = gIndices == (mipSize - ushort2(1)); 884 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 885 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 886 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 887 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 888 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); 889 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 890 } 891 if (options.numMipLevelsToGen == 2) 892 { 893 return; 894 } 895 threadgroup_barrier(mem_flags::mem_threadgroup); 896 if ((lIndex & 0x1b) == 0) 897 { 898 mipSize = max(mipSize >> 1, ushort2(1)); 899 bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1)); 900 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 901 float4 texel3 = 902 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); 903 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 904 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 905 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); 906 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 907 } 908 if (options.numMipLevelsToGen == 3) 909 { 910 return; 911 } 912 threadgroup_barrier(mem_flags::mem_threadgroup); 913 if ((lIndex & 0x3f) == 0) 914 { 915 mipSize = max(mipSize >> 1, ushort2(1)); 916 bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1)); 917 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 918 float4 texel3 = 919 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); 920 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 921 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 922 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); 923 } 924} 925template <typename TextureTypeR, typename TextureTypeW> 926static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps( 927 uint lIndex, 928 ushort3 gIndices, 929 TextureTypeR srcTexture, 930 TextureTypeW dstMip2, 931 TextureTypeW dstMip3, 932 TextureTypeW dstMip4, 933 ushort2 mip1Size, 934 float4 mip1Texel, 935 threadgroup float *sR, 936 threadgroup float *sG, 937 threadgroup float *sB, 938 threadgroup float *sA, 939 constant GenMipParams &options) 940{ 941 ushort2 mipSize = mip1Size; 942 float4 texel1 = mip1Texel; 943 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 944 threadgroup_barrier(mem_flags::mem_threadgroup); 945 if ((lIndex & 0x09) == 0) 946 { 947 bool2 atEdge = gIndices.xy == (mipSize - ushort2(1)); 948 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 949 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 950 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 951 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 952 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z); 953 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 954 } 955 if (options.numMipLevelsToGen == 2) 956 { 957 return; 958 } 959 threadgroup_barrier(mem_flags::mem_threadgroup); 960 if ((lIndex & 0x1b) == 0) 961 { 962 mipSize = max(mipSize >> 1, ushort2(1)); 963 bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1)); 964 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 965 float4 texel3 = 966 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); 967 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 968 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 969 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z); 970 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 971 } 972 if (options.numMipLevelsToGen == 3) 973 { 974 return; 975 } 976 threadgroup_barrier(mem_flags::mem_threadgroup); 977 if ((lIndex & 0x3f) == 0) 978 { 979 mipSize = max(mipSize >> 1, ushort2(1)); 980 bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1)); 981 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 982 float4 texel3 = 983 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); 984 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 985 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 986 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z); 987 } 988} 989kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]], 990 ushort3 gIndices [[thread_position_in_grid]], 991 texturecube<float> srcTexture [[texture(0)]], 992 texturecube<float, access::write> dstMip1 [[texture(1)]], 993 texturecube<float, access::write> dstMip2 [[texture(2)]], 994 texturecube<float, access::write> dstMip3 [[texture(3)]], 995 texturecube<float, access::write> dstMip4 [[texture(4)]], 996 constant GenMipParams &options [[buffer(0)]]) 997{ 998 uint firstMipLevel = options.srcLevel + 1; 999 ushort2 mip1Size = 1000 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1001 bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; 1002 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1003 float4 mip1Texel; 1004 if (validThread) 1005 { 1006 float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); 1007 mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)), 1008 level(options.srcLevel)); 1009 dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); 1010 } 1011 else 1012 { 1013 lIndex = 0xffffffff; 1014 } 1015 if (options.numMipLevelsToGen == 1) 1016 { 1017 return; 1018 } 1019 threadgroup float sR[(8 * 8)]; 1020 threadgroup float sG[(8 * 8)]; 1021 threadgroup float sB[(8 * 8)]; 1022 threadgroup float sA[(8 * 8)]; 1023 generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, 1024 mip1Size, mip1Texel, sR, sG, sB, sA, options); 1025} 1026kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1027 ushort3 gIndices [[thread_position_in_grid]], 1028 texture2d_array<float> srcTexture [[texture(0)]], 1029 texture2d_array<float, access::write> dstMip1 [[texture(1)]], 1030 texture2d_array<float, access::write> dstMip2 [[texture(2)]], 1031 texture2d_array<float, access::write> dstMip3 [[texture(3)]], 1032 texture2d_array<float, access::write> dstMip4 [[texture(4)]], 1033 constant GenMipParams &options [[buffer(0)]]) 1034{ 1035 uint firstMipLevel = options.srcLevel + 1; 1036 ushort2 mip1Size = 1037 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1038 bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; 1039 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1040 float4 mip1Texel; 1041 if (validThread) 1042 { 1043 float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); 1044 mip1Texel = 1045 srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel)); 1046 dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); 1047 } 1048 else 1049 { 1050 lIndex = 0xffffffff; 1051 } 1052 if (options.numMipLevelsToGen == 1) 1053 { 1054 return; 1055 } 1056 threadgroup float sR[(8 * 8)]; 1057 threadgroup float sG[(8 * 8)]; 1058 threadgroup float sB[(8 * 8)]; 1059 threadgroup float sA[(8 * 8)]; 1060 generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, 1061 mip1Size, mip1Texel, sR, sG, sB, sA, options); 1062} 1063#include <metal_pack> 1064namespace rx 1065{ 1066namespace mtl_shader 1067{ 1068namespace FormatID 1069{ 1070enum 1071{ 1072 NONE, 1073 D16_UNORM, 1074 D24_UNORM_S8_UINT, 1075 D24_UNORM_X8_UINT, 1076 D32_FLOAT, 1077 D32_FLOAT_S8X24_UINT, 1078 D32_UNORM, 1079 S8_UINT, 1080 A16_FLOAT, 1081 A1R5G5B5_UNORM, 1082 A2R10G10B10_SINT_VERTEX, 1083 A2R10G10B10_SNORM_VERTEX, 1084 A2R10G10B10_SSCALED_VERTEX, 1085 A2R10G10B10_UINT_VERTEX, 1086 A2R10G10B10_UNORM_VERTEX, 1087 A2R10G10B10_USCALED_VERTEX, 1088 A32_FLOAT, 1089 A8_UNORM, 1090 ASTC_10x10_SRGB_BLOCK, 1091 ASTC_10x10_UNORM_BLOCK, 1092 ASTC_10x5_SRGB_BLOCK, 1093 ASTC_10x5_UNORM_BLOCK, 1094 ASTC_10x6_SRGB_BLOCK, 1095 ASTC_10x6_UNORM_BLOCK, 1096 ASTC_10x8_SRGB_BLOCK, 1097 ASTC_10x8_UNORM_BLOCK, 1098 ASTC_12x10_SRGB_BLOCK, 1099 ASTC_12x10_UNORM_BLOCK, 1100 ASTC_12x12_SRGB_BLOCK, 1101 ASTC_12x12_UNORM_BLOCK, 1102 ASTC_3x3x3_UNORM_BLOCK, 1103 ASTC_3x3x3_UNORM_SRGB_BLOCK, 1104 ASTC_4x3x3_UNORM_BLOCK, 1105 ASTC_4x3x3_UNORM_SRGB_BLOCK, 1106 ASTC_4x4_SRGB_BLOCK, 1107 ASTC_4x4_UNORM_BLOCK, 1108 ASTC_4x4x3_UNORM_BLOCK, 1109 ASTC_4x4x3_UNORM_SRGB_BLOCK, 1110 ASTC_4x4x4_UNORM_BLOCK, 1111 ASTC_4x4x4_UNORM_SRGB_BLOCK, 1112 ASTC_5x4_SRGB_BLOCK, 1113 ASTC_5x4_UNORM_BLOCK, 1114 ASTC_5x4x4_UNORM_BLOCK, 1115 ASTC_5x4x4_UNORM_SRGB_BLOCK, 1116 ASTC_5x5_SRGB_BLOCK, 1117 ASTC_5x5_UNORM_BLOCK, 1118 ASTC_5x5x4_UNORM_BLOCK, 1119 ASTC_5x5x4_UNORM_SRGB_BLOCK, 1120 ASTC_5x5x5_UNORM_BLOCK, 1121 ASTC_5x5x5_UNORM_SRGB_BLOCK, 1122 ASTC_6x5_SRGB_BLOCK, 1123 ASTC_6x5_UNORM_BLOCK, 1124 ASTC_6x5x5_UNORM_BLOCK, 1125 ASTC_6x5x5_UNORM_SRGB_BLOCK, 1126 ASTC_6x6_SRGB_BLOCK, 1127 ASTC_6x6_UNORM_BLOCK, 1128 ASTC_6x6x5_UNORM_BLOCK, 1129 ASTC_6x6x5_UNORM_SRGB_BLOCK, 1130 ASTC_6x6x6_UNORM_BLOCK, 1131 ASTC_6x6x6_UNORM_SRGB_BLOCK, 1132 ASTC_8x5_SRGB_BLOCK, 1133 ASTC_8x5_UNORM_BLOCK, 1134 ASTC_8x6_SRGB_BLOCK, 1135 ASTC_8x6_UNORM_BLOCK, 1136 ASTC_8x8_SRGB_BLOCK, 1137 ASTC_8x8_UNORM_BLOCK, 1138 B10G10R10A2_UNORM, 1139 B4G4R4A4_UNORM, 1140 B5G5R5A1_UNORM, 1141 B5G6R5_UNORM, 1142 B8G8R8A8_TYPELESS, 1143 B8G8R8A8_TYPELESS_SRGB, 1144 B8G8R8A8_UNORM, 1145 B8G8R8A8_UNORM_SRGB, 1146 B8G8R8X8_UNORM, 1147 B8G8R8X8_UNORM_SRGB, 1148 BC1_RGBA_UNORM_BLOCK, 1149 BC1_RGBA_UNORM_SRGB_BLOCK, 1150 BC1_RGB_UNORM_BLOCK, 1151 BC1_RGB_UNORM_SRGB_BLOCK, 1152 BC2_RGBA_UNORM_BLOCK, 1153 BC2_RGBA_UNORM_SRGB_BLOCK, 1154 BC3_RGBA_UNORM_BLOCK, 1155 BC3_RGBA_UNORM_SRGB_BLOCK, 1156 BC4_RED_SNORM_BLOCK, 1157 BC4_RED_UNORM_BLOCK, 1158 BC5_RG_SNORM_BLOCK, 1159 BC5_RG_UNORM_BLOCK, 1160 BC6H_RGB_SFLOAT_BLOCK, 1161 BC6H_RGB_UFLOAT_BLOCK, 1162 BC7_RGBA_UNORM_BLOCK, 1163 BC7_RGBA_UNORM_SRGB_BLOCK, 1164 EAC_R11G11_SNORM_BLOCK, 1165 EAC_R11G11_UNORM_BLOCK, 1166 EAC_R11_SNORM_BLOCK, 1167 EAC_R11_UNORM_BLOCK, 1168 ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK, 1169 ETC1_R8G8B8_UNORM_BLOCK, 1170 ETC2_R8G8B8A1_SRGB_BLOCK, 1171 ETC2_R8G8B8A1_UNORM_BLOCK, 1172 ETC2_R8G8B8A8_SRGB_BLOCK, 1173 ETC2_R8G8B8A8_UNORM_BLOCK, 1174 ETC2_R8G8B8_SRGB_BLOCK, 1175 ETC2_R8G8B8_UNORM_BLOCK, 1176 G8_B8R8_2PLANE_420_UNORM, 1177 G8_B8_R8_3PLANE_420_UNORM, 1178 L16A16_FLOAT, 1179 L16_FLOAT, 1180 L32A32_FLOAT, 1181 L32_FLOAT, 1182 L4A4_UNORM, 1183 L8A8_UNORM, 1184 L8_UNORM, 1185 PALETTE4_R4G4B4A4_UNORM, 1186 PALETTE4_R5G5B5A1_UNORM, 1187 PALETTE4_R5G6B5_UNORM, 1188 PALETTE4_R8G8B8A8_UNORM, 1189 PALETTE4_R8G8B8_UNORM, 1190 PALETTE8_R4G4B4A4_UNORM, 1191 PALETTE8_R5G5B5A1_UNORM, 1192 PALETTE8_R5G6B5_UNORM, 1193 PALETTE8_R8G8B8A8_UNORM, 1194 PALETTE8_R8G8B8_UNORM, 1195 PVRTC1_RGBA_2BPP_UNORM_BLOCK, 1196 PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK, 1197 PVRTC1_RGBA_4BPP_UNORM_BLOCK, 1198 PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK, 1199 PVRTC1_RGB_2BPP_UNORM_BLOCK, 1200 PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK, 1201 PVRTC1_RGB_4BPP_UNORM_BLOCK, 1202 PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK, 1203 R10G10B10A2_SINT, 1204 R10G10B10A2_SNORM, 1205 R10G10B10A2_SSCALED, 1206 R10G10B10A2_UINT, 1207 R10G10B10A2_UNORM, 1208 R10G10B10A2_USCALED, 1209 R10G10B10X2_UNORM, 1210 R11G11B10_FLOAT, 1211 R16G16B16A16_FLOAT, 1212 R16G16B16A16_SINT, 1213 R16G16B16A16_SNORM, 1214 R16G16B16A16_SSCALED, 1215 R16G16B16A16_UINT, 1216 R16G16B16A16_UNORM, 1217 R16G16B16A16_USCALED, 1218 R16G16B16_FLOAT, 1219 R16G16B16_SINT, 1220 R16G16B16_SNORM, 1221 R16G16B16_SSCALED, 1222 R16G16B16_UINT, 1223 R16G16B16_UNORM, 1224 R16G16B16_USCALED, 1225 R16G16_FLOAT, 1226 R16G16_SINT, 1227 R16G16_SNORM, 1228 R16G16_SSCALED, 1229 R16G16_UINT, 1230 R16G16_UNORM, 1231 R16G16_USCALED, 1232 R16_FLOAT, 1233 R16_SINT, 1234 R16_SNORM, 1235 R16_SSCALED, 1236 R16_UINT, 1237 R16_UNORM, 1238 R16_USCALED, 1239 R32G32B32A32_FIXED, 1240 R32G32B32A32_FLOAT, 1241 R32G32B32A32_SINT, 1242 R32G32B32A32_SNORM, 1243 R32G32B32A32_SSCALED, 1244 R32G32B32A32_UINT, 1245 R32G32B32A32_UNORM, 1246 R32G32B32A32_USCALED, 1247 R32G32B32_FIXED, 1248 R32G32B32_FLOAT, 1249 R32G32B32_SINT, 1250 R32G32B32_SNORM, 1251 R32G32B32_SSCALED, 1252 R32G32B32_UINT, 1253 R32G32B32_UNORM, 1254 R32G32B32_USCALED, 1255 R32G32_FIXED, 1256 R32G32_FLOAT, 1257 R32G32_SINT, 1258 R32G32_SNORM, 1259 R32G32_SSCALED, 1260 R32G32_UINT, 1261 R32G32_UNORM, 1262 R32G32_USCALED, 1263 R32_FIXED, 1264 R32_FLOAT, 1265 R32_SINT, 1266 R32_SNORM, 1267 R32_SSCALED, 1268 R32_UINT, 1269 R32_UNORM, 1270 R32_USCALED, 1271 R4G4B4A4_UNORM, 1272 R5G5B5A1_UNORM, 1273 R5G6B5_UNORM, 1274 R8G8B8A8_SINT, 1275 R8G8B8A8_SNORM, 1276 R8G8B8A8_SSCALED, 1277 R8G8B8A8_TYPELESS, 1278 R8G8B8A8_TYPELESS_SRGB, 1279 R8G8B8A8_UINT, 1280 R8G8B8A8_UNORM, 1281 R8G8B8A8_UNORM_SRGB, 1282 R8G8B8A8_USCALED, 1283 R8G8B8X8_UNORM, 1284 R8G8B8X8_UNORM_SRGB, 1285 R8G8B8_SINT, 1286 R8G8B8_SNORM, 1287 R8G8B8_SSCALED, 1288 R8G8B8_UINT, 1289 R8G8B8_UNORM, 1290 R8G8B8_UNORM_SRGB, 1291 R8G8B8_USCALED, 1292 R8G8_SINT, 1293 R8G8_SNORM, 1294 R8G8_SSCALED, 1295 R8G8_UINT, 1296 R8G8_UNORM, 1297 R8G8_UNORM_SRGB, 1298 R8G8_USCALED, 1299 R8_SINT, 1300 R8_SNORM, 1301 R8_SSCALED, 1302 R8_UINT, 1303 R8_UNORM, 1304 R8_UNORM_SRGB, 1305 R8_USCALED, 1306 R9G9B9E5_SHAREDEXP, 1307 X2R10G10B10_SINT_VERTEX, 1308 X2R10G10B10_SNORM_VERTEX, 1309 X2R10G10B10_SSCALED_VERTEX, 1310 X2R10G10B10_UINT_VERTEX, 1311 X2R10G10B10_UNORM_VERTEX, 1312 X2R10G10B10_USCALED_VERTEX, 1313 EXTERNAL0, 1314 EXTERNAL1, 1315 EXTERNAL2, 1316 EXTERNAL3, 1317 EXTERNAL4, 1318 EXTERNAL5, 1319 EXTERNAL6, 1320 EXTERNAL7 1321}; 1322} 1323} 1324} 1325 1326using namespace rx::mtl_shader; 1327constant int kCopyFormatType [[function_constant(10)]]; 1328constant int kCopyTextureType [[function_constant(20)]]; 1329constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D; 1330constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray; 1331constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample; 1332constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube; 1333constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D; 1334struct CopyPixelParams 1335{ 1336 uint3 copySize; 1337 uint3 textureOffset; 1338 uint bufferStartOffset; 1339 uint pixelSize; 1340 uint bufferRowPitch; 1341 uint bufferDepthPitch; 1342}; 1343struct WritePixelParams 1344{ 1345 uint2 copySize; 1346 uint2 textureOffset; 1347 uint bufferStartOffset; 1348 uint pixelSize; 1349 uint bufferRowPitch; 1350 uint textureLevel; 1351 uint textureLayer; 1352 bool reverseTextureRowOrder; 1353}; 1354template <typename T> 1355static inline void textureWrite(ushort3 gIndices, 1356 constant CopyPixelParams &options, 1357 vec<T, 4> color, 1358 texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 1359{ 1360 uint3 writeIndices = options.textureOffset + uint3(gIndices); 1361 switch (kCopyTextureType) 1362 { 1363 case kTextureType2D: 1364 dstTexture2d.write(color, writeIndices.xy); 1365 break; 1366 case kTextureType2DArray: 1367 dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z); 1368 break; 1369 case kTextureType3D: 1370 dstTexture3d.write(color, writeIndices); 1371 break; 1372 case kTextureTypeCube: 1373 dstTextureCube.write(color, writeIndices.xy, writeIndices.z); 1374 break; 1375 } 1376} 1377template <typename T> 1378static inline vec<T, 4> textureRead(ushort2 gIndices, 1379 constant WritePixelParams &options, 1380 texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]]) 1381{ 1382 vec<T, 4> color; 1383 uint2 coords = uint2(gIndices); 1384 if (options.reverseTextureRowOrder) 1385 { 1386 coords.y = options.copySize.y - 1 - gIndices.y; 1387 } 1388 coords += options.textureOffset; 1389 switch (kCopyTextureType) 1390 { 1391 case kTextureType2D: 1392 color = srcTexture2d.read(coords.xy, options.textureLevel); 1393 break; 1394 case kTextureType2DArray: 1395 color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel); 1396 break; 1397 case kTextureType2DMultisample: 1398 color = resolveTextureMS(srcTexture2dMS, coords.xy); 1399 break; 1400 case kTextureType3D: 1401 color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel); 1402 break; 1403 case kTextureTypeCube: 1404 color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel); 1405 break; 1406 } 1407 return color; 1408} 1409static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer) 1410{ 1411 float4 color; 1412 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1413 color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); 1414 color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src)); 1415 color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src)); 1416 color.a = 1.0; 1417 return color; 1418} 1419static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1420{ 1421 ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | 1422 shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) | 1423 shiftData<5, 0>(floatToNormalized<5, ushort>(color.b)); 1424 shortToBytes(dst, bufferOffset, buffer); 1425} 1426static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer) 1427{ 1428 float4 color; 1429 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1430 color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src)); 1431 color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src)); 1432 color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src)); 1433 color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src)); 1434 return color; 1435} 1436static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1437{ 1438 ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) | 1439 shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) | 1440 shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) | 1441 shiftData<4, 0>(floatToNormalized<4, ushort>(color.a)); 1442 ; 1443 shortToBytes(dst, bufferOffset, buffer); 1444} 1445static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer) 1446{ 1447 float4 color; 1448 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1449 color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); 1450 color.g = normalizedToFloat<5>(getShiftedData<5, 6>(src)); 1451 color.b = normalizedToFloat<5>(getShiftedData<5, 1>(src)); 1452 color.a = normalizedToFloat<1>(getShiftedData<1, 0>(src)); 1453 return color; 1454} 1455static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1456{ 1457 ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | 1458 shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) | 1459 shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) | 1460 shiftData<1, 0>(floatToNormalized<1, ushort>(color.a)); 1461 ; 1462 shortToBytes(dst, bufferOffset, buffer); 1463} 1464static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer) 1465{ 1466 int4 color; 1467 int src = bytesToInt<int>(buffer, bufferOffset); 1468 constexpr int3 rgbSignMask(0x200); 1469 constexpr int3 negativeMask(0xfffffc00); 1470 constexpr int alphaSignMask = 0x2; 1471 constexpr int alphaNegMask = 0xfffffffc; 1472 color.r = getShiftedData<10, 0>(src); 1473 color.g = getShiftedData<10, 10>(src); 1474 color.b = getShiftedData<10, 20>(src); 1475 int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9; 1476 color.rgb = (isRgbNegative * negativeMask) | color.rgb; 1477 color.a = getShiftedData<2, 30>(src); 1478 int isAlphaNegative = color.a & alphaSignMask >> 1; 1479 color.a = (isAlphaNegative * alphaNegMask) | color.a; 1480 return color; 1481} 1482static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer) 1483{ 1484 uint4 color; 1485 uint src = bytesToInt<uint>(buffer, bufferOffset); 1486 color.r = getShiftedData<10, 0>(src); 1487 color.g = getShiftedData<10, 10>(src); 1488 color.b = getShiftedData<10, 20>(src); 1489 color.a = getShiftedData<2, 30>(src); 1490 return color; 1491} 1492static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB) 1493{ 1494 float4 color; 1495 uint src = bytesToInt<uint>(buffer, bufferOffset); 1496 if (isSRGB) 1497 { 1498 color = unpack_unorm4x8_srgb_to_float(src); 1499 } 1500 else 1501 { 1502 color = unpack_unorm4x8_to_float(src); 1503 } 1504 return color; 1505} 1506static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB) 1507{ 1508 uint dst; 1509 if (isSRGB) 1510 { 1511 dst = pack_float_to_srgb_unorm4x8(color); 1512 } 1513 else 1514 { 1515 dst = pack_float_to_unorm4x8(color); 1516 } 1517 intToBytes(dst, bufferOffset, buffer); 1518} 1519static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB) 1520{ 1521 float4 color; 1522 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 1523 color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 1524 color.b = normalizedToFloat<uchar>(buffer[bufferOffset + 2]); 1525 color.a = 1.0; 1526 if (isSRGB) 1527 { 1528 color = sRGBtoLinear(color); 1529 } 1530 return color; 1531} 1532static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB) 1533{ 1534 color.a = 1.0; 1535 uint dst; 1536 if (isSRGB) 1537 { 1538 dst = pack_float_to_srgb_unorm4x8(color); 1539 } 1540 else 1541 { 1542 dst = pack_float_to_unorm4x8(color); 1543 } 1544 int24bitToBytes(dst, bufferOffset, buffer); 1545} 1546static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer) 1547{ 1548 float4 color; 1549 uint src = bytesToInt<uint>(buffer, bufferOffset); 1550 color = unpack_snorm4x8_to_float(src); 1551 return color; 1552} 1553static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1554{ 1555 uint dst = pack_float_to_snorm4x8(color); 1556 intToBytes(dst, bufferOffset, buffer); 1557} 1558static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer) 1559{ 1560 float4 color; 1561 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 1562 color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); 1563 color.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]); 1564 color.a = 1.0; 1565 return color; 1566} 1567static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1568{ 1569 uint dst = pack_float_to_snorm4x8(color); 1570 int24bitToBytes(dst, bufferOffset, buffer); 1571} 1572static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer) 1573{ 1574 return readR8G8B8A8(bufferOffset, buffer, false); 1575} 1576static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1577{ 1578 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); 1579} 1580static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 1581{ 1582 return readR8G8B8A8(bufferOffset, buffer, true); 1583} 1584static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1585{ 1586 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); 1587} 1588static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer) 1589{ 1590 return readR8G8B8A8(bufferOffset, buffer, false).bgra; 1591} 1592static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1593{ 1594 color.rgba = color.bgra; 1595 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); 1596} 1597static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 1598{ 1599 return readR8G8B8A8(bufferOffset, buffer, true).bgra; 1600} 1601static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1602{ 1603 color.rgba = color.bgra; 1604 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); 1605} 1606static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer) 1607{ 1608 return readR8G8B8(bufferOffset, buffer, false); 1609} 1610static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1611{ 1612 return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false); 1613} 1614static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 1615{ 1616 return readR8G8B8(bufferOffset, buffer, true); 1617} 1618static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1619{ 1620 return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true); 1621} 1622static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer) 1623{ 1624 float4 color; 1625 color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset])); 1626 color.a = 1.0; 1627 return color; 1628} 1629static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1630{ 1631 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 1632} 1633static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1634{ 1635 buffer[bufferOffset] = floatToNormalized<uchar>(color.a); 1636} 1637static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer) 1638{ 1639 float4 color; 1640 color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset])); 1641 color.a = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 1642 return color; 1643} 1644static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1645{ 1646 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 1647 buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.a); 1648} 1649static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer) 1650{ 1651 float4 color; 1652 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 1653 color.g = color.b = 0.0; 1654 color.a = 1.0; 1655 return color; 1656} 1657static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1658{ 1659 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 1660} 1661static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer) 1662{ 1663 float4 color; 1664 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 1665 color.g = color.b = 0.0; 1666 color.a = 1.0; 1667 return color; 1668} 1669static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1670{ 1671 buffer[bufferOffset] = as_type<uchar>(floatToNormalized<char>(color.r)); 1672} 1673static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer) 1674{ 1675 int4 color; 1676 color.r = as_type<char>(buffer[bufferOffset]); 1677 color.g = color.b = 0; 1678 color.a = 1; 1679 return color; 1680} 1681static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 1682{ 1683 buffer[bufferOffset] = static_cast<uchar>(color.r); 1684} 1685static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer) 1686{ 1687 uint4 color; 1688 color.r = as_type<uchar>(buffer[bufferOffset]); 1689 color.g = color.b = 0; 1690 color.a = 1; 1691 return color; 1692} 1693static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 1694{ 1695 buffer[bufferOffset] = static_cast<uchar>(color.r); 1696} 1697static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer) 1698{ 1699 float4 color; 1700 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 1701 color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 1702 color.b = 0.0; 1703 color.a = 1.0; 1704 return color; 1705} 1706static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1707{ 1708 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 1709 buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.g); 1710} 1711static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer) 1712{ 1713 float4 color; 1714 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 1715 color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); 1716 color.b = 0.0; 1717 color.a = 1.0; 1718 return color; 1719} 1720static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1721{ 1722 buffer[bufferOffset] = as_type<uchar>(floatToNormalized<char>(color.r)); 1723 buffer[bufferOffset + 1] = as_type<uchar>(floatToNormalized<char>(color.g)); 1724} 1725static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer) 1726{ 1727 int4 color; 1728 color.r = as_type<char>(buffer[bufferOffset]); 1729 color.g = as_type<char>(buffer[bufferOffset + 1]); 1730 color.b = 0; 1731 color.a = 1; 1732 return color; 1733} 1734static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 1735{ 1736 buffer[bufferOffset] = static_cast<uchar>(color.r); 1737 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 1738} 1739static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer) 1740{ 1741 uint4 color; 1742 color.r = as_type<uchar>(buffer[bufferOffset]); 1743 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 1744 color.b = 0; 1745 color.a = 1; 1746 return color; 1747} 1748static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 1749{ 1750 buffer[bufferOffset] = static_cast<uchar>(color.r); 1751 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 1752} 1753static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer) 1754{ 1755 int4 color; 1756 color.r = as_type<char>(buffer[bufferOffset]); 1757 color.g = as_type<char>(buffer[bufferOffset + 1]); 1758 color.b = as_type<char>(buffer[bufferOffset + 2]); 1759 color.a = 1; 1760 return color; 1761} 1762static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer) 1763{ 1764 uint4 color; 1765 color.r = as_type<uchar>(buffer[bufferOffset]); 1766 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 1767 color.b = as_type<uchar>(buffer[bufferOffset + 2]); 1768 color.a = 1; 1769 return color; 1770} 1771static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer) 1772{ 1773 int4 color; 1774 color.r = as_type<char>(buffer[bufferOffset]); 1775 color.g = as_type<char>(buffer[bufferOffset + 1]); 1776 color.b = as_type<char>(buffer[bufferOffset + 2]); 1777 color.a = as_type<char>(buffer[bufferOffset + 3]); 1778 return color; 1779} 1780static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 1781{ 1782 buffer[bufferOffset] = static_cast<uchar>(color.r); 1783 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 1784 buffer[bufferOffset + 2] = static_cast<uchar>(color.b); 1785 buffer[bufferOffset + 3] = static_cast<uchar>(color.a); 1786} 1787static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer) 1788{ 1789 uint4 color; 1790 color.r = as_type<uchar>(buffer[bufferOffset]); 1791 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 1792 color.b = as_type<uchar>(buffer[bufferOffset + 2]); 1793 color.a = as_type<uchar>(buffer[bufferOffset + 3]); 1794 return color; 1795} 1796static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 1797{ 1798 buffer[bufferOffset] = static_cast<uchar>(color.r); 1799 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 1800 buffer[bufferOffset + 2] = static_cast<uchar>(color.b); 1801 buffer[bufferOffset + 3] = static_cast<uchar>(color.a); 1802} 1803static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer) 1804{ 1805 float4 color; 1806 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 1807 color.g = color.b = 0.0; 1808 color.a = 1.0; 1809 return color; 1810} 1811static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1812{ 1813 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 1814} 1815template <typename ShortType> 1816static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer) 1817{ 1818 float4 color; 1819 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 1820 color.g = color.b = 0.0; 1821 color.a = 1.0; 1822 return color; 1823} 1824template<typename ShortType> 1825static inline void writeR16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1826{ 1827 shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer); 1828} 1829static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer) 1830{ 1831 int4 color; 1832 color.r = bytesToShort<short>(buffer, bufferOffset); 1833 color.g = color.b = 0; 1834 color.a = 1; 1835 return color; 1836} 1837static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 1838{ 1839 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 1840} 1841static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer) 1842{ 1843 uint4 color; 1844 color.r = bytesToShort<ushort>(buffer, bufferOffset); 1845 color.g = color.b = 0; 1846 color.a = 1; 1847 return color; 1848} 1849static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 1850{ 1851 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 1852} 1853static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer) 1854{ 1855 float4 color; 1856 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 1857 color.rgb = 0.0; 1858 return color; 1859} 1860static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1861{ 1862 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset, buffer); 1863} 1864static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer) 1865{ 1866 float4 color; 1867 color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 1868 color.a = 1.0; 1869 return color; 1870} 1871static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1872{ 1873 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 1874} 1875static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer) 1876{ 1877 float4 color; 1878 color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 1879 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 1880 return color; 1881} 1882static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1883{ 1884 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 1885 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 2, buffer); 1886} 1887static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer) 1888{ 1889 float4 color; 1890 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 1891 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 1892 color.b = 0.0; 1893 color.a = 1.0; 1894 return color; 1895} 1896static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1897{ 1898 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 1899 shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer); 1900} 1901template <typename ShortType> 1902static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer) 1903{ 1904 float4 color; 1905 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 1906 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 1907 color.b = 0.0; 1908 color.a = 1.0; 1909 return color; 1910} 1911template<typename ShortType> 1912static inline void writeR16G16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1913{ 1914 shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer); 1915 shortToBytes(floatToNormalized<ShortType>(color.g), bufferOffset + 2, buffer); 1916} 1917static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer) 1918{ 1919 int4 color; 1920 color.r = bytesToShort<short>(buffer, bufferOffset); 1921 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 1922 color.b = 0; 1923 color.a = 1; 1924 return color; 1925} 1926static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 1927{ 1928 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 1929 shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer); 1930} 1931static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer) 1932{ 1933 uint4 color; 1934 color.r = bytesToShort<ushort>(buffer, bufferOffset); 1935 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 1936 color.b = 0; 1937 color.a = 1; 1938 return color; 1939} 1940static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 1941{ 1942 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 1943 shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer); 1944} 1945static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer) 1946{ 1947 float4 color; 1948 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 1949 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 1950 color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4)); 1951 color.a = 1.0; 1952 return color; 1953} 1954template <typename ShortType> 1955static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer) 1956{ 1957 float4 color; 1958 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 1959 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 1960 color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4)); 1961 color.a = 1.0; 1962 return color; 1963} 1964static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer) 1965{ 1966 int4 color; 1967 color.r = bytesToShort<short>(buffer, bufferOffset); 1968 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 1969 color.b = bytesToShort<short>(buffer, bufferOffset + 4); 1970 color.a = 1; 1971 return color; 1972} 1973static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer) 1974{ 1975 uint4 color; 1976 color.r = bytesToShort<ushort>(buffer, bufferOffset); 1977 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 1978 color.b = bytesToShort<ushort>(buffer, bufferOffset + 4); 1979 color.a = 1; 1980 return color; 1981} 1982static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer) 1983{ 1984 float4 color; 1985 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 1986 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 1987 color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4)); 1988 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6)); 1989 return color; 1990} 1991static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1992{ 1993 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 1994 shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer); 1995 shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer); 1996 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer); 1997} 1998template <typename ShortType> 1999static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer) 2000{ 2001 float4 color; 2002 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2003 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2004 color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4)); 2005 color.a = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6)); 2006 return color; 2007} 2008template<typename ShortType> 2009static inline void writeR16G16B16A16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2010{ 2011 shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer); 2012 shortToBytes(floatToNormalized<ShortType>(color.g), bufferOffset + 2, buffer); 2013 shortToBytes(floatToNormalized<ShortType>(color.b), bufferOffset + 4, buffer); 2014 shortToBytes(floatToNormalized<ShortType>(color.a), bufferOffset + 6, buffer); 2015} 2016static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer) 2017{ 2018 int4 color; 2019 color.r = bytesToShort<short>(buffer, bufferOffset); 2020 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2021 color.b = bytesToShort<short>(buffer, bufferOffset + 4); 2022 color.a = bytesToShort<short>(buffer, bufferOffset + 6); 2023 return color; 2024} 2025static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2026{ 2027 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2028 shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer); 2029 shortToBytes(static_cast<short>(color.b), bufferOffset + 4, buffer); 2030 shortToBytes(static_cast<short>(color.a), bufferOffset + 6, buffer); 2031} 2032static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer) 2033{ 2034 uint4 color; 2035 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2036 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2037 color.b = bytesToShort<ushort>(buffer, bufferOffset + 4); 2038 color.a = bytesToShort<ushort>(buffer, bufferOffset + 6); 2039 return color; 2040} 2041static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2042{ 2043 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2044 shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer); 2045 shortToBytes(static_cast<ushort>(color.b), bufferOffset + 4, buffer); 2046 shortToBytes(static_cast<ushort>(color.a), bufferOffset + 6, buffer); 2047} 2048static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer) 2049{ 2050 float4 color; 2051 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2052 color.g = color.b = 0.0; 2053 color.a = 1.0; 2054 return color; 2055} 2056static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2057{ 2058 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2059} 2060template <typename IntType> 2061static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer) 2062{ 2063 float4 color; 2064 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2065 color.g = color.b = 0.0; 2066 color.a = 1.0; 2067 return color; 2068} 2069static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer) 2070{ 2071 float4 color; 2072 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2073 color.rgb = 0.0; 2074 return color; 2075} 2076static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2077{ 2078 intToBytes(as_type<uint>(color.a), bufferOffset, buffer); 2079} 2080static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer) 2081{ 2082 float4 color; 2083 color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2084 color.a = 1.0; 2085 return color; 2086} 2087static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2088{ 2089 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2090} 2091static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer) 2092{ 2093 int4 color; 2094 color.r = bytesToInt<int>(buffer, bufferOffset); 2095 color.g = color.b = 0; 2096 color.a = 1; 2097 return color; 2098} 2099static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2100{ 2101 intToBytes(color.r, bufferOffset, buffer); 2102} 2103static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer) 2104{ 2105 float4 color; 2106 constexpr float kDivisor = 1.0f / (1 << 16); 2107 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2108 color.g = color.b = 0.0; 2109 color.a = 1.0; 2110 return color; 2111} 2112static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer) 2113{ 2114 uint4 color; 2115 color.r = bytesToInt<uint>(buffer, bufferOffset); 2116 color.g = color.b = 0; 2117 color.a = 1; 2118 return color; 2119} 2120static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2121{ 2122 intToBytes(color.r, bufferOffset, buffer); 2123} 2124static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer) 2125{ 2126 float4 color; 2127 color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2128 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2129 return color; 2130} 2131static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2132{ 2133 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2134 intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer); 2135} 2136static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer) 2137{ 2138 float4 color; 2139 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2140 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2141 color.b = 0.0; 2142 color.a = 1.0; 2143 return color; 2144} 2145static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2146{ 2147 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2148 intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer); 2149} 2150template <typename IntType> 2151static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer) 2152{ 2153 float4 color; 2154 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2155 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2156 color.b = 0.0; 2157 color.a = 1.0; 2158 return color; 2159} 2160static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer) 2161{ 2162 int4 color; 2163 color.r = bytesToInt<int>(buffer, bufferOffset); 2164 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2165 color.b = 0; 2166 color.a = 1; 2167 return color; 2168} 2169static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2170{ 2171 intToBytes(color.r, bufferOffset, buffer); 2172 intToBytes(color.g, bufferOffset + 4, buffer); 2173} 2174static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer) 2175{ 2176 float4 color; 2177 constexpr float kDivisor = 1.0f / (1 << 16); 2178 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2179 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2180 color.b = 0.0; 2181 color.a = 1.0; 2182 return color; 2183} 2184static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer) 2185{ 2186 uint4 color; 2187 color.r = bytesToInt<uint>(buffer, bufferOffset); 2188 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2189 color.b = 0; 2190 color.a = 1; 2191 return color; 2192} 2193static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2194{ 2195 intToBytes(color.r, bufferOffset, buffer); 2196 intToBytes(color.g, bufferOffset + 4, buffer); 2197} 2198static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer) 2199{ 2200 float4 color; 2201 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2202 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2203 color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8)); 2204 color.a = 1.0; 2205 return color; 2206} 2207template <typename IntType> 2208static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer) 2209{ 2210 float4 color; 2211 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2212 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2213 color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8)); 2214 color.a = 1.0; 2215 return color; 2216} 2217static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer) 2218{ 2219 int4 color; 2220 color.r = bytesToInt<int>(buffer, bufferOffset); 2221 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2222 color.b = bytesToInt<int>(buffer, bufferOffset + 8); 2223 color.a = 1; 2224 return color; 2225} 2226static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer) 2227{ 2228 float4 color; 2229 constexpr float kDivisor = 1.0f / (1 << 16); 2230 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2231 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2232 color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor; 2233 color.a = 1.0; 2234 return color; 2235} 2236static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer) 2237{ 2238 uint4 color; 2239 color.r = bytesToInt<uint>(buffer, bufferOffset); 2240 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2241 color.b = bytesToInt<uint>(buffer, bufferOffset + 8); 2242 color.a = 1; 2243 return color; 2244} 2245static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer) 2246{ 2247 float4 color; 2248 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2249 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2250 color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8)); 2251 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12)); 2252 return color; 2253} 2254static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2255{ 2256 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2257 intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer); 2258 intToBytes(as_type<uint>(color.b), bufferOffset + 8, buffer); 2259 intToBytes(as_type<uint>(color.a), bufferOffset + 12, buffer); 2260} 2261template <typename IntType> 2262static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer) 2263{ 2264 float4 color; 2265 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2266 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2267 color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8)); 2268 color.a = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12)); 2269 return color; 2270} 2271static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer) 2272{ 2273 int4 color; 2274 color.r = bytesToInt<int>(buffer, bufferOffset); 2275 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2276 color.b = bytesToInt<int>(buffer, bufferOffset + 8); 2277 color.a = bytesToInt<int>(buffer, bufferOffset + 12); 2278 return color; 2279} 2280static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2281{ 2282 intToBytes(color.r, bufferOffset, buffer); 2283 intToBytes(color.g, bufferOffset + 4, buffer); 2284 intToBytes(color.b, bufferOffset + 8, buffer); 2285 intToBytes(color.a, bufferOffset + 12, buffer); 2286} 2287static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer) 2288{ 2289 float4 color; 2290 constexpr float kDivisor = 1.0f / (1 << 16); 2291 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2292 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2293 color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor; 2294 color.a = bytesToInt<int>(buffer, bufferOffset + 12) * kDivisor; 2295 return color; 2296} 2297static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer) 2298{ 2299 uint4 color; 2300 color.r = bytesToInt<uint>(buffer, bufferOffset); 2301 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2302 color.b = bytesToInt<uint>(buffer, bufferOffset + 8); 2303 color.a = bytesToInt<uint>(buffer, bufferOffset + 12); 2304 return color; 2305} 2306static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2307{ 2308 intToBytes(color.r, bufferOffset, buffer); 2309 intToBytes(color.g, bufferOffset + 4, buffer); 2310 intToBytes(color.b, bufferOffset + 8, buffer); 2311 intToBytes(color.a, bufferOffset + 12, buffer); 2312} 2313static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8_SINT(bufferOffset, buffer); } static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8_UINT(bufferOffset, buffer); } static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_UINT(bufferOffset, buffer); } 2314static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16_SINT(bufferOffset, buffer); } static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16_UINT(bufferOffset, buffer); } static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_UINT(bufferOffset, buffer); } 2315static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32_SINT(bufferOffset, buffer); } static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32_UINT(bufferOffset, buffer); } static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_UINT(bufferOffset, buffer); } 2316static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); } 2317kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2318{ 2319 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2320 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2321 switch (kCopyFormatType) 2322 { 2323 case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SNORM: { auto color = readR16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UNORM: { auto color = readR16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SNORM: { auto color = readR16G16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UNORM: { auto color = readR16G16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SNORM: { auto color = readR16G16B16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UNORM: { auto color = readR16G16B16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SNORM: { auto color = readR16G16B16A16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UNORM: { auto color = readR16G16B16A16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2324 } 2325} 2326kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2327{ 2328 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2329 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2330 switch (kCopyFormatType) 2331 { 2332 case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2333 } 2334} 2335kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2336{ 2337 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2338 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2339 switch (kCopyFormatType) 2340 { 2341 case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2342 } 2343} 2344kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2345{ 2346 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2347 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2348 switch (kCopyFormatType) 2349 { 2350 case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; 2351 } 2352} 2353kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<int, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<int, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2354{ 2355 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2356 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2357 switch (kCopyFormatType) 2358 { 2359 case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break; 2360 } 2361} 2362kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<uint, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<uint, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2363{ 2364 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2365 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2366 switch (kCopyFormatType) 2367 { 2368 case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break; 2369 } 2370} 2371struct CopyVertexParams 2372{ 2373 uint srcBufferStartOffset; 2374 uint srcStride; 2375 uint srcComponentBytes; 2376 uint srcComponents; 2377 uchar4 srcDefaultAlphaData; 2378 uint dstBufferStartOffset; 2379 uint dstStride; 2380 uint dstComponents; 2381 uint vertexCount; 2382}; 2383template <typename IntType> 2384static inline void writeFloatVertex(constant CopyVertexParams &options, 2385 uint idx, 2386 vec<IntType, 4> data, 2387 device uchar *dst) 2388{ 2389 uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; 2390 for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) 2391 { 2392 floatToBytes(static_cast<float>(data[component]), dstOffset, dst); 2393 } 2394} 2395template <> 2396inline void writeFloatVertex(constant CopyVertexParams &options, 2397 uint idx, 2398 vec<float, 4> data, 2399 device uchar *dst) 2400{ 2401 uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; 2402 for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) 2403 { 2404 floatToBytes(data[component], dstOffset, dst); 2405 } 2406} 2407static inline void convertToFloatVertexFormat(uint index, 2408 constant CopyVertexParams &options, 2409 constant uchar *srcBuffer, 2410 device uchar *dstBuffer) 2411{ 2412 uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index; 2413 switch (kCopyFormatType) 2414 { 2415 case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; 2416 } 2417} 2418kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]], 2419 constant CopyVertexParams &options [[buffer(0)]], 2420 constant uchar *srcBuffer [[buffer(1)]], 2421 device uchar *dstBuffer [[buffer(2)]]) 2422{ 2423 if (index >= options.vertexCount) { return; }; 2424 convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); 2425} 2426vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]], 2427 constant CopyVertexParams &options [[buffer(0)]], 2428 constant uchar *srcBuffer [[buffer(1)]], 2429 device uchar *dstBuffer [[buffer(2)]]) 2430{ 2431 convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); 2432} 2433static inline void expandVertexFormatComponents(uint index, 2434 constant CopyVertexParams &options, 2435 constant uchar *srcBuffer, 2436 device uchar *dstBuffer) 2437{ 2438 uint srcOffset = options.srcBufferStartOffset + options.srcStride * index; 2439 uint dstOffset = options.dstBufferStartOffset + options.dstStride * index; 2440 uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u); 2441 uint component; 2442 for (component = 0; component < options.srcComponents; ++component, 2443 srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes) 2444 { 2445 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 2446 { 2447 dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte]; 2448 } 2449 } 2450 for (; component < dstComponentsBeforeAlpha; 2451 ++component, dstOffset += options.srcComponentBytes) 2452 { 2453 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 2454 { 2455 dstBuffer[dstOffset + byte] = 0; 2456 } 2457 } 2458 if (component < options.dstComponents) 2459 { 2460 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 2461 { 2462 dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte]; 2463 } 2464 } 2465} 2466kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]], 2467 constant CopyVertexParams &options [[buffer(0)]], 2468 constant uchar *srcBuffer [[buffer(1)]], 2469 device uchar *dstBuffer [[buffer(2)]]) 2470{ 2471 if (index >= options.vertexCount) { return; }; 2472 expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); 2473} 2474vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]], 2475 constant CopyVertexParams &options [[buffer(0)]], 2476 constant uchar *srcBuffer [[buffer(1)]], 2477 device uchar *dstBuffer [[buffer(2)]]) 2478{ 2479 expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); 2480} 2481kernel void linearizeBlocks(ushort2 position [[thread_position_in_grid]], 2482 constant uint2 *dimensions [[buffer(0)]], 2483 constant uint2 *srcBuffer [[buffer(1)]], 2484 device uint2 *dstBuffer [[buffer(2)]]) 2485{ 2486 if (any(uint2(position) >= *dimensions)) 2487 { 2488 return; 2489 } 2490 uint2 t = uint2(position); 2491 t = (t | (t << 8)) & 0x00FF00FF; 2492 t = (t | (t << 4)) & 0x0F0F0F0F; 2493 t = (t | (t << 2)) & 0x33333333; 2494 t = (t | (t << 1)) & 0x55555555; 2495 dstBuffer[position.y * (*dimensions).x + position.x] = srcBuffer[(t.x << 1) | t.y]; 2496} 2497kernel void saturateDepth(uint2 position [[thread_position_in_grid]], 2498 constant uint3 *dimensions [[buffer(0)]], 2499 device float *srcBuffer [[buffer(1)]], 2500 device float *dstBuffer [[buffer(2)]]) 2501{ 2502 if (any(position >= (*dimensions).xy)) 2503 { 2504 return; 2505 } 2506 const uint srcOffset = position.y * (*dimensions).z + position.x; 2507 const uint dstOffset = position.y * (*dimensions).x + position.x; 2508 dstBuffer[dstOffset] = saturate(srcBuffer[srcOffset]); 2509} 2510constant bool kCombineWithExistingResult [[function_constant(1000)]]; 2511struct CombineVisibilityResultOptions 2512{ 2513 uint startOffset; 2514 uint numOffsets; 2515}; 2516kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]], 2517 constant CombineVisibilityResultOptions &options [[buffer(0)]], 2518 constant ushort4 *renderpassVisibilityResult [[buffer(1)]], 2519 device ushort4 *finalResults [[buffer(2)]]) 2520{ 2521 if (idx > 0) 2522 { 2523 return; 2524 } 2525 ushort4 finalResult16x4; 2526 if (kCombineWithExistingResult) 2527 { 2528 finalResult16x4 = finalResults[0]; 2529 } 2530 else 2531 { 2532 finalResult16x4 = ushort4(0, 0, 0, 0); 2533 } 2534 for (uint i = 0; i < options.numOffsets; ++i) 2535 { 2536 uint offset = options.startOffset + i; 2537 ushort4 renderpassResult = renderpassVisibilityResult[offset]; 2538 finalResult16x4 = finalResult16x4 | renderpassResult; 2539 } 2540 finalResults[0] = finalResult16x4; 2541} 2542using namespace metal; 2543constant uint fixIndexBufferKey [[ function_constant(2000) ]]; 2544constant bool indexBufferIsUint16 = (((fixIndexBufferKey >> 0U) & 0x03U) == 2U); 2545constant bool indexBufferIsUint32 = (((fixIndexBufferKey >> 0U) & 0x03U) == 3U); 2546constant bool outIndexBufferIsUint16 = (((fixIndexBufferKey >> 2U) & 0x03U) == 2U); 2547constant bool outIndexBufferIsUint32 = (((fixIndexBufferKey >> 2U) & 0x03U) == 3U); 2548constant bool doPrimRestart = (fixIndexBufferKey & 0x00100U); 2549constant uint fixIndexBufferMode = (fixIndexBufferKey >> 4U) & 0x0FU; 2550static inline uint readIdx( 2551 const device ushort *indexBufferUint16, 2552 const device uint *indexBufferUint32, 2553 const uint restartIndex, 2554 const uint indexCount, 2555 uint idx, 2556 thread bool &foundRestart, 2557 thread uint &indexThatRestartedFirst 2558 ) 2559{ 2560 uint inIndex = idx; 2561 if(inIndex < indexCount) 2562 { 2563 if(indexBufferIsUint16) 2564 { 2565 inIndex = indexBufferUint16[inIndex]; 2566 } 2567 else if(indexBufferIsUint32) 2568 { 2569 inIndex = indexBufferUint32[inIndex]; 2570 } 2571 } 2572 else 2573 { 2574 foundRestart = true; 2575 indexThatRestartedFirst = idx; 2576 } 2577 if(doPrimRestart && !foundRestart && inIndex == restartIndex) 2578 { 2579 foundRestart = true; 2580 indexThatRestartedFirst = idx; 2581 } 2582 return inIndex; 2583} 2584static inline void outputPrimitive( 2585 const device ushort *indexBufferUint16, 2586 const device uint *indexBufferUint32, 2587 device ushort *outIndexBufferUint16, 2588 device uint *outIndexBufferUint32, 2589 const uint restartIndex, 2590 const uint indexCount, 2591 thread uint &baseIndex, 2592 uint onIndex, 2593 thread uint &onOutIndex 2594 ) 2595{ 2596 if(baseIndex > onIndex) return; 2597 bool foundRestart = false; 2598 uint indexThatRestartedFirst = 0; 2599 switch(fixIndexBufferMode) 2600 { 2601 case 0x00U: 2602 { 2603 auto tmpIndex = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex, foundRestart, indexThatRestartedFirst); 2604 if(foundRestart) 2605 { 2606 baseIndex = indexThatRestartedFirst + 1; 2607 return; 2608 } 2609 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex; } onOutIndex++; }); 2610 } 2611 break; 2612 case 0x01U: 2613 { 2614 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 2615 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 2616 if(foundRestart) 2617 { 2618 baseIndex = indexThatRestartedFirst + 1; 2619 return; 2620 } 2621 if((onIndex - baseIndex) & 1) return; 2622 if(fixIndexBufferKey & 0x00200U) 2623 { 2624 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2625 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2626 } 2627 else 2628 { 2629 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2630 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2631 } 2632 } 2633 break; 2634 case 0x03U: 2635 { 2636 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 2637 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 2638 if(foundRestart) 2639 { 2640 baseIndex = indexThatRestartedFirst + 1; 2641 return; 2642 } 2643 if(fixIndexBufferKey & 0x00200U) 2644 { 2645 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2646 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2647 } 2648 else 2649 { 2650 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2651 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2652 } 2653 } 2654 break; 2655 case 0x04U: 2656 { 2657 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 2658 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 2659 auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); 2660 if(foundRestart) 2661 { 2662 baseIndex = indexThatRestartedFirst + 1; 2663 return; 2664 } 2665 if(((onIndex - baseIndex) % 3) != 0) return; 2666 if(fixIndexBufferKey & 0x00200U) 2667 { 2668 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 2669 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2670 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2671 } 2672 else 2673 { 2674 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2675 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2676 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 2677 } 2678 } 2679 break; 2680 case 0x05U: 2681 { 2682 uint isOdd = ((onIndex - baseIndex) & 1); 2683 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0 + isOdd, foundRestart, indexThatRestartedFirst); 2684 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1 - isOdd, foundRestart, indexThatRestartedFirst); 2685 auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); 2686 if(foundRestart) 2687 { 2688 baseIndex = indexThatRestartedFirst + 1; 2689 return; 2690 } 2691 if(fixIndexBufferKey & 0x00200U) 2692 { 2693 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 2694 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2695 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2696 } 2697 else 2698 { 2699 if(isOdd) 2700 { 2701 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2702 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 2703 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2704 } 2705 else 2706 { 2707 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2708 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2709 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 2710 } 2711 } 2712 assert(onOutIndex <= (onIndex + 1) * 3); 2713 assert(onOutIndex <= (indexCount - 2) * 3); 2714 } 2715 break; 2716 } 2717} 2718kernel void fixIndexBuffer( 2719 const device ushort *indexBufferUint16 [[ buffer(0), function_constant(indexBufferIsUint16) ]], 2720 const device uint *indexBufferUint32 [[ buffer(0), function_constant(indexBufferIsUint32) ]], 2721 device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], 2722 device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], 2723 constant uint &indexCount [[ buffer(2) ]], 2724 constant uint &primCount [[ buffer(3) ]], 2725 uint prim [[thread_position_in_grid]]) 2726{ 2727 constexpr uint restartIndex = 0xFFFFFFFF; 2728 uint baseIndex = 0; 2729 uint onIndex = onIndex; 2730 uint onOutIndex = onOutIndex; 2731 if(prim < primCount) 2732 { 2733 switch(fixIndexBufferMode) 2734 { 2735 case 0x00U: 2736 onIndex = prim; 2737 onOutIndex = prim; 2738 break; 2739 case 0x01U: 2740 onIndex = prim * 2; 2741 onOutIndex = prim * 2; 2742 break; 2743 case 0x03U: 2744 onIndex = prim; 2745 onOutIndex = prim * 2; 2746 break; 2747 case 0x04U: 2748 onIndex = prim * 3; 2749 onOutIndex = prim * 3; 2750 break; 2751 case 0x05U: 2752 onIndex = prim; 2753 onOutIndex = prim * 3; 2754 break; 2755 } 2756 outputPrimitive(indexBufferUint16, indexBufferUint32, outIndexBufferUint16, outIndexBufferUint32, restartIndex, indexCount, baseIndex, onIndex, onOutIndex); 2757 } 2758} 2759static inline void generatePrimitive( 2760 device ushort *outIndexBufferUint16, 2761 device uint *outIndexBufferUint32, 2762 const uint firstVertex, 2763 const uint indexCount, 2764 thread uint &baseIndex, 2765 uint onIndex, 2766 uint primCount, 2767 thread uint &onOutIndex 2768 ) 2769{ 2770 if(baseIndex > onIndex) return; 2771 switch(fixIndexBufferMode) 2772 { 2773 case 0x00U: 2774 { 2775 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = onIndex + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = onIndex + firstVertex; } onOutIndex++; }); 2776 } 2777 break; 2778 case 0x01U: 2779 { 2780 auto tmpIndex0 = onIndex + 0; 2781 auto tmpIndex1 = onIndex + 1; 2782 if(fixIndexBufferKey & 0x00200U) 2783 { 2784 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2785 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2786 } 2787 else 2788 { 2789 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2790 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2791 } 2792 } 2793 break; 2794 case 0x02U: 2795 { 2796 auto tmpIndex0 = onIndex + 0; 2797 auto tmpIndex1 = (onIndex + 1) % primCount; 2798 if(fixIndexBufferKey & 0x00200U) 2799 { 2800 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2801 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2802 } 2803 else 2804 { 2805 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2806 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2807 } 2808 } 2809 break; 2810 case 0x03U: 2811 { 2812 auto tmpIndex0 = onIndex + 0; 2813 auto tmpIndex1 = onIndex + 1; 2814 if(fixIndexBufferKey & 0x00200U) 2815 { 2816 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2817 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2818 } 2819 else 2820 { 2821 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2822 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2823 } 2824 } 2825 break; 2826 case 0x04U: 2827 { 2828 auto tmpIndex0 = onIndex + 0; 2829 auto tmpIndex1 = onIndex + 1; 2830 auto tmpIndex2 = onIndex + 2; 2831 if(fixIndexBufferKey & 0x00200U) 2832 { 2833 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 2834 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2835 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2836 } 2837 else 2838 { 2839 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2840 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2841 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 2842 } 2843 } 2844 break; 2845 case 0x05U: 2846 { 2847 uint isOdd = ((onIndex - baseIndex) & 1); 2848 auto tmpIndex0 = onIndex + 0 + isOdd; 2849 auto tmpIndex1 = onIndex + 1 - isOdd; 2850 auto tmpIndex2 = onIndex + 2; 2851 if(fixIndexBufferKey & 0x00200U) 2852 { 2853 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 2854 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2855 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2856 } 2857 else 2858 { 2859 if(isOdd) 2860 { 2861 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2862 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 2863 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2864 } 2865 else 2866 { 2867 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2868 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2869 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 2870 } 2871 } 2872 assert(onOutIndex <= (onIndex + 1) * 3); 2873 assert(onOutIndex <= (indexCount - 2) * 3); 2874 break; 2875 } 2876 case 0x06U: 2877 { 2878 auto tmpIndex0 = 0; 2879 auto tmpIndex1 = onIndex + 1; 2880 auto tmpIndex2 = onIndex + 2; 2881 if(fixIndexBufferKey & 0x00200U) 2882 { 2883 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 2884 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2885 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2886 } 2887 else 2888 { 2889 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2890 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 2891 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2892 } 2893 } 2894 break; 2895 } 2896} 2897kernel void genIndexBuffer( 2898 device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], 2899 device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], 2900 constant uint &indexCount [[ buffer(2) ]], 2901 constant uint &primCount [[ buffer(3) ]], 2902 constant uint &firstVertex [[ buffer(4) ]], 2903 uint prim [[thread_position_in_grid]]) 2904{ 2905 uint baseIndex = 0; 2906 uint onIndex = onIndex; 2907 uint onOutIndex = onOutIndex; 2908 if(prim < primCount) 2909 { 2910 switch(fixIndexBufferMode) 2911 { 2912 case 0x00U: 2913 onIndex = prim; 2914 onOutIndex = prim; 2915 break; 2916 case 0x01U: 2917 onIndex = prim * 2; 2918 onOutIndex = prim * 2; 2919 break; 2920 case 0x03U: 2921 onIndex = prim; 2922 onOutIndex = prim * 2; 2923 break; 2924 case 0x02U: 2925 onIndex = prim; 2926 onOutIndex = prim * 2; 2927 break; 2928 case 0x04U: 2929 onIndex = prim * 3; 2930 onOutIndex = prim * 3; 2931 break; 2932 case 0x05U: 2933 onIndex = prim; 2934 onOutIndex = prim * 3; 2935 break; 2936 case 0x06U: 2937 onIndex = prim; 2938 onOutIndex = prim * 3; 2939 break; 2940 } 2941 generatePrimitive(outIndexBufferUint16, outIndexBufferUint32, firstVertex, indexCount, baseIndex, onIndex, primCount, onOutIndex); 2942 } 2943} 2944 2945 2946