1 // GENERATED FILE - DO NOT EDIT. 2 // Generated by gen_mtl_internal_shaders.py 3 // 4 // Copyright 2020 The ANGLE Project Authors. All rights reserved. 5 // Use of this source code is governed by a BSD-style license that can be 6 // found in the LICENSE file. 7 // 8 9 // C++ string version of combined Metal default shaders. 10 11 static char gDefaultMetallibSrc[] = R"( 12 # include <simd/simd.h> 13 # include <metal_stdlib> 14 namespace rx 15 { 16 namespace mtl_shader 17 { 18 enum 19 { 20 kTextureType2D = 0, 21 kTextureType2DMultisample = 1, 22 kTextureType2DArray = 2, 23 kTextureTypeCube = 3, 24 kTextureType3D = 4, 25 kTextureTypeCount = 5, 26 }; 27 } 28 } 29 using namespace metal; 30 constant uint32_t kNumColorOutputs [[function_constant(0)]]; 31 constant bool kColorOutputAvailable0 = kNumColorOutputs > 0; 32 constant bool kColorOutputAvailable1 = kNumColorOutputs > 1; 33 constant bool kColorOutputAvailable2 = kNumColorOutputs > 2; 34 constant bool kColorOutputAvailable3 = kNumColorOutputs > 3; 35 constant bool kColorOutputAvailable4 = kNumColorOutputs > 4; 36 constant bool kColorOutputAvailable5 = kNumColorOutputs > 5; 37 constant bool kColorOutputAvailable6 = kNumColorOutputs > 6; 38 constant bool kColorOutputAvailable7 = kNumColorOutputs > 7; 39 namespace rx 40 { 41 namespace mtl_shader 42 { 43 constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)}; 44 template <typename T> 45 struct MultipleColorOutputs 46 { 47 vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]]; 48 vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]]; 49 vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]]; 50 vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]]; 51 vec<T, 4> color4 [[color(4), function_constant(kColorOutputAvailable4)]]; 52 vec<T, 4> color5 [[color(5), function_constant(kColorOutputAvailable5)]]; 53 vec<T, 4> color6 [[color(6), function_constant(kColorOutputAvailable6)]]; 54 vec<T, 4> color7 [[color(7), function_constant(kColorOutputAvailable7)]]; 55 }; 56 template <typename T> 57 static inline MultipleColorOutputs<T> toMultipleColorOutputs(vec<T, 4> color) 58 { 59 MultipleColorOutputs<T> re; 60 do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0); 61 do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0); 62 do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0); 63 do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0); 64 do { if (kColorOutputAvailable4) { re.color4 = color; } } while (0); 65 do { if (kColorOutputAvailable5) { re.color5 = color; } } while (0); 66 do { if (kColorOutputAvailable6) { re.color6 = color; } } while (0); 67 do { if (kColorOutputAvailable7) { re.color7 = color; } } while (0); 68 return re; 69 } 70 static inline float3 cubeTexcoords(float2 texcoords, int face) 71 { 72 texcoords = 2.0 * texcoords - 1.0; 73 switch (face) 74 { 75 case 0: 76 return float3(1.0, -texcoords.y, -texcoords.x); 77 case 1: 78 return float3(-1.0, -texcoords.y, texcoords.x); 79 case 2: 80 return float3(texcoords.x, 1.0, texcoords.y); 81 case 3: 82 return float3(texcoords.x, -1.0, -texcoords.y); 83 case 4: 84 return float3(texcoords.x, -texcoords.y, 1.0); 85 case 5: 86 return float3(-texcoords.x, -texcoords.y, -1.0); 87 } 88 return float3(texcoords, 0); 89 } 90 template <typename T> 91 static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords) 92 { 93 uint samples = srcTexture.get_num_samples(); 94 vec<T, 4> output(0); 95 for (uint sample = 0; sample < samples; ++sample) 96 { 97 output += srcTexture.read(coords, sample); 98 } 99 output = output / samples; 100 return output; 101 } 102 static inline float4 sRGBtoLinear(float4 color) 103 { 104 float3 linear1 = color.rgb / 12.92; 105 float3 linear2 = powr((color.rgb + float3(0.055)) / 1.055, 2.4); 106 float3 factor = float3(color.rgb <= float3(0.04045)); 107 float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a); 108 return linear; 109 } 110 static inline float linearToSRGB(float color) 111 { 112 if (color <= 0.0f) 113 return 0.0f; 114 if (color < 0.0031308f) 115 return 12.92f * color; 116 if (color < 1.0f) 117 return 1.055f * powr(color, 0.41666f) - 0.055f; 118 return 1.0f; 119 } 120 static inline float4 linearToSRGB(float4 color) 121 { 122 return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a); 123 } 124 template <typename Short> 125 static inline Short bytesToShort(constant uchar *input, uint offset) 126 { 127 Short inputLo = input[offset]; 128 Short inputHi = input[offset + 1]; 129 return inputLo | (inputHi << 8); 130 } 131 template <typename Int> 132 static inline Int bytesToInt(constant uchar *input, uint offset) 133 { 134 Int input0 = input[offset]; 135 Int input1 = input[offset + 1]; 136 Int input2 = input[offset + 2]; 137 Int input3 = input[offset + 3]; 138 return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); 139 } 140 template <typename Short> 141 static inline void shortToBytes(Short val, uint offset, device uchar *output) 142 { 143 ushort valUnsigned = as_type<ushort>(val); 144 output[offset] = valUnsigned & 0xff; 145 output[offset + 1] = (valUnsigned >> 8) & 0xff; 146 } 147 template <typename Int> 148 static inline void intToBytes(Int val, uint offset, device uchar *output) 149 { 150 uint valUnsigned = as_type<uint>(val); 151 output[offset] = valUnsigned & 0xff; 152 output[offset + 1] = (valUnsigned >> 8) & 0xff; 153 output[offset + 2] = (valUnsigned >> 16) & 0xff; 154 output[offset + 3] = (valUnsigned >> 24) & 0xff; 155 } 156 static inline void floatToBytes(float val, uint offset, device uchar *output) 157 { 158 intToBytes(as_type<uint>(val), offset, output); 159 } 160 static inline void int24bitToBytes(uint val, uint offset, device uchar *output) 161 { 162 output[offset] = val & 0xff; 163 output[offset + 1] = (val >> 8) & 0xff; 164 output[offset + 2] = (val >> 16) & 0xff; 165 } 166 template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> 167 static inline T getShiftedData(T input) 168 { 169 static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), 170 "T must have at least as many bits as inputBitCount + inputBitStart."); 171 const T mask = (1 << inputBitCount) - 1; 172 return (input >> inputBitStart) & mask; 173 } 174 template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> 175 static inline T shiftData(T input) 176 { 177 static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), 178 "T must have at least as many bits as inputBitCount + inputBitStart."); 179 const T mask = (1 << inputBitCount) - 1; 180 return (input & mask) << inputBitStart; 181 } 182 template <unsigned int inputBitCount, typename T> 183 static inline float normalizedToFloat(T input) 184 { 185 static_assert(inputBitCount <= (sizeof(T) * 8), 186 "T must have more bits than or same bits as inputBitCount."); 187 static_assert(inputBitCount <= 23, "Only single precision is supported"); 188 constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1); 189 return input * inverseMax; 190 } 191 template <typename T> 192 static inline float normalizedToFloat(T input) 193 { 194 return normalizedToFloat<sizeof(T) * 8, T>(input); 195 } 196 template <> 197 inline float normalizedToFloat(short input) 198 { 199 constexpr float inverseMax = 1.0f / 0x7fff; 200 return static_cast<float>(input) * inverseMax; 201 } 202 template <> 203 inline float normalizedToFloat(int input) 204 { 205 constexpr float inverseMax = 1.0f / 0x7fffffff; 206 return static_cast<float>(input) * inverseMax; 207 } 208 template <> 209 inline float normalizedToFloat(uint input) 210 { 211 constexpr float inverseMax = 1.0f / 0xffffffff; 212 return static_cast<float>(input) * inverseMax; 213 } 214 template <unsigned int outputBitCount, typename T> 215 static inline T floatToNormalized(float input) 216 { 217 static_assert(outputBitCount <= (sizeof(T) * 8), 218 "T must have more bits than or same bits as inputBitCount."); 219 static_assert(outputBitCount > (metal::is_unsigned<T>::value ? 0 : 1), 220 "outputBitCount must be at least 1 not counting the sign bit."); 221 constexpr unsigned int bits = 222 metal::is_unsigned<T>::value ? outputBitCount : outputBitCount - 1; 223 static_assert(bits <= 23, "Only single precision is supported"); 224 return static_cast<T>(metal::round(((1 << bits) - 1) * input)); 225 } 226 template <typename T> 227 static inline T floatToNormalized(float input) 228 { 229 return floatToNormalized<sizeof(T) * 8, T>(input); 230 } 231 } 232 } 233 234 using namespace rx::mtl_shader; 235 constant bool kPremultiplyAlpha [[function_constant(1)]]; 236 constant bool kUnmultiplyAlpha [[function_constant(2)]]; 237 constant bool kTransformLinearToSrgb [[function_constant(3)]]; 238 constant int kSourceTextureType [[function_constant(4)]]; 239 constant int kSourceTexture2Type [[function_constant(5)]]; 240 constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D; 241 constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray; 242 constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample; 243 constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube; 244 constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D; 245 constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D; 246 constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray; 247 constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample; 248 constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube; 249 struct BlitParams 250 { 251 float4 srcTexCoords; 252 int srcLevel; 253 int srcLayer; 254 bool dstLuminance; 255 uint8_t padding[7]; 256 }; 257 struct BlitVSOut 258 { 259 float4 position [[position]]; 260 float2 texCoords [[center_no_perspective, user(locn1)]]; 261 }; 262 vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]]) 263 { 264 BlitVSOut output; 265 output.position.xy = select(float2(-1.0f), float2(1.0f), bool2(vid & uint2(2, 1))); 266 output.position.zw = float2(0.0, 1.0); 267 output.texCoords = select(options.srcTexCoords.xy, options.srcTexCoords.zw, bool2(vid & uint2(2, 1))); 268 return output; 269 } 270 template <typename SrcTexture2d> 271 static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords) 272 { 273 uint2 dimens(srcTexture.get_width(), srcTexture.get_height()); 274 uint2 coords = uint2(texCoords * float2(dimens)); 275 return coords; 276 } 277 template <typename T> 278 static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords) 279 { 280 uint2 coords = getImageCoords(srcTexture, texCoords); 281 return resolveTextureMS(srcTexture, coords); 282 } 283 template <typename T> 284 static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture, 285 sampler textureSampler, 286 float2 texCoords, 287 constant BlitParams &options) 288 { 289 uint depth = srcTexture.get_depth(options.srcLevel); 290 float zCoord = (float(options.srcLayer) + 0.5) / float(depth); 291 return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel)); 292 } 293 template <typename T> 294 static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 295 { 296 vec<T, 4> output; 297 switch (kSourceTextureType) 298 { 299 case kTextureType2D: 300 output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel)); 301 break; 302 case kTextureType2DArray: 303 output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer, 304 level(options.srcLevel)); 305 break; 306 case kTextureType2DMultisample: 307 output = blitSampleTextureMS(srcTexture2dMS, input.texCoords); 308 break; 309 case kTextureTypeCube: 310 output = srcTextureCube.sample(textureSampler, 311 cubeTexcoords(input.texCoords, options.srcLayer), 312 level(options.srcLevel)); 313 break; 314 case kTextureType3D: 315 output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options); 316 break; 317 } 318 if (kTransformLinearToSrgb) { 319 output.x = linearToSRGB(output.x); 320 output.y = linearToSRGB(output.y); 321 output.z = linearToSRGB(output.z); 322 } 323 if (kUnmultiplyAlpha) 324 { 325 if (output.a != 0.0) 326 { 327 output.xyz /= output.a; 328 } 329 } 330 if (kPremultiplyAlpha) 331 { 332 output.xyz *= output.a; 333 } 334 if (options.dstLuminance) 335 { 336 output.g = output.b = output.r; 337 } 338 return output; 339 } 340 template <typename T> 341 static inline MultipleColorOutputs<T> blitFS(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 342 { 343 vec<T, 4> output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 344 return toMultipleColorOutputs(output); 345 } 346 fragment MultipleColorOutputs<float> blitFloatFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 347 { 348 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 349 } 350 fragment MultipleColorOutputs<int> blitIntFS(BlitVSOut input [[stage_in]], texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<int> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 351 { 352 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 353 } 354 fragment MultipleColorOutputs<uint> blitUIntFS(BlitVSOut input [[stage_in]], texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<uint> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 355 { 356 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 357 } 358 fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 359 { 360 float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 361 uint4 output = uint4(inputColor * float4(255.0)); 362 return toMultipleColorOutputs(output); 363 } 364 struct FragmentDepthOut 365 { 366 float depth [[depth(any)]]; 367 }; 368 static inline float sampleDepth( 369 texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]], 370 texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]], 371 texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]], 372 texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]], 373 float2 texCoords, 374 constant BlitParams &options) 375 { 376 float4 output; 377 constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); 378 switch (kSourceTextureType) 379 { 380 case kTextureType2D: 381 output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel)); 382 break; 383 case kTextureType2DArray: 384 output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer, 385 level(options.srcLevel)); 386 break; 387 case kTextureType2DMultisample: 388 output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); 389 break; 390 case kTextureTypeCube: 391 output = 392 srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer), 393 level(options.srcLevel)); 394 break; 395 } 396 return output.r; 397 } 398 fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]], 399 texture2d<float> srcTexture2d 400 [[texture(0), function_constant(kSourceTextureType2D)]], 401 texture2d_array<float> srcTexture2dArray 402 [[texture(0), function_constant(kSourceTextureType2DArray)]], 403 texture2d_ms<float> srcTexture2dMS 404 [[texture(0), function_constant(kSourceTextureType2DMS)]], 405 texturecube<float> srcTextureCube 406 [[texture(0), function_constant(kSourceTextureTypeCube)]], 407 constant BlitParams &options [[buffer(0)]]) 408 { 409 FragmentDepthOut re; 410 re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 411 input.texCoords, options); 412 return re; 413 } 414 static inline uint32_t sampleStencil( 415 texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]], 416 texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]], 417 texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]], 418 texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]], 419 float2 texCoords, 420 int srcLevel, 421 int srcLayer) 422 { 423 uint4 output; 424 constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); 425 switch (kSourceTexture2Type) 426 { 427 case kTextureType2D: 428 output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel)); 429 break; 430 case kTextureType2DArray: 431 output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel)); 432 break; 433 case kTextureType2DMultisample: 434 output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); 435 break; 436 case kTextureTypeCube: 437 output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer), 438 level(srcLevel)); 439 break; 440 } 441 return output.r; 442 } 443 struct BlitStencilToBufferParams 444 { 445 float2 srcStartTexCoords; 446 float2 srcTexCoordSteps; 447 int srcLevel; 448 int srcLayer; 449 uint2 dstSize; 450 uint dstBufferRowPitch; 451 bool resolveMS; 452 }; 453 kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]], 454 texture2d<uint32_t> srcTexture2d 455 [[texture(1), function_constant(kSourceTexture2Type2D)]], 456 texture2d_array<uint32_t> srcTexture2dArray 457 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 458 texture2d_ms<uint32_t> srcTexture2dMS 459 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 460 texturecube<uint32_t> srcTextureCube 461 [[texture(1), function_constant(kSourceTexture2TypeCube)]], 462 constant BlitStencilToBufferParams &options [[buffer(0)]], 463 device uchar *buffer [[buffer(1)]]) 464 { 465 if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y) 466 { 467 return; 468 } 469 float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps; 470 if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS) 471 { 472 uint samples = srcTexture2dMS.get_num_samples(); 473 uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords); 474 uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x; 475 for (uint sample = 0; sample < samples; ++sample) 476 { 477 uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r; 478 buffer[bufferOffset + sample] = static_cast<uchar>(stencilPerSample); 479 } 480 } 481 else 482 { 483 uint32_t stencil = 484 sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 485 srcTexCoords, options.srcLevel, options.srcLayer); 486 buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast<uchar>(stencil); 487 } 488 } 489 #if __METAL_VERSION__ >= 210 490 struct FragmentStencilOut 491 { 492 uint32_t stencil [[stencil]]; 493 }; 494 struct FragmentDepthStencilOut 495 { 496 float depth [[depth(any)]]; 497 uint32_t stencil [[stencil]]; 498 }; 499 fragment FragmentStencilOut blitStencilFS( 500 BlitVSOut input [[stage_in]], 501 texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]], 502 texture2d_array<uint32_t> srcTexture2dArray 503 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 504 texture2d_ms<uint32_t> srcTexture2dMS 505 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 506 texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]], 507 constant BlitParams &options [[buffer(0)]]) 508 { 509 FragmentStencilOut re; 510 re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 511 input.texCoords, options.srcLevel, options.srcLayer); 512 return re; 513 } 514 fragment FragmentDepthStencilOut blitDepthStencilFS( 515 BlitVSOut input [[stage_in]], 516 texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], 517 texture2d_array<float> srcDepthTexture2dArray 518 [[texture(0), function_constant(kSourceTextureType2DArray)]], 519 texture2d_ms<float> srcDepthTexture2dMS 520 [[texture(0), function_constant(kSourceTextureType2DMS)]], 521 texturecube<float> srcDepthTextureCube 522 [[texture(0), function_constant(kSourceTextureTypeCube)]], 523 texture2d<uint32_t> srcStencilTexture2d 524 [[texture(1), function_constant(kSourceTexture2Type2D)]], 525 texture2d_array<uint32_t> srcStencilTexture2dArray 526 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 527 texture2d_ms<uint32_t> srcStencilTexture2dMS 528 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 529 texturecube<uint32_t> srcStencilTextureCube 530 [[texture(1), function_constant(kSourceTexture2TypeCube)]], 531 constant BlitParams &options [[buffer(0)]]) 532 { 533 FragmentDepthStencilOut re; 534 re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS, 535 srcDepthTextureCube, input.texCoords, options); 536 re.stencil = 537 sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS, 538 srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer); 539 return re; 540 } 541 #endif 542 using namespace rx::mtl_shader; 543 struct ClearParams 544 { 545 float4 clearColor; 546 float clearDepth; 547 }; 548 vertex float4 clearVS(unsigned int vid [[ vertex_id ]], 549 constant ClearParams &clearParams [[buffer(0)]]) 550 { 551 return float4(gCorners[vid], clearParams.clearDepth, 1.0); 552 } 553 fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]]) 554 { 555 return toMultipleColorOutputs(clearParams.clearColor); 556 } 557 fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]]) 558 { 559 return toMultipleColorOutputs(as_type<int4>(clearParams.clearColor)); 560 } 561 fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]]) 562 { 563 return toMultipleColorOutputs(as_type<uint4>(clearParams.clearColor)); 564 } 565 using namespace rx::mtl_shader; 566 constant bool kSourceBufferAligned[[function_constant(100)]]; 567 constant bool kSourceIndexIsU8[[function_constant(200)]]; 568 constant bool kSourceIndexIsU16[[function_constant(300)]]; 569 constant bool kSourceIndexIsU32[[function_constant(400)]]; 570 constant bool kSourceBufferUnaligned = !kSourceBufferAligned; 571 constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned; 572 constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned; 573 constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned; 574 struct IndexConversionParams 575 { 576 uint32_t srcOffset; 577 uint32_t indexCount; 578 bool primitiveRestartEnabled; 579 }; 580 inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx) 581 { 582 return inputAligned[offset / 2 + idx]; 583 } 584 inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx) 585 { 586 return inputAligned[offset / 4 + idx]; 587 } 588 inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx) 589 { 590 return input[offset + idx]; 591 } 592 inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx) 593 { 594 ushort inputLo = input[offset + 2 * idx]; 595 ushort inputHi = input[offset + 2 * idx + 1]; 596 return inputLo | (inputHi << 8); 597 } 598 inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx) 599 { 600 uint input0 = input[offset + 4 * idx]; 601 uint input1 = input[offset + 4 * idx + 1]; 602 uint input2 = input[offset + 4 * idx + 2]; 603 uint input3 = input[offset + 4 * idx + 3]; 604 return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); 605 } 606 kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]], 607 constant IndexConversionParams &options [[buffer(0)]], 608 constant uchar *input [[buffer(1)]], 609 device ushort *output [[buffer(2)]]) 610 { 611 if (idx >= options.indexCount) { return; }; 612 uchar value = getIndexAligned(input, options.srcOffset, idx); 613 if (options.primitiveRestartEnabled && value == 0xff) 614 { 615 output[idx] = 0xffff; 616 } 617 else 618 { 619 output[idx] = value; 620 } 621 } 622 kernel void convertIndexU16(uint idx [[thread_position_in_grid]], 623 constant IndexConversionParams &options [[buffer(0)]], 624 constant uchar *input 625 [[buffer(1), function_constant(kSourceBufferUnaligned)]], 626 constant ushort *inputAligned 627 [[buffer(1), function_constant(kSourceBufferAligned)]], 628 device ushort *output [[buffer(2)]]) 629 { 630 if (idx >= options.indexCount) { return; }; 631 ushort value; 632 if (kSourceBufferAligned) 633 { 634 value = getIndexAligned(inputAligned, options.srcOffset, idx); 635 } 636 else 637 { 638 value = getIndexUnalignedU16(input, options.srcOffset, idx); 639 } 640 output[idx] = value; 641 } 642 kernel void convertIndexU32(uint idx [[thread_position_in_grid]], 643 constant IndexConversionParams &options [[buffer(0)]], 644 constant uchar *input 645 [[buffer(1), function_constant(kSourceBufferUnaligned)]], 646 constant uint *inputAligned 647 [[buffer(1), function_constant(kSourceBufferAligned)]], 648 device uint *output [[buffer(2)]]) 649 { 650 if (idx >= options.indexCount) { return; }; 651 uint value; 652 if (kSourceBufferAligned) 653 { 654 value = getIndexAligned(inputAligned, options.srcOffset, idx); 655 } 656 else 657 { 658 value = getIndexUnalignedU32(input, options.srcOffset, idx); 659 } 660 output[idx] = value; 661 } 662 struct IndexFromArrayParams 663 { 664 uint firstVertex; 665 uint vertexCount; 666 }; 667 kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]], 668 constant IndexFromArrayParams &options [[buffer(0)]], 669 device uint *output [[buffer(2)]]) 670 { 671 if (idx >= options.vertexCount) { return; }; 672 uint vertexIdx = options.firstVertex + 2 + idx; 673 output[3 * idx ] = vertexIdx - 1; 674 output[3 * idx + 1] = vertexIdx; 675 output[3 * idx + 2] = options.firstVertex; 676 } 677 inline uint getIndexU32(uint offset, 678 uint idx, 679 constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]], 680 constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]], 681 constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]]) 682 { 683 if (kUseSourceBufferU8) 684 { 685 if (kSourceIndexIsU16) 686 { 687 return getIndexUnalignedU16(inputU8, offset, idx); 688 } 689 else if (kSourceIndexIsU32) 690 { 691 return getIndexUnalignedU32(inputU8, offset, idx); 692 } 693 return getIndexAligned(inputU8, offset, idx); 694 } 695 else if (kUseSourceBufferU16) 696 { 697 return getIndexAligned(inputU16, offset, idx); 698 } 699 else if (kUseSourceBufferU32) 700 { 701 return getIndexAligned(inputU32, offset, idx); 702 } 703 return 0; 704 } 705 kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]], 706 constant IndexConversionParams &options [[buffer(0)]], 707 constant uchar *inputU8 708 [[buffer(1), function_constant(kUseSourceBufferU8)]], 709 constant ushort *inputU16 710 [[buffer(1), function_constant(kUseSourceBufferU16)]], 711 constant uint *inputU32 712 [[buffer(1), function_constant(kUseSourceBufferU32)]], 713 device uint *output [[buffer(2)]]) 714 { 715 if (idx >= options.indexCount) { return; }; 716 uint elemIdx = 2 + idx; 717 output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32); 718 output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32); 719 output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32); 720 } 721 kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]], 722 constant IndexFromArrayParams &options [[buffer(0)]], 723 device uint *output [[buffer(2)]]) 724 { 725 uint totalIndices = options.vertexCount + 1; 726 if (idx >= totalIndices) { return; }; 727 output[idx] = options.firstVertex + idx % options.vertexCount; 728 } 729 kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]], 730 constant IndexConversionParams &options [[buffer(0)]], 731 constant uchar *inputU8 732 [[buffer(1), function_constant(kUseSourceBufferU8)]], 733 constant ushort *inputU16 734 [[buffer(1), function_constant(kUseSourceBufferU16)]], 735 constant uint *inputU32 736 [[buffer(1), function_constant(kUseSourceBufferU32)]], 737 device uint *output [[buffer(2)]]) 738 { 739 uint totalTargetIndices = options.indexCount + 1; 740 if (idx >= totalTargetIndices) { return; }; 741 output[idx] = 742 getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32); 743 } 744 using namespace rx::mtl_shader; 745 struct GenMipParams 746 { 747 uint srcLevel; 748 uint numMipLevelsToGen; 749 bool sRGB; 750 }; 751 kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]], 752 ushort3 gIndices [[thread_position_in_grid]], 753 texture3d<float> srcTexture [[texture(0)]], 754 texture3d<float, access::write> dstMip1 [[texture(1)]], 755 texture3d<float, access::write> dstMip2 [[texture(2)]], 756 texture3d<float, access::write> dstMip3 [[texture(3)]], 757 texture3d<float, access::write> dstMip4 [[texture(4)]], 758 constant GenMipParams &options [[buffer(0)]]) 759 { 760 ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth()); 761 bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z; 762 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 763 threadgroup float sR[(8 * 8 * 8)]; 764 threadgroup float sG[(8 * 8 * 8)]; 765 threadgroup float sB[(8 * 8 * 8)]; 766 threadgroup float sA[(8 * 8 * 8)]; 767 float4 texel1; 768 if (validThread) 769 { 770 float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize); 771 texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); 772 dstMip1.write(texel1, gIndices); 773 } 774 else 775 { 776 lIndex = 0xffffffff; 777 } 778 if (options.numMipLevelsToGen == 1) 779 { 780 return; 781 } 782 if (options.sRGB) 783 { 784 texel1 = linearToSRGB(texel1); 785 } 786 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 787 threadgroup_barrier(mem_flags::mem_threadgroup); 788 if ((lIndex & 0x49) == 0) 789 { 790 bool3 atEdge = gIndices == (mipSize - ushort3(1)); 791 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 792 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 793 float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)])); 794 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 795 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)])); 796 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)])); 797 float4 texel8 = 798 (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)])); 799 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 800 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); 801 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 802 } 803 if (options.numMipLevelsToGen == 2) 804 { 805 return; 806 } 807 threadgroup_barrier(mem_flags::mem_threadgroup); 808 if ((lIndex & 0xdb) == 0) 809 { 810 mipSize = max(mipSize >> 1, ushort3(1)); 811 bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1)); 812 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 813 float4 texel3 = 814 (atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)])); 815 float4 texel4 = 816 (atEdge.z) ? (texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))])); 817 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 818 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)])); 819 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)])); 820 float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)])); 821 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 822 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); 823 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 824 } 825 if (options.numMipLevelsToGen == 3) 826 { 827 return; 828 } 829 threadgroup_barrier(mem_flags::mem_threadgroup); 830 if ((lIndex & 0x1ff) == 0) 831 { 832 mipSize = max(mipSize >> 1, ushort3(1)); 833 bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1)); 834 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 835 float4 texel3 = 836 (atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)])); 837 float4 texel4 = 838 (atEdge.z) ? (texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))])); 839 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 840 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)])); 841 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)])); 842 float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)])); 843 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 844 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); 845 } 846 } 847 kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]], 848 ushort2 gIndices [[thread_position_in_grid]], 849 texture2d<float> srcTexture [[texture(0)]], 850 texture2d<float, access::write> dstMip1 [[texture(1)]], 851 texture2d<float, access::write> dstMip2 [[texture(2)]], 852 texture2d<float, access::write> dstMip3 [[texture(3)]], 853 texture2d<float, access::write> dstMip4 [[texture(4)]], 854 constant GenMipParams &options [[buffer(0)]]) 855 { 856 uint firstMipLevel = options.srcLevel + 1; 857 ushort2 mipSize = 858 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 859 bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y; 860 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 861 threadgroup float sR[(8 * 8)]; 862 threadgroup float sG[(8 * 8)]; 863 threadgroup float sB[(8 * 8)]; 864 threadgroup float sA[(8 * 8)]; 865 float4 texel1; 866 if (validThread) 867 { 868 float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize); 869 texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); 870 dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices); 871 } 872 else 873 { 874 lIndex = 0xffffffff; 875 } 876 if (options.numMipLevelsToGen == 1) 877 { 878 return; 879 } 880 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 881 threadgroup_barrier(mem_flags::mem_threadgroup); 882 if ((lIndex & 0x09) == 0) 883 { 884 bool2 atEdge = gIndices == (mipSize - ushort2(1)); 885 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 886 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 887 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 888 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 889 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); 890 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 891 } 892 if (options.numMipLevelsToGen == 2) 893 { 894 return; 895 } 896 threadgroup_barrier(mem_flags::mem_threadgroup); 897 if ((lIndex & 0x1b) == 0) 898 { 899 mipSize = max(mipSize >> 1, ushort2(1)); 900 bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1)); 901 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 902 float4 texel3 = 903 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); 904 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 905 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 906 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); 907 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 908 } 909 if (options.numMipLevelsToGen == 3) 910 { 911 return; 912 } 913 threadgroup_barrier(mem_flags::mem_threadgroup); 914 if ((lIndex & 0x3f) == 0) 915 { 916 mipSize = max(mipSize >> 1, ushort2(1)); 917 bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1)); 918 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 919 float4 texel3 = 920 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); 921 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 922 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 923 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); 924 } 925 } 926 template <typename TextureTypeR, typename TextureTypeW> 927 static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps( 928 uint lIndex, 929 ushort3 gIndices, 930 TextureTypeR srcTexture, 931 TextureTypeW dstMip2, 932 TextureTypeW dstMip3, 933 TextureTypeW dstMip4, 934 ushort2 mip1Size, 935 float4 mip1Texel, 936 threadgroup float *sR, 937 threadgroup float *sG, 938 threadgroup float *sB, 939 threadgroup float *sA, 940 constant GenMipParams &options) 941 { 942 ushort2 mipSize = mip1Size; 943 float4 texel1 = mip1Texel; 944 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 945 threadgroup_barrier(mem_flags::mem_threadgroup); 946 if ((lIndex & 0x09) == 0) 947 { 948 bool2 atEdge = gIndices.xy == (mipSize - ushort2(1)); 949 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 950 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 951 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 952 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 953 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z); 954 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 955 } 956 if (options.numMipLevelsToGen == 2) 957 { 958 return; 959 } 960 threadgroup_barrier(mem_flags::mem_threadgroup); 961 if ((lIndex & 0x1b) == 0) 962 { 963 mipSize = max(mipSize >> 1, ushort2(1)); 964 bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1)); 965 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 966 float4 texel3 = 967 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); 968 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 969 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 970 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z); 971 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 972 } 973 if (options.numMipLevelsToGen == 3) 974 { 975 return; 976 } 977 threadgroup_barrier(mem_flags::mem_threadgroup); 978 if ((lIndex & 0x3f) == 0) 979 { 980 mipSize = max(mipSize >> 1, ushort2(1)); 981 bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1)); 982 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 983 float4 texel3 = 984 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); 985 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 986 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 987 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z); 988 } 989 } 990 kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]], 991 ushort3 gIndices [[thread_position_in_grid]], 992 texturecube<float> srcTexture [[texture(0)]], 993 texturecube<float, access::write> dstMip1 [[texture(1)]], 994 texturecube<float, access::write> dstMip2 [[texture(2)]], 995 texturecube<float, access::write> dstMip3 [[texture(3)]], 996 texturecube<float, access::write> dstMip4 [[texture(4)]], 997 constant GenMipParams &options [[buffer(0)]]) 998 { 999 uint firstMipLevel = options.srcLevel + 1; 1000 ushort2 mip1Size = 1001 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1002 bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; 1003 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1004 float4 mip1Texel; 1005 if (validThread) 1006 { 1007 float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); 1008 mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)), 1009 level(options.srcLevel)); 1010 dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); 1011 } 1012 else 1013 { 1014 lIndex = 0xffffffff; 1015 } 1016 if (options.numMipLevelsToGen == 1) 1017 { 1018 return; 1019 } 1020 threadgroup float sR[(8 * 8)]; 1021 threadgroup float sG[(8 * 8)]; 1022 threadgroup float sB[(8 * 8)]; 1023 threadgroup float sA[(8 * 8)]; 1024 generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, 1025 mip1Size, mip1Texel, sR, sG, sB, sA, options); 1026 } 1027 kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1028 ushort3 gIndices [[thread_position_in_grid]], 1029 texture2d_array<float> srcTexture [[texture(0)]], 1030 texture2d_array<float, access::write> dstMip1 [[texture(1)]], 1031 texture2d_array<float, access::write> dstMip2 [[texture(2)]], 1032 texture2d_array<float, access::write> dstMip3 [[texture(3)]], 1033 texture2d_array<float, access::write> dstMip4 [[texture(4)]], 1034 constant GenMipParams &options [[buffer(0)]]) 1035 { 1036 uint firstMipLevel = options.srcLevel + 1; 1037 ushort2 mip1Size = 1038 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1039 bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; 1040 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1041 float4 mip1Texel; 1042 if (validThread) 1043 { 1044 float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); 1045 mip1Texel = 1046 srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel)); 1047 dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); 1048 } 1049 else 1050 { 1051 lIndex = 0xffffffff; 1052 } 1053 if (options.numMipLevelsToGen == 1) 1054 { 1055 return; 1056 } 1057 threadgroup float sR[(8 * 8)]; 1058 threadgroup float sG[(8 * 8)]; 1059 threadgroup float sB[(8 * 8)]; 1060 threadgroup float sA[(8 * 8)]; 1061 generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, 1062 mip1Size, mip1Texel, sR, sG, sB, sA, options); 1063 } 1064 #include <metal_pack> 1065 namespace rx 1066 { 1067 namespace mtl_shader 1068 { 1069 namespace FormatID 1070 { 1071 enum 1072 { 1073 NONE, 1074 D16_UNORM, 1075 D24_UNORM_S8_UINT, 1076 D24_UNORM_X8_UINT, 1077 D32_FLOAT, 1078 D32_FLOAT_S8X24_UINT, 1079 D32_UNORM, 1080 S8_UINT, 1081 A16_FLOAT, 1082 A1R5G5B5_UNORM, 1083 A2R10G10B10_SINT_VERTEX, 1084 A2R10G10B10_SNORM_VERTEX, 1085 A2R10G10B10_SSCALED_VERTEX, 1086 A2R10G10B10_UINT_VERTEX, 1087 A2R10G10B10_UNORM_VERTEX, 1088 A2R10G10B10_USCALED_VERTEX, 1089 A32_FLOAT, 1090 A8_UNORM, 1091 ASTC_10x10_SRGB_BLOCK, 1092 ASTC_10x10_UNORM_BLOCK, 1093 ASTC_10x5_SRGB_BLOCK, 1094 ASTC_10x5_UNORM_BLOCK, 1095 ASTC_10x6_SRGB_BLOCK, 1096 ASTC_10x6_UNORM_BLOCK, 1097 ASTC_10x8_SRGB_BLOCK, 1098 ASTC_10x8_UNORM_BLOCK, 1099 ASTC_12x10_SRGB_BLOCK, 1100 ASTC_12x10_UNORM_BLOCK, 1101 ASTC_12x12_SRGB_BLOCK, 1102 ASTC_12x12_UNORM_BLOCK, 1103 ASTC_3x3x3_UNORM_BLOCK, 1104 ASTC_3x3x3_UNORM_SRGB_BLOCK, 1105 ASTC_4x3x3_UNORM_BLOCK, 1106 ASTC_4x3x3_UNORM_SRGB_BLOCK, 1107 ASTC_4x4_SRGB_BLOCK, 1108 ASTC_4x4_UNORM_BLOCK, 1109 ASTC_4x4x3_UNORM_BLOCK, 1110 ASTC_4x4x3_UNORM_SRGB_BLOCK, 1111 ASTC_4x4x4_UNORM_BLOCK, 1112 ASTC_4x4x4_UNORM_SRGB_BLOCK, 1113 ASTC_5x4_SRGB_BLOCK, 1114 ASTC_5x4_UNORM_BLOCK, 1115 ASTC_5x4x4_UNORM_BLOCK, 1116 ASTC_5x4x4_UNORM_SRGB_BLOCK, 1117 ASTC_5x5_SRGB_BLOCK, 1118 ASTC_5x5_UNORM_BLOCK, 1119 ASTC_5x5x4_UNORM_BLOCK, 1120 ASTC_5x5x4_UNORM_SRGB_BLOCK, 1121 ASTC_5x5x5_UNORM_BLOCK, 1122 ASTC_5x5x5_UNORM_SRGB_BLOCK, 1123 ASTC_6x5_SRGB_BLOCK, 1124 ASTC_6x5_UNORM_BLOCK, 1125 ASTC_6x5x5_UNORM_BLOCK, 1126 ASTC_6x5x5_UNORM_SRGB_BLOCK, 1127 ASTC_6x6_SRGB_BLOCK, 1128 ASTC_6x6_UNORM_BLOCK, 1129 ASTC_6x6x5_UNORM_BLOCK, 1130 ASTC_6x6x5_UNORM_SRGB_BLOCK, 1131 ASTC_6x6x6_UNORM_BLOCK, 1132 ASTC_6x6x6_UNORM_SRGB_BLOCK, 1133 ASTC_8x5_SRGB_BLOCK, 1134 ASTC_8x5_UNORM_BLOCK, 1135 ASTC_8x6_SRGB_BLOCK, 1136 ASTC_8x6_UNORM_BLOCK, 1137 ASTC_8x8_SRGB_BLOCK, 1138 ASTC_8x8_UNORM_BLOCK, 1139 B10G10R10A2_UNORM, 1140 B4G4R4A4_UNORM, 1141 B5G5R5A1_UNORM, 1142 B5G6R5_UNORM, 1143 B8G8R8A8_TYPELESS, 1144 B8G8R8A8_TYPELESS_SRGB, 1145 B8G8R8A8_UNORM, 1146 B8G8R8A8_UNORM_SRGB, 1147 B8G8R8X8_UNORM, 1148 B8G8R8X8_UNORM_SRGB, 1149 BC1_RGBA_UNORM_BLOCK, 1150 BC1_RGBA_UNORM_SRGB_BLOCK, 1151 BC1_RGB_UNORM_BLOCK, 1152 BC1_RGB_UNORM_SRGB_BLOCK, 1153 BC2_RGBA_UNORM_BLOCK, 1154 BC2_RGBA_UNORM_SRGB_BLOCK, 1155 BC3_RGBA_UNORM_BLOCK, 1156 BC3_RGBA_UNORM_SRGB_BLOCK, 1157 BC4_RED_SNORM_BLOCK, 1158 BC4_RED_UNORM_BLOCK, 1159 BC5_RG_SNORM_BLOCK, 1160 BC5_RG_UNORM_BLOCK, 1161 BC6H_RGB_SFLOAT_BLOCK, 1162 BC6H_RGB_UFLOAT_BLOCK, 1163 BC7_RGBA_UNORM_BLOCK, 1164 BC7_RGBA_UNORM_SRGB_BLOCK, 1165 EAC_R11G11_SNORM_BLOCK, 1166 EAC_R11G11_UNORM_BLOCK, 1167 EAC_R11_SNORM_BLOCK, 1168 EAC_R11_UNORM_BLOCK, 1169 ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK, 1170 ETC1_R8G8B8_UNORM_BLOCK, 1171 ETC2_R8G8B8A1_SRGB_BLOCK, 1172 ETC2_R8G8B8A1_UNORM_BLOCK, 1173 ETC2_R8G8B8A8_SRGB_BLOCK, 1174 ETC2_R8G8B8A8_UNORM_BLOCK, 1175 ETC2_R8G8B8_SRGB_BLOCK, 1176 ETC2_R8G8B8_UNORM_BLOCK, 1177 G8_B8R8_2PLANE_420_UNORM, 1178 G8_B8_R8_3PLANE_420_UNORM, 1179 L16A16_FLOAT, 1180 L16_FLOAT, 1181 L32A32_FLOAT, 1182 L32_FLOAT, 1183 L4A4_UNORM, 1184 L8A8_UNORM, 1185 L8_UNORM, 1186 PALETTE4_R4G4B4A4_UNORM, 1187 PALETTE4_R5G5B5A1_UNORM, 1188 PALETTE4_R5G6B5_UNORM, 1189 PALETTE4_R8G8B8A8_UNORM, 1190 PALETTE4_R8G8B8_UNORM, 1191 PALETTE8_R4G4B4A4_UNORM, 1192 PALETTE8_R5G5B5A1_UNORM, 1193 PALETTE8_R5G6B5_UNORM, 1194 PALETTE8_R8G8B8A8_UNORM, 1195 PALETTE8_R8G8B8_UNORM, 1196 PVRTC1_RGBA_2BPP_UNORM_BLOCK, 1197 PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK, 1198 PVRTC1_RGBA_4BPP_UNORM_BLOCK, 1199 PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK, 1200 PVRTC1_RGB_2BPP_UNORM_BLOCK, 1201 PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK, 1202 PVRTC1_RGB_4BPP_UNORM_BLOCK, 1203 PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK, 1204 R10G10B10A2_SINT, 1205 R10G10B10A2_SNORM, 1206 R10G10B10A2_SSCALED, 1207 R10G10B10A2_UINT, 1208 R10G10B10A2_UNORM, 1209 R10G10B10A2_USCALED, 1210 R10G10B10X2_UNORM, 1211 R11G11B10_FLOAT, 1212 R16G16B16A16_FLOAT, 1213 R16G16B16A16_SINT, 1214 R16G16B16A16_SNORM, 1215 R16G16B16A16_SSCALED, 1216 R16G16B16A16_UINT, 1217 R16G16B16A16_UNORM, 1218 R16G16B16A16_USCALED, 1219 R16G16B16_FLOAT, 1220 R16G16B16_SINT, 1221 R16G16B16_SNORM, 1222 R16G16B16_SSCALED, 1223 R16G16B16_UINT, 1224 R16G16B16_UNORM, 1225 R16G16B16_USCALED, 1226 R16G16_FLOAT, 1227 R16G16_SINT, 1228 R16G16_SNORM, 1229 R16G16_SSCALED, 1230 R16G16_UINT, 1231 R16G16_UNORM, 1232 R16G16_USCALED, 1233 R16_FLOAT, 1234 R16_SINT, 1235 R16_SNORM, 1236 R16_SSCALED, 1237 R16_UINT, 1238 R16_UNORM, 1239 R16_USCALED, 1240 R32G32B32A32_FIXED, 1241 R32G32B32A32_FLOAT, 1242 R32G32B32A32_SINT, 1243 R32G32B32A32_SNORM, 1244 R32G32B32A32_SSCALED, 1245 R32G32B32A32_UINT, 1246 R32G32B32A32_UNORM, 1247 R32G32B32A32_USCALED, 1248 R32G32B32_FIXED, 1249 R32G32B32_FLOAT, 1250 R32G32B32_SINT, 1251 R32G32B32_SNORM, 1252 R32G32B32_SSCALED, 1253 R32G32B32_UINT, 1254 R32G32B32_UNORM, 1255 R32G32B32_USCALED, 1256 R32G32_FIXED, 1257 R32G32_FLOAT, 1258 R32G32_SINT, 1259 R32G32_SNORM, 1260 R32G32_SSCALED, 1261 R32G32_UINT, 1262 R32G32_UNORM, 1263 R32G32_USCALED, 1264 R32_FIXED, 1265 R32_FLOAT, 1266 R32_SINT, 1267 R32_SNORM, 1268 R32_SSCALED, 1269 R32_UINT, 1270 R32_UNORM, 1271 R32_USCALED, 1272 R4G4B4A4_UNORM, 1273 R5G5B5A1_UNORM, 1274 R5G6B5_UNORM, 1275 R8G8B8A8_SINT, 1276 R8G8B8A8_SNORM, 1277 R8G8B8A8_SSCALED, 1278 R8G8B8A8_TYPELESS, 1279 R8G8B8A8_TYPELESS_SRGB, 1280 R8G8B8A8_UINT, 1281 R8G8B8A8_UNORM, 1282 R8G8B8A8_UNORM_SRGB, 1283 R8G8B8A8_USCALED, 1284 R8G8B8X8_UNORM, 1285 R8G8B8X8_UNORM_SRGB, 1286 R8G8B8_SINT, 1287 R8G8B8_SNORM, 1288 R8G8B8_SSCALED, 1289 R8G8B8_UINT, 1290 R8G8B8_UNORM, 1291 R8G8B8_UNORM_SRGB, 1292 R8G8B8_USCALED, 1293 R8G8_SINT, 1294 R8G8_SNORM, 1295 R8G8_SSCALED, 1296 R8G8_UINT, 1297 R8G8_UNORM, 1298 R8G8_UNORM_SRGB, 1299 R8G8_USCALED, 1300 R8_SINT, 1301 R8_SNORM, 1302 R8_SSCALED, 1303 R8_UINT, 1304 R8_UNORM, 1305 R8_UNORM_SRGB, 1306 R8_USCALED, 1307 R9G9B9E5_SHAREDEXP, 1308 X2R10G10B10_SINT_VERTEX, 1309 X2R10G10B10_SNORM_VERTEX, 1310 X2R10G10B10_SSCALED_VERTEX, 1311 X2R10G10B10_UINT_VERTEX, 1312 X2R10G10B10_UNORM_VERTEX, 1313 X2R10G10B10_USCALED_VERTEX, 1314 EXTERNAL0, 1315 EXTERNAL1, 1316 EXTERNAL2, 1317 EXTERNAL3, 1318 EXTERNAL4, 1319 EXTERNAL5, 1320 EXTERNAL6, 1321 EXTERNAL7 1322 }; 1323 } 1324 } 1325 } 1326 1327 using namespace rx::mtl_shader; 1328 constant int kCopyFormatType [[function_constant(10)]]; 1329 constant int kCopyTextureType [[function_constant(20)]]; 1330 constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D; 1331 constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray; 1332 constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample; 1333 constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube; 1334 constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D; 1335 struct CopyPixelParams 1336 { 1337 uint3 copySize; 1338 uint3 textureOffset; 1339 uint bufferStartOffset; 1340 uint pixelSize; 1341 uint bufferRowPitch; 1342 uint bufferDepthPitch; 1343 }; 1344 struct WritePixelParams 1345 { 1346 uint2 copySize; 1347 uint2 textureOffset; 1348 uint bufferStartOffset; 1349 uint pixelSize; 1350 uint bufferRowPitch; 1351 uint textureLevel; 1352 uint textureLayer; 1353 bool reverseTextureRowOrder; 1354 }; 1355 template <typename T> 1356 static inline void textureWrite(ushort3 gIndices, 1357 constant CopyPixelParams &options, 1358 vec<T, 4> color, 1359 texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 1360 { 1361 uint3 writeIndices = options.textureOffset + uint3(gIndices); 1362 switch (kCopyTextureType) 1363 { 1364 case kTextureType2D: 1365 dstTexture2d.write(color, writeIndices.xy); 1366 break; 1367 case kTextureType2DArray: 1368 dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z); 1369 break; 1370 case kTextureType3D: 1371 dstTexture3d.write(color, writeIndices); 1372 break; 1373 case kTextureTypeCube: 1374 dstTextureCube.write(color, writeIndices.xy, writeIndices.z); 1375 break; 1376 } 1377 } 1378 template <typename T> 1379 static inline vec<T, 4> textureRead(ushort2 gIndices, 1380 constant WritePixelParams &options, 1381 texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]]) 1382 { 1383 vec<T, 4> color; 1384 uint2 coords = uint2(gIndices); 1385 if (options.reverseTextureRowOrder) 1386 { 1387 coords.y = options.copySize.y - 1 - gIndices.y; 1388 } 1389 coords += options.textureOffset; 1390 switch (kCopyTextureType) 1391 { 1392 case kTextureType2D: 1393 color = srcTexture2d.read(coords.xy, options.textureLevel); 1394 break; 1395 case kTextureType2DArray: 1396 color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel); 1397 break; 1398 case kTextureType2DMultisample: 1399 color = resolveTextureMS(srcTexture2dMS, coords.xy); 1400 break; 1401 case kTextureType3D: 1402 color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel); 1403 break; 1404 case kTextureTypeCube: 1405 color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel); 1406 break; 1407 } 1408 return color; 1409 } 1410 static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer) 1411 { 1412 float4 color; 1413 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1414 color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); 1415 color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src)); 1416 color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src)); 1417 color.a = 1.0; 1418 return color; 1419 } 1420 static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1421 { 1422 ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | 1423 shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) | 1424 shiftData<5, 0>(floatToNormalized<5, ushort>(color.b)); 1425 shortToBytes(dst, bufferOffset, buffer); 1426 } 1427 static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer) 1428 { 1429 float4 color; 1430 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1431 color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src)); 1432 color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src)); 1433 color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src)); 1434 color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src)); 1435 return color; 1436 } 1437 static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1438 { 1439 ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) | 1440 shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) | 1441 shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) | 1442 shiftData<4, 0>(floatToNormalized<4, ushort>(color.a)); 1443 ; 1444 shortToBytes(dst, bufferOffset, buffer); 1445 } 1446 static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer) 1447 { 1448 float4 color; 1449 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1450 color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); 1451 color.g = normalizedToFloat<5>(getShiftedData<5, 6>(src)); 1452 color.b = normalizedToFloat<5>(getShiftedData<5, 1>(src)); 1453 color.a = normalizedToFloat<1>(getShiftedData<1, 0>(src)); 1454 return color; 1455 } 1456 static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1457 { 1458 ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | 1459 shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) | 1460 shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) | 1461 shiftData<1, 0>(floatToNormalized<1, ushort>(color.a)); 1462 ; 1463 shortToBytes(dst, bufferOffset, buffer); 1464 } 1465 static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer) 1466 { 1467 int4 color; 1468 int src = bytesToInt<int>(buffer, bufferOffset); 1469 constexpr int3 rgbSignMask(0x200); 1470 constexpr int3 negativeMask(0xfffffc00); 1471 constexpr int alphaSignMask = 0x2; 1472 constexpr int alphaNegMask = 0xfffffffc; 1473 color.r = getShiftedData<10, 0>(src); 1474 color.g = getShiftedData<10, 10>(src); 1475 color.b = getShiftedData<10, 20>(src); 1476 int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9; 1477 color.rgb = (isRgbNegative * negativeMask) | color.rgb; 1478 color.a = getShiftedData<2, 30>(src); 1479 int isAlphaNegative = color.a & alphaSignMask >> 1; 1480 color.a = (isAlphaNegative * alphaNegMask) | color.a; 1481 return color; 1482 } 1483 static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer) 1484 { 1485 uint4 color; 1486 uint src = bytesToInt<uint>(buffer, bufferOffset); 1487 color.r = getShiftedData<10, 0>(src); 1488 color.g = getShiftedData<10, 10>(src); 1489 color.b = getShiftedData<10, 20>(src); 1490 color.a = getShiftedData<2, 30>(src); 1491 return color; 1492 } 1493 static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB) 1494 { 1495 float4 color; 1496 uint src = bytesToInt<uint>(buffer, bufferOffset); 1497 if (isSRGB) 1498 { 1499 color = unpack_unorm4x8_srgb_to_float(src); 1500 } 1501 else 1502 { 1503 color = unpack_unorm4x8_to_float(src); 1504 } 1505 return color; 1506 } 1507 static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB) 1508 { 1509 uint dst; 1510 if (isSRGB) 1511 { 1512 dst = pack_float_to_srgb_unorm4x8(color); 1513 } 1514 else 1515 { 1516 dst = pack_float_to_unorm4x8(color); 1517 } 1518 intToBytes(dst, bufferOffset, buffer); 1519 } 1520 static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB) 1521 { 1522 float4 color; 1523 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 1524 color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 1525 color.b = normalizedToFloat<uchar>(buffer[bufferOffset + 2]); 1526 color.a = 1.0; 1527 if (isSRGB) 1528 { 1529 color = sRGBtoLinear(color); 1530 } 1531 return color; 1532 } 1533 static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB) 1534 { 1535 color.a = 1.0; 1536 uint dst; 1537 if (isSRGB) 1538 { 1539 dst = pack_float_to_srgb_unorm4x8(color); 1540 } 1541 else 1542 { 1543 dst = pack_float_to_unorm4x8(color); 1544 } 1545 int24bitToBytes(dst, bufferOffset, buffer); 1546 } 1547 static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer) 1548 { 1549 float4 color; 1550 uint src = bytesToInt<uint>(buffer, bufferOffset); 1551 color = unpack_snorm4x8_to_float(src); 1552 return color; 1553 } 1554 static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1555 { 1556 uint dst = pack_float_to_snorm4x8(color); 1557 intToBytes(dst, bufferOffset, buffer); 1558 } 1559 static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer) 1560 { 1561 float4 color; 1562 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 1563 color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); 1564 color.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]); 1565 color.a = 1.0; 1566 return color; 1567 } 1568 static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1569 { 1570 uint dst = pack_float_to_snorm4x8(color); 1571 int24bitToBytes(dst, bufferOffset, buffer); 1572 } 1573 static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer) 1574 { 1575 return readR8G8B8A8(bufferOffset, buffer, false); 1576 } 1577 static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1578 { 1579 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); 1580 } 1581 static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 1582 { 1583 return readR8G8B8A8(bufferOffset, buffer, true); 1584 } 1585 static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1586 { 1587 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); 1588 } 1589 static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer) 1590 { 1591 return readR8G8B8A8(bufferOffset, buffer, false).bgra; 1592 } 1593 static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1594 { 1595 color.rgba = color.bgra; 1596 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); 1597 } 1598 static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 1599 { 1600 return readR8G8B8A8(bufferOffset, buffer, true).bgra; 1601 } 1602 static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1603 { 1604 color.rgba = color.bgra; 1605 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); 1606 } 1607 static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer) 1608 { 1609 return readR8G8B8(bufferOffset, buffer, false); 1610 } 1611 static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1612 { 1613 return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false); 1614 } 1615 static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 1616 { 1617 return readR8G8B8(bufferOffset, buffer, true); 1618 } 1619 static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1620 { 1621 return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true); 1622 } 1623 static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer) 1624 { 1625 float4 color; 1626 color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset])); 1627 color.a = 1.0; 1628 return color; 1629 } 1630 static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1631 { 1632 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 1633 } 1634 static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1635 { 1636 buffer[bufferOffset] = floatToNormalized<uchar>(color.a); 1637 } 1638 static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer) 1639 { 1640 float4 color; 1641 color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset])); 1642 color.a = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 1643 return color; 1644 } 1645 static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1646 { 1647 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 1648 buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.a); 1649 } 1650 static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer) 1651 { 1652 float4 color; 1653 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 1654 color.g = color.b = 0.0; 1655 color.a = 1.0; 1656 return color; 1657 } 1658 static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1659 { 1660 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 1661 } 1662 static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer) 1663 { 1664 float4 color; 1665 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 1666 color.g = color.b = 0.0; 1667 color.a = 1.0; 1668 return color; 1669 } 1670 static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1671 { 1672 buffer[bufferOffset] = as_type<uchar>(floatToNormalized<char>(color.r)); 1673 } 1674 static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer) 1675 { 1676 int4 color; 1677 color.r = as_type<char>(buffer[bufferOffset]); 1678 color.g = color.b = 0; 1679 color.a = 1; 1680 return color; 1681 } 1682 static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 1683 { 1684 buffer[bufferOffset] = static_cast<uchar>(color.r); 1685 } 1686 static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer) 1687 { 1688 uint4 color; 1689 color.r = as_type<uchar>(buffer[bufferOffset]); 1690 color.g = color.b = 0; 1691 color.a = 1; 1692 return color; 1693 } 1694 static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 1695 { 1696 buffer[bufferOffset] = static_cast<uchar>(color.r); 1697 } 1698 static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer) 1699 { 1700 float4 color; 1701 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 1702 color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 1703 color.b = 0.0; 1704 color.a = 1.0; 1705 return color; 1706 } 1707 static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1708 { 1709 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 1710 buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.g); 1711 } 1712 static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer) 1713 { 1714 float4 color; 1715 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 1716 color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); 1717 color.b = 0.0; 1718 color.a = 1.0; 1719 return color; 1720 } 1721 static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1722 { 1723 buffer[bufferOffset] = as_type<uchar>(floatToNormalized<char>(color.r)); 1724 buffer[bufferOffset + 1] = as_type<uchar>(floatToNormalized<char>(color.g)); 1725 } 1726 static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer) 1727 { 1728 int4 color; 1729 color.r = as_type<char>(buffer[bufferOffset]); 1730 color.g = as_type<char>(buffer[bufferOffset + 1]); 1731 color.b = 0; 1732 color.a = 1; 1733 return color; 1734 } 1735 static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 1736 { 1737 buffer[bufferOffset] = static_cast<uchar>(color.r); 1738 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 1739 } 1740 static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer) 1741 { 1742 uint4 color; 1743 color.r = as_type<uchar>(buffer[bufferOffset]); 1744 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 1745 color.b = 0; 1746 color.a = 1; 1747 return color; 1748 } 1749 static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 1750 { 1751 buffer[bufferOffset] = static_cast<uchar>(color.r); 1752 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 1753 } 1754 static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer) 1755 { 1756 int4 color; 1757 color.r = as_type<char>(buffer[bufferOffset]); 1758 color.g = as_type<char>(buffer[bufferOffset + 1]); 1759 color.b = as_type<char>(buffer[bufferOffset + 2]); 1760 color.a = 1; 1761 return color; 1762 } 1763 static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer) 1764 { 1765 uint4 color; 1766 color.r = as_type<uchar>(buffer[bufferOffset]); 1767 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 1768 color.b = as_type<uchar>(buffer[bufferOffset + 2]); 1769 color.a = 1; 1770 return color; 1771 } 1772 static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer) 1773 { 1774 int4 color; 1775 color.r = as_type<char>(buffer[bufferOffset]); 1776 color.g = as_type<char>(buffer[bufferOffset + 1]); 1777 color.b = as_type<char>(buffer[bufferOffset + 2]); 1778 color.a = as_type<char>(buffer[bufferOffset + 3]); 1779 return color; 1780 } 1781 static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 1782 { 1783 buffer[bufferOffset] = static_cast<uchar>(color.r); 1784 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 1785 buffer[bufferOffset + 2] = static_cast<uchar>(color.b); 1786 buffer[bufferOffset + 3] = static_cast<uchar>(color.a); 1787 } 1788 static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer) 1789 { 1790 uint4 color; 1791 color.r = as_type<uchar>(buffer[bufferOffset]); 1792 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 1793 color.b = as_type<uchar>(buffer[bufferOffset + 2]); 1794 color.a = as_type<uchar>(buffer[bufferOffset + 3]); 1795 return color; 1796 } 1797 static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 1798 { 1799 buffer[bufferOffset] = static_cast<uchar>(color.r); 1800 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 1801 buffer[bufferOffset + 2] = static_cast<uchar>(color.b); 1802 buffer[bufferOffset + 3] = static_cast<uchar>(color.a); 1803 } 1804 static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer) 1805 { 1806 float4 color; 1807 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 1808 color.g = color.b = 0.0; 1809 color.a = 1.0; 1810 return color; 1811 } 1812 static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1813 { 1814 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 1815 } 1816 template <typename ShortType> 1817 static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer) 1818 { 1819 float4 color; 1820 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 1821 color.g = color.b = 0.0; 1822 color.a = 1.0; 1823 return color; 1824 } 1825 template<typename ShortType> 1826 static inline void writeR16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1827 { 1828 shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer); 1829 } 1830 static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer) 1831 { 1832 int4 color; 1833 color.r = bytesToShort<short>(buffer, bufferOffset); 1834 color.g = color.b = 0; 1835 color.a = 1; 1836 return color; 1837 } 1838 static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 1839 { 1840 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 1841 } 1842 static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer) 1843 { 1844 uint4 color; 1845 color.r = bytesToShort<ushort>(buffer, bufferOffset); 1846 color.g = color.b = 0; 1847 color.a = 1; 1848 return color; 1849 } 1850 static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 1851 { 1852 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 1853 } 1854 static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer) 1855 { 1856 float4 color; 1857 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 1858 color.rgb = 0.0; 1859 return color; 1860 } 1861 static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1862 { 1863 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset, buffer); 1864 } 1865 static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer) 1866 { 1867 float4 color; 1868 color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 1869 color.a = 1.0; 1870 return color; 1871 } 1872 static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1873 { 1874 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 1875 } 1876 static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer) 1877 { 1878 float4 color; 1879 color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 1880 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 1881 return color; 1882 } 1883 static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1884 { 1885 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 1886 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 2, buffer); 1887 } 1888 static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer) 1889 { 1890 float4 color; 1891 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 1892 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 1893 color.b = 0.0; 1894 color.a = 1.0; 1895 return color; 1896 } 1897 static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1898 { 1899 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 1900 shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer); 1901 } 1902 template <typename ShortType> 1903 static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer) 1904 { 1905 float4 color; 1906 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 1907 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 1908 color.b = 0.0; 1909 color.a = 1.0; 1910 return color; 1911 } 1912 template<typename ShortType> 1913 static inline void writeR16G16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1914 { 1915 shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer); 1916 shortToBytes(floatToNormalized<ShortType>(color.g), bufferOffset + 2, buffer); 1917 } 1918 static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer) 1919 { 1920 int4 color; 1921 color.r = bytesToShort<short>(buffer, bufferOffset); 1922 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 1923 color.b = 0; 1924 color.a = 1; 1925 return color; 1926 } 1927 static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 1928 { 1929 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 1930 shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer); 1931 } 1932 static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer) 1933 { 1934 uint4 color; 1935 color.r = bytesToShort<ushort>(buffer, bufferOffset); 1936 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 1937 color.b = 0; 1938 color.a = 1; 1939 return color; 1940 } 1941 static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 1942 { 1943 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 1944 shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer); 1945 } 1946 static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer) 1947 { 1948 float4 color; 1949 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 1950 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 1951 color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4)); 1952 color.a = 1.0; 1953 return color; 1954 } 1955 template <typename ShortType> 1956 static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer) 1957 { 1958 float4 color; 1959 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 1960 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 1961 color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4)); 1962 color.a = 1.0; 1963 return color; 1964 } 1965 static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer) 1966 { 1967 int4 color; 1968 color.r = bytesToShort<short>(buffer, bufferOffset); 1969 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 1970 color.b = bytesToShort<short>(buffer, bufferOffset + 4); 1971 color.a = 1; 1972 return color; 1973 } 1974 static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer) 1975 { 1976 uint4 color; 1977 color.r = bytesToShort<ushort>(buffer, bufferOffset); 1978 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 1979 color.b = bytesToShort<ushort>(buffer, bufferOffset + 4); 1980 color.a = 1; 1981 return color; 1982 } 1983 static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer) 1984 { 1985 float4 color; 1986 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 1987 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 1988 color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4)); 1989 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6)); 1990 return color; 1991 } 1992 static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1993 { 1994 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 1995 shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer); 1996 shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer); 1997 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer); 1998 } 1999 template <typename ShortType> 2000 static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer) 2001 { 2002 float4 color; 2003 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2004 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2005 color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4)); 2006 color.a = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6)); 2007 return color; 2008 } 2009 template<typename ShortType> 2010 static inline void writeR16G16B16A16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2011 { 2012 shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer); 2013 shortToBytes(floatToNormalized<ShortType>(color.g), bufferOffset + 2, buffer); 2014 shortToBytes(floatToNormalized<ShortType>(color.b), bufferOffset + 4, buffer); 2015 shortToBytes(floatToNormalized<ShortType>(color.a), bufferOffset + 6, buffer); 2016 } 2017 static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer) 2018 { 2019 int4 color; 2020 color.r = bytesToShort<short>(buffer, bufferOffset); 2021 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2022 color.b = bytesToShort<short>(buffer, bufferOffset + 4); 2023 color.a = bytesToShort<short>(buffer, bufferOffset + 6); 2024 return color; 2025 } 2026 static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2027 { 2028 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2029 shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer); 2030 shortToBytes(static_cast<short>(color.b), bufferOffset + 4, buffer); 2031 shortToBytes(static_cast<short>(color.a), bufferOffset + 6, buffer); 2032 } 2033 static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer) 2034 { 2035 uint4 color; 2036 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2037 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2038 color.b = bytesToShort<ushort>(buffer, bufferOffset + 4); 2039 color.a = bytesToShort<ushort>(buffer, bufferOffset + 6); 2040 return color; 2041 } 2042 static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2043 { 2044 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2045 shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer); 2046 shortToBytes(static_cast<ushort>(color.b), bufferOffset + 4, buffer); 2047 shortToBytes(static_cast<ushort>(color.a), bufferOffset + 6, buffer); 2048 } 2049 static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer) 2050 { 2051 float4 color; 2052 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2053 color.g = color.b = 0.0; 2054 color.a = 1.0; 2055 return color; 2056 } 2057 static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2058 { 2059 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2060 } 2061 template <typename IntType> 2062 static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer) 2063 { 2064 float4 color; 2065 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2066 color.g = color.b = 0.0; 2067 color.a = 1.0; 2068 return color; 2069 } 2070 static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer) 2071 { 2072 float4 color; 2073 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2074 color.rgb = 0.0; 2075 return color; 2076 } 2077 static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2078 { 2079 intToBytes(as_type<uint>(color.a), bufferOffset, buffer); 2080 } 2081 static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer) 2082 { 2083 float4 color; 2084 color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2085 color.a = 1.0; 2086 return color; 2087 } 2088 static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2089 { 2090 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2091 } 2092 static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer) 2093 { 2094 int4 color; 2095 color.r = bytesToInt<int>(buffer, bufferOffset); 2096 color.g = color.b = 0; 2097 color.a = 1; 2098 return color; 2099 } 2100 static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2101 { 2102 intToBytes(color.r, bufferOffset, buffer); 2103 } 2104 static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer) 2105 { 2106 float4 color; 2107 constexpr float kDivisor = 1.0f / (1 << 16); 2108 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2109 color.g = color.b = 0.0; 2110 color.a = 1.0; 2111 return color; 2112 } 2113 static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer) 2114 { 2115 uint4 color; 2116 color.r = bytesToInt<uint>(buffer, bufferOffset); 2117 color.g = color.b = 0; 2118 color.a = 1; 2119 return color; 2120 } 2121 static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2122 { 2123 intToBytes(color.r, bufferOffset, buffer); 2124 } 2125 static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer) 2126 { 2127 float4 color; 2128 color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2129 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2130 return color; 2131 } 2132 static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2133 { 2134 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2135 intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer); 2136 } 2137 static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer) 2138 { 2139 float4 color; 2140 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2141 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2142 color.b = 0.0; 2143 color.a = 1.0; 2144 return color; 2145 } 2146 static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2147 { 2148 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2149 intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer); 2150 } 2151 template <typename IntType> 2152 static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer) 2153 { 2154 float4 color; 2155 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2156 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2157 color.b = 0.0; 2158 color.a = 1.0; 2159 return color; 2160 } 2161 static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer) 2162 { 2163 int4 color; 2164 color.r = bytesToInt<int>(buffer, bufferOffset); 2165 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2166 color.b = 0; 2167 color.a = 1; 2168 return color; 2169 } 2170 static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2171 { 2172 intToBytes(color.r, bufferOffset, buffer); 2173 intToBytes(color.g, bufferOffset + 4, buffer); 2174 } 2175 static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer) 2176 { 2177 float4 color; 2178 constexpr float kDivisor = 1.0f / (1 << 16); 2179 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2180 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2181 color.b = 0.0; 2182 color.a = 1.0; 2183 return color; 2184 } 2185 static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer) 2186 { 2187 uint4 color; 2188 color.r = bytesToInt<uint>(buffer, bufferOffset); 2189 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2190 color.b = 0; 2191 color.a = 1; 2192 return color; 2193 } 2194 static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2195 { 2196 intToBytes(color.r, bufferOffset, buffer); 2197 intToBytes(color.g, bufferOffset + 4, buffer); 2198 } 2199 static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer) 2200 { 2201 float4 color; 2202 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2203 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2204 color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8)); 2205 color.a = 1.0; 2206 return color; 2207 } 2208 template <typename IntType> 2209 static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer) 2210 { 2211 float4 color; 2212 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2213 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2214 color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8)); 2215 color.a = 1.0; 2216 return color; 2217 } 2218 static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer) 2219 { 2220 int4 color; 2221 color.r = bytesToInt<int>(buffer, bufferOffset); 2222 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2223 color.b = bytesToInt<int>(buffer, bufferOffset + 8); 2224 color.a = 1; 2225 return color; 2226 } 2227 static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer) 2228 { 2229 float4 color; 2230 constexpr float kDivisor = 1.0f / (1 << 16); 2231 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2232 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2233 color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor; 2234 color.a = 1.0; 2235 return color; 2236 } 2237 static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer) 2238 { 2239 uint4 color; 2240 color.r = bytesToInt<uint>(buffer, bufferOffset); 2241 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2242 color.b = bytesToInt<uint>(buffer, bufferOffset + 8); 2243 color.a = 1; 2244 return color; 2245 } 2246 static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer) 2247 { 2248 float4 color; 2249 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2250 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2251 color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8)); 2252 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12)); 2253 return color; 2254 } 2255 static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2256 { 2257 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2258 intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer); 2259 intToBytes(as_type<uint>(color.b), bufferOffset + 8, buffer); 2260 intToBytes(as_type<uint>(color.a), bufferOffset + 12, buffer); 2261 } 2262 template <typename IntType> 2263 static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer) 2264 { 2265 float4 color; 2266 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2267 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2268 color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8)); 2269 color.a = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12)); 2270 return color; 2271 } 2272 static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer) 2273 { 2274 int4 color; 2275 color.r = bytesToInt<int>(buffer, bufferOffset); 2276 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2277 color.b = bytesToInt<int>(buffer, bufferOffset + 8); 2278 color.a = bytesToInt<int>(buffer, bufferOffset + 12); 2279 return color; 2280 } 2281 static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2282 { 2283 intToBytes(color.r, bufferOffset, buffer); 2284 intToBytes(color.g, bufferOffset + 4, buffer); 2285 intToBytes(color.b, bufferOffset + 8, buffer); 2286 intToBytes(color.a, bufferOffset + 12, buffer); 2287 } 2288 static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer) 2289 { 2290 float4 color; 2291 constexpr float kDivisor = 1.0f / (1 << 16); 2292 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2293 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2294 color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor; 2295 color.a = bytesToInt<int>(buffer, bufferOffset + 12) * kDivisor; 2296 return color; 2297 } 2298 static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer) 2299 { 2300 uint4 color; 2301 color.r = bytesToInt<uint>(buffer, bufferOffset); 2302 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2303 color.b = bytesToInt<uint>(buffer, bufferOffset + 8); 2304 color.a = bytesToInt<uint>(buffer, bufferOffset + 12); 2305 return color; 2306 } 2307 static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2308 { 2309 intToBytes(color.r, bufferOffset, buffer); 2310 intToBytes(color.g, bufferOffset + 4, buffer); 2311 intToBytes(color.b, bufferOffset + 8, buffer); 2312 intToBytes(color.a, bufferOffset + 12, buffer); 2313 } 2314 static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8_SINT(bufferOffset, buffer); } static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8_UINT(bufferOffset, buffer); } static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_UINT(bufferOffset, buffer); } 2315 static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16_SINT(bufferOffset, buffer); } static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16_UINT(bufferOffset, buffer); } static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_UINT(bufferOffset, buffer); } 2316 static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32_SINT(bufferOffset, buffer); } static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32_UINT(bufferOffset, buffer); } static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_UINT(bufferOffset, buffer); } 2317 static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); } 2318 kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2319 { 2320 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2321 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2322 switch (kCopyFormatType) 2323 { 2324 case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SNORM: { auto color = readR16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UNORM: { auto color = readR16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SNORM: { auto color = readR16G16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UNORM: { auto color = readR16G16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SNORM: { auto color = readR16G16B16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UNORM: { auto color = readR16G16B16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SNORM: { auto color = readR16G16B16A16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UNORM: { auto color = readR16G16B16A16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2325 } 2326 } 2327 kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2328 { 2329 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2330 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2331 switch (kCopyFormatType) 2332 { 2333 case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2334 } 2335 } 2336 kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2337 { 2338 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2339 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2340 switch (kCopyFormatType) 2341 { 2342 case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2343 } 2344 } 2345 kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2346 { 2347 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2348 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2349 switch (kCopyFormatType) 2350 { 2351 case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; 2352 } 2353 } 2354 kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<int, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<int, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2355 { 2356 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2357 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2358 switch (kCopyFormatType) 2359 { 2360 case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break; 2361 } 2362 } 2363 kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<uint, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<uint, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2364 { 2365 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2366 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2367 switch (kCopyFormatType) 2368 { 2369 case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break; 2370 } 2371 } 2372 struct CopyVertexParams 2373 { 2374 uint srcBufferStartOffset; 2375 uint srcStride; 2376 uint srcComponentBytes; 2377 uint srcComponents; 2378 uchar4 srcDefaultAlphaData; 2379 uint dstBufferStartOffset; 2380 uint dstStride; 2381 uint dstComponents; 2382 uint vertexCount; 2383 }; 2384 template <typename IntType> 2385 static inline void writeFloatVertex(constant CopyVertexParams &options, 2386 uint idx, 2387 vec<IntType, 4> data, 2388 device uchar *dst) 2389 { 2390 uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; 2391 for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) 2392 { 2393 floatToBytes(static_cast<float>(data[component]), dstOffset, dst); 2394 } 2395 } 2396 template <> 2397 inline void writeFloatVertex(constant CopyVertexParams &options, 2398 uint idx, 2399 vec<float, 4> data, 2400 device uchar *dst) 2401 { 2402 uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; 2403 for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) 2404 { 2405 floatToBytes(data[component], dstOffset, dst); 2406 } 2407 } 2408 static inline void convertToFloatVertexFormat(uint index, 2409 constant CopyVertexParams &options, 2410 constant uchar *srcBuffer, 2411 device uchar *dstBuffer) 2412 { 2413 uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index; 2414 switch (kCopyFormatType) 2415 { 2416 case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; 2417 } 2418 } 2419 kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]], 2420 constant CopyVertexParams &options [[buffer(0)]], 2421 constant uchar *srcBuffer [[buffer(1)]], 2422 device uchar *dstBuffer [[buffer(2)]]) 2423 { 2424 if (index >= options.vertexCount) { return; }; 2425 convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); 2426 } 2427 vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]], 2428 constant CopyVertexParams &options [[buffer(0)]], 2429 constant uchar *srcBuffer [[buffer(1)]], 2430 device uchar *dstBuffer [[buffer(2)]]) 2431 { 2432 convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); 2433 } 2434 static inline void expandVertexFormatComponents(uint index, 2435 constant CopyVertexParams &options, 2436 constant uchar *srcBuffer, 2437 device uchar *dstBuffer) 2438 { 2439 uint srcOffset = options.srcBufferStartOffset + options.srcStride * index; 2440 uint dstOffset = options.dstBufferStartOffset + options.dstStride * index; 2441 uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u); 2442 uint component; 2443 for (component = 0; component < options.srcComponents; ++component, 2444 srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes) 2445 { 2446 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 2447 { 2448 dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte]; 2449 } 2450 } 2451 for (; component < dstComponentsBeforeAlpha; 2452 ++component, dstOffset += options.srcComponentBytes) 2453 { 2454 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 2455 { 2456 dstBuffer[dstOffset + byte] = 0; 2457 } 2458 } 2459 if (component < options.dstComponents) 2460 { 2461 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 2462 { 2463 dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte]; 2464 } 2465 } 2466 } 2467 kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]], 2468 constant CopyVertexParams &options [[buffer(0)]], 2469 constant uchar *srcBuffer [[buffer(1)]], 2470 device uchar *dstBuffer [[buffer(2)]]) 2471 { 2472 if (index >= options.vertexCount) { return; }; 2473 expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); 2474 } 2475 vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]], 2476 constant CopyVertexParams &options [[buffer(0)]], 2477 constant uchar *srcBuffer [[buffer(1)]], 2478 device uchar *dstBuffer [[buffer(2)]]) 2479 { 2480 expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); 2481 } 2482 kernel void linearizeBlocks(ushort2 position [[thread_position_in_grid]], 2483 constant uint2 *dimensions [[buffer(0)]], 2484 constant uint2 *srcBuffer [[buffer(1)]], 2485 device uint2 *dstBuffer [[buffer(2)]]) 2486 { 2487 if (any(uint2(position) >= *dimensions)) 2488 { 2489 return; 2490 } 2491 uint2 t = uint2(position); 2492 t = (t | (t << 8)) & 0x00FF00FF; 2493 t = (t | (t << 4)) & 0x0F0F0F0F; 2494 t = (t | (t << 2)) & 0x33333333; 2495 t = (t | (t << 1)) & 0x55555555; 2496 dstBuffer[position.y * (*dimensions).x + position.x] = srcBuffer[(t.x << 1) | t.y]; 2497 } 2498 kernel void saturateDepth(uint2 position [[thread_position_in_grid]], 2499 constant uint3 *dimensions [[buffer(0)]], 2500 device float *srcBuffer [[buffer(1)]], 2501 device float *dstBuffer [[buffer(2)]]) 2502 { 2503 if (any(position >= (*dimensions).xy)) 2504 { 2505 return; 2506 } 2507 const uint srcOffset = position.y * (*dimensions).z + position.x; 2508 const uint dstOffset = position.y * (*dimensions).x + position.x; 2509 dstBuffer[dstOffset] = saturate(srcBuffer[srcOffset]); 2510 } 2511 constant bool kCombineWithExistingResult [[function_constant(1000)]]; 2512 struct CombineVisibilityResultOptions 2513 { 2514 uint startOffset; 2515 uint numOffsets; 2516 }; 2517 kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]], 2518 constant CombineVisibilityResultOptions &options [[buffer(0)]], 2519 constant ushort4 *renderpassVisibilityResult [[buffer(1)]], 2520 device ushort4 *finalResults [[buffer(2)]]) 2521 { 2522 if (idx > 0) 2523 { 2524 return; 2525 } 2526 ushort4 finalResult16x4; 2527 if (kCombineWithExistingResult) 2528 { 2529 finalResult16x4 = finalResults[0]; 2530 } 2531 else 2532 { 2533 finalResult16x4 = ushort4(0, 0, 0, 0); 2534 } 2535 for (uint i = 0; i < options.numOffsets; ++i) 2536 { 2537 uint offset = options.startOffset + i; 2538 ushort4 renderpassResult = renderpassVisibilityResult[offset]; 2539 finalResult16x4 = finalResult16x4 | renderpassResult; 2540 } 2541 finalResults[0] = finalResult16x4; 2542 } 2543 using namespace metal; 2544 constant uint fixIndexBufferKey [[ function_constant(2000) ]]; 2545 constant bool indexBufferIsUint16 = (((fixIndexBufferKey >> 0U) & 0x03U) == 2U); 2546 constant bool indexBufferIsUint32 = (((fixIndexBufferKey >> 0U) & 0x03U) == 3U); 2547 constant bool outIndexBufferIsUint16 = (((fixIndexBufferKey >> 2U) & 0x03U) == 2U); 2548 constant bool outIndexBufferIsUint32 = (((fixIndexBufferKey >> 2U) & 0x03U) == 3U); 2549 constant bool doPrimRestart = (fixIndexBufferKey & 0x00100U); 2550 constant uint fixIndexBufferMode = (fixIndexBufferKey >> 4U) & 0x0FU; 2551 static inline uint readIdx( 2552 const device ushort *indexBufferUint16, 2553 const device uint *indexBufferUint32, 2554 const uint restartIndex, 2555 const uint indexCount, 2556 uint idx, 2557 thread bool &foundRestart, 2558 thread uint &indexThatRestartedFirst 2559 ) 2560 { 2561 uint inIndex = idx; 2562 if(inIndex < indexCount) 2563 { 2564 if(indexBufferIsUint16) 2565 { 2566 inIndex = indexBufferUint16[inIndex]; 2567 } 2568 else if(indexBufferIsUint32) 2569 { 2570 inIndex = indexBufferUint32[inIndex]; 2571 } 2572 } 2573 else 2574 { 2575 foundRestart = true; 2576 indexThatRestartedFirst = idx; 2577 } 2578 if(doPrimRestart && !foundRestart && inIndex == restartIndex) 2579 { 2580 foundRestart = true; 2581 indexThatRestartedFirst = idx; 2582 } 2583 return inIndex; 2584 } 2585 static inline void outputPrimitive( 2586 const device ushort *indexBufferUint16, 2587 const device uint *indexBufferUint32, 2588 device ushort *outIndexBufferUint16, 2589 device uint *outIndexBufferUint32, 2590 const uint restartIndex, 2591 const uint indexCount, 2592 thread uint &baseIndex, 2593 uint onIndex, 2594 thread uint &onOutIndex 2595 ) 2596 { 2597 if(baseIndex > onIndex) return; 2598 bool foundRestart = false; 2599 uint indexThatRestartedFirst = 0; 2600 switch(fixIndexBufferMode) 2601 { 2602 case 0x00U: 2603 { 2604 auto tmpIndex = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex, foundRestart, indexThatRestartedFirst); 2605 if(foundRestart) 2606 { 2607 baseIndex = indexThatRestartedFirst + 1; 2608 return; 2609 } 2610 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex; } onOutIndex++; }); 2611 } 2612 break; 2613 case 0x01U: 2614 { 2615 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 2616 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 2617 if(foundRestart) 2618 { 2619 baseIndex = indexThatRestartedFirst + 1; 2620 return; 2621 } 2622 if((onIndex - baseIndex) & 1) return; 2623 if(fixIndexBufferKey & 0x00200U) 2624 { 2625 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2626 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2627 } 2628 else 2629 { 2630 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2631 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2632 } 2633 } 2634 break; 2635 case 0x03U: 2636 { 2637 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 2638 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 2639 if(foundRestart) 2640 { 2641 baseIndex = indexThatRestartedFirst + 1; 2642 return; 2643 } 2644 if(fixIndexBufferKey & 0x00200U) 2645 { 2646 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2647 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2648 } 2649 else 2650 { 2651 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2652 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2653 } 2654 } 2655 break; 2656 case 0x04U: 2657 { 2658 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 2659 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 2660 auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); 2661 if(foundRestart) 2662 { 2663 baseIndex = indexThatRestartedFirst + 1; 2664 return; 2665 } 2666 if(((onIndex - baseIndex) % 3) != 0) return; 2667 if(fixIndexBufferKey & 0x00200U) 2668 { 2669 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 2670 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2671 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2672 } 2673 else 2674 { 2675 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2676 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2677 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 2678 } 2679 } 2680 break; 2681 case 0x05U: 2682 { 2683 uint isOdd = ((onIndex - baseIndex) & 1); 2684 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0 + isOdd, foundRestart, indexThatRestartedFirst); 2685 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1 - isOdd, foundRestart, indexThatRestartedFirst); 2686 auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); 2687 if(foundRestart) 2688 { 2689 baseIndex = indexThatRestartedFirst + 1; 2690 return; 2691 } 2692 if(fixIndexBufferKey & 0x00200U) 2693 { 2694 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 2695 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2696 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2697 } 2698 else 2699 { 2700 if(isOdd) 2701 { 2702 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2703 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 2704 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2705 } 2706 else 2707 { 2708 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 2709 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 2710 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 2711 } 2712 } 2713 assert(onOutIndex <= (onIndex + 1) * 3); 2714 assert(onOutIndex <= (indexCount - 2) * 3); 2715 } 2716 break; 2717 } 2718 } 2719 kernel void fixIndexBuffer( 2720 const device ushort *indexBufferUint16 [[ buffer(0), function_constant(indexBufferIsUint16) ]], 2721 const device uint *indexBufferUint32 [[ buffer(0), function_constant(indexBufferIsUint32) ]], 2722 device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], 2723 device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], 2724 constant uint &indexCount [[ buffer(2) ]], 2725 constant uint &primCount [[ buffer(3) ]], 2726 uint prim [[thread_position_in_grid]]) 2727 { 2728 constexpr uint restartIndex = 0xFFFFFFFF; 2729 uint baseIndex = 0; 2730 uint onIndex = onIndex; 2731 uint onOutIndex = onOutIndex; 2732 if(prim < primCount) 2733 { 2734 switch(fixIndexBufferMode) 2735 { 2736 case 0x00U: 2737 onIndex = prim; 2738 onOutIndex = prim; 2739 break; 2740 case 0x01U: 2741 onIndex = prim * 2; 2742 onOutIndex = prim * 2; 2743 break; 2744 case 0x03U: 2745 onIndex = prim; 2746 onOutIndex = prim * 2; 2747 break; 2748 case 0x04U: 2749 onIndex = prim * 3; 2750 onOutIndex = prim * 3; 2751 break; 2752 case 0x05U: 2753 onIndex = prim; 2754 onOutIndex = prim * 3; 2755 break; 2756 } 2757 outputPrimitive(indexBufferUint16, indexBufferUint32, outIndexBufferUint16, outIndexBufferUint32, restartIndex, indexCount, baseIndex, onIndex, onOutIndex); 2758 } 2759 } 2760 static inline void generatePrimitive( 2761 device ushort *outIndexBufferUint16, 2762 device uint *outIndexBufferUint32, 2763 const uint firstVertex, 2764 const uint indexCount, 2765 thread uint &baseIndex, 2766 uint onIndex, 2767 uint primCount, 2768 thread uint &onOutIndex 2769 ) 2770 { 2771 if(baseIndex > onIndex) return; 2772 switch(fixIndexBufferMode) 2773 { 2774 case 0x00U: 2775 { 2776 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = onIndex + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = onIndex + firstVertex; } onOutIndex++; }); 2777 } 2778 break; 2779 case 0x01U: 2780 { 2781 auto tmpIndex0 = onIndex + 0; 2782 auto tmpIndex1 = onIndex + 1; 2783 if(fixIndexBufferKey & 0x00200U) 2784 { 2785 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2786 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2787 } 2788 else 2789 { 2790 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2791 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2792 } 2793 } 2794 break; 2795 case 0x02U: 2796 { 2797 auto tmpIndex0 = onIndex + 0; 2798 auto tmpIndex1 = (onIndex + 1) % primCount; 2799 if(fixIndexBufferKey & 0x00200U) 2800 { 2801 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2802 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2803 } 2804 else 2805 { 2806 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2807 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2808 } 2809 } 2810 break; 2811 case 0x03U: 2812 { 2813 auto tmpIndex0 = onIndex + 0; 2814 auto tmpIndex1 = onIndex + 1; 2815 if(fixIndexBufferKey & 0x00200U) 2816 { 2817 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2818 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2819 } 2820 else 2821 { 2822 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2823 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2824 } 2825 } 2826 break; 2827 case 0x04U: 2828 { 2829 auto tmpIndex0 = onIndex + 0; 2830 auto tmpIndex1 = onIndex + 1; 2831 auto tmpIndex2 = onIndex + 2; 2832 if(fixIndexBufferKey & 0x00200U) 2833 { 2834 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 2835 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2836 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2837 } 2838 else 2839 { 2840 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2841 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2842 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 2843 } 2844 } 2845 break; 2846 case 0x05U: 2847 { 2848 uint isOdd = ((onIndex - baseIndex) & 1); 2849 auto tmpIndex0 = onIndex + 0 + isOdd; 2850 auto tmpIndex1 = onIndex + 1 - isOdd; 2851 auto tmpIndex2 = onIndex + 2; 2852 if(fixIndexBufferKey & 0x00200U) 2853 { 2854 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 2855 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2856 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2857 } 2858 else 2859 { 2860 if(isOdd) 2861 { 2862 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2863 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 2864 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2865 } 2866 else 2867 { 2868 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2869 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2870 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 2871 } 2872 } 2873 assert(onOutIndex <= (onIndex + 1) * 3); 2874 assert(onOutIndex <= (indexCount - 2) * 3); 2875 break; 2876 } 2877 case 0x06U: 2878 { 2879 auto tmpIndex0 = 0; 2880 auto tmpIndex1 = onIndex + 1; 2881 auto tmpIndex2 = onIndex + 2; 2882 if(fixIndexBufferKey & 0x00200U) 2883 { 2884 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 2885 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2886 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2887 } 2888 else 2889 { 2890 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 2891 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 2892 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 2893 } 2894 } 2895 break; 2896 } 2897 } 2898 kernel void genIndexBuffer( 2899 device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], 2900 device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], 2901 constant uint &indexCount [[ buffer(2) ]], 2902 constant uint &primCount [[ buffer(3) ]], 2903 constant uint &firstVertex [[ buffer(4) ]], 2904 uint prim [[thread_position_in_grid]]) 2905 { 2906 uint baseIndex = 0; 2907 uint onIndex = onIndex; 2908 uint onOutIndex = onOutIndex; 2909 if(prim < primCount) 2910 { 2911 switch(fixIndexBufferMode) 2912 { 2913 case 0x00U: 2914 onIndex = prim; 2915 onOutIndex = prim; 2916 break; 2917 case 0x01U: 2918 onIndex = prim * 2; 2919 onOutIndex = prim * 2; 2920 break; 2921 case 0x03U: 2922 onIndex = prim; 2923 onOutIndex = prim * 2; 2924 break; 2925 case 0x02U: 2926 onIndex = prim; 2927 onOutIndex = prim * 2; 2928 break; 2929 case 0x04U: 2930 onIndex = prim * 3; 2931 onOutIndex = prim * 3; 2932 break; 2933 case 0x05U: 2934 onIndex = prim; 2935 onOutIndex = prim * 3; 2936 break; 2937 case 0x06U: 2938 onIndex = prim; 2939 onOutIndex = prim * 3; 2940 break; 2941 } 2942 generatePrimitive(outIndexBufferUint16, outIndexBufferUint32, firstVertex, indexCount, baseIndex, onIndex, primCount, onOutIndex); 2943 } 2944 } 2945 2946 2947 )"; 2948