1 // GENERATED FILE - DO NOT EDIT. 2 // Generated by gen_mtl_internal_shaders.py 3 // 4 // Copyright 2020 The ANGLE Project Authors. All rights reserved. 5 // Use of this source code is governed by a BSD-style license that can be 6 // found in the LICENSE file. 7 // 8 9 // C++ string version of combined Metal default shaders. 10 11 static char gDefaultMetallibSrc[] = R"( 12 # 1 "temp_master_source.metal" 13 # 1 "<built-in>" 1 14 # 1 "<built-in>" 3 15 # 483 "<built-in>" 3 16 # 1 "<command line>" 1 17 # 1 "<built-in>" 2 18 # 1 "temp_master_source.metal" 2 19 # 1 "./blit.metal" 1 20 21 22 23 24 25 26 27 # 1 "./common.h" 1 28 # 13 "./common.h" 29 # include <simd/simd.h> 30 # include <metal_stdlib> 31 32 33 # 1 "./constants.h" 1 34 # 11 "./constants.h" 35 namespace rx 36 { 37 namespace mtl_shader 38 { 39 40 enum 41 { 42 kTextureType2D = 0, 43 kTextureType2DMultisample = 1, 44 kTextureType2DArray = 2, 45 kTextureTypeCube = 3, 46 kTextureType3D = 4, 47 kTextureTypeCount = 5, 48 }; 49 50 51 52 53 } 54 } 55 # 18 "./common.h" 2 56 57 58 59 60 61 62 63 using namespace metal; 64 65 66 67 constant uint32_t kNumColorOutputs [[function_constant(0)]]; 68 constant bool kColorOutputAvailable0 = kNumColorOutputs > 0; 69 constant bool kColorOutputAvailable1 = kNumColorOutputs > 1; 70 constant bool kColorOutputAvailable2 = kNumColorOutputs > 2; 71 constant bool kColorOutputAvailable3 = kNumColorOutputs > 3; 72 constant bool kColorOutputAvailable4 = kNumColorOutputs > 4; 73 constant bool kColorOutputAvailable5 = kNumColorOutputs > 5; 74 constant bool kColorOutputAvailable6 = kNumColorOutputs > 6; 75 constant bool kColorOutputAvailable7 = kNumColorOutputs > 7; 76 77 namespace rx 78 { 79 namespace mtl_shader 80 { 81 82 83 constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)}; 84 85 template <typename T> 86 struct MultipleColorOutputs 87 { 88 vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]]; 89 vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]]; 90 vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]]; 91 vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]]; 92 vec<T, 4> color4 [[color(4), function_constant(kColorOutputAvailable4)]]; 93 vec<T, 4> color5 [[color(5), function_constant(kColorOutputAvailable5)]]; 94 vec<T, 4> color6 [[color(6), function_constant(kColorOutputAvailable6)]]; 95 vec<T, 4> color7 [[color(7), function_constant(kColorOutputAvailable7)]]; 96 }; 97 # 69 "./common.h" 98 template <typename T> 99 static inline MultipleColorOutputs<T> toMultipleColorOutputs(vec<T, 4> color) 100 { 101 MultipleColorOutputs<T> re; 102 103 do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0); 104 do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0); 105 do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0); 106 do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0); 107 do { if (kColorOutputAvailable4) { re.color4 = color; } } while (0); 108 do { if (kColorOutputAvailable5) { re.color5 = color; } } while (0); 109 do { if (kColorOutputAvailable6) { re.color6 = color; } } while (0); 110 do { if (kColorOutputAvailable7) { re.color7 = color; } } while (0); 111 112 return re; 113 } 114 115 static inline float3 cubeTexcoords(float2 texcoords, int face) 116 { 117 texcoords = 2.0 * texcoords - 1.0; 118 switch (face) 119 { 120 case 0: 121 return float3(1.0, -texcoords.y, -texcoords.x); 122 case 1: 123 return float3(-1.0, -texcoords.y, texcoords.x); 124 case 2: 125 return float3(texcoords.x, 1.0, texcoords.y); 126 case 3: 127 return float3(texcoords.x, -1.0, -texcoords.y); 128 case 4: 129 return float3(texcoords.x, -texcoords.y, 1.0); 130 case 5: 131 return float3(-texcoords.x, -texcoords.y, -1.0); 132 } 133 return float3(texcoords, 0); 134 } 135 136 template <typename T> 137 static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords) 138 { 139 uint samples = srcTexture.get_num_samples(); 140 141 vec<T, 4> output(0); 142 143 for (uint sample = 0; sample < samples; ++sample) 144 { 145 output += srcTexture.read(coords, sample); 146 } 147 148 output = output / samples; 149 150 return output; 151 } 152 153 static inline float4 sRGBtoLinear(float4 color) 154 { 155 float3 linear1 = color.rgb / 12.92; 156 float3 linear2 = powr((color.rgb + float3(0.055)) / 1.055, 2.4); 157 float3 factor = float3(color.rgb <= float3(0.04045)); 158 float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a); 159 160 return linear; 161 } 162 163 static inline float linearToSRGB(float color) 164 { 165 if (color <= 0.0f) 166 return 0.0f; 167 if (color < 0.0031308f) 168 return 12.92f * color; 169 if (color < 1.0f) 170 return 1.055f * powr(color, 0.41666f) - 0.055f; 171 return 1.0f; 172 } 173 174 static inline float4 linearToSRGB(float4 color) 175 { 176 return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a); 177 } 178 179 template <typename Short> 180 static inline Short bytesToShort(constant uchar *input, uint offset) 181 { 182 Short inputLo = input[offset]; 183 Short inputHi = input[offset + 1]; 184 185 return inputLo | (inputHi << 8); 186 } 187 188 template <typename Int> 189 static inline Int bytesToInt(constant uchar *input, uint offset) 190 { 191 Int input0 = input[offset]; 192 Int input1 = input[offset + 1]; 193 Int input2 = input[offset + 2]; 194 Int input3 = input[offset + 3]; 195 196 return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); 197 } 198 199 template <typename Short> 200 static inline void shortToBytes(Short val, uint offset, device uchar *output) 201 { 202 ushort valUnsigned = as_type<ushort>(val); 203 output[offset] = valUnsigned & 0xff; 204 output[offset + 1] = (valUnsigned >> 8) & 0xff; 205 } 206 207 template <typename Int> 208 static inline void intToBytes(Int val, uint offset, device uchar *output) 209 { 210 uint valUnsigned = as_type<uint>(val); 211 output[offset] = valUnsigned & 0xff; 212 output[offset + 1] = (valUnsigned >> 8) & 0xff; 213 output[offset + 2] = (valUnsigned >> 16) & 0xff; 214 output[offset + 3] = (valUnsigned >> 24) & 0xff; 215 } 216 217 static inline void floatToBytes(float val, uint offset, device uchar *output) 218 { 219 intToBytes(as_type<uint>(val), offset, output); 220 } 221 222 static inline void int24bitToBytes(uint val, uint offset, device uchar *output) 223 { 224 output[offset] = val & 0xff; 225 output[offset + 1] = (val >> 8) & 0xff; 226 output[offset + 2] = (val >> 16) & 0xff; 227 } 228 229 template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> 230 static inline T getShiftedData(T input) 231 { 232 static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), 233 "T must have at least as many bits as inputBitCount + inputBitStart."); 234 const T mask = (1 << inputBitCount) - 1; 235 return (input >> inputBitStart) & mask; 236 } 237 238 template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> 239 static inline T shiftData(T input) 240 { 241 static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), 242 "T must have at least as many bits as inputBitCount + inputBitStart."); 243 const T mask = (1 << inputBitCount) - 1; 244 return (input & mask) << inputBitStart; 245 } 246 247 template <unsigned int inputBitCount, typename T> 248 static inline float normalizedToFloat(T input) 249 { 250 static_assert(inputBitCount <= (sizeof(T) * 8), 251 "T must have more bits than or same bits as inputBitCount."); 252 static_assert(inputBitCount <= 23, "Only single precision is supported"); 253 254 constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1); 255 return input * inverseMax; 256 } 257 258 template <typename T> 259 static inline float normalizedToFloat(T input) 260 { 261 return normalizedToFloat<sizeof(T) * 8, T>(input); 262 } 263 264 template <> 265 inline float normalizedToFloat(short input) 266 { 267 constexpr float inverseMax = 1.0f / 0x7fff; 268 return static_cast<float>(input) * inverseMax; 269 } 270 271 template <> 272 inline float normalizedToFloat(int input) 273 { 274 constexpr float inverseMax = 1.0f / 0x7fffffff; 275 return static_cast<float>(input) * inverseMax; 276 } 277 278 template <> 279 inline float normalizedToFloat(uint input) 280 { 281 constexpr float inverseMax = 1.0f / 0xffffffff; 282 return static_cast<float>(input) * inverseMax; 283 } 284 285 template <unsigned int outputBitCount, typename T> 286 static inline T floatToNormalized(float input) 287 { 288 static_assert(outputBitCount <= (sizeof(T) * 8), 289 "T must have more bits than or same bits as inputBitCount."); 290 static_assert(outputBitCount <= 23, "Only single precision is supported"); 291 292 return static_cast<T>(((1 << outputBitCount) - 1) * input + 0.5f); 293 } 294 295 template <typename T> 296 static inline T floatToNormalized(float input) 297 { 298 return floatToNormalized<sizeof(T) * 8, T>(input); 299 } 300 301 } 302 } 303 # 9 "./blit.metal" 2 304 305 using namespace rx::mtl_shader; 306 307 308 constant bool kPremultiplyAlpha [[function_constant(1)]]; 309 constant bool kUnmultiplyAlpha [[function_constant(2)]]; 310 constant bool kTransformLinearToSrgb [[function_constant(3)]]; 311 constant int kSourceTextureType [[function_constant(4)]]; 312 constant int kSourceTexture2Type [[function_constant(5)]]; 313 314 constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D; 315 constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray; 316 constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample; 317 constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube; 318 constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D; 319 320 constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D; 321 constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray; 322 constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample; 323 constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube; 324 325 struct BlitParams 326 { 327 328 float4 srcTexCoords; 329 int srcLevel; 330 int srcLayer; 331 bool dstLuminance; 332 uint8_t padding[7]; 333 }; 334 335 struct BlitVSOut 336 { 337 float4 position [[position]]; 338 float2 texCoords [[center_no_perspective, user(locn1)]]; 339 }; 340 341 vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]]) 342 { 343 BlitVSOut output; 344 output.position.xy = select(float2(-1.0f), float2(1.0f), bool2(vid & uint2(2, 1))); 345 output.position.zw = float2(0.0, 1.0); 346 output.texCoords = select(options.srcTexCoords.xy, options.srcTexCoords.zw, bool2(vid & uint2(2, 1))); 347 348 return output; 349 } 350 351 template <typename SrcTexture2d> 352 static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords) 353 { 354 uint2 dimens(srcTexture.get_width(), srcTexture.get_height()); 355 uint2 coords = uint2(texCoords * float2(dimens)); 356 357 return coords; 358 } 359 360 template <typename T> 361 static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords) 362 { 363 uint2 coords = getImageCoords(srcTexture, texCoords); 364 return resolveTextureMS(srcTexture, coords); 365 } 366 367 template <typename T> 368 static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture, 369 sampler textureSampler, 370 float2 texCoords, 371 constant BlitParams &options) 372 { 373 uint depth = srcTexture.get_depth(options.srcLevel); 374 float zCoord = (float(options.srcLayer) + 0.5) / float(depth); 375 376 return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel)); 377 } 378 # 101 "./blit.metal" 379 template <typename T> 380 static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 381 { 382 vec<T, 4> output; 383 384 switch (kSourceTextureType) 385 { 386 case kTextureType2D: 387 output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel)); 388 break; 389 case kTextureType2DArray: 390 output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer, 391 level(options.srcLevel)); 392 break; 393 case kTextureType2DMultisample: 394 output = blitSampleTextureMS(srcTexture2dMS, input.texCoords); 395 break; 396 case kTextureTypeCube: 397 output = srcTextureCube.sample(textureSampler, 398 cubeTexcoords(input.texCoords, options.srcLayer), 399 level(options.srcLevel)); 400 break; 401 case kTextureType3D: 402 output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options); 403 break; 404 } 405 406 if (kTransformLinearToSrgb) { 407 output.x = linearToSRGB(output.x); 408 output.y = linearToSRGB(output.y); 409 output.z = linearToSRGB(output.z); 410 } 411 if (kUnmultiplyAlpha) 412 { 413 if (output.a != 0.0) 414 { 415 output.xyz /= output.a; 416 } 417 } 418 if (kPremultiplyAlpha) 419 { 420 output.xyz *= output.a; 421 } 422 423 if (options.dstLuminance) 424 { 425 output.g = output.b = output.r; 426 } 427 428 return output; 429 } 430 431 template <typename T> 432 static inline MultipleColorOutputs<T> blitFS(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 433 { 434 vec<T, 4> output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 435 436 return toMultipleColorOutputs(output); 437 } 438 439 fragment MultipleColorOutputs<float> blitFloatFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 440 { 441 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 442 } 443 fragment MultipleColorOutputs<int> blitIntFS(BlitVSOut input [[stage_in]], texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<int> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 444 { 445 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 446 } 447 fragment MultipleColorOutputs<uint> blitUIntFS(BlitVSOut input [[stage_in]], texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<uint> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 448 { 449 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 450 } 451 452 fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 453 { 454 float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 455 uint4 output = uint4(inputColor * float4(255.0)); 456 457 return toMultipleColorOutputs(output); 458 } 459 460 461 struct FragmentDepthOut 462 { 463 float depth [[depth(any)]]; 464 }; 465 466 static inline float sampleDepth( 467 texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]], 468 texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]], 469 texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]], 470 texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]], 471 float2 texCoords, 472 constant BlitParams &options) 473 { 474 float4 output; 475 476 constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); 477 478 switch (kSourceTextureType) 479 { 480 case kTextureType2D: 481 output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel)); 482 break; 483 case kTextureType2DArray: 484 output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer, 485 level(options.srcLevel)); 486 break; 487 case kTextureType2DMultisample: 488 489 output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); 490 break; 491 case kTextureTypeCube: 492 output = 493 srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer), 494 level(options.srcLevel)); 495 break; 496 } 497 498 return output.r; 499 } 500 501 fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]], 502 texture2d<float> srcTexture2d 503 [[texture(0), function_constant(kSourceTextureType2D)]], 504 texture2d_array<float> srcTexture2dArray 505 [[texture(0), function_constant(kSourceTextureType2DArray)]], 506 texture2d_ms<float> srcTexture2dMS 507 [[texture(0), function_constant(kSourceTextureType2DMS)]], 508 texturecube<float> srcTextureCube 509 [[texture(0), function_constant(kSourceTextureTypeCube)]], 510 constant BlitParams &options [[buffer(0)]]) 511 { 512 FragmentDepthOut re; 513 514 re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 515 input.texCoords, options); 516 517 return re; 518 } 519 520 static inline uint32_t sampleStencil( 521 texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]], 522 texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]], 523 texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]], 524 texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]], 525 float2 texCoords, 526 int srcLevel, 527 int srcLayer) 528 { 529 uint4 output; 530 constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); 531 532 switch (kSourceTexture2Type) 533 { 534 case kTextureType2D: 535 output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel)); 536 break; 537 case kTextureType2DArray: 538 output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel)); 539 break; 540 case kTextureType2DMultisample: 541 542 output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); 543 break; 544 case kTextureTypeCube: 545 output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer), 546 level(srcLevel)); 547 break; 548 } 549 550 return output.r; 551 } 552 553 554 struct BlitStencilToBufferParams 555 { 556 float2 srcStartTexCoords; 557 float2 srcTexCoordSteps; 558 int srcLevel; 559 int srcLayer; 560 561 uint2 dstSize; 562 uint dstBufferRowPitch; 563 564 bool resolveMS; 565 }; 566 567 kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]], 568 texture2d<uint32_t> srcTexture2d 569 [[texture(1), function_constant(kSourceTexture2Type2D)]], 570 texture2d_array<uint32_t> srcTexture2dArray 571 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 572 texture2d_ms<uint32_t> srcTexture2dMS 573 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 574 texturecube<uint32_t> srcTextureCube 575 [[texture(1), function_constant(kSourceTexture2TypeCube)]], 576 constant BlitStencilToBufferParams &options [[buffer(0)]], 577 device uchar *buffer [[buffer(1)]]) 578 { 579 if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y) 580 { 581 return; 582 } 583 584 float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps; 585 586 if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS) 587 { 588 uint samples = srcTexture2dMS.get_num_samples(); 589 uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords); 590 uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x; 591 592 for (uint sample = 0; sample < samples; ++sample) 593 { 594 uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r; 595 buffer[bufferOffset + sample] = static_cast<uchar>(stencilPerSample); 596 } 597 } 598 else 599 { 600 uint32_t stencil = 601 sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 602 srcTexCoords, options.srcLevel, options.srcLayer); 603 604 buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast<uchar>(stencil); 605 } 606 } 607 608 609 #if __METAL_VERSION__ >= 210 610 611 struct FragmentStencilOut 612 { 613 uint32_t stencil [[stencil]]; 614 }; 615 616 struct FragmentDepthStencilOut 617 { 618 float depth [[depth(any)]]; 619 uint32_t stencil [[stencil]]; 620 }; 621 622 fragment FragmentStencilOut blitStencilFS( 623 BlitVSOut input [[stage_in]], 624 texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]], 625 texture2d_array<uint32_t> srcTexture2dArray 626 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 627 texture2d_ms<uint32_t> srcTexture2dMS 628 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 629 texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]], 630 constant BlitParams &options [[buffer(0)]]) 631 { 632 FragmentStencilOut re; 633 634 re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 635 input.texCoords, options.srcLevel, options.srcLayer); 636 637 return re; 638 } 639 640 fragment FragmentDepthStencilOut blitDepthStencilFS( 641 BlitVSOut input [[stage_in]], 642 643 texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], 644 texture2d_array<float> srcDepthTexture2dArray 645 [[texture(0), function_constant(kSourceTextureType2DArray)]], 646 texture2d_ms<float> srcDepthTexture2dMS 647 [[texture(0), function_constant(kSourceTextureType2DMS)]], 648 texturecube<float> srcDepthTextureCube 649 [[texture(0), function_constant(kSourceTextureTypeCube)]], 650 651 652 texture2d<uint32_t> srcStencilTexture2d 653 [[texture(1), function_constant(kSourceTexture2Type2D)]], 654 texture2d_array<uint32_t> srcStencilTexture2dArray 655 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 656 texture2d_ms<uint32_t> srcStencilTexture2dMS 657 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 658 texturecube<uint32_t> srcStencilTextureCube 659 [[texture(1), function_constant(kSourceTexture2TypeCube)]], 660 661 constant BlitParams &options [[buffer(0)]]) 662 { 663 FragmentDepthStencilOut re; 664 665 re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS, 666 srcDepthTextureCube, input.texCoords, options); 667 re.stencil = 668 sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS, 669 srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer); 670 return re; 671 } 672 #endif 673 # 2 "temp_master_source.metal" 2 674 # 1 "./clear.metal" 1 675 # 10 "./clear.metal" 676 using namespace rx::mtl_shader; 677 678 struct ClearParams 679 { 680 float4 clearColor; 681 float clearDepth; 682 }; 683 684 vertex float4 clearVS(unsigned int vid [[ vertex_id ]], 685 constant ClearParams &clearParams [[buffer(0)]]) 686 { 687 return float4(gCorners[vid], clearParams.clearDepth, 1.0); 688 } 689 690 fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]]) 691 { 692 return toMultipleColorOutputs(clearParams.clearColor); 693 } 694 695 fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]]) 696 { 697 return toMultipleColorOutputs(as_type<int4>(clearParams.clearColor)); 698 } 699 700 fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]]) 701 { 702 return toMultipleColorOutputs(as_type<uint4>(clearParams.clearColor)); 703 } 704 # 3 "temp_master_source.metal" 2 705 # 1 "./gen_indices.metal" 1 706 707 708 709 710 711 712 713 714 using namespace rx::mtl_shader; 715 716 717 constant bool kSourceBufferAligned[[function_constant(100)]]; 718 constant bool kSourceIndexIsU8[[function_constant(200)]]; 719 constant bool kSourceIndexIsU16[[function_constant(300)]]; 720 constant bool kSourceIndexIsU32[[function_constant(400)]]; 721 constant bool kSourceBufferUnaligned = !kSourceBufferAligned; 722 constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned; 723 constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned; 724 constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned; 725 726 struct IndexConversionParams 727 { 728 uint32_t srcOffset; 729 uint32_t indexCount; 730 bool primitiveRestartEnabled; 731 }; 732 733 734 735 inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx) 736 { 737 return inputAligned[offset / 2 + idx]; 738 } 739 inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx) 740 { 741 return inputAligned[offset / 4 + idx]; 742 } 743 inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx) 744 { 745 return input[offset + idx]; 746 } 747 inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx) 748 { 749 ushort inputLo = input[offset + 2 * idx]; 750 ushort inputHi = input[offset + 2 * idx + 1]; 751 752 return inputLo | (inputHi << 8); 753 } 754 inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx) 755 { 756 uint input0 = input[offset + 4 * idx]; 757 uint input1 = input[offset + 4 * idx + 1]; 758 uint input2 = input[offset + 4 * idx + 2]; 759 uint input3 = input[offset + 4 * idx + 3]; 760 761 return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); 762 } 763 764 kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]], 765 constant IndexConversionParams &options [[buffer(0)]], 766 constant uchar *input [[buffer(1)]], 767 device ushort *output [[buffer(2)]]) 768 { 769 if (idx >= options.indexCount) { return; }; 770 771 uchar value = getIndexAligned(input, options.srcOffset, idx); 772 773 if (options.primitiveRestartEnabled && value == 0xff) 774 { 775 output[idx] = 0xffff; 776 } 777 else 778 { 779 output[idx] = value; 780 } 781 } 782 783 kernel void convertIndexU16(uint idx [[thread_position_in_grid]], 784 constant IndexConversionParams &options [[buffer(0)]], 785 constant uchar *input 786 [[buffer(1), function_constant(kSourceBufferUnaligned)]], 787 constant ushort *inputAligned 788 [[buffer(1), function_constant(kSourceBufferAligned)]], 789 device ushort *output [[buffer(2)]]) 790 { 791 if (idx >= options.indexCount) { return; }; 792 793 ushort value; 794 if (kSourceBufferAligned) 795 { 796 value = getIndexAligned(inputAligned, options.srcOffset, idx); 797 } 798 else 799 { 800 value = getIndexUnalignedU16(input, options.srcOffset, idx); 801 } 802 output[idx] = value; 803 } 804 805 kernel void convertIndexU32(uint idx [[thread_position_in_grid]], 806 constant IndexConversionParams &options [[buffer(0)]], 807 constant uchar *input 808 [[buffer(1), function_constant(kSourceBufferUnaligned)]], 809 constant uint *inputAligned 810 [[buffer(1), function_constant(kSourceBufferAligned)]], 811 device uint *output [[buffer(2)]]) 812 { 813 if (idx >= options.indexCount) { return; }; 814 815 uint value; 816 if (kSourceBufferAligned) 817 { 818 value = getIndexAligned(inputAligned, options.srcOffset, idx); 819 } 820 else 821 { 822 value = getIndexUnalignedU32(input, options.srcOffset, idx); 823 } 824 output[idx] = value; 825 } 826 827 struct IndexFromArrayParams 828 { 829 uint firstVertex; 830 831 uint vertexCount; 832 }; 833 834 835 kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]], 836 constant IndexFromArrayParams &options [[buffer(0)]], 837 device uint *output [[buffer(2)]]) 838 { 839 if (idx >= options.vertexCount) { return; }; 840 841 uint vertexIdx = options.firstVertex + 2 + idx; 842 843 844 845 output[3 * idx ] = vertexIdx - 1; 846 output[3 * idx + 1] = vertexIdx; 847 output[3 * idx + 2] = options.firstVertex; 848 } 849 850 inline uint getIndexU32(uint offset, 851 uint idx, 852 constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]], 853 constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]], 854 constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]]) 855 { 856 if (kUseSourceBufferU8) 857 { 858 if (kSourceIndexIsU16) 859 { 860 return getIndexUnalignedU16(inputU8, offset, idx); 861 } 862 else if (kSourceIndexIsU32) 863 { 864 return getIndexUnalignedU32(inputU8, offset, idx); 865 } 866 return getIndexAligned(inputU8, offset, idx); 867 } 868 else if (kUseSourceBufferU16) 869 { 870 return getIndexAligned(inputU16, offset, idx); 871 } 872 else if (kUseSourceBufferU32) 873 { 874 return getIndexAligned(inputU32, offset, idx); 875 } 876 return 0; 877 } 878 879 880 881 882 kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]], 883 constant IndexConversionParams &options [[buffer(0)]], 884 constant uchar *inputU8 885 [[buffer(1), function_constant(kUseSourceBufferU8)]], 886 constant ushort *inputU16 887 [[buffer(1), function_constant(kUseSourceBufferU16)]], 888 constant uint *inputU32 889 [[buffer(1), function_constant(kUseSourceBufferU32)]], 890 device uint *output [[buffer(2)]]) 891 { 892 if (idx >= options.indexCount) { return; }; 893 894 uint elemIdx = 2 + idx; 895 896 output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32); 897 output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32); 898 output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32); 899 } 900 901 902 kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]], 903 constant IndexFromArrayParams &options [[buffer(0)]], 904 device uint *output [[buffer(2)]]) 905 { 906 uint totalIndices = options.vertexCount + 1; 907 if (idx >= totalIndices) { return; }; 908 909 output[idx] = options.firstVertex + idx % options.vertexCount; 910 } 911 912 913 914 kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]], 915 constant IndexConversionParams &options [[buffer(0)]], 916 constant uchar *inputU8 917 [[buffer(1), function_constant(kUseSourceBufferU8)]], 918 constant ushort *inputU16 919 [[buffer(1), function_constant(kUseSourceBufferU16)]], 920 constant uint *inputU32 921 [[buffer(1), function_constant(kUseSourceBufferU32)]], 922 device uint *output [[buffer(2)]]) 923 { 924 uint totalTargetIndices = options.indexCount + 1; 925 if (idx >= totalTargetIndices) { return; }; 926 927 output[idx] = 928 getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32); 929 } 930 # 4 "temp_master_source.metal" 2 931 # 1 "./gen_mipmap.metal" 1 932 933 934 935 936 937 938 939 940 using namespace rx::mtl_shader; 941 # 31 "./gen_mipmap.metal" 942 struct GenMipParams 943 { 944 uint srcLevel; 945 uint numMipLevelsToGen; 946 bool sRGB; 947 }; 948 949 950 951 kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]], 952 ushort3 gIndices [[thread_position_in_grid]], 953 texture3d<float> srcTexture [[texture(0)]], 954 texture3d<float, access::write> dstMip1 [[texture(1)]], 955 texture3d<float, access::write> dstMip2 [[texture(2)]], 956 texture3d<float, access::write> dstMip3 [[texture(3)]], 957 texture3d<float, access::write> dstMip4 [[texture(4)]], 958 constant GenMipParams &options [[buffer(0)]]) 959 { 960 ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth()); 961 bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z; 962 963 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 964 965 966 967 968 threadgroup float sR[(8 * 8 * 8)]; 969 threadgroup float sG[(8 * 8 * 8)]; 970 threadgroup float sB[(8 * 8 * 8)]; 971 threadgroup float sA[(8 * 8 * 8)]; 972 973 974 float4 texel1; 975 if (validThread) 976 { 977 float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize); 978 texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); 979 980 981 dstMip1.write(texel1, gIndices); 982 } 983 else 984 { 985 986 lIndex = 0xffffffff; 987 } 988 989 if (options.numMipLevelsToGen == 1) 990 { 991 return; 992 } 993 994 995 996 997 if (options.sRGB) 998 { 999 texel1 = linearToSRGB(texel1); 1000 } 1001 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1002 1003 threadgroup_barrier(mem_flags::mem_threadgroup); 1004 1005 1006 if ((lIndex & 0x49) == 0) 1007 { 1008 bool3 atEdge = gIndices == (mipSize - ushort3(1)); 1009 1010 1011 1012 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 1013 1014 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1015 1016 float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)])); 1017 1018 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1019 1020 1021 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)])); 1022 1023 1024 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)])); 1025 1026 1027 float4 texel8 = 1028 (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)])); 1029 1030 1031 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1032 1033 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); 1034 1035 1036 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1037 } 1038 1039 if (options.numMipLevelsToGen == 2) 1040 { 1041 return; 1042 } 1043 1044 1045 threadgroup_barrier(mem_flags::mem_threadgroup); 1046 1047 1048 if ((lIndex & 0xdb) == 0) 1049 { 1050 mipSize = max(mipSize >> 1, ushort3(1)); 1051 bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1)); 1052 1053 1054 1055 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1056 1057 float4 texel3 = 1058 (atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)])); 1059 1060 float4 texel4 = 1061 (atEdge.z) ? (texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))])); 1062 1063 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1064 1065 1066 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)])); 1067 1068 1069 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)])); 1070 1071 1072 1073 float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)])); 1074 1075 1076 1077 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1078 1079 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); 1080 1081 1082 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1083 } 1084 1085 if (options.numMipLevelsToGen == 3) 1086 { 1087 return; 1088 } 1089 1090 1091 threadgroup_barrier(mem_flags::mem_threadgroup); 1092 1093 1094 if ((lIndex & 0x1ff) == 0) 1095 { 1096 mipSize = max(mipSize >> 1, ushort3(1)); 1097 bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1)); 1098 1099 1100 1101 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1102 1103 float4 texel3 = 1104 (atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)])); 1105 1106 float4 texel4 = 1107 (atEdge.z) ? (texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))])); 1108 1109 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 1110 1111 1112 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)])); 1113 1114 1115 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)])); 1116 1117 1118 1119 float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)])); 1120 1121 1122 1123 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1124 1125 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); 1126 } 1127 } 1128 1129 kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1130 ushort2 gIndices [[thread_position_in_grid]], 1131 texture2d<float> srcTexture [[texture(0)]], 1132 texture2d<float, access::write> dstMip1 [[texture(1)]], 1133 texture2d<float, access::write> dstMip2 [[texture(2)]], 1134 texture2d<float, access::write> dstMip3 [[texture(3)]], 1135 texture2d<float, access::write> dstMip4 [[texture(4)]], 1136 constant GenMipParams &options [[buffer(0)]]) 1137 { 1138 uint firstMipLevel = options.srcLevel + 1; 1139 ushort2 mipSize = 1140 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1141 bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y; 1142 1143 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1144 1145 1146 1147 1148 threadgroup float sR[(8 * 8)]; 1149 threadgroup float sG[(8 * 8)]; 1150 threadgroup float sB[(8 * 8)]; 1151 threadgroup float sA[(8 * 8)]; 1152 1153 1154 float4 texel1; 1155 if (validThread) 1156 { 1157 float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize); 1158 texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); 1159 1160 1161 dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices); 1162 } 1163 else 1164 { 1165 1166 lIndex = 0xffffffff; 1167 } 1168 1169 if (options.numMipLevelsToGen == 1) 1170 { 1171 return; 1172 } 1173 1174 1175 1176 1177 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1178 1179 threadgroup_barrier(mem_flags::mem_threadgroup); 1180 1181 1182 if ((lIndex & 0x09) == 0) 1183 { 1184 bool2 atEdge = gIndices == (mipSize - ushort2(1)); 1185 1186 1187 1188 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 1189 1190 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1191 1192 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1193 1194 1195 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1196 1197 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); 1198 1199 1200 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1201 } 1202 1203 if (options.numMipLevelsToGen == 2) 1204 { 1205 return; 1206 } 1207 1208 1209 threadgroup_barrier(mem_flags::mem_threadgroup); 1210 1211 1212 if ((lIndex & 0x1b) == 0) 1213 { 1214 mipSize = max(mipSize >> 1, ushort2(1)); 1215 bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1)); 1216 1217 1218 1219 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1220 1221 float4 texel3 = 1222 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); 1223 1224 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1225 1226 1227 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1228 1229 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); 1230 1231 1232 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1233 } 1234 1235 if (options.numMipLevelsToGen == 3) 1236 { 1237 return; 1238 } 1239 1240 1241 threadgroup_barrier(mem_flags::mem_threadgroup); 1242 1243 1244 if ((lIndex & 0x3f) == 0) 1245 { 1246 mipSize = max(mipSize >> 1, ushort2(1)); 1247 bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1)); 1248 1249 1250 1251 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1252 1253 float4 texel3 = 1254 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); 1255 1256 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 1257 1258 1259 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1260 1261 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); 1262 } 1263 } 1264 1265 template <typename TextureTypeR, typename TextureTypeW> 1266 static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps( 1267 uint lIndex, 1268 ushort3 gIndices, 1269 TextureTypeR srcTexture, 1270 TextureTypeW dstMip2, 1271 TextureTypeW dstMip3, 1272 TextureTypeW dstMip4, 1273 ushort2 mip1Size, 1274 float4 mip1Texel, 1275 threadgroup float *sR, 1276 threadgroup float *sG, 1277 threadgroup float *sB, 1278 threadgroup float *sA, 1279 constant GenMipParams &options) 1280 { 1281 ushort2 mipSize = mip1Size; 1282 float4 texel1 = mip1Texel; 1283 1284 1285 1286 1287 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1288 1289 threadgroup_barrier(mem_flags::mem_threadgroup); 1290 1291 1292 if ((lIndex & 0x09) == 0) 1293 { 1294 bool2 atEdge = gIndices.xy == (mipSize - ushort2(1)); 1295 1296 1297 1298 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 1299 1300 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1301 1302 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1303 1304 1305 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1306 1307 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z); 1308 1309 1310 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1311 } 1312 1313 if (options.numMipLevelsToGen == 2) 1314 { 1315 return; 1316 } 1317 1318 1319 threadgroup_barrier(mem_flags::mem_threadgroup); 1320 1321 1322 if ((lIndex & 0x1b) == 0) 1323 { 1324 mipSize = max(mipSize >> 1, ushort2(1)); 1325 bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1)); 1326 1327 1328 1329 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1330 1331 float4 texel3 = 1332 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); 1333 1334 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1335 1336 1337 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1338 1339 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z); 1340 1341 1342 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1343 } 1344 1345 if (options.numMipLevelsToGen == 3) 1346 { 1347 return; 1348 } 1349 1350 1351 threadgroup_barrier(mem_flags::mem_threadgroup); 1352 1353 1354 if ((lIndex & 0x3f) == 0) 1355 { 1356 mipSize = max(mipSize >> 1, ushort2(1)); 1357 bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1)); 1358 1359 1360 1361 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1362 1363 float4 texel3 = 1364 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); 1365 1366 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 1367 1368 1369 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1370 1371 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z); 1372 } 1373 } 1374 1375 kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1376 ushort3 gIndices [[thread_position_in_grid]], 1377 texturecube<float> srcTexture [[texture(0)]], 1378 texturecube<float, access::write> dstMip1 [[texture(1)]], 1379 texturecube<float, access::write> dstMip2 [[texture(2)]], 1380 texturecube<float, access::write> dstMip3 [[texture(3)]], 1381 texturecube<float, access::write> dstMip4 [[texture(4)]], 1382 constant GenMipParams &options [[buffer(0)]]) 1383 { 1384 uint firstMipLevel = options.srcLevel + 1; 1385 ushort2 mip1Size = 1386 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1387 bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; 1388 1389 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1390 1391 1392 float4 mip1Texel; 1393 if (validThread) 1394 { 1395 float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); 1396 mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)), 1397 level(options.srcLevel)); 1398 1399 1400 dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); 1401 } 1402 else 1403 { 1404 1405 lIndex = 0xffffffff; 1406 } 1407 1408 if (options.numMipLevelsToGen == 1) 1409 { 1410 return; 1411 } 1412 1413 1414 threadgroup float sR[(8 * 8)]; 1415 threadgroup float sG[(8 * 8)]; 1416 threadgroup float sB[(8 * 8)]; 1417 threadgroup float sA[(8 * 8)]; 1418 1419 generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, 1420 mip1Size, mip1Texel, sR, sG, sB, sA, options); 1421 } 1422 1423 kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1424 ushort3 gIndices [[thread_position_in_grid]], 1425 texture2d_array<float> srcTexture [[texture(0)]], 1426 texture2d_array<float, access::write> dstMip1 [[texture(1)]], 1427 texture2d_array<float, access::write> dstMip2 [[texture(2)]], 1428 texture2d_array<float, access::write> dstMip3 [[texture(3)]], 1429 texture2d_array<float, access::write> dstMip4 [[texture(4)]], 1430 constant GenMipParams &options [[buffer(0)]]) 1431 { 1432 uint firstMipLevel = options.srcLevel + 1; 1433 ushort2 mip1Size = 1434 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1435 bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; 1436 1437 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1438 1439 1440 float4 mip1Texel; 1441 if (validThread) 1442 { 1443 float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); 1444 mip1Texel = 1445 srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel)); 1446 1447 1448 dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); 1449 } 1450 else 1451 { 1452 1453 lIndex = 0xffffffff; 1454 } 1455 1456 if (options.numMipLevelsToGen == 1) 1457 { 1458 return; 1459 } 1460 1461 1462 threadgroup float sR[(8 * 8)]; 1463 threadgroup float sG[(8 * 8)]; 1464 threadgroup float sB[(8 * 8)]; 1465 threadgroup float sA[(8 * 8)]; 1466 1467 generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, 1468 mip1Size, mip1Texel, sR, sG, sB, sA, options); 1469 } 1470 # 5 "temp_master_source.metal" 2 1471 # 1 "./copy_buffer.metal" 1 1472 # 12 "./copy_buffer.metal" 1473 #include <metal_pack> 1474 1475 1476 # 1 "./format_autogen.h" 1 1477 1478 1479 1480 namespace rx 1481 { 1482 namespace mtl_shader 1483 { 1484 1485 namespace FormatID 1486 { 1487 enum 1488 { 1489 NONE, 1490 D16_UNORM, 1491 D24_UNORM_S8_UINT, 1492 D24_UNORM_X8_UINT, 1493 D32_FLOAT, 1494 D32_FLOAT_S8X24_UINT, 1495 D32_UNORM, 1496 S8_UINT, 1497 A16_FLOAT, 1498 A1R5G5B5_UNORM, 1499 A2R10G10B10_SINT_VERTEX, 1500 A2R10G10B10_SNORM_VERTEX, 1501 A2R10G10B10_SSCALED_VERTEX, 1502 A2R10G10B10_UINT_VERTEX, 1503 A2R10G10B10_UNORM_VERTEX, 1504 A2R10G10B10_USCALED_VERTEX, 1505 A32_FLOAT, 1506 A8_UNORM, 1507 ASTC_10x10_SRGB_BLOCK, 1508 ASTC_10x10_UNORM_BLOCK, 1509 ASTC_10x5_SRGB_BLOCK, 1510 ASTC_10x5_UNORM_BLOCK, 1511 ASTC_10x6_SRGB_BLOCK, 1512 ASTC_10x6_UNORM_BLOCK, 1513 ASTC_10x8_SRGB_BLOCK, 1514 ASTC_10x8_UNORM_BLOCK, 1515 ASTC_12x10_SRGB_BLOCK, 1516 ASTC_12x10_UNORM_BLOCK, 1517 ASTC_12x12_SRGB_BLOCK, 1518 ASTC_12x12_UNORM_BLOCK, 1519 ASTC_3x3x3_UNORM_BLOCK, 1520 ASTC_3x3x3_UNORM_SRGB_BLOCK, 1521 ASTC_4x3x3_UNORM_BLOCK, 1522 ASTC_4x3x3_UNORM_SRGB_BLOCK, 1523 ASTC_4x4_SRGB_BLOCK, 1524 ASTC_4x4_UNORM_BLOCK, 1525 ASTC_4x4x3_UNORM_BLOCK, 1526 ASTC_4x4x3_UNORM_SRGB_BLOCK, 1527 ASTC_4x4x4_UNORM_BLOCK, 1528 ASTC_4x4x4_UNORM_SRGB_BLOCK, 1529 ASTC_5x4_SRGB_BLOCK, 1530 ASTC_5x4_UNORM_BLOCK, 1531 ASTC_5x4x4_UNORM_BLOCK, 1532 ASTC_5x4x4_UNORM_SRGB_BLOCK, 1533 ASTC_5x5_SRGB_BLOCK, 1534 ASTC_5x5_UNORM_BLOCK, 1535 ASTC_5x5x4_UNORM_BLOCK, 1536 ASTC_5x5x4_UNORM_SRGB_BLOCK, 1537 ASTC_5x5x5_UNORM_BLOCK, 1538 ASTC_5x5x5_UNORM_SRGB_BLOCK, 1539 ASTC_6x5_SRGB_BLOCK, 1540 ASTC_6x5_UNORM_BLOCK, 1541 ASTC_6x5x5_UNORM_BLOCK, 1542 ASTC_6x5x5_UNORM_SRGB_BLOCK, 1543 ASTC_6x6_SRGB_BLOCK, 1544 ASTC_6x6_UNORM_BLOCK, 1545 ASTC_6x6x5_UNORM_BLOCK, 1546 ASTC_6x6x5_UNORM_SRGB_BLOCK, 1547 ASTC_6x6x6_UNORM_BLOCK, 1548 ASTC_6x6x6_UNORM_SRGB_BLOCK, 1549 ASTC_8x5_SRGB_BLOCK, 1550 ASTC_8x5_UNORM_BLOCK, 1551 ASTC_8x6_SRGB_BLOCK, 1552 ASTC_8x6_UNORM_BLOCK, 1553 ASTC_8x8_SRGB_BLOCK, 1554 ASTC_8x8_UNORM_BLOCK, 1555 B10G10R10A2_UNORM, 1556 B4G4R4A4_UNORM, 1557 B5G5R5A1_UNORM, 1558 B5G6R5_UNORM, 1559 B8G8R8A8_TYPELESS, 1560 B8G8R8A8_TYPELESS_SRGB, 1561 B8G8R8A8_UNORM, 1562 B8G8R8A8_UNORM_SRGB, 1563 B8G8R8X8_UNORM, 1564 B8G8R8X8_UNORM_SRGB, 1565 BC1_RGBA_UNORM_BLOCK, 1566 BC1_RGBA_UNORM_SRGB_BLOCK, 1567 BC1_RGB_UNORM_BLOCK, 1568 BC1_RGB_UNORM_SRGB_BLOCK, 1569 BC2_RGBA_UNORM_BLOCK, 1570 BC2_RGBA_UNORM_SRGB_BLOCK, 1571 BC3_RGBA_UNORM_BLOCK, 1572 BC3_RGBA_UNORM_SRGB_BLOCK, 1573 BC4_RED_SNORM_BLOCK, 1574 BC4_RED_UNORM_BLOCK, 1575 BC5_RG_SNORM_BLOCK, 1576 BC5_RG_UNORM_BLOCK, 1577 BC6H_RGB_SFLOAT_BLOCK, 1578 BC6H_RGB_UFLOAT_BLOCK, 1579 BC7_RGBA_UNORM_BLOCK, 1580 BC7_RGBA_UNORM_SRGB_BLOCK, 1581 EAC_R11G11_SNORM_BLOCK, 1582 EAC_R11G11_UNORM_BLOCK, 1583 EAC_R11_SNORM_BLOCK, 1584 EAC_R11_UNORM_BLOCK, 1585 ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK, 1586 ETC1_R8G8B8_UNORM_BLOCK, 1587 ETC2_R8G8B8A1_SRGB_BLOCK, 1588 ETC2_R8G8B8A1_UNORM_BLOCK, 1589 ETC2_R8G8B8A8_SRGB_BLOCK, 1590 ETC2_R8G8B8A8_UNORM_BLOCK, 1591 ETC2_R8G8B8_SRGB_BLOCK, 1592 ETC2_R8G8B8_UNORM_BLOCK, 1593 G8_B8R8_2PLANE_420_UNORM, 1594 G8_B8_R8_3PLANE_420_UNORM, 1595 L16A16_FLOAT, 1596 L16_FLOAT, 1597 L32A32_FLOAT, 1598 L32_FLOAT, 1599 L8A8_UNORM, 1600 L8_UNORM, 1601 PALETTE4_R4G4B4A4_UNORM, 1602 PALETTE4_R5G5B5A1_UNORM, 1603 PALETTE4_R5G6B5_UNORM, 1604 PALETTE4_R8G8B8A8_UNORM, 1605 PALETTE4_R8G8B8_UNORM, 1606 PALETTE8_R4G4B4A4_UNORM, 1607 PALETTE8_R5G5B5A1_UNORM, 1608 PALETTE8_R5G6B5_UNORM, 1609 PALETTE8_R8G8B8A8_UNORM, 1610 PALETTE8_R8G8B8_UNORM, 1611 PVRTC1_RGBA_2BPP_UNORM_BLOCK, 1612 PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK, 1613 PVRTC1_RGBA_4BPP_UNORM_BLOCK, 1614 PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK, 1615 PVRTC1_RGB_2BPP_UNORM_BLOCK, 1616 PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK, 1617 PVRTC1_RGB_4BPP_UNORM_BLOCK, 1618 PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK, 1619 R10G10B10A2_SINT, 1620 R10G10B10A2_SNORM, 1621 R10G10B10A2_SSCALED, 1622 R10G10B10A2_UINT, 1623 R10G10B10A2_UNORM, 1624 R10G10B10A2_USCALED, 1625 R10G10B10X2_UNORM, 1626 R11G11B10_FLOAT, 1627 R16G16B16A16_FLOAT, 1628 R16G16B16A16_SINT, 1629 R16G16B16A16_SNORM, 1630 R16G16B16A16_SSCALED, 1631 R16G16B16A16_UINT, 1632 R16G16B16A16_UNORM, 1633 R16G16B16A16_USCALED, 1634 R16G16B16_FLOAT, 1635 R16G16B16_SINT, 1636 R16G16B16_SNORM, 1637 R16G16B16_SSCALED, 1638 R16G16B16_UINT, 1639 R16G16B16_UNORM, 1640 R16G16B16_USCALED, 1641 R16G16_FLOAT, 1642 R16G16_SINT, 1643 R16G16_SNORM, 1644 R16G16_SSCALED, 1645 R16G16_UINT, 1646 R16G16_UNORM, 1647 R16G16_USCALED, 1648 R16_FLOAT, 1649 R16_SINT, 1650 R16_SNORM, 1651 R16_SSCALED, 1652 R16_UINT, 1653 R16_UNORM, 1654 R16_USCALED, 1655 R32G32B32A32_FIXED, 1656 R32G32B32A32_FLOAT, 1657 R32G32B32A32_SINT, 1658 R32G32B32A32_SNORM, 1659 R32G32B32A32_SSCALED, 1660 R32G32B32A32_UINT, 1661 R32G32B32A32_UNORM, 1662 R32G32B32A32_USCALED, 1663 R32G32B32_FIXED, 1664 R32G32B32_FLOAT, 1665 R32G32B32_SINT, 1666 R32G32B32_SNORM, 1667 R32G32B32_SSCALED, 1668 R32G32B32_UINT, 1669 R32G32B32_UNORM, 1670 R32G32B32_USCALED, 1671 R32G32_FIXED, 1672 R32G32_FLOAT, 1673 R32G32_SINT, 1674 R32G32_SNORM, 1675 R32G32_SSCALED, 1676 R32G32_UINT, 1677 R32G32_UNORM, 1678 R32G32_USCALED, 1679 R32_FIXED, 1680 R32_FLOAT, 1681 R32_SINT, 1682 R32_SNORM, 1683 R32_SSCALED, 1684 R32_UINT, 1685 R32_UNORM, 1686 R32_USCALED, 1687 R4G4B4A4_UNORM, 1688 R5G5B5A1_UNORM, 1689 R5G6B5_UNORM, 1690 R8G8B8A8_SINT, 1691 R8G8B8A8_SNORM, 1692 R8G8B8A8_SSCALED, 1693 R8G8B8A8_TYPELESS, 1694 R8G8B8A8_TYPELESS_SRGB, 1695 R8G8B8A8_UINT, 1696 R8G8B8A8_UNORM, 1697 R8G8B8A8_UNORM_SRGB, 1698 R8G8B8A8_USCALED, 1699 R8G8B8X8_UNORM, 1700 R8G8B8X8_UNORM_SRGB, 1701 R8G8B8_SINT, 1702 R8G8B8_SNORM, 1703 R8G8B8_SSCALED, 1704 R8G8B8_UINT, 1705 R8G8B8_UNORM, 1706 R8G8B8_UNORM_SRGB, 1707 R8G8B8_USCALED, 1708 R8G8_SINT, 1709 R8G8_SNORM, 1710 R8G8_SSCALED, 1711 R8G8_UINT, 1712 R8G8_UNORM, 1713 R8G8_UNORM_SRGB, 1714 R8G8_USCALED, 1715 R8_SINT, 1716 R8_SNORM, 1717 R8_SSCALED, 1718 R8_UINT, 1719 R8_UNORM, 1720 R8_UNORM_SRGB, 1721 R8_USCALED, 1722 R9G9B9E5_SHAREDEXP, 1723 X2R10G10B10_SINT_VERTEX, 1724 X2R10G10B10_SNORM_VERTEX, 1725 X2R10G10B10_SSCALED_VERTEX, 1726 X2R10G10B10_UINT_VERTEX, 1727 X2R10G10B10_UNORM_VERTEX, 1728 X2R10G10B10_USCALED_VERTEX, 1729 EXTERNAL0, 1730 EXTERNAL1, 1731 EXTERNAL2, 1732 EXTERNAL3, 1733 EXTERNAL4, 1734 EXTERNAL5, 1735 EXTERNAL6, 1736 EXTERNAL7 1737 }; 1738 1739 } 1740 1741 } 1742 } 1743 # 16 "./copy_buffer.metal" 2 1744 1745 using namespace rx::mtl_shader; 1746 1747 constant int kCopyFormatType [[function_constant(10)]]; 1748 1749 1750 constant int kCopyTextureType [[function_constant(20)]]; 1751 constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D; 1752 constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray; 1753 constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample; 1754 constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube; 1755 constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D; 1756 1757 struct CopyPixelParams 1758 { 1759 uint3 copySize; 1760 uint3 textureOffset; 1761 1762 uint bufferStartOffset; 1763 uint pixelSize; 1764 uint bufferRowPitch; 1765 uint bufferDepthPitch; 1766 }; 1767 1768 struct WritePixelParams 1769 { 1770 uint2 copySize; 1771 uint2 textureOffset; 1772 1773 uint bufferStartOffset; 1774 1775 uint pixelSize; 1776 uint bufferRowPitch; 1777 1778 uint textureLevel; 1779 uint textureLayer; 1780 1781 bool reverseTextureRowOrder; 1782 }; 1783 # 120 "./copy_buffer.metal" 1784 template <typename T> 1785 static inline void textureWrite(ushort3 gIndices, 1786 constant CopyPixelParams &options, 1787 vec<T, 4> color, 1788 texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 1789 { 1790 uint3 writeIndices = options.textureOffset + uint3(gIndices); 1791 switch (kCopyTextureType) 1792 { 1793 case kTextureType2D: 1794 dstTexture2d.write(color, writeIndices.xy); 1795 break; 1796 case kTextureType2DArray: 1797 dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z); 1798 break; 1799 case kTextureType3D: 1800 dstTexture3d.write(color, writeIndices); 1801 break; 1802 case kTextureTypeCube: 1803 dstTextureCube.write(color, writeIndices.xy, writeIndices.z); 1804 break; 1805 } 1806 } 1807 1808 1809 template <typename T> 1810 static inline vec<T, 4> textureRead(ushort2 gIndices, 1811 constant WritePixelParams &options, 1812 texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]]) 1813 { 1814 vec<T, 4> color; 1815 uint2 coords = uint2(gIndices); 1816 if (options.reverseTextureRowOrder) 1817 { 1818 coords.y = options.copySize.y - 1 - gIndices.y; 1819 } 1820 coords += options.textureOffset; 1821 switch (kCopyTextureType) 1822 { 1823 case kTextureType2D: 1824 color = srcTexture2d.read(coords.xy, options.textureLevel); 1825 break; 1826 case kTextureType2DArray: 1827 color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel); 1828 break; 1829 case kTextureType2DMultisample: 1830 color = resolveTextureMS(srcTexture2dMS, coords.xy); 1831 break; 1832 case kTextureType3D: 1833 color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel); 1834 break; 1835 case kTextureTypeCube: 1836 color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel); 1837 break; 1838 } 1839 return color; 1840 } 1841 # 215 "./copy_buffer.metal" 1842 static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer) 1843 { 1844 float4 color; 1845 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1846 1847 color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); 1848 color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src)); 1849 color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src)); 1850 color.a = 1.0; 1851 return color; 1852 } 1853 static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1854 { 1855 ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | 1856 shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) | 1857 shiftData<5, 0>(floatToNormalized<5, ushort>(color.b)); 1858 1859 shortToBytes(dst, bufferOffset, buffer); 1860 } 1861 1862 1863 static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer) 1864 { 1865 float4 color; 1866 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1867 1868 color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src)); 1869 color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src)); 1870 color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src)); 1871 color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src)); 1872 return color; 1873 } 1874 static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1875 { 1876 ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) | 1877 shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) | 1878 shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) | 1879 shiftData<4, 0>(floatToNormalized<4, ushort>(color.a)); 1880 ; 1881 1882 shortToBytes(dst, bufferOffset, buffer); 1883 } 1884 1885 1886 static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer) 1887 { 1888 float4 color; 1889 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1890 1891 color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); 1892 color.g = normalizedToFloat<5>(getShiftedData<5, 6>(src)); 1893 color.b = normalizedToFloat<5>(getShiftedData<5, 1>(src)); 1894 color.a = normalizedToFloat<1>(getShiftedData<1, 0>(src)); 1895 return color; 1896 } 1897 static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1898 { 1899 ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | 1900 shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) | 1901 shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) | 1902 shiftData<1, 0>(floatToNormalized<1, ushort>(color.a)); 1903 ; 1904 1905 shortToBytes(dst, bufferOffset, buffer); 1906 } 1907 1908 1909 static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer) 1910 { 1911 int4 color; 1912 int src = bytesToInt<int>(buffer, bufferOffset); 1913 1914 constexpr int3 rgbSignMask(0x200); 1915 constexpr int3 negativeMask(0xfffffc00); 1916 constexpr int alphaSignMask = 0x2; 1917 constexpr int alphaNegMask = 0xfffffffc; 1918 1919 color.r = getShiftedData<10, 0>(src); 1920 color.g = getShiftedData<10, 10>(src); 1921 color.b = getShiftedData<10, 20>(src); 1922 1923 int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9; 1924 color.rgb = (isRgbNegative * negativeMask) | color.rgb; 1925 1926 color.a = getShiftedData<2, 30>(src); 1927 int isAlphaNegative = color.a & alphaSignMask >> 1; 1928 color.a = (isAlphaNegative * alphaNegMask) | color.a; 1929 return color; 1930 } 1931 1932 static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer) 1933 { 1934 uint4 color; 1935 uint src = bytesToInt<uint>(buffer, bufferOffset); 1936 1937 color.r = getShiftedData<10, 0>(src); 1938 color.g = getShiftedData<10, 10>(src); 1939 color.b = getShiftedData<10, 20>(src); 1940 color.a = getShiftedData<2, 30>(src); 1941 return color; 1942 } 1943 1944 1945 static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB) 1946 { 1947 float4 color; 1948 uint src = bytesToInt<uint>(buffer, bufferOffset); 1949 1950 if (isSRGB) 1951 { 1952 color = unpack_unorm4x8_srgb_to_float(src); 1953 } 1954 else 1955 { 1956 color = unpack_unorm4x8_to_float(src); 1957 } 1958 return color; 1959 } 1960 static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB) 1961 { 1962 uint dst; 1963 1964 if (isSRGB) 1965 { 1966 dst = pack_float_to_srgb_unorm4x8(color); 1967 } 1968 else 1969 { 1970 dst = pack_float_to_unorm4x8(color); 1971 } 1972 1973 intToBytes(dst, bufferOffset, buffer); 1974 } 1975 1976 static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB) 1977 { 1978 float4 color; 1979 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 1980 color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 1981 color.b = normalizedToFloat<uchar>(buffer[bufferOffset + 2]); 1982 color.a = 1.0; 1983 1984 if (isSRGB) 1985 { 1986 color = sRGBtoLinear(color); 1987 } 1988 return color; 1989 } 1990 static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB) 1991 { 1992 color.a = 1.0; 1993 uint dst; 1994 1995 if (isSRGB) 1996 { 1997 dst = pack_float_to_srgb_unorm4x8(color); 1998 } 1999 else 2000 { 2001 dst = pack_float_to_unorm4x8(color); 2002 } 2003 int24bitToBytes(dst, bufferOffset, buffer); 2004 } 2005 2006 2007 static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer) 2008 { 2009 float4 color; 2010 uint src = bytesToInt<uint>(buffer, bufferOffset); 2011 2012 color = unpack_snorm4x8_to_float(src); 2013 2014 return color; 2015 } 2016 static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2017 { 2018 uint dst = pack_float_to_snorm4x8(color); 2019 2020 intToBytes(dst, bufferOffset, buffer); 2021 } 2022 2023 2024 static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer) 2025 { 2026 float4 color; 2027 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 2028 color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); 2029 color.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]); 2030 color.a = 1.0; 2031 2032 return color; 2033 } 2034 static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2035 { 2036 uint dst = pack_float_to_snorm4x8(color); 2037 2038 int24bitToBytes(dst, bufferOffset, buffer); 2039 } 2040 2041 2042 static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer) 2043 { 2044 return readR8G8B8A8(bufferOffset, buffer, false); 2045 } 2046 static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2047 { 2048 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); 2049 } 2050 2051 static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 2052 { 2053 return readR8G8B8A8(bufferOffset, buffer, true); 2054 } 2055 static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2056 { 2057 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); 2058 } 2059 2060 2061 static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer) 2062 { 2063 return readR8G8B8A8(bufferOffset, buffer, false).bgra; 2064 } 2065 static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2066 { 2067 color.rgba = color.bgra; 2068 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); 2069 } 2070 2071 static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 2072 { 2073 return readR8G8B8A8(bufferOffset, buffer, true).bgra; 2074 } 2075 static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2076 { 2077 color.rgba = color.bgra; 2078 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); 2079 } 2080 2081 2082 static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer) 2083 { 2084 return readR8G8B8(bufferOffset, buffer, false); 2085 } 2086 static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2087 { 2088 return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false); 2089 } 2090 2091 static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 2092 { 2093 return readR8G8B8(bufferOffset, buffer, true); 2094 } 2095 static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2096 { 2097 return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true); 2098 } 2099 2100 2101 static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer) 2102 { 2103 float4 color; 2104 color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset])); 2105 color.a = 1.0; 2106 return color; 2107 } 2108 static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2109 { 2110 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2111 } 2112 2113 2114 static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2115 { 2116 buffer[bufferOffset] = floatToNormalized<uchar>(color.a); 2117 } 2118 2119 2120 static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer) 2121 { 2122 float4 color; 2123 color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset])); 2124 color.a = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 2125 return color; 2126 } 2127 static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2128 { 2129 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2130 buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.a); 2131 } 2132 2133 2134 static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer) 2135 { 2136 float4 color; 2137 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 2138 color.g = color.b = 0.0; 2139 color.a = 1.0; 2140 return color; 2141 } 2142 static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2143 { 2144 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2145 } 2146 2147 static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer) 2148 { 2149 float4 color; 2150 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 2151 color.g = color.b = 0.0; 2152 color.a = 1.0; 2153 return color; 2154 } 2155 static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2156 { 2157 buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r)); 2158 } 2159 2160 2161 static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer) 2162 { 2163 int4 color; 2164 color.r = as_type<char>(buffer[bufferOffset]); 2165 color.g = color.b = 0; 2166 color.a = 1; 2167 return color; 2168 } 2169 static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2170 { 2171 buffer[bufferOffset] = static_cast<uchar>(color.r); 2172 } 2173 2174 2175 static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer) 2176 { 2177 uint4 color; 2178 color.r = as_type<uchar>(buffer[bufferOffset]); 2179 color.g = color.b = 0; 2180 color.a = 1; 2181 return color; 2182 } 2183 static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2184 { 2185 buffer[bufferOffset] = static_cast<uchar>(color.r); 2186 } 2187 2188 2189 static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer) 2190 { 2191 float4 color; 2192 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 2193 color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 2194 color.b = 0.0; 2195 color.a = 1.0; 2196 return color; 2197 } 2198 static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2199 { 2200 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2201 buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.g); 2202 } 2203 2204 static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer) 2205 { 2206 float4 color; 2207 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 2208 color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); 2209 color.b = 0.0; 2210 color.a = 1.0; 2211 return color; 2212 } 2213 static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2214 { 2215 buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r)); 2216 buffer[bufferOffset + 1] = as_type<uchar>(floatToNormalized<7, char>(color.g)); 2217 } 2218 2219 2220 static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer) 2221 { 2222 int4 color; 2223 color.r = as_type<char>(buffer[bufferOffset]); 2224 color.g = as_type<char>(buffer[bufferOffset + 1]); 2225 color.b = 0; 2226 color.a = 1; 2227 return color; 2228 } 2229 static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2230 { 2231 buffer[bufferOffset] = static_cast<uchar>(color.r); 2232 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2233 } 2234 2235 2236 static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer) 2237 { 2238 uint4 color; 2239 color.r = as_type<uchar>(buffer[bufferOffset]); 2240 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 2241 color.b = 0; 2242 color.a = 1; 2243 return color; 2244 } 2245 static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2246 { 2247 buffer[bufferOffset] = static_cast<uchar>(color.r); 2248 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2249 } 2250 2251 2252 static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer) 2253 { 2254 int4 color; 2255 color.r = as_type<char>(buffer[bufferOffset]); 2256 color.g = as_type<char>(buffer[bufferOffset + 1]); 2257 color.b = as_type<char>(buffer[bufferOffset + 2]); 2258 color.a = 1; 2259 return color; 2260 } 2261 2262 2263 static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer) 2264 { 2265 uint4 color; 2266 color.r = as_type<uchar>(buffer[bufferOffset]); 2267 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 2268 color.b = as_type<uchar>(buffer[bufferOffset + 2]); 2269 color.a = 1; 2270 return color; 2271 } 2272 2273 2274 static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer) 2275 { 2276 int4 color; 2277 color.r = as_type<char>(buffer[bufferOffset]); 2278 color.g = as_type<char>(buffer[bufferOffset + 1]); 2279 color.b = as_type<char>(buffer[bufferOffset + 2]); 2280 color.a = as_type<char>(buffer[bufferOffset + 3]); 2281 return color; 2282 } 2283 static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2284 { 2285 buffer[bufferOffset] = static_cast<uchar>(color.r); 2286 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2287 buffer[bufferOffset + 2] = static_cast<uchar>(color.b); 2288 buffer[bufferOffset + 3] = static_cast<uchar>(color.a); 2289 } 2290 2291 2292 static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer) 2293 { 2294 uint4 color; 2295 color.r = as_type<uchar>(buffer[bufferOffset]); 2296 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 2297 color.b = as_type<uchar>(buffer[bufferOffset + 2]); 2298 color.a = as_type<uchar>(buffer[bufferOffset + 3]); 2299 return color; 2300 } 2301 static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2302 { 2303 buffer[bufferOffset] = static_cast<uchar>(color.r); 2304 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2305 buffer[bufferOffset + 2] = static_cast<uchar>(color.b); 2306 buffer[bufferOffset + 3] = static_cast<uchar>(color.a); 2307 } 2308 2309 2310 static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer) 2311 { 2312 float4 color; 2313 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2314 color.g = color.b = 0.0; 2315 color.a = 1.0; 2316 return color; 2317 } 2318 static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2319 { 2320 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2321 } 2322 2323 template <typename ShortType> 2324 static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer) 2325 { 2326 float4 color; 2327 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2328 color.g = color.b = 0.0; 2329 color.a = 1.0; 2330 return color; 2331 } 2332 2333 2334 2335 2336 static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer) 2337 { 2338 int4 color; 2339 color.r = bytesToShort<short>(buffer, bufferOffset); 2340 color.g = color.b = 0; 2341 color.a = 1; 2342 return color; 2343 } 2344 static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2345 { 2346 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2347 } 2348 2349 2350 static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer) 2351 { 2352 uint4 color; 2353 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2354 color.g = color.b = 0; 2355 color.a = 1; 2356 return color; 2357 } 2358 static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2359 { 2360 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2361 } 2362 2363 2364 static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer) 2365 { 2366 float4 color; 2367 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2368 color.rgb = 0.0; 2369 return color; 2370 } 2371 static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2372 { 2373 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset, buffer); 2374 } 2375 2376 2377 static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer) 2378 { 2379 float4 color; 2380 color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2381 color.a = 1.0; 2382 return color; 2383 } 2384 static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2385 { 2386 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2387 } 2388 2389 2390 static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer) 2391 { 2392 float4 color; 2393 color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2394 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2395 return color; 2396 } 2397 static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2398 { 2399 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2400 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 2, buffer); 2401 } 2402 2403 2404 static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer) 2405 { 2406 float4 color; 2407 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2408 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2409 color.b = 0.0; 2410 color.a = 1.0; 2411 return color; 2412 } 2413 static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2414 { 2415 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2416 shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer); 2417 } 2418 2419 2420 template <typename ShortType> 2421 static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer) 2422 { 2423 float4 color; 2424 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2425 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2426 color.b = 0.0; 2427 color.a = 1.0; 2428 return color; 2429 } 2430 2431 2432 2433 2434 static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer) 2435 { 2436 int4 color; 2437 color.r = bytesToShort<short>(buffer, bufferOffset); 2438 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2439 color.b = 0; 2440 color.a = 1; 2441 return color; 2442 } 2443 static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2444 { 2445 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2446 shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer); 2447 } 2448 2449 2450 static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer) 2451 { 2452 uint4 color; 2453 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2454 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2455 color.b = 0; 2456 color.a = 1; 2457 return color; 2458 } 2459 static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2460 { 2461 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2462 shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer); 2463 } 2464 2465 2466 static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer) 2467 { 2468 float4 color; 2469 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2470 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2471 color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4)); 2472 color.a = 1.0; 2473 return color; 2474 } 2475 2476 2477 template <typename ShortType> 2478 static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer) 2479 { 2480 float4 color; 2481 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2482 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2483 color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4)); 2484 color.a = 1.0; 2485 return color; 2486 } 2487 2488 2489 2490 static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer) 2491 { 2492 int4 color; 2493 color.r = bytesToShort<short>(buffer, bufferOffset); 2494 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2495 color.b = bytesToShort<short>(buffer, bufferOffset + 4); 2496 color.a = 1; 2497 return color; 2498 } 2499 2500 2501 static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer) 2502 { 2503 uint4 color; 2504 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2505 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2506 color.b = bytesToShort<ushort>(buffer, bufferOffset + 4); 2507 color.a = 1; 2508 return color; 2509 } 2510 2511 2512 static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer) 2513 { 2514 float4 color; 2515 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2516 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2517 color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4)); 2518 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6)); 2519 return color; 2520 } 2521 static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2522 { 2523 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2524 shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer); 2525 shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer); 2526 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer); 2527 } 2528 2529 2530 template <typename ShortType> 2531 static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer) 2532 { 2533 float4 color; 2534 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2535 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2536 color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4)); 2537 color.a = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6)); 2538 return color; 2539 } 2540 2541 2542 2543 2544 static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer) 2545 { 2546 int4 color; 2547 color.r = bytesToShort<short>(buffer, bufferOffset); 2548 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2549 color.b = bytesToShort<short>(buffer, bufferOffset + 4); 2550 color.a = bytesToShort<short>(buffer, bufferOffset + 6); 2551 return color; 2552 } 2553 static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2554 { 2555 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2556 shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer); 2557 shortToBytes(static_cast<short>(color.b), bufferOffset + 4, buffer); 2558 shortToBytes(static_cast<short>(color.a), bufferOffset + 6, buffer); 2559 } 2560 2561 2562 static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer) 2563 { 2564 uint4 color; 2565 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2566 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2567 color.b = bytesToShort<ushort>(buffer, bufferOffset + 4); 2568 color.a = bytesToShort<ushort>(buffer, bufferOffset + 6); 2569 return color; 2570 } 2571 static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2572 { 2573 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2574 shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer); 2575 shortToBytes(static_cast<ushort>(color.b), bufferOffset + 4, buffer); 2576 shortToBytes(static_cast<ushort>(color.a), bufferOffset + 6, buffer); 2577 } 2578 2579 2580 static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer) 2581 { 2582 float4 color; 2583 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2584 color.g = color.b = 0.0; 2585 color.a = 1.0; 2586 return color; 2587 } 2588 static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2589 { 2590 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2591 } 2592 2593 2594 template <typename IntType> 2595 static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer) 2596 { 2597 float4 color; 2598 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2599 color.g = color.b = 0.0; 2600 color.a = 1.0; 2601 return color; 2602 } 2603 2604 2605 2606 2607 static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer) 2608 { 2609 float4 color; 2610 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2611 color.rgb = 0.0; 2612 return color; 2613 } 2614 static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2615 { 2616 intToBytes(as_type<uint>(color.a), bufferOffset, buffer); 2617 } 2618 2619 2620 static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer) 2621 { 2622 float4 color; 2623 color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2624 color.a = 1.0; 2625 return color; 2626 } 2627 static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2628 { 2629 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2630 } 2631 2632 2633 static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer) 2634 { 2635 int4 color; 2636 color.r = bytesToInt<int>(buffer, bufferOffset); 2637 color.g = color.b = 0; 2638 color.a = 1; 2639 return color; 2640 } 2641 static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2642 { 2643 intToBytes(color.r, bufferOffset, buffer); 2644 } 2645 2646 2647 static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer) 2648 { 2649 float4 color; 2650 constexpr float kDivisor = 1.0f / (1 << 16); 2651 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2652 color.g = color.b = 0.0; 2653 color.a = 1.0; 2654 return color; 2655 } 2656 2657 2658 static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer) 2659 { 2660 uint4 color; 2661 color.r = bytesToInt<uint>(buffer, bufferOffset); 2662 color.g = color.b = 0; 2663 color.a = 1; 2664 return color; 2665 } 2666 static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2667 { 2668 intToBytes(color.r, bufferOffset, buffer); 2669 } 2670 2671 2672 static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer) 2673 { 2674 float4 color; 2675 color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2676 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2677 return color; 2678 } 2679 static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2680 { 2681 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2682 intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer); 2683 } 2684 2685 2686 static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer) 2687 { 2688 float4 color; 2689 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2690 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2691 color.b = 0.0; 2692 color.a = 1.0; 2693 return color; 2694 } 2695 static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2696 { 2697 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2698 intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer); 2699 } 2700 2701 2702 template <typename IntType> 2703 static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer) 2704 { 2705 float4 color; 2706 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2707 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2708 color.b = 0.0; 2709 color.a = 1.0; 2710 return color; 2711 } 2712 2713 2714 2715 2716 static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer) 2717 { 2718 int4 color; 2719 color.r = bytesToInt<int>(buffer, bufferOffset); 2720 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2721 color.b = 0; 2722 color.a = 1; 2723 return color; 2724 } 2725 static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2726 { 2727 intToBytes(color.r, bufferOffset, buffer); 2728 intToBytes(color.g, bufferOffset + 4, buffer); 2729 } 2730 2731 2732 static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer) 2733 { 2734 float4 color; 2735 constexpr float kDivisor = 1.0f / (1 << 16); 2736 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2737 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2738 color.b = 0.0; 2739 color.a = 1.0; 2740 return color; 2741 } 2742 2743 2744 static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer) 2745 { 2746 uint4 color; 2747 color.r = bytesToInt<uint>(buffer, bufferOffset); 2748 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2749 color.b = 0; 2750 color.a = 1; 2751 return color; 2752 } 2753 static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2754 { 2755 intToBytes(color.r, bufferOffset, buffer); 2756 intToBytes(color.g, bufferOffset + 4, buffer); 2757 } 2758 2759 2760 static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer) 2761 { 2762 float4 color; 2763 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2764 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2765 color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8)); 2766 color.a = 1.0; 2767 return color; 2768 } 2769 2770 2771 template <typename IntType> 2772 static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer) 2773 { 2774 float4 color; 2775 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2776 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2777 color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8)); 2778 color.a = 1.0; 2779 return color; 2780 } 2781 2782 2783 2784 2785 static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer) 2786 { 2787 int4 color; 2788 color.r = bytesToInt<int>(buffer, bufferOffset); 2789 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2790 color.b = bytesToInt<int>(buffer, bufferOffset + 8); 2791 color.a = 1; 2792 return color; 2793 } 2794 2795 2796 static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer) 2797 { 2798 float4 color; 2799 constexpr float kDivisor = 1.0f / (1 << 16); 2800 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2801 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2802 color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor; 2803 color.a = 1.0; 2804 return color; 2805 } 2806 2807 2808 static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer) 2809 { 2810 uint4 color; 2811 color.r = bytesToInt<uint>(buffer, bufferOffset); 2812 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2813 color.b = bytesToInt<uint>(buffer, bufferOffset + 8); 2814 color.a = 1; 2815 return color; 2816 } 2817 2818 2819 static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer) 2820 { 2821 float4 color; 2822 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2823 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2824 color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8)); 2825 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12)); 2826 return color; 2827 } 2828 static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2829 { 2830 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2831 intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer); 2832 intToBytes(as_type<uint>(color.b), bufferOffset + 8, buffer); 2833 intToBytes(as_type<uint>(color.a), bufferOffset + 12, buffer); 2834 } 2835 2836 2837 template <typename IntType> 2838 static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer) 2839 { 2840 float4 color; 2841 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2842 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2843 color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8)); 2844 color.a = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12)); 2845 return color; 2846 } 2847 2848 2849 2850 2851 static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer) 2852 { 2853 int4 color; 2854 color.r = bytesToInt<int>(buffer, bufferOffset); 2855 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2856 color.b = bytesToInt<int>(buffer, bufferOffset + 8); 2857 color.a = bytesToInt<int>(buffer, bufferOffset + 12); 2858 return color; 2859 } 2860 static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2861 { 2862 intToBytes(color.r, bufferOffset, buffer); 2863 intToBytes(color.g, bufferOffset + 4, buffer); 2864 intToBytes(color.b, bufferOffset + 8, buffer); 2865 intToBytes(color.a, bufferOffset + 12, buffer); 2866 } 2867 2868 static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer) 2869 { 2870 float4 color; 2871 constexpr float kDivisor = 1.0f / (1 << 16); 2872 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2873 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2874 color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor; 2875 color.a = bytesToInt<int>(buffer, bufferOffset + 12) * kDivisor; 2876 return color; 2877 } 2878 2879 2880 static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer) 2881 { 2882 uint4 color; 2883 color.r = bytesToInt<uint>(buffer, bufferOffset); 2884 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2885 color.b = bytesToInt<uint>(buffer, bufferOffset + 8); 2886 color.a = bytesToInt<uint>(buffer, bufferOffset + 12); 2887 return color; 2888 } 2889 static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2890 { 2891 intToBytes(color.r, bufferOffset, buffer); 2892 intToBytes(color.g, bufferOffset + 4, buffer); 2893 intToBytes(color.b, bufferOffset + 8, buffer); 2894 intToBytes(color.a, bufferOffset + 12, buffer); 2895 } 2896 # 1292 "./copy_buffer.metal" 2897 static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8_SINT(bufferOffset, buffer); } static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8_UINT(bufferOffset, buffer); } static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_UINT(bufferOffset, buffer); } 2898 static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16_SINT(bufferOffset, buffer); } static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16_UINT(bufferOffset, buffer); } static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_UINT(bufferOffset, buffer); } 2899 static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32_SINT(bufferOffset, buffer); } static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32_UINT(bufferOffset, buffer); } static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_UINT(bufferOffset, buffer); } 2900 2901 static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); } 2902 2903 2904 kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2905 { 2906 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2907 # 1336 "./copy_buffer.metal" 2908 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2909 2910 switch (kCopyFormatType) 2911 { 2912 case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2913 } 2914 2915 2916 } 2917 2918 kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2919 { 2920 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2921 # 1364 "./copy_buffer.metal" 2922 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2923 2924 switch (kCopyFormatType) 2925 { 2926 case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2927 } 2928 2929 2930 } 2931 2932 kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2933 { 2934 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2935 # 1392 "./copy_buffer.metal" 2936 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2937 2938 switch (kCopyFormatType) 2939 { 2940 case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2941 } 2942 2943 2944 } 2945 2946 2947 kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2948 { 2949 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2950 # 1439 "./copy_buffer.metal" 2951 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2952 2953 switch (kCopyFormatType) 2954 { 2955 case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; 2956 } 2957 2958 2959 } 2960 2961 kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<int, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<int, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2962 { 2963 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2964 # 1464 "./copy_buffer.metal" 2965 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2966 2967 switch (kCopyFormatType) 2968 { 2969 case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break; 2970 } 2971 2972 2973 } 2974 2975 kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<uint, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<uint, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2976 { 2977 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2978 # 1489 "./copy_buffer.metal" 2979 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2980 2981 switch (kCopyFormatType) 2982 { 2983 case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break; 2984 } 2985 2986 2987 } 2988 2989 2990 struct CopyVertexParams 2991 { 2992 uint srcBufferStartOffset; 2993 uint srcStride; 2994 uint srcComponentBytes; 2995 uint srcComponents; 2996 2997 2998 2999 uchar4 srcDefaultAlphaData; 3000 3001 uint dstBufferStartOffset; 3002 uint dstStride; 3003 uint dstComponents; 3004 3005 uint vertexCount; 3006 }; 3007 # 1539 "./copy_buffer.metal" 3008 template <typename IntType> 3009 static inline void writeFloatVertex(constant CopyVertexParams &options, 3010 uint idx, 3011 vec<IntType, 4> data, 3012 device uchar *dst) 3013 { 3014 uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; 3015 3016 for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) 3017 { 3018 floatToBytes(static_cast<float>(data[component]), dstOffset, dst); 3019 } 3020 } 3021 3022 template <> 3023 inline void writeFloatVertex(constant CopyVertexParams &options, 3024 uint idx, 3025 vec<float, 4> data, 3026 device uchar *dst) 3027 { 3028 uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; 3029 3030 for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) 3031 { 3032 floatToBytes(data[component], dstOffset, dst); 3033 } 3034 } 3035 3036 3037 static inline void convertToFloatVertexFormat(uint index, 3038 constant CopyVertexParams &options, 3039 constant uchar *srcBuffer, 3040 device uchar *dstBuffer) 3041 { 3042 # 1585 "./copy_buffer.metal" 3043 uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index; 3044 # 1594 "./copy_buffer.metal" 3045 switch (kCopyFormatType) 3046 { 3047 case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; 3048 } 3049 3050 3051 } 3052 3053 3054 kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]], 3055 constant CopyVertexParams &options [[buffer(0)]], 3056 constant uchar *srcBuffer [[buffer(1)]], 3057 device uchar *dstBuffer [[buffer(2)]]) 3058 { 3059 if (index >= options.vertexCount) { return; }; 3060 convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); 3061 } 3062 3063 3064 vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]], 3065 constant CopyVertexParams &options [[buffer(0)]], 3066 constant uchar *srcBuffer [[buffer(1)]], 3067 device uchar *dstBuffer [[buffer(2)]]) 3068 { 3069 convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); 3070 } 3071 3072 3073 static inline void expandVertexFormatComponents(uint index, 3074 constant CopyVertexParams &options, 3075 constant uchar *srcBuffer, 3076 device uchar *dstBuffer) 3077 { 3078 uint srcOffset = options.srcBufferStartOffset + options.srcStride * index; 3079 uint dstOffset = options.dstBufferStartOffset + options.dstStride * index; 3080 3081 uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u); 3082 uint component; 3083 for (component = 0; component < options.srcComponents; ++component, 3084 srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes) 3085 { 3086 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 3087 { 3088 dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte]; 3089 } 3090 } 3091 3092 for (; component < dstComponentsBeforeAlpha; 3093 ++component, dstOffset += options.srcComponentBytes) 3094 { 3095 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 3096 { 3097 dstBuffer[dstOffset + byte] = 0; 3098 } 3099 } 3100 3101 if (component < options.dstComponents) 3102 { 3103 3104 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 3105 { 3106 dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte]; 3107 } 3108 } 3109 } 3110 3111 3112 kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]], 3113 constant CopyVertexParams &options [[buffer(0)]], 3114 constant uchar *srcBuffer [[buffer(1)]], 3115 device uchar *dstBuffer [[buffer(2)]]) 3116 { 3117 if (index >= options.vertexCount) { return; }; 3118 3119 expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); 3120 } 3121 3122 3123 vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]], 3124 constant CopyVertexParams &options [[buffer(0)]], 3125 constant uchar *srcBuffer [[buffer(1)]], 3126 device uchar *dstBuffer [[buffer(2)]]) 3127 { 3128 expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); 3129 } 3130 3131 3132 kernel void linearizeBlocks(ushort2 position [[thread_position_in_grid]], 3133 constant uint2 *dimensions [[buffer(0)]], 3134 constant uint2 *srcBuffer [[buffer(1)]], 3135 device uint2 *dstBuffer [[buffer(2)]]) 3136 { 3137 if (any(uint2(position) >= *dimensions)) 3138 { 3139 return; 3140 } 3141 uint2 t = uint2(position); 3142 t = (t | (t << 8)) & 0x00FF00FF; 3143 t = (t | (t << 4)) & 0x0F0F0F0F; 3144 t = (t | (t << 2)) & 0x33333333; 3145 t = (t | (t << 1)) & 0x55555555; 3146 dstBuffer[position.y * (*dimensions).x + position.x] = srcBuffer[(t.x << 1) | t.y]; 3147 } 3148 3149 3150 kernel void saturateDepth(uint2 position [[thread_position_in_grid]], 3151 constant uint3 *dimensions [[buffer(0)]], 3152 device float *srcBuffer [[buffer(1)]], 3153 device float *dstBuffer [[buffer(2)]]) 3154 { 3155 if (any(position >= (*dimensions).xy)) 3156 { 3157 return; 3158 } 3159 const uint srcOffset = position.y * (*dimensions).z + position.x; 3160 const uint dstOffset = position.y * (*dimensions).x + position.x; 3161 dstBuffer[dstOffset] = saturate(srcBuffer[srcOffset]); 3162 } 3163 # 6 "temp_master_source.metal" 2 3164 # 1 "./visibility.metal" 1 3165 3166 3167 3168 3169 3170 3171 3172 3173 constant bool kCombineWithExistingResult [[function_constant(1000)]]; 3174 3175 3176 3177 struct CombineVisibilityResultOptions 3178 { 3179 3180 uint startOffset; 3181 3182 uint numOffsets; 3183 }; 3184 3185 kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]], 3186 constant CombineVisibilityResultOptions &options [[buffer(0)]], 3187 constant ushort4 *renderpassVisibilityResult [[buffer(1)]], 3188 device ushort4 *finalResults [[buffer(2)]]) 3189 { 3190 if (idx > 0) 3191 { 3192 3193 3194 3195 return; 3196 } 3197 ushort4 finalResult16x4; 3198 3199 if (kCombineWithExistingResult) 3200 { 3201 finalResult16x4 = finalResults[0]; 3202 } 3203 else 3204 { 3205 finalResult16x4 = ushort4(0, 0, 0, 0); 3206 } 3207 3208 for (uint i = 0; i < options.numOffsets; ++i) 3209 { 3210 uint offset = options.startOffset + i; 3211 ushort4 renderpassResult = renderpassVisibilityResult[offset]; 3212 3213 3214 finalResult16x4 = finalResult16x4 | renderpassResult; 3215 } 3216 finalResults[0] = finalResult16x4; 3217 } 3218 # 7 "temp_master_source.metal" 2 3219 # 1 "./rewrite_indices.metal" 1 3220 # 11 "./rewrite_indices.metal" 3221 # 1 "./rewrite_indices_shared.h" 1 3222 # 12 "./rewrite_indices.metal" 2 3223 using namespace metal; 3224 3225 constant uint fixIndexBufferKey [[ function_constant(2000) ]]; 3226 constant bool indexBufferIsUint16 = (((fixIndexBufferKey >> 0U) & 0x03U) == 2U); 3227 constant bool indexBufferIsUint32 = (((fixIndexBufferKey >> 0U) & 0x03U) == 3U); 3228 constant bool outIndexBufferIsUint16 = (((fixIndexBufferKey >> 2U) & 0x03U) == 2U); 3229 constant bool outIndexBufferIsUint32 = (((fixIndexBufferKey >> 2U) & 0x03U) == 3U); 3230 constant bool doPrimRestart = (fixIndexBufferKey & 0x00100U); 3231 constant uint fixIndexBufferMode = (fixIndexBufferKey >> 4U) & 0x0FU; 3232 3233 3234 static inline uint readIdx( 3235 const device ushort *indexBufferUint16, 3236 const device uint *indexBufferUint32, 3237 const uint restartIndex, 3238 const uint indexCount, 3239 uint idx, 3240 thread bool &foundRestart, 3241 thread uint &indexThatRestartedFirst 3242 ) 3243 { 3244 uint inIndex = idx; 3245 if(inIndex < indexCount) 3246 { 3247 if(indexBufferIsUint16) 3248 { 3249 inIndex = indexBufferUint16[inIndex]; 3250 } 3251 else if(indexBufferIsUint32) 3252 { 3253 inIndex = indexBufferUint32[inIndex]; 3254 } 3255 } 3256 else 3257 { 3258 foundRestart = true; 3259 indexThatRestartedFirst = idx; 3260 } 3261 if(doPrimRestart && !foundRestart && inIndex == restartIndex) 3262 { 3263 foundRestart = true; 3264 indexThatRestartedFirst = idx; 3265 } 3266 return inIndex; 3267 } 3268 3269 static inline void outputPrimitive( 3270 const device ushort *indexBufferUint16, 3271 const device uint *indexBufferUint32, 3272 device ushort *outIndexBufferUint16, 3273 device uint *outIndexBufferUint32, 3274 const uint restartIndex, 3275 const uint indexCount, 3276 thread uint &baseIndex, 3277 uint onIndex, 3278 thread uint &onOutIndex 3279 ) 3280 { 3281 if(baseIndex > onIndex) return; 3282 bool foundRestart = false; 3283 uint indexThatRestartedFirst = 0; 3284 # 86 "./rewrite_indices.metal" 3285 switch(fixIndexBufferMode) 3286 { 3287 case 0x00U: 3288 { 3289 auto tmpIndex = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex, foundRestart, indexThatRestartedFirst); 3290 if(foundRestart) 3291 { 3292 baseIndex = indexThatRestartedFirst + 1; 3293 return; 3294 } 3295 3296 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex; } onOutIndex++; }); 3297 } 3298 break; 3299 case 0x01U: 3300 { 3301 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 3302 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 3303 if(foundRestart) 3304 { 3305 baseIndex = indexThatRestartedFirst + 1; 3306 return; 3307 } 3308 if((onIndex - baseIndex) & 1) return; 3309 3310 if(fixIndexBufferKey & 0x00200U) 3311 { 3312 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3313 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3314 } 3315 else 3316 { 3317 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3318 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3319 } 3320 } 3321 break; 3322 case 0x03U: 3323 { 3324 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 3325 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 3326 if(foundRestart) 3327 { 3328 baseIndex = indexThatRestartedFirst + 1; 3329 return; 3330 } 3331 3332 if(fixIndexBufferKey & 0x00200U) 3333 { 3334 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3335 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3336 } 3337 else 3338 { 3339 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3340 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3341 } 3342 } 3343 break; 3344 case 0x04U: 3345 { 3346 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 3347 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 3348 auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); 3349 if(foundRestart) 3350 { 3351 baseIndex = indexThatRestartedFirst + 1; 3352 return; 3353 } 3354 if(((onIndex - baseIndex) % 3) != 0) return; 3355 3356 if(fixIndexBufferKey & 0x00200U) 3357 { 3358 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3359 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3360 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3361 } 3362 else 3363 { 3364 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3365 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3366 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3367 } 3368 } 3369 break; 3370 case 0x05U: 3371 { 3372 uint isOdd = ((onIndex - baseIndex) & 1); 3373 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0 + isOdd, foundRestart, indexThatRestartedFirst); 3374 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1 - isOdd, foundRestart, indexThatRestartedFirst); 3375 auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); 3376 if(foundRestart) 3377 { 3378 baseIndex = indexThatRestartedFirst + 1; 3379 return; 3380 } 3381 3382 if(fixIndexBufferKey & 0x00200U) 3383 { 3384 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3385 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3386 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3387 } 3388 else 3389 { 3390 3391 if(isOdd) 3392 { 3393 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3394 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3395 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3396 } 3397 else 3398 { 3399 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3400 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3401 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3402 } 3403 } 3404 3405 assert(onOutIndex <= (onIndex + 1) * 3); 3406 assert(onOutIndex <= (indexCount - 2) * 3); 3407 } 3408 break; 3409 3410 } 3411 3412 3413 } 3414 3415 kernel void fixIndexBuffer( 3416 const device ushort *indexBufferUint16 [[ buffer(0), function_constant(indexBufferIsUint16) ]], 3417 const device uint *indexBufferUint32 [[ buffer(0), function_constant(indexBufferIsUint32) ]], 3418 device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], 3419 device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], 3420 constant uint &indexCount [[ buffer(2) ]], 3421 constant uint &primCount [[ buffer(3) ]], 3422 uint prim [[thread_position_in_grid]]) 3423 { 3424 constexpr uint restartIndex = 0xFFFFFFFF; 3425 uint baseIndex = 0; 3426 uint onIndex = onIndex; 3427 uint onOutIndex = onOutIndex; 3428 if(prim < primCount) 3429 { 3430 switch(fixIndexBufferMode) 3431 { 3432 case 0x00U: 3433 onIndex = prim; 3434 onOutIndex = prim; 3435 break; 3436 case 0x01U: 3437 onIndex = prim * 2; 3438 onOutIndex = prim * 2; 3439 break; 3440 case 0x03U: 3441 onIndex = prim; 3442 onOutIndex = prim * 2; 3443 break; 3444 case 0x04U: 3445 onIndex = prim * 3; 3446 onOutIndex = prim * 3; 3447 break; 3448 case 0x05U: 3449 onIndex = prim; 3450 onOutIndex = prim * 3; 3451 break; 3452 } 3453 outputPrimitive(indexBufferUint16, indexBufferUint32, outIndexBufferUint16, outIndexBufferUint32, restartIndex, indexCount, baseIndex, onIndex, onOutIndex); 3454 } 3455 } 3456 3457 3458 3459 static inline void generatePrimitive( 3460 device ushort *outIndexBufferUint16, 3461 device uint *outIndexBufferUint32, 3462 const uint firstVertex, 3463 const uint indexCount, 3464 thread uint &baseIndex, 3465 uint onIndex, 3466 uint primCount, 3467 thread uint &onOutIndex 3468 ) 3469 { 3470 if(baseIndex > onIndex) return; 3471 # 284 "./rewrite_indices.metal" 3472 switch(fixIndexBufferMode) 3473 { 3474 case 0x00U: 3475 { 3476 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = onIndex + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = onIndex + firstVertex; } onOutIndex++; }); 3477 } 3478 break; 3479 case 0x01U: 3480 { 3481 auto tmpIndex0 = onIndex + 0; 3482 auto tmpIndex1 = onIndex + 1; 3483 if(fixIndexBufferKey & 0x00200U) 3484 { 3485 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3486 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3487 } 3488 else 3489 { 3490 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3491 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3492 } 3493 } 3494 break; 3495 case 0x02U: 3496 { 3497 auto tmpIndex0 = onIndex + 0; 3498 auto tmpIndex1 = (onIndex + 1) % primCount; 3499 if(fixIndexBufferKey & 0x00200U) 3500 { 3501 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3502 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3503 } 3504 else 3505 { 3506 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3507 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3508 } 3509 } 3510 break; 3511 case 0x03U: 3512 { 3513 auto tmpIndex0 = onIndex + 0; 3514 auto tmpIndex1 = onIndex + 1; 3515 if(fixIndexBufferKey & 0x00200U) 3516 { 3517 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3518 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3519 } 3520 else 3521 { 3522 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3523 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3524 } 3525 } 3526 break; 3527 case 0x04U: 3528 { 3529 auto tmpIndex0 = onIndex + 0; 3530 auto tmpIndex1 = onIndex + 1; 3531 auto tmpIndex2 = onIndex + 2; 3532 if(fixIndexBufferKey & 0x00200U) 3533 { 3534 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3535 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3536 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3537 } 3538 else 3539 { 3540 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3541 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3542 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3543 } 3544 } 3545 break; 3546 case 0x05U: 3547 { 3548 uint isOdd = ((onIndex - baseIndex) & 1); 3549 auto tmpIndex0 = onIndex + 0 + isOdd; 3550 auto tmpIndex1 = onIndex + 1 - isOdd; 3551 auto tmpIndex2 = onIndex + 2; 3552 if(fixIndexBufferKey & 0x00200U) 3553 { 3554 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3555 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3556 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3557 } 3558 else 3559 { 3560 if(isOdd) 3561 { 3562 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3563 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3564 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3565 } 3566 else 3567 { 3568 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3569 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3570 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3571 } 3572 } 3573 3574 assert(onOutIndex <= (onIndex + 1) * 3); 3575 assert(onOutIndex <= (indexCount - 2) * 3); 3576 break; 3577 } 3578 case 0x06U: 3579 { 3580 auto tmpIndex0 = 0; 3581 auto tmpIndex1 = onIndex + 1; 3582 auto tmpIndex2 = onIndex + 2; 3583 3584 if(fixIndexBufferKey & 0x00200U) 3585 { 3586 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3587 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3588 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3589 } 3590 else 3591 { 3592 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3593 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3594 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3595 } 3596 } 3597 break; 3598 3599 } 3600 3601 } 3602 3603 3604 3605 kernel void genIndexBuffer( 3606 device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], 3607 device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], 3608 constant uint &indexCount [[ buffer(2) ]], 3609 constant uint &primCount [[ buffer(3) ]], 3610 constant uint &firstVertex [[ buffer(4) ]], 3611 uint prim [[thread_position_in_grid]]) 3612 { 3613 uint baseIndex = 0; 3614 uint onIndex = onIndex; 3615 uint onOutIndex = onOutIndex; 3616 if(prim < primCount) 3617 { 3618 switch(fixIndexBufferMode) 3619 { 3620 case 0x00U: 3621 onIndex = prim; 3622 onOutIndex = prim; 3623 break; 3624 case 0x01U: 3625 onIndex = prim * 2; 3626 onOutIndex = prim * 2; 3627 break; 3628 case 0x03U: 3629 onIndex = prim; 3630 onOutIndex = prim * 2; 3631 break; 3632 case 0x02U: 3633 onIndex = prim; 3634 onOutIndex = prim * 2; 3635 break; 3636 case 0x04U: 3637 onIndex = prim * 3; 3638 onOutIndex = prim * 3; 3639 break; 3640 case 0x05U: 3641 onIndex = prim; 3642 onOutIndex = prim * 3; 3643 break; 3644 case 0x06U: 3645 onIndex = prim; 3646 onOutIndex = prim * 3; 3647 break; 3648 } 3649 generatePrimitive(outIndexBufferUint16, outIndexBufferUint32, firstVertex, indexCount, baseIndex, onIndex, primCount, onOutIndex); 3650 } 3651 } 3652 # 8 "temp_master_source.metal" 2 3653 3654 3655 )"; 3656