1// GENERATED FILE - DO NOT EDIT. 2// Generated by gen_mtl_internal_shaders.py 3// 4// Copyright 2020 The ANGLE Project Authors. All rights reserved. 5// Use of this source code is governed by a BSD-style license that can be 6// found in the LICENSE file. 7// 8 9// Combined Metal default shaders. 10 11# 1 "temp_master_source.metal" 12# 1 "<built-in>" 1 13# 1 "<built-in>" 3 14# 483 "<built-in>" 3 15# 1 "<command line>" 1 16# 1 "<built-in>" 2 17# 1 "temp_master_source.metal" 2 18# 1 "./blit.metal" 1 19 20 21 22 23 24 25 26# 1 "./common.h" 1 27# 13 "./common.h" 28# include <simd/simd.h> 29# include <metal_stdlib> 30 31 32# 1 "./constants.h" 1 33# 11 "./constants.h" 34namespace rx 35{ 36namespace mtl_shader 37{ 38 39enum 40{ 41 kTextureType2D = 0, 42 kTextureType2DMultisample = 1, 43 kTextureType2DArray = 2, 44 kTextureTypeCube = 3, 45 kTextureType3D = 4, 46 kTextureTypeCount = 5, 47}; 48 49 50 51 52} 53} 54# 18 "./common.h" 2 55 56 57 58 59 60 61 62using namespace metal; 63 64 65 66constant uint32_t kNumColorOutputs [[function_constant(0)]]; 67constant bool kColorOutputAvailable0 = kNumColorOutputs > 0; 68constant bool kColorOutputAvailable1 = kNumColorOutputs > 1; 69constant bool kColorOutputAvailable2 = kNumColorOutputs > 2; 70constant bool kColorOutputAvailable3 = kNumColorOutputs > 3; 71constant bool kColorOutputAvailable4 = kNumColorOutputs > 4; 72constant bool kColorOutputAvailable5 = kNumColorOutputs > 5; 73constant bool kColorOutputAvailable6 = kNumColorOutputs > 6; 74constant bool kColorOutputAvailable7 = kNumColorOutputs > 7; 75 76namespace rx 77{ 78namespace mtl_shader 79{ 80 81 82constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)}; 83 84template <typename T> 85struct MultipleColorOutputs 86{ 87 vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]]; 88 vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]]; 89 vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]]; 90 vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]]; 91 vec<T, 4> color4 [[color(4), function_constant(kColorOutputAvailable4)]]; 92 vec<T, 4> color5 [[color(5), function_constant(kColorOutputAvailable5)]]; 93 vec<T, 4> color6 [[color(6), function_constant(kColorOutputAvailable6)]]; 94 vec<T, 4> color7 [[color(7), function_constant(kColorOutputAvailable7)]]; 95}; 96# 69 "./common.h" 97template <typename T> 98static inline MultipleColorOutputs<T> toMultipleColorOutputs(vec<T, 4> color) 99{ 100 MultipleColorOutputs<T> re; 101 102 do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0); 103 do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0); 104 do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0); 105 do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0); 106 do { if (kColorOutputAvailable4) { re.color4 = color; } } while (0); 107 do { if (kColorOutputAvailable5) { re.color5 = color; } } while (0); 108 do { if (kColorOutputAvailable6) { re.color6 = color; } } while (0); 109 do { if (kColorOutputAvailable7) { re.color7 = color; } } while (0); 110 111 return re; 112} 113 114static inline float3 cubeTexcoords(float2 texcoords, int face) 115{ 116 texcoords = 2.0 * texcoords - 1.0; 117 switch (face) 118 { 119 case 0: 120 return float3(1.0, -texcoords.y, -texcoords.x); 121 case 1: 122 return float3(-1.0, -texcoords.y, texcoords.x); 123 case 2: 124 return float3(texcoords.x, 1.0, texcoords.y); 125 case 3: 126 return float3(texcoords.x, -1.0, -texcoords.y); 127 case 4: 128 return float3(texcoords.x, -texcoords.y, 1.0); 129 case 5: 130 return float3(-texcoords.x, -texcoords.y, -1.0); 131 } 132 return float3(texcoords, 0); 133} 134 135template <typename T> 136static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords) 137{ 138 uint samples = srcTexture.get_num_samples(); 139 140 vec<T, 4> output(0); 141 142 for (uint sample = 0; sample < samples; ++sample) 143 { 144 output += srcTexture.read(coords, sample); 145 } 146 147 output = output / samples; 148 149 return output; 150} 151 152static inline float4 sRGBtoLinear(float4 color) 153{ 154 float3 linear1 = color.rgb / 12.92; 155 float3 linear2 = powr((color.rgb + float3(0.055)) / 1.055, 2.4); 156 float3 factor = float3(color.rgb <= float3(0.04045)); 157 float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a); 158 159 return linear; 160} 161 162static inline float linearToSRGB(float color) 163{ 164 if (color <= 0.0f) 165 return 0.0f; 166 if (color < 0.0031308f) 167 return 12.92f * color; 168 if (color < 1.0f) 169 return 1.055f * powr(color, 0.41666f) - 0.055f; 170 return 1.0f; 171} 172 173static inline float4 linearToSRGB(float4 color) 174{ 175 return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a); 176} 177 178template <typename Short> 179static inline Short bytesToShort(constant uchar *input, uint offset) 180{ 181 Short inputLo = input[offset]; 182 Short inputHi = input[offset + 1]; 183 184 return inputLo | (inputHi << 8); 185} 186 187template <typename Int> 188static inline Int bytesToInt(constant uchar *input, uint offset) 189{ 190 Int input0 = input[offset]; 191 Int input1 = input[offset + 1]; 192 Int input2 = input[offset + 2]; 193 Int input3 = input[offset + 3]; 194 195 return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); 196} 197 198template <typename Short> 199static inline void shortToBytes(Short val, uint offset, device uchar *output) 200{ 201 ushort valUnsigned = as_type<ushort>(val); 202 output[offset] = valUnsigned & 0xff; 203 output[offset + 1] = (valUnsigned >> 8) & 0xff; 204} 205 206template <typename Int> 207static inline void intToBytes(Int val, uint offset, device uchar *output) 208{ 209 uint valUnsigned = as_type<uint>(val); 210 output[offset] = valUnsigned & 0xff; 211 output[offset + 1] = (valUnsigned >> 8) & 0xff; 212 output[offset + 2] = (valUnsigned >> 16) & 0xff; 213 output[offset + 3] = (valUnsigned >> 24) & 0xff; 214} 215 216static inline void floatToBytes(float val, uint offset, device uchar *output) 217{ 218 intToBytes(as_type<uint>(val), offset, output); 219} 220 221static inline void int24bitToBytes(uint val, uint offset, device uchar *output) 222{ 223 output[offset] = val & 0xff; 224 output[offset + 1] = (val >> 8) & 0xff; 225 output[offset + 2] = (val >> 16) & 0xff; 226} 227 228template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> 229static inline T getShiftedData(T input) 230{ 231 static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), 232 "T must have at least as many bits as inputBitCount + inputBitStart."); 233 const T mask = (1 << inputBitCount) - 1; 234 return (input >> inputBitStart) & mask; 235} 236 237template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> 238static inline T shiftData(T input) 239{ 240 static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), 241 "T must have at least as many bits as inputBitCount + inputBitStart."); 242 const T mask = (1 << inputBitCount) - 1; 243 return (input & mask) << inputBitStart; 244} 245 246template <unsigned int inputBitCount, typename T> 247static inline float normalizedToFloat(T input) 248{ 249 static_assert(inputBitCount <= (sizeof(T) * 8), 250 "T must have more bits than or same bits as inputBitCount."); 251 static_assert(inputBitCount <= 23, "Only single precision is supported"); 252 253 constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1); 254 return input * inverseMax; 255} 256 257template <typename T> 258static inline float normalizedToFloat(T input) 259{ 260 return normalizedToFloat<sizeof(T) * 8, T>(input); 261} 262 263template <> 264inline float normalizedToFloat(short input) 265{ 266 constexpr float inverseMax = 1.0f / 0x7fff; 267 return static_cast<float>(input) * inverseMax; 268} 269 270template <> 271inline float normalizedToFloat(int input) 272{ 273 constexpr float inverseMax = 1.0f / 0x7fffffff; 274 return static_cast<float>(input) * inverseMax; 275} 276 277template <> 278inline float normalizedToFloat(uint input) 279{ 280 constexpr float inverseMax = 1.0f / 0xffffffff; 281 return static_cast<float>(input) * inverseMax; 282} 283 284template <unsigned int outputBitCount, typename T> 285static inline T floatToNormalized(float input) 286{ 287 static_assert(outputBitCount <= (sizeof(T) * 8), 288 "T must have more bits than or same bits as inputBitCount."); 289 static_assert(outputBitCount <= 23, "Only single precision is supported"); 290 291 return static_cast<T>(((1 << outputBitCount) - 1) * input + 0.5f); 292} 293 294template <typename T> 295static inline T floatToNormalized(float input) 296{ 297 return floatToNormalized<sizeof(T) * 8, T>(input); 298} 299 300} 301} 302# 9 "./blit.metal" 2 303 304using namespace rx::mtl_shader; 305 306 307constant bool kPremultiplyAlpha [[function_constant(1)]]; 308constant bool kUnmultiplyAlpha [[function_constant(2)]]; 309constant bool kTransformLinearToSrgb [[function_constant(3)]]; 310constant int kSourceTextureType [[function_constant(4)]]; 311constant int kSourceTexture2Type [[function_constant(5)]]; 312 313constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D; 314constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray; 315constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample; 316constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube; 317constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D; 318 319constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D; 320constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray; 321constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample; 322constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube; 323 324struct BlitParams 325{ 326 327 float4 srcTexCoords; 328 int srcLevel; 329 int srcLayer; 330 bool dstLuminance; 331 uint8_t padding[7]; 332}; 333 334struct BlitVSOut 335{ 336 float4 position [[position]]; 337 float2 texCoords [[center_no_perspective, user(locn1)]]; 338}; 339 340vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]]) 341{ 342 BlitVSOut output; 343 output.position.xy = select(float2(-1.0f), float2(1.0f), bool2(vid & uint2(2, 1))); 344 output.position.zw = float2(0.0, 1.0); 345 output.texCoords = select(options.srcTexCoords.xy, options.srcTexCoords.zw, bool2(vid & uint2(2, 1))); 346 347 return output; 348} 349 350template <typename SrcTexture2d> 351static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords) 352{ 353 uint2 dimens(srcTexture.get_width(), srcTexture.get_height()); 354 uint2 coords = uint2(texCoords * float2(dimens)); 355 356 return coords; 357} 358 359template <typename T> 360static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords) 361{ 362 uint2 coords = getImageCoords(srcTexture, texCoords); 363 return resolveTextureMS(srcTexture, coords); 364} 365 366template <typename T> 367static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture, 368 sampler textureSampler, 369 float2 texCoords, 370 constant BlitParams &options) 371{ 372 uint depth = srcTexture.get_depth(options.srcLevel); 373 float zCoord = (float(options.srcLayer) + 0.5) / float(depth); 374 375 return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel)); 376} 377# 101 "./blit.metal" 378template <typename T> 379static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 380{ 381 vec<T, 4> output; 382 383 switch (kSourceTextureType) 384 { 385 case kTextureType2D: 386 output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel)); 387 break; 388 case kTextureType2DArray: 389 output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer, 390 level(options.srcLevel)); 391 break; 392 case kTextureType2DMultisample: 393 output = blitSampleTextureMS(srcTexture2dMS, input.texCoords); 394 break; 395 case kTextureTypeCube: 396 output = srcTextureCube.sample(textureSampler, 397 cubeTexcoords(input.texCoords, options.srcLayer), 398 level(options.srcLevel)); 399 break; 400 case kTextureType3D: 401 output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options); 402 break; 403 } 404 405 if (kTransformLinearToSrgb) { 406 output.x = linearToSRGB(output.x); 407 output.y = linearToSRGB(output.y); 408 output.z = linearToSRGB(output.z); 409 } 410 if (kUnmultiplyAlpha) 411 { 412 if (output.a != 0.0) 413 { 414 output.xyz /= output.a; 415 } 416 } 417 if (kPremultiplyAlpha) 418 { 419 output.xyz *= output.a; 420 } 421 422 if (options.dstLuminance) 423 { 424 output.g = output.b = output.r; 425 } 426 427 return output; 428} 429 430template <typename T> 431static inline MultipleColorOutputs<T> blitFS(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 432{ 433 vec<T, 4> output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 434 435 return toMultipleColorOutputs(output); 436} 437 438fragment MultipleColorOutputs<float> blitFloatFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 439{ 440 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 441} 442fragment MultipleColorOutputs<int> blitIntFS(BlitVSOut input [[stage_in]], texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<int> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 443{ 444 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 445} 446fragment MultipleColorOutputs<uint> blitUIntFS(BlitVSOut input [[stage_in]], texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<uint> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 447{ 448 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 449} 450 451fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 452{ 453 float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 454 uint4 output = uint4(inputColor * float4(255.0)); 455 456 return toMultipleColorOutputs(output); 457} 458 459 460struct FragmentDepthOut 461{ 462 float depth [[depth(any)]]; 463}; 464 465static inline float sampleDepth( 466 texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]], 467 texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]], 468 texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]], 469 texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]], 470 float2 texCoords, 471 constant BlitParams &options) 472{ 473 float4 output; 474 475 constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); 476 477 switch (kSourceTextureType) 478 { 479 case kTextureType2D: 480 output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel)); 481 break; 482 case kTextureType2DArray: 483 output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer, 484 level(options.srcLevel)); 485 break; 486 case kTextureType2DMultisample: 487 488 output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); 489 break; 490 case kTextureTypeCube: 491 output = 492 srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer), 493 level(options.srcLevel)); 494 break; 495 } 496 497 return output.r; 498} 499 500fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]], 501 texture2d<float> srcTexture2d 502 [[texture(0), function_constant(kSourceTextureType2D)]], 503 texture2d_array<float> srcTexture2dArray 504 [[texture(0), function_constant(kSourceTextureType2DArray)]], 505 texture2d_ms<float> srcTexture2dMS 506 [[texture(0), function_constant(kSourceTextureType2DMS)]], 507 texturecube<float> srcTextureCube 508 [[texture(0), function_constant(kSourceTextureTypeCube)]], 509 constant BlitParams &options [[buffer(0)]]) 510{ 511 FragmentDepthOut re; 512 513 re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 514 input.texCoords, options); 515 516 return re; 517} 518 519static inline uint32_t sampleStencil( 520 texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]], 521 texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]], 522 texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]], 523 texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]], 524 float2 texCoords, 525 int srcLevel, 526 int srcLayer) 527{ 528 uint4 output; 529 constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); 530 531 switch (kSourceTexture2Type) 532 { 533 case kTextureType2D: 534 output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel)); 535 break; 536 case kTextureType2DArray: 537 output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel)); 538 break; 539 case kTextureType2DMultisample: 540 541 output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); 542 break; 543 case kTextureTypeCube: 544 output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer), 545 level(srcLevel)); 546 break; 547 } 548 549 return output.r; 550} 551 552 553struct BlitStencilToBufferParams 554{ 555 float2 srcStartTexCoords; 556 float2 srcTexCoordSteps; 557 int srcLevel; 558 int srcLayer; 559 560 uint2 dstSize; 561 uint dstBufferRowPitch; 562 563 bool resolveMS; 564}; 565 566kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]], 567 texture2d<uint32_t> srcTexture2d 568 [[texture(1), function_constant(kSourceTexture2Type2D)]], 569 texture2d_array<uint32_t> srcTexture2dArray 570 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 571 texture2d_ms<uint32_t> srcTexture2dMS 572 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 573 texturecube<uint32_t> srcTextureCube 574 [[texture(1), function_constant(kSourceTexture2TypeCube)]], 575 constant BlitStencilToBufferParams &options [[buffer(0)]], 576 device uchar *buffer [[buffer(1)]]) 577{ 578 if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y) 579 { 580 return; 581 } 582 583 float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps; 584 585 if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS) 586 { 587 uint samples = srcTexture2dMS.get_num_samples(); 588 uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords); 589 uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x; 590 591 for (uint sample = 0; sample < samples; ++sample) 592 { 593 uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r; 594 buffer[bufferOffset + sample] = static_cast<uchar>(stencilPerSample); 595 } 596 } 597 else 598 { 599 uint32_t stencil = 600 sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 601 srcTexCoords, options.srcLevel, options.srcLayer); 602 603 buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast<uchar>(stencil); 604 } 605} 606 607 608#if __METAL_VERSION__ >= 210 609 610struct FragmentStencilOut 611{ 612 uint32_t stencil [[stencil]]; 613}; 614 615struct FragmentDepthStencilOut 616{ 617 float depth [[depth(any)]]; 618 uint32_t stencil [[stencil]]; 619}; 620 621fragment FragmentStencilOut blitStencilFS( 622 BlitVSOut input [[stage_in]], 623 texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]], 624 texture2d_array<uint32_t> srcTexture2dArray 625 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 626 texture2d_ms<uint32_t> srcTexture2dMS 627 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 628 texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]], 629 constant BlitParams &options [[buffer(0)]]) 630{ 631 FragmentStencilOut re; 632 633 re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 634 input.texCoords, options.srcLevel, options.srcLayer); 635 636 return re; 637} 638 639fragment FragmentDepthStencilOut blitDepthStencilFS( 640 BlitVSOut input [[stage_in]], 641 642 texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], 643 texture2d_array<float> srcDepthTexture2dArray 644 [[texture(0), function_constant(kSourceTextureType2DArray)]], 645 texture2d_ms<float> srcDepthTexture2dMS 646 [[texture(0), function_constant(kSourceTextureType2DMS)]], 647 texturecube<float> srcDepthTextureCube 648 [[texture(0), function_constant(kSourceTextureTypeCube)]], 649 650 651 texture2d<uint32_t> srcStencilTexture2d 652 [[texture(1), function_constant(kSourceTexture2Type2D)]], 653 texture2d_array<uint32_t> srcStencilTexture2dArray 654 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 655 texture2d_ms<uint32_t> srcStencilTexture2dMS 656 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 657 texturecube<uint32_t> srcStencilTextureCube 658 [[texture(1), function_constant(kSourceTexture2TypeCube)]], 659 660 constant BlitParams &options [[buffer(0)]]) 661{ 662 FragmentDepthStencilOut re; 663 664 re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS, 665 srcDepthTextureCube, input.texCoords, options); 666 re.stencil = 667 sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS, 668 srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer); 669 return re; 670} 671#endif 672# 2 "temp_master_source.metal" 2 673# 1 "./clear.metal" 1 674# 10 "./clear.metal" 675using namespace rx::mtl_shader; 676 677struct ClearParams 678{ 679 float4 clearColor; 680 float clearDepth; 681}; 682 683vertex float4 clearVS(unsigned int vid [[ vertex_id ]], 684 constant ClearParams &clearParams [[buffer(0)]]) 685{ 686 return float4(gCorners[vid], clearParams.clearDepth, 1.0); 687} 688 689fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]]) 690{ 691 return toMultipleColorOutputs(clearParams.clearColor); 692} 693 694fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]]) 695{ 696 return toMultipleColorOutputs(as_type<int4>(clearParams.clearColor)); 697} 698 699fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]]) 700{ 701 return toMultipleColorOutputs(as_type<uint4>(clearParams.clearColor)); 702} 703# 3 "temp_master_source.metal" 2 704# 1 "./gen_indices.metal" 1 705 706 707 708 709 710 711 712 713using namespace rx::mtl_shader; 714 715 716constant bool kSourceBufferAligned[[function_constant(100)]]; 717constant bool kSourceIndexIsU8[[function_constant(200)]]; 718constant bool kSourceIndexIsU16[[function_constant(300)]]; 719constant bool kSourceIndexIsU32[[function_constant(400)]]; 720constant bool kSourceBufferUnaligned = !kSourceBufferAligned; 721constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned; 722constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned; 723constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned; 724 725struct IndexConversionParams 726{ 727 uint32_t srcOffset; 728 uint32_t indexCount; 729 bool primitiveRestartEnabled; 730}; 731 732 733 734inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx) 735{ 736 return inputAligned[offset / 2 + idx]; 737} 738inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx) 739{ 740 return inputAligned[offset / 4 + idx]; 741} 742inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx) 743{ 744 return input[offset + idx]; 745} 746inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx) 747{ 748 ushort inputLo = input[offset + 2 * idx]; 749 ushort inputHi = input[offset + 2 * idx + 1]; 750 751 return inputLo | (inputHi << 8); 752} 753inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx) 754{ 755 uint input0 = input[offset + 4 * idx]; 756 uint input1 = input[offset + 4 * idx + 1]; 757 uint input2 = input[offset + 4 * idx + 2]; 758 uint input3 = input[offset + 4 * idx + 3]; 759 760 return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); 761} 762 763kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]], 764 constant IndexConversionParams &options [[buffer(0)]], 765 constant uchar *input [[buffer(1)]], 766 device ushort *output [[buffer(2)]]) 767{ 768 if (idx >= options.indexCount) { return; }; 769 770 uchar value = getIndexAligned(input, options.srcOffset, idx); 771 772 if (options.primitiveRestartEnabled && value == 0xff) 773 { 774 output[idx] = 0xffff; 775 } 776 else 777 { 778 output[idx] = value; 779 } 780} 781 782kernel void convertIndexU16(uint idx [[thread_position_in_grid]], 783 constant IndexConversionParams &options [[buffer(0)]], 784 constant uchar *input 785 [[buffer(1), function_constant(kSourceBufferUnaligned)]], 786 constant ushort *inputAligned 787 [[buffer(1), function_constant(kSourceBufferAligned)]], 788 device ushort *output [[buffer(2)]]) 789{ 790 if (idx >= options.indexCount) { return; }; 791 792 ushort value; 793 if (kSourceBufferAligned) 794 { 795 value = getIndexAligned(inputAligned, options.srcOffset, idx); 796 } 797 else 798 { 799 value = getIndexUnalignedU16(input, options.srcOffset, idx); 800 } 801 output[idx] = value; 802} 803 804kernel void convertIndexU32(uint idx [[thread_position_in_grid]], 805 constant IndexConversionParams &options [[buffer(0)]], 806 constant uchar *input 807 [[buffer(1), function_constant(kSourceBufferUnaligned)]], 808 constant uint *inputAligned 809 [[buffer(1), function_constant(kSourceBufferAligned)]], 810 device uint *output [[buffer(2)]]) 811{ 812 if (idx >= options.indexCount) { return; }; 813 814 uint value; 815 if (kSourceBufferAligned) 816 { 817 value = getIndexAligned(inputAligned, options.srcOffset, idx); 818 } 819 else 820 { 821 value = getIndexUnalignedU32(input, options.srcOffset, idx); 822 } 823 output[idx] = value; 824} 825 826struct IndexFromArrayParams 827{ 828 uint firstVertex; 829 830 uint vertexCount; 831}; 832 833 834kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]], 835 constant IndexFromArrayParams &options [[buffer(0)]], 836 device uint *output [[buffer(2)]]) 837{ 838 if (idx >= options.vertexCount) { return; }; 839 840 uint vertexIdx = options.firstVertex + 2 + idx; 841 842 843 844 output[3 * idx ] = vertexIdx - 1; 845 output[3 * idx + 1] = vertexIdx; 846 output[3 * idx + 2] = options.firstVertex; 847} 848 849inline uint getIndexU32(uint offset, 850 uint idx, 851 constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]], 852 constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]], 853 constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]]) 854{ 855 if (kUseSourceBufferU8) 856 { 857 if (kSourceIndexIsU16) 858 { 859 return getIndexUnalignedU16(inputU8, offset, idx); 860 } 861 else if (kSourceIndexIsU32) 862 { 863 return getIndexUnalignedU32(inputU8, offset, idx); 864 } 865 return getIndexAligned(inputU8, offset, idx); 866 } 867 else if (kUseSourceBufferU16) 868 { 869 return getIndexAligned(inputU16, offset, idx); 870 } 871 else if (kUseSourceBufferU32) 872 { 873 return getIndexAligned(inputU32, offset, idx); 874 } 875 return 0; 876} 877 878 879 880 881kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]], 882 constant IndexConversionParams &options [[buffer(0)]], 883 constant uchar *inputU8 884 [[buffer(1), function_constant(kUseSourceBufferU8)]], 885 constant ushort *inputU16 886 [[buffer(1), function_constant(kUseSourceBufferU16)]], 887 constant uint *inputU32 888 [[buffer(1), function_constant(kUseSourceBufferU32)]], 889 device uint *output [[buffer(2)]]) 890{ 891 if (idx >= options.indexCount) { return; }; 892 893 uint elemIdx = 2 + idx; 894 895 output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32); 896 output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32); 897 output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32); 898} 899 900 901kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]], 902 constant IndexFromArrayParams &options [[buffer(0)]], 903 device uint *output [[buffer(2)]]) 904{ 905 uint totalIndices = options.vertexCount + 1; 906 if (idx >= totalIndices) { return; }; 907 908 output[idx] = options.firstVertex + idx % options.vertexCount; 909} 910 911 912 913kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]], 914 constant IndexConversionParams &options [[buffer(0)]], 915 constant uchar *inputU8 916 [[buffer(1), function_constant(kUseSourceBufferU8)]], 917 constant ushort *inputU16 918 [[buffer(1), function_constant(kUseSourceBufferU16)]], 919 constant uint *inputU32 920 [[buffer(1), function_constant(kUseSourceBufferU32)]], 921 device uint *output [[buffer(2)]]) 922{ 923 uint totalTargetIndices = options.indexCount + 1; 924 if (idx >= totalTargetIndices) { return; }; 925 926 output[idx] = 927 getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32); 928} 929# 4 "temp_master_source.metal" 2 930# 1 "./gen_mipmap.metal" 1 931 932 933 934 935 936 937 938 939using namespace rx::mtl_shader; 940# 31 "./gen_mipmap.metal" 941struct GenMipParams 942{ 943 uint srcLevel; 944 uint numMipLevelsToGen; 945 bool sRGB; 946}; 947 948 949 950kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]], 951 ushort3 gIndices [[thread_position_in_grid]], 952 texture3d<float> srcTexture [[texture(0)]], 953 texture3d<float, access::write> dstMip1 [[texture(1)]], 954 texture3d<float, access::write> dstMip2 [[texture(2)]], 955 texture3d<float, access::write> dstMip3 [[texture(3)]], 956 texture3d<float, access::write> dstMip4 [[texture(4)]], 957 constant GenMipParams &options [[buffer(0)]]) 958{ 959 ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth()); 960 bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z; 961 962 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 963 964 965 966 967 threadgroup float sR[(8 * 8 * 8)]; 968 threadgroup float sG[(8 * 8 * 8)]; 969 threadgroup float sB[(8 * 8 * 8)]; 970 threadgroup float sA[(8 * 8 * 8)]; 971 972 973 float4 texel1; 974 if (validThread) 975 { 976 float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize); 977 texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); 978 979 980 dstMip1.write(texel1, gIndices); 981 } 982 else 983 { 984 985 lIndex = 0xffffffff; 986 } 987 988 if (options.numMipLevelsToGen == 1) 989 { 990 return; 991 } 992 993 994 995 996 if (options.sRGB) 997 { 998 texel1 = linearToSRGB(texel1); 999 } 1000 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1001 1002 threadgroup_barrier(mem_flags::mem_threadgroup); 1003 1004 1005 if ((lIndex & 0x49) == 0) 1006 { 1007 bool3 atEdge = gIndices == (mipSize - ushort3(1)); 1008 1009 1010 1011 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 1012 1013 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1014 1015 float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)])); 1016 1017 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1018 1019 1020 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)])); 1021 1022 1023 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)])); 1024 1025 1026 float4 texel8 = 1027 (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)])); 1028 1029 1030 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1031 1032 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); 1033 1034 1035 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1036 } 1037 1038 if (options.numMipLevelsToGen == 2) 1039 { 1040 return; 1041 } 1042 1043 1044 threadgroup_barrier(mem_flags::mem_threadgroup); 1045 1046 1047 if ((lIndex & 0xdb) == 0) 1048 { 1049 mipSize = max(mipSize >> 1, ushort3(1)); 1050 bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1)); 1051 1052 1053 1054 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1055 1056 float4 texel3 = 1057 (atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)])); 1058 1059 float4 texel4 = 1060 (atEdge.z) ? (texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))])); 1061 1062 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1063 1064 1065 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)])); 1066 1067 1068 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)])); 1069 1070 1071 1072 float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)])); 1073 1074 1075 1076 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1077 1078 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); 1079 1080 1081 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1082 } 1083 1084 if (options.numMipLevelsToGen == 3) 1085 { 1086 return; 1087 } 1088 1089 1090 threadgroup_barrier(mem_flags::mem_threadgroup); 1091 1092 1093 if ((lIndex & 0x1ff) == 0) 1094 { 1095 mipSize = max(mipSize >> 1, ushort3(1)); 1096 bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1)); 1097 1098 1099 1100 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1101 1102 float4 texel3 = 1103 (atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)])); 1104 1105 float4 texel4 = 1106 (atEdge.z) ? (texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))])); 1107 1108 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 1109 1110 1111 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)])); 1112 1113 1114 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)])); 1115 1116 1117 1118 float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)])); 1119 1120 1121 1122 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1123 1124 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); 1125 } 1126} 1127 1128kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1129 ushort2 gIndices [[thread_position_in_grid]], 1130 texture2d<float> srcTexture [[texture(0)]], 1131 texture2d<float, access::write> dstMip1 [[texture(1)]], 1132 texture2d<float, access::write> dstMip2 [[texture(2)]], 1133 texture2d<float, access::write> dstMip3 [[texture(3)]], 1134 texture2d<float, access::write> dstMip4 [[texture(4)]], 1135 constant GenMipParams &options [[buffer(0)]]) 1136{ 1137 uint firstMipLevel = options.srcLevel + 1; 1138 ushort2 mipSize = 1139 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1140 bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y; 1141 1142 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1143 1144 1145 1146 1147 threadgroup float sR[(8 * 8)]; 1148 threadgroup float sG[(8 * 8)]; 1149 threadgroup float sB[(8 * 8)]; 1150 threadgroup float sA[(8 * 8)]; 1151 1152 1153 float4 texel1; 1154 if (validThread) 1155 { 1156 float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize); 1157 texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); 1158 1159 1160 dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices); 1161 } 1162 else 1163 { 1164 1165 lIndex = 0xffffffff; 1166 } 1167 1168 if (options.numMipLevelsToGen == 1) 1169 { 1170 return; 1171 } 1172 1173 1174 1175 1176 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1177 1178 threadgroup_barrier(mem_flags::mem_threadgroup); 1179 1180 1181 if ((lIndex & 0x09) == 0) 1182 { 1183 bool2 atEdge = gIndices == (mipSize - ushort2(1)); 1184 1185 1186 1187 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 1188 1189 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1190 1191 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1192 1193 1194 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1195 1196 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); 1197 1198 1199 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1200 } 1201 1202 if (options.numMipLevelsToGen == 2) 1203 { 1204 return; 1205 } 1206 1207 1208 threadgroup_barrier(mem_flags::mem_threadgroup); 1209 1210 1211 if ((lIndex & 0x1b) == 0) 1212 { 1213 mipSize = max(mipSize >> 1, ushort2(1)); 1214 bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1)); 1215 1216 1217 1218 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1219 1220 float4 texel3 = 1221 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); 1222 1223 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1224 1225 1226 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1227 1228 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); 1229 1230 1231 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1232 } 1233 1234 if (options.numMipLevelsToGen == 3) 1235 { 1236 return; 1237 } 1238 1239 1240 threadgroup_barrier(mem_flags::mem_threadgroup); 1241 1242 1243 if ((lIndex & 0x3f) == 0) 1244 { 1245 mipSize = max(mipSize >> 1, ushort2(1)); 1246 bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1)); 1247 1248 1249 1250 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1251 1252 float4 texel3 = 1253 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); 1254 1255 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 1256 1257 1258 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1259 1260 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); 1261 } 1262} 1263 1264template <typename TextureTypeR, typename TextureTypeW> 1265static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps( 1266 uint lIndex, 1267 ushort3 gIndices, 1268 TextureTypeR srcTexture, 1269 TextureTypeW dstMip2, 1270 TextureTypeW dstMip3, 1271 TextureTypeW dstMip4, 1272 ushort2 mip1Size, 1273 float4 mip1Texel, 1274 threadgroup float *sR, 1275 threadgroup float *sG, 1276 threadgroup float *sB, 1277 threadgroup float *sA, 1278 constant GenMipParams &options) 1279{ 1280 ushort2 mipSize = mip1Size; 1281 float4 texel1 = mip1Texel; 1282 1283 1284 1285 1286 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1287 1288 threadgroup_barrier(mem_flags::mem_threadgroup); 1289 1290 1291 if ((lIndex & 0x09) == 0) 1292 { 1293 bool2 atEdge = gIndices.xy == (mipSize - ushort2(1)); 1294 1295 1296 1297 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 1298 1299 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1300 1301 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1302 1303 1304 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1305 1306 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z); 1307 1308 1309 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1310 } 1311 1312 if (options.numMipLevelsToGen == 2) 1313 { 1314 return; 1315 } 1316 1317 1318 threadgroup_barrier(mem_flags::mem_threadgroup); 1319 1320 1321 if ((lIndex & 0x1b) == 0) 1322 { 1323 mipSize = max(mipSize >> 1, ushort2(1)); 1324 bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1)); 1325 1326 1327 1328 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1329 1330 float4 texel3 = 1331 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); 1332 1333 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1334 1335 1336 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1337 1338 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z); 1339 1340 1341 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1342 } 1343 1344 if (options.numMipLevelsToGen == 3) 1345 { 1346 return; 1347 } 1348 1349 1350 threadgroup_barrier(mem_flags::mem_threadgroup); 1351 1352 1353 if ((lIndex & 0x3f) == 0) 1354 { 1355 mipSize = max(mipSize >> 1, ushort2(1)); 1356 bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1)); 1357 1358 1359 1360 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1361 1362 float4 texel3 = 1363 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); 1364 1365 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 1366 1367 1368 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1369 1370 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z); 1371 } 1372} 1373 1374kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1375 ushort3 gIndices [[thread_position_in_grid]], 1376 texturecube<float> srcTexture [[texture(0)]], 1377 texturecube<float, access::write> dstMip1 [[texture(1)]], 1378 texturecube<float, access::write> dstMip2 [[texture(2)]], 1379 texturecube<float, access::write> dstMip3 [[texture(3)]], 1380 texturecube<float, access::write> dstMip4 [[texture(4)]], 1381 constant GenMipParams &options [[buffer(0)]]) 1382{ 1383 uint firstMipLevel = options.srcLevel + 1; 1384 ushort2 mip1Size = 1385 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1386 bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; 1387 1388 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1389 1390 1391 float4 mip1Texel; 1392 if (validThread) 1393 { 1394 float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); 1395 mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)), 1396 level(options.srcLevel)); 1397 1398 1399 dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); 1400 } 1401 else 1402 { 1403 1404 lIndex = 0xffffffff; 1405 } 1406 1407 if (options.numMipLevelsToGen == 1) 1408 { 1409 return; 1410 } 1411 1412 1413 threadgroup float sR[(8 * 8)]; 1414 threadgroup float sG[(8 * 8)]; 1415 threadgroup float sB[(8 * 8)]; 1416 threadgroup float sA[(8 * 8)]; 1417 1418 generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, 1419 mip1Size, mip1Texel, sR, sG, sB, sA, options); 1420} 1421 1422kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1423 ushort3 gIndices [[thread_position_in_grid]], 1424 texture2d_array<float> srcTexture [[texture(0)]], 1425 texture2d_array<float, access::write> dstMip1 [[texture(1)]], 1426 texture2d_array<float, access::write> dstMip2 [[texture(2)]], 1427 texture2d_array<float, access::write> dstMip3 [[texture(3)]], 1428 texture2d_array<float, access::write> dstMip4 [[texture(4)]], 1429 constant GenMipParams &options [[buffer(0)]]) 1430{ 1431 uint firstMipLevel = options.srcLevel + 1; 1432 ushort2 mip1Size = 1433 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1434 bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; 1435 1436 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1437 1438 1439 float4 mip1Texel; 1440 if (validThread) 1441 { 1442 float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); 1443 mip1Texel = 1444 srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel)); 1445 1446 1447 dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); 1448 } 1449 else 1450 { 1451 1452 lIndex = 0xffffffff; 1453 } 1454 1455 if (options.numMipLevelsToGen == 1) 1456 { 1457 return; 1458 } 1459 1460 1461 threadgroup float sR[(8 * 8)]; 1462 threadgroup float sG[(8 * 8)]; 1463 threadgroup float sB[(8 * 8)]; 1464 threadgroup float sA[(8 * 8)]; 1465 1466 generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, 1467 mip1Size, mip1Texel, sR, sG, sB, sA, options); 1468} 1469# 5 "temp_master_source.metal" 2 1470# 1 "./copy_buffer.metal" 1 1471# 12 "./copy_buffer.metal" 1472#include <metal_pack> 1473 1474 1475# 1 "./format_autogen.h" 1 1476 1477 1478 1479namespace rx 1480{ 1481namespace mtl_shader 1482{ 1483 1484namespace FormatID 1485{ 1486enum 1487{ 1488 NONE, 1489 D16_UNORM, 1490 D24_UNORM_S8_UINT, 1491 D24_UNORM_X8_UINT, 1492 D32_FLOAT, 1493 D32_FLOAT_S8X24_UINT, 1494 D32_UNORM, 1495 S8_UINT, 1496 A16_FLOAT, 1497 A1R5G5B5_UNORM, 1498 A2R10G10B10_SINT_VERTEX, 1499 A2R10G10B10_SNORM_VERTEX, 1500 A2R10G10B10_SSCALED_VERTEX, 1501 A2R10G10B10_UINT_VERTEX, 1502 A2R10G10B10_UNORM_VERTEX, 1503 A2R10G10B10_USCALED_VERTEX, 1504 A32_FLOAT, 1505 A8_UNORM, 1506 ASTC_10x10_SRGB_BLOCK, 1507 ASTC_10x10_UNORM_BLOCK, 1508 ASTC_10x5_SRGB_BLOCK, 1509 ASTC_10x5_UNORM_BLOCK, 1510 ASTC_10x6_SRGB_BLOCK, 1511 ASTC_10x6_UNORM_BLOCK, 1512 ASTC_10x8_SRGB_BLOCK, 1513 ASTC_10x8_UNORM_BLOCK, 1514 ASTC_12x10_SRGB_BLOCK, 1515 ASTC_12x10_UNORM_BLOCK, 1516 ASTC_12x12_SRGB_BLOCK, 1517 ASTC_12x12_UNORM_BLOCK, 1518 ASTC_3x3x3_UNORM_BLOCK, 1519 ASTC_3x3x3_UNORM_SRGB_BLOCK, 1520 ASTC_4x3x3_UNORM_BLOCK, 1521 ASTC_4x3x3_UNORM_SRGB_BLOCK, 1522 ASTC_4x4_SRGB_BLOCK, 1523 ASTC_4x4_UNORM_BLOCK, 1524 ASTC_4x4x3_UNORM_BLOCK, 1525 ASTC_4x4x3_UNORM_SRGB_BLOCK, 1526 ASTC_4x4x4_UNORM_BLOCK, 1527 ASTC_4x4x4_UNORM_SRGB_BLOCK, 1528 ASTC_5x4_SRGB_BLOCK, 1529 ASTC_5x4_UNORM_BLOCK, 1530 ASTC_5x4x4_UNORM_BLOCK, 1531 ASTC_5x4x4_UNORM_SRGB_BLOCK, 1532 ASTC_5x5_SRGB_BLOCK, 1533 ASTC_5x5_UNORM_BLOCK, 1534 ASTC_5x5x4_UNORM_BLOCK, 1535 ASTC_5x5x4_UNORM_SRGB_BLOCK, 1536 ASTC_5x5x5_UNORM_BLOCK, 1537 ASTC_5x5x5_UNORM_SRGB_BLOCK, 1538 ASTC_6x5_SRGB_BLOCK, 1539 ASTC_6x5_UNORM_BLOCK, 1540 ASTC_6x5x5_UNORM_BLOCK, 1541 ASTC_6x5x5_UNORM_SRGB_BLOCK, 1542 ASTC_6x6_SRGB_BLOCK, 1543 ASTC_6x6_UNORM_BLOCK, 1544 ASTC_6x6x5_UNORM_BLOCK, 1545 ASTC_6x6x5_UNORM_SRGB_BLOCK, 1546 ASTC_6x6x6_UNORM_BLOCK, 1547 ASTC_6x6x6_UNORM_SRGB_BLOCK, 1548 ASTC_8x5_SRGB_BLOCK, 1549 ASTC_8x5_UNORM_BLOCK, 1550 ASTC_8x6_SRGB_BLOCK, 1551 ASTC_8x6_UNORM_BLOCK, 1552 ASTC_8x8_SRGB_BLOCK, 1553 ASTC_8x8_UNORM_BLOCK, 1554 B10G10R10A2_UNORM, 1555 B4G4R4A4_UNORM, 1556 B5G5R5A1_UNORM, 1557 B5G6R5_UNORM, 1558 B8G8R8A8_TYPELESS, 1559 B8G8R8A8_TYPELESS_SRGB, 1560 B8G8R8A8_UNORM, 1561 B8G8R8A8_UNORM_SRGB, 1562 B8G8R8X8_UNORM, 1563 B8G8R8X8_UNORM_SRGB, 1564 BC1_RGBA_UNORM_BLOCK, 1565 BC1_RGBA_UNORM_SRGB_BLOCK, 1566 BC1_RGB_UNORM_BLOCK, 1567 BC1_RGB_UNORM_SRGB_BLOCK, 1568 BC2_RGBA_UNORM_BLOCK, 1569 BC2_RGBA_UNORM_SRGB_BLOCK, 1570 BC3_RGBA_UNORM_BLOCK, 1571 BC3_RGBA_UNORM_SRGB_BLOCK, 1572 BC4_RED_SNORM_BLOCK, 1573 BC4_RED_UNORM_BLOCK, 1574 BC5_RG_SNORM_BLOCK, 1575 BC5_RG_UNORM_BLOCK, 1576 BC6H_RGB_SFLOAT_BLOCK, 1577 BC6H_RGB_UFLOAT_BLOCK, 1578 BC7_RGBA_UNORM_BLOCK, 1579 BC7_RGBA_UNORM_SRGB_BLOCK, 1580 EAC_R11G11_SNORM_BLOCK, 1581 EAC_R11G11_UNORM_BLOCK, 1582 EAC_R11_SNORM_BLOCK, 1583 EAC_R11_UNORM_BLOCK, 1584 ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK, 1585 ETC1_R8G8B8_UNORM_BLOCK, 1586 ETC2_R8G8B8A1_SRGB_BLOCK, 1587 ETC2_R8G8B8A1_UNORM_BLOCK, 1588 ETC2_R8G8B8A8_SRGB_BLOCK, 1589 ETC2_R8G8B8A8_UNORM_BLOCK, 1590 ETC2_R8G8B8_SRGB_BLOCK, 1591 ETC2_R8G8B8_UNORM_BLOCK, 1592 G8_B8R8_2PLANE_420_UNORM, 1593 G8_B8_R8_3PLANE_420_UNORM, 1594 L16A16_FLOAT, 1595 L16_FLOAT, 1596 L32A32_FLOAT, 1597 L32_FLOAT, 1598 L8A8_UNORM, 1599 L8_UNORM, 1600 PALETTE4_R4G4B4A4_UNORM, 1601 PALETTE4_R5G5B5A1_UNORM, 1602 PALETTE4_R5G6B5_UNORM, 1603 PALETTE4_R8G8B8A8_UNORM, 1604 PALETTE4_R8G8B8_UNORM, 1605 PALETTE8_R4G4B4A4_UNORM, 1606 PALETTE8_R5G5B5A1_UNORM, 1607 PALETTE8_R5G6B5_UNORM, 1608 PALETTE8_R8G8B8A8_UNORM, 1609 PALETTE8_R8G8B8_UNORM, 1610 PVRTC1_RGBA_2BPP_UNORM_BLOCK, 1611 PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK, 1612 PVRTC1_RGBA_4BPP_UNORM_BLOCK, 1613 PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK, 1614 PVRTC1_RGB_2BPP_UNORM_BLOCK, 1615 PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK, 1616 PVRTC1_RGB_4BPP_UNORM_BLOCK, 1617 PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK, 1618 R10G10B10A2_SINT, 1619 R10G10B10A2_SNORM, 1620 R10G10B10A2_SSCALED, 1621 R10G10B10A2_UINT, 1622 R10G10B10A2_UNORM, 1623 R10G10B10A2_USCALED, 1624 R10G10B10X2_UNORM, 1625 R11G11B10_FLOAT, 1626 R16G16B16A16_FLOAT, 1627 R16G16B16A16_SINT, 1628 R16G16B16A16_SNORM, 1629 R16G16B16A16_SSCALED, 1630 R16G16B16A16_UINT, 1631 R16G16B16A16_UNORM, 1632 R16G16B16A16_USCALED, 1633 R16G16B16_FLOAT, 1634 R16G16B16_SINT, 1635 R16G16B16_SNORM, 1636 R16G16B16_SSCALED, 1637 R16G16B16_UINT, 1638 R16G16B16_UNORM, 1639 R16G16B16_USCALED, 1640 R16G16_FLOAT, 1641 R16G16_SINT, 1642 R16G16_SNORM, 1643 R16G16_SSCALED, 1644 R16G16_UINT, 1645 R16G16_UNORM, 1646 R16G16_USCALED, 1647 R16_FLOAT, 1648 R16_SINT, 1649 R16_SNORM, 1650 R16_SSCALED, 1651 R16_UINT, 1652 R16_UNORM, 1653 R16_USCALED, 1654 R32G32B32A32_FIXED, 1655 R32G32B32A32_FLOAT, 1656 R32G32B32A32_SINT, 1657 R32G32B32A32_SNORM, 1658 R32G32B32A32_SSCALED, 1659 R32G32B32A32_UINT, 1660 R32G32B32A32_UNORM, 1661 R32G32B32A32_USCALED, 1662 R32G32B32_FIXED, 1663 R32G32B32_FLOAT, 1664 R32G32B32_SINT, 1665 R32G32B32_SNORM, 1666 R32G32B32_SSCALED, 1667 R32G32B32_UINT, 1668 R32G32B32_UNORM, 1669 R32G32B32_USCALED, 1670 R32G32_FIXED, 1671 R32G32_FLOAT, 1672 R32G32_SINT, 1673 R32G32_SNORM, 1674 R32G32_SSCALED, 1675 R32G32_UINT, 1676 R32G32_UNORM, 1677 R32G32_USCALED, 1678 R32_FIXED, 1679 R32_FLOAT, 1680 R32_SINT, 1681 R32_SNORM, 1682 R32_SSCALED, 1683 R32_UINT, 1684 R32_UNORM, 1685 R32_USCALED, 1686 R4G4B4A4_UNORM, 1687 R5G5B5A1_UNORM, 1688 R5G6B5_UNORM, 1689 R8G8B8A8_SINT, 1690 R8G8B8A8_SNORM, 1691 R8G8B8A8_SSCALED, 1692 R8G8B8A8_TYPELESS, 1693 R8G8B8A8_TYPELESS_SRGB, 1694 R8G8B8A8_UINT, 1695 R8G8B8A8_UNORM, 1696 R8G8B8A8_UNORM_SRGB, 1697 R8G8B8A8_USCALED, 1698 R8G8B8X8_UNORM, 1699 R8G8B8X8_UNORM_SRGB, 1700 R8G8B8_SINT, 1701 R8G8B8_SNORM, 1702 R8G8B8_SSCALED, 1703 R8G8B8_UINT, 1704 R8G8B8_UNORM, 1705 R8G8B8_UNORM_SRGB, 1706 R8G8B8_USCALED, 1707 R8G8_SINT, 1708 R8G8_SNORM, 1709 R8G8_SSCALED, 1710 R8G8_UINT, 1711 R8G8_UNORM, 1712 R8G8_UNORM_SRGB, 1713 R8G8_USCALED, 1714 R8_SINT, 1715 R8_SNORM, 1716 R8_SSCALED, 1717 R8_UINT, 1718 R8_UNORM, 1719 R8_UNORM_SRGB, 1720 R8_USCALED, 1721 R9G9B9E5_SHAREDEXP, 1722 X2R10G10B10_SINT_VERTEX, 1723 X2R10G10B10_SNORM_VERTEX, 1724 X2R10G10B10_SSCALED_VERTEX, 1725 X2R10G10B10_UINT_VERTEX, 1726 X2R10G10B10_UNORM_VERTEX, 1727 X2R10G10B10_USCALED_VERTEX, 1728 EXTERNAL0, 1729 EXTERNAL1, 1730 EXTERNAL2, 1731 EXTERNAL3, 1732 EXTERNAL4, 1733 EXTERNAL5, 1734 EXTERNAL6, 1735 EXTERNAL7 1736}; 1737 1738} 1739 1740} 1741} 1742# 16 "./copy_buffer.metal" 2 1743 1744using namespace rx::mtl_shader; 1745 1746constant int kCopyFormatType [[function_constant(10)]]; 1747 1748 1749constant int kCopyTextureType [[function_constant(20)]]; 1750constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D; 1751constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray; 1752constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample; 1753constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube; 1754constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D; 1755 1756struct CopyPixelParams 1757{ 1758 uint3 copySize; 1759 uint3 textureOffset; 1760 1761 uint bufferStartOffset; 1762 uint pixelSize; 1763 uint bufferRowPitch; 1764 uint bufferDepthPitch; 1765}; 1766 1767struct WritePixelParams 1768{ 1769 uint2 copySize; 1770 uint2 textureOffset; 1771 1772 uint bufferStartOffset; 1773 1774 uint pixelSize; 1775 uint bufferRowPitch; 1776 1777 uint textureLevel; 1778 uint textureLayer; 1779 1780 bool reverseTextureRowOrder; 1781}; 1782# 120 "./copy_buffer.metal" 1783template <typename T> 1784static inline void textureWrite(ushort3 gIndices, 1785 constant CopyPixelParams &options, 1786 vec<T, 4> color, 1787 texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 1788{ 1789 uint3 writeIndices = options.textureOffset + uint3(gIndices); 1790 switch (kCopyTextureType) 1791 { 1792 case kTextureType2D: 1793 dstTexture2d.write(color, writeIndices.xy); 1794 break; 1795 case kTextureType2DArray: 1796 dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z); 1797 break; 1798 case kTextureType3D: 1799 dstTexture3d.write(color, writeIndices); 1800 break; 1801 case kTextureTypeCube: 1802 dstTextureCube.write(color, writeIndices.xy, writeIndices.z); 1803 break; 1804 } 1805} 1806 1807 1808template <typename T> 1809static inline vec<T, 4> textureRead(ushort2 gIndices, 1810 constant WritePixelParams &options, 1811 texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]]) 1812{ 1813 vec<T, 4> color; 1814 uint2 coords = uint2(gIndices); 1815 if (options.reverseTextureRowOrder) 1816 { 1817 coords.y = options.copySize.y - 1 - gIndices.y; 1818 } 1819 coords += options.textureOffset; 1820 switch (kCopyTextureType) 1821 { 1822 case kTextureType2D: 1823 color = srcTexture2d.read(coords.xy, options.textureLevel); 1824 break; 1825 case kTextureType2DArray: 1826 color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel); 1827 break; 1828 case kTextureType2DMultisample: 1829 color = resolveTextureMS(srcTexture2dMS, coords.xy); 1830 break; 1831 case kTextureType3D: 1832 color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel); 1833 break; 1834 case kTextureTypeCube: 1835 color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel); 1836 break; 1837 } 1838 return color; 1839} 1840# 215 "./copy_buffer.metal" 1841static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer) 1842{ 1843 float4 color; 1844 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1845 1846 color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); 1847 color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src)); 1848 color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src)); 1849 color.a = 1.0; 1850 return color; 1851} 1852static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1853{ 1854 ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | 1855 shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) | 1856 shiftData<5, 0>(floatToNormalized<5, ushort>(color.b)); 1857 1858 shortToBytes(dst, bufferOffset, buffer); 1859} 1860 1861 1862static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer) 1863{ 1864 float4 color; 1865 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1866 1867 color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src)); 1868 color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src)); 1869 color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src)); 1870 color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src)); 1871 return color; 1872} 1873static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1874{ 1875 ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) | 1876 shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) | 1877 shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) | 1878 shiftData<4, 0>(floatToNormalized<4, ushort>(color.a)); 1879 ; 1880 1881 shortToBytes(dst, bufferOffset, buffer); 1882} 1883 1884 1885static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer) 1886{ 1887 float4 color; 1888 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1889 1890 color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); 1891 color.g = normalizedToFloat<5>(getShiftedData<5, 6>(src)); 1892 color.b = normalizedToFloat<5>(getShiftedData<5, 1>(src)); 1893 color.a = normalizedToFloat<1>(getShiftedData<1, 0>(src)); 1894 return color; 1895} 1896static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1897{ 1898 ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | 1899 shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) | 1900 shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) | 1901 shiftData<1, 0>(floatToNormalized<1, ushort>(color.a)); 1902 ; 1903 1904 shortToBytes(dst, bufferOffset, buffer); 1905} 1906 1907 1908static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer) 1909{ 1910 int4 color; 1911 int src = bytesToInt<int>(buffer, bufferOffset); 1912 1913 constexpr int3 rgbSignMask(0x200); 1914 constexpr int3 negativeMask(0xfffffc00); 1915 constexpr int alphaSignMask = 0x2; 1916 constexpr int alphaNegMask = 0xfffffffc; 1917 1918 color.r = getShiftedData<10, 0>(src); 1919 color.g = getShiftedData<10, 10>(src); 1920 color.b = getShiftedData<10, 20>(src); 1921 1922 int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9; 1923 color.rgb = (isRgbNegative * negativeMask) | color.rgb; 1924 1925 color.a = getShiftedData<2, 30>(src); 1926 int isAlphaNegative = color.a & alphaSignMask >> 1; 1927 color.a = (isAlphaNegative * alphaNegMask) | color.a; 1928 return color; 1929} 1930 1931static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer) 1932{ 1933 uint4 color; 1934 uint src = bytesToInt<uint>(buffer, bufferOffset); 1935 1936 color.r = getShiftedData<10, 0>(src); 1937 color.g = getShiftedData<10, 10>(src); 1938 color.b = getShiftedData<10, 20>(src); 1939 color.a = getShiftedData<2, 30>(src); 1940 return color; 1941} 1942 1943 1944static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB) 1945{ 1946 float4 color; 1947 uint src = bytesToInt<uint>(buffer, bufferOffset); 1948 1949 if (isSRGB) 1950 { 1951 color = unpack_unorm4x8_srgb_to_float(src); 1952 } 1953 else 1954 { 1955 color = unpack_unorm4x8_to_float(src); 1956 } 1957 return color; 1958} 1959static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB) 1960{ 1961 uint dst; 1962 1963 if (isSRGB) 1964 { 1965 dst = pack_float_to_srgb_unorm4x8(color); 1966 } 1967 else 1968 { 1969 dst = pack_float_to_unorm4x8(color); 1970 } 1971 1972 intToBytes(dst, bufferOffset, buffer); 1973} 1974 1975static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB) 1976{ 1977 float4 color; 1978 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 1979 color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 1980 color.b = normalizedToFloat<uchar>(buffer[bufferOffset + 2]); 1981 color.a = 1.0; 1982 1983 if (isSRGB) 1984 { 1985 color = sRGBtoLinear(color); 1986 } 1987 return color; 1988} 1989static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB) 1990{ 1991 color.a = 1.0; 1992 uint dst; 1993 1994 if (isSRGB) 1995 { 1996 dst = pack_float_to_srgb_unorm4x8(color); 1997 } 1998 else 1999 { 2000 dst = pack_float_to_unorm4x8(color); 2001 } 2002 int24bitToBytes(dst, bufferOffset, buffer); 2003} 2004 2005 2006static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer) 2007{ 2008 float4 color; 2009 uint src = bytesToInt<uint>(buffer, bufferOffset); 2010 2011 color = unpack_snorm4x8_to_float(src); 2012 2013 return color; 2014} 2015static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2016{ 2017 uint dst = pack_float_to_snorm4x8(color); 2018 2019 intToBytes(dst, bufferOffset, buffer); 2020} 2021 2022 2023static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer) 2024{ 2025 float4 color; 2026 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 2027 color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); 2028 color.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]); 2029 color.a = 1.0; 2030 2031 return color; 2032} 2033static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2034{ 2035 uint dst = pack_float_to_snorm4x8(color); 2036 2037 int24bitToBytes(dst, bufferOffset, buffer); 2038} 2039 2040 2041static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer) 2042{ 2043 return readR8G8B8A8(bufferOffset, buffer, false); 2044} 2045static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2046{ 2047 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); 2048} 2049 2050static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 2051{ 2052 return readR8G8B8A8(bufferOffset, buffer, true); 2053} 2054static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2055{ 2056 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); 2057} 2058 2059 2060static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer) 2061{ 2062 return readR8G8B8A8(bufferOffset, buffer, false).bgra; 2063} 2064static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2065{ 2066 color.rgba = color.bgra; 2067 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); 2068} 2069 2070static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 2071{ 2072 return readR8G8B8A8(bufferOffset, buffer, true).bgra; 2073} 2074static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2075{ 2076 color.rgba = color.bgra; 2077 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); 2078} 2079 2080 2081static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer) 2082{ 2083 return readR8G8B8(bufferOffset, buffer, false); 2084} 2085static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2086{ 2087 return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false); 2088} 2089 2090static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 2091{ 2092 return readR8G8B8(bufferOffset, buffer, true); 2093} 2094static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2095{ 2096 return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true); 2097} 2098 2099 2100static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer) 2101{ 2102 float4 color; 2103 color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset])); 2104 color.a = 1.0; 2105 return color; 2106} 2107static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2108{ 2109 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2110} 2111 2112 2113static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2114{ 2115 buffer[bufferOffset] = floatToNormalized<uchar>(color.a); 2116} 2117 2118 2119static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer) 2120{ 2121 float4 color; 2122 color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset])); 2123 color.a = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 2124 return color; 2125} 2126static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2127{ 2128 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2129 buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.a); 2130} 2131 2132 2133static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer) 2134{ 2135 float4 color; 2136 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 2137 color.g = color.b = 0.0; 2138 color.a = 1.0; 2139 return color; 2140} 2141static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2142{ 2143 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2144} 2145 2146static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer) 2147{ 2148 float4 color; 2149 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 2150 color.g = color.b = 0.0; 2151 color.a = 1.0; 2152 return color; 2153} 2154static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2155{ 2156 buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r)); 2157} 2158 2159 2160static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer) 2161{ 2162 int4 color; 2163 color.r = as_type<char>(buffer[bufferOffset]); 2164 color.g = color.b = 0; 2165 color.a = 1; 2166 return color; 2167} 2168static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2169{ 2170 buffer[bufferOffset] = static_cast<uchar>(color.r); 2171} 2172 2173 2174static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer) 2175{ 2176 uint4 color; 2177 color.r = as_type<uchar>(buffer[bufferOffset]); 2178 color.g = color.b = 0; 2179 color.a = 1; 2180 return color; 2181} 2182static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2183{ 2184 buffer[bufferOffset] = static_cast<uchar>(color.r); 2185} 2186 2187 2188static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer) 2189{ 2190 float4 color; 2191 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 2192 color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 2193 color.b = 0.0; 2194 color.a = 1.0; 2195 return color; 2196} 2197static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2198{ 2199 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2200 buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.g); 2201} 2202 2203static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer) 2204{ 2205 float4 color; 2206 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 2207 color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); 2208 color.b = 0.0; 2209 color.a = 1.0; 2210 return color; 2211} 2212static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2213{ 2214 buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r)); 2215 buffer[bufferOffset + 1] = as_type<uchar>(floatToNormalized<7, char>(color.g)); 2216} 2217 2218 2219static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer) 2220{ 2221 int4 color; 2222 color.r = as_type<char>(buffer[bufferOffset]); 2223 color.g = as_type<char>(buffer[bufferOffset + 1]); 2224 color.b = 0; 2225 color.a = 1; 2226 return color; 2227} 2228static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2229{ 2230 buffer[bufferOffset] = static_cast<uchar>(color.r); 2231 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2232} 2233 2234 2235static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer) 2236{ 2237 uint4 color; 2238 color.r = as_type<uchar>(buffer[bufferOffset]); 2239 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 2240 color.b = 0; 2241 color.a = 1; 2242 return color; 2243} 2244static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2245{ 2246 buffer[bufferOffset] = static_cast<uchar>(color.r); 2247 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2248} 2249 2250 2251static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer) 2252{ 2253 int4 color; 2254 color.r = as_type<char>(buffer[bufferOffset]); 2255 color.g = as_type<char>(buffer[bufferOffset + 1]); 2256 color.b = as_type<char>(buffer[bufferOffset + 2]); 2257 color.a = 1; 2258 return color; 2259} 2260 2261 2262static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer) 2263{ 2264 uint4 color; 2265 color.r = as_type<uchar>(buffer[bufferOffset]); 2266 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 2267 color.b = as_type<uchar>(buffer[bufferOffset + 2]); 2268 color.a = 1; 2269 return color; 2270} 2271 2272 2273static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer) 2274{ 2275 int4 color; 2276 color.r = as_type<char>(buffer[bufferOffset]); 2277 color.g = as_type<char>(buffer[bufferOffset + 1]); 2278 color.b = as_type<char>(buffer[bufferOffset + 2]); 2279 color.a = as_type<char>(buffer[bufferOffset + 3]); 2280 return color; 2281} 2282static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2283{ 2284 buffer[bufferOffset] = static_cast<uchar>(color.r); 2285 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2286 buffer[bufferOffset + 2] = static_cast<uchar>(color.b); 2287 buffer[bufferOffset + 3] = static_cast<uchar>(color.a); 2288} 2289 2290 2291static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer) 2292{ 2293 uint4 color; 2294 color.r = as_type<uchar>(buffer[bufferOffset]); 2295 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 2296 color.b = as_type<uchar>(buffer[bufferOffset + 2]); 2297 color.a = as_type<uchar>(buffer[bufferOffset + 3]); 2298 return color; 2299} 2300static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2301{ 2302 buffer[bufferOffset] = static_cast<uchar>(color.r); 2303 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2304 buffer[bufferOffset + 2] = static_cast<uchar>(color.b); 2305 buffer[bufferOffset + 3] = static_cast<uchar>(color.a); 2306} 2307 2308 2309static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer) 2310{ 2311 float4 color; 2312 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2313 color.g = color.b = 0.0; 2314 color.a = 1.0; 2315 return color; 2316} 2317static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2318{ 2319 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2320} 2321 2322template <typename ShortType> 2323static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer) 2324{ 2325 float4 color; 2326 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2327 color.g = color.b = 0.0; 2328 color.a = 1.0; 2329 return color; 2330} 2331 2332 2333 2334 2335static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer) 2336{ 2337 int4 color; 2338 color.r = bytesToShort<short>(buffer, bufferOffset); 2339 color.g = color.b = 0; 2340 color.a = 1; 2341 return color; 2342} 2343static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2344{ 2345 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2346} 2347 2348 2349static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer) 2350{ 2351 uint4 color; 2352 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2353 color.g = color.b = 0; 2354 color.a = 1; 2355 return color; 2356} 2357static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2358{ 2359 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2360} 2361 2362 2363static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer) 2364{ 2365 float4 color; 2366 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2367 color.rgb = 0.0; 2368 return color; 2369} 2370static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2371{ 2372 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset, buffer); 2373} 2374 2375 2376static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer) 2377{ 2378 float4 color; 2379 color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2380 color.a = 1.0; 2381 return color; 2382} 2383static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2384{ 2385 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2386} 2387 2388 2389static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer) 2390{ 2391 float4 color; 2392 color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2393 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2394 return color; 2395} 2396static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2397{ 2398 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2399 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 2, buffer); 2400} 2401 2402 2403static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer) 2404{ 2405 float4 color; 2406 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2407 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2408 color.b = 0.0; 2409 color.a = 1.0; 2410 return color; 2411} 2412static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2413{ 2414 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2415 shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer); 2416} 2417 2418 2419template <typename ShortType> 2420static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer) 2421{ 2422 float4 color; 2423 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2424 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2425 color.b = 0.0; 2426 color.a = 1.0; 2427 return color; 2428} 2429 2430 2431 2432 2433static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer) 2434{ 2435 int4 color; 2436 color.r = bytesToShort<short>(buffer, bufferOffset); 2437 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2438 color.b = 0; 2439 color.a = 1; 2440 return color; 2441} 2442static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2443{ 2444 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2445 shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer); 2446} 2447 2448 2449static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer) 2450{ 2451 uint4 color; 2452 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2453 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2454 color.b = 0; 2455 color.a = 1; 2456 return color; 2457} 2458static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2459{ 2460 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2461 shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer); 2462} 2463 2464 2465static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer) 2466{ 2467 float4 color; 2468 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2469 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2470 color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4)); 2471 color.a = 1.0; 2472 return color; 2473} 2474 2475 2476template <typename ShortType> 2477static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer) 2478{ 2479 float4 color; 2480 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2481 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2482 color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4)); 2483 color.a = 1.0; 2484 return color; 2485} 2486 2487 2488 2489static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer) 2490{ 2491 int4 color; 2492 color.r = bytesToShort<short>(buffer, bufferOffset); 2493 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2494 color.b = bytesToShort<short>(buffer, bufferOffset + 4); 2495 color.a = 1; 2496 return color; 2497} 2498 2499 2500static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer) 2501{ 2502 uint4 color; 2503 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2504 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2505 color.b = bytesToShort<ushort>(buffer, bufferOffset + 4); 2506 color.a = 1; 2507 return color; 2508} 2509 2510 2511static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer) 2512{ 2513 float4 color; 2514 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2515 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2516 color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4)); 2517 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6)); 2518 return color; 2519} 2520static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2521{ 2522 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2523 shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer); 2524 shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer); 2525 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer); 2526} 2527 2528 2529template <typename ShortType> 2530static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer) 2531{ 2532 float4 color; 2533 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2534 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2535 color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4)); 2536 color.a = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6)); 2537 return color; 2538} 2539 2540 2541 2542 2543static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer) 2544{ 2545 int4 color; 2546 color.r = bytesToShort<short>(buffer, bufferOffset); 2547 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2548 color.b = bytesToShort<short>(buffer, bufferOffset + 4); 2549 color.a = bytesToShort<short>(buffer, bufferOffset + 6); 2550 return color; 2551} 2552static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2553{ 2554 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2555 shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer); 2556 shortToBytes(static_cast<short>(color.b), bufferOffset + 4, buffer); 2557 shortToBytes(static_cast<short>(color.a), bufferOffset + 6, buffer); 2558} 2559 2560 2561static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer) 2562{ 2563 uint4 color; 2564 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2565 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2566 color.b = bytesToShort<ushort>(buffer, bufferOffset + 4); 2567 color.a = bytesToShort<ushort>(buffer, bufferOffset + 6); 2568 return color; 2569} 2570static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2571{ 2572 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2573 shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer); 2574 shortToBytes(static_cast<ushort>(color.b), bufferOffset + 4, buffer); 2575 shortToBytes(static_cast<ushort>(color.a), bufferOffset + 6, buffer); 2576} 2577 2578 2579static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer) 2580{ 2581 float4 color; 2582 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2583 color.g = color.b = 0.0; 2584 color.a = 1.0; 2585 return color; 2586} 2587static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2588{ 2589 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2590} 2591 2592 2593template <typename IntType> 2594static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer) 2595{ 2596 float4 color; 2597 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2598 color.g = color.b = 0.0; 2599 color.a = 1.0; 2600 return color; 2601} 2602 2603 2604 2605 2606static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer) 2607{ 2608 float4 color; 2609 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2610 color.rgb = 0.0; 2611 return color; 2612} 2613static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2614{ 2615 intToBytes(as_type<uint>(color.a), bufferOffset, buffer); 2616} 2617 2618 2619static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer) 2620{ 2621 float4 color; 2622 color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2623 color.a = 1.0; 2624 return color; 2625} 2626static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2627{ 2628 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2629} 2630 2631 2632static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer) 2633{ 2634 int4 color; 2635 color.r = bytesToInt<int>(buffer, bufferOffset); 2636 color.g = color.b = 0; 2637 color.a = 1; 2638 return color; 2639} 2640static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2641{ 2642 intToBytes(color.r, bufferOffset, buffer); 2643} 2644 2645 2646static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer) 2647{ 2648 float4 color; 2649 constexpr float kDivisor = 1.0f / (1 << 16); 2650 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2651 color.g = color.b = 0.0; 2652 color.a = 1.0; 2653 return color; 2654} 2655 2656 2657static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer) 2658{ 2659 uint4 color; 2660 color.r = bytesToInt<uint>(buffer, bufferOffset); 2661 color.g = color.b = 0; 2662 color.a = 1; 2663 return color; 2664} 2665static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2666{ 2667 intToBytes(color.r, bufferOffset, buffer); 2668} 2669 2670 2671static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer) 2672{ 2673 float4 color; 2674 color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2675 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2676 return color; 2677} 2678static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2679{ 2680 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2681 intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer); 2682} 2683 2684 2685static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer) 2686{ 2687 float4 color; 2688 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2689 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2690 color.b = 0.0; 2691 color.a = 1.0; 2692 return color; 2693} 2694static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2695{ 2696 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2697 intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer); 2698} 2699 2700 2701template <typename IntType> 2702static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer) 2703{ 2704 float4 color; 2705 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2706 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2707 color.b = 0.0; 2708 color.a = 1.0; 2709 return color; 2710} 2711 2712 2713 2714 2715static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer) 2716{ 2717 int4 color; 2718 color.r = bytesToInt<int>(buffer, bufferOffset); 2719 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2720 color.b = 0; 2721 color.a = 1; 2722 return color; 2723} 2724static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2725{ 2726 intToBytes(color.r, bufferOffset, buffer); 2727 intToBytes(color.g, bufferOffset + 4, buffer); 2728} 2729 2730 2731static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer) 2732{ 2733 float4 color; 2734 constexpr float kDivisor = 1.0f / (1 << 16); 2735 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2736 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2737 color.b = 0.0; 2738 color.a = 1.0; 2739 return color; 2740} 2741 2742 2743static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer) 2744{ 2745 uint4 color; 2746 color.r = bytesToInt<uint>(buffer, bufferOffset); 2747 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2748 color.b = 0; 2749 color.a = 1; 2750 return color; 2751} 2752static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2753{ 2754 intToBytes(color.r, bufferOffset, buffer); 2755 intToBytes(color.g, bufferOffset + 4, buffer); 2756} 2757 2758 2759static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer) 2760{ 2761 float4 color; 2762 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2763 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2764 color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8)); 2765 color.a = 1.0; 2766 return color; 2767} 2768 2769 2770template <typename IntType> 2771static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer) 2772{ 2773 float4 color; 2774 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2775 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2776 color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8)); 2777 color.a = 1.0; 2778 return color; 2779} 2780 2781 2782 2783 2784static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer) 2785{ 2786 int4 color; 2787 color.r = bytesToInt<int>(buffer, bufferOffset); 2788 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2789 color.b = bytesToInt<int>(buffer, bufferOffset + 8); 2790 color.a = 1; 2791 return color; 2792} 2793 2794 2795static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer) 2796{ 2797 float4 color; 2798 constexpr float kDivisor = 1.0f / (1 << 16); 2799 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2800 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2801 color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor; 2802 color.a = 1.0; 2803 return color; 2804} 2805 2806 2807static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer) 2808{ 2809 uint4 color; 2810 color.r = bytesToInt<uint>(buffer, bufferOffset); 2811 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2812 color.b = bytesToInt<uint>(buffer, bufferOffset + 8); 2813 color.a = 1; 2814 return color; 2815} 2816 2817 2818static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer) 2819{ 2820 float4 color; 2821 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2822 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2823 color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8)); 2824 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12)); 2825 return color; 2826} 2827static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2828{ 2829 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2830 intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer); 2831 intToBytes(as_type<uint>(color.b), bufferOffset + 8, buffer); 2832 intToBytes(as_type<uint>(color.a), bufferOffset + 12, buffer); 2833} 2834 2835 2836template <typename IntType> 2837static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer) 2838{ 2839 float4 color; 2840 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2841 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2842 color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8)); 2843 color.a = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12)); 2844 return color; 2845} 2846 2847 2848 2849 2850static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer) 2851{ 2852 int4 color; 2853 color.r = bytesToInt<int>(buffer, bufferOffset); 2854 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2855 color.b = bytesToInt<int>(buffer, bufferOffset + 8); 2856 color.a = bytesToInt<int>(buffer, bufferOffset + 12); 2857 return color; 2858} 2859static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2860{ 2861 intToBytes(color.r, bufferOffset, buffer); 2862 intToBytes(color.g, bufferOffset + 4, buffer); 2863 intToBytes(color.b, bufferOffset + 8, buffer); 2864 intToBytes(color.a, bufferOffset + 12, buffer); 2865} 2866 2867static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer) 2868{ 2869 float4 color; 2870 constexpr float kDivisor = 1.0f / (1 << 16); 2871 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2872 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2873 color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor; 2874 color.a = bytesToInt<int>(buffer, bufferOffset + 12) * kDivisor; 2875 return color; 2876} 2877 2878 2879static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer) 2880{ 2881 uint4 color; 2882 color.r = bytesToInt<uint>(buffer, bufferOffset); 2883 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2884 color.b = bytesToInt<uint>(buffer, bufferOffset + 8); 2885 color.a = bytesToInt<uint>(buffer, bufferOffset + 12); 2886 return color; 2887} 2888static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2889{ 2890 intToBytes(color.r, bufferOffset, buffer); 2891 intToBytes(color.g, bufferOffset + 4, buffer); 2892 intToBytes(color.b, bufferOffset + 8, buffer); 2893 intToBytes(color.a, bufferOffset + 12, buffer); 2894} 2895# 1292 "./copy_buffer.metal" 2896static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8_SINT(bufferOffset, buffer); } static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8_UINT(bufferOffset, buffer); } static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_UINT(bufferOffset, buffer); } 2897static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16_SINT(bufferOffset, buffer); } static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16_UINT(bufferOffset, buffer); } static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_UINT(bufferOffset, buffer); } 2898static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32_SINT(bufferOffset, buffer); } static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32_UINT(bufferOffset, buffer); } static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_UINT(bufferOffset, buffer); } 2899 2900static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); } 2901 2902 2903kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2904{ 2905 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2906# 1336 "./copy_buffer.metal" 2907 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2908 2909 switch (kCopyFormatType) 2910 { 2911 case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2912 } 2913 2914 2915} 2916 2917kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2918{ 2919 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2920# 1364 "./copy_buffer.metal" 2921 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2922 2923 switch (kCopyFormatType) 2924 { 2925 case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2926 } 2927 2928 2929} 2930 2931kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2932{ 2933 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2934# 1392 "./copy_buffer.metal" 2935 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2936 2937 switch (kCopyFormatType) 2938 { 2939 case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2940 } 2941 2942 2943} 2944 2945 2946kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2947{ 2948 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2949# 1439 "./copy_buffer.metal" 2950 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2951 2952 switch (kCopyFormatType) 2953 { 2954 case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; 2955 } 2956 2957 2958} 2959 2960kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<int, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<int, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2961{ 2962 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2963# 1464 "./copy_buffer.metal" 2964 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2965 2966 switch (kCopyFormatType) 2967 { 2968 case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break; 2969 } 2970 2971 2972} 2973 2974kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<uint, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<uint, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2975{ 2976 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2977# 1489 "./copy_buffer.metal" 2978 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2979 2980 switch (kCopyFormatType) 2981 { 2982 case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break; 2983 } 2984 2985 2986} 2987 2988 2989struct CopyVertexParams 2990{ 2991 uint srcBufferStartOffset; 2992 uint srcStride; 2993 uint srcComponentBytes; 2994 uint srcComponents; 2995 2996 2997 2998 uchar4 srcDefaultAlphaData; 2999 3000 uint dstBufferStartOffset; 3001 uint dstStride; 3002 uint dstComponents; 3003 3004 uint vertexCount; 3005}; 3006# 1539 "./copy_buffer.metal" 3007template <typename IntType> 3008static inline void writeFloatVertex(constant CopyVertexParams &options, 3009 uint idx, 3010 vec<IntType, 4> data, 3011 device uchar *dst) 3012{ 3013 uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; 3014 3015 for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) 3016 { 3017 floatToBytes(static_cast<float>(data[component]), dstOffset, dst); 3018 } 3019} 3020 3021template <> 3022inline void writeFloatVertex(constant CopyVertexParams &options, 3023 uint idx, 3024 vec<float, 4> data, 3025 device uchar *dst) 3026{ 3027 uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; 3028 3029 for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) 3030 { 3031 floatToBytes(data[component], dstOffset, dst); 3032 } 3033} 3034 3035 3036static inline void convertToFloatVertexFormat(uint index, 3037 constant CopyVertexParams &options, 3038 constant uchar *srcBuffer, 3039 device uchar *dstBuffer) 3040{ 3041# 1585 "./copy_buffer.metal" 3042 uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index; 3043# 1594 "./copy_buffer.metal" 3044 switch (kCopyFormatType) 3045 { 3046 case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; 3047 } 3048 3049 3050} 3051 3052 3053kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]], 3054 constant CopyVertexParams &options [[buffer(0)]], 3055 constant uchar *srcBuffer [[buffer(1)]], 3056 device uchar *dstBuffer [[buffer(2)]]) 3057{ 3058 if (index >= options.vertexCount) { return; }; 3059 convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); 3060} 3061 3062 3063vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]], 3064 constant CopyVertexParams &options [[buffer(0)]], 3065 constant uchar *srcBuffer [[buffer(1)]], 3066 device uchar *dstBuffer [[buffer(2)]]) 3067{ 3068 convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); 3069} 3070 3071 3072static inline void expandVertexFormatComponents(uint index, 3073 constant CopyVertexParams &options, 3074 constant uchar *srcBuffer, 3075 device uchar *dstBuffer) 3076{ 3077 uint srcOffset = options.srcBufferStartOffset + options.srcStride * index; 3078 uint dstOffset = options.dstBufferStartOffset + options.dstStride * index; 3079 3080 uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u); 3081 uint component; 3082 for (component = 0; component < options.srcComponents; ++component, 3083 srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes) 3084 { 3085 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 3086 { 3087 dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte]; 3088 } 3089 } 3090 3091 for (; component < dstComponentsBeforeAlpha; 3092 ++component, dstOffset += options.srcComponentBytes) 3093 { 3094 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 3095 { 3096 dstBuffer[dstOffset + byte] = 0; 3097 } 3098 } 3099 3100 if (component < options.dstComponents) 3101 { 3102 3103 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 3104 { 3105 dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte]; 3106 } 3107 } 3108} 3109 3110 3111kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]], 3112 constant CopyVertexParams &options [[buffer(0)]], 3113 constant uchar *srcBuffer [[buffer(1)]], 3114 device uchar *dstBuffer [[buffer(2)]]) 3115{ 3116 if (index >= options.vertexCount) { return; }; 3117 3118 expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); 3119} 3120 3121 3122vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]], 3123 constant CopyVertexParams &options [[buffer(0)]], 3124 constant uchar *srcBuffer [[buffer(1)]], 3125 device uchar *dstBuffer [[buffer(2)]]) 3126{ 3127 expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); 3128} 3129 3130 3131kernel void linearizeBlocks(ushort2 position [[thread_position_in_grid]], 3132 constant uint2 *dimensions [[buffer(0)]], 3133 constant uint2 *srcBuffer [[buffer(1)]], 3134 device uint2 *dstBuffer [[buffer(2)]]) 3135{ 3136 if (any(uint2(position) >= *dimensions)) 3137 { 3138 return; 3139 } 3140 uint2 t = uint2(position); 3141 t = (t | (t << 8)) & 0x00FF00FF; 3142 t = (t | (t << 4)) & 0x0F0F0F0F; 3143 t = (t | (t << 2)) & 0x33333333; 3144 t = (t | (t << 1)) & 0x55555555; 3145 dstBuffer[position.y * (*dimensions).x + position.x] = srcBuffer[(t.x << 1) | t.y]; 3146} 3147 3148 3149kernel void saturateDepth(uint2 position [[thread_position_in_grid]], 3150 constant uint3 *dimensions [[buffer(0)]], 3151 device float *srcBuffer [[buffer(1)]], 3152 device float *dstBuffer [[buffer(2)]]) 3153{ 3154 if (any(position >= (*dimensions).xy)) 3155 { 3156 return; 3157 } 3158 const uint srcOffset = position.y * (*dimensions).z + position.x; 3159 const uint dstOffset = position.y * (*dimensions).x + position.x; 3160 dstBuffer[dstOffset] = saturate(srcBuffer[srcOffset]); 3161} 3162# 6 "temp_master_source.metal" 2 3163# 1 "./visibility.metal" 1 3164 3165 3166 3167 3168 3169 3170 3171 3172constant bool kCombineWithExistingResult [[function_constant(1000)]]; 3173 3174 3175 3176struct CombineVisibilityResultOptions 3177{ 3178 3179 uint startOffset; 3180 3181 uint numOffsets; 3182}; 3183 3184kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]], 3185 constant CombineVisibilityResultOptions &options [[buffer(0)]], 3186 constant ushort4 *renderpassVisibilityResult [[buffer(1)]], 3187 device ushort4 *finalResults [[buffer(2)]]) 3188{ 3189 if (idx > 0) 3190 { 3191 3192 3193 3194 return; 3195 } 3196 ushort4 finalResult16x4; 3197 3198 if (kCombineWithExistingResult) 3199 { 3200 finalResult16x4 = finalResults[0]; 3201 } 3202 else 3203 { 3204 finalResult16x4 = ushort4(0, 0, 0, 0); 3205 } 3206 3207 for (uint i = 0; i < options.numOffsets; ++i) 3208 { 3209 uint offset = options.startOffset + i; 3210 ushort4 renderpassResult = renderpassVisibilityResult[offset]; 3211 3212 3213 finalResult16x4 = finalResult16x4 | renderpassResult; 3214 } 3215 finalResults[0] = finalResult16x4; 3216} 3217# 7 "temp_master_source.metal" 2 3218# 1 "./rewrite_indices.metal" 1 3219# 11 "./rewrite_indices.metal" 3220# 1 "./rewrite_indices_shared.h" 1 3221# 12 "./rewrite_indices.metal" 2 3222using namespace metal; 3223 3224constant uint fixIndexBufferKey [[ function_constant(2000) ]]; 3225constant bool indexBufferIsUint16 = (((fixIndexBufferKey >> 0U) & 0x03U) == 2U); 3226constant bool indexBufferIsUint32 = (((fixIndexBufferKey >> 0U) & 0x03U) == 3U); 3227constant bool outIndexBufferIsUint16 = (((fixIndexBufferKey >> 2U) & 0x03U) == 2U); 3228constant bool outIndexBufferIsUint32 = (((fixIndexBufferKey >> 2U) & 0x03U) == 3U); 3229constant bool doPrimRestart = (fixIndexBufferKey & 0x00100U); 3230constant uint fixIndexBufferMode = (fixIndexBufferKey >> 4U) & 0x0FU; 3231 3232 3233static inline uint readIdx( 3234 const device ushort *indexBufferUint16, 3235 const device uint *indexBufferUint32, 3236 const uint restartIndex, 3237 const uint indexCount, 3238 uint idx, 3239 thread bool &foundRestart, 3240 thread uint &indexThatRestartedFirst 3241 ) 3242{ 3243 uint inIndex = idx; 3244 if(inIndex < indexCount) 3245 { 3246 if(indexBufferIsUint16) 3247 { 3248 inIndex = indexBufferUint16[inIndex]; 3249 } 3250 else if(indexBufferIsUint32) 3251 { 3252 inIndex = indexBufferUint32[inIndex]; 3253 } 3254 } 3255 else 3256 { 3257 foundRestart = true; 3258 indexThatRestartedFirst = idx; 3259 } 3260 if(doPrimRestart && !foundRestart && inIndex == restartIndex) 3261 { 3262 foundRestart = true; 3263 indexThatRestartedFirst = idx; 3264 } 3265 return inIndex; 3266} 3267 3268static inline void outputPrimitive( 3269 const device ushort *indexBufferUint16, 3270 const device uint *indexBufferUint32, 3271 device ushort *outIndexBufferUint16, 3272 device uint *outIndexBufferUint32, 3273 const uint restartIndex, 3274 const uint indexCount, 3275 thread uint &baseIndex, 3276 uint onIndex, 3277 thread uint &onOutIndex 3278 ) 3279{ 3280 if(baseIndex > onIndex) return; 3281 bool foundRestart = false; 3282 uint indexThatRestartedFirst = 0; 3283# 86 "./rewrite_indices.metal" 3284 switch(fixIndexBufferMode) 3285 { 3286 case 0x00U: 3287 { 3288 auto tmpIndex = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex, foundRestart, indexThatRestartedFirst); 3289 if(foundRestart) 3290 { 3291 baseIndex = indexThatRestartedFirst + 1; 3292 return; 3293 } 3294 3295 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex; } onOutIndex++; }); 3296 } 3297 break; 3298 case 0x01U: 3299 { 3300 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 3301 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 3302 if(foundRestart) 3303 { 3304 baseIndex = indexThatRestartedFirst + 1; 3305 return; 3306 } 3307 if((onIndex - baseIndex) & 1) return; 3308 3309 if(fixIndexBufferKey & 0x00200U) 3310 { 3311 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3312 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3313 } 3314 else 3315 { 3316 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3317 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3318 } 3319 } 3320 break; 3321 case 0x03U: 3322 { 3323 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 3324 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 3325 if(foundRestart) 3326 { 3327 baseIndex = indexThatRestartedFirst + 1; 3328 return; 3329 } 3330 3331 if(fixIndexBufferKey & 0x00200U) 3332 { 3333 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3334 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3335 } 3336 else 3337 { 3338 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3339 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3340 } 3341 } 3342 break; 3343 case 0x04U: 3344 { 3345 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 3346 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 3347 auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); 3348 if(foundRestart) 3349 { 3350 baseIndex = indexThatRestartedFirst + 1; 3351 return; 3352 } 3353 if(((onIndex - baseIndex) % 3) != 0) return; 3354 3355 if(fixIndexBufferKey & 0x00200U) 3356 { 3357 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3358 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3359 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3360 } 3361 else 3362 { 3363 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3364 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3365 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3366 } 3367 } 3368 break; 3369 case 0x05U: 3370 { 3371 uint isOdd = ((onIndex - baseIndex) & 1); 3372 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0 + isOdd, foundRestart, indexThatRestartedFirst); 3373 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1 - isOdd, foundRestart, indexThatRestartedFirst); 3374 auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); 3375 if(foundRestart) 3376 { 3377 baseIndex = indexThatRestartedFirst + 1; 3378 return; 3379 } 3380 3381 if(fixIndexBufferKey & 0x00200U) 3382 { 3383 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3384 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3385 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3386 } 3387 else 3388 { 3389 3390 if(isOdd) 3391 { 3392 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3393 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3394 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3395 } 3396 else 3397 { 3398 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3399 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3400 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3401 } 3402 } 3403 3404 assert(onOutIndex <= (onIndex + 1) * 3); 3405 assert(onOutIndex <= (indexCount - 2) * 3); 3406 } 3407 break; 3408 3409 } 3410 3411 3412} 3413 3414kernel void fixIndexBuffer( 3415 const device ushort *indexBufferUint16 [[ buffer(0), function_constant(indexBufferIsUint16) ]], 3416 const device uint *indexBufferUint32 [[ buffer(0), function_constant(indexBufferIsUint32) ]], 3417 device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], 3418 device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], 3419 constant uint &indexCount [[ buffer(2) ]], 3420 constant uint &primCount [[ buffer(3) ]], 3421 uint prim [[thread_position_in_grid]]) 3422{ 3423 constexpr uint restartIndex = 0xFFFFFFFF; 3424 uint baseIndex = 0; 3425 uint onIndex = onIndex; 3426 uint onOutIndex = onOutIndex; 3427 if(prim < primCount) 3428 { 3429 switch(fixIndexBufferMode) 3430 { 3431 case 0x00U: 3432 onIndex = prim; 3433 onOutIndex = prim; 3434 break; 3435 case 0x01U: 3436 onIndex = prim * 2; 3437 onOutIndex = prim * 2; 3438 break; 3439 case 0x03U: 3440 onIndex = prim; 3441 onOutIndex = prim * 2; 3442 break; 3443 case 0x04U: 3444 onIndex = prim * 3; 3445 onOutIndex = prim * 3; 3446 break; 3447 case 0x05U: 3448 onIndex = prim; 3449 onOutIndex = prim * 3; 3450 break; 3451 } 3452 outputPrimitive(indexBufferUint16, indexBufferUint32, outIndexBufferUint16, outIndexBufferUint32, restartIndex, indexCount, baseIndex, onIndex, onOutIndex); 3453 } 3454} 3455 3456 3457 3458static inline void generatePrimitive( 3459 device ushort *outIndexBufferUint16, 3460 device uint *outIndexBufferUint32, 3461 const uint firstVertex, 3462 const uint indexCount, 3463 thread uint &baseIndex, 3464 uint onIndex, 3465 uint primCount, 3466 thread uint &onOutIndex 3467 ) 3468{ 3469 if(baseIndex > onIndex) return; 3470# 284 "./rewrite_indices.metal" 3471 switch(fixIndexBufferMode) 3472 { 3473 case 0x00U: 3474 { 3475 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = onIndex + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = onIndex + firstVertex; } onOutIndex++; }); 3476 } 3477 break; 3478 case 0x01U: 3479 { 3480 auto tmpIndex0 = onIndex + 0; 3481 auto tmpIndex1 = onIndex + 1; 3482 if(fixIndexBufferKey & 0x00200U) 3483 { 3484 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3485 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3486 } 3487 else 3488 { 3489 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3490 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3491 } 3492 } 3493 break; 3494 case 0x02U: 3495 { 3496 auto tmpIndex0 = onIndex + 0; 3497 auto tmpIndex1 = (onIndex + 1) % primCount; 3498 if(fixIndexBufferKey & 0x00200U) 3499 { 3500 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3501 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3502 } 3503 else 3504 { 3505 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3506 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3507 } 3508 } 3509 break; 3510 case 0x03U: 3511 { 3512 auto tmpIndex0 = onIndex + 0; 3513 auto tmpIndex1 = onIndex + 1; 3514 if(fixIndexBufferKey & 0x00200U) 3515 { 3516 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3517 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3518 } 3519 else 3520 { 3521 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3522 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3523 } 3524 } 3525 break; 3526 case 0x04U: 3527 { 3528 auto tmpIndex0 = onIndex + 0; 3529 auto tmpIndex1 = onIndex + 1; 3530 auto tmpIndex2 = onIndex + 2; 3531 if(fixIndexBufferKey & 0x00200U) 3532 { 3533 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3534 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3535 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3536 } 3537 else 3538 { 3539 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3540 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3541 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3542 } 3543 } 3544 break; 3545 case 0x05U: 3546 { 3547 uint isOdd = ((onIndex - baseIndex) & 1); 3548 auto tmpIndex0 = onIndex + 0 + isOdd; 3549 auto tmpIndex1 = onIndex + 1 - isOdd; 3550 auto tmpIndex2 = onIndex + 2; 3551 if(fixIndexBufferKey & 0x00200U) 3552 { 3553 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3554 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3555 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3556 } 3557 else 3558 { 3559 if(isOdd) 3560 { 3561 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3562 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3563 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3564 } 3565 else 3566 { 3567 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3568 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3569 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3570 } 3571 } 3572 3573 assert(onOutIndex <= (onIndex + 1) * 3); 3574 assert(onOutIndex <= (indexCount - 2) * 3); 3575 break; 3576 } 3577 case 0x06U: 3578 { 3579 auto tmpIndex0 = 0; 3580 auto tmpIndex1 = onIndex + 1; 3581 auto tmpIndex2 = onIndex + 2; 3582 3583 if(fixIndexBufferKey & 0x00200U) 3584 { 3585 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3586 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3587 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3588 } 3589 else 3590 { 3591 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3592 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3593 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3594 } 3595 } 3596 break; 3597 3598 } 3599 3600} 3601 3602 3603 3604kernel void genIndexBuffer( 3605 device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], 3606 device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], 3607 constant uint &indexCount [[ buffer(2) ]], 3608 constant uint &primCount [[ buffer(3) ]], 3609 constant uint &firstVertex [[ buffer(4) ]], 3610 uint prim [[thread_position_in_grid]]) 3611{ 3612 uint baseIndex = 0; 3613 uint onIndex = onIndex; 3614 uint onOutIndex = onOutIndex; 3615 if(prim < primCount) 3616 { 3617 switch(fixIndexBufferMode) 3618 { 3619 case 0x00U: 3620 onIndex = prim; 3621 onOutIndex = prim; 3622 break; 3623 case 0x01U: 3624 onIndex = prim * 2; 3625 onOutIndex = prim * 2; 3626 break; 3627 case 0x03U: 3628 onIndex = prim; 3629 onOutIndex = prim * 2; 3630 break; 3631 case 0x02U: 3632 onIndex = prim; 3633 onOutIndex = prim * 2; 3634 break; 3635 case 0x04U: 3636 onIndex = prim * 3; 3637 onOutIndex = prim * 3; 3638 break; 3639 case 0x05U: 3640 onIndex = prim; 3641 onOutIndex = prim * 3; 3642 break; 3643 case 0x06U: 3644 onIndex = prim; 3645 onOutIndex = prim * 3; 3646 break; 3647 } 3648 generatePrimitive(outIndexBufferUint16, outIndexBufferUint32, firstVertex, indexCount, baseIndex, onIndex, primCount, onOutIndex); 3649 } 3650} 3651# 8 "temp_master_source.metal" 2 3652 3653 3654