1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "SamplerCore.hpp" 16 17 #include "Constants.hpp" 18 #include "Debug.hpp" 19 20 namespace 21 { applySwizzle(sw::SwizzleType swizzle,sw::Short4 & s,const sw::Vector4s & c)22 void applySwizzle(sw::SwizzleType swizzle, sw::Short4& s, const sw::Vector4s& c) 23 { 24 switch(swizzle) 25 { 26 case sw::SWIZZLE_RED: s = c.x; break; 27 case sw::SWIZZLE_GREEN: s = c.y; break; 28 case sw::SWIZZLE_BLUE: s = c.z; break; 29 case sw::SWIZZLE_ALPHA: s = c.w; break; 30 case sw::SWIZZLE_ZERO: s = sw::Short4(0x0000); break; 31 case sw::SWIZZLE_ONE: s = sw::Short4(0x1000); break; 32 default: ASSERT(false); 33 } 34 } 35 applySwizzle(sw::SwizzleType swizzle,sw::Float4 & f,const sw::Vector4f & c)36 void applySwizzle(sw::SwizzleType swizzle, sw::Float4& f, const sw::Vector4f& c) 37 { 38 switch(swizzle) 39 { 40 case sw::SWIZZLE_RED: f = c.x; break; 41 case sw::SWIZZLE_GREEN: f = c.y; break; 42 case sw::SWIZZLE_BLUE: f = c.z; break; 43 case sw::SWIZZLE_ALPHA: f = c.w; break; 44 case sw::SWIZZLE_ZERO: f = sw::Float4(0.0f, 0.0f, 0.0f, 0.0f); break; 45 case sw::SWIZZLE_ONE: f = sw::Float4(1.0f, 1.0f, 1.0f, 1.0f); break; 46 default: ASSERT(false); 47 } 48 } 49 } 50 51 namespace sw 52 { 53 extern bool colorsDefaultToZero; 54 SamplerCore(Pointer<Byte> & constants,const Sampler::State & state)55 SamplerCore::SamplerCore(Pointer<Byte> &constants, const Sampler::State &state) : constants(constants), state(state) 56 { 57 } 58 sampleTexture(Pointer<Byte> & texture,Vector4s & c,Float4 & u,Float4 & v,Float4 & w,Float4 & q,Vector4f & dsx,Vector4f & dsy)59 void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy) 60 { 61 sampleTexture(texture, c, u, v, w, q, dsx, dsy, dsx, Implicit, true); 62 } 63 sampleTexture(Pointer<Byte> & texture,Vector4s & c,Float4 & u,Float4 & v,Float4 & w,Float4 & q,Vector4f & dsx,Vector4f & dsy,Vector4f & offset,SamplerFunction function,bool fixed12)64 void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function, bool fixed12) 65 { 66 #if PERF_PROFILE 67 AddAtomic(Pointer<Long>(&profiler.texOperations), 4); 68 69 if(state.compressedFormat) 70 { 71 AddAtomic(Pointer<Long>(&profiler.compressedTex), 4); 72 } 73 #endif 74 75 Float4 uuuu = u; 76 Float4 vvvv = v; 77 Float4 wwww = w; 78 79 if(state.textureType == TEXTURE_NULL) 80 { 81 c.x = Short4(0x0000); 82 c.y = Short4(0x0000); 83 c.z = Short4(0x0000); 84 85 if(fixed12) // FIXME: Convert to fixed12 at higher level, when required 86 { 87 c.w = Short4(0x1000); 88 } 89 else 90 { 91 c.w = Short4(0xFFFFu); // FIXME 92 } 93 } 94 else 95 { 96 Int face[4]; 97 Float4 lodX; 98 Float4 lodY; 99 Float4 lodZ; 100 101 if(state.textureType == TEXTURE_CUBE) 102 { 103 cubeFace(face, uuuu, vvvv, lodX, lodY, lodZ, u, v, w); 104 } 105 106 Float lod; 107 Float anisotropy; 108 Float4 uDelta; 109 Float4 vDelta; 110 Float lodBias = (function == Fetch) ? Float4(As<Int4>(q)).x : q.x; 111 112 if(state.textureType != TEXTURE_3D) 113 { 114 if(state.textureType != TEXTURE_CUBE) 115 { 116 computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, lodBias, dsx, dsy, function); 117 } 118 else 119 { 120 computeLodCube(texture, lod, lodX, lodY, lodZ, lodBias, dsx, dsy, function); 121 } 122 } 123 else 124 { 125 computeLod3D(texture, lod, uuuu, vvvv, wwww, lodBias, dsx, dsy, function); 126 } 127 128 if(!hasFloatTexture()) 129 { 130 sampleFilter(texture, c, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function); 131 } 132 else 133 { 134 Vector4f cf; 135 136 sampleFloatFilter(texture, cf, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function); 137 138 convertFixed12(c, cf); 139 } 140 141 if(fixed12 && !hasFloatTexture()) 142 { 143 if(has16bitTextureFormat()) 144 { 145 switch(state.textureFormat) 146 { 147 case FORMAT_R5G6B5: 148 if(state.sRGB) 149 { 150 sRGBtoLinear16_5_12(c.x); 151 sRGBtoLinear16_6_12(c.y); 152 sRGBtoLinear16_5_12(c.z); 153 } 154 else 155 { 156 c.x = MulHigh(As<UShort4>(c.x), UShort4(0x10000000 / 0xF800)); 157 c.y = MulHigh(As<UShort4>(c.y), UShort4(0x10000000 / 0xFC00)); 158 c.z = MulHigh(As<UShort4>(c.z), UShort4(0x10000000 / 0xF800)); 159 } 160 break; 161 default: 162 ASSERT(false); 163 } 164 } 165 else 166 { 167 for(int component = 0; component < textureComponentCount(); component++) 168 { 169 if(state.sRGB && isRGBComponent(component)) 170 { 171 sRGBtoLinear16_8_12(c[component]); // FIXME: Perform linearization at surface level for read-only textures 172 } 173 else 174 { 175 if(hasUnsignedTextureComponent(component)) 176 { 177 c[component] = As<UShort4>(c[component]) >> 4; 178 } 179 else 180 { 181 c[component] = c[component] >> 3; 182 } 183 } 184 } 185 } 186 } 187 188 if(fixed12 && state.textureFilter != FILTER_GATHER) 189 { 190 int componentCount = textureComponentCount(); 191 short defaultColorValue = colorsDefaultToZero ? 0x0000 : 0x1000; 192 193 switch(state.textureFormat) 194 { 195 case FORMAT_R8I_SNORM: 196 case FORMAT_G8R8I_SNORM: 197 case FORMAT_X8B8G8R8I_SNORM: 198 case FORMAT_A8B8G8R8I_SNORM: 199 case FORMAT_R8: 200 case FORMAT_R5G6B5: 201 case FORMAT_G8R8: 202 case FORMAT_R8I: 203 case FORMAT_R8UI: 204 case FORMAT_G8R8I: 205 case FORMAT_G8R8UI: 206 case FORMAT_X8B8G8R8I: 207 case FORMAT_X8B8G8R8UI: 208 case FORMAT_A8B8G8R8I: 209 case FORMAT_A8B8G8R8UI: 210 case FORMAT_R16I: 211 case FORMAT_R16UI: 212 case FORMAT_G16R16: 213 case FORMAT_G16R16I: 214 case FORMAT_G16R16UI: 215 case FORMAT_X16B16G16R16I: 216 case FORMAT_X16B16G16R16UI: 217 case FORMAT_A16B16G16R16: 218 case FORMAT_A16B16G16R16I: 219 case FORMAT_A16B16G16R16UI: 220 case FORMAT_R32I: 221 case FORMAT_R32UI: 222 case FORMAT_G32R32I: 223 case FORMAT_G32R32UI: 224 case FORMAT_X32B32G32R32I: 225 case FORMAT_X32B32G32R32UI: 226 case FORMAT_A32B32G32R32I: 227 case FORMAT_A32B32G32R32UI: 228 case FORMAT_X8R8G8B8: 229 case FORMAT_X8B8G8R8: 230 case FORMAT_A8R8G8B8: 231 case FORMAT_A8B8G8R8: 232 case FORMAT_SRGB8_X8: 233 case FORMAT_SRGB8_A8: 234 case FORMAT_V8U8: 235 case FORMAT_Q8W8V8U8: 236 case FORMAT_X8L8V8U8: 237 case FORMAT_V16U16: 238 case FORMAT_A16W16V16U16: 239 case FORMAT_Q16W16V16U16: 240 case FORMAT_YV12_BT601: 241 case FORMAT_YV12_BT709: 242 case FORMAT_YV12_JFIF: 243 if(componentCount < 2) c.y = Short4(defaultColorValue); 244 if(componentCount < 3) c.z = Short4(defaultColorValue); 245 if(componentCount < 4) c.w = Short4(0x1000); 246 break; 247 case FORMAT_A8: 248 c.w = c.x; 249 c.x = Short4(0x0000); 250 c.y = Short4(0x0000); 251 c.z = Short4(0x0000); 252 break; 253 case FORMAT_L8: 254 case FORMAT_L16: 255 c.y = c.x; 256 c.z = c.x; 257 c.w = Short4(0x1000); 258 break; 259 case FORMAT_A8L8: 260 c.w = c.y; 261 c.y = c.x; 262 c.z = c.x; 263 break; 264 case FORMAT_R32F: 265 c.y = Short4(defaultColorValue); 266 case FORMAT_G32R32F: 267 c.z = Short4(defaultColorValue); 268 case FORMAT_X32B32G32R32F: 269 c.w = Short4(0x1000); 270 case FORMAT_A32B32G32R32F: 271 break; 272 case FORMAT_D32F: 273 case FORMAT_D32F_LOCKABLE: 274 case FORMAT_D32FS8_TEXTURE: 275 case FORMAT_D32FS8_SHADOW: 276 c.y = c.x; 277 c.z = c.x; 278 c.w = c.x; 279 break; 280 default: 281 ASSERT(false); 282 } 283 } 284 } 285 286 if(fixed12 && 287 ((state.swizzleR != SWIZZLE_RED) || 288 (state.swizzleG != SWIZZLE_GREEN) || 289 (state.swizzleB != SWIZZLE_BLUE) || 290 (state.swizzleA != SWIZZLE_ALPHA))) 291 { 292 const Vector4s col(c); 293 applySwizzle(state.swizzleR, c.x, col); 294 applySwizzle(state.swizzleG, c.y, col); 295 applySwizzle(state.swizzleB, c.z, col); 296 applySwizzle(state.swizzleA, c.w, col); 297 } 298 } 299 sampleTexture(Pointer<Byte> & texture,Vector4f & c,Float4 & u,Float4 & v,Float4 & w,Float4 & q,Vector4f & dsx,Vector4f & dsy,Vector4f & offset,SamplerFunction function)300 void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) 301 { 302 #if PERF_PROFILE 303 AddAtomic(Pointer<Long>(&profiler.texOperations), 4); 304 305 if(state.compressedFormat) 306 { 307 AddAtomic(Pointer<Long>(&profiler.compressedTex), 4); 308 } 309 #endif 310 311 if(state.textureType == TEXTURE_NULL) 312 { 313 c.x = Float4(0.0f); 314 c.y = Float4(0.0f); 315 c.z = Float4(0.0f); 316 c.w = Float4(1.0f); 317 } 318 else 319 { 320 // FIXME: YUV and sRGB are not supported by the floating point path 321 bool forceFloatFiltering = state.highPrecisionFiltering && !state.sRGB && !hasYuvFormat() && (state.textureFilter != FILTER_POINT); 322 if(hasFloatTexture() || hasUnnormalizedIntegerTexture() || forceFloatFiltering) // FIXME: Mostly identical to integer sampling 323 { 324 Float4 uuuu = u; 325 Float4 vvvv = v; 326 Float4 wwww = w; 327 328 Int face[4]; 329 Float4 lodX; 330 Float4 lodY; 331 Float4 lodZ; 332 333 if(state.textureType == TEXTURE_CUBE) 334 { 335 cubeFace(face, uuuu, vvvv, lodX, lodY, lodZ, u, v, w); 336 } 337 338 Float lod; 339 Float anisotropy; 340 Float4 uDelta; 341 Float4 vDelta; 342 Float lodBias = (function == Fetch) ? Float4(As<Int4>(q)).x : q.x; 343 344 if(state.textureType != TEXTURE_3D) 345 { 346 if(state.textureType != TEXTURE_CUBE) 347 { 348 computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, lodBias, dsx, dsy, function); 349 } 350 else 351 { 352 computeLodCube(texture, lod, lodX, lodY, lodZ, lodBias, dsx, dsy, function); 353 } 354 } 355 else 356 { 357 computeLod3D(texture, lod, uuuu, vvvv, wwww, lodBias, dsx, dsy, function); 358 } 359 360 sampleFloatFilter(texture, c, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function); 361 362 if(!hasFloatTexture() && !hasUnnormalizedIntegerTexture()) 363 { 364 if(has16bitTextureFormat()) 365 { 366 switch(state.textureFormat) 367 { 368 case FORMAT_R5G6B5: 369 c.x *= Float4(1.0f / 0xF800); 370 c.y *= Float4(1.0f / 0xFC00); 371 c.z *= Float4(1.0f / 0xF800); 372 break; 373 default: 374 ASSERT(false); 375 } 376 } 377 else 378 { 379 for(int component = 0; component < textureComponentCount(); component++) 380 { 381 c[component] *= Float4(hasUnsignedTextureComponent(component) ? 1.0f / 0xFFFF : 1.0f / 0x7FFF); 382 } 383 } 384 } 385 } 386 else 387 { 388 Vector4s cs; 389 390 sampleTexture(texture, cs, u, v, w, q, dsx, dsy, offset, function, false); 391 392 if(has16bitTextureFormat()) 393 { 394 switch(state.textureFormat) 395 { 396 case FORMAT_R5G6B5: 397 if(state.sRGB) 398 { 399 sRGBtoLinear16_5_12(cs.x); 400 sRGBtoLinear16_6_12(cs.y); 401 sRGBtoLinear16_5_12(cs.z); 402 403 convertSigned12(c.x, cs.x); 404 convertSigned12(c.y, cs.y); 405 convertSigned12(c.z, cs.z); 406 } 407 else 408 { 409 c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800); 410 c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00); 411 c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800); 412 } 413 break; 414 default: 415 ASSERT(false); 416 } 417 } 418 else 419 { 420 for(int component = 0; component < textureComponentCount(); component++) 421 { 422 // Normalized integer formats 423 if(state.sRGB && isRGBComponent(component)) 424 { 425 sRGBtoLinear16_8_12(cs[component]); // FIXME: Perform linearization at surface level for read-only textures 426 convertSigned12(c[component], cs[component]); 427 } 428 else 429 { 430 if(hasUnsignedTextureComponent(component)) 431 { 432 convertUnsigned16(c[component], cs[component]); 433 } 434 else 435 { 436 convertSigned15(c[component], cs[component]); 437 } 438 } 439 } 440 } 441 } 442 443 int componentCount = textureComponentCount(); 444 float defaultColorValue = colorsDefaultToZero ? 0.0f : 1.0f; 445 446 if(state.textureFilter != FILTER_GATHER) 447 { 448 switch(state.textureFormat) 449 { 450 case FORMAT_R8I: 451 case FORMAT_R8UI: 452 case FORMAT_R16I: 453 case FORMAT_R16UI: 454 case FORMAT_R32I: 455 case FORMAT_R32UI: 456 c.y = As<Float4>(UInt4(0)); 457 case FORMAT_G8R8I: 458 case FORMAT_G8R8UI: 459 case FORMAT_G16R16I: 460 case FORMAT_G16R16UI: 461 case FORMAT_G32R32I: 462 case FORMAT_G32R32UI: 463 c.z = As<Float4>(UInt4(0)); 464 case FORMAT_X8B8G8R8I: 465 case FORMAT_X8B8G8R8UI: 466 case FORMAT_X16B16G16R16I: 467 case FORMAT_X16B16G16R16UI: 468 case FORMAT_X32B32G32R32I: 469 case FORMAT_X32B32G32R32UI: 470 c.w = As<Float4>(UInt4(1)); 471 case FORMAT_A8B8G8R8I: 472 case FORMAT_A8B8G8R8UI: 473 case FORMAT_A16B16G16R16I: 474 case FORMAT_A16B16G16R16UI: 475 case FORMAT_A32B32G32R32I: 476 case FORMAT_A32B32G32R32UI: 477 break; 478 case FORMAT_R8I_SNORM: 479 case FORMAT_G8R8I_SNORM: 480 case FORMAT_X8B8G8R8I_SNORM: 481 case FORMAT_A8B8G8R8I_SNORM: 482 case FORMAT_R8: 483 case FORMAT_R5G6B5: 484 case FORMAT_G8R8: 485 case FORMAT_G16R16: 486 case FORMAT_A16B16G16R16: 487 case FORMAT_X8R8G8B8: 488 case FORMAT_X8B8G8R8: 489 case FORMAT_A8R8G8B8: 490 case FORMAT_A8B8G8R8: 491 case FORMAT_SRGB8_X8: 492 case FORMAT_SRGB8_A8: 493 case FORMAT_V8U8: 494 case FORMAT_Q8W8V8U8: 495 case FORMAT_X8L8V8U8: 496 case FORMAT_V16U16: 497 case FORMAT_A16W16V16U16: 498 case FORMAT_Q16W16V16U16: 499 case FORMAT_YV12_BT601: 500 case FORMAT_YV12_BT709: 501 case FORMAT_YV12_JFIF: 502 if(componentCount < 2) c.y = Float4(defaultColorValue); 503 if(componentCount < 3) c.z = Float4(defaultColorValue); 504 if(componentCount < 4) c.w = Float4(1.0f); 505 break; 506 case FORMAT_A8: 507 c.w = c.x; 508 c.x = Float4(0.0f); 509 c.y = Float4(0.0f); 510 c.z = Float4(0.0f); 511 break; 512 case FORMAT_L8: 513 case FORMAT_L16: 514 c.y = c.x; 515 c.z = c.x; 516 c.w = Float4(1.0f); 517 break; 518 case FORMAT_A8L8: 519 c.w = c.y; 520 c.y = c.x; 521 c.z = c.x; 522 break; 523 case FORMAT_R32F: 524 c.y = Float4(defaultColorValue); 525 case FORMAT_G32R32F: 526 c.z = Float4(defaultColorValue); 527 case FORMAT_X32B32G32R32F: 528 c.w = Float4(1.0f); 529 case FORMAT_A32B32G32R32F: 530 break; 531 case FORMAT_D32F: 532 case FORMAT_D32F_LOCKABLE: 533 case FORMAT_D32FS8_TEXTURE: 534 case FORMAT_D32FS8_SHADOW: 535 c.y = c.x; 536 c.z = c.x; 537 c.w = c.x; 538 break; 539 default: 540 ASSERT(false); 541 } 542 } 543 } 544 545 if((state.swizzleR != SWIZZLE_RED) || 546 (state.swizzleG != SWIZZLE_GREEN) || 547 (state.swizzleB != SWIZZLE_BLUE) || 548 (state.swizzleA != SWIZZLE_ALPHA)) 549 { 550 const Vector4f col(c); 551 applySwizzle(state.swizzleR, c.x, col); 552 applySwizzle(state.swizzleG, c.y, col); 553 applySwizzle(state.swizzleB, c.z, col); 554 applySwizzle(state.swizzleA, c.w, col); 555 } 556 } 557 textureSize(Pointer<Byte> & texture,Vector4f & size,Float4 & lod)558 void SamplerCore::textureSize(Pointer<Byte> &texture, Vector4f &size, Float4 &lod) 559 { 560 for(int i = 0; i < 4; ++i) 561 { 562 Int baseLevel = *Pointer<Int>(texture + OFFSET(Texture, baseLevel)); 563 Pointer<Byte> mipmap = texture + OFFSET(Texture, mipmap) + (As<Int>(Extract(lod, i)) + baseLevel) * sizeof(Mipmap); 564 size.x = Insert(size.x, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, width)))), i); 565 size.y = Insert(size.y, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, height)))), i); 566 size.z = Insert(size.z, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)))), i); 567 } 568 } 569 border(Short4 & mask,Float4 & coordinates)570 void SamplerCore::border(Short4 &mask, Float4 &coordinates) 571 { 572 Int4 border = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f))); 573 mask = As<Short4>(Int2(As<Int4>(Pack(border, border)))); 574 } 575 border(Int4 & mask,Float4 & coordinates)576 void SamplerCore::border(Int4 &mask, Float4 &coordinates) 577 { 578 mask = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f))); 579 } 580 offsetSample(Short4 & uvw,Pointer<Byte> & mipmap,int halfOffset,bool wrap,int count,Float & lod)581 Short4 SamplerCore::offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod) 582 { 583 Short4 offset = *Pointer<Short4>(mipmap + halfOffset); 584 585 if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT) 586 { 587 offset &= Short4(CmpNLE(Float4(lod), Float4(0.0f))); 588 } 589 else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR) 590 { 591 offset &= Short4(CmpLE(Float4(lod), Float4(0.0f))); 592 } 593 594 if(wrap) 595 { 596 switch(count) 597 { 598 case -1: return uvw - offset; 599 case 0: return uvw; 600 case +1: return uvw + offset; 601 case 2: return uvw + offset + offset; 602 } 603 } 604 else // Clamp or mirror 605 { 606 switch(count) 607 { 608 case -1: return SubSat(As<UShort4>(uvw), As<UShort4>(offset)); 609 case 0: return uvw; 610 case +1: return AddSat(As<UShort4>(uvw), As<UShort4>(offset)); 611 case 2: return AddSat(AddSat(As<UShort4>(uvw), As<UShort4>(offset)), As<UShort4>(offset)); 612 } 613 } 614 615 return uvw; 616 } 617 sampleFilter(Pointer<Byte> & texture,Vector4s & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Float & anisotropy,Float4 & uDelta,Float4 & vDelta,Int face[4],SamplerFunction function)618 void SamplerCore::sampleFilter(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function) 619 { 620 sampleAniso(texture, c, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function); 621 622 if(function == Fetch) 623 { 624 return; 625 } 626 627 if(state.mipmapFilter > MIPMAP_POINT) 628 { 629 Vector4s cc; 630 631 sampleAniso(texture, cc, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function); 632 633 lod *= Float(1 << 16); 634 635 UShort4 utri = UShort4(Float4(lod)); // FIXME: Optimize 636 Short4 stri = utri >> 1; // FIXME: Optimize 637 638 if(hasUnsignedTextureComponent(0)) cc.x = MulHigh(As<UShort4>(cc.x), utri); else cc.x = MulHigh(cc.x, stri); 639 if(hasUnsignedTextureComponent(1)) cc.y = MulHigh(As<UShort4>(cc.y), utri); else cc.y = MulHigh(cc.y, stri); 640 if(hasUnsignedTextureComponent(2)) cc.z = MulHigh(As<UShort4>(cc.z), utri); else cc.z = MulHigh(cc.z, stri); 641 if(hasUnsignedTextureComponent(3)) cc.w = MulHigh(As<UShort4>(cc.w), utri); else cc.w = MulHigh(cc.w, stri); 642 643 utri = ~utri; 644 stri = Short4(0x7FFF) - stri; 645 646 if(hasUnsignedTextureComponent(0)) c.x = MulHigh(As<UShort4>(c.x), utri); else c.x = MulHigh(c.x, stri); 647 if(hasUnsignedTextureComponent(1)) c.y = MulHigh(As<UShort4>(c.y), utri); else c.y = MulHigh(c.y, stri); 648 if(hasUnsignedTextureComponent(2)) c.z = MulHigh(As<UShort4>(c.z), utri); else c.z = MulHigh(c.z, stri); 649 if(hasUnsignedTextureComponent(3)) c.w = MulHigh(As<UShort4>(c.w), utri); else c.w = MulHigh(c.w, stri); 650 651 c.x += cc.x; 652 c.y += cc.y; 653 c.z += cc.z; 654 c.w += cc.w; 655 656 if(!hasUnsignedTextureComponent(0)) c.x += c.x; 657 if(!hasUnsignedTextureComponent(1)) c.y += c.y; 658 if(!hasUnsignedTextureComponent(2)) c.z += c.z; 659 if(!hasUnsignedTextureComponent(3)) c.w += c.w; 660 } 661 662 Short4 borderMask; 663 664 if(state.addressingModeU == ADDRESSING_BORDER) 665 { 666 Short4 u0; 667 668 border(u0, u); 669 670 borderMask = u0; 671 } 672 673 if(state.addressingModeV == ADDRESSING_BORDER) 674 { 675 Short4 v0; 676 677 border(v0, v); 678 679 if(state.addressingModeU == ADDRESSING_BORDER) 680 { 681 borderMask &= v0; 682 } 683 else 684 { 685 borderMask = v0; 686 } 687 } 688 689 if(state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D) 690 { 691 Short4 s0; 692 693 border(s0, w); 694 695 if(state.addressingModeU == ADDRESSING_BORDER || 696 state.addressingModeV == ADDRESSING_BORDER) 697 { 698 borderMask &= s0; 699 } 700 else 701 { 702 borderMask = s0; 703 } 704 } 705 706 if(state.addressingModeU == ADDRESSING_BORDER || 707 state.addressingModeV == ADDRESSING_BORDER || 708 (state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D)) 709 { 710 Short4 b; 711 712 c.x = (borderMask & c.x) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[0])) >> (hasUnsignedTextureComponent(0) ? 0 : 1))); 713 c.y = (borderMask & c.y) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[1])) >> (hasUnsignedTextureComponent(1) ? 0 : 1))); 714 c.z = (borderMask & c.z) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[2])) >> (hasUnsignedTextureComponent(2) ? 0 : 1))); 715 c.w = (borderMask & c.w) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[3])) >> (hasUnsignedTextureComponent(3) ? 0 : 1))); 716 } 717 } 718 sampleAniso(Pointer<Byte> & texture,Vector4s & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Float & anisotropy,Float4 & uDelta,Float4 & vDelta,Int face[4],bool secondLOD,SamplerFunction function)719 void SamplerCore::sampleAniso(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function) 720 { 721 if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch) 722 { 723 sampleQuad(texture, c, u, v, w, offset, lod, face, secondLOD, function); 724 } 725 else 726 { 727 Int a = RoundInt(anisotropy); 728 729 Vector4s cSum; 730 731 cSum.x = Short4(0); 732 cSum.y = Short4(0); 733 cSum.z = Short4(0); 734 cSum.w = Short4(0); 735 736 Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a); 737 Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a); 738 UShort4 cw = *Pointer<UShort4>(constants + OFFSET(Constants,cWeight) + 8 * a); 739 Short4 sw = Short4(cw >> 1); 740 741 Float4 du = uDelta; 742 Float4 dv = vDelta; 743 744 Float4 u0 = u + B * du; 745 Float4 v0 = v + B * dv; 746 747 du *= A; 748 dv *= A; 749 750 Int i = 0; 751 752 Do 753 { 754 sampleQuad(texture, c, u0, v0, w, offset, lod, face, secondLOD, function); 755 756 u0 += du; 757 v0 += dv; 758 759 if(hasUnsignedTextureComponent(0)) cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw)); else cSum.x += MulHigh(c.x, sw); 760 if(hasUnsignedTextureComponent(1)) cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw)); else cSum.y += MulHigh(c.y, sw); 761 if(hasUnsignedTextureComponent(2)) cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw)); else cSum.z += MulHigh(c.z, sw); 762 if(hasUnsignedTextureComponent(3)) cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw)); else cSum.w += MulHigh(c.w, sw); 763 764 i++; 765 } 766 Until(i >= a) 767 768 if(hasUnsignedTextureComponent(0)) c.x = cSum.x; else c.x = AddSat(cSum.x, cSum.x); 769 if(hasUnsignedTextureComponent(1)) c.y = cSum.y; else c.y = AddSat(cSum.y, cSum.y); 770 if(hasUnsignedTextureComponent(2)) c.z = cSum.z; else c.z = AddSat(cSum.z, cSum.z); 771 if(hasUnsignedTextureComponent(3)) c.w = cSum.w; else c.w = AddSat(cSum.w, cSum.w); 772 } 773 } 774 sampleQuad(Pointer<Byte> & texture,Vector4s & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Int face[4],bool secondLOD,SamplerFunction function)775 void SamplerCore::sampleQuad(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function) 776 { 777 if(state.textureType != TEXTURE_3D) 778 { 779 sampleQuad2D(texture, c, u, v, w, offset, lod, face, secondLOD, function); 780 } 781 else 782 { 783 sample3D(texture, c, u, v, w, offset, lod, secondLOD, function); 784 } 785 } 786 sampleQuad2D(Pointer<Byte> & texture,Vector4s & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Int face[4],bool secondLOD,SamplerFunction function)787 void SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function) 788 { 789 int componentCount = textureComponentCount(); 790 bool gather = state.textureFilter == FILTER_GATHER; 791 792 Pointer<Byte> mipmap; 793 Pointer<Byte> buffer[4]; 794 795 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD); 796 797 bool texelFetch = (function == Fetch); 798 799 Short4 uuuu = texelFetch ? Short4(As<Int4>(u)) : address(u, state.addressingModeU, mipmap); 800 Short4 vvvv = texelFetch ? Short4(As<Int4>(v)) : address(v, state.addressingModeV, mipmap); 801 Short4 wwww = texelFetch ? Short4(As<Int4>(w)) : address(w, state.addressingModeW, mipmap); 802 803 if(state.textureFilter == FILTER_POINT || texelFetch) 804 { 805 c = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function); 806 } 807 else 808 { 809 Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 0 : -1, lod); 810 Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 0 : -1, lod); 811 Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 2 : +1, lod); 812 Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 2 : +1, lod); 813 814 Vector4s c0 = sampleTexel(uuuu0, vvvv0, wwww, offset, mipmap, buffer, function); 815 Vector4s c1 = sampleTexel(uuuu1, vvvv0, wwww, offset, mipmap, buffer, function); 816 Vector4s c2 = sampleTexel(uuuu0, vvvv1, wwww, offset, mipmap, buffer, function); 817 Vector4s c3 = sampleTexel(uuuu1, vvvv1, wwww, offset, mipmap, buffer, function); 818 819 if(!gather) // Blend 820 { 821 // Fractions 822 UShort4 f0u = As<UShort4>(uuuu0) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width)); 823 UShort4 f0v = As<UShort4>(vvvv0) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height)); 824 825 UShort4 f1u = ~f0u; 826 UShort4 f1v = ~f0v; 827 828 UShort4 f0u0v = MulHigh(f0u, f0v); 829 UShort4 f1u0v = MulHigh(f1u, f0v); 830 UShort4 f0u1v = MulHigh(f0u, f1v); 831 UShort4 f1u1v = MulHigh(f1u, f1v); 832 833 // Signed fractions 834 Short4 f1u1vs; 835 Short4 f0u1vs; 836 Short4 f1u0vs; 837 Short4 f0u0vs; 838 839 if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3)) 840 { 841 f1u1vs = f1u1v >> 1; 842 f0u1vs = f0u1v >> 1; 843 f1u0vs = f1u0v >> 1; 844 f0u0vs = f0u0v >> 1; 845 } 846 847 // Bilinear interpolation 848 if(componentCount >= 1) 849 { 850 if(has16bitTextureComponents() && hasUnsignedTextureComponent(0)) 851 { 852 c0.x = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0u) + MulHigh(As<UShort4>(c1.x), f0u); 853 c2.x = As<UShort4>(c2.x) - MulHigh(As<UShort4>(c2.x), f0u) + MulHigh(As<UShort4>(c3.x), f0u); 854 c.x = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0v) + MulHigh(As<UShort4>(c2.x), f0v); 855 } 856 else 857 { 858 if(hasUnsignedTextureComponent(0)) 859 { 860 c0.x = MulHigh(As<UShort4>(c0.x), f1u1v); 861 c1.x = MulHigh(As<UShort4>(c1.x), f0u1v); 862 c2.x = MulHigh(As<UShort4>(c2.x), f1u0v); 863 c3.x = MulHigh(As<UShort4>(c3.x), f0u0v); 864 } 865 else 866 { 867 c0.x = MulHigh(c0.x, f1u1vs); 868 c1.x = MulHigh(c1.x, f0u1vs); 869 c2.x = MulHigh(c2.x, f1u0vs); 870 c3.x = MulHigh(c3.x, f0u0vs); 871 } 872 873 c.x = (c0.x + c1.x) + (c2.x + c3.x); 874 if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x); // Correct for signed fractions 875 } 876 } 877 878 if(componentCount >= 2) 879 { 880 if(has16bitTextureComponents() && hasUnsignedTextureComponent(1)) 881 { 882 c0.y = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0u) + MulHigh(As<UShort4>(c1.y), f0u); 883 c2.y = As<UShort4>(c2.y) - MulHigh(As<UShort4>(c2.y), f0u) + MulHigh(As<UShort4>(c3.y), f0u); 884 c.y = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0v) + MulHigh(As<UShort4>(c2.y), f0v); 885 } 886 else 887 { 888 if(hasUnsignedTextureComponent(1)) 889 { 890 c0.y = MulHigh(As<UShort4>(c0.y), f1u1v); 891 c1.y = MulHigh(As<UShort4>(c1.y), f0u1v); 892 c2.y = MulHigh(As<UShort4>(c2.y), f1u0v); 893 c3.y = MulHigh(As<UShort4>(c3.y), f0u0v); 894 } 895 else 896 { 897 c0.y = MulHigh(c0.y, f1u1vs); 898 c1.y = MulHigh(c1.y, f0u1vs); 899 c2.y = MulHigh(c2.y, f1u0vs); 900 c3.y = MulHigh(c3.y, f0u0vs); 901 } 902 903 c.y = (c0.y + c1.y) + (c2.y + c3.y); 904 if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y); // Correct for signed fractions 905 } 906 } 907 908 if(componentCount >= 3) 909 { 910 if(has16bitTextureComponents() && hasUnsignedTextureComponent(2)) 911 { 912 c0.z = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0u) + MulHigh(As<UShort4>(c1.z), f0u); 913 c2.z = As<UShort4>(c2.z) - MulHigh(As<UShort4>(c2.z), f0u) + MulHigh(As<UShort4>(c3.z), f0u); 914 c.z = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0v) + MulHigh(As<UShort4>(c2.z), f0v); 915 } 916 else 917 { 918 if(hasUnsignedTextureComponent(2)) 919 { 920 c0.z = MulHigh(As<UShort4>(c0.z), f1u1v); 921 c1.z = MulHigh(As<UShort4>(c1.z), f0u1v); 922 c2.z = MulHigh(As<UShort4>(c2.z), f1u0v); 923 c3.z = MulHigh(As<UShort4>(c3.z), f0u0v); 924 } 925 else 926 { 927 c0.z = MulHigh(c0.z, f1u1vs); 928 c1.z = MulHigh(c1.z, f0u1vs); 929 c2.z = MulHigh(c2.z, f1u0vs); 930 c3.z = MulHigh(c3.z, f0u0vs); 931 } 932 933 c.z = (c0.z + c1.z) + (c2.z + c3.z); 934 if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z); // Correct for signed fractions 935 } 936 } 937 938 if(componentCount >= 4) 939 { 940 if(has16bitTextureComponents() && hasUnsignedTextureComponent(3)) 941 { 942 c0.w = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0u) + MulHigh(As<UShort4>(c1.w), f0u); 943 c2.w = As<UShort4>(c2.w) - MulHigh(As<UShort4>(c2.w), f0u) + MulHigh(As<UShort4>(c3.w), f0u); 944 c.w = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0v) + MulHigh(As<UShort4>(c2.w), f0v); 945 } 946 else 947 { 948 if(hasUnsignedTextureComponent(3)) 949 { 950 c0.w = MulHigh(As<UShort4>(c0.w), f1u1v); 951 c1.w = MulHigh(As<UShort4>(c1.w), f0u1v); 952 c2.w = MulHigh(As<UShort4>(c2.w), f1u0v); 953 c3.w = MulHigh(As<UShort4>(c3.w), f0u0v); 954 } 955 else 956 { 957 c0.w = MulHigh(c0.w, f1u1vs); 958 c1.w = MulHigh(c1.w, f0u1vs); 959 c2.w = MulHigh(c2.w, f1u0vs); 960 c3.w = MulHigh(c3.w, f0u0vs); 961 } 962 963 c.w = (c0.w + c1.w) + (c2.w + c3.w); 964 if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w); // Correct for signed fractions 965 } 966 } 967 } 968 else 969 { 970 c.x = c1.x; 971 c.y = c2.x; 972 c.z = c3.x; 973 c.w = c0.x; 974 } 975 } 976 } 977 sample3D(Pointer<Byte> & texture,Vector4s & c_,Float4 & u_,Float4 & v_,Float4 & w_,Vector4f & offset,Float & lod,bool secondLOD,SamplerFunction function)978 void SamplerCore::sample3D(Pointer<Byte> &texture, Vector4s &c_, Float4 &u_, Float4 &v_, Float4 &w_, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function) 979 { 980 int componentCount = textureComponentCount(); 981 982 Pointer<Byte> mipmap; 983 Pointer<Byte> buffer[4]; 984 Int face[4]; 985 986 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD); 987 988 bool texelFetch = (function == Fetch); 989 990 Short4 uuuu = texelFetch ? Short4(As<Int4>(u_)) : address(u_, state.addressingModeU, mipmap); 991 Short4 vvvv = texelFetch ? Short4(As<Int4>(v_)) : address(v_, state.addressingModeV, mipmap); 992 Short4 wwww = texelFetch ? Short4(As<Int4>(w_)) : address(w_, state.addressingModeW, mipmap); 993 994 if(state.textureFilter == FILTER_POINT || texelFetch) 995 { 996 c_ = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function); 997 } 998 else 999 { 1000 Vector4s c[2][2][2]; 1001 1002 Short4 u[2][2][2]; 1003 Short4 v[2][2][2]; 1004 Short4 s[2][2][2]; 1005 1006 for(int i = 0; i < 2; i++) 1007 { 1008 for(int j = 0; j < 2; j++) 1009 { 1010 for(int k = 0; k < 2; k++) 1011 { 1012 u[i][j][k] = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, i * 2 - 1, lod); 1013 v[i][j][k] = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, j * 2 - 1, lod); 1014 s[i][j][k] = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, k * 2 - 1, lod); 1015 } 1016 } 1017 } 1018 1019 // Fractions 1020 UShort4 f0u = As<UShort4>(u[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width)); 1021 UShort4 f0v = As<UShort4>(v[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height)); 1022 UShort4 f0s = As<UShort4>(s[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,depth)); 1023 1024 UShort4 f1u = ~f0u; 1025 UShort4 f1v = ~f0v; 1026 UShort4 f1s = ~f0s; 1027 1028 UShort4 f[2][2][2]; 1029 Short4 fs[2][2][2]; 1030 1031 f[1][1][1] = MulHigh(f1u, f1v); 1032 f[0][1][1] = MulHigh(f0u, f1v); 1033 f[1][0][1] = MulHigh(f1u, f0v); 1034 f[0][0][1] = MulHigh(f0u, f0v); 1035 f[1][1][0] = MulHigh(f1u, f1v); 1036 f[0][1][0] = MulHigh(f0u, f1v); 1037 f[1][0][0] = MulHigh(f1u, f0v); 1038 f[0][0][0] = MulHigh(f0u, f0v); 1039 1040 f[1][1][1] = MulHigh(f[1][1][1], f1s); 1041 f[0][1][1] = MulHigh(f[0][1][1], f1s); 1042 f[1][0][1] = MulHigh(f[1][0][1], f1s); 1043 f[0][0][1] = MulHigh(f[0][0][1], f1s); 1044 f[1][1][0] = MulHigh(f[1][1][0], f0s); 1045 f[0][1][0] = MulHigh(f[0][1][0], f0s); 1046 f[1][0][0] = MulHigh(f[1][0][0], f0s); 1047 f[0][0][0] = MulHigh(f[0][0][0], f0s); 1048 1049 // Signed fractions 1050 if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3)) 1051 { 1052 fs[0][0][0] = f[0][0][0] >> 1; 1053 fs[0][0][1] = f[0][0][1] >> 1; 1054 fs[0][1][0] = f[0][1][0] >> 1; 1055 fs[0][1][1] = f[0][1][1] >> 1; 1056 fs[1][0][0] = f[1][0][0] >> 1; 1057 fs[1][0][1] = f[1][0][1] >> 1; 1058 fs[1][1][0] = f[1][1][0] >> 1; 1059 fs[1][1][1] = f[1][1][1] >> 1; 1060 } 1061 1062 for(int i = 0; i < 2; i++) 1063 { 1064 for(int j = 0; j < 2; j++) 1065 { 1066 for(int k = 0; k < 2; k++) 1067 { 1068 c[i][j][k] = sampleTexel(u[i][j][k], v[i][j][k], s[i][j][k], offset, mipmap, buffer, function); 1069 1070 if(componentCount >= 1) { if(hasUnsignedTextureComponent(0)) c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]); else c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]); } 1071 if(componentCount >= 2) { if(hasUnsignedTextureComponent(1)) c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]); else c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]); } 1072 if(componentCount >= 3) { if(hasUnsignedTextureComponent(2)) c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), f[1 - i][1 - j][1 - k]); else c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]); } 1073 if(componentCount >= 4) { if(hasUnsignedTextureComponent(3)) c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), f[1 - i][1 - j][1 - k]); else c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - k]); } 1074 1075 if(i != 0 || j != 0 || k != 0) 1076 { 1077 if(componentCount >= 1) c[0][0][0].x += c[i][j][k].x; 1078 if(componentCount >= 2) c[0][0][0].y += c[i][j][k].y; 1079 if(componentCount >= 3) c[0][0][0].z += c[i][j][k].z; 1080 if(componentCount >= 4) c[0][0][0].w += c[i][j][k].w; 1081 } 1082 } 1083 } 1084 } 1085 1086 if(componentCount >= 1) c_.x = c[0][0][0].x; 1087 if(componentCount >= 2) c_.y = c[0][0][0].y; 1088 if(componentCount >= 3) c_.z = c[0][0][0].z; 1089 if(componentCount >= 4) c_.w = c[0][0][0].w; 1090 1091 // Correct for signed fractions 1092 if(componentCount >= 1) if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x); 1093 if(componentCount >= 2) if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y); 1094 if(componentCount >= 3) if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z); 1095 if(componentCount >= 4) if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w); 1096 } 1097 } 1098 sampleFloatFilter(Pointer<Byte> & texture,Vector4f & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Float & anisotropy,Float4 & uDelta,Float4 & vDelta,Int face[4],SamplerFunction function)1099 void SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function) 1100 { 1101 sampleFloatAniso(texture, c, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function); 1102 1103 if(function == Fetch) 1104 { 1105 return; 1106 } 1107 1108 if(state.mipmapFilter > MIPMAP_POINT) 1109 { 1110 Vector4f cc; 1111 1112 sampleFloatAniso(texture, cc, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function); 1113 1114 Float4 lod4 = Float4(Frac(lod)); 1115 1116 c.x = (cc.x - c.x) * lod4 + c.x; 1117 c.y = (cc.y - c.y) * lod4 + c.y; 1118 c.z = (cc.z - c.z) * lod4 + c.z; 1119 c.w = (cc.w - c.w) * lod4 + c.w; 1120 } 1121 1122 Int4 borderMask; 1123 1124 if(state.addressingModeU == ADDRESSING_BORDER) 1125 { 1126 Int4 u0; 1127 1128 border(u0, u); 1129 1130 borderMask = u0; 1131 } 1132 1133 if(state.addressingModeV == ADDRESSING_BORDER) 1134 { 1135 Int4 v0; 1136 1137 border(v0, v); 1138 1139 if(state.addressingModeU == ADDRESSING_BORDER) 1140 { 1141 borderMask &= v0; 1142 } 1143 else 1144 { 1145 borderMask = v0; 1146 } 1147 } 1148 1149 if(state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D) 1150 { 1151 Int4 s0; 1152 1153 border(s0, w); 1154 1155 if(state.addressingModeU == ADDRESSING_BORDER || 1156 state.addressingModeV == ADDRESSING_BORDER) 1157 { 1158 borderMask &= s0; 1159 } 1160 else 1161 { 1162 borderMask = s0; 1163 } 1164 } 1165 1166 if(state.addressingModeU == ADDRESSING_BORDER || 1167 state.addressingModeV == ADDRESSING_BORDER || 1168 (state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D)) 1169 { 1170 Int4 b; 1171 1172 c.x = As<Float4>((borderMask & As<Int4>(c.x)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[0])))); 1173 c.y = As<Float4>((borderMask & As<Int4>(c.y)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[1])))); 1174 c.z = As<Float4>((borderMask & As<Int4>(c.z)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[2])))); 1175 c.w = As<Float4>((borderMask & As<Int4>(c.w)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[3])))); 1176 } 1177 } 1178 sampleFloatAniso(Pointer<Byte> & texture,Vector4f & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Float & anisotropy,Float4 & uDelta,Float4 & vDelta,Int face[4],bool secondLOD,SamplerFunction function)1179 void SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function) 1180 { 1181 if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch) 1182 { 1183 sampleFloat(texture, c, u, v, w, offset, lod, face, secondLOD, function); 1184 } 1185 else 1186 { 1187 Int a = RoundInt(anisotropy); 1188 1189 Vector4f cSum; 1190 1191 cSum.x = Float4(0.0f); 1192 cSum.y = Float4(0.0f); 1193 cSum.z = Float4(0.0f); 1194 cSum.w = Float4(0.0f); 1195 1196 Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a); 1197 Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a); 1198 1199 Float4 du = uDelta; 1200 Float4 dv = vDelta; 1201 1202 Float4 u0 = u + B * du; 1203 Float4 v0 = v + B * dv; 1204 1205 du *= A; 1206 dv *= A; 1207 1208 Int i = 0; 1209 1210 Do 1211 { 1212 sampleFloat(texture, c, u0, v0, w, offset, lod, face, secondLOD, function); 1213 1214 u0 += du; 1215 v0 += dv; 1216 1217 cSum.x += c.x * A; 1218 cSum.y += c.y * A; 1219 cSum.z += c.z * A; 1220 cSum.w += c.w * A; 1221 1222 i++; 1223 } 1224 Until(i >= a) 1225 1226 c.x = cSum.x; 1227 c.y = cSum.y; 1228 c.z = cSum.z; 1229 c.w = cSum.w; 1230 } 1231 } 1232 sampleFloat(Pointer<Byte> & texture,Vector4f & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Int face[4],bool secondLOD,SamplerFunction function)1233 void SamplerCore::sampleFloat(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function) 1234 { 1235 if(state.textureType != TEXTURE_3D) 1236 { 1237 sampleFloat2D(texture, c, u, v, w, offset, lod, face, secondLOD, function); 1238 } 1239 else 1240 { 1241 sampleFloat3D(texture, c, u, v, w, offset, lod, secondLOD, function); 1242 } 1243 } 1244 sampleFloat2D(Pointer<Byte> & texture,Vector4f & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Int face[4],bool secondLOD,SamplerFunction function)1245 void SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function) 1246 { 1247 int componentCount = textureComponentCount(); 1248 bool gather = state.textureFilter == FILTER_GATHER; 1249 1250 Pointer<Byte> mipmap; 1251 Pointer<Byte> buffer[4]; 1252 1253 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD); 1254 1255 Int4 x0, x1, y0, y1, z0; 1256 Float4 fu, fv; 1257 Int4 filter = computeFilterOffset(lod); 1258 address(w, z0, z0, fv, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function); 1259 address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function); 1260 address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function); 1261 1262 Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16); 1263 y0 *= pitchP; 1264 if(hasThirdCoordinate()) 1265 { 1266 Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16); 1267 z0 *= sliceP; 1268 } 1269 1270 if(state.textureFilter == FILTER_POINT || (function == Fetch)) 1271 { 1272 c = sampleTexel(x0, y0, z0, w, mipmap, buffer, function); 1273 } 1274 else 1275 { 1276 y1 *= pitchP; 1277 1278 Vector4f c0 = sampleTexel(x0, y0, z0, w, mipmap, buffer, function); 1279 Vector4f c1 = sampleTexel(x1, y0, z0, w, mipmap, buffer, function); 1280 Vector4f c2 = sampleTexel(x0, y1, z0, w, mipmap, buffer, function); 1281 Vector4f c3 = sampleTexel(x1, y1, z0, w, mipmap, buffer, function); 1282 1283 if(!gather) // Blend 1284 { 1285 if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x); 1286 if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y); 1287 if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z); 1288 if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w); 1289 1290 if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x); 1291 if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y); 1292 if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z); 1293 if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w); 1294 1295 if(componentCount >= 1) c.x = c0.x + fv * (c2.x - c0.x); 1296 if(componentCount >= 2) c.y = c0.y + fv * (c2.y - c0.y); 1297 if(componentCount >= 3) c.z = c0.z + fv * (c2.z - c0.z); 1298 if(componentCount >= 4) c.w = c0.w + fv * (c2.w - c0.w); 1299 } 1300 else 1301 { 1302 c.x = c1.x; 1303 c.y = c2.x; 1304 c.z = c3.x; 1305 c.w = c0.x; 1306 } 1307 } 1308 } 1309 sampleFloat3D(Pointer<Byte> & texture,Vector4f & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,bool secondLOD,SamplerFunction function)1310 void SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function) 1311 { 1312 int componentCount = textureComponentCount(); 1313 1314 Pointer<Byte> mipmap; 1315 Pointer<Byte> buffer[4]; 1316 Int face[4]; 1317 1318 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD); 1319 1320 Int4 x0, x1, y0, y1, z0, z1; 1321 Float4 fu, fv, fw; 1322 Int4 filter = computeFilterOffset(lod); 1323 address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function); 1324 address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function); 1325 address(w, z0, z1, fw, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function); 1326 1327 Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16); 1328 Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16); 1329 y0 *= pitchP; 1330 z0 *= sliceP; 1331 1332 if(state.textureFilter == FILTER_POINT || (function == Fetch)) 1333 { 1334 c = sampleTexel(x0, y0, z0, w, mipmap, buffer, function); 1335 } 1336 else 1337 { 1338 y1 *= pitchP; 1339 z1 *= sliceP; 1340 1341 Vector4f c0 = sampleTexel(x0, y0, z0, w, mipmap, buffer, function); 1342 Vector4f c1 = sampleTexel(x1, y0, z0, w, mipmap, buffer, function); 1343 Vector4f c2 = sampleTexel(x0, y1, z0, w, mipmap, buffer, function); 1344 Vector4f c3 = sampleTexel(x1, y1, z0, w, mipmap, buffer, function); 1345 Vector4f c4 = sampleTexel(x0, y0, z1, w, mipmap, buffer, function); 1346 Vector4f c5 = sampleTexel(x1, y0, z1, w, mipmap, buffer, function); 1347 Vector4f c6 = sampleTexel(x0, y1, z1, w, mipmap, buffer, function); 1348 Vector4f c7 = sampleTexel(x1, y1, z1, w, mipmap, buffer, function); 1349 1350 // Blend first slice 1351 if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x); 1352 if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y); 1353 if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z); 1354 if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w); 1355 1356 if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x); 1357 if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y); 1358 if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z); 1359 if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w); 1360 1361 if(componentCount >= 1) c0.x = c0.x + fv * (c2.x - c0.x); 1362 if(componentCount >= 2) c0.y = c0.y + fv * (c2.y - c0.y); 1363 if(componentCount >= 3) c0.z = c0.z + fv * (c2.z - c0.z); 1364 if(componentCount >= 4) c0.w = c0.w + fv * (c2.w - c0.w); 1365 1366 // Blend second slice 1367 if(componentCount >= 1) c4.x = c4.x + fu * (c5.x - c4.x); 1368 if(componentCount >= 2) c4.y = c4.y + fu * (c5.y - c4.y); 1369 if(componentCount >= 3) c4.z = c4.z + fu * (c5.z - c4.z); 1370 if(componentCount >= 4) c4.w = c4.w + fu * (c5.w - c4.w); 1371 1372 if(componentCount >= 1) c6.x = c6.x + fu * (c7.x - c6.x); 1373 if(componentCount >= 2) c6.y = c6.y + fu * (c7.y - c6.y); 1374 if(componentCount >= 3) c6.z = c6.z + fu * (c7.z - c6.z); 1375 if(componentCount >= 4) c6.w = c6.w + fu * (c7.w - c6.w); 1376 1377 if(componentCount >= 1) c4.x = c4.x + fv * (c6.x - c4.x); 1378 if(componentCount >= 2) c4.y = c4.y + fv * (c6.y - c4.y); 1379 if(componentCount >= 3) c4.z = c4.z + fv * (c6.z - c4.z); 1380 if(componentCount >= 4) c4.w = c4.w + fv * (c6.w - c4.w); 1381 1382 // Blend slices 1383 if(componentCount >= 1) c.x = c0.x + fw * (c4.x - c0.x); 1384 if(componentCount >= 2) c.y = c0.y + fw * (c4.y - c0.y); 1385 if(componentCount >= 3) c.z = c0.z + fw * (c4.z - c0.z); 1386 if(componentCount >= 4) c.w = c0.w + fw * (c4.w - c0.w); 1387 } 1388 } 1389 log2sqrt(Float lod)1390 Float SamplerCore::log2sqrt(Float lod) 1391 { 1392 // log2(sqrt(lod)) // Equals 0.25 * log2(lod^2). 1393 lod *= lod; // Squaring doubles the exponent and produces an extra bit of precision. 1394 lod = Float(As<Int>(lod)) - Float(0x3F800000); // Interpret as integer and subtract the exponent bias. 1395 lod *= As<Float>(Int(0x33000000)); // Scale by 0.25 * 2^-23 (mantissa length). 1396 1397 return lod; 1398 } 1399 computeLod(Pointer<Byte> & texture,Float & lod,Float & anisotropy,Float4 & uDelta,Float4 & vDelta,Float4 & uuuu,Float4 & vvvv,const Float & lodBias,Vector4f & dsx,Vector4f & dsy,SamplerFunction function)1400 void SamplerCore::computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function) 1401 { 1402 if(function != Lod && function != Fetch) 1403 { 1404 Float4 duvdxy; 1405 1406 if(function != Grad) 1407 { 1408 duvdxy = Float4(uuuu.yz, vvvv.yz) - Float4(uuuu.xx, vvvv.xx); 1409 } 1410 else 1411 { 1412 Float4 dudxy = Float4(dsx.x.xx, dsy.x.xx); 1413 Float4 dvdxy = Float4(dsx.y.xx, dsy.y.xx); 1414 1415 duvdxy = Float4(dudxy.xz, dvdxy.xz); 1416 } 1417 1418 // Scale by texture dimensions and LOD 1419 Float4 dUVdxy = duvdxy * *Pointer<Float4>(texture + OFFSET(Texture,widthHeightLOD)); 1420 1421 Float4 dUV2dxy = dUVdxy * dUVdxy; 1422 Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw; 1423 1424 lod = Max(Float(dUV2.x), Float(dUV2.y)); // Square length of major axis 1425 1426 if(state.textureFilter == FILTER_ANISOTROPIC) 1427 { 1428 Float det = Abs(Float(dUVdxy.x) * Float(dUVdxy.w) - Float(dUVdxy.y) * Float(dUVdxy.z)); 1429 1430 Float4 dudx = duvdxy.xxxx; 1431 Float4 dudy = duvdxy.yyyy; 1432 Float4 dvdx = duvdxy.zzzz; 1433 Float4 dvdy = duvdxy.wwww; 1434 1435 Int4 mask = As<Int4>(CmpNLT(dUV2.x, dUV2.y)); 1436 uDelta = As<Float4>((As<Int4>(dudx) & mask) | ((As<Int4>(dudy) & ~mask))); 1437 vDelta = As<Float4>((As<Int4>(dvdx) & mask) | ((As<Int4>(dvdy) & ~mask))); 1438 1439 anisotropy = lod * Rcp_pp(det); 1440 anisotropy = Min(anisotropy, *Pointer<Float>(texture + OFFSET(Texture,maxAnisotropy))); 1441 1442 lod *= Rcp_pp(anisotropy * anisotropy); 1443 } 1444 1445 lod = log2sqrt(lod); // log2(sqrt(lod)) 1446 1447 if(function == Bias) 1448 { 1449 lod += lodBias; 1450 } 1451 } 1452 else 1453 { 1454 lod = lodBias + Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel))); 1455 } 1456 1457 lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod))); 1458 lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod))); 1459 } 1460 computeLodCube(Pointer<Byte> & texture,Float & lod,Float4 & u,Float4 & v,Float4 & s,const Float & lodBias,Vector4f & dsx,Vector4f & dsy,SamplerFunction function)1461 void SamplerCore::computeLodCube(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &s, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function) 1462 { 1463 if(function != Lod && function != Fetch) 1464 { 1465 if(function != Grad) 1466 { 1467 Float4 dudxy = u.ywyw - u; 1468 Float4 dvdxy = v.ywyw - v; 1469 Float4 dsdxy = s.ywyw - s; 1470 1471 // Scale by texture dimensions and LOD 1472 dudxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD)); 1473 dvdxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD)); 1474 dsdxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD)); 1475 1476 dudxy *= dudxy; 1477 dvdxy *= dvdxy; 1478 dsdxy *= dsdxy; 1479 1480 dudxy += dvdxy; 1481 dudxy += dsdxy; 1482 1483 lod = Max(Float(dudxy.x), Float(dudxy.y)); // FIXME: Max(dudxy.x, dudxy.y); 1484 } 1485 else 1486 { 1487 Float4 dudxy = Float4(dsx.x.xx, dsy.x.xx); 1488 Float4 dvdxy = Float4(dsx.y.xx, dsy.y.xx); 1489 1490 Float4 duvdxy = Float4(dudxy.xz, dvdxy.xz); 1491 1492 // Scale by texture dimensions and LOD 1493 Float4 dUVdxy = duvdxy * *Pointer<Float4>(texture + OFFSET(Texture,widthLOD)); 1494 1495 Float4 dUV2dxy = dUVdxy * dUVdxy; 1496 Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw; 1497 1498 lod = Max(Float(dUV2.x), Float(dUV2.y)); // Square length of major axis 1499 } 1500 1501 lod = log2sqrt(lod); // log2(sqrt(lod)) 1502 1503 if(function == Bias) 1504 { 1505 lod += lodBias; 1506 } 1507 } 1508 else 1509 { 1510 lod = lodBias + Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel))); 1511 } 1512 1513 lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod))); 1514 lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod))); 1515 } 1516 computeLod3D(Pointer<Byte> & texture,Float & lod,Float4 & uuuu,Float4 & vvvv,Float4 & wwww,const Float & lodBias,Vector4f & dsx,Vector4f & dsy,SamplerFunction function)1517 void SamplerCore::computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function) 1518 { 1519 if(state.mipmapFilter == MIPMAP_NONE) 1520 { 1521 } 1522 else // Point and linear filter 1523 { 1524 if(function != Lod && function != Fetch) 1525 { 1526 Float4 dudxy; 1527 Float4 dvdxy; 1528 Float4 dsdxy; 1529 1530 if(function != Grad) 1531 { 1532 dudxy = uuuu.ywyw - uuuu; 1533 dvdxy = vvvv.ywyw - vvvv; 1534 dsdxy = wwww.ywyw - wwww; 1535 } 1536 else 1537 { 1538 dudxy = dsx.x; 1539 dvdxy = dsx.y; 1540 dsdxy = dsx.z; 1541 1542 dudxy = Float4(dudxy.xx, dsy.x.xx); 1543 dvdxy = Float4(dvdxy.xx, dsy.y.xx); 1544 dsdxy = Float4(dsdxy.xx, dsy.z.xx); 1545 1546 dudxy = Float4(dudxy.xz, dudxy.xz); 1547 dvdxy = Float4(dvdxy.xz, dvdxy.xz); 1548 dsdxy = Float4(dsdxy.xz, dsdxy.xz); 1549 } 1550 1551 // Scale by texture dimensions and LOD 1552 dudxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD)); 1553 dvdxy *= *Pointer<Float4>(texture + OFFSET(Texture,heightLOD)); 1554 dsdxy *= *Pointer<Float4>(texture + OFFSET(Texture,depthLOD)); 1555 1556 dudxy *= dudxy; 1557 dvdxy *= dvdxy; 1558 dsdxy *= dsdxy; 1559 1560 dudxy += dvdxy; 1561 dudxy += dsdxy; 1562 1563 lod = Max(Float(dudxy.x), Float(dudxy.y)); // FIXME: Max(dudxy.x, dudxy.y); 1564 1565 lod = log2sqrt(lod); // log2(sqrt(lod)) 1566 1567 if(function == Bias) 1568 { 1569 lod += lodBias; 1570 } 1571 } 1572 else 1573 { 1574 lod = lodBias + Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel))); 1575 } 1576 1577 lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod))); 1578 lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod))); 1579 } 1580 } 1581 cubeFace(Int face[4],Float4 & U,Float4 & V,Float4 & lodX,Float4 & lodY,Float4 & lodZ,Float4 & x,Float4 & y,Float4 & z)1582 void SamplerCore::cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &lodX, Float4 &lodY, Float4 &lodZ, Float4 &x, Float4 &y, Float4 &z) 1583 { 1584 Int4 xn = CmpLT(x, Float4(0.0f)); // x < 0 1585 Int4 yn = CmpLT(y, Float4(0.0f)); // y < 0 1586 Int4 zn = CmpLT(z, Float4(0.0f)); // z < 0 1587 1588 Float4 absX = Abs(x); 1589 Float4 absY = Abs(y); 1590 Float4 absZ = Abs(z); 1591 1592 Int4 xy = CmpNLE(absX, absY); // abs(x) > abs(y) 1593 Int4 yz = CmpNLE(absY, absZ); // abs(y) > abs(z) 1594 Int4 zx = CmpNLE(absZ, absX); // abs(z) > abs(x) 1595 Int4 xMajor = xy & ~zx; // abs(x) > abs(y) && abs(x) > abs(z) 1596 Int4 yMajor = yz & ~xy; // abs(y) > abs(z) && abs(y) > abs(x) 1597 Int4 zMajor = zx & ~yz; // abs(z) > abs(x) && abs(z) > abs(y) 1598 1599 // FACE_POSITIVE_X = 000b 1600 // FACE_NEGATIVE_X = 001b 1601 // FACE_POSITIVE_Y = 010b 1602 // FACE_NEGATIVE_Y = 011b 1603 // FACE_POSITIVE_Z = 100b 1604 // FACE_NEGATIVE_Z = 101b 1605 1606 Int yAxis = SignMask(yMajor); 1607 Int zAxis = SignMask(zMajor); 1608 1609 Int4 n = ((xn & xMajor) | (yn & yMajor) | (zn & zMajor)) & Int4(0x80000000); 1610 Int negative = SignMask(n); 1611 1612 face[0] = *Pointer<Int>(constants + OFFSET(Constants,transposeBit0) + negative * 4); 1613 face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit1) + yAxis * 4); 1614 face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit2) + zAxis * 4); 1615 face[1] = (face[0] >> 4) & 0x7; 1616 face[2] = (face[0] >> 8) & 0x7; 1617 face[3] = (face[0] >> 12) & 0x7; 1618 face[0] &= 0x7; 1619 1620 Float4 M = Max(Max(absX, absY), absZ); 1621 1622 // U = xMajor ? (neg ^ -z) : (zMajor & neg) ^ x) 1623 U = As<Float4>((xMajor & (n ^ As<Int4>(-z))) | (~xMajor & ((zMajor & n) ^ As<Int4>(x)))); 1624 1625 // V = !yMajor ? -y : (n ^ z) 1626 V = As<Float4>((~yMajor & As<Int4>(-y)) | (yMajor & (n ^ As<Int4>(z)))); 1627 1628 M = reciprocal(M) * Float4(0.5f); 1629 U = U * M + Float4(0.5f); 1630 V = V * M + Float4(0.5f); 1631 1632 lodX = x * M; 1633 lodY = y * M; 1634 lodZ = z * M; 1635 } 1636 applyOffset(Short4 & uvw,Float4 & offset,const Int4 & whd,AddressingMode mode)1637 Short4 SamplerCore::applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode) 1638 { 1639 Int4 tmp = Int4(As<UShort4>(uvw)); 1640 tmp = tmp + As<Int4>(offset); 1641 1642 switch (mode) 1643 { 1644 case AddressingMode::ADDRESSING_WRAP: 1645 tmp = (tmp + whd * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % whd; 1646 break; 1647 case AddressingMode::ADDRESSING_CLAMP: 1648 case AddressingMode::ADDRESSING_MIRROR: 1649 case AddressingMode::ADDRESSING_MIRRORONCE: 1650 case AddressingMode::ADDRESSING_BORDER: // FIXME: Implement and test ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, ADDRESSING_BORDER 1651 tmp = Min(Max(tmp, Int4(0)), whd - Int4(1)); 1652 break; 1653 case ADDRESSING_TEXELFETCH: 1654 break; 1655 default: 1656 ASSERT(false); 1657 } 1658 1659 return As<Short4>(UShort4(tmp)); 1660 } 1661 computeIndices(UInt index[4],Short4 uuuu,Short4 vvvv,Short4 wwww,Vector4f & offset,const Pointer<Byte> & mipmap,SamplerFunction function)1662 void SamplerCore::computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function) 1663 { 1664 bool texelFetch = (function == Fetch); 1665 bool hasOffset = (function.option == Offset); 1666 1667 if(!texelFetch) 1668 { 1669 uuuu = MulHigh(As<UShort4>(uuuu), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, width))); 1670 vvvv = MulHigh(As<UShort4>(vvvv), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, height))); 1671 } 1672 1673 if(hasOffset) 1674 { 1675 uuuu = applyOffset(uuuu, offset.x, Int4(*Pointer<UShort4>(mipmap + OFFSET(Mipmap, width))), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeU); 1676 vvvv = applyOffset(vvvv, offset.y, Int4(*Pointer<UShort4>(mipmap + OFFSET(Mipmap, height))), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeV); 1677 } 1678 1679 Short4 uuu2 = uuuu; 1680 uuuu = As<Short4>(UnpackLow(uuuu, vvvv)); 1681 uuu2 = As<Short4>(UnpackHigh(uuu2, vvvv)); 1682 uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP)))); 1683 uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP)))); 1684 1685 if(hasThirdCoordinate()) 1686 { 1687 if(state.textureType != TEXTURE_2D_ARRAY) 1688 { 1689 if(!texelFetch) 1690 { 1691 wwww = MulHigh(As<UShort4>(wwww), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth))); 1692 } 1693 if(hasOffset) 1694 { 1695 wwww = applyOffset(wwww, offset.z, Int4(*Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth))), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeW); 1696 } 1697 } 1698 Short4 www2 = wwww; 1699 wwww = As<Short4>(UnpackLow(wwww, Short4(0x0000))); 1700 www2 = As<Short4>(UnpackHigh(www2, Short4(0x0000))); 1701 wwww = As<Short4>(MulAdd(wwww, *Pointer<Short4>(mipmap + OFFSET(Mipmap,sliceP)))); 1702 www2 = As<Short4>(MulAdd(www2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,sliceP)))); 1703 uuuu = As<Short4>(As<Int2>(uuuu) + As<Int2>(wwww)); 1704 uuu2 = As<Short4>(As<Int2>(uuu2) + As<Int2>(www2)); 1705 } 1706 1707 index[0] = Extract(As<Int2>(uuuu), 0); 1708 index[1] = Extract(As<Int2>(uuuu), 1); 1709 index[2] = Extract(As<Int2>(uuu2), 0); 1710 index[3] = Extract(As<Int2>(uuu2), 1); 1711 1712 if(texelFetch) 1713 { 1714 Int size = Int(*Pointer<Int>(mipmap + OFFSET(Mipmap, sliceP))); 1715 if(hasThirdCoordinate()) 1716 { 1717 size *= Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth))); 1718 } 1719 UInt min = 0; 1720 UInt max = size - 1; 1721 1722 for(int i = 0; i < 4; i++) 1723 { 1724 index[i] = Min(Max(index[i], min), max); 1725 } 1726 } 1727 } 1728 computeIndices(UInt index[4],Int4 & uuuu,Int4 & vvvv,Int4 & wwww,const Pointer<Byte> & mipmap,SamplerFunction function)1729 void SamplerCore::computeIndices(UInt index[4], Int4& uuuu, Int4& vvvv, Int4& wwww, const Pointer<Byte> &mipmap, SamplerFunction function) 1730 { 1731 UInt4 indices = uuuu + vvvv; 1732 1733 if(hasThirdCoordinate()) 1734 { 1735 indices += As<UInt4>(wwww); 1736 } 1737 1738 for(int i = 0; i < 4; i++) 1739 { 1740 index[i] = Extract(As<Int4>(indices), i); 1741 } 1742 } 1743 sampleTexel(UInt index[4],Pointer<Byte> buffer[4])1744 Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer[4]) 1745 { 1746 Vector4s c; 1747 1748 int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0; 1749 int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0; 1750 int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0; 1751 int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0; 1752 1753 if(has16bitTextureFormat()) 1754 { 1755 c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0); 1756 c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1); 1757 c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2); 1758 c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3); 1759 1760 switch(state.textureFormat) 1761 { 1762 case FORMAT_R5G6B5: 1763 c.z = (c.x & Short4(0x001Fu)) << 11; 1764 c.y = (c.x & Short4(0x07E0u)) << 5; 1765 c.x = (c.x & Short4(0xF800u)); 1766 break; 1767 default: 1768 ASSERT(false); 1769 } 1770 } 1771 else if(has8bitTextureComponents()) 1772 { 1773 switch(textureComponentCount()) 1774 { 1775 case 4: 1776 { 1777 Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]]; 1778 Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]]; 1779 Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]]; 1780 Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]]; 1781 c.x = Unpack(c0, c1); 1782 c.y = Unpack(c2, c3); 1783 1784 switch(state.textureFormat) 1785 { 1786 case FORMAT_A8R8G8B8: 1787 c.z = As<Short4>(UnpackLow(c.x, c.y)); 1788 c.x = As<Short4>(UnpackHigh(c.x, c.y)); 1789 c.y = c.z; 1790 c.w = c.x; 1791 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z)); 1792 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y)); 1793 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x)); 1794 c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w)); 1795 break; 1796 case FORMAT_A8B8G8R8: 1797 case FORMAT_A8B8G8R8I: 1798 case FORMAT_A8B8G8R8I_SNORM: 1799 case FORMAT_Q8W8V8U8: 1800 case FORMAT_SRGB8_A8: 1801 c.z = As<Short4>(UnpackHigh(c.x, c.y)); 1802 c.x = As<Short4>(UnpackLow(c.x, c.y)); 1803 c.y = c.x; 1804 c.w = c.z; 1805 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x)); 1806 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y)); 1807 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z)); 1808 c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w)); 1809 // Propagate sign bit 1810 if(state.textureFormat == FORMAT_A8B8G8R8I) 1811 { 1812 c.x >>= 8; 1813 c.y >>= 8; 1814 c.z >>= 8; 1815 c.w >>= 8; 1816 } 1817 break; 1818 case FORMAT_A8B8G8R8UI: 1819 c.z = As<Short4>(UnpackHigh(c.x, c.y)); 1820 c.x = As<Short4>(UnpackLow(c.x, c.y)); 1821 c.y = c.x; 1822 c.w = c.z; 1823 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0))); 1824 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0))); 1825 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0))); 1826 c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(Short4(0))); 1827 break; 1828 default: 1829 ASSERT(false); 1830 } 1831 } 1832 break; 1833 case 3: 1834 { 1835 Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]]; 1836 Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]]; 1837 Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]]; 1838 Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]]; 1839 c.x = Unpack(c0, c1); 1840 c.y = Unpack(c2, c3); 1841 1842 switch(state.textureFormat) 1843 { 1844 case FORMAT_X8R8G8B8: 1845 c.z = As<Short4>(UnpackLow(c.x, c.y)); 1846 c.x = As<Short4>(UnpackHigh(c.x, c.y)); 1847 c.y = c.z; 1848 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z)); 1849 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y)); 1850 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x)); 1851 break; 1852 case FORMAT_X8B8G8R8I_SNORM: 1853 case FORMAT_X8B8G8R8I: 1854 case FORMAT_X8B8G8R8: 1855 case FORMAT_X8L8V8U8: 1856 case FORMAT_SRGB8_X8: 1857 c.z = As<Short4>(UnpackHigh(c.x, c.y)); 1858 c.x = As<Short4>(UnpackLow(c.x, c.y)); 1859 c.y = c.x; 1860 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x)); 1861 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y)); 1862 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z)); 1863 // Propagate sign bit 1864 if(state.textureFormat == FORMAT_X8B8G8R8I) 1865 { 1866 c.x >>= 8; 1867 c.y >>= 8; 1868 c.z >>= 8; 1869 } 1870 break; 1871 case FORMAT_X8B8G8R8UI: 1872 c.z = As<Short4>(UnpackHigh(c.x, c.y)); 1873 c.x = As<Short4>(UnpackLow(c.x, c.y)); 1874 c.y = c.x; 1875 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0))); 1876 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0))); 1877 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0))); 1878 break; 1879 default: 1880 ASSERT(false); 1881 } 1882 } 1883 break; 1884 case 2: 1885 c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0); 1886 c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1); 1887 c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2); 1888 c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3); 1889 1890 switch(state.textureFormat) 1891 { 1892 case FORMAT_G8R8: 1893 case FORMAT_G8R8I_SNORM: 1894 case FORMAT_V8U8: 1895 case FORMAT_A8L8: 1896 c.y = (c.x & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c.x) >> 8); 1897 c.x = (c.x & Short4(0x00FFu)) | (c.x << 8); 1898 break; 1899 case FORMAT_G8R8I: 1900 c.y = c.x >> 8; 1901 c.x = (c.x << 8) >> 8; // Propagate sign bit 1902 break; 1903 case FORMAT_G8R8UI: 1904 c.y = As<Short4>(As<UShort4>(c.x) >> 8); 1905 c.x &= Short4(0x00FFu); 1906 break; 1907 default: 1908 ASSERT(false); 1909 } 1910 break; 1911 case 1: 1912 { 1913 Int c0 = Int(*Pointer<Byte>(buffer[f0] + index[0])); 1914 Int c1 = Int(*Pointer<Byte>(buffer[f1] + index[1])); 1915 Int c2 = Int(*Pointer<Byte>(buffer[f2] + index[2])); 1916 Int c3 = Int(*Pointer<Byte>(buffer[f3] + index[3])); 1917 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); 1918 1919 switch(state.textureFormat) 1920 { 1921 case FORMAT_R8I: 1922 case FORMAT_R8UI: 1923 { 1924 Int zero(0); 1925 c.x = Unpack(As<Byte4>(c0), As<Byte4>(zero)); 1926 // Propagate sign bit 1927 if(state.textureFormat == FORMAT_R8I) 1928 { 1929 c.x = (c.x << 8) >> 8; 1930 } 1931 } 1932 break; 1933 default: 1934 c.x = Unpack(As<Byte4>(c0)); 1935 break; 1936 } 1937 } 1938 break; 1939 default: 1940 ASSERT(false); 1941 } 1942 } 1943 else if(has16bitTextureComponents()) 1944 { 1945 switch(textureComponentCount()) 1946 { 1947 case 4: 1948 c.x = Pointer<Short4>(buffer[f0])[index[0]]; 1949 c.y = Pointer<Short4>(buffer[f1])[index[1]]; 1950 c.z = Pointer<Short4>(buffer[f2])[index[2]]; 1951 c.w = Pointer<Short4>(buffer[f3])[index[3]]; 1952 transpose4x4(c.x, c.y, c.z, c.w); 1953 break; 1954 case 2: 1955 c.x = *Pointer<Short4>(buffer[f0] + 4 * index[0]); 1956 c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer[f1] + 4 * index[1]))); 1957 c.z = *Pointer<Short4>(buffer[f2] + 4 * index[2]); 1958 c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer[f3] + 4 * index[3]))); 1959 c.y = c.x; 1960 c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z)); 1961 c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z)); 1962 break; 1963 case 1: 1964 c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0); 1965 c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1); 1966 c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2); 1967 c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3); 1968 break; 1969 default: 1970 ASSERT(false); 1971 } 1972 } 1973 else ASSERT(false); 1974 1975 return c; 1976 } 1977 sampleTexel(Short4 & uuuu,Short4 & vvvv,Short4 & wwww,Vector4f & offset,Pointer<Byte> & mipmap,Pointer<Byte> buffer[4],SamplerFunction function)1978 Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function) 1979 { 1980 Vector4s c; 1981 1982 UInt index[4]; 1983 computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function); 1984 1985 if(hasYuvFormat()) 1986 { 1987 // Generic YPbPr to RGB transformation 1988 // R = Y + 2 * (1 - Kr) * Pr 1989 // G = Y - 2 * Kb * (1 - Kb) / Kg * Pb - 2 * Kr * (1 - Kr) / Kg * Pr 1990 // B = Y + 2 * (1 - Kb) * Pb 1991 1992 float Kb = 0.114f; 1993 float Kr = 0.299f; 1994 int studioSwing = 1; 1995 1996 switch(state.textureFormat) 1997 { 1998 case FORMAT_YV12_BT601: 1999 Kb = 0.114f; 2000 Kr = 0.299f; 2001 studioSwing = 1; 2002 break; 2003 case FORMAT_YV12_BT709: 2004 Kb = 0.0722f; 2005 Kr = 0.2126f; 2006 studioSwing = 1; 2007 break; 2008 case FORMAT_YV12_JFIF: 2009 Kb = 0.114f; 2010 Kr = 0.299f; 2011 studioSwing = 0; 2012 break; 2013 default: 2014 ASSERT(false); 2015 } 2016 2017 const float Kg = 1.0f - Kr - Kb; 2018 2019 const float Rr = 2 * (1 - Kr); 2020 const float Gb = -2 * Kb * (1 - Kb) / Kg; 2021 const float Gr = -2 * Kr * (1 - Kr) / Kg; 2022 const float Bb = 2 * (1 - Kb); 2023 2024 // Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240] 2025 const float Yy = studioSwing ? 255.0f / (235 - 16) : 1.0f; 2026 const float Uu = studioSwing ? 255.0f / (240 - 16) : 1.0f; 2027 const float Vv = studioSwing ? 255.0f / (240 - 16) : 1.0f; 2028 2029 const float Rv = Vv * Rr; 2030 const float Gu = Uu * Gb; 2031 const float Gv = Vv * Gr; 2032 const float Bu = Uu * Bb; 2033 2034 const float R0 = (studioSwing * -16 * Yy - 128 * Rv) / 255; 2035 const float G0 = (studioSwing * -16 * Yy - 128 * Gu - 128 * Gv) / 255; 2036 const float B0 = (studioSwing * -16 * Yy - 128 * Bu) / 255; 2037 2038 Int c0 = Int(buffer[0][index[0]]); 2039 Int c1 = Int(buffer[0][index[1]]); 2040 Int c2 = Int(buffer[0][index[2]]); 2041 Int c3 = Int(buffer[0][index[3]]); 2042 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); 2043 UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0))); 2044 2045 computeIndices(index, uuuu, vvvv, wwww, offset, mipmap + sizeof(Mipmap), function); 2046 c0 = Int(buffer[1][index[0]]); 2047 c1 = Int(buffer[1][index[1]]); 2048 c2 = Int(buffer[1][index[2]]); 2049 c3 = Int(buffer[1][index[3]]); 2050 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); 2051 UShort4 V = As<UShort4>(Unpack(As<Byte4>(c0))); 2052 2053 c0 = Int(buffer[2][index[0]]); 2054 c1 = Int(buffer[2][index[1]]); 2055 c2 = Int(buffer[2][index[2]]); 2056 c3 = Int(buffer[2][index[3]]); 2057 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); 2058 UShort4 U = As<UShort4>(Unpack(As<Byte4>(c0))); 2059 2060 const UShort4 yY = UShort4(iround(Yy * 0x4000)); 2061 const UShort4 rV = UShort4(iround(Rv * 0x4000)); 2062 const UShort4 gU = UShort4(iround(-Gu * 0x4000)); 2063 const UShort4 gV = UShort4(iround(-Gv * 0x4000)); 2064 const UShort4 bU = UShort4(iround(Bu * 0x4000)); 2065 2066 const UShort4 r0 = UShort4(iround(-R0 * 0x4000)); 2067 const UShort4 g0 = UShort4(iround(G0 * 0x4000)); 2068 const UShort4 b0 = UShort4(iround(-B0 * 0x4000)); 2069 2070 UShort4 y = MulHigh(Y, yY); 2071 UShort4 r = SubSat(y + MulHigh(V, rV), r0); 2072 UShort4 g = SubSat(y + g0, MulHigh(U, gU) + MulHigh(V, gV)); 2073 UShort4 b = SubSat(y + MulHigh(U, bU), b0); 2074 2075 c.x = Min(r, UShort4(0x3FFF)) << 2; 2076 c.y = Min(g, UShort4(0x3FFF)) << 2; 2077 c.z = Min(b, UShort4(0x3FFF)) << 2; 2078 } 2079 else 2080 { 2081 return sampleTexel(index, buffer); 2082 } 2083 2084 return c; 2085 } 2086 sampleTexel(Int4 & uuuu,Int4 & vvvv,Int4 & wwww,Float4 & z,Pointer<Byte> & mipmap,Pointer<Byte> buffer[4],SamplerFunction function)2087 Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function) 2088 { 2089 Vector4f c; 2090 2091 UInt index[4]; 2092 computeIndices(index, uuuu, vvvv, wwww, mipmap, function); 2093 2094 if(hasFloatTexture() || has32bitIntegerTextureComponents()) 2095 { 2096 int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0; 2097 int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0; 2098 int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0; 2099 int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0; 2100 2101 // Read texels 2102 switch(textureComponentCount()) 2103 { 2104 case 4: 2105 c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16); 2106 c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16); 2107 c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16); 2108 c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16); 2109 transpose4x4(c.x, c.y, c.z, c.w); 2110 break; 2111 case 3: 2112 ASSERT(state.textureFormat == FORMAT_X32B32G32R32F); 2113 c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16); 2114 c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16); 2115 c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16); 2116 c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16); 2117 transpose4x3(c.x, c.y, c.z, c.w); 2118 c.w = Float4(1.0f); 2119 break; 2120 case 2: 2121 // FIXME: Optimal shuffling? 2122 c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8); 2123 c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8); 2124 c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8); 2125 c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8); 2126 c.y = c.x; 2127 c.x = Float4(c.x.xz, c.z.xz); 2128 c.y = Float4(c.y.yw, c.z.yw); 2129 break; 2130 case 1: 2131 // FIXME: Optimal shuffling? 2132 c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4); 2133 c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4); 2134 c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4); 2135 c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4); 2136 2137 if(state.textureFormat == FORMAT_D32FS8_SHADOW && state.textureFilter != FILTER_GATHER) 2138 { 2139 Float4 d = Min(Max(z, Float4(0.0f)), Float4(1.0f)); 2140 2141 c.x = As<Float4>(As<Int4>(CmpNLT(c.x, d)) & As<Int4>(Float4(1.0f))); // FIXME: Only less-equal? 2142 } 2143 break; 2144 default: 2145 ASSERT(false); 2146 } 2147 } 2148 else 2149 { 2150 ASSERT(!hasYuvFormat()); 2151 2152 Vector4s cs = sampleTexel(index, buffer); 2153 2154 bool isInteger = Surface::isNonNormalizedInteger(state.textureFormat); 2155 int componentCount = textureComponentCount(); 2156 for(int n = 0; n < componentCount; ++n) 2157 { 2158 if(hasUnsignedTextureComponent(n)) 2159 { 2160 if(isInteger) 2161 { 2162 c[n] = As<Float4>(Int4(As<UShort4>(cs[n]))); 2163 } 2164 else 2165 { 2166 c[n] = Float4(As<UShort4>(cs[n])); 2167 } 2168 } 2169 else 2170 { 2171 if(isInteger) 2172 { 2173 c[n] = As<Float4>(Int4(cs[n])); 2174 } 2175 else 2176 { 2177 c[n] = Float4(cs[n]); 2178 } 2179 } 2180 } 2181 } 2182 2183 return c; 2184 } 2185 selectMipmap(Pointer<Byte> & texture,Pointer<Byte> buffer[4],Pointer<Byte> & mipmap,Float & lod,Int face[4],bool secondLOD)2186 void SamplerCore::selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD) 2187 { 2188 if(state.mipmapFilter < MIPMAP_POINT) 2189 { 2190 mipmap = texture + OFFSET(Texture,mipmap[0]); 2191 } 2192 else 2193 { 2194 Int ilod; 2195 2196 if(state.mipmapFilter == MIPMAP_POINT) 2197 { 2198 ilod = RoundInt(lod); 2199 } 2200 else // Linear 2201 { 2202 ilod = Int(lod); 2203 } 2204 2205 mipmap = texture + OFFSET(Texture,mipmap) + ilod * sizeof(Mipmap) + secondLOD * sizeof(Mipmap); 2206 } 2207 2208 if(state.textureType != TEXTURE_CUBE) 2209 { 2210 buffer[0] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[0])); 2211 2212 if(hasYuvFormat()) 2213 { 2214 buffer[1] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[1])); 2215 buffer[2] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[2])); 2216 } 2217 } 2218 else 2219 { 2220 for(int i = 0; i < 4; i++) 2221 { 2222 buffer[i] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer) + face[i] * sizeof(void*)); 2223 } 2224 } 2225 } 2226 computeFilterOffset(Float & lod)2227 Int4 SamplerCore::computeFilterOffset(Float &lod) 2228 { 2229 Int4 filtering((state.textureFilter == FILTER_POINT) ? 0 : 1); 2230 if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT) 2231 { 2232 filtering &= CmpNLE(Float4(lod), Float4(0.0f)); 2233 } 2234 else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR) 2235 { 2236 filtering &= CmpLE(Float4(lod), Float4(0.0f)); 2237 } 2238 2239 return filtering; 2240 } 2241 address(Float4 & uw,AddressingMode addressingMode,Pointer<Byte> & mipmap)2242 Short4 SamplerCore::address(Float4 &uw, AddressingMode addressingMode, Pointer<Byte>& mipmap) 2243 { 2244 if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY) 2245 { 2246 return Short4(); // Unused 2247 } 2248 else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY) 2249 { 2250 return Min(Max(Short4(RoundInt(uw)), Short4(0)), *Pointer<Short4>(mipmap + OFFSET(Mipmap, depth)) - Short4(1)); 2251 } 2252 else if(addressingMode == ADDRESSING_CLAMP) 2253 { 2254 Float4 clamp = Min(Max(uw, Float4(0.0f)), Float4(65535.0f / 65536.0f)); 2255 2256 return Short4(Int4(clamp * Float4(1 << 16))); 2257 } 2258 else if(addressingMode == ADDRESSING_MIRROR) 2259 { 2260 Int4 convert = Int4(uw * Float4(1 << 16)); 2261 Int4 mirror = (convert << 15) >> 31; 2262 2263 convert ^= mirror; 2264 2265 return Short4(convert); 2266 } 2267 else if(addressingMode == ADDRESSING_MIRRORONCE) 2268 { 2269 // Absolute value 2270 Int4 convert = Int4(Abs(uw * Float4(1 << 16))); 2271 2272 // Clamp 2273 convert -= Int4(0x00008000, 0x00008000, 0x00008000, 0x00008000); 2274 convert = As<Int4>(Pack(convert, convert)); 2275 2276 return As<Short4>(Int2(convert)) + Short4(0x8000u); 2277 } 2278 else // Wrap (or border) 2279 { 2280 return Short4(Int4(uw * Float4(1 << 16))); 2281 } 2282 } 2283 address(Float4 & uvw,Int4 & xyz0,Int4 & xyz1,Float4 & f,Pointer<Byte> & mipmap,Float4 & texOffset,Int4 & filter,int whd,AddressingMode addressingMode,SamplerFunction function)2284 void SamplerCore::address(Float4 &uvw, Int4& xyz0, Int4& xyz1, Float4& f, Pointer<Byte>& mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function) 2285 { 2286 if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY) 2287 { 2288 return; // Unused 2289 } 2290 2291 Int4 dim = Int4(*Pointer<Short4>(mipmap + whd, 16)); 2292 Int4 maxXYZ = dim - Int4(1); 2293 2294 if(function == Fetch) 2295 { 2296 xyz0 = Min(Max(((function.option == Offset) && (addressingMode != ADDRESSING_LAYER)) ? As<Int4>(uvw) + As<Int4>(texOffset) : As<Int4>(uvw), Int4(0)), maxXYZ); 2297 } 2298 else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY) // Note: Offset does not apply to array layers 2299 { 2300 xyz0 = Min(Max(RoundInt(uvw), Int4(0)), maxXYZ); 2301 } 2302 else 2303 { 2304 const int halfBits = 0x3effffff; // Value just under 0.5f 2305 const int oneBits = 0x3f7fffff; // Value just under 1.0f 2306 const int twoBits = 0x3fffffff; // Value just under 2.0f 2307 2308 Float4 coord = Float4(dim); 2309 switch(addressingMode) 2310 { 2311 case ADDRESSING_CLAMP: 2312 { 2313 Float4 one = As<Float4>(Int4(oneBits)); 2314 coord *= Min(Max(uvw, Float4(0.0f)), one); 2315 } 2316 break; 2317 case ADDRESSING_MIRROR: 2318 { 2319 Float4 half = As<Float4>(Int4(halfBits)); 2320 Float4 one = As<Float4>(Int4(oneBits)); 2321 Float4 two = As<Float4>(Int4(twoBits)); 2322 coord *= one - Abs(two * Frac(uvw * half) - one); 2323 } 2324 break; 2325 case ADDRESSING_MIRRORONCE: 2326 { 2327 Float4 half = As<Float4>(Int4(halfBits)); 2328 Float4 one = As<Float4>(Int4(oneBits)); 2329 Float4 two = As<Float4>(Int4(twoBits)); 2330 coord *= one - Abs(two * Frac(Min(Max(uvw, -one), two) * half) - one); 2331 } 2332 break; 2333 default: // Wrap (or border) 2334 coord *= Frac(uvw); 2335 break; 2336 } 2337 2338 xyz0 = Int4(coord); 2339 2340 if(function.option == Offset) 2341 { 2342 xyz0 += As<Int4>(texOffset); 2343 switch(addressingMode) 2344 { 2345 case ADDRESSING_MIRROR: 2346 case ADDRESSING_MIRRORONCE: 2347 case ADDRESSING_BORDER: 2348 // FIXME: Implement ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE and ADDRESSING_BORDER. Fall through to Clamp. 2349 case ADDRESSING_CLAMP: 2350 xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ); 2351 break; 2352 default: // Wrap 2353 xyz0 = (xyz0 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim; 2354 break; 2355 } 2356 } 2357 2358 if(state.textureFilter != FILTER_POINT) // Compute 2nd coordinate, if needed 2359 { 2360 bool gather = state.textureFilter == FILTER_GATHER; 2361 2362 xyz1 = xyz0 + filter; // Increment 2363 2364 if(!gather) 2365 { 2366 Float4 frac = Frac(coord); 2367 f = Abs(frac - Float4(0.5f)); 2368 xyz1 -= CmpLT(frac, Float4(0.5f)) & (filter + filter); // Decrement xyz if necessary 2369 } 2370 2371 switch(addressingMode) 2372 { 2373 case ADDRESSING_MIRROR: 2374 case ADDRESSING_MIRRORONCE: 2375 case ADDRESSING_BORDER: 2376 // FIXME: Implement ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE and ADDRESSING_BORDER. Fall through to Clamp. 2377 case ADDRESSING_CLAMP: 2378 xyz1 = gather ? Min(xyz1, maxXYZ) : Min(Max(xyz1, Int4(0)), maxXYZ); 2379 break; 2380 default: // Wrap 2381 { 2382 // The coordinates overflow or underflow by at most 1 2383 Int4 over = CmpNLT(xyz1, dim); 2384 xyz1 = (over & Int4(0)) | (~over & xyz1); // xyz >= dim ? 0 : xyz 2385 if(!gather) 2386 { 2387 Int4 under = CmpLT(xyz1, Int4(0)); 2388 xyz1 = (under & maxXYZ) | (~under & xyz1); // xyz < 0 ? dim - 1 : xyz 2389 } 2390 } 2391 break; 2392 } 2393 } 2394 } 2395 } 2396 convertFixed12(Short4 & cs,Float4 & cf)2397 void SamplerCore::convertFixed12(Short4 &cs, Float4 &cf) 2398 { 2399 cs = RoundShort4(cf * Float4(0x1000)); 2400 } 2401 convertFixed12(Vector4s & cs,Vector4f & cf)2402 void SamplerCore::convertFixed12(Vector4s &cs, Vector4f &cf) 2403 { 2404 convertFixed12(cs.x, cf.x); 2405 convertFixed12(cs.y, cf.y); 2406 convertFixed12(cs.z, cf.z); 2407 convertFixed12(cs.w, cf.w); 2408 } 2409 convertSigned12(Float4 & cf,Short4 & cs)2410 void SamplerCore::convertSigned12(Float4 &cf, Short4 &cs) 2411 { 2412 cf = Float4(cs) * Float4(1.0f / 0x0FFE); 2413 } 2414 2415 // void SamplerCore::convertSigned12(Vector4f &cf, Vector4s &cs) 2416 // { 2417 // convertSigned12(cf.x, cs.x); 2418 // convertSigned12(cf.y, cs.y); 2419 // convertSigned12(cf.z, cs.z); 2420 // convertSigned12(cf.w, cs.w); 2421 // } 2422 convertSigned15(Float4 & cf,Short4 & cs)2423 void SamplerCore::convertSigned15(Float4 &cf, Short4 &cs) 2424 { 2425 cf = Float4(cs) * Float4(1.0f / 0x7FFF); 2426 } 2427 convertUnsigned16(Float4 & cf,Short4 & cs)2428 void SamplerCore::convertUnsigned16(Float4 &cf, Short4 &cs) 2429 { 2430 cf = Float4(As<UShort4>(cs)) * Float4(1.0f / 0xFFFF); 2431 } 2432 sRGBtoLinear16_8_12(Short4 & c)2433 void SamplerCore::sRGBtoLinear16_8_12(Short4 &c) 2434 { 2435 c = As<UShort4>(c) >> 8; 2436 2437 Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear8_12)); 2438 2439 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0); 2440 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1); 2441 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2); 2442 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3); 2443 } 2444 sRGBtoLinear16_6_12(Short4 & c)2445 void SamplerCore::sRGBtoLinear16_6_12(Short4 &c) 2446 { 2447 c = As<UShort4>(c) >> 10; 2448 2449 Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear6_12)); 2450 2451 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0); 2452 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1); 2453 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2); 2454 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3); 2455 } 2456 sRGBtoLinear16_5_12(Short4 & c)2457 void SamplerCore::sRGBtoLinear16_5_12(Short4 &c) 2458 { 2459 c = As<UShort4>(c) >> 11; 2460 2461 Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear5_12)); 2462 2463 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0); 2464 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1); 2465 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2); 2466 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3); 2467 } 2468 hasFloatTexture() const2469 bool SamplerCore::hasFloatTexture() const 2470 { 2471 return Surface::isFloatFormat(state.textureFormat); 2472 } 2473 hasUnnormalizedIntegerTexture() const2474 bool SamplerCore::hasUnnormalizedIntegerTexture() const 2475 { 2476 return Surface::isNonNormalizedInteger(state.textureFormat); 2477 } 2478 hasUnsignedTextureComponent(int component) const2479 bool SamplerCore::hasUnsignedTextureComponent(int component) const 2480 { 2481 return Surface::isUnsignedComponent(state.textureFormat, component); 2482 } 2483 textureComponentCount() const2484 int SamplerCore::textureComponentCount() const 2485 { 2486 return Surface::componentCount(state.textureFormat); 2487 } 2488 hasThirdCoordinate() const2489 bool SamplerCore::hasThirdCoordinate() const 2490 { 2491 return (state.textureType == TEXTURE_3D) || (state.textureType == TEXTURE_2D_ARRAY); 2492 } 2493 has16bitTextureFormat() const2494 bool SamplerCore::has16bitTextureFormat() const 2495 { 2496 switch(state.textureFormat) 2497 { 2498 case FORMAT_R5G6B5: 2499 return true; 2500 case FORMAT_R8I_SNORM: 2501 case FORMAT_G8R8I_SNORM: 2502 case FORMAT_X8B8G8R8I_SNORM: 2503 case FORMAT_A8B8G8R8I_SNORM: 2504 case FORMAT_R8I: 2505 case FORMAT_R8UI: 2506 case FORMAT_G8R8I: 2507 case FORMAT_G8R8UI: 2508 case FORMAT_X8B8G8R8I: 2509 case FORMAT_X8B8G8R8UI: 2510 case FORMAT_A8B8G8R8I: 2511 case FORMAT_A8B8G8R8UI: 2512 case FORMAT_R32I: 2513 case FORMAT_R32UI: 2514 case FORMAT_G32R32I: 2515 case FORMAT_G32R32UI: 2516 case FORMAT_X32B32G32R32I: 2517 case FORMAT_X32B32G32R32UI: 2518 case FORMAT_A32B32G32R32I: 2519 case FORMAT_A32B32G32R32UI: 2520 case FORMAT_G8R8: 2521 case FORMAT_X8R8G8B8: 2522 case FORMAT_X8B8G8R8: 2523 case FORMAT_A8R8G8B8: 2524 case FORMAT_A8B8G8R8: 2525 case FORMAT_SRGB8_X8: 2526 case FORMAT_SRGB8_A8: 2527 case FORMAT_V8U8: 2528 case FORMAT_Q8W8V8U8: 2529 case FORMAT_X8L8V8U8: 2530 case FORMAT_R32F: 2531 case FORMAT_G32R32F: 2532 case FORMAT_X32B32G32R32F: 2533 case FORMAT_A32B32G32R32F: 2534 case FORMAT_A8: 2535 case FORMAT_R8: 2536 case FORMAT_L8: 2537 case FORMAT_A8L8: 2538 case FORMAT_D32F: 2539 case FORMAT_D32F_LOCKABLE: 2540 case FORMAT_D32FS8_TEXTURE: 2541 case FORMAT_D32FS8_SHADOW: 2542 case FORMAT_L16: 2543 case FORMAT_G16R16: 2544 case FORMAT_A16B16G16R16: 2545 case FORMAT_V16U16: 2546 case FORMAT_A16W16V16U16: 2547 case FORMAT_Q16W16V16U16: 2548 case FORMAT_R16I: 2549 case FORMAT_R16UI: 2550 case FORMAT_G16R16I: 2551 case FORMAT_G16R16UI: 2552 case FORMAT_X16B16G16R16I: 2553 case FORMAT_X16B16G16R16UI: 2554 case FORMAT_A16B16G16R16I: 2555 case FORMAT_A16B16G16R16UI: 2556 case FORMAT_YV12_BT601: 2557 case FORMAT_YV12_BT709: 2558 case FORMAT_YV12_JFIF: 2559 return false; 2560 default: 2561 ASSERT(false); 2562 } 2563 2564 return false; 2565 } 2566 has8bitTextureComponents() const2567 bool SamplerCore::has8bitTextureComponents() const 2568 { 2569 switch(state.textureFormat) 2570 { 2571 case FORMAT_G8R8: 2572 case FORMAT_X8R8G8B8: 2573 case FORMAT_X8B8G8R8: 2574 case FORMAT_A8R8G8B8: 2575 case FORMAT_A8B8G8R8: 2576 case FORMAT_SRGB8_X8: 2577 case FORMAT_SRGB8_A8: 2578 case FORMAT_V8U8: 2579 case FORMAT_Q8W8V8U8: 2580 case FORMAT_X8L8V8U8: 2581 case FORMAT_A8: 2582 case FORMAT_R8: 2583 case FORMAT_L8: 2584 case FORMAT_A8L8: 2585 case FORMAT_R8I_SNORM: 2586 case FORMAT_G8R8I_SNORM: 2587 case FORMAT_X8B8G8R8I_SNORM: 2588 case FORMAT_A8B8G8R8I_SNORM: 2589 case FORMAT_R8I: 2590 case FORMAT_R8UI: 2591 case FORMAT_G8R8I: 2592 case FORMAT_G8R8UI: 2593 case FORMAT_X8B8G8R8I: 2594 case FORMAT_X8B8G8R8UI: 2595 case FORMAT_A8B8G8R8I: 2596 case FORMAT_A8B8G8R8UI: 2597 return true; 2598 case FORMAT_R5G6B5: 2599 case FORMAT_R32F: 2600 case FORMAT_G32R32F: 2601 case FORMAT_X32B32G32R32F: 2602 case FORMAT_A32B32G32R32F: 2603 case FORMAT_D32F: 2604 case FORMAT_D32F_LOCKABLE: 2605 case FORMAT_D32FS8_TEXTURE: 2606 case FORMAT_D32FS8_SHADOW: 2607 case FORMAT_L16: 2608 case FORMAT_G16R16: 2609 case FORMAT_A16B16G16R16: 2610 case FORMAT_V16U16: 2611 case FORMAT_A16W16V16U16: 2612 case FORMAT_Q16W16V16U16: 2613 case FORMAT_R32I: 2614 case FORMAT_R32UI: 2615 case FORMAT_G32R32I: 2616 case FORMAT_G32R32UI: 2617 case FORMAT_X32B32G32R32I: 2618 case FORMAT_X32B32G32R32UI: 2619 case FORMAT_A32B32G32R32I: 2620 case FORMAT_A32B32G32R32UI: 2621 case FORMAT_R16I: 2622 case FORMAT_R16UI: 2623 case FORMAT_G16R16I: 2624 case FORMAT_G16R16UI: 2625 case FORMAT_X16B16G16R16I: 2626 case FORMAT_X16B16G16R16UI: 2627 case FORMAT_A16B16G16R16I: 2628 case FORMAT_A16B16G16R16UI: 2629 case FORMAT_YV12_BT601: 2630 case FORMAT_YV12_BT709: 2631 case FORMAT_YV12_JFIF: 2632 return false; 2633 default: 2634 ASSERT(false); 2635 } 2636 2637 return false; 2638 } 2639 has16bitTextureComponents() const2640 bool SamplerCore::has16bitTextureComponents() const 2641 { 2642 switch(state.textureFormat) 2643 { 2644 case FORMAT_R5G6B5: 2645 case FORMAT_R8I_SNORM: 2646 case FORMAT_G8R8I_SNORM: 2647 case FORMAT_X8B8G8R8I_SNORM: 2648 case FORMAT_A8B8G8R8I_SNORM: 2649 case FORMAT_R8I: 2650 case FORMAT_R8UI: 2651 case FORMAT_G8R8I: 2652 case FORMAT_G8R8UI: 2653 case FORMAT_X8B8G8R8I: 2654 case FORMAT_X8B8G8R8UI: 2655 case FORMAT_A8B8G8R8I: 2656 case FORMAT_A8B8G8R8UI: 2657 case FORMAT_R32I: 2658 case FORMAT_R32UI: 2659 case FORMAT_G32R32I: 2660 case FORMAT_G32R32UI: 2661 case FORMAT_X32B32G32R32I: 2662 case FORMAT_X32B32G32R32UI: 2663 case FORMAT_A32B32G32R32I: 2664 case FORMAT_A32B32G32R32UI: 2665 case FORMAT_G8R8: 2666 case FORMAT_X8R8G8B8: 2667 case FORMAT_X8B8G8R8: 2668 case FORMAT_A8R8G8B8: 2669 case FORMAT_A8B8G8R8: 2670 case FORMAT_SRGB8_X8: 2671 case FORMAT_SRGB8_A8: 2672 case FORMAT_V8U8: 2673 case FORMAT_Q8W8V8U8: 2674 case FORMAT_X8L8V8U8: 2675 case FORMAT_R32F: 2676 case FORMAT_G32R32F: 2677 case FORMAT_X32B32G32R32F: 2678 case FORMAT_A32B32G32R32F: 2679 case FORMAT_A8: 2680 case FORMAT_R8: 2681 case FORMAT_L8: 2682 case FORMAT_A8L8: 2683 case FORMAT_D32F: 2684 case FORMAT_D32F_LOCKABLE: 2685 case FORMAT_D32FS8_TEXTURE: 2686 case FORMAT_D32FS8_SHADOW: 2687 case FORMAT_YV12_BT601: 2688 case FORMAT_YV12_BT709: 2689 case FORMAT_YV12_JFIF: 2690 return false; 2691 case FORMAT_L16: 2692 case FORMAT_G16R16: 2693 case FORMAT_A16B16G16R16: 2694 case FORMAT_R16I: 2695 case FORMAT_R16UI: 2696 case FORMAT_G16R16I: 2697 case FORMAT_G16R16UI: 2698 case FORMAT_X16B16G16R16I: 2699 case FORMAT_X16B16G16R16UI: 2700 case FORMAT_A16B16G16R16I: 2701 case FORMAT_A16B16G16R16UI: 2702 case FORMAT_V16U16: 2703 case FORMAT_A16W16V16U16: 2704 case FORMAT_Q16W16V16U16: 2705 return true; 2706 default: 2707 ASSERT(false); 2708 } 2709 2710 return false; 2711 } 2712 has32bitIntegerTextureComponents() const2713 bool SamplerCore::has32bitIntegerTextureComponents() const 2714 { 2715 switch(state.textureFormat) 2716 { 2717 case FORMAT_R5G6B5: 2718 case FORMAT_R8I_SNORM: 2719 case FORMAT_G8R8I_SNORM: 2720 case FORMAT_X8B8G8R8I_SNORM: 2721 case FORMAT_A8B8G8R8I_SNORM: 2722 case FORMAT_R8I: 2723 case FORMAT_R8UI: 2724 case FORMAT_G8R8I: 2725 case FORMAT_G8R8UI: 2726 case FORMAT_X8B8G8R8I: 2727 case FORMAT_X8B8G8R8UI: 2728 case FORMAT_A8B8G8R8I: 2729 case FORMAT_A8B8G8R8UI: 2730 case FORMAT_G8R8: 2731 case FORMAT_X8R8G8B8: 2732 case FORMAT_X8B8G8R8: 2733 case FORMAT_A8R8G8B8: 2734 case FORMAT_A8B8G8R8: 2735 case FORMAT_SRGB8_X8: 2736 case FORMAT_SRGB8_A8: 2737 case FORMAT_V8U8: 2738 case FORMAT_Q8W8V8U8: 2739 case FORMAT_X8L8V8U8: 2740 case FORMAT_L16: 2741 case FORMAT_G16R16: 2742 case FORMAT_A16B16G16R16: 2743 case FORMAT_R16I: 2744 case FORMAT_R16UI: 2745 case FORMAT_G16R16I: 2746 case FORMAT_G16R16UI: 2747 case FORMAT_X16B16G16R16I: 2748 case FORMAT_X16B16G16R16UI: 2749 case FORMAT_A16B16G16R16I: 2750 case FORMAT_A16B16G16R16UI: 2751 case FORMAT_V16U16: 2752 case FORMAT_A16W16V16U16: 2753 case FORMAT_Q16W16V16U16: 2754 case FORMAT_R32F: 2755 case FORMAT_G32R32F: 2756 case FORMAT_X32B32G32R32F: 2757 case FORMAT_A32B32G32R32F: 2758 case FORMAT_A8: 2759 case FORMAT_R8: 2760 case FORMAT_L8: 2761 case FORMAT_A8L8: 2762 case FORMAT_D32F: 2763 case FORMAT_D32F_LOCKABLE: 2764 case FORMAT_D32FS8_TEXTURE: 2765 case FORMAT_D32FS8_SHADOW: 2766 case FORMAT_YV12_BT601: 2767 case FORMAT_YV12_BT709: 2768 case FORMAT_YV12_JFIF: 2769 return false; 2770 case FORMAT_R32I: 2771 case FORMAT_R32UI: 2772 case FORMAT_G32R32I: 2773 case FORMAT_G32R32UI: 2774 case FORMAT_X32B32G32R32I: 2775 case FORMAT_X32B32G32R32UI: 2776 case FORMAT_A32B32G32R32I: 2777 case FORMAT_A32B32G32R32UI: 2778 return true; 2779 default: 2780 ASSERT(false); 2781 } 2782 2783 return false; 2784 } 2785 hasYuvFormat() const2786 bool SamplerCore::hasYuvFormat() const 2787 { 2788 switch(state.textureFormat) 2789 { 2790 case FORMAT_YV12_BT601: 2791 case FORMAT_YV12_BT709: 2792 case FORMAT_YV12_JFIF: 2793 return true; 2794 case FORMAT_R5G6B5: 2795 case FORMAT_R8I_SNORM: 2796 case FORMAT_G8R8I_SNORM: 2797 case FORMAT_X8B8G8R8I_SNORM: 2798 case FORMAT_A8B8G8R8I_SNORM: 2799 case FORMAT_R8I: 2800 case FORMAT_R8UI: 2801 case FORMAT_G8R8I: 2802 case FORMAT_G8R8UI: 2803 case FORMAT_X8B8G8R8I: 2804 case FORMAT_X8B8G8R8UI: 2805 case FORMAT_A8B8G8R8I: 2806 case FORMAT_A8B8G8R8UI: 2807 case FORMAT_R32I: 2808 case FORMAT_R32UI: 2809 case FORMAT_G32R32I: 2810 case FORMAT_G32R32UI: 2811 case FORMAT_X32B32G32R32I: 2812 case FORMAT_X32B32G32R32UI: 2813 case FORMAT_A32B32G32R32I: 2814 case FORMAT_A32B32G32R32UI: 2815 case FORMAT_G8R8: 2816 case FORMAT_X8R8G8B8: 2817 case FORMAT_X8B8G8R8: 2818 case FORMAT_A8R8G8B8: 2819 case FORMAT_A8B8G8R8: 2820 case FORMAT_SRGB8_X8: 2821 case FORMAT_SRGB8_A8: 2822 case FORMAT_V8U8: 2823 case FORMAT_Q8W8V8U8: 2824 case FORMAT_X8L8V8U8: 2825 case FORMAT_R32F: 2826 case FORMAT_G32R32F: 2827 case FORMAT_X32B32G32R32F: 2828 case FORMAT_A32B32G32R32F: 2829 case FORMAT_A8: 2830 case FORMAT_R8: 2831 case FORMAT_L8: 2832 case FORMAT_A8L8: 2833 case FORMAT_D32F: 2834 case FORMAT_D32F_LOCKABLE: 2835 case FORMAT_D32FS8_TEXTURE: 2836 case FORMAT_D32FS8_SHADOW: 2837 case FORMAT_L16: 2838 case FORMAT_G16R16: 2839 case FORMAT_A16B16G16R16: 2840 case FORMAT_R16I: 2841 case FORMAT_R16UI: 2842 case FORMAT_G16R16I: 2843 case FORMAT_G16R16UI: 2844 case FORMAT_X16B16G16R16I: 2845 case FORMAT_X16B16G16R16UI: 2846 case FORMAT_A16B16G16R16I: 2847 case FORMAT_A16B16G16R16UI: 2848 case FORMAT_V16U16: 2849 case FORMAT_A16W16V16U16: 2850 case FORMAT_Q16W16V16U16: 2851 return false; 2852 default: 2853 ASSERT(false); 2854 } 2855 2856 return false; 2857 } 2858 isRGBComponent(int component) const2859 bool SamplerCore::isRGBComponent(int component) const 2860 { 2861 switch(state.textureFormat) 2862 { 2863 case FORMAT_R5G6B5: return component < 3; 2864 case FORMAT_R8I_SNORM: return component < 1; 2865 case FORMAT_G8R8I_SNORM: return component < 2; 2866 case FORMAT_X8B8G8R8I_SNORM: return component < 3; 2867 case FORMAT_A8B8G8R8I_SNORM: return component < 3; 2868 case FORMAT_R8I: return component < 1; 2869 case FORMAT_R8UI: return component < 1; 2870 case FORMAT_G8R8I: return component < 2; 2871 case FORMAT_G8R8UI: return component < 2; 2872 case FORMAT_X8B8G8R8I: return component < 3; 2873 case FORMAT_X8B8G8R8UI: return component < 3; 2874 case FORMAT_A8B8G8R8I: return component < 3; 2875 case FORMAT_A8B8G8R8UI: return component < 3; 2876 case FORMAT_R32I: return component < 1; 2877 case FORMAT_R32UI: return component < 1; 2878 case FORMAT_G32R32I: return component < 2; 2879 case FORMAT_G32R32UI: return component < 2; 2880 case FORMAT_X32B32G32R32I: return component < 3; 2881 case FORMAT_X32B32G32R32UI: return component < 3; 2882 case FORMAT_A32B32G32R32I: return component < 3; 2883 case FORMAT_A32B32G32R32UI: return component < 3; 2884 case FORMAT_G8R8: return component < 2; 2885 case FORMAT_X8R8G8B8: return component < 3; 2886 case FORMAT_X8B8G8R8: return component < 3; 2887 case FORMAT_A8R8G8B8: return component < 3; 2888 case FORMAT_A8B8G8R8: return component < 3; 2889 case FORMAT_SRGB8_X8: return component < 3; 2890 case FORMAT_SRGB8_A8: return component < 3; 2891 case FORMAT_V8U8: return false; 2892 case FORMAT_Q8W8V8U8: return false; 2893 case FORMAT_X8L8V8U8: return false; 2894 case FORMAT_R32F: return component < 1; 2895 case FORMAT_G32R32F: return component < 2; 2896 case FORMAT_X32B32G32R32F: return component < 3; 2897 case FORMAT_A32B32G32R32F: return component < 3; 2898 case FORMAT_A8: return false; 2899 case FORMAT_R8: return component < 1; 2900 case FORMAT_L8: return component < 1; 2901 case FORMAT_A8L8: return component < 1; 2902 case FORMAT_D32F: return false; 2903 case FORMAT_D32F_LOCKABLE: return false; 2904 case FORMAT_D32FS8_TEXTURE: return false; 2905 case FORMAT_D32FS8_SHADOW: return false; 2906 case FORMAT_L16: return component < 1; 2907 case FORMAT_G16R16: return component < 2; 2908 case FORMAT_A16B16G16R16: return component < 3; 2909 case FORMAT_R16I: return component < 1; 2910 case FORMAT_R16UI: return component < 1; 2911 case FORMAT_G16R16I: return component < 2; 2912 case FORMAT_G16R16UI: return component < 2; 2913 case FORMAT_X16B16G16R16I: return component < 3; 2914 case FORMAT_X16B16G16R16UI: return component < 3; 2915 case FORMAT_A16B16G16R16I: return component < 3; 2916 case FORMAT_A16B16G16R16UI: return component < 3; 2917 case FORMAT_V16U16: return false; 2918 case FORMAT_A16W16V16U16: return false; 2919 case FORMAT_Q16W16V16U16: return false; 2920 case FORMAT_YV12_BT601: return component < 3; 2921 case FORMAT_YV12_BT709: return component < 3; 2922 case FORMAT_YV12_JFIF: return component < 3; 2923 default: 2924 ASSERT(false); 2925 } 2926 2927 return false; 2928 } 2929 } 2930