1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "PixelRoutine.hpp" 16 17 #include "SamplerCore.hpp" 18 #include "Constants.hpp" 19 #include "Renderer/Renderer.hpp" 20 #include "Renderer/QuadRasterizer.hpp" 21 #include "Renderer/Surface.hpp" 22 #include "Renderer/Primitive.hpp" 23 #include "Common/Debug.hpp" 24 25 namespace sw 26 { 27 extern bool complementaryDepthBuffer; 28 extern bool postBlendSRGB; 29 extern bool exactColorRounding; 30 extern bool forceClearRegisters; 31 PixelRoutine(const PixelProcessor::State & state,const PixelShader * shader)32 PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) 33 : QuadRasterizer(state, shader), v(shader && shader->indirectAddressableInput) 34 { 35 if(!shader || shader->getShaderModel() < 0x0200 || forceClearRegisters) 36 { 37 for(int i = 0; i < MAX_FRAGMENT_INPUTS; i++) 38 { 39 v[i].x = Float4(0.0f); 40 v[i].y = Float4(0.0f); 41 v[i].z = Float4(0.0f); 42 v[i].w = Float4(0.0f); 43 } 44 } 45 } 46 ~PixelRoutine()47 PixelRoutine::~PixelRoutine() 48 { 49 } 50 quad(Pointer<Byte> cBuffer[RENDERTARGETS],Pointer<Byte> & zBuffer,Pointer<Byte> & sBuffer,Int cMask[4],Int & x,Int & y)51 void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y) 52 { 53 #if PERF_PROFILE 54 Long pipeTime = Ticks(); 55 #endif 56 57 const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive(); 58 59 Int zMask[4]; // Depth mask 60 Int sMask[4]; // Stencil mask 61 62 for(unsigned int q = 0; q < state.multiSample; q++) 63 { 64 zMask[q] = cMask[q]; 65 sMask[q] = cMask[q]; 66 } 67 68 for(unsigned int q = 0; q < state.multiSample; q++) 69 { 70 stencilTest(sBuffer, q, x, sMask[q], cMask[q]); 71 } 72 73 Float4 f; 74 Float4 rhwCentroid; 75 76 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16); 77 78 if(interpolateZ()) 79 { 80 for(unsigned int q = 0; q < state.multiSample; q++) 81 { 82 Float4 x = xxxx; 83 84 if(state.multiSample > 1) 85 { 86 x -= *Pointer<Float4>(constants + OFFSET(Constants,X) + q * sizeof(float4)); 87 } 88 89 z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false, state.depthClamp); 90 } 91 } 92 93 Bool depthPass = false; 94 95 if(earlyDepthTest) 96 { 97 for(unsigned int q = 0; q < state.multiSample; q++) 98 { 99 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); 100 } 101 } 102 103 If(depthPass || Bool(!earlyDepthTest)) 104 { 105 #if PERF_PROFILE 106 Long interpTime = Ticks(); 107 #endif 108 109 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16); 110 111 // Centroid locations 112 Float4 XXXX = Float4(0.0f); 113 Float4 YYYY = Float4(0.0f); 114 115 if(state.centroid) 116 { 117 Float4 WWWW(1.0e-9f); 118 119 for(unsigned int q = 0; q < state.multiSample; q++) 120 { 121 XXXX += *Pointer<Float4>(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]); 122 YYYY += *Pointer<Float4>(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]); 123 WWWW += *Pointer<Float4>(constants + OFFSET(Constants,weight) + 16 * cMask[q]); 124 } 125 126 WWWW = Rcp_pp(WWWW); 127 XXXX *= WWWW; 128 YYYY *= WWWW; 129 130 XXXX += xxxx; 131 YYYY += yyyy; 132 } 133 134 if(interpolateW()) 135 { 136 w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false, false); 137 rhw = reciprocal(w, false, false, true); 138 139 if(state.centroid) 140 { 141 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false)); 142 } 143 } 144 145 for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++) 146 { 147 for(int component = 0; component < 4; component++) 148 { 149 if(state.interpolant[interpolant].component & (1 << component)) 150 { 151 if(!state.interpolant[interpolant].centroid) 152 { 153 v[interpolant][component] = interpolate(xxxx, Dv[interpolant][component], rhw, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective, false); 154 } 155 else 156 { 157 v[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective); 158 } 159 } 160 } 161 162 Float4 rcp; 163 164 switch(state.interpolant[interpolant].project) 165 { 166 case 0: 167 break; 168 case 1: 169 rcp = reciprocal(v[interpolant].y); 170 v[interpolant].x = v[interpolant].x * rcp; 171 break; 172 case 2: 173 rcp = reciprocal(v[interpolant].z); 174 v[interpolant].x = v[interpolant].x * rcp; 175 v[interpolant].y = v[interpolant].y * rcp; 176 break; 177 case 3: 178 rcp = reciprocal(v[interpolant].w); 179 v[interpolant].x = v[interpolant].x * rcp; 180 v[interpolant].y = v[interpolant].y * rcp; 181 v[interpolant].z = v[interpolant].z * rcp; 182 break; 183 } 184 } 185 186 if(state.fog.component) 187 { 188 f = interpolate(xxxx, Df, rhw, primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective, false); 189 } 190 191 setBuiltins(x, y, z, w); 192 193 #if PERF_PROFILE 194 cycles[PERF_INTERP] += Ticks() - interpTime; 195 #endif 196 197 Bool alphaPass = true; 198 199 if(colorUsed()) 200 { 201 #if PERF_PROFILE 202 Long shaderTime = Ticks(); 203 #endif 204 205 applyShader(cMask); 206 207 #if PERF_PROFILE 208 cycles[PERF_SHADER] += Ticks() - shaderTime; 209 #endif 210 211 alphaPass = alphaTest(cMask); 212 213 if((shader && shader->containsKill()) || state.alphaTestActive()) 214 { 215 for(unsigned int q = 0; q < state.multiSample; q++) 216 { 217 zMask[q] &= cMask[q]; 218 sMask[q] &= cMask[q]; 219 } 220 } 221 } 222 223 If(alphaPass) 224 { 225 if(!earlyDepthTest) 226 { 227 for(unsigned int q = 0; q < state.multiSample; q++) 228 { 229 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); 230 } 231 } 232 233 #if PERF_PROFILE 234 Long ropTime = Ticks(); 235 #endif 236 237 If(depthPass || Bool(earlyDepthTest)) 238 { 239 for(unsigned int q = 0; q < state.multiSample; q++) 240 { 241 if(state.multiSampleMask & (1 << q)) 242 { 243 writeDepth(zBuffer, q, x, z[q], zMask[q]); 244 245 if(state.occlusionEnabled) 246 { 247 occlusion += *Pointer<UInt>(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q])); 248 } 249 } 250 } 251 252 if(colorUsed()) 253 { 254 #if PERF_PROFILE 255 AddAtomic(Pointer<Long>(&profiler.ropOperations), 4); 256 #endif 257 258 rasterOperation(f, cBuffer, x, sMask, zMask, cMask); 259 } 260 } 261 262 #if PERF_PROFILE 263 cycles[PERF_ROP] += Ticks() - ropTime; 264 #endif 265 } 266 } 267 268 for(unsigned int q = 0; q < state.multiSample; q++) 269 { 270 if(state.multiSampleMask & (1 << q)) 271 { 272 writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]); 273 } 274 } 275 276 #if PERF_PROFILE 277 cycles[PERF_PIPE] += Ticks() - pipeTime; 278 #endif 279 } 280 interpolateCentroid(Float4 & x,Float4 & y,Float4 & rhw,Pointer<Byte> planeEquation,bool flat,bool perspective)281 Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective) 282 { 283 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16); 284 285 if(!flat) 286 { 287 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) + 288 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16); 289 290 if(perspective) 291 { 292 interpolant *= rhw; 293 } 294 } 295 296 return interpolant; 297 } 298 stencilTest(Pointer<Byte> & sBuffer,int q,Int & x,Int & sMask,Int & cMask)299 void PixelRoutine::stencilTest(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask) 300 { 301 if(!state.stencilActive) 302 { 303 return; 304 } 305 306 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask) 307 308 Pointer<Byte> buffer = sBuffer + 2 * x; 309 310 if(q > 0) 311 { 312 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB)); 313 } 314 315 Byte8 value = *Pointer<Byte8>(buffer); 316 Byte8 valueCCW = value; 317 318 if(!state.noStencilMask) 319 { 320 value &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].testMaskQ)); 321 } 322 323 stencilTest(value, state.stencilCompareMode, false); 324 325 if(state.twoSidedStencil) 326 { 327 if(!state.noStencilMaskCCW) 328 { 329 valueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].testMaskQ)); 330 } 331 332 stencilTest(valueCCW, state.stencilCompareModeCCW, true); 333 334 value &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)); 335 valueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)); 336 value |= valueCCW; 337 } 338 339 sMask = SignMask(value) & cMask; 340 } 341 stencilTest(Byte8 & value,StencilCompareMode stencilCompareMode,bool CCW)342 void PixelRoutine::stencilTest(Byte8 &value, StencilCompareMode stencilCompareMode, bool CCW) 343 { 344 Byte8 equal; 345 346 switch(stencilCompareMode) 347 { 348 case STENCIL_ALWAYS: 349 value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 350 break; 351 case STENCIL_NEVER: 352 value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); 353 break; 354 case STENCIL_LESS: // a < b ~ b > a 355 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); 356 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); 357 break; 358 case STENCIL_EQUAL: 359 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); 360 break; 361 case STENCIL_NOTEQUAL: // a != b ~ !(a == b) 362 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); 363 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 364 break; 365 case STENCIL_LESSEQUAL: // a <= b ~ (b > a) || (a == b) 366 equal = value; 367 equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); 368 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); 369 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); 370 value |= equal; 371 break; 372 case STENCIL_GREATER: // a > b 373 equal = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)); 374 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); 375 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value)); 376 value = equal; 377 break; 378 case STENCIL_GREATEREQUAL: // a >= b ~ !(a < b) ~ !(b > a) 379 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); 380 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); 381 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 382 break; 383 default: 384 ASSERT(false); 385 } 386 } 387 depthTest(Pointer<Byte> & zBuffer,int q,Int & x,Float4 & z,Int & sMask,Int & zMask,Int & cMask)388 Bool PixelRoutine::depthTest(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask) 389 { 390 if(!state.depthTestActive) 391 { 392 return true; 393 } 394 395 Float4 Z = z; 396 397 if(shader && shader->depthOverride()) 398 { 399 if(complementaryDepthBuffer) 400 { 401 Z = Float4(1.0f) - oDepth; 402 } 403 else 404 { 405 Z = oDepth; 406 } 407 } 408 409 Pointer<Byte> buffer; 410 Int pitch; 411 412 if(!state.quadLayoutDepthBuffer) 413 { 414 buffer = zBuffer + 4 * x; 415 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); 416 } 417 else 418 { 419 buffer = zBuffer + 8 * x; 420 } 421 422 if(q > 0) 423 { 424 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB)); 425 } 426 427 Float4 zValue; 428 429 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable)) 430 { 431 if(!state.quadLayoutDepthBuffer) 432 { 433 // FIXME: Properly optimizes? 434 zValue.xy = *Pointer<Float4>(buffer); 435 zValue.zw = *Pointer<Float4>(buffer + pitch - 8); 436 } 437 else 438 { 439 zValue = *Pointer<Float4>(buffer, 16); 440 } 441 } 442 443 Int4 zTest; 444 445 switch(state.depthCompareMode) 446 { 447 case DEPTH_ALWAYS: 448 // Optimized 449 break; 450 case DEPTH_NEVER: 451 // Optimized 452 break; 453 case DEPTH_EQUAL: 454 zTest = CmpEQ(zValue, Z); 455 break; 456 case DEPTH_NOTEQUAL: 457 zTest = CmpNEQ(zValue, Z); 458 break; 459 case DEPTH_LESS: 460 if(complementaryDepthBuffer) 461 { 462 zTest = CmpLT(zValue, Z); 463 } 464 else 465 { 466 zTest = CmpNLE(zValue, Z); 467 } 468 break; 469 case DEPTH_GREATEREQUAL: 470 if(complementaryDepthBuffer) 471 { 472 zTest = CmpNLT(zValue, Z); 473 } 474 else 475 { 476 zTest = CmpLE(zValue, Z); 477 } 478 break; 479 case DEPTH_LESSEQUAL: 480 if(complementaryDepthBuffer) 481 { 482 zTest = CmpLE(zValue, Z); 483 } 484 else 485 { 486 zTest = CmpNLT(zValue, Z); 487 } 488 break; 489 case DEPTH_GREATER: 490 if(complementaryDepthBuffer) 491 { 492 zTest = CmpNLE(zValue, Z); 493 } 494 else 495 { 496 zTest = CmpLT(zValue, Z); 497 } 498 break; 499 default: 500 ASSERT(false); 501 } 502 503 switch(state.depthCompareMode) 504 { 505 case DEPTH_ALWAYS: 506 zMask = cMask; 507 break; 508 case DEPTH_NEVER: 509 zMask = 0x0; 510 break; 511 default: 512 zMask = SignMask(zTest) & cMask; 513 break; 514 } 515 516 if(state.stencilActive) 517 { 518 zMask &= sMask; 519 } 520 521 return zMask != 0; 522 } 523 alphaTest(Int & aMask,Short4 & alpha)524 void PixelRoutine::alphaTest(Int &aMask, Short4 &alpha) 525 { 526 Short4 cmp; 527 Short4 equal; 528 529 switch(state.alphaCompareMode) 530 { 531 case ALPHA_ALWAYS: 532 aMask = 0xF; 533 break; 534 case ALPHA_NEVER: 535 aMask = 0x0; 536 break; 537 case ALPHA_EQUAL: 538 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); 539 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 540 break; 541 case ALPHA_NOTEQUAL: // a != b ~ !(a == b) 542 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME 543 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 544 break; 545 case ALPHA_LESS: // a < b ~ b > a 546 cmp = CmpGT(*Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)), alpha); 547 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 548 break; 549 case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate 550 equal = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); 551 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); 552 cmp |= equal; 553 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 554 break; 555 case ALPHA_LESSEQUAL: // a <= b ~ !(a > b) 556 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME 557 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 558 break; 559 case ALPHA_GREATER: // a > b 560 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); 561 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 562 break; 563 default: 564 ASSERT(false); 565 } 566 } 567 alphaToCoverage(Int cMask[4],Float4 & alpha)568 void PixelRoutine::alphaToCoverage(Int cMask[4], Float4 &alpha) 569 { 570 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c0))); 571 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c1))); 572 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c2))); 573 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c3))); 574 575 Int aMask0 = SignMask(coverage0); 576 Int aMask1 = SignMask(coverage1); 577 Int aMask2 = SignMask(coverage2); 578 Int aMask3 = SignMask(coverage3); 579 580 cMask[0] &= aMask0; 581 cMask[1] &= aMask1; 582 cMask[2] &= aMask2; 583 cMask[3] &= aMask3; 584 } 585 fogBlend(Vector4f & c0,Float4 & fog)586 void PixelRoutine::fogBlend(Vector4f &c0, Float4 &fog) 587 { 588 if(!state.fogActive) 589 { 590 return; 591 } 592 593 if(state.pixelFogMode != FOG_NONE) 594 { 595 pixelFog(fog); 596 597 fog = Min(fog, Float4(1.0f)); 598 fog = Max(fog, Float4(0.0f)); 599 } 600 601 c0.x -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0])); 602 c0.y -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1])); 603 c0.z -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2])); 604 605 c0.x *= fog; 606 c0.y *= fog; 607 c0.z *= fog; 608 609 c0.x += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0])); 610 c0.y += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1])); 611 c0.z += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2])); 612 } 613 pixelFog(Float4 & visibility)614 void PixelRoutine::pixelFog(Float4 &visibility) 615 { 616 Float4 &zw = visibility; 617 618 if(state.pixelFogMode != FOG_NONE) 619 { 620 if(state.wBasedFog) 621 { 622 zw = rhw; 623 } 624 else 625 { 626 if(complementaryDepthBuffer) 627 { 628 zw = Float4(1.0f) - z[0]; 629 } 630 else 631 { 632 zw = z[0]; 633 } 634 } 635 } 636 637 switch(state.pixelFogMode) 638 { 639 case FOG_NONE: 640 break; 641 case FOG_LINEAR: 642 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.scale)); 643 zw += *Pointer<Float4>(data + OFFSET(DrawData,fog.offset)); 644 break; 645 case FOG_EXP: 646 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE)); 647 zw = exponential2(zw, true); 648 break; 649 case FOG_EXP2: 650 zw *= zw; 651 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E)); 652 zw = exponential2(zw, true); 653 break; 654 default: 655 ASSERT(false); 656 } 657 } 658 writeDepth(Pointer<Byte> & zBuffer,int q,Int & x,Float4 & z,Int & zMask)659 void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask) 660 { 661 if(!state.depthWriteEnable) 662 { 663 return; 664 } 665 666 Float4 Z = z; 667 668 if(shader && shader->depthOverride()) 669 { 670 if(complementaryDepthBuffer) 671 { 672 Z = Float4(1.0f) - oDepth; 673 } 674 else 675 { 676 Z = oDepth; 677 } 678 } 679 680 Pointer<Byte> buffer; 681 Int pitch; 682 683 if(!state.quadLayoutDepthBuffer) 684 { 685 buffer = zBuffer + 4 * x; 686 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); 687 } 688 else 689 { 690 buffer = zBuffer + 8 * x; 691 } 692 693 if(q > 0) 694 { 695 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB)); 696 } 697 698 Float4 zValue; 699 700 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable)) 701 { 702 if(!state.quadLayoutDepthBuffer) 703 { 704 // FIXME: Properly optimizes? 705 zValue.xy = *Pointer<Float4>(buffer); 706 zValue.zw = *Pointer<Float4>(buffer + pitch - 8); 707 } 708 else 709 { 710 zValue = *Pointer<Float4>(buffer, 16); 711 } 712 } 713 714 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16)); 715 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16)); 716 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue)); 717 718 if(!state.quadLayoutDepthBuffer) 719 { 720 // FIXME: Properly optimizes? 721 *Pointer<Float2>(buffer) = Float2(Z.xy); 722 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw); 723 } 724 else 725 { 726 *Pointer<Float4>(buffer, 16) = Z; 727 } 728 } 729 writeStencil(Pointer<Byte> & sBuffer,int q,Int & x,Int & sMask,Int & zMask,Int & cMask)730 void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask) 731 { 732 if(!state.stencilActive) 733 { 734 return; 735 } 736 737 if(state.stencilPassOperation == OPERATION_KEEP && state.stencilZFailOperation == OPERATION_KEEP && state.stencilFailOperation == OPERATION_KEEP) 738 { 739 if(!state.twoSidedStencil || (state.stencilPassOperationCCW == OPERATION_KEEP && state.stencilZFailOperationCCW == OPERATION_KEEP && state.stencilFailOperationCCW == OPERATION_KEEP)) 740 { 741 return; 742 } 743 } 744 745 if(state.stencilWriteMasked && (!state.twoSidedStencil || state.stencilWriteMaskedCCW)) 746 { 747 return; 748 } 749 750 Pointer<Byte> buffer = sBuffer + 2 * x; 751 752 if(q > 0) 753 { 754 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB)); 755 } 756 757 Byte8 bufferValue = *Pointer<Byte8>(buffer); 758 759 Byte8 newValue; 760 stencilOperation(newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask); 761 762 if(!state.noStencilWriteMask) 763 { 764 Byte8 maskedValue = bufferValue; 765 newValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].writeMaskQ)); 766 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].invWriteMaskQ)); 767 newValue |= maskedValue; 768 } 769 770 if(state.twoSidedStencil) 771 { 772 Byte8 newValueCCW; 773 774 stencilOperation(newValueCCW, bufferValue, state.stencilPassOperationCCW, state.stencilZFailOperationCCW, state.stencilFailOperationCCW, true, zMask, sMask); 775 776 if(!state.noStencilWriteMaskCCW) 777 { 778 Byte8 maskedValue = bufferValue; 779 newValueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].writeMaskQ)); 780 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].invWriteMaskQ)); 781 newValueCCW |= maskedValue; 782 } 783 784 newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)); 785 newValueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)); 786 newValue |= newValueCCW; 787 } 788 789 newValue &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * cMask); 790 bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask); 791 newValue |= bufferValue; 792 793 *Pointer<Byte4>(buffer) = Byte4(newValue); 794 } 795 stencilOperation(Byte8 & newValue,Byte8 & bufferValue,StencilOperation stencilPassOperation,StencilOperation stencilZFailOperation,StencilOperation stencilFailOperation,bool CCW,Int & zMask,Int & sMask)796 void PixelRoutine::stencilOperation(Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask) 797 { 798 Byte8 &pass = newValue; 799 Byte8 fail; 800 Byte8 zFail; 801 802 stencilOperation(pass, bufferValue, stencilPassOperation, CCW); 803 804 if(stencilZFailOperation != stencilPassOperation) 805 { 806 stencilOperation(zFail, bufferValue, stencilZFailOperation, CCW); 807 } 808 809 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation) 810 { 811 stencilOperation(fail, bufferValue, stencilFailOperation, CCW); 812 } 813 814 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation) 815 { 816 if(state.depthTestActive && stencilZFailOperation != stencilPassOperation) // zMask valid and values not the same 817 { 818 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * zMask); 819 zFail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask); 820 pass |= zFail; 821 } 822 823 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * sMask); 824 fail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask); 825 pass |= fail; 826 } 827 } 828 stencilOperation(Byte8 & output,Byte8 & bufferValue,StencilOperation operation,bool CCW)829 void PixelRoutine::stencilOperation(Byte8 &output, Byte8 &bufferValue, StencilOperation operation, bool CCW) 830 { 831 switch(operation) 832 { 833 case OPERATION_KEEP: 834 output = bufferValue; 835 break; 836 case OPERATION_ZERO: 837 output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); 838 break; 839 case OPERATION_REPLACE: 840 output = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceQ)); 841 break; 842 case OPERATION_INCRSAT: 843 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1)); 844 break; 845 case OPERATION_DECRSAT: 846 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1)); 847 break; 848 case OPERATION_INVERT: 849 output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 850 break; 851 case OPERATION_INCR: 852 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1); 853 break; 854 case OPERATION_DECR: 855 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1); 856 break; 857 default: 858 ASSERT(false); 859 } 860 } 861 blendFactor(Vector4s & blendFactor,const Vector4s & current,const Vector4s & pixel,BlendFactor blendFactorActive)862 void PixelRoutine::blendFactor(Vector4s &blendFactor, const Vector4s ¤t, const Vector4s &pixel, BlendFactor blendFactorActive) 863 { 864 switch(blendFactorActive) 865 { 866 case BLEND_ZERO: 867 // Optimized 868 break; 869 case BLEND_ONE: 870 // Optimized 871 break; 872 case BLEND_SOURCE: 873 blendFactor.x = current.x; 874 blendFactor.y = current.y; 875 blendFactor.z = current.z; 876 break; 877 case BLEND_INVSOURCE: 878 blendFactor.x = Short4(0xFFFFu) - current.x; 879 blendFactor.y = Short4(0xFFFFu) - current.y; 880 blendFactor.z = Short4(0xFFFFu) - current.z; 881 break; 882 case BLEND_DEST: 883 blendFactor.x = pixel.x; 884 blendFactor.y = pixel.y; 885 blendFactor.z = pixel.z; 886 break; 887 case BLEND_INVDEST: 888 blendFactor.x = Short4(0xFFFFu) - pixel.x; 889 blendFactor.y = Short4(0xFFFFu) - pixel.y; 890 blendFactor.z = Short4(0xFFFFu) - pixel.z; 891 break; 892 case BLEND_SOURCEALPHA: 893 blendFactor.x = current.w; 894 blendFactor.y = current.w; 895 blendFactor.z = current.w; 896 break; 897 case BLEND_INVSOURCEALPHA: 898 blendFactor.x = Short4(0xFFFFu) - current.w; 899 blendFactor.y = Short4(0xFFFFu) - current.w; 900 blendFactor.z = Short4(0xFFFFu) - current.w; 901 break; 902 case BLEND_DESTALPHA: 903 blendFactor.x = pixel.w; 904 blendFactor.y = pixel.w; 905 blendFactor.z = pixel.w; 906 break; 907 case BLEND_INVDESTALPHA: 908 blendFactor.x = Short4(0xFFFFu) - pixel.w; 909 blendFactor.y = Short4(0xFFFFu) - pixel.w; 910 blendFactor.z = Short4(0xFFFFu) - pixel.w; 911 break; 912 case BLEND_SRCALPHASAT: 913 blendFactor.x = Short4(0xFFFFu) - pixel.w; 914 blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w)); 915 blendFactor.y = blendFactor.x; 916 blendFactor.z = blendFactor.x; 917 break; 918 case BLEND_CONSTANT: 919 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[0])); 920 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[1])); 921 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[2])); 922 break; 923 case BLEND_INVCONSTANT: 924 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[0])); 925 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[1])); 926 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[2])); 927 break; 928 case BLEND_CONSTANTALPHA: 929 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); 930 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); 931 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); 932 break; 933 case BLEND_INVCONSTANTALPHA: 934 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); 935 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); 936 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); 937 break; 938 default: 939 ASSERT(false); 940 } 941 } 942 blendFactorAlpha(Vector4s & blendFactor,const Vector4s & current,const Vector4s & pixel,BlendFactor blendFactorAlphaActive)943 void PixelRoutine::blendFactorAlpha(Vector4s &blendFactor, const Vector4s ¤t, const Vector4s &pixel, BlendFactor blendFactorAlphaActive) 944 { 945 switch(blendFactorAlphaActive) 946 { 947 case BLEND_ZERO: 948 // Optimized 949 break; 950 case BLEND_ONE: 951 // Optimized 952 break; 953 case BLEND_SOURCE: 954 blendFactor.w = current.w; 955 break; 956 case BLEND_INVSOURCE: 957 blendFactor.w = Short4(0xFFFFu) - current.w; 958 break; 959 case BLEND_DEST: 960 blendFactor.w = pixel.w; 961 break; 962 case BLEND_INVDEST: 963 blendFactor.w = Short4(0xFFFFu) - pixel.w; 964 break; 965 case BLEND_SOURCEALPHA: 966 blendFactor.w = current.w; 967 break; 968 case BLEND_INVSOURCEALPHA: 969 blendFactor.w = Short4(0xFFFFu) - current.w; 970 break; 971 case BLEND_DESTALPHA: 972 blendFactor.w = pixel.w; 973 break; 974 case BLEND_INVDESTALPHA: 975 blendFactor.w = Short4(0xFFFFu) - pixel.w; 976 break; 977 case BLEND_SRCALPHASAT: 978 blendFactor.w = Short4(0xFFFFu); 979 break; 980 case BLEND_CONSTANT: 981 case BLEND_CONSTANTALPHA: 982 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); 983 break; 984 case BLEND_INVCONSTANT: 985 case BLEND_INVCONSTANTALPHA: 986 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); 987 break; 988 default: 989 ASSERT(false); 990 } 991 } 992 isSRGB(int index) const993 bool PixelRoutine::isSRGB(int index) const 994 { 995 return Surface::isSRGBformat(state.targetFormat[index]); 996 } 997 readPixel(int index,Pointer<Byte> & cBuffer,Int & x,Vector4s & pixel)998 void PixelRoutine::readPixel(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &pixel) 999 { 1000 Short4 c01; 1001 Short4 c23; 1002 Pointer<Byte> buffer; 1003 Pointer<Byte> buffer2; 1004 1005 switch(state.targetFormat[index]) 1006 { 1007 case FORMAT_R5G6B5: 1008 buffer = cBuffer + 2 * x; 1009 buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1010 c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2))); 1011 1012 pixel.x = c01 & Short4(0xF800u); 1013 pixel.y = (c01 & Short4(0x07E0u)) << 5; 1014 pixel.z = (c01 & Short4(0x001Fu)) << 11; 1015 pixel.w = Short4(0xFFFFu); 1016 break; 1017 case FORMAT_A8R8G8B8: 1018 buffer = cBuffer + 4 * x; 1019 c01 = *Pointer<Short4>(buffer); 1020 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1021 c23 = *Pointer<Short4>(buffer); 1022 pixel.z = c01; 1023 pixel.y = c01; 1024 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); 1025 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); 1026 pixel.x = pixel.z; 1027 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); 1028 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); 1029 pixel.y = pixel.z; 1030 pixel.w = pixel.x; 1031 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); 1032 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); 1033 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); 1034 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1035 break; 1036 case FORMAT_A8B8G8R8: 1037 case FORMAT_SRGB8_A8: 1038 buffer = cBuffer + 4 * x; 1039 c01 = *Pointer<Short4>(buffer); 1040 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1041 c23 = *Pointer<Short4>(buffer); 1042 pixel.z = c01; 1043 pixel.y = c01; 1044 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); 1045 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); 1046 pixel.x = pixel.z; 1047 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); 1048 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); 1049 pixel.y = pixel.z; 1050 pixel.w = pixel.x; 1051 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); 1052 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); 1053 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1054 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1055 break; 1056 case FORMAT_A8: 1057 buffer = cBuffer + 1 * x; 1058 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 0); 1059 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1060 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 1); 1061 pixel.w = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1062 pixel.x = Short4(0x0000); 1063 pixel.y = Short4(0x0000); 1064 pixel.z = Short4(0x0000); 1065 break; 1066 case FORMAT_R8: 1067 buffer = cBuffer + 1 * x; 1068 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0); 1069 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1070 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1); 1071 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); 1072 pixel.y = Short4(0x0000); 1073 pixel.z = Short4(0x0000); 1074 pixel.w = Short4(0xFFFFu); 1075 break; 1076 case FORMAT_X8R8G8B8: 1077 buffer = cBuffer + 4 * x; 1078 c01 = *Pointer<Short4>(buffer); 1079 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1080 c23 = *Pointer<Short4>(buffer); 1081 pixel.z = c01; 1082 pixel.y = c01; 1083 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); 1084 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); 1085 pixel.x = pixel.z; 1086 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); 1087 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); 1088 pixel.y = pixel.z; 1089 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); 1090 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); 1091 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); 1092 pixel.w = Short4(0xFFFFu); 1093 break; 1094 case FORMAT_G8R8: 1095 buffer = cBuffer + 2 * x; 1096 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0)); 1097 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1098 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1)); 1099 pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8); 1100 pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8); 1101 pixel.z = Short4(0x0000u); 1102 pixel.w = Short4(0xFFFFu); 1103 break; 1104 case FORMAT_X8B8G8R8: 1105 case FORMAT_SRGB8_X8: 1106 buffer = cBuffer + 4 * x; 1107 c01 = *Pointer<Short4>(buffer); 1108 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1109 c23 = *Pointer<Short4>(buffer); 1110 pixel.z = c01; 1111 pixel.y = c01; 1112 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); 1113 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); 1114 pixel.x = pixel.z; 1115 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); 1116 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); 1117 pixel.y = pixel.z; 1118 pixel.w = pixel.x; 1119 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); 1120 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); 1121 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1122 pixel.w = Short4(0xFFFFu); 1123 break; 1124 case FORMAT_A8G8R8B8Q: 1125 UNIMPLEMENTED(); 1126 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0)); 1127 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0)); 1128 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8)); 1129 // pixel.w = UnpackHigh(As<Byte8>(pixel.w), *Pointer<Byte8>(cBuffer + 8 * x + 8)); 1130 break; 1131 case FORMAT_X8G8R8B8Q: 1132 UNIMPLEMENTED(); 1133 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0)); 1134 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0)); 1135 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8)); 1136 // pixel.w = Short4(0xFFFFu); 1137 break; 1138 case FORMAT_A16B16G16R16: 1139 buffer = cBuffer; 1140 pixel.x = *Pointer<Short4>(buffer + 8 * x); 1141 pixel.y = *Pointer<Short4>(buffer + 8 * x + 8); 1142 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1143 pixel.z = *Pointer<Short4>(buffer + 8 * x); 1144 pixel.w = *Pointer<Short4>(buffer + 8 * x + 8); 1145 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); 1146 break; 1147 case FORMAT_G16R16: 1148 buffer = cBuffer; 1149 pixel.x = *Pointer<Short4>(buffer + 4 * x); 1150 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1151 pixel.y = *Pointer<Short4>(buffer + 4 * x); 1152 pixel.z = pixel.x; 1153 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y)); 1154 pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y)); 1155 pixel.y = pixel.z; 1156 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z)); 1157 pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z)); 1158 pixel.z = Short4(0xFFFFu); 1159 pixel.w = Short4(0xFFFFu); 1160 break; 1161 default: 1162 ASSERT(false); 1163 } 1164 1165 if((postBlendSRGB && state.writeSRGB) || isSRGB(index)) 1166 { 1167 sRGBtoLinear16_12_16(pixel); 1168 } 1169 } 1170 alphaBlend(int index,Pointer<Byte> & cBuffer,Vector4s & current,Int & x)1171 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4s ¤t, Int &x) 1172 { 1173 if(!state.alphaBlendActive) 1174 { 1175 return; 1176 } 1177 1178 Vector4s pixel; 1179 readPixel(index, cBuffer, x, pixel); 1180 1181 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor 1182 Vector4s sourceFactor; 1183 Vector4s destFactor; 1184 1185 blendFactor(sourceFactor, current, pixel, state.sourceBlendFactor); 1186 blendFactor(destFactor, current, pixel, state.destBlendFactor); 1187 1188 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO) 1189 { 1190 current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x)); 1191 current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y)); 1192 current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z)); 1193 } 1194 1195 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO) 1196 { 1197 pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x)); 1198 pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y)); 1199 pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z)); 1200 } 1201 1202 switch(state.blendOperation) 1203 { 1204 case BLENDOP_ADD: 1205 current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x)); 1206 current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y)); 1207 current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z)); 1208 break; 1209 case BLENDOP_SUB: 1210 current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x)); 1211 current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y)); 1212 current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z)); 1213 break; 1214 case BLENDOP_INVSUB: 1215 current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x)); 1216 current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y)); 1217 current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z)); 1218 break; 1219 case BLENDOP_MIN: 1220 current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x)); 1221 current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y)); 1222 current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z)); 1223 break; 1224 case BLENDOP_MAX: 1225 current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x)); 1226 current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y)); 1227 current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z)); 1228 break; 1229 case BLENDOP_SOURCE: 1230 // No operation 1231 break; 1232 case BLENDOP_DEST: 1233 current.x = pixel.x; 1234 current.y = pixel.y; 1235 current.z = pixel.z; 1236 break; 1237 case BLENDOP_NULL: 1238 current.x = Short4(0x0000); 1239 current.y = Short4(0x0000); 1240 current.z = Short4(0x0000); 1241 break; 1242 default: 1243 ASSERT(false); 1244 } 1245 1246 blendFactorAlpha(sourceFactor, current, pixel, state.sourceBlendFactorAlpha); 1247 blendFactorAlpha(destFactor, current, pixel, state.destBlendFactorAlpha); 1248 1249 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO) 1250 { 1251 current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w)); 1252 } 1253 1254 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO) 1255 { 1256 pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w)); 1257 } 1258 1259 switch(state.blendOperationAlpha) 1260 { 1261 case BLENDOP_ADD: 1262 current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w)); 1263 break; 1264 case BLENDOP_SUB: 1265 current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w)); 1266 break; 1267 case BLENDOP_INVSUB: 1268 current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w)); 1269 break; 1270 case BLENDOP_MIN: 1271 current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w)); 1272 break; 1273 case BLENDOP_MAX: 1274 current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w)); 1275 break; 1276 case BLENDOP_SOURCE: 1277 // No operation 1278 break; 1279 case BLENDOP_DEST: 1280 current.w = pixel.w; 1281 break; 1282 case BLENDOP_NULL: 1283 current.w = Short4(0x0000); 1284 break; 1285 default: 1286 ASSERT(false); 1287 } 1288 } 1289 logicOperation(int index,Pointer<Byte> & cBuffer,Vector4s & current,Int & x)1290 void PixelRoutine::logicOperation(int index, Pointer<Byte> &cBuffer, Vector4s ¤t, Int &x) 1291 { 1292 if(state.logicalOperation == LOGICALOP_COPY) 1293 { 1294 return; 1295 } 1296 1297 Vector4s pixel; 1298 readPixel(index, cBuffer, x, pixel); 1299 1300 switch(state.logicalOperation) 1301 { 1302 case LOGICALOP_CLEAR: 1303 current.x = UShort4(0); 1304 current.y = UShort4(0); 1305 current.z = UShort4(0); 1306 break; 1307 case LOGICALOP_SET: 1308 current.x = UShort4(0xFFFFu); 1309 current.y = UShort4(0xFFFFu); 1310 current.z = UShort4(0xFFFFu); 1311 break; 1312 case LOGICALOP_COPY: 1313 ASSERT(false); // Optimized out 1314 break; 1315 case LOGICALOP_COPY_INVERTED: 1316 current.x = ~current.x; 1317 current.y = ~current.y; 1318 current.z = ~current.z; 1319 break; 1320 case LOGICALOP_NOOP: 1321 current.x = pixel.x; 1322 current.y = pixel.y; 1323 current.z = pixel.z; 1324 break; 1325 case LOGICALOP_INVERT: 1326 current.x = ~pixel.x; 1327 current.y = ~pixel.y; 1328 current.z = ~pixel.z; 1329 break; 1330 case LOGICALOP_AND: 1331 current.x = pixel.x & current.x; 1332 current.y = pixel.y & current.y; 1333 current.z = pixel.z & current.z; 1334 break; 1335 case LOGICALOP_NAND: 1336 current.x = ~(pixel.x & current.x); 1337 current.y = ~(pixel.y & current.y); 1338 current.z = ~(pixel.z & current.z); 1339 break; 1340 case LOGICALOP_OR: 1341 current.x = pixel.x | current.x; 1342 current.y = pixel.y | current.y; 1343 current.z = pixel.z | current.z; 1344 break; 1345 case LOGICALOP_NOR: 1346 current.x = ~(pixel.x | current.x); 1347 current.y = ~(pixel.y | current.y); 1348 current.z = ~(pixel.z | current.z); 1349 break; 1350 case LOGICALOP_XOR: 1351 current.x = pixel.x ^ current.x; 1352 current.y = pixel.y ^ current.y; 1353 current.z = pixel.z ^ current.z; 1354 break; 1355 case LOGICALOP_EQUIV: 1356 current.x = ~(pixel.x ^ current.x); 1357 current.y = ~(pixel.y ^ current.y); 1358 current.z = ~(pixel.z ^ current.z); 1359 break; 1360 case LOGICALOP_AND_REVERSE: 1361 current.x = ~pixel.x & current.x; 1362 current.y = ~pixel.y & current.y; 1363 current.z = ~pixel.z & current.z; 1364 break; 1365 case LOGICALOP_AND_INVERTED: 1366 current.x = pixel.x & ~current.x; 1367 current.y = pixel.y & ~current.y; 1368 current.z = pixel.z & ~current.z; 1369 break; 1370 case LOGICALOP_OR_REVERSE: 1371 current.x = ~pixel.x | current.x; 1372 current.y = ~pixel.y | current.y; 1373 current.z = ~pixel.z | current.z; 1374 break; 1375 case LOGICALOP_OR_INVERTED: 1376 current.x = pixel.x | ~current.x; 1377 current.y = pixel.y | ~current.y; 1378 current.z = pixel.z | ~current.z; 1379 break; 1380 default: 1381 ASSERT(false); 1382 } 1383 } 1384 writeColor(int index,Pointer<Byte> & cBuffer,Int & x,Vector4s & current,Int & sMask,Int & zMask,Int & cMask)1385 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s ¤t, Int &sMask, Int &zMask, Int &cMask) 1386 { 1387 if((postBlendSRGB && state.writeSRGB) || isSRGB(index)) 1388 { 1389 linearToSRGB16_12_16(current); 1390 } 1391 1392 if(exactColorRounding) 1393 { 1394 switch(state.targetFormat[index]) 1395 { 1396 case FORMAT_R5G6B5: 1397 current.x = AddSat(As<UShort4>(current.x), UShort4(0x0400)); 1398 current.y = AddSat(As<UShort4>(current.y), UShort4(0x0200)); 1399 current.z = AddSat(As<UShort4>(current.z), UShort4(0x0400)); 1400 break; 1401 case FORMAT_X8G8R8B8Q: 1402 case FORMAT_A8G8R8B8Q: 1403 case FORMAT_X8R8G8B8: 1404 case FORMAT_X8B8G8R8: 1405 case FORMAT_A8R8G8B8: 1406 case FORMAT_A8B8G8R8: 1407 case FORMAT_SRGB8_X8: 1408 case FORMAT_SRGB8_A8: 1409 case FORMAT_G8R8: 1410 case FORMAT_R8: 1411 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080); 1412 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080); 1413 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080); 1414 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080); 1415 break; 1416 default: 1417 break; 1418 } 1419 } 1420 1421 int rgbaWriteMask = state.colorWriteActive(index); 1422 int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2; 1423 1424 switch(state.targetFormat[index]) 1425 { 1426 case FORMAT_R5G6B5: 1427 { 1428 current.x = current.x & Short4(0xF800u); 1429 current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5; 1430 current.z = As<UShort4>(current.z) >> 11; 1431 1432 current.x = current.x | current.y | current.z; 1433 } 1434 break; 1435 case FORMAT_X8G8R8B8Q: 1436 UNIMPLEMENTED(); 1437 // current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1438 // current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1439 // current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1440 1441 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x))); 1442 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y))); 1443 break; 1444 case FORMAT_A8G8R8B8Q: 1445 UNIMPLEMENTED(); 1446 // current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1447 // current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1448 // current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1449 // current.w = As<Short4>(As<UShort4>(current.w) >> 8); 1450 1451 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x))); 1452 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w))); 1453 break; 1454 case FORMAT_X8R8G8B8: 1455 case FORMAT_A8R8G8B8: 1456 if(state.targetFormat[index] == FORMAT_X8R8G8B8 || rgbaWriteMask == 0x7) 1457 { 1458 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1459 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1460 current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1461 1462 current.z = As<Short4>(PackUnsigned(current.z, current.x)); 1463 current.y = As<Short4>(PackUnsigned(current.y, current.y)); 1464 1465 current.x = current.z; 1466 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); 1467 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); 1468 current.y = current.z; 1469 current.z = As<Short4>(UnpackLow(current.z, current.x)); 1470 current.y = As<Short4>(UnpackHigh(current.y, current.x)); 1471 } 1472 else 1473 { 1474 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1475 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1476 current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1477 current.w = As<Short4>(As<UShort4>(current.w) >> 8); 1478 1479 current.z = As<Short4>(PackUnsigned(current.z, current.x)); 1480 current.y = As<Short4>(PackUnsigned(current.y, current.w)); 1481 1482 current.x = current.z; 1483 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); 1484 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); 1485 current.y = current.z; 1486 current.z = As<Short4>(UnpackLow(current.z, current.x)); 1487 current.y = As<Short4>(UnpackHigh(current.y, current.x)); 1488 } 1489 break; 1490 case FORMAT_X8B8G8R8: 1491 case FORMAT_A8B8G8R8: 1492 case FORMAT_SRGB8_X8: 1493 case FORMAT_SRGB8_A8: 1494 if(state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8 || rgbaWriteMask == 0x7) 1495 { 1496 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1497 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1498 current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1499 1500 current.z = As<Short4>(PackUnsigned(current.x, current.z)); 1501 current.y = As<Short4>(PackUnsigned(current.y, current.y)); 1502 1503 current.x = current.z; 1504 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); 1505 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); 1506 current.y = current.z; 1507 current.z = As<Short4>(UnpackLow(current.z, current.x)); 1508 current.y = As<Short4>(UnpackHigh(current.y, current.x)); 1509 } 1510 else 1511 { 1512 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1513 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1514 current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1515 current.w = As<Short4>(As<UShort4>(current.w) >> 8); 1516 1517 current.z = As<Short4>(PackUnsigned(current.x, current.z)); 1518 current.y = As<Short4>(PackUnsigned(current.y, current.w)); 1519 1520 current.x = current.z; 1521 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); 1522 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); 1523 current.y = current.z; 1524 current.z = As<Short4>(UnpackLow(current.z, current.x)); 1525 current.y = As<Short4>(UnpackHigh(current.y, current.x)); 1526 } 1527 break; 1528 case FORMAT_G8R8: 1529 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1530 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1531 current.x = As<Short4>(PackUnsigned(current.x, current.x)); 1532 current.y = As<Short4>(PackUnsigned(current.y, current.y)); 1533 current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y)); 1534 break; 1535 case FORMAT_R8: 1536 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1537 current.x = As<Short4>(PackUnsigned(current.x, current.x)); 1538 break; 1539 case FORMAT_A8: 1540 current.w = As<Short4>(As<UShort4>(current.w) >> 8); 1541 current.w = As<Short4>(PackUnsigned(current.w, current.w)); 1542 break; 1543 case FORMAT_G16R16: 1544 current.z = current.x; 1545 current.x = As<Short4>(UnpackLow(current.x, current.y)); 1546 current.z = As<Short4>(UnpackHigh(current.z, current.y)); 1547 current.y = current.z; 1548 break; 1549 case FORMAT_A16B16G16R16: 1550 transpose4x4(current.x, current.y, current.z, current.w); 1551 break; 1552 default: 1553 ASSERT(false); 1554 } 1555 1556 Short4 c01 = current.z; 1557 Short4 c23 = current.y; 1558 1559 Int xMask; // Combination of all masks 1560 1561 if(state.depthTestActive) 1562 { 1563 xMask = zMask; 1564 } 1565 else 1566 { 1567 xMask = cMask; 1568 } 1569 1570 if(state.stencilActive) 1571 { 1572 xMask &= sMask; 1573 } 1574 1575 switch(state.targetFormat[index]) 1576 { 1577 case FORMAT_R5G6B5: 1578 { 1579 Pointer<Byte> buffer = cBuffer + 2 * x; 1580 Int value = *Pointer<Int>(buffer); 1581 1582 Int c01 = Extract(As<Int2>(current.x), 0); 1583 1584 if((bgraWriteMask & 0x00000007) != 0x00000007) 1585 { 1586 Int masked = value; 1587 c01 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0])); 1588 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0])); 1589 c01 |= masked; 1590 } 1591 1592 c01 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8); 1593 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][0]) + xMask * 8); 1594 c01 |= value; 1595 *Pointer<Int>(buffer) = c01; 1596 1597 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1598 value = *Pointer<Int>(buffer); 1599 1600 Int c23 = Extract(As<Int2>(current.x), 1); 1601 1602 if((bgraWriteMask & 0x00000007) != 0x00000007) 1603 { 1604 Int masked = value; 1605 c23 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0])); 1606 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0])); 1607 c23 |= masked; 1608 } 1609 1610 c23 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8); 1611 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][2]) + xMask * 8); 1612 c23 |= value; 1613 *Pointer<Int>(buffer) = c23; 1614 } 1615 break; 1616 case FORMAT_A8G8R8B8Q: 1617 case FORMAT_X8G8R8B8Q: // FIXME: Don't touch alpha? 1618 UNIMPLEMENTED(); 1619 // value = *Pointer<Short4>(cBuffer + 8 * x + 0); 1620 1621 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) || 1622 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) && 1623 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 1624 // { 1625 // Short4 masked = value; 1626 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); 1627 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); 1628 // c01 |= masked; 1629 // } 1630 1631 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); 1632 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); 1633 // c01 |= value; 1634 // *Pointer<Short4>(cBuffer + 8 * x + 0) = c01; 1635 1636 // value = *Pointer<Short4>(cBuffer + 8 * x + 8); 1637 1638 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) || 1639 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) && 1640 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 1641 // { 1642 // Short4 masked = value; 1643 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); 1644 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); 1645 // c23 |= masked; 1646 // } 1647 1648 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); 1649 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); 1650 // c23 |= value; 1651 // *Pointer<Short4>(cBuffer + 8 * x + 8) = c23; 1652 break; 1653 case FORMAT_A8R8G8B8: 1654 case FORMAT_X8R8G8B8: // FIXME: Don't touch alpha? 1655 { 1656 Pointer<Byte> buffer = cBuffer + x * 4; 1657 Short4 value = *Pointer<Short4>(buffer); 1658 1659 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) || 1660 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) && 1661 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 1662 { 1663 Short4 masked = value; 1664 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); 1665 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); 1666 c01 |= masked; 1667 } 1668 1669 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); 1670 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); 1671 c01 |= value; 1672 *Pointer<Short4>(buffer) = c01; 1673 1674 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1675 value = *Pointer<Short4>(buffer); 1676 1677 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) || 1678 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) && 1679 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 1680 { 1681 Short4 masked = value; 1682 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); 1683 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); 1684 c23 |= masked; 1685 } 1686 1687 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); 1688 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); 1689 c23 |= value; 1690 *Pointer<Short4>(buffer) = c23; 1691 } 1692 break; 1693 case FORMAT_A8B8G8R8: 1694 case FORMAT_X8B8G8R8: // FIXME: Don't touch alpha? 1695 case FORMAT_SRGB8_X8: 1696 case FORMAT_SRGB8_A8: 1697 { 1698 Pointer<Byte> buffer = cBuffer + x * 4; 1699 Short4 value = *Pointer<Short4>(buffer); 1700 1701 bool masked = (((state.targetFormat[index] == FORMAT_A8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_A8) && rgbaWriteMask != 0x0000000F) || 1702 (((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x00000007) && 1703 ((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x0000000F))); // FIXME: Need for masking when XBGR && Fh? 1704 1705 if(masked) 1706 { 1707 Short4 masked = value; 1708 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0])); 1709 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0])); 1710 c01 |= masked; 1711 } 1712 1713 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); 1714 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); 1715 c01 |= value; 1716 *Pointer<Short4>(buffer) = c01; 1717 1718 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1719 value = *Pointer<Short4>(buffer); 1720 1721 if(masked) 1722 { 1723 Short4 masked = value; 1724 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0])); 1725 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0])); 1726 c23 |= masked; 1727 } 1728 1729 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); 1730 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); 1731 c23 |= value; 1732 *Pointer<Short4>(buffer) = c23; 1733 } 1734 break; 1735 case FORMAT_G8R8: 1736 if((rgbaWriteMask & 0x00000003) != 0x0) 1737 { 1738 Pointer<Byte> buffer = cBuffer + 2 * x; 1739 Int2 value; 1740 value = Insert(value, *Pointer<Int>(buffer), 0); 1741 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1742 value = Insert(value, *Pointer<Int>(buffer + pitch), 1); 1743 1744 Int2 packedCol = As<Int2>(current.x); 1745 1746 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8); 1747 if((rgbaWriteMask & 0x3) != 0x3) 1748 { 1749 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0])); 1750 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); 1751 mergedMask &= rgbaMask; 1752 } 1753 1754 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask)); 1755 1756 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0)); 1757 *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1)); 1758 } 1759 break; 1760 case FORMAT_R8: 1761 if(rgbaWriteMask & 0x00000001) 1762 { 1763 Pointer<Byte> buffer = cBuffer + 1 * x; 1764 Short4 value; 1765 value = Insert(value, *Pointer<Short>(buffer), 0); 1766 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1767 value = Insert(value, *Pointer<Short>(buffer + pitch), 1); 1768 1769 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask); 1770 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask); 1771 current.x |= value; 1772 1773 *Pointer<Short>(buffer) = Extract(current.x, 0); 1774 *Pointer<Short>(buffer + pitch) = Extract(current.x, 1); 1775 } 1776 break; 1777 case FORMAT_A8: 1778 if(rgbaWriteMask & 0x00000008) 1779 { 1780 Pointer<Byte> buffer = cBuffer + 1 * x; 1781 Short4 value; 1782 value = Insert(value, *Pointer<Short>(buffer), 0); 1783 Int pitch = *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1784 value = Insert(value, *Pointer<Short>(buffer + pitch), 1); 1785 1786 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q) + 8 * xMask); 1787 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q) + 8 * xMask); 1788 current.w |= value; 1789 1790 *Pointer<Short>(buffer) = Extract(current.w, 0); 1791 *Pointer<Short>(buffer + pitch) = Extract(current.w, 1); 1792 } 1793 break; 1794 case FORMAT_G16R16: 1795 { 1796 Pointer<Byte> buffer = cBuffer + 4 * x; 1797 1798 Short4 value = *Pointer<Short4>(buffer); 1799 1800 if((rgbaWriteMask & 0x00000003) != 0x00000003) 1801 { 1802 Short4 masked = value; 1803 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); 1804 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0])); 1805 current.x |= masked; 1806 } 1807 1808 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); 1809 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); 1810 current.x |= value; 1811 *Pointer<Short4>(buffer) = current.x; 1812 1813 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1814 1815 value = *Pointer<Short4>(buffer); 1816 1817 if((rgbaWriteMask & 0x00000003) != 0x00000003) 1818 { 1819 Short4 masked = value; 1820 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); 1821 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0])); 1822 current.y |= masked; 1823 } 1824 1825 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); 1826 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); 1827 current.y |= value; 1828 *Pointer<Short4>(buffer) = current.y; 1829 } 1830 break; 1831 case FORMAT_A16B16G16R16: 1832 { 1833 Pointer<Byte> buffer = cBuffer + 8 * x; 1834 1835 { 1836 Short4 value = *Pointer<Short4>(buffer); 1837 1838 if(rgbaWriteMask != 0x0000000F) 1839 { 1840 Short4 masked = value; 1841 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); 1842 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); 1843 current.x |= masked; 1844 } 1845 1846 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ0Q) + xMask * 8); 1847 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8); 1848 current.x |= value; 1849 *Pointer<Short4>(buffer) = current.x; 1850 } 1851 1852 { 1853 Short4 value = *Pointer<Short4>(buffer + 8); 1854 1855 if(rgbaWriteMask != 0x0000000F) 1856 { 1857 Short4 masked = value; 1858 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); 1859 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); 1860 current.y |= masked; 1861 } 1862 1863 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ1Q) + xMask * 8); 1864 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8); 1865 current.y |= value; 1866 *Pointer<Short4>(buffer + 8) = current.y; 1867 } 1868 1869 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1870 1871 { 1872 Short4 value = *Pointer<Short4>(buffer); 1873 1874 if(rgbaWriteMask != 0x0000000F) 1875 { 1876 Short4 masked = value; 1877 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); 1878 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); 1879 current.z |= masked; 1880 } 1881 1882 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ2Q) + xMask * 8); 1883 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8); 1884 current.z |= value; 1885 *Pointer<Short4>(buffer) = current.z; 1886 } 1887 1888 { 1889 Short4 value = *Pointer<Short4>(buffer + 8); 1890 1891 if(rgbaWriteMask != 0x0000000F) 1892 { 1893 Short4 masked = value; 1894 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); 1895 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); 1896 current.w |= masked; 1897 } 1898 1899 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ3Q) + xMask * 8); 1900 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8); 1901 current.w |= value; 1902 *Pointer<Short4>(buffer + 8) = current.w; 1903 } 1904 } 1905 break; 1906 default: 1907 ASSERT(false); 1908 } 1909 } 1910 blendFactor(Vector4f & blendFactor,const Vector4f & oC,const Vector4f & pixel,BlendFactor blendFactorActive)1911 void PixelRoutine::blendFactor(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorActive) 1912 { 1913 switch(blendFactorActive) 1914 { 1915 case BLEND_ZERO: 1916 // Optimized 1917 break; 1918 case BLEND_ONE: 1919 // Optimized 1920 break; 1921 case BLEND_SOURCE: 1922 blendFactor.x = oC.x; 1923 blendFactor.y = oC.y; 1924 blendFactor.z = oC.z; 1925 break; 1926 case BLEND_INVSOURCE: 1927 blendFactor.x = Float4(1.0f) - oC.x; 1928 blendFactor.y = Float4(1.0f) - oC.y; 1929 blendFactor.z = Float4(1.0f) - oC.z; 1930 break; 1931 case BLEND_DEST: 1932 blendFactor.x = pixel.x; 1933 blendFactor.y = pixel.y; 1934 blendFactor.z = pixel.z; 1935 break; 1936 case BLEND_INVDEST: 1937 blendFactor.x = Float4(1.0f) - pixel.x; 1938 blendFactor.y = Float4(1.0f) - pixel.y; 1939 blendFactor.z = Float4(1.0f) - pixel.z; 1940 break; 1941 case BLEND_SOURCEALPHA: 1942 blendFactor.x = oC.w; 1943 blendFactor.y = oC.w; 1944 blendFactor.z = oC.w; 1945 break; 1946 case BLEND_INVSOURCEALPHA: 1947 blendFactor.x = Float4(1.0f) - oC.w; 1948 blendFactor.y = Float4(1.0f) - oC.w; 1949 blendFactor.z = Float4(1.0f) - oC.w; 1950 break; 1951 case BLEND_DESTALPHA: 1952 blendFactor.x = pixel.w; 1953 blendFactor.y = pixel.w; 1954 blendFactor.z = pixel.w; 1955 break; 1956 case BLEND_INVDESTALPHA: 1957 blendFactor.x = Float4(1.0f) - pixel.w; 1958 blendFactor.y = Float4(1.0f) - pixel.w; 1959 blendFactor.z = Float4(1.0f) - pixel.w; 1960 break; 1961 case BLEND_SRCALPHASAT: 1962 blendFactor.x = Float4(1.0f) - pixel.w; 1963 blendFactor.x = Min(blendFactor.x, oC.w); 1964 blendFactor.y = blendFactor.x; 1965 blendFactor.z = blendFactor.x; 1966 break; 1967 case BLEND_CONSTANT: 1968 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[0])); 1969 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[1])); 1970 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[2])); 1971 break; 1972 case BLEND_INVCONSTANT: 1973 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[0])); 1974 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[1])); 1975 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[2])); 1976 break; 1977 default: 1978 ASSERT(false); 1979 } 1980 } 1981 blendFactorAlpha(Vector4f & blendFactor,const Vector4f & oC,const Vector4f & pixel,BlendFactor blendFactorAlphaActive)1982 void PixelRoutine::blendFactorAlpha(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorAlphaActive) 1983 { 1984 switch(blendFactorAlphaActive) 1985 { 1986 case BLEND_ZERO: 1987 // Optimized 1988 break; 1989 case BLEND_ONE: 1990 // Optimized 1991 break; 1992 case BLEND_SOURCE: 1993 blendFactor.w = oC.w; 1994 break; 1995 case BLEND_INVSOURCE: 1996 blendFactor.w = Float4(1.0f) - oC.w; 1997 break; 1998 case BLEND_DEST: 1999 blendFactor.w = pixel.w; 2000 break; 2001 case BLEND_INVDEST: 2002 blendFactor.w = Float4(1.0f) - pixel.w; 2003 break; 2004 case BLEND_SOURCEALPHA: 2005 blendFactor.w = oC.w; 2006 break; 2007 case BLEND_INVSOURCEALPHA: 2008 blendFactor.w = Float4(1.0f) - oC.w; 2009 break; 2010 case BLEND_DESTALPHA: 2011 blendFactor.w = pixel.w; 2012 break; 2013 case BLEND_INVDESTALPHA: 2014 blendFactor.w = Float4(1.0f) - pixel.w; 2015 break; 2016 case BLEND_SRCALPHASAT: 2017 blendFactor.w = Float4(1.0f); 2018 break; 2019 case BLEND_CONSTANT: 2020 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3])); 2021 break; 2022 case BLEND_INVCONSTANT: 2023 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3])); 2024 break; 2025 default: 2026 ASSERT(false); 2027 } 2028 } 2029 alphaBlend(int index,Pointer<Byte> & cBuffer,Vector4f & oC,Int & x)2030 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x) 2031 { 2032 if(!state.alphaBlendActive) 2033 { 2034 return; 2035 } 2036 2037 Pointer<Byte> buffer; 2038 Vector4f pixel; 2039 2040 Vector4s color; 2041 Short4 c01; 2042 Short4 c23; 2043 2044 Float4 one; 2045 if(Surface::isFloatFormat(state.targetFormat[index])) 2046 { 2047 one = Float4(1.0f); 2048 } 2049 else if(Surface::isNonNormalizedInteger(state.targetFormat[index])) 2050 { 2051 one = As<Float4>(Surface::isUnsignedComponent(state.targetFormat[index], 0) ? Int4(0xFFFFFFFF) : Int4(0x7FFFFFFF)); 2052 } 2053 2054 switch(state.targetFormat[index]) 2055 { 2056 case FORMAT_R32I: 2057 case FORMAT_R32UI: 2058 case FORMAT_R32F: 2059 buffer = cBuffer; 2060 // FIXME: movlps 2061 pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0); 2062 pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4); 2063 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2064 // FIXME: movhps 2065 pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0); 2066 pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4); 2067 pixel.y = pixel.z = pixel.w = one; 2068 break; 2069 case FORMAT_G32R32I: 2070 case FORMAT_G32R32UI: 2071 case FORMAT_G32R32F: 2072 buffer = cBuffer; 2073 pixel.x = *Pointer<Float4>(buffer + 8 * x, 16); 2074 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2075 pixel.y = *Pointer<Float4>(buffer + 8 * x, 16); 2076 pixel.z = pixel.x; 2077 pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x88); 2078 pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0xDD); 2079 pixel.y = pixel.z; 2080 pixel.z = pixel.w = one; 2081 break; 2082 case FORMAT_X32B32G32R32F: 2083 case FORMAT_A32B32G32R32F: 2084 case FORMAT_X32B32G32R32F_UNSIGNED: 2085 case FORMAT_A32B32G32R32I: 2086 case FORMAT_A32B32G32R32UI: 2087 buffer = cBuffer; 2088 pixel.x = *Pointer<Float4>(buffer + 16 * x, 16); 2089 pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16); 2090 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2091 pixel.z = *Pointer<Float4>(buffer + 16 * x, 16); 2092 pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16); 2093 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); 2094 if(state.targetFormat[index] == FORMAT_X32B32G32R32F || 2095 state.targetFormat[index] == FORMAT_X32B32G32R32F_UNSIGNED) 2096 { 2097 pixel.w = Float4(1.0f); 2098 } 2099 break; 2100 default: 2101 ASSERT(false); 2102 } 2103 2104 if((postBlendSRGB && state.writeSRGB) || isSRGB(index)) 2105 { 2106 sRGBtoLinear(pixel.x); 2107 sRGBtoLinear(pixel.y); 2108 sRGBtoLinear(pixel.z); 2109 } 2110 2111 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor 2112 Vector4f sourceFactor; 2113 Vector4f destFactor; 2114 2115 blendFactor(sourceFactor, oC, pixel, state.sourceBlendFactor); 2116 blendFactor(destFactor, oC, pixel, state.destBlendFactor); 2117 2118 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO) 2119 { 2120 oC.x *= sourceFactor.x; 2121 oC.y *= sourceFactor.y; 2122 oC.z *= sourceFactor.z; 2123 } 2124 2125 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO) 2126 { 2127 pixel.x *= destFactor.x; 2128 pixel.y *= destFactor.y; 2129 pixel.z *= destFactor.z; 2130 } 2131 2132 switch(state.blendOperation) 2133 { 2134 case BLENDOP_ADD: 2135 oC.x += pixel.x; 2136 oC.y += pixel.y; 2137 oC.z += pixel.z; 2138 break; 2139 case BLENDOP_SUB: 2140 oC.x -= pixel.x; 2141 oC.y -= pixel.y; 2142 oC.z -= pixel.z; 2143 break; 2144 case BLENDOP_INVSUB: 2145 oC.x = pixel.x - oC.x; 2146 oC.y = pixel.y - oC.y; 2147 oC.z = pixel.z - oC.z; 2148 break; 2149 case BLENDOP_MIN: 2150 oC.x = Min(oC.x, pixel.x); 2151 oC.y = Min(oC.y, pixel.y); 2152 oC.z = Min(oC.z, pixel.z); 2153 break; 2154 case BLENDOP_MAX: 2155 oC.x = Max(oC.x, pixel.x); 2156 oC.y = Max(oC.y, pixel.y); 2157 oC.z = Max(oC.z, pixel.z); 2158 break; 2159 case BLENDOP_SOURCE: 2160 // No operation 2161 break; 2162 case BLENDOP_DEST: 2163 oC.x = pixel.x; 2164 oC.y = pixel.y; 2165 oC.z = pixel.z; 2166 break; 2167 case BLENDOP_NULL: 2168 oC.x = Float4(0.0f); 2169 oC.y = Float4(0.0f); 2170 oC.z = Float4(0.0f); 2171 break; 2172 default: 2173 ASSERT(false); 2174 } 2175 2176 blendFactorAlpha(sourceFactor, oC, pixel, state.sourceBlendFactorAlpha); 2177 blendFactorAlpha(destFactor, oC, pixel, state.destBlendFactorAlpha); 2178 2179 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO) 2180 { 2181 oC.w *= sourceFactor.w; 2182 } 2183 2184 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO) 2185 { 2186 pixel.w *= destFactor.w; 2187 } 2188 2189 switch(state.blendOperationAlpha) 2190 { 2191 case BLENDOP_ADD: 2192 oC.w += pixel.w; 2193 break; 2194 case BLENDOP_SUB: 2195 oC.w -= pixel.w; 2196 break; 2197 case BLENDOP_INVSUB: 2198 pixel.w -= oC.w; 2199 oC.w = pixel.w; 2200 break; 2201 case BLENDOP_MIN: 2202 oC.w = Min(oC.w, pixel.w); 2203 break; 2204 case BLENDOP_MAX: 2205 oC.w = Max(oC.w, pixel.w); 2206 break; 2207 case BLENDOP_SOURCE: 2208 // No operation 2209 break; 2210 case BLENDOP_DEST: 2211 oC.w = pixel.w; 2212 break; 2213 case BLENDOP_NULL: 2214 oC.w = Float4(0.0f); 2215 break; 2216 default: 2217 ASSERT(false); 2218 } 2219 } 2220 writeColor(int index,Pointer<Byte> & cBuffer,Int & x,Vector4f & oC,Int & sMask,Int & zMask,Int & cMask)2221 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask) 2222 { 2223 switch(state.targetFormat[index]) 2224 { 2225 case FORMAT_R32F: 2226 case FORMAT_R32I: 2227 case FORMAT_R32UI: 2228 case FORMAT_R16I: 2229 case FORMAT_R16UI: 2230 case FORMAT_R8I: 2231 case FORMAT_R8UI: 2232 break; 2233 case FORMAT_G32R32F: 2234 case FORMAT_G32R32I: 2235 case FORMAT_G32R32UI: 2236 case FORMAT_G16R16I: 2237 case FORMAT_G16R16UI: 2238 case FORMAT_G8R8I: 2239 case FORMAT_G8R8UI: 2240 oC.z = oC.x; 2241 oC.x = UnpackLow(oC.x, oC.y); 2242 oC.z = UnpackHigh(oC.z, oC.y); 2243 oC.y = oC.z; 2244 break; 2245 case FORMAT_X32B32G32R32F: 2246 case FORMAT_A32B32G32R32F: 2247 case FORMAT_X32B32G32R32F_UNSIGNED: 2248 case FORMAT_A32B32G32R32I: 2249 case FORMAT_A32B32G32R32UI: 2250 case FORMAT_A16B16G16R16I: 2251 case FORMAT_A16B16G16R16UI: 2252 case FORMAT_A8B8G8R8I: 2253 case FORMAT_A8B8G8R8UI: 2254 transpose4x4(oC.x, oC.y, oC.z, oC.w); 2255 break; 2256 default: 2257 ASSERT(false); 2258 } 2259 2260 int rgbaWriteMask = state.colorWriteActive(index); 2261 2262 Int xMask; // Combination of all masks 2263 2264 if(state.depthTestActive) 2265 { 2266 xMask = zMask; 2267 } 2268 else 2269 { 2270 xMask = cMask; 2271 } 2272 2273 if(state.stencilActive) 2274 { 2275 xMask &= sMask; 2276 } 2277 2278 Pointer<Byte> buffer; 2279 Float4 value; 2280 2281 switch(state.targetFormat[index]) 2282 { 2283 case FORMAT_R32F: 2284 case FORMAT_R32I: 2285 case FORMAT_R32UI: 2286 if(rgbaWriteMask & 0x00000001) 2287 { 2288 buffer = cBuffer + 4 * x; 2289 2290 // FIXME: movlps 2291 value.x = *Pointer<Float>(buffer + 0); 2292 value.y = *Pointer<Float>(buffer + 4); 2293 2294 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2295 2296 // FIXME: movhps 2297 value.z = *Pointer<Float>(buffer + 0); 2298 value.w = *Pointer<Float>(buffer + 4); 2299 2300 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + xMask * 16, 16)); 2301 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16)); 2302 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); 2303 2304 // FIXME: movhps 2305 *Pointer<Float>(buffer + 0) = oC.x.z; 2306 *Pointer<Float>(buffer + 4) = oC.x.w; 2307 2308 buffer -= *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2309 2310 // FIXME: movlps 2311 *Pointer<Float>(buffer + 0) = oC.x.x; 2312 *Pointer<Float>(buffer + 4) = oC.x.y; 2313 } 2314 break; 2315 case FORMAT_R16I: 2316 case FORMAT_R16UI: 2317 if(rgbaWriteMask & 0x00000001) 2318 { 2319 buffer = cBuffer + 2 * x; 2320 2321 UShort4 xyzw; 2322 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0)); 2323 2324 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2325 2326 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1)); 2327 value = As<Float4>(Int4(xyzw)); 2328 2329 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16)); 2330 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16)); 2331 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); 2332 2333 if(state.targetFormat[index] == FORMAT_R16I) 2334 { 2335 Float component = oC.x.z; 2336 *Pointer<Short>(buffer + 0) = Short(As<Int>(component)); 2337 component = oC.x.w; 2338 *Pointer<Short>(buffer + 2) = Short(As<Int>(component)); 2339 2340 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2341 2342 component = oC.x.x; 2343 *Pointer<Short>(buffer + 0) = Short(As<Int>(component)); 2344 component = oC.x.y; 2345 *Pointer<Short>(buffer + 2) = Short(As<Int>(component)); 2346 } 2347 else // FORMAT_R16UI 2348 { 2349 Float component = oC.x.z; 2350 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component)); 2351 component = oC.x.w; 2352 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component)); 2353 2354 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2355 2356 component = oC.x.x; 2357 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component)); 2358 component = oC.x.y; 2359 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component)); 2360 } 2361 } 2362 break; 2363 case FORMAT_R8I: 2364 case FORMAT_R8UI: 2365 if(rgbaWriteMask & 0x00000001) 2366 { 2367 buffer = cBuffer + x; 2368 2369 UInt xyzw, packedCol; 2370 2371 xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF; 2372 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2373 xyzw |= UInt(*Pointer<UShort>(buffer)) << 16; 2374 2375 Short4 tmpCol = Short4(As<Int4>(oC.x)); 2376 if(state.targetFormat[index] == FORMAT_R8I) 2377 { 2378 tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol)); 2379 } 2380 else 2381 { 2382 tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol)); 2383 } 2384 packedCol = Extract(As<Int2>(tmpCol), 0); 2385 2386 packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) | 2387 (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask)); 2388 2389 *Pointer<UShort>(buffer) = UShort(packedCol >> 16); 2390 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2391 *Pointer<UShort>(buffer) = UShort(packedCol); 2392 } 2393 break; 2394 case FORMAT_G32R32F: 2395 case FORMAT_G32R32I: 2396 case FORMAT_G32R32UI: 2397 buffer = cBuffer + 8 * x; 2398 2399 value = *Pointer<Float4>(buffer); 2400 2401 if((rgbaWriteMask & 0x00000003) != 0x00000003) 2402 { 2403 Float4 masked = value; 2404 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); 2405 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0]))); 2406 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked)); 2407 } 2408 2409 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16)); 2410 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16)); 2411 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); 2412 *Pointer<Float4>(buffer) = oC.x; 2413 2414 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2415 2416 value = *Pointer<Float4>(buffer); 2417 2418 if((rgbaWriteMask & 0x00000003) != 0x00000003) 2419 { 2420 Float4 masked; 2421 2422 masked = value; 2423 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); 2424 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0]))); 2425 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked)); 2426 } 2427 2428 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16)); 2429 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16)); 2430 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value)); 2431 *Pointer<Float4>(buffer) = oC.y; 2432 break; 2433 case FORMAT_G16R16I: 2434 case FORMAT_G16R16UI: 2435 if((rgbaWriteMask & 0x00000003) != 0x0) 2436 { 2437 buffer = cBuffer + 4 * x; 2438 2439 UInt2 rgbaMask; 2440 UShort4 packedCol = UShort4(As<Int4>(oC.x)); 2441 UShort4 value = *Pointer<UShort4>(buffer); 2442 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); 2443 if((rgbaWriteMask & 0x3) != 0x3) 2444 { 2445 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0])); 2446 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); 2447 mergedMask &= rgbaMask; 2448 } 2449 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask); 2450 2451 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2452 2453 packedCol = UShort4(As<Int4>(oC.y)); 2454 value = *Pointer<UShort4>(buffer); 2455 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); 2456 if((rgbaWriteMask & 0x3) != 0x3) 2457 { 2458 mergedMask &= rgbaMask; 2459 } 2460 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask); 2461 } 2462 break; 2463 case FORMAT_G8R8I: 2464 case FORMAT_G8R8UI: 2465 if((rgbaWriteMask & 0x00000003) != 0x0) 2466 { 2467 buffer = cBuffer + 2 * x; 2468 2469 Int2 xyzw, packedCol; 2470 2471 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0); 2472 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2473 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1); 2474 2475 if(state.targetFormat[index] == FORMAT_G8R8I) 2476 { 2477 packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); 2478 } 2479 else 2480 { 2481 packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); 2482 } 2483 2484 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8); 2485 if((rgbaWriteMask & 0x3) != 0x3) 2486 { 2487 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0])); 2488 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); 2489 mergedMask &= rgbaMask; 2490 } 2491 2492 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask)); 2493 2494 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1)); 2495 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2496 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0)); 2497 } 2498 break; 2499 case FORMAT_X32B32G32R32F: 2500 case FORMAT_A32B32G32R32F: 2501 case FORMAT_X32B32G32R32F_UNSIGNED: 2502 case FORMAT_A32B32G32R32I: 2503 case FORMAT_A32B32G32R32UI: 2504 buffer = cBuffer + 16 * x; 2505 2506 { 2507 value = *Pointer<Float4>(buffer, 16); 2508 2509 if(rgbaWriteMask != 0x0000000F) 2510 { 2511 Float4 masked = value; 2512 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); 2513 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); 2514 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked)); 2515 } 2516 2517 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16)); 2518 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16)); 2519 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); 2520 *Pointer<Float4>(buffer, 16) = oC.x; 2521 } 2522 2523 { 2524 value = *Pointer<Float4>(buffer + 16, 16); 2525 2526 if(rgbaWriteMask != 0x0000000F) 2527 { 2528 Float4 masked = value; 2529 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); 2530 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); 2531 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked)); 2532 } 2533 2534 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskX1X) + xMask * 16, 16)); 2535 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16)); 2536 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value)); 2537 *Pointer<Float4>(buffer + 16, 16) = oC.y; 2538 } 2539 2540 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2541 2542 { 2543 value = *Pointer<Float4>(buffer, 16); 2544 2545 if(rgbaWriteMask != 0x0000000F) 2546 { 2547 Float4 masked = value; 2548 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); 2549 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); 2550 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked)); 2551 } 2552 2553 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskX2X) + xMask * 16, 16)); 2554 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16)); 2555 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value)); 2556 *Pointer<Float4>(buffer, 16) = oC.z; 2557 } 2558 2559 { 2560 value = *Pointer<Float4>(buffer + 16, 16); 2561 2562 if(rgbaWriteMask != 0x0000000F) 2563 { 2564 Float4 masked = value; 2565 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); 2566 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); 2567 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked)); 2568 } 2569 2570 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskX3X) + xMask * 16, 16)); 2571 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16)); 2572 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value)); 2573 *Pointer<Float4>(buffer + 16, 16) = oC.w; 2574 } 2575 break; 2576 case FORMAT_A16B16G16R16I: 2577 case FORMAT_A16B16G16R16UI: 2578 if((rgbaWriteMask & 0x0000000F) != 0x0) 2579 { 2580 buffer = cBuffer + 8 * x; 2581 2582 UInt4 rgbaMask; 2583 UShort8 value = *Pointer<UShort8>(buffer); 2584 UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y))); 2585 UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16); 2586 if((rgbaWriteMask & 0xF) != 0xF) 2587 { 2588 UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); 2589 rgbaMask = UInt4(tmpMask, tmpMask); 2590 mergedMask &= rgbaMask; 2591 } 2592 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask); 2593 2594 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2595 2596 value = *Pointer<UShort8>(buffer); 2597 packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w))); 2598 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16); 2599 if((rgbaWriteMask & 0xF) != 0xF) 2600 { 2601 mergedMask &= rgbaMask; 2602 } 2603 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask); 2604 } 2605 break; 2606 case FORMAT_A8B8G8R8I: 2607 case FORMAT_A8B8G8R8UI: 2608 if((rgbaWriteMask & 0x0000000F) != 0x0) 2609 { 2610 UInt2 value, packedCol, mergedMask; 2611 2612 buffer = cBuffer + 4 * x; 2613 2614 if(state.targetFormat[index] == FORMAT_A8B8G8R8I) 2615 { 2616 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); 2617 } 2618 else 2619 { 2620 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); 2621 } 2622 value = *Pointer<UInt2>(buffer, 16); 2623 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); 2624 if(rgbaWriteMask != 0xF) 2625 { 2626 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0])); 2627 } 2628 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask); 2629 2630 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2631 2632 if(state.targetFormat[index] == FORMAT_A8B8G8R8I) 2633 { 2634 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w)))); 2635 } 2636 else 2637 { 2638 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w)))); 2639 } 2640 value = *Pointer<UInt2>(buffer, 16); 2641 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); 2642 if(rgbaWriteMask != 0xF) 2643 { 2644 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0])); 2645 } 2646 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask); 2647 } 2648 break; 2649 default: 2650 ASSERT(false); 2651 } 2652 } 2653 convertFixed16(Float4 & cf,bool saturate)2654 UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate) 2655 { 2656 return UShort4(cf * Float4(0xFFFF), saturate); 2657 } 2658 sRGBtoLinear16_12_16(Vector4s & c)2659 void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c) 2660 { 2661 Pointer<Byte> LUT = constants + OFFSET(Constants,sRGBtoLinear12_16); 2662 2663 c.x = As<UShort4>(c.x) >> 4; 2664 c.y = As<UShort4>(c.y) >> 4; 2665 c.z = As<UShort4>(c.z) >> 4; 2666 2667 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0); 2668 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1); 2669 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2); 2670 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3); 2671 2672 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0); 2673 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1); 2674 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2); 2675 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3); 2676 2677 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0); 2678 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1); 2679 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2); 2680 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3); 2681 } 2682 linearToSRGB16_12_16(Vector4s & c)2683 void PixelRoutine::linearToSRGB16_12_16(Vector4s &c) 2684 { 2685 c.x = As<UShort4>(c.x) >> 4; 2686 c.y = As<UShort4>(c.y) >> 4; 2687 c.z = As<UShort4>(c.z) >> 4; 2688 2689 linearToSRGB12_16(c); 2690 } 2691 linearToSRGB12_16(Vector4s & c)2692 void PixelRoutine::linearToSRGB12_16(Vector4s &c) 2693 { 2694 Pointer<Byte> LUT = constants + OFFSET(Constants,linearToSRGB12_16); 2695 2696 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0); 2697 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1); 2698 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2); 2699 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3); 2700 2701 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0); 2702 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1); 2703 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2); 2704 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3); 2705 2706 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0); 2707 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1); 2708 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2); 2709 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3); 2710 } 2711 sRGBtoLinear(const Float4 & x)2712 Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2 2713 { 2714 Float4 linear = x * x; 2715 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f); 2716 2717 return Min(Max(linear, Float4(0.0f)), Float4(1.0f)); 2718 } 2719 colorUsed()2720 bool PixelRoutine::colorUsed() 2721 { 2722 return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsKill; 2723 } 2724 } 2725