1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "PixelRoutine.hpp" 16 17 #include "Renderer.hpp" 18 #include "QuadRasterizer.hpp" 19 #include "Surface.hpp" 20 #include "Primitive.hpp" 21 #include "SamplerCore.hpp" 22 #include "Constants.hpp" 23 #include "Debug.hpp" 24 25 namespace sw 26 { 27 extern bool complementaryDepthBuffer; 28 extern bool postBlendSRGB; 29 extern bool exactColorRounding; 30 extern bool forceClearRegisters; 31 PixelRoutine(const PixelProcessor::State & state,const PixelShader * shader)32 PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : QuadRasterizer(state, shader), v(shader && shader->dynamicallyIndexedInput) 33 { 34 if(!shader || shader->getVersion() < 0x0200 || forceClearRegisters) 35 { 36 for(int i = 0; i < MAX_FRAGMENT_INPUTS; i++) 37 { 38 v[i].x = Float4(0.0f); 39 v[i].y = Float4(0.0f); 40 v[i].z = Float4(0.0f); 41 v[i].w = Float4(0.0f); 42 } 43 } 44 } 45 ~PixelRoutine()46 PixelRoutine::~PixelRoutine() 47 { 48 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++) 49 { 50 delete sampler[i]; 51 } 52 } 53 quad(Pointer<Byte> cBuffer[RENDERTARGETS],Pointer<Byte> & zBuffer,Pointer<Byte> & sBuffer,Int cMask[4],Int & x,Int & y)54 void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y) 55 { 56 #if PERF_PROFILE 57 Long pipeTime = Ticks(); 58 #endif 59 60 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++) 61 { 62 sampler[i] = new SamplerCore(constants, state.sampler[i]); 63 } 64 65 const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive(); 66 67 Int zMask[4]; // Depth mask 68 Int sMask[4]; // Stencil mask 69 70 for(unsigned int q = 0; q < state.multiSample; q++) 71 { 72 zMask[q] = cMask[q]; 73 sMask[q] = cMask[q]; 74 } 75 76 for(unsigned int q = 0; q < state.multiSample; q++) 77 { 78 stencilTest(sBuffer, q, x, sMask[q], cMask[q]); 79 } 80 81 Float4 f; 82 Float4 rhwCentroid; 83 84 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16); 85 86 if(interpolateZ()) 87 { 88 for(unsigned int q = 0; q < state.multiSample; q++) 89 { 90 Float4 x = xxxx; 91 92 if(state.multiSample > 1) 93 { 94 x -= *Pointer<Float4>(constants + OFFSET(Constants,X) + q * sizeof(float4)); 95 } 96 97 z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false); 98 } 99 } 100 101 Bool depthPass = false; 102 103 if(earlyDepthTest) 104 { 105 for(unsigned int q = 0; q < state.multiSample; q++) 106 { 107 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); 108 } 109 } 110 111 If(depthPass || Bool(!earlyDepthTest)) 112 { 113 #if PERF_PROFILE 114 Long interpTime = Ticks(); 115 #endif 116 117 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16); 118 119 // Centroid locations 120 Float4 XXXX = Float4(0.0f); 121 Float4 YYYY = Float4(0.0f); 122 123 if(state.centroid) 124 { 125 Float4 WWWW(1.0e-9f); 126 127 for(unsigned int q = 0; q < state.multiSample; q++) 128 { 129 XXXX += *Pointer<Float4>(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]); 130 YYYY += *Pointer<Float4>(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]); 131 WWWW += *Pointer<Float4>(constants + OFFSET(Constants,weight) + 16 * cMask[q]); 132 } 133 134 WWWW = Rcp_pp(WWWW); 135 XXXX *= WWWW; 136 YYYY *= WWWW; 137 138 XXXX += xxxx; 139 YYYY += yyyy; 140 } 141 142 if(interpolateW()) 143 { 144 w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false); 145 rhw = reciprocal(w, false, false, true); 146 147 if(state.centroid) 148 { 149 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false)); 150 } 151 } 152 153 for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++) 154 { 155 for(int component = 0; component < 4; component++) 156 { 157 if(state.interpolant[interpolant].component & (1 << component)) 158 { 159 if(!state.interpolant[interpolant].centroid) 160 { 161 v[interpolant][component] = interpolate(xxxx, Dv[interpolant][component], rhw, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective); 162 } 163 else 164 { 165 v[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective); 166 } 167 } 168 } 169 170 Float4 rcp; 171 172 switch(state.interpolant[interpolant].project) 173 { 174 case 0: 175 break; 176 case 1: 177 rcp = reciprocal(v[interpolant].y); 178 v[interpolant].x = v[interpolant].x * rcp; 179 break; 180 case 2: 181 rcp = reciprocal(v[interpolant].z); 182 v[interpolant].x = v[interpolant].x * rcp; 183 v[interpolant].y = v[interpolant].y * rcp; 184 break; 185 case 3: 186 rcp = reciprocal(v[interpolant].w); 187 v[interpolant].x = v[interpolant].x * rcp; 188 v[interpolant].y = v[interpolant].y * rcp; 189 v[interpolant].z = v[interpolant].z * rcp; 190 break; 191 } 192 } 193 194 if(state.fog.component) 195 { 196 f = interpolate(xxxx, Df, rhw, primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective); 197 } 198 199 setBuiltins(x, y, z, w); 200 201 #if PERF_PROFILE 202 cycles[PERF_INTERP] += Ticks() - interpTime; 203 #endif 204 205 Bool alphaPass = true; 206 207 if(colorUsed()) 208 { 209 #if PERF_PROFILE 210 Long shaderTime = Ticks(); 211 #endif 212 213 applyShader(cMask); 214 215 #if PERF_PROFILE 216 cycles[PERF_SHADER] += Ticks() - shaderTime; 217 #endif 218 219 alphaPass = alphaTest(cMask); 220 221 if((shader && shader->containsKill()) || state.alphaTestActive()) 222 { 223 for(unsigned int q = 0; q < state.multiSample; q++) 224 { 225 zMask[q] &= cMask[q]; 226 sMask[q] &= cMask[q]; 227 } 228 } 229 } 230 231 If(alphaPass) 232 { 233 if(!earlyDepthTest) 234 { 235 for(unsigned int q = 0; q < state.multiSample; q++) 236 { 237 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); 238 } 239 } 240 241 #if PERF_PROFILE 242 Long ropTime = Ticks(); 243 #endif 244 245 If(depthPass || Bool(earlyDepthTest)) 246 { 247 for(unsigned int q = 0; q < state.multiSample; q++) 248 { 249 if(state.multiSampleMask & (1 << q)) 250 { 251 writeDepth(zBuffer, q, x, z[q], zMask[q]); 252 253 if(state.occlusionEnabled) 254 { 255 occlusion += *Pointer<UInt>(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q])); 256 } 257 } 258 } 259 260 if(colorUsed()) 261 { 262 #if PERF_PROFILE 263 AddAtomic(Pointer<Long>(&profiler.ropOperations), 4); 264 #endif 265 266 rasterOperation(f, cBuffer, x, sMask, zMask, cMask); 267 } 268 } 269 270 #if PERF_PROFILE 271 cycles[PERF_ROP] += Ticks() - ropTime; 272 #endif 273 } 274 } 275 276 for(unsigned int q = 0; q < state.multiSample; q++) 277 { 278 if(state.multiSampleMask & (1 << q)) 279 { 280 writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]); 281 } 282 } 283 284 #if PERF_PROFILE 285 cycles[PERF_PIPE] += Ticks() - pipeTime; 286 #endif 287 } 288 interpolateCentroid(Float4 & x,Float4 & y,Float4 & rhw,Pointer<Byte> planeEquation,bool flat,bool perspective)289 Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective) 290 { 291 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16); 292 293 if(!flat) 294 { 295 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) + 296 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16); 297 298 if(perspective) 299 { 300 interpolant *= rhw; 301 } 302 } 303 304 return interpolant; 305 } 306 stencilTest(Pointer<Byte> & sBuffer,int q,Int & x,Int & sMask,Int & cMask)307 void PixelRoutine::stencilTest(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask) 308 { 309 if(!state.stencilActive) 310 { 311 return; 312 } 313 314 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask) 315 316 Pointer<Byte> buffer = sBuffer + 2 * x; 317 318 if(q > 0) 319 { 320 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB)); 321 } 322 323 Byte8 value = *Pointer<Byte8>(buffer); 324 Byte8 valueCCW = value; 325 326 if(!state.noStencilMask) 327 { 328 value &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].testMaskQ)); 329 } 330 331 stencilTest(value, state.stencilCompareMode, false); 332 333 if(state.twoSidedStencil) 334 { 335 if(!state.noStencilMaskCCW) 336 { 337 valueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].testMaskQ)); 338 } 339 340 stencilTest(valueCCW, state.stencilCompareModeCCW, true); 341 342 value &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)); 343 valueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)); 344 value |= valueCCW; 345 } 346 347 sMask = SignMask(value) & cMask; 348 } 349 stencilTest(Byte8 & value,StencilCompareMode stencilCompareMode,bool CCW)350 void PixelRoutine::stencilTest(Byte8 &value, StencilCompareMode stencilCompareMode, bool CCW) 351 { 352 Byte8 equal; 353 354 switch(stencilCompareMode) 355 { 356 case STENCIL_ALWAYS: 357 value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 358 break; 359 case STENCIL_NEVER: 360 value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); 361 break; 362 case STENCIL_LESS: // a < b ~ b > a 363 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); 364 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); 365 break; 366 case STENCIL_EQUAL: 367 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); 368 break; 369 case STENCIL_NOTEQUAL: // a != b ~ !(a == b) 370 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); 371 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 372 break; 373 case STENCIL_LESSEQUAL: // a <= b ~ (b > a) || (a == b) 374 equal = value; 375 equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); 376 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); 377 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); 378 value |= equal; 379 break; 380 case STENCIL_GREATER: // a > b 381 equal = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)); 382 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); 383 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value)); 384 value = equal; 385 break; 386 case STENCIL_GREATEREQUAL: // a >= b ~ !(a < b) ~ !(b > a) 387 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); 388 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); 389 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 390 break; 391 default: 392 ASSERT(false); 393 } 394 } 395 depthTest(Pointer<Byte> & zBuffer,int q,Int & x,Float4 & z,Int & sMask,Int & zMask,Int & cMask)396 Bool PixelRoutine::depthTest(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask) 397 { 398 if(!state.depthTestActive) 399 { 400 return true; 401 } 402 403 Float4 Z = z; 404 405 if(shader && shader->depthOverride()) 406 { 407 if(complementaryDepthBuffer) 408 { 409 Z = Float4(1.0f) - oDepth; 410 } 411 else 412 { 413 Z = oDepth; 414 } 415 } 416 417 Pointer<Byte> buffer; 418 Int pitch; 419 420 if(!state.quadLayoutDepthBuffer) 421 { 422 buffer = zBuffer + 4 * x; 423 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); 424 } 425 else 426 { 427 buffer = zBuffer + 8 * x; 428 } 429 430 if(q > 0) 431 { 432 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB)); 433 } 434 435 Float4 zValue; 436 437 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable)) 438 { 439 if(!state.quadLayoutDepthBuffer) 440 { 441 // FIXME: Properly optimizes? 442 zValue.xy = *Pointer<Float4>(buffer); 443 zValue.zw = *Pointer<Float4>(buffer + pitch - 8); 444 } 445 else 446 { 447 zValue = *Pointer<Float4>(buffer, 16); 448 } 449 } 450 451 Int4 zTest; 452 453 switch(state.depthCompareMode) 454 { 455 case DEPTH_ALWAYS: 456 // Optimized 457 break; 458 case DEPTH_NEVER: 459 // Optimized 460 break; 461 case DEPTH_EQUAL: 462 zTest = CmpEQ(zValue, Z); 463 break; 464 case DEPTH_NOTEQUAL: 465 zTest = CmpNEQ(zValue, Z); 466 break; 467 case DEPTH_LESS: 468 if(complementaryDepthBuffer) 469 { 470 zTest = CmpLT(zValue, Z); 471 } 472 else 473 { 474 zTest = CmpNLE(zValue, Z); 475 } 476 break; 477 case DEPTH_GREATEREQUAL: 478 if(complementaryDepthBuffer) 479 { 480 zTest = CmpNLT(zValue, Z); 481 } 482 else 483 { 484 zTest = CmpLE(zValue, Z); 485 } 486 break; 487 case DEPTH_LESSEQUAL: 488 if(complementaryDepthBuffer) 489 { 490 zTest = CmpLE(zValue, Z); 491 } 492 else 493 { 494 zTest = CmpNLT(zValue, Z); 495 } 496 break; 497 case DEPTH_GREATER: 498 if(complementaryDepthBuffer) 499 { 500 zTest = CmpNLE(zValue, Z); 501 } 502 else 503 { 504 zTest = CmpLT(zValue, Z); 505 } 506 break; 507 default: 508 ASSERT(false); 509 } 510 511 switch(state.depthCompareMode) 512 { 513 case DEPTH_ALWAYS: 514 zMask = cMask; 515 break; 516 case DEPTH_NEVER: 517 zMask = 0x0; 518 break; 519 default: 520 zMask = SignMask(zTest) & cMask; 521 break; 522 } 523 524 if(state.stencilActive) 525 { 526 zMask &= sMask; 527 } 528 529 return zMask != 0; 530 } 531 alphaTest(Int & aMask,Short4 & alpha)532 void PixelRoutine::alphaTest(Int &aMask, Short4 &alpha) 533 { 534 Short4 cmp; 535 Short4 equal; 536 537 switch(state.alphaCompareMode) 538 { 539 case ALPHA_ALWAYS: 540 aMask = 0xF; 541 break; 542 case ALPHA_NEVER: 543 aMask = 0x0; 544 break; 545 case ALPHA_EQUAL: 546 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); 547 aMask = SignMask(Pack(cmp, Short4(0x0000))); 548 break; 549 case ALPHA_NOTEQUAL: // a != b ~ !(a == b) 550 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME 551 aMask = SignMask(Pack(cmp, Short4(0x0000))); 552 break; 553 case ALPHA_LESS: // a < b ~ b > a 554 cmp = CmpGT(*Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)), alpha); 555 aMask = SignMask(Pack(cmp, Short4(0x0000))); 556 break; 557 case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate 558 equal = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); 559 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); 560 cmp |= equal; 561 aMask = SignMask(Pack(cmp, Short4(0x0000))); 562 break; 563 case ALPHA_LESSEQUAL: // a <= b ~ !(a > b) 564 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME 565 aMask = SignMask(Pack(cmp, Short4(0x0000))); 566 break; 567 case ALPHA_GREATER: // a > b 568 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); 569 aMask = SignMask(Pack(cmp, Short4(0x0000))); 570 break; 571 default: 572 ASSERT(false); 573 } 574 } 575 alphaToCoverage(Int cMask[4],Float4 & alpha)576 void PixelRoutine::alphaToCoverage(Int cMask[4], Float4 &alpha) 577 { 578 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c0))); 579 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c1))); 580 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c2))); 581 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c3))); 582 583 Int aMask0 = SignMask(coverage0); 584 Int aMask1 = SignMask(coverage1); 585 Int aMask2 = SignMask(coverage2); 586 Int aMask3 = SignMask(coverage3); 587 588 cMask[0] &= aMask0; 589 cMask[1] &= aMask1; 590 cMask[2] &= aMask2; 591 cMask[3] &= aMask3; 592 } 593 fogBlend(Vector4f & c0,Float4 & fog)594 void PixelRoutine::fogBlend(Vector4f &c0, Float4 &fog) 595 { 596 if(!state.fogActive) 597 { 598 return; 599 } 600 601 if(state.pixelFogMode != FOG_NONE) 602 { 603 pixelFog(fog); 604 605 fog = Min(fog, Float4(1.0f)); 606 fog = Max(fog, Float4(0.0f)); 607 } 608 609 c0.x -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0])); 610 c0.y -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1])); 611 c0.z -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2])); 612 613 c0.x *= fog; 614 c0.y *= fog; 615 c0.z *= fog; 616 617 c0.x += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0])); 618 c0.y += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1])); 619 c0.z += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2])); 620 } 621 pixelFog(Float4 & visibility)622 void PixelRoutine::pixelFog(Float4 &visibility) 623 { 624 Float4 &zw = visibility; 625 626 if(state.pixelFogMode != FOG_NONE) 627 { 628 if(state.wBasedFog) 629 { 630 zw = rhw; 631 } 632 else 633 { 634 if(complementaryDepthBuffer) 635 { 636 zw = Float4(1.0f) - z[0]; 637 } 638 else 639 { 640 zw = z[0]; 641 } 642 } 643 } 644 645 switch(state.pixelFogMode) 646 { 647 case FOG_NONE: 648 break; 649 case FOG_LINEAR: 650 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.scale)); 651 zw += *Pointer<Float4>(data + OFFSET(DrawData,fog.offset)); 652 break; 653 case FOG_EXP: 654 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE)); 655 zw = exponential2(zw, true); 656 break; 657 case FOG_EXP2: 658 zw *= zw; 659 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E)); 660 zw = exponential2(zw, true); 661 break; 662 default: 663 ASSERT(false); 664 } 665 } 666 writeDepth(Pointer<Byte> & zBuffer,int q,Int & x,Float4 & z,Int & zMask)667 void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask) 668 { 669 if(!state.depthWriteEnable) 670 { 671 return; 672 } 673 674 Float4 Z = z; 675 676 if(shader && shader->depthOverride()) 677 { 678 if(complementaryDepthBuffer) 679 { 680 Z = Float4(1.0f) - oDepth; 681 } 682 else 683 { 684 Z = oDepth; 685 } 686 } 687 688 Pointer<Byte> buffer; 689 Int pitch; 690 691 if(!state.quadLayoutDepthBuffer) 692 { 693 buffer = zBuffer + 4 * x; 694 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); 695 } 696 else 697 { 698 buffer = zBuffer + 8 * x; 699 } 700 701 if(q > 0) 702 { 703 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB)); 704 } 705 706 Float4 zValue; 707 708 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable)) 709 { 710 if(!state.quadLayoutDepthBuffer) 711 { 712 // FIXME: Properly optimizes? 713 zValue.xy = *Pointer<Float4>(buffer); 714 zValue.zw = *Pointer<Float4>(buffer + pitch - 8); 715 } 716 else 717 { 718 zValue = *Pointer<Float4>(buffer, 16); 719 } 720 } 721 722 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16)); 723 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16)); 724 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue)); 725 726 if(!state.quadLayoutDepthBuffer) 727 { 728 // FIXME: Properly optimizes? 729 *Pointer<Float2>(buffer) = Float2(Z.xy); 730 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw); 731 } 732 else 733 { 734 *Pointer<Float4>(buffer, 16) = Z; 735 } 736 } 737 writeStencil(Pointer<Byte> & sBuffer,int q,Int & x,Int & sMask,Int & zMask,Int & cMask)738 void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask) 739 { 740 if(!state.stencilActive) 741 { 742 return; 743 } 744 745 if(state.stencilPassOperation == OPERATION_KEEP && state.stencilZFailOperation == OPERATION_KEEP && state.stencilFailOperation == OPERATION_KEEP) 746 { 747 if(!state.twoSidedStencil || (state.stencilPassOperationCCW == OPERATION_KEEP && state.stencilZFailOperationCCW == OPERATION_KEEP && state.stencilFailOperationCCW == OPERATION_KEEP)) 748 { 749 return; 750 } 751 } 752 753 if(state.stencilWriteMasked && (!state.twoSidedStencil || state.stencilWriteMaskedCCW)) 754 { 755 return; 756 } 757 758 Pointer<Byte> buffer = sBuffer + 2 * x; 759 760 if(q > 0) 761 { 762 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB)); 763 } 764 765 Byte8 bufferValue = *Pointer<Byte8>(buffer); 766 767 Byte8 newValue; 768 stencilOperation(newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask); 769 770 if(!state.noStencilWriteMask) 771 { 772 Byte8 maskedValue = bufferValue; 773 newValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].writeMaskQ)); 774 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].invWriteMaskQ)); 775 newValue |= maskedValue; 776 } 777 778 if(state.twoSidedStencil) 779 { 780 Byte8 newValueCCW; 781 782 stencilOperation(newValueCCW, bufferValue, state.stencilPassOperationCCW, state.stencilZFailOperationCCW, state.stencilFailOperationCCW, true, zMask, sMask); 783 784 if(!state.noStencilWriteMaskCCW) 785 { 786 Byte8 maskedValue = bufferValue; 787 newValueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].writeMaskQ)); 788 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].invWriteMaskQ)); 789 newValueCCW |= maskedValue; 790 } 791 792 newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)); 793 newValueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)); 794 newValue |= newValueCCW; 795 } 796 797 newValue &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * cMask); 798 bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask); 799 newValue |= bufferValue; 800 801 *Pointer<Byte4>(buffer) = Byte4(newValue); 802 } 803 stencilOperation(Byte8 & newValue,Byte8 & bufferValue,StencilOperation stencilPassOperation,StencilOperation stencilZFailOperation,StencilOperation stencilFailOperation,bool CCW,Int & zMask,Int & sMask)804 void PixelRoutine::stencilOperation(Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask) 805 { 806 Byte8 &pass = newValue; 807 Byte8 fail; 808 Byte8 zFail; 809 810 stencilOperation(pass, bufferValue, stencilPassOperation, CCW); 811 812 if(stencilZFailOperation != stencilPassOperation) 813 { 814 stencilOperation(zFail, bufferValue, stencilZFailOperation, CCW); 815 } 816 817 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation) 818 { 819 stencilOperation(fail, bufferValue, stencilFailOperation, CCW); 820 } 821 822 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation) 823 { 824 if(state.depthTestActive && stencilZFailOperation != stencilPassOperation) // zMask valid and values not the same 825 { 826 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * zMask); 827 zFail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask); 828 pass |= zFail; 829 } 830 831 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * sMask); 832 fail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask); 833 pass |= fail; 834 } 835 } 836 stencilOperation(Byte8 & output,Byte8 & bufferValue,StencilOperation operation,bool CCW)837 void PixelRoutine::stencilOperation(Byte8 &output, Byte8 &bufferValue, StencilOperation operation, bool CCW) 838 { 839 switch(operation) 840 { 841 case OPERATION_KEEP: 842 output = bufferValue; 843 break; 844 case OPERATION_ZERO: 845 output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); 846 break; 847 case OPERATION_REPLACE: 848 output = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceQ)); 849 break; 850 case OPERATION_INCRSAT: 851 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1)); 852 break; 853 case OPERATION_DECRSAT: 854 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1)); 855 break; 856 case OPERATION_INVERT: 857 output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 858 break; 859 case OPERATION_INCR: 860 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1); 861 break; 862 case OPERATION_DECR: 863 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1); 864 break; 865 default: 866 ASSERT(false); 867 } 868 } 869 blendFactor(Vector4s & blendFactor,const Vector4s & current,const Vector4s & pixel,BlendFactor blendFactorActive)870 void PixelRoutine::blendFactor(Vector4s &blendFactor, const Vector4s ¤t, const Vector4s &pixel, BlendFactor blendFactorActive) 871 { 872 switch(blendFactorActive) 873 { 874 case BLEND_ZERO: 875 // Optimized 876 break; 877 case BLEND_ONE: 878 // Optimized 879 break; 880 case BLEND_SOURCE: 881 blendFactor.x = current.x; 882 blendFactor.y = current.y; 883 blendFactor.z = current.z; 884 break; 885 case BLEND_INVSOURCE: 886 blendFactor.x = Short4(0xFFFFu) - current.x; 887 blendFactor.y = Short4(0xFFFFu) - current.y; 888 blendFactor.z = Short4(0xFFFFu) - current.z; 889 break; 890 case BLEND_DEST: 891 blendFactor.x = pixel.x; 892 blendFactor.y = pixel.y; 893 blendFactor.z = pixel.z; 894 break; 895 case BLEND_INVDEST: 896 blendFactor.x = Short4(0xFFFFu) - pixel.x; 897 blendFactor.y = Short4(0xFFFFu) - pixel.y; 898 blendFactor.z = Short4(0xFFFFu) - pixel.z; 899 break; 900 case BLEND_SOURCEALPHA: 901 blendFactor.x = current.w; 902 blendFactor.y = current.w; 903 blendFactor.z = current.w; 904 break; 905 case BLEND_INVSOURCEALPHA: 906 blendFactor.x = Short4(0xFFFFu) - current.w; 907 blendFactor.y = Short4(0xFFFFu) - current.w; 908 blendFactor.z = Short4(0xFFFFu) - current.w; 909 break; 910 case BLEND_DESTALPHA: 911 blendFactor.x = pixel.w; 912 blendFactor.y = pixel.w; 913 blendFactor.z = pixel.w; 914 break; 915 case BLEND_INVDESTALPHA: 916 blendFactor.x = Short4(0xFFFFu) - pixel.w; 917 blendFactor.y = Short4(0xFFFFu) - pixel.w; 918 blendFactor.z = Short4(0xFFFFu) - pixel.w; 919 break; 920 case BLEND_SRCALPHASAT: 921 blendFactor.x = Short4(0xFFFFu) - pixel.w; 922 blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w)); 923 blendFactor.y = blendFactor.x; 924 blendFactor.z = blendFactor.x; 925 break; 926 case BLEND_CONSTANT: 927 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[0])); 928 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[1])); 929 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[2])); 930 break; 931 case BLEND_INVCONSTANT: 932 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[0])); 933 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[1])); 934 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[2])); 935 break; 936 case BLEND_CONSTANTALPHA: 937 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); 938 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); 939 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); 940 break; 941 case BLEND_INVCONSTANTALPHA: 942 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); 943 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); 944 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); 945 break; 946 default: 947 ASSERT(false); 948 } 949 } 950 blendFactorAlpha(Vector4s & blendFactor,const Vector4s & current,const Vector4s & pixel,BlendFactor blendFactorAlphaActive)951 void PixelRoutine::blendFactorAlpha(Vector4s &blendFactor, const Vector4s ¤t, const Vector4s &pixel, BlendFactor blendFactorAlphaActive) 952 { 953 switch(blendFactorAlphaActive) 954 { 955 case BLEND_ZERO: 956 // Optimized 957 break; 958 case BLEND_ONE: 959 // Optimized 960 break; 961 case BLEND_SOURCE: 962 blendFactor.w = current.w; 963 break; 964 case BLEND_INVSOURCE: 965 blendFactor.w = Short4(0xFFFFu) - current.w; 966 break; 967 case BLEND_DEST: 968 blendFactor.w = pixel.w; 969 break; 970 case BLEND_INVDEST: 971 blendFactor.w = Short4(0xFFFFu) - pixel.w; 972 break; 973 case BLEND_SOURCEALPHA: 974 blendFactor.w = current.w; 975 break; 976 case BLEND_INVSOURCEALPHA: 977 blendFactor.w = Short4(0xFFFFu) - current.w; 978 break; 979 case BLEND_DESTALPHA: 980 blendFactor.w = pixel.w; 981 break; 982 case BLEND_INVDESTALPHA: 983 blendFactor.w = Short4(0xFFFFu) - pixel.w; 984 break; 985 case BLEND_SRCALPHASAT: 986 blendFactor.w = Short4(0xFFFFu); 987 break; 988 case BLEND_CONSTANT: 989 case BLEND_CONSTANTALPHA: 990 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); 991 break; 992 case BLEND_INVCONSTANT: 993 case BLEND_INVCONSTANTALPHA: 994 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); 995 break; 996 default: 997 ASSERT(false); 998 } 999 } 1000 isSRGB(int index) const1001 bool PixelRoutine::isSRGB(int index) const 1002 { 1003 return state.targetFormat[index] == FORMAT_SRGB8_A8 || state.targetFormat[index] == FORMAT_SRGB8_X8; 1004 } 1005 readPixel(int index,Pointer<Byte> & cBuffer,Int & x,Vector4s & pixel)1006 void PixelRoutine::readPixel(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &pixel) 1007 { 1008 Short4 c01; 1009 Short4 c23; 1010 Pointer<Byte> buffer; 1011 Pointer<Byte> buffer2; 1012 1013 switch(state.targetFormat[index]) 1014 { 1015 case FORMAT_R5G6B5: 1016 buffer = cBuffer + 2 * x; 1017 buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1018 c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2))); 1019 1020 pixel.x = c01 & Short4(0xF800u); 1021 pixel.y = (c01 & Short4(0x07E0u)) << 5; 1022 pixel.z = (c01 & Short4(0x001Fu)) << 11; 1023 pixel.w = Short4(0xFFFFu); 1024 break; 1025 case FORMAT_A8R8G8B8: 1026 buffer = cBuffer + 4 * x; 1027 c01 = *Pointer<Short4>(buffer); 1028 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1029 c23 = *Pointer<Short4>(buffer); 1030 pixel.z = c01; 1031 pixel.y = c01; 1032 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); 1033 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); 1034 pixel.x = pixel.z; 1035 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); 1036 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); 1037 pixel.y = pixel.z; 1038 pixel.w = pixel.x; 1039 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); 1040 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); 1041 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); 1042 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1043 break; 1044 case FORMAT_A8B8G8R8: 1045 case FORMAT_SRGB8_A8: 1046 buffer = cBuffer + 4 * x; 1047 c01 = *Pointer<Short4>(buffer); 1048 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1049 c23 = *Pointer<Short4>(buffer); 1050 pixel.z = c01; 1051 pixel.y = c01; 1052 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); 1053 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); 1054 pixel.x = pixel.z; 1055 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); 1056 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); 1057 pixel.y = pixel.z; 1058 pixel.w = pixel.x; 1059 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); 1060 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); 1061 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1062 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1063 break; 1064 case FORMAT_A8: 1065 buffer = cBuffer + 1 * x; 1066 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 0); 1067 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1068 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 1); 1069 pixel.w = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1070 pixel.x = Short4(0x0000); 1071 pixel.y = Short4(0x0000); 1072 pixel.z = Short4(0x0000); 1073 break; 1074 case FORMAT_X8R8G8B8: 1075 buffer = cBuffer + 4 * x; 1076 c01 = *Pointer<Short4>(buffer); 1077 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1078 c23 = *Pointer<Short4>(buffer); 1079 pixel.z = c01; 1080 pixel.y = c01; 1081 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); 1082 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); 1083 pixel.x = pixel.z; 1084 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); 1085 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); 1086 pixel.y = pixel.z; 1087 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); 1088 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); 1089 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); 1090 pixel.w = Short4(0xFFFFu); 1091 break; 1092 case FORMAT_X8B8G8R8: 1093 case FORMAT_SRGB8_X8: 1094 buffer = cBuffer + 4 * x; 1095 c01 = *Pointer<Short4>(buffer); 1096 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1097 c23 = *Pointer<Short4>(buffer); 1098 pixel.z = c01; 1099 pixel.y = c01; 1100 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); 1101 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); 1102 pixel.x = pixel.z; 1103 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); 1104 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); 1105 pixel.y = pixel.z; 1106 pixel.w = pixel.x; 1107 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); 1108 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); 1109 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1110 pixel.w = Short4(0xFFFFu); 1111 break; 1112 case FORMAT_A8G8R8B8Q: 1113 UNIMPLEMENTED(); 1114 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0)); 1115 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0)); 1116 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8)); 1117 // pixel.w = UnpackHigh(As<Byte8>(pixel.w), *Pointer<Byte8>(cBuffer + 8 * x + 8)); 1118 break; 1119 case FORMAT_X8G8R8B8Q: 1120 UNIMPLEMENTED(); 1121 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0)); 1122 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0)); 1123 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8)); 1124 // pixel.w = Short4(0xFFFFu); 1125 break; 1126 case FORMAT_A16B16G16R16: 1127 buffer = cBuffer; 1128 pixel.x = *Pointer<Short4>(buffer + 8 * x); 1129 pixel.y = *Pointer<Short4>(buffer + 8 * x + 8); 1130 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1131 pixel.z = *Pointer<Short4>(buffer + 8 * x); 1132 pixel.w = *Pointer<Short4>(buffer + 8 * x + 8); 1133 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); 1134 break; 1135 case FORMAT_G16R16: 1136 buffer = cBuffer; 1137 pixel.x = *Pointer<Short4>(buffer + 4 * x); 1138 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1139 pixel.y = *Pointer<Short4>(buffer + 4 * x); 1140 pixel.z = pixel.x; 1141 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y)); 1142 pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y)); 1143 pixel.y = pixel.z; 1144 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z)); 1145 pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z)); 1146 pixel.z = Short4(0xFFFFu); 1147 pixel.w = Short4(0xFFFFu); 1148 break; 1149 default: 1150 ASSERT(false); 1151 } 1152 1153 if((postBlendSRGB && state.writeSRGB) || isSRGB(index)) 1154 { 1155 sRGBtoLinear16_12_16(pixel); 1156 } 1157 } 1158 alphaBlend(int index,Pointer<Byte> & cBuffer,Vector4s & current,Int & x)1159 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4s ¤t, Int &x) 1160 { 1161 if(!state.alphaBlendActive) 1162 { 1163 return; 1164 } 1165 1166 Vector4s pixel; 1167 readPixel(index, cBuffer, x, pixel); 1168 1169 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor 1170 Vector4s sourceFactor; 1171 Vector4s destFactor; 1172 1173 blendFactor(sourceFactor, current, pixel, state.sourceBlendFactor); 1174 blendFactor(destFactor, current, pixel, state.destBlendFactor); 1175 1176 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO) 1177 { 1178 current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x)); 1179 current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y)); 1180 current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z)); 1181 } 1182 1183 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO) 1184 { 1185 pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x)); 1186 pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y)); 1187 pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z)); 1188 } 1189 1190 switch(state.blendOperation) 1191 { 1192 case BLENDOP_ADD: 1193 current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x)); 1194 current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y)); 1195 current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z)); 1196 break; 1197 case BLENDOP_SUB: 1198 current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x)); 1199 current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y)); 1200 current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z)); 1201 break; 1202 case BLENDOP_INVSUB: 1203 current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x)); 1204 current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y)); 1205 current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z)); 1206 break; 1207 case BLENDOP_MIN: 1208 current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x)); 1209 current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y)); 1210 current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z)); 1211 break; 1212 case BLENDOP_MAX: 1213 current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x)); 1214 current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y)); 1215 current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z)); 1216 break; 1217 case BLENDOP_SOURCE: 1218 // No operation 1219 break; 1220 case BLENDOP_DEST: 1221 current.x = pixel.x; 1222 current.y = pixel.y; 1223 current.z = pixel.z; 1224 break; 1225 case BLENDOP_NULL: 1226 current.x = Short4(0x0000); 1227 current.y = Short4(0x0000); 1228 current.z = Short4(0x0000); 1229 break; 1230 default: 1231 ASSERT(false); 1232 } 1233 1234 blendFactorAlpha(sourceFactor, current, pixel, state.sourceBlendFactorAlpha); 1235 blendFactorAlpha(destFactor, current, pixel, state.destBlendFactorAlpha); 1236 1237 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO) 1238 { 1239 current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w)); 1240 } 1241 1242 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO) 1243 { 1244 pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w)); 1245 } 1246 1247 switch(state.blendOperationAlpha) 1248 { 1249 case BLENDOP_ADD: 1250 current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w)); 1251 break; 1252 case BLENDOP_SUB: 1253 current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w)); 1254 break; 1255 case BLENDOP_INVSUB: 1256 current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w)); 1257 break; 1258 case BLENDOP_MIN: 1259 current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w)); 1260 break; 1261 case BLENDOP_MAX: 1262 current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w)); 1263 break; 1264 case BLENDOP_SOURCE: 1265 // No operation 1266 break; 1267 case BLENDOP_DEST: 1268 current.w = pixel.w; 1269 break; 1270 case BLENDOP_NULL: 1271 current.w = Short4(0x0000); 1272 break; 1273 default: 1274 ASSERT(false); 1275 } 1276 } 1277 logicOperation(int index,Pointer<Byte> & cBuffer,Vector4s & current,Int & x)1278 void PixelRoutine::logicOperation(int index, Pointer<Byte> &cBuffer, Vector4s ¤t, Int &x) 1279 { 1280 if(state.logicalOperation == LOGICALOP_COPY) 1281 { 1282 return; 1283 } 1284 1285 Vector4s pixel; 1286 readPixel(index, cBuffer, x, pixel); 1287 1288 switch(state.logicalOperation) 1289 { 1290 case LOGICALOP_CLEAR: 1291 current.x = UShort4(0); 1292 current.y = UShort4(0); 1293 current.z = UShort4(0); 1294 break; 1295 case LOGICALOP_SET: 1296 current.x = UShort4(0xFFFFu); 1297 current.y = UShort4(0xFFFFu); 1298 current.z = UShort4(0xFFFFu); 1299 break; 1300 case LOGICALOP_COPY: 1301 ASSERT(false); // Optimized out 1302 break; 1303 case LOGICALOP_COPY_INVERTED: 1304 current.x = ~current.x; 1305 current.y = ~current.y; 1306 current.z = ~current.z; 1307 break; 1308 case LOGICALOP_NOOP: 1309 current.x = pixel.x; 1310 current.y = pixel.y; 1311 current.z = pixel.z; 1312 break; 1313 case LOGICALOP_INVERT: 1314 current.x = ~pixel.x; 1315 current.y = ~pixel.y; 1316 current.z = ~pixel.z; 1317 break; 1318 case LOGICALOP_AND: 1319 current.x = pixel.x & current.x; 1320 current.y = pixel.y & current.y; 1321 current.z = pixel.z & current.z; 1322 break; 1323 case LOGICALOP_NAND: 1324 current.x = ~(pixel.x & current.x); 1325 current.y = ~(pixel.y & current.y); 1326 current.z = ~(pixel.z & current.z); 1327 break; 1328 case LOGICALOP_OR: 1329 current.x = pixel.x | current.x; 1330 current.y = pixel.y | current.y; 1331 current.z = pixel.z | current.z; 1332 break; 1333 case LOGICALOP_NOR: 1334 current.x = ~(pixel.x | current.x); 1335 current.y = ~(pixel.y | current.y); 1336 current.z = ~(pixel.z | current.z); 1337 break; 1338 case LOGICALOP_XOR: 1339 current.x = pixel.x ^ current.x; 1340 current.y = pixel.y ^ current.y; 1341 current.z = pixel.z ^ current.z; 1342 break; 1343 case LOGICALOP_EQUIV: 1344 current.x = ~(pixel.x ^ current.x); 1345 current.y = ~(pixel.y ^ current.y); 1346 current.z = ~(pixel.z ^ current.z); 1347 break; 1348 case LOGICALOP_AND_REVERSE: 1349 current.x = ~pixel.x & current.x; 1350 current.y = ~pixel.y & current.y; 1351 current.z = ~pixel.z & current.z; 1352 break; 1353 case LOGICALOP_AND_INVERTED: 1354 current.x = pixel.x & ~current.x; 1355 current.y = pixel.y & ~current.y; 1356 current.z = pixel.z & ~current.z; 1357 break; 1358 case LOGICALOP_OR_REVERSE: 1359 current.x = ~pixel.x | current.x; 1360 current.y = ~pixel.y | current.y; 1361 current.z = ~pixel.z | current.z; 1362 break; 1363 case LOGICALOP_OR_INVERTED: 1364 current.x = pixel.x | ~current.x; 1365 current.y = pixel.y | ~current.y; 1366 current.z = pixel.z | ~current.z; 1367 break; 1368 default: 1369 ASSERT(false); 1370 } 1371 } 1372 writeColor(int index,Pointer<Byte> & cBuffer,Int & x,Vector4s & current,Int & sMask,Int & zMask,Int & cMask)1373 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s ¤t, Int &sMask, Int &zMask, Int &cMask) 1374 { 1375 if((postBlendSRGB && state.writeSRGB) || isSRGB(index)) 1376 { 1377 linearToSRGB16_12_16(current); 1378 } 1379 1380 if(exactColorRounding) 1381 { 1382 switch(state.targetFormat[index]) 1383 { 1384 case FORMAT_R5G6B5: 1385 current.x = AddSat(As<UShort4>(current.x), UShort4(0x0400)); 1386 current.y = AddSat(As<UShort4>(current.y), UShort4(0x0200)); 1387 current.z = AddSat(As<UShort4>(current.z), UShort4(0x0400)); 1388 break; 1389 case FORMAT_X8G8R8B8Q: 1390 case FORMAT_A8G8R8B8Q: 1391 case FORMAT_X8R8G8B8: 1392 case FORMAT_X8B8G8R8: 1393 case FORMAT_A8R8G8B8: 1394 case FORMAT_A8B8G8R8: 1395 case FORMAT_SRGB8_X8: 1396 case FORMAT_SRGB8_A8: 1397 case FORMAT_G8R8: 1398 case FORMAT_R8: 1399 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080); 1400 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080); 1401 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080); 1402 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080); 1403 break; 1404 default: 1405 break; 1406 } 1407 } 1408 1409 int rgbaWriteMask = state.colorWriteActive(index); 1410 int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2; 1411 1412 switch(state.targetFormat[index]) 1413 { 1414 case FORMAT_R5G6B5: 1415 { 1416 current.x = current.x & Short4(0xF800u); 1417 current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5; 1418 current.z = As<UShort4>(current.z) >> 11; 1419 1420 current.x = current.x | current.y | current.z; 1421 } 1422 break; 1423 case FORMAT_X8G8R8B8Q: 1424 UNIMPLEMENTED(); 1425 // current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1426 // current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1427 // current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1428 1429 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x))); 1430 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y))); 1431 break; 1432 case FORMAT_A8G8R8B8Q: 1433 UNIMPLEMENTED(); 1434 // current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1435 // current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1436 // current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1437 // current.w = As<Short4>(As<UShort4>(current.w) >> 8); 1438 1439 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x))); 1440 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w))); 1441 break; 1442 case FORMAT_X8R8G8B8: 1443 case FORMAT_A8R8G8B8: 1444 if(state.targetFormat[index] == FORMAT_X8R8G8B8 || rgbaWriteMask == 0x7) 1445 { 1446 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1447 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1448 current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1449 1450 current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x))); 1451 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y))); 1452 1453 current.x = current.z; 1454 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); 1455 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); 1456 current.y = current.z; 1457 current.z = As<Short4>(UnpackLow(current.z, current.x)); 1458 current.y = As<Short4>(UnpackHigh(current.y, current.x)); 1459 } 1460 else 1461 { 1462 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1463 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1464 current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1465 current.w = As<Short4>(As<UShort4>(current.w) >> 8); 1466 1467 current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x))); 1468 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w))); 1469 1470 current.x = current.z; 1471 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); 1472 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); 1473 current.y = current.z; 1474 current.z = As<Short4>(UnpackLow(current.z, current.x)); 1475 current.y = As<Short4>(UnpackHigh(current.y, current.x)); 1476 } 1477 break; 1478 case FORMAT_X8B8G8R8: 1479 case FORMAT_A8B8G8R8: 1480 case FORMAT_SRGB8_X8: 1481 case FORMAT_SRGB8_A8: 1482 if(state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8 || rgbaWriteMask == 0x7) 1483 { 1484 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1485 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1486 current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1487 1488 current.z = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.z))); 1489 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y))); 1490 1491 current.x = current.z; 1492 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); 1493 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); 1494 current.y = current.z; 1495 current.z = As<Short4>(UnpackLow(current.z, current.x)); 1496 current.y = As<Short4>(UnpackHigh(current.y, current.x)); 1497 } 1498 else 1499 { 1500 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1501 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1502 current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1503 current.w = As<Short4>(As<UShort4>(current.w) >> 8); 1504 1505 current.z = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.z))); 1506 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w))); 1507 1508 current.x = current.z; 1509 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); 1510 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); 1511 current.y = current.z; 1512 current.z = As<Short4>(UnpackLow(current.z, current.x)); 1513 current.y = As<Short4>(UnpackHigh(current.y, current.x)); 1514 } 1515 break; 1516 case FORMAT_G8R8: 1517 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1518 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1519 current.x = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.x))); 1520 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y))); 1521 current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y)); 1522 break; 1523 case FORMAT_R8: 1524 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1525 current.x = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.x))); 1526 break; 1527 case FORMAT_A8: 1528 current.w = As<Short4>(As<UShort4>(current.w) >> 8); 1529 current.w = As<Short4>(Pack(As<UShort4>(current.w), As<UShort4>(current.w))); 1530 break; 1531 case FORMAT_G16R16: 1532 current.z = current.x; 1533 current.x = As<Short4>(UnpackLow(current.x, current.y)); 1534 current.z = As<Short4>(UnpackHigh(current.z, current.y)); 1535 current.y = current.z; 1536 break; 1537 case FORMAT_A16B16G16R16: 1538 transpose4x4(current.x, current.y, current.z, current.w); 1539 break; 1540 default: 1541 ASSERT(false); 1542 } 1543 1544 Short4 c01 = current.z; 1545 Short4 c23 = current.y; 1546 1547 Int xMask; // Combination of all masks 1548 1549 if(state.depthTestActive) 1550 { 1551 xMask = zMask; 1552 } 1553 else 1554 { 1555 xMask = cMask; 1556 } 1557 1558 if(state.stencilActive) 1559 { 1560 xMask &= sMask; 1561 } 1562 1563 switch(state.targetFormat[index]) 1564 { 1565 case FORMAT_R5G6B5: 1566 { 1567 Pointer<Byte> buffer = cBuffer + 2 * x; 1568 Int value = *Pointer<Int>(buffer); 1569 1570 Int c01 = Extract(As<Int2>(current.x), 0); 1571 1572 if((bgraWriteMask & 0x00000007) != 0x00000007) 1573 { 1574 Int masked = value; 1575 c01 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0])); 1576 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0])); 1577 c01 |= masked; 1578 } 1579 1580 c01 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8); 1581 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][0]) + xMask * 8); 1582 c01 |= value; 1583 *Pointer<Int>(buffer) = c01; 1584 1585 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1586 value = *Pointer<Int>(buffer); 1587 1588 Int c23 = Extract(As<Int2>(current.x), 1); 1589 1590 if((bgraWriteMask & 0x00000007) != 0x00000007) 1591 { 1592 Int masked = value; 1593 c23 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0])); 1594 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0])); 1595 c23 |= masked; 1596 } 1597 1598 c23 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8); 1599 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][2]) + xMask * 8); 1600 c23 |= value; 1601 *Pointer<Int>(buffer) = c23; 1602 } 1603 break; 1604 case FORMAT_A8G8R8B8Q: 1605 case FORMAT_X8G8R8B8Q: // FIXME: Don't touch alpha? 1606 UNIMPLEMENTED(); 1607 // value = *Pointer<Short4>(cBuffer + 8 * x + 0); 1608 1609 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) || 1610 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) && 1611 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 1612 // { 1613 // Short4 masked = value; 1614 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); 1615 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); 1616 // c01 |= masked; 1617 // } 1618 1619 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); 1620 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); 1621 // c01 |= value; 1622 // *Pointer<Short4>(cBuffer + 8 * x + 0) = c01; 1623 1624 // value = *Pointer<Short4>(cBuffer + 8 * x + 8); 1625 1626 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) || 1627 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) && 1628 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 1629 // { 1630 // Short4 masked = value; 1631 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); 1632 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); 1633 // c23 |= masked; 1634 // } 1635 1636 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); 1637 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); 1638 // c23 |= value; 1639 // *Pointer<Short4>(cBuffer + 8 * x + 8) = c23; 1640 break; 1641 case FORMAT_A8R8G8B8: 1642 case FORMAT_X8R8G8B8: // FIXME: Don't touch alpha? 1643 { 1644 Pointer<Byte> buffer = cBuffer + x * 4; 1645 Short4 value = *Pointer<Short4>(buffer); 1646 1647 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) || 1648 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) && 1649 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 1650 { 1651 Short4 masked = value; 1652 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); 1653 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); 1654 c01 |= masked; 1655 } 1656 1657 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); 1658 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); 1659 c01 |= value; 1660 *Pointer<Short4>(buffer) = c01; 1661 1662 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1663 value = *Pointer<Short4>(buffer); 1664 1665 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) || 1666 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) && 1667 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 1668 { 1669 Short4 masked = value; 1670 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); 1671 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); 1672 c23 |= masked; 1673 } 1674 1675 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); 1676 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); 1677 c23 |= value; 1678 *Pointer<Short4>(buffer) = c23; 1679 } 1680 break; 1681 case FORMAT_A8B8G8R8: 1682 case FORMAT_X8B8G8R8: // FIXME: Don't touch alpha? 1683 case FORMAT_SRGB8_X8: 1684 case FORMAT_SRGB8_A8: 1685 { 1686 Pointer<Byte> buffer = cBuffer + x * 4; 1687 Short4 value = *Pointer<Short4>(buffer); 1688 1689 bool masked = (((state.targetFormat[index] == FORMAT_A8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_A8) && rgbaWriteMask != 0x0000000F) || 1690 (((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x00000007) && 1691 ((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x0000000F))); // FIXME: Need for masking when XBGR && Fh? 1692 1693 if(masked) 1694 { 1695 Short4 masked = value; 1696 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0])); 1697 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0])); 1698 c01 |= masked; 1699 } 1700 1701 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); 1702 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); 1703 c01 |= value; 1704 *Pointer<Short4>(buffer) = c01; 1705 1706 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1707 value = *Pointer<Short4>(buffer); 1708 1709 if(masked) 1710 { 1711 Short4 masked = value; 1712 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0])); 1713 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0])); 1714 c23 |= masked; 1715 } 1716 1717 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); 1718 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); 1719 c23 |= value; 1720 *Pointer<Short4>(buffer) = c23; 1721 } 1722 break; 1723 case FORMAT_G8R8: 1724 if((rgbaWriteMask & 0x00000003) != 0x0) 1725 { 1726 Pointer<Byte> buffer = cBuffer + 2 * x; 1727 Int2 value; 1728 value = Insert(value, *Pointer<Int>(buffer), 0); 1729 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1730 value = Insert(value, *Pointer<Int>(buffer + pitch), 1); 1731 1732 Int2 packedCol = As<Int2>(current.x); 1733 1734 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8); 1735 if((rgbaWriteMask & 0x3) != 0x3) 1736 { 1737 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0])); 1738 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); 1739 mergedMask &= rgbaMask; 1740 } 1741 1742 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask)); 1743 1744 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0)); 1745 *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1)); 1746 } 1747 break; 1748 case FORMAT_R8: 1749 if(rgbaWriteMask & 0x00000001) 1750 { 1751 Pointer<Byte> buffer = cBuffer + 1 * x; 1752 Short4 value; 1753 value = Insert(value, *Pointer<Short>(buffer), 0); 1754 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1755 value = Insert(value, *Pointer<Short>(buffer + pitch), 1); 1756 value = UnpackLow(As<Byte8>(value), As<Byte8>(value)); 1757 1758 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask); 1759 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask); 1760 current.x |= value; 1761 1762 *Pointer<Short>(buffer) = Extract(current.x, 0); 1763 *Pointer<Short>(buffer + pitch) = Extract(current.x, 1); 1764 } 1765 break; 1766 case FORMAT_A8: 1767 if(rgbaWriteMask & 0x00000008) 1768 { 1769 Pointer<Byte> buffer = cBuffer + 1 * x; 1770 Short4 value; 1771 value = Insert(value, *Pointer<Short>(buffer), 0); 1772 Int pitch = *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1773 value = Insert(value, *Pointer<Short>(buffer + pitch), 1); 1774 value = UnpackLow(As<Byte8>(value), As<Byte8>(value)); 1775 1776 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q) + 8 * xMask); 1777 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q) + 8 * xMask); 1778 current.w |= value; 1779 1780 *Pointer<Short>(buffer) = Extract(current.w, 0); 1781 *Pointer<Short>(buffer + pitch) = Extract(current.w, 1); 1782 } 1783 break; 1784 case FORMAT_G16R16: 1785 { 1786 Pointer<Byte> buffer = cBuffer + 4 * x; 1787 1788 Short4 value = *Pointer<Short4>(buffer); 1789 1790 if((rgbaWriteMask & 0x00000003) != 0x00000003) 1791 { 1792 Short4 masked = value; 1793 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); 1794 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0])); 1795 current.x |= masked; 1796 } 1797 1798 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); 1799 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); 1800 current.x |= value; 1801 *Pointer<Short4>(buffer) = current.x; 1802 1803 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1804 1805 value = *Pointer<Short4>(buffer); 1806 1807 if((rgbaWriteMask & 0x00000003) != 0x00000003) 1808 { 1809 Short4 masked = value; 1810 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); 1811 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0])); 1812 current.y |= masked; 1813 } 1814 1815 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); 1816 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); 1817 current.y |= value; 1818 *Pointer<Short4>(buffer) = current.y; 1819 } 1820 break; 1821 case FORMAT_A16B16G16R16: 1822 { 1823 Pointer<Byte> buffer = cBuffer + 8 * x; 1824 1825 { 1826 Short4 value = *Pointer<Short4>(buffer); 1827 1828 if(rgbaWriteMask != 0x0000000F) 1829 { 1830 Short4 masked = value; 1831 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); 1832 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); 1833 current.x |= masked; 1834 } 1835 1836 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ0Q) + xMask * 8); 1837 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8); 1838 current.x |= value; 1839 *Pointer<Short4>(buffer) = current.x; 1840 } 1841 1842 { 1843 Short4 value = *Pointer<Short4>(buffer + 8); 1844 1845 if(rgbaWriteMask != 0x0000000F) 1846 { 1847 Short4 masked = value; 1848 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); 1849 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); 1850 current.y |= masked; 1851 } 1852 1853 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ1Q) + xMask * 8); 1854 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8); 1855 current.y |= value; 1856 *Pointer<Short4>(buffer + 8) = current.y; 1857 } 1858 1859 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1860 1861 { 1862 Short4 value = *Pointer<Short4>(buffer); 1863 1864 if(rgbaWriteMask != 0x0000000F) 1865 { 1866 Short4 masked = value; 1867 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); 1868 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); 1869 current.z |= masked; 1870 } 1871 1872 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ2Q) + xMask * 8); 1873 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8); 1874 current.z |= value; 1875 *Pointer<Short4>(buffer) = current.z; 1876 } 1877 1878 { 1879 Short4 value = *Pointer<Short4>(buffer + 8); 1880 1881 if(rgbaWriteMask != 0x0000000F) 1882 { 1883 Short4 masked = value; 1884 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); 1885 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); 1886 current.w |= masked; 1887 } 1888 1889 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ3Q) + xMask * 8); 1890 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8); 1891 current.w |= value; 1892 *Pointer<Short4>(buffer + 8) = current.w; 1893 } 1894 } 1895 break; 1896 default: 1897 ASSERT(false); 1898 } 1899 } 1900 blendFactor(Vector4f & blendFactor,const Vector4f & oC,const Vector4f & pixel,BlendFactor blendFactorActive)1901 void PixelRoutine::blendFactor(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorActive) 1902 { 1903 switch(blendFactorActive) 1904 { 1905 case BLEND_ZERO: 1906 // Optimized 1907 break; 1908 case BLEND_ONE: 1909 // Optimized 1910 break; 1911 case BLEND_SOURCE: 1912 blendFactor.x = oC.x; 1913 blendFactor.y = oC.y; 1914 blendFactor.z = oC.z; 1915 break; 1916 case BLEND_INVSOURCE: 1917 blendFactor.x = Float4(1.0f) - oC.x; 1918 blendFactor.y = Float4(1.0f) - oC.y; 1919 blendFactor.z = Float4(1.0f) - oC.z; 1920 break; 1921 case BLEND_DEST: 1922 blendFactor.x = pixel.x; 1923 blendFactor.y = pixel.y; 1924 blendFactor.z = pixel.z; 1925 break; 1926 case BLEND_INVDEST: 1927 blendFactor.x = Float4(1.0f) - pixel.x; 1928 blendFactor.y = Float4(1.0f) - pixel.y; 1929 blendFactor.z = Float4(1.0f) - pixel.z; 1930 break; 1931 case BLEND_SOURCEALPHA: 1932 blendFactor.x = oC.w; 1933 blendFactor.y = oC.w; 1934 blendFactor.z = oC.w; 1935 break; 1936 case BLEND_INVSOURCEALPHA: 1937 blendFactor.x = Float4(1.0f) - oC.w; 1938 blendFactor.y = Float4(1.0f) - oC.w; 1939 blendFactor.z = Float4(1.0f) - oC.w; 1940 break; 1941 case BLEND_DESTALPHA: 1942 blendFactor.x = pixel.w; 1943 blendFactor.y = pixel.w; 1944 blendFactor.z = pixel.w; 1945 break; 1946 case BLEND_INVDESTALPHA: 1947 blendFactor.x = Float4(1.0f) - pixel.w; 1948 blendFactor.y = Float4(1.0f) - pixel.w; 1949 blendFactor.z = Float4(1.0f) - pixel.w; 1950 break; 1951 case BLEND_SRCALPHASAT: 1952 blendFactor.x = Float4(1.0f) - pixel.w; 1953 blendFactor.x = Min(blendFactor.x, oC.w); 1954 blendFactor.y = blendFactor.x; 1955 blendFactor.z = blendFactor.x; 1956 break; 1957 case BLEND_CONSTANT: 1958 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[0])); 1959 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[1])); 1960 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[2])); 1961 break; 1962 case BLEND_INVCONSTANT: 1963 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[0])); 1964 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[1])); 1965 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[2])); 1966 break; 1967 default: 1968 ASSERT(false); 1969 } 1970 } 1971 blendFactorAlpha(Vector4f & blendFactor,const Vector4f & oC,const Vector4f & pixel,BlendFactor blendFactorAlphaActive)1972 void PixelRoutine::blendFactorAlpha(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorAlphaActive) 1973 { 1974 switch(blendFactorAlphaActive) 1975 { 1976 case BLEND_ZERO: 1977 // Optimized 1978 break; 1979 case BLEND_ONE: 1980 // Optimized 1981 break; 1982 case BLEND_SOURCE: 1983 blendFactor.w = oC.w; 1984 break; 1985 case BLEND_INVSOURCE: 1986 blendFactor.w = Float4(1.0f) - oC.w; 1987 break; 1988 case BLEND_DEST: 1989 blendFactor.w = pixel.w; 1990 break; 1991 case BLEND_INVDEST: 1992 blendFactor.w = Float4(1.0f) - pixel.w; 1993 break; 1994 case BLEND_SOURCEALPHA: 1995 blendFactor.w = oC.w; 1996 break; 1997 case BLEND_INVSOURCEALPHA: 1998 blendFactor.w = Float4(1.0f) - oC.w; 1999 break; 2000 case BLEND_DESTALPHA: 2001 blendFactor.w = pixel.w; 2002 break; 2003 case BLEND_INVDESTALPHA: 2004 blendFactor.w = Float4(1.0f) - pixel.w; 2005 break; 2006 case BLEND_SRCALPHASAT: 2007 blendFactor.w = Float4(1.0f); 2008 break; 2009 case BLEND_CONSTANT: 2010 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3])); 2011 break; 2012 case BLEND_INVCONSTANT: 2013 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3])); 2014 break; 2015 default: 2016 ASSERT(false); 2017 } 2018 } 2019 alphaBlend(int index,Pointer<Byte> & cBuffer,Vector4f & oC,Int & x)2020 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x) 2021 { 2022 if(!state.alphaBlendActive) 2023 { 2024 return; 2025 } 2026 2027 Pointer<Byte> buffer; 2028 Vector4f pixel; 2029 2030 Vector4s color; 2031 Short4 c01; 2032 Short4 c23; 2033 2034 Float4 one; 2035 if(Surface::isFloatFormat(state.targetFormat[index])) 2036 { 2037 one = Float4(1.0f); 2038 } 2039 else if(Surface::isNonNormalizedInteger(state.targetFormat[index])) 2040 { 2041 one = As<Float4>(Surface::isUnsignedComponent(state.targetFormat[index], 0) ? Int4(0xFFFFFFFF) : Int4(0x7FFFFFFF)); 2042 } 2043 2044 switch(state.targetFormat[index]) 2045 { 2046 case FORMAT_R32I: 2047 case FORMAT_R32UI: 2048 case FORMAT_R32F: 2049 buffer = cBuffer; 2050 // FIXME: movlps 2051 pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0); 2052 pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4); 2053 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2054 // FIXME: movhps 2055 pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0); 2056 pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4); 2057 pixel.y = pixel.z = pixel.w = one; 2058 break; 2059 case FORMAT_G32R32I: 2060 case FORMAT_G32R32UI: 2061 case FORMAT_G32R32F: 2062 buffer = cBuffer; 2063 pixel.x = *Pointer<Float4>(buffer + 8 * x, 16); 2064 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2065 pixel.y = *Pointer<Float4>(buffer + 8 * x, 16); 2066 pixel.z = pixel.x; 2067 pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x88); 2068 pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0xDD); 2069 pixel.y = pixel.z; 2070 pixel.z = pixel.w = one; 2071 break; 2072 case FORMAT_X32B32G32R32F: 2073 case FORMAT_A32B32G32R32F: 2074 case FORMAT_A32B32G32R32I: 2075 case FORMAT_A32B32G32R32UI: 2076 buffer = cBuffer; 2077 pixel.x = *Pointer<Float4>(buffer + 16 * x, 16); 2078 pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16); 2079 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2080 pixel.z = *Pointer<Float4>(buffer + 16 * x, 16); 2081 pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16); 2082 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); 2083 if(state.targetFormat[index] == FORMAT_X32B32G32R32F) 2084 { 2085 pixel.w = Float4(1.0f); 2086 } 2087 break; 2088 default: 2089 ASSERT(false); 2090 } 2091 2092 if((postBlendSRGB && state.writeSRGB) || isSRGB(index)) 2093 { 2094 sRGBtoLinear(pixel.x); 2095 sRGBtoLinear(pixel.y); 2096 sRGBtoLinear(pixel.z); 2097 } 2098 2099 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor 2100 Vector4f sourceFactor; 2101 Vector4f destFactor; 2102 2103 blendFactor(sourceFactor, oC, pixel, state.sourceBlendFactor); 2104 blendFactor(destFactor, oC, pixel, state.destBlendFactor); 2105 2106 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO) 2107 { 2108 oC.x *= sourceFactor.x; 2109 oC.y *= sourceFactor.y; 2110 oC.z *= sourceFactor.z; 2111 } 2112 2113 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO) 2114 { 2115 pixel.x *= destFactor.x; 2116 pixel.y *= destFactor.y; 2117 pixel.z *= destFactor.z; 2118 } 2119 2120 switch(state.blendOperation) 2121 { 2122 case BLENDOP_ADD: 2123 oC.x += pixel.x; 2124 oC.y += pixel.y; 2125 oC.z += pixel.z; 2126 break; 2127 case BLENDOP_SUB: 2128 oC.x -= pixel.x; 2129 oC.y -= pixel.y; 2130 oC.z -= pixel.z; 2131 break; 2132 case BLENDOP_INVSUB: 2133 oC.x = pixel.x - oC.x; 2134 oC.y = pixel.y - oC.y; 2135 oC.z = pixel.z - oC.z; 2136 break; 2137 case BLENDOP_MIN: 2138 oC.x = Min(oC.x, pixel.x); 2139 oC.y = Min(oC.y, pixel.y); 2140 oC.z = Min(oC.z, pixel.z); 2141 break; 2142 case BLENDOP_MAX: 2143 oC.x = Max(oC.x, pixel.x); 2144 oC.y = Max(oC.y, pixel.y); 2145 oC.z = Max(oC.z, pixel.z); 2146 break; 2147 case BLENDOP_SOURCE: 2148 // No operation 2149 break; 2150 case BLENDOP_DEST: 2151 oC.x = pixel.x; 2152 oC.y = pixel.y; 2153 oC.z = pixel.z; 2154 break; 2155 case BLENDOP_NULL: 2156 oC.x = Float4(0.0f); 2157 oC.y = Float4(0.0f); 2158 oC.z = Float4(0.0f); 2159 break; 2160 default: 2161 ASSERT(false); 2162 } 2163 2164 blendFactorAlpha(sourceFactor, oC, pixel, state.sourceBlendFactorAlpha); 2165 blendFactorAlpha(destFactor, oC, pixel, state.destBlendFactorAlpha); 2166 2167 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO) 2168 { 2169 oC.w *= sourceFactor.w; 2170 } 2171 2172 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO) 2173 { 2174 pixel.w *= destFactor.w; 2175 } 2176 2177 switch(state.blendOperationAlpha) 2178 { 2179 case BLENDOP_ADD: 2180 oC.w += pixel.w; 2181 break; 2182 case BLENDOP_SUB: 2183 oC.w -= pixel.w; 2184 break; 2185 case BLENDOP_INVSUB: 2186 pixel.w -= oC.w; 2187 oC.w = pixel.w; 2188 break; 2189 case BLENDOP_MIN: 2190 oC.w = Min(oC.w, pixel.w); 2191 break; 2192 case BLENDOP_MAX: 2193 oC.w = Max(oC.w, pixel.w); 2194 break; 2195 case BLENDOP_SOURCE: 2196 // No operation 2197 break; 2198 case BLENDOP_DEST: 2199 oC.w = pixel.w; 2200 break; 2201 case BLENDOP_NULL: 2202 oC.w = Float4(0.0f); 2203 break; 2204 default: 2205 ASSERT(false); 2206 } 2207 } 2208 writeColor(int index,Pointer<Byte> & cBuffer,Int & x,Vector4f & oC,Int & sMask,Int & zMask,Int & cMask)2209 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask) 2210 { 2211 switch(state.targetFormat[index]) 2212 { 2213 case FORMAT_R32F: 2214 case FORMAT_R32I: 2215 case FORMAT_R32UI: 2216 case FORMAT_R16I: 2217 case FORMAT_R16UI: 2218 case FORMAT_R8I: 2219 case FORMAT_R8UI: 2220 break; 2221 case FORMAT_G32R32F: 2222 case FORMAT_G32R32I: 2223 case FORMAT_G32R32UI: 2224 case FORMAT_G16R16I: 2225 case FORMAT_G16R16UI: 2226 case FORMAT_G8R8I: 2227 case FORMAT_G8R8UI: 2228 oC.z = oC.x; 2229 oC.x = UnpackLow(oC.x, oC.y); 2230 oC.z = UnpackHigh(oC.z, oC.y); 2231 oC.y = oC.z; 2232 break; 2233 case FORMAT_X32B32G32R32F: 2234 case FORMAT_A32B32G32R32F: 2235 case FORMAT_A32B32G32R32I: 2236 case FORMAT_A32B32G32R32UI: 2237 case FORMAT_A16B16G16R16I: 2238 case FORMAT_A16B16G16R16UI: 2239 case FORMAT_A8B8G8R8I: 2240 case FORMAT_A8B8G8R8UI: 2241 transpose4x4(oC.x, oC.y, oC.z, oC.w); 2242 break; 2243 default: 2244 ASSERT(false); 2245 } 2246 2247 int rgbaWriteMask = state.colorWriteActive(index); 2248 2249 Int xMask; // Combination of all masks 2250 2251 if(state.depthTestActive) 2252 { 2253 xMask = zMask; 2254 } 2255 else 2256 { 2257 xMask = cMask; 2258 } 2259 2260 if(state.stencilActive) 2261 { 2262 xMask &= sMask; 2263 } 2264 2265 Pointer<Byte> buffer; 2266 Float4 value; 2267 2268 switch(state.targetFormat[index]) 2269 { 2270 case FORMAT_R32F: 2271 case FORMAT_R32I: 2272 case FORMAT_R32UI: 2273 if(rgbaWriteMask & 0x00000001) 2274 { 2275 buffer = cBuffer + 4 * x; 2276 2277 // FIXME: movlps 2278 value.x = *Pointer<Float>(buffer + 0); 2279 value.y = *Pointer<Float>(buffer + 4); 2280 2281 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2282 2283 // FIXME: movhps 2284 value.z = *Pointer<Float>(buffer + 0); 2285 value.w = *Pointer<Float>(buffer + 4); 2286 2287 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + xMask * 16, 16)); 2288 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16)); 2289 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); 2290 2291 // FIXME: movhps 2292 *Pointer<Float>(buffer + 0) = oC.x.z; 2293 *Pointer<Float>(buffer + 4) = oC.x.w; 2294 2295 buffer -= *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2296 2297 // FIXME: movlps 2298 *Pointer<Float>(buffer + 0) = oC.x.x; 2299 *Pointer<Float>(buffer + 4) = oC.x.y; 2300 } 2301 break; 2302 case FORMAT_R16I: 2303 case FORMAT_R16UI: 2304 if(rgbaWriteMask & 0x00000001) 2305 { 2306 buffer = cBuffer + 2 * x; 2307 2308 UShort4 xyzw; 2309 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0)); 2310 2311 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2312 2313 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1)); 2314 value = As<Float4>(Int4(xyzw)); 2315 2316 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16)); 2317 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16)); 2318 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); 2319 2320 if(state.targetFormat[index] == FORMAT_R16I) 2321 { 2322 Float component = oC.x.z; 2323 *Pointer<Short>(buffer + 0) = Short(As<Int>(component)); 2324 component = oC.x.w; 2325 *Pointer<Short>(buffer + 2) = Short(As<Int>(component)); 2326 2327 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2328 2329 component = oC.x.x; 2330 *Pointer<Short>(buffer + 0) = Short(As<Int>(component)); 2331 component = oC.x.y; 2332 *Pointer<Short>(buffer + 2) = Short(As<Int>(component)); 2333 } 2334 else // FORMAT_R16UI 2335 { 2336 Float component = oC.x.z; 2337 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component)); 2338 component = oC.x.w; 2339 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component)); 2340 2341 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2342 2343 component = oC.x.x; 2344 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component)); 2345 component = oC.x.y; 2346 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component)); 2347 } 2348 } 2349 break; 2350 case FORMAT_R8I: 2351 case FORMAT_R8UI: 2352 if(rgbaWriteMask & 0x00000001) 2353 { 2354 buffer = cBuffer + x; 2355 2356 UInt xyzw, packedCol; 2357 2358 xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF; 2359 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2360 xyzw |= UInt(*Pointer<UShort>(buffer)) << 16; 2361 2362 Short4 tmpCol = Short4(As<Int4>(oC.x)); 2363 if(state.targetFormat[index] == FORMAT_R8I) 2364 { 2365 tmpCol = As<Short4>(Pack(tmpCol, tmpCol)); 2366 } 2367 else 2368 { 2369 tmpCol = As<Short4>(Pack(As<UShort4>(tmpCol), As<UShort4>(tmpCol))); 2370 } 2371 packedCol = Extract(As<Int2>(tmpCol), 0); 2372 2373 packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) | 2374 (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask)); 2375 2376 *Pointer<UShort>(buffer) = UShort(packedCol >> 16); 2377 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2378 *Pointer<UShort>(buffer) = UShort(packedCol); 2379 } 2380 break; 2381 case FORMAT_G32R32F: 2382 case FORMAT_G32R32I: 2383 case FORMAT_G32R32UI: 2384 buffer = cBuffer + 8 * x; 2385 2386 value = *Pointer<Float4>(buffer); 2387 2388 if((rgbaWriteMask & 0x00000003) != 0x00000003) 2389 { 2390 Float4 masked = value; 2391 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); 2392 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0]))); 2393 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked)); 2394 } 2395 2396 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16)); 2397 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16)); 2398 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); 2399 *Pointer<Float4>(buffer) = oC.x; 2400 2401 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2402 2403 value = *Pointer<Float4>(buffer); 2404 2405 if((rgbaWriteMask & 0x00000003) != 0x00000003) 2406 { 2407 Float4 masked; 2408 2409 masked = value; 2410 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); 2411 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0]))); 2412 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked)); 2413 } 2414 2415 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16)); 2416 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16)); 2417 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value)); 2418 *Pointer<Float4>(buffer) = oC.y; 2419 break; 2420 case FORMAT_G16R16I: 2421 case FORMAT_G16R16UI: 2422 if((rgbaWriteMask & 0x00000003) != 0x0) 2423 { 2424 buffer = cBuffer + 4 * x; 2425 2426 UInt2 rgbaMask; 2427 UShort4 packedCol = UShort4(As<Int4>(oC.x)); 2428 UShort4 value = *Pointer<UShort4>(buffer); 2429 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); 2430 if((rgbaWriteMask & 0x3) != 0x3) 2431 { 2432 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0])); 2433 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); 2434 mergedMask &= rgbaMask; 2435 } 2436 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask); 2437 2438 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2439 2440 packedCol = UShort4(As<Int4>(oC.y)); 2441 value = *Pointer<UShort4>(buffer); 2442 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); 2443 if((rgbaWriteMask & 0x3) != 0x3) 2444 { 2445 mergedMask &= rgbaMask; 2446 } 2447 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask); 2448 } 2449 break; 2450 case FORMAT_G8R8I: 2451 case FORMAT_G8R8UI: 2452 if((rgbaWriteMask & 0x00000003) != 0x0) 2453 { 2454 buffer = cBuffer + 2 * x; 2455 2456 Int2 xyzw, packedCol; 2457 2458 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0); 2459 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2460 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1); 2461 2462 if(state.targetFormat[index] == FORMAT_G8R8I) 2463 { 2464 packedCol = As<Int2>(Pack(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); 2465 } 2466 else 2467 { 2468 packedCol = As<Int2>(Pack(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y)))); 2469 } 2470 2471 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8); 2472 if((rgbaWriteMask & 0x3) != 0x3) 2473 { 2474 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0])); 2475 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); 2476 mergedMask &= rgbaMask; 2477 } 2478 2479 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask)); 2480 2481 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1)); 2482 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2483 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0)); 2484 } 2485 break; 2486 case FORMAT_X32B32G32R32F: 2487 case FORMAT_A32B32G32R32F: 2488 case FORMAT_A32B32G32R32I: 2489 case FORMAT_A32B32G32R32UI: 2490 buffer = cBuffer + 16 * x; 2491 2492 { 2493 value = *Pointer<Float4>(buffer, 16); 2494 2495 if(rgbaWriteMask != 0x0000000F) 2496 { 2497 Float4 masked = value; 2498 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); 2499 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); 2500 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked)); 2501 } 2502 2503 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16)); 2504 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16)); 2505 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); 2506 *Pointer<Float4>(buffer, 16) = oC.x; 2507 } 2508 2509 { 2510 value = *Pointer<Float4>(buffer + 16, 16); 2511 2512 if(rgbaWriteMask != 0x0000000F) 2513 { 2514 Float4 masked = value; 2515 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); 2516 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); 2517 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked)); 2518 } 2519 2520 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskX1X) + xMask * 16, 16)); 2521 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16)); 2522 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value)); 2523 *Pointer<Float4>(buffer + 16, 16) = oC.y; 2524 } 2525 2526 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2527 2528 { 2529 value = *Pointer<Float4>(buffer, 16); 2530 2531 if(rgbaWriteMask != 0x0000000F) 2532 { 2533 Float4 masked = value; 2534 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); 2535 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); 2536 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked)); 2537 } 2538 2539 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskX2X) + xMask * 16, 16)); 2540 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16)); 2541 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value)); 2542 *Pointer<Float4>(buffer, 16) = oC.z; 2543 } 2544 2545 { 2546 value = *Pointer<Float4>(buffer + 16, 16); 2547 2548 if(rgbaWriteMask != 0x0000000F) 2549 { 2550 Float4 masked = value; 2551 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); 2552 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); 2553 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked)); 2554 } 2555 2556 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskX3X) + xMask * 16, 16)); 2557 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16)); 2558 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value)); 2559 *Pointer<Float4>(buffer + 16, 16) = oC.w; 2560 } 2561 break; 2562 case FORMAT_A16B16G16R16I: 2563 case FORMAT_A16B16G16R16UI: 2564 if((rgbaWriteMask & 0x0000000F) != 0x0) 2565 { 2566 buffer = cBuffer + 8 * x; 2567 2568 UInt4 rgbaMask; 2569 UShort8 value = *Pointer<UShort8>(buffer); 2570 UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y))); 2571 UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16); 2572 if((rgbaWriteMask & 0xF) != 0xF) 2573 { 2574 UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); 2575 rgbaMask = UInt4(tmpMask, tmpMask); 2576 mergedMask &= rgbaMask; 2577 } 2578 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask); 2579 2580 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2581 2582 value = *Pointer<UShort8>(buffer); 2583 packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w))); 2584 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16); 2585 if((rgbaWriteMask & 0xF) != 0xF) 2586 { 2587 mergedMask &= rgbaMask; 2588 } 2589 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask); 2590 } 2591 break; 2592 case FORMAT_A8B8G8R8I: 2593 case FORMAT_A8B8G8R8UI: 2594 if((rgbaWriteMask & 0x0000000F) != 0x0) 2595 { 2596 UInt2 value, packedCol, mergedMask; 2597 2598 buffer = cBuffer + 4 * x; 2599 2600 if(state.targetFormat[index] == FORMAT_A8B8G8R8I) 2601 { 2602 packedCol = As<UInt2>(Pack(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); 2603 } 2604 else 2605 { 2606 packedCol = As<UInt2>(Pack(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y)))); 2607 } 2608 value = *Pointer<UInt2>(buffer, 16); 2609 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); 2610 if(rgbaWriteMask != 0xF) 2611 { 2612 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0])); 2613 } 2614 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask); 2615 2616 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2617 2618 if(state.targetFormat[index] == FORMAT_A8B8G8R8I) 2619 { 2620 packedCol = As<UInt2>(Pack(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w)))); 2621 } 2622 else 2623 { 2624 packedCol = As<UInt2>(Pack(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w)))); 2625 } 2626 value = *Pointer<UInt2>(buffer, 16); 2627 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); 2628 if(rgbaWriteMask != 0xF) 2629 { 2630 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0])); 2631 } 2632 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask); 2633 } 2634 break; 2635 default: 2636 ASSERT(false); 2637 } 2638 } 2639 convertFixed16(Float4 & cf,bool saturate)2640 UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate) 2641 { 2642 return UShort4(cf * Float4(0xFFFF), saturate); 2643 } 2644 sRGBtoLinear16_12_16(Vector4s & c)2645 void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c) 2646 { 2647 c.x = As<UShort4>(c.x) >> 4; 2648 c.y = As<UShort4>(c.y) >> 4; 2649 c.z = As<UShort4>(c.z) >> 4; 2650 2651 sRGBtoLinear12_16(c); 2652 } 2653 sRGBtoLinear12_16(Vector4s & c)2654 void PixelRoutine::sRGBtoLinear12_16(Vector4s &c) 2655 { 2656 Pointer<Byte> LUT = constants + OFFSET(Constants,sRGBtoLinear12_16); 2657 2658 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0); 2659 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1); 2660 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2); 2661 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3); 2662 2663 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0); 2664 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1); 2665 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2); 2666 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3); 2667 2668 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0); 2669 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1); 2670 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2); 2671 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3); 2672 } 2673 linearToSRGB16_12_16(Vector4s & c)2674 void PixelRoutine::linearToSRGB16_12_16(Vector4s &c) 2675 { 2676 c.x = As<UShort4>(c.x) >> 4; 2677 c.y = As<UShort4>(c.y) >> 4; 2678 c.z = As<UShort4>(c.z) >> 4; 2679 2680 linearToSRGB12_16(c); 2681 } 2682 linearToSRGB12_16(Vector4s & c)2683 void PixelRoutine::linearToSRGB12_16(Vector4s &c) 2684 { 2685 Pointer<Byte> LUT = constants + OFFSET(Constants,linearToSRGB12_16); 2686 2687 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0); 2688 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1); 2689 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2); 2690 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3); 2691 2692 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0); 2693 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1); 2694 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2); 2695 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3); 2696 2697 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0); 2698 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1); 2699 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2); 2700 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3); 2701 } 2702 sRGBtoLinear(const Float4 & x)2703 Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2 2704 { 2705 Float4 linear = x * x; 2706 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f); 2707 2708 return Min(Max(linear, Float4(0.0f)), Float4(1.0f)); 2709 } 2710 colorUsed()2711 bool PixelRoutine::colorUsed() 2712 { 2713 return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsKill; 2714 } 2715 } 2716