1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "PixelPipeline.hpp" 16 #include "SamplerCore.hpp" 17 #include "Renderer/Renderer.hpp" 18 19 namespace sw 20 { 21 extern bool postBlendSRGB; 22 setBuiltins(Int & x,Int & y,Float4 (& z)[4],Float4 & w)23 void PixelPipeline::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w) 24 { 25 if(state.color[0].component & 0x1) diffuse.x = convertFixed12(v[0].x); else diffuse.x = Short4(0x1000); 26 if(state.color[0].component & 0x2) diffuse.y = convertFixed12(v[0].y); else diffuse.y = Short4(0x1000); 27 if(state.color[0].component & 0x4) diffuse.z = convertFixed12(v[0].z); else diffuse.z = Short4(0x1000); 28 if(state.color[0].component & 0x8) diffuse.w = convertFixed12(v[0].w); else diffuse.w = Short4(0x1000); 29 30 if(state.color[1].component & 0x1) specular.x = convertFixed12(v[1].x); else specular.x = Short4(0x0000); 31 if(state.color[1].component & 0x2) specular.y = convertFixed12(v[1].y); else specular.y = Short4(0x0000); 32 if(state.color[1].component & 0x4) specular.z = convertFixed12(v[1].z); else specular.z = Short4(0x0000); 33 if(state.color[1].component & 0x8) specular.w = convertFixed12(v[1].w); else specular.w = Short4(0x0000); 34 } 35 fixedFunction()36 void PixelPipeline::fixedFunction() 37 { 38 current = diffuse; 39 Vector4s temp(0x0000, 0x0000, 0x0000, 0x0000); 40 41 for(int stage = 0; stage < 8; stage++) 42 { 43 if(state.textureStage[stage].stageOperation == TextureStage::STAGE_DISABLE) 44 { 45 break; 46 } 47 48 Vector4s texture; 49 50 if(state.textureStage[stage].usesTexture) 51 { 52 texture = sampleTexture(stage, stage); 53 } 54 55 blendTexture(temp, texture, stage); 56 } 57 58 specularPixel(current, specular); 59 } 60 applyShader(Int cMask[4])61 void PixelPipeline::applyShader(Int cMask[4]) 62 { 63 if(!shader) 64 { 65 fixedFunction(); 66 return; 67 } 68 69 int pad = 0; // Count number of texm3x3pad instructions 70 Vector4s dPairing; // Destination for first pairing instruction 71 72 for(size_t i = 0; i < shader->getLength(); i++) 73 { 74 const Shader::Instruction *instruction = shader->getInstruction(i); 75 Shader::Opcode opcode = instruction->opcode; 76 77 // #ifndef NDEBUG // FIXME: Centralize debug output control 78 // shader->printInstruction(i, "debug.txt"); 79 // #endif 80 81 if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB) 82 { 83 continue; 84 } 85 86 const Dst &dst = instruction->dst; 87 const Src &src0 = instruction->src[0]; 88 const Src &src1 = instruction->src[1]; 89 const Src &src2 = instruction->src[2]; 90 91 unsigned short shaderModel = shader->getShaderModel(); 92 bool pairing = i + 1 < shader->getLength() && shader->getInstruction(i + 1)->coissue; // First instruction of pair 93 bool coissue = instruction->coissue; // Second instruction of pair 94 95 Vector4s d; 96 Vector4s s0; 97 Vector4s s1; 98 Vector4s s2; 99 100 if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0); 101 if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1); 102 if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2); 103 104 Float4 x = shaderModel < 0x0104 ? v[2 + dst.index].x : v[2 + src0.index].x; 105 Float4 y = shaderModel < 0x0104 ? v[2 + dst.index].y : v[2 + src0.index].y; 106 Float4 z = shaderModel < 0x0104 ? v[2 + dst.index].z : v[2 + src0.index].z; 107 Float4 w = shaderModel < 0x0104 ? v[2 + dst.index].w : v[2 + src0.index].w; 108 109 switch(opcode) 110 { 111 case Shader::OPCODE_PS_1_0: break; 112 case Shader::OPCODE_PS_1_1: break; 113 case Shader::OPCODE_PS_1_2: break; 114 case Shader::OPCODE_PS_1_3: break; 115 case Shader::OPCODE_PS_1_4: break; 116 117 case Shader::OPCODE_DEF: break; 118 119 case Shader::OPCODE_NOP: break; 120 case Shader::OPCODE_MOV: MOV(d, s0); break; 121 case Shader::OPCODE_ADD: ADD(d, s0, s1); break; 122 case Shader::OPCODE_SUB: SUB(d, s0, s1); break; 123 case Shader::OPCODE_MAD: MAD(d, s0, s1, s2); break; 124 case Shader::OPCODE_MUL: MUL(d, s0, s1); break; 125 case Shader::OPCODE_DP3: DP3(d, s0, s1); break; 126 case Shader::OPCODE_DP4: DP4(d, s0, s1); break; 127 case Shader::OPCODE_LRP: LRP(d, s0, s1, s2); break; 128 case Shader::OPCODE_TEXCOORD: 129 if(shaderModel < 0x0104) 130 { 131 TEXCOORD(d, x, y, z, dst.index); 132 } 133 else 134 { 135 if((src0.swizzle & 0x30) == 0x20) // .xyz 136 { 137 TEXCRD(d, x, y, z, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW); 138 } 139 else // .xwy 140 { 141 TEXCRD(d, x, y, w, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW); 142 } 143 } 144 break; 145 case Shader::OPCODE_TEXKILL: 146 if(shaderModel < 0x0104) 147 { 148 TEXKILL(cMask, x, y, z); 149 } 150 else if(shaderModel == 0x0104) 151 { 152 if(dst.type == Shader::PARAMETER_TEXTURE) 153 { 154 TEXKILL(cMask, x, y, z); 155 } 156 else 157 { 158 TEXKILL(cMask, rs[dst.index]); 159 } 160 } 161 else ASSERT(false); 162 break; 163 case Shader::OPCODE_TEX: 164 if(shaderModel < 0x0104) 165 { 166 TEX(d, x, y, z, dst.index, false); 167 } 168 else if(shaderModel == 0x0104) 169 { 170 if(src0.type == Shader::PARAMETER_TEXTURE) 171 { 172 if((src0.swizzle & 0x30) == 0x20) // .xyz 173 { 174 TEX(d, x, y, z, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW); 175 } 176 else // .xyw 177 { 178 TEX(d, x, y, w, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW); 179 } 180 } 181 else 182 { 183 TEXLD(d, s0, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW); 184 } 185 } 186 else ASSERT(false); 187 break; 188 case Shader::OPCODE_TEXBEM: TEXBEM(d, s0, x, y, z, dst.index); break; 189 case Shader::OPCODE_TEXBEML: TEXBEML(d, s0, x, y, z, dst.index); break; 190 case Shader::OPCODE_TEXREG2AR: TEXREG2AR(d, s0, dst.index); break; 191 case Shader::OPCODE_TEXREG2GB: TEXREG2GB(d, s0, dst.index); break; 192 case Shader::OPCODE_TEXM3X2PAD: TEXM3X2PAD(x, y, z, s0, 0, src0.modifier == Shader::MODIFIER_SIGN); break; 193 case Shader::OPCODE_TEXM3X2TEX: TEXM3X2TEX(d, x, y, z, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break; 194 case Shader::OPCODE_TEXM3X3PAD: TEXM3X3PAD(x, y, z, s0, pad++ % 2, src0.modifier == Shader::MODIFIER_SIGN); break; 195 case Shader::OPCODE_TEXM3X3TEX: TEXM3X3TEX(d, x, y, z, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break; 196 case Shader::OPCODE_TEXM3X3SPEC: TEXM3X3SPEC(d, x, y, z, dst.index, s0, s1); break; 197 case Shader::OPCODE_TEXM3X3VSPEC: TEXM3X3VSPEC(d, x, y, z, dst.index, s0); break; 198 case Shader::OPCODE_CND: CND(d, s0, s1, s2); break; 199 case Shader::OPCODE_TEXREG2RGB: TEXREG2RGB(d, s0, dst.index); break; 200 case Shader::OPCODE_TEXDP3TEX: TEXDP3TEX(d, x, y, z, dst.index, s0); break; 201 case Shader::OPCODE_TEXM3X2DEPTH: TEXM3X2DEPTH(d, x, y, z, s0, src0.modifier == Shader::MODIFIER_SIGN); break; 202 case Shader::OPCODE_TEXDP3: TEXDP3(d, x, y, z, s0); break; 203 case Shader::OPCODE_TEXM3X3: TEXM3X3(d, x, y, z, s0, src0.modifier == Shader::MODIFIER_SIGN); break; 204 case Shader::OPCODE_TEXDEPTH: TEXDEPTH(); break; 205 case Shader::OPCODE_CMP0: CMP(d, s0, s1, s2); break; 206 case Shader::OPCODE_BEM: BEM(d, s0, s1, dst.index); break; 207 case Shader::OPCODE_PHASE: break; 208 case Shader::OPCODE_END: break; 209 default: 210 ASSERT(false); 211 } 212 213 if(dst.type != Shader::PARAMETER_VOID && opcode != Shader::OPCODE_TEXKILL) 214 { 215 if(dst.shift > 0) 216 { 217 if(dst.mask & 0x1) { d.x = AddSat(d.x, d.x); if(dst.shift > 1) d.x = AddSat(d.x, d.x); if(dst.shift > 2) d.x = AddSat(d.x, d.x); } 218 if(dst.mask & 0x2) { d.y = AddSat(d.y, d.y); if(dst.shift > 1) d.y = AddSat(d.y, d.y); if(dst.shift > 2) d.y = AddSat(d.y, d.y); } 219 if(dst.mask & 0x4) { d.z = AddSat(d.z, d.z); if(dst.shift > 1) d.z = AddSat(d.z, d.z); if(dst.shift > 2) d.z = AddSat(d.z, d.z); } 220 if(dst.mask & 0x8) { d.w = AddSat(d.w, d.w); if(dst.shift > 1) d.w = AddSat(d.w, d.w); if(dst.shift > 2) d.w = AddSat(d.w, d.w); } 221 } 222 else if(dst.shift < 0) 223 { 224 if(dst.mask & 0x1) d.x = d.x >> -dst.shift; 225 if(dst.mask & 0x2) d.y = d.y >> -dst.shift; 226 if(dst.mask & 0x4) d.z = d.z >> -dst.shift; 227 if(dst.mask & 0x8) d.w = d.w >> -dst.shift; 228 } 229 230 if(dst.saturate) 231 { 232 if(dst.mask & 0x1) { d.x = Min(d.x, Short4(0x1000)); d.x = Max(d.x, Short4(0x0000)); } 233 if(dst.mask & 0x2) { d.y = Min(d.y, Short4(0x1000)); d.y = Max(d.y, Short4(0x0000)); } 234 if(dst.mask & 0x4) { d.z = Min(d.z, Short4(0x1000)); d.z = Max(d.z, Short4(0x0000)); } 235 if(dst.mask & 0x8) { d.w = Min(d.w, Short4(0x1000)); d.w = Max(d.w, Short4(0x0000)); } 236 } 237 238 if(pairing) 239 { 240 if(dst.mask & 0x1) dPairing.x = d.x; 241 if(dst.mask & 0x2) dPairing.y = d.y; 242 if(dst.mask & 0x4) dPairing.z = d.z; 243 if(dst.mask & 0x8) dPairing.w = d.w; 244 } 245 246 if(coissue) 247 { 248 const Dst &dst = shader->getInstruction(i - 1)->dst; 249 250 writeDestination(dPairing, dst); 251 } 252 253 if(!pairing) 254 { 255 writeDestination(d, dst); 256 } 257 } 258 } 259 260 current.x = Min(current.x, Short4(0x0FFF)); current.x = Max(current.x, Short4(0x0000)); 261 current.y = Min(current.y, Short4(0x0FFF)); current.y = Max(current.y, Short4(0x0000)); 262 current.z = Min(current.z, Short4(0x0FFF)); current.z = Max(current.z, Short4(0x0000)); 263 current.w = Min(current.w, Short4(0x0FFF)); current.w = Max(current.w, Short4(0x0000)); 264 } 265 alphaTest(Int cMask[4])266 Bool PixelPipeline::alphaTest(Int cMask[4]) 267 { 268 if(!state.alphaTestActive()) 269 { 270 return true; 271 } 272 273 Int aMask; 274 275 if(state.transparencyAntialiasing == TRANSPARENCY_NONE) 276 { 277 PixelRoutine::alphaTest(aMask, current.w); 278 279 for(unsigned int q = 0; q < state.multiSample; q++) 280 { 281 cMask[q] &= aMask; 282 } 283 } 284 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) 285 { 286 Float4 alpha = Float4(current.w) * Float4(1.0f / 0x1000); 287 288 alphaToCoverage(cMask, alpha); 289 } 290 else ASSERT(false); 291 292 Int pass = cMask[0]; 293 294 for(unsigned int q = 1; q < state.multiSample; q++) 295 { 296 pass = pass | cMask[q]; 297 } 298 299 return pass != 0x0; 300 } 301 rasterOperation(Float4 & fog,Pointer<Byte> cBuffer[4],Int & x,Int sMask[4],Int zMask[4],Int cMask[4])302 void PixelPipeline::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) 303 { 304 if(!state.colorWriteActive(0)) 305 { 306 return; 307 } 308 309 Vector4f oC; 310 311 switch(state.targetFormat[0]) 312 { 313 case FORMAT_R5G6B5: 314 case FORMAT_X8R8G8B8: 315 case FORMAT_X8B8G8R8: 316 case FORMAT_A8R8G8B8: 317 case FORMAT_A8B8G8R8: 318 case FORMAT_A8: 319 case FORMAT_G16R16: 320 case FORMAT_A16B16G16R16: 321 if(!postBlendSRGB && state.writeSRGB) 322 { 323 linearToSRGB12_16(current); 324 } 325 else 326 { 327 current.x <<= 4; 328 current.y <<= 4; 329 current.z <<= 4; 330 current.w <<= 4; 331 } 332 333 if(state.targetFormat[0] == FORMAT_R5G6B5) 334 { 335 current.x &= Short4(0xF800u); 336 current.y &= Short4(0xFC00u); 337 current.z &= Short4(0xF800u); 338 } 339 340 fogBlend(current, fog); 341 342 for(unsigned int q = 0; q < state.multiSample; q++) 343 { 344 Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[0])); 345 Vector4s color = current; 346 347 if(state.multiSampleMask & (1 << q)) 348 { 349 alphaBlend(0, buffer, color, x); 350 logicOperation(0, buffer, color, x); 351 writeColor(0, buffer, x, color, sMask[q], zMask[q], cMask[q]); 352 } 353 } 354 break; 355 case FORMAT_R32F: 356 case FORMAT_G32R32F: 357 case FORMAT_X32B32G32R32F: 358 case FORMAT_A32B32G32R32F: 359 // case FORMAT_X32B32G32R32F_UNSIGNED: // Not renderable in any fixed-function API. 360 convertSigned12(oC, current); 361 PixelRoutine::fogBlend(oC, fog); 362 363 for(unsigned int q = 0; q < state.multiSample; q++) 364 { 365 Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[0])); 366 Vector4f color = oC; 367 368 if(state.multiSampleMask & (1 << q)) 369 { 370 alphaBlend(0, buffer, color, x); 371 writeColor(0, buffer, x, color, sMask[q], zMask[q], cMask[q]); 372 } 373 } 374 break; 375 default: 376 ASSERT(false); 377 } 378 } 379 blendTexture(Vector4s & temp,Vector4s & texture,int stage)380 void PixelPipeline::blendTexture(Vector4s &temp, Vector4s &texture, int stage) 381 { 382 Vector4s *arg1 = nullptr; 383 Vector4s *arg2 = nullptr; 384 Vector4s *arg3 = nullptr; 385 Vector4s res; 386 387 Vector4s constant; 388 Vector4s tfactor; 389 390 const TextureStage::State &textureStage = state.textureStage[stage]; 391 392 if(textureStage.firstArgument == TextureStage::SOURCE_CONSTANT || 393 textureStage.firstArgumentAlpha == TextureStage::SOURCE_CONSTANT || 394 textureStage.secondArgument == TextureStage::SOURCE_CONSTANT || 395 textureStage.secondArgumentAlpha == TextureStage::SOURCE_CONSTANT || 396 textureStage.thirdArgument == TextureStage::SOURCE_CONSTANT || 397 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_CONSTANT) 398 { 399 constant.x = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[0])); 400 constant.y = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[1])); 401 constant.z = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[2])); 402 constant.w = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[3])); 403 } 404 405 if(textureStage.firstArgument == TextureStage::SOURCE_TFACTOR || 406 textureStage.firstArgumentAlpha == TextureStage::SOURCE_TFACTOR || 407 textureStage.secondArgument == TextureStage::SOURCE_TFACTOR || 408 textureStage.secondArgumentAlpha == TextureStage::SOURCE_TFACTOR || 409 textureStage.thirdArgument == TextureStage::SOURCE_TFACTOR || 410 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_TFACTOR) 411 { 412 tfactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[0])); 413 tfactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[1])); 414 tfactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[2])); 415 tfactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3])); 416 } 417 418 // Premodulate 419 if(stage > 0 && textureStage.usesTexture) 420 { 421 if(state.textureStage[stage - 1].stageOperation == TextureStage::STAGE_PREMODULATE) 422 { 423 current.x = MulHigh(current.x, texture.x) << 4; 424 current.y = MulHigh(current.y, texture.y) << 4; 425 current.z = MulHigh(current.z, texture.z) << 4; 426 } 427 428 if(state.textureStage[stage - 1].stageOperationAlpha == TextureStage::STAGE_PREMODULATE) 429 { 430 current.w = MulHigh(current.w, texture.w) << 4; 431 } 432 } 433 434 if(luminance) 435 { 436 texture.x = MulHigh(texture.x, L) << 4; 437 texture.y = MulHigh(texture.y, L) << 4; 438 texture.z = MulHigh(texture.z, L) << 4; 439 440 luminance = false; 441 } 442 443 switch(textureStage.firstArgument) 444 { 445 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break; 446 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break; 447 case TextureStage::SOURCE_CURRENT: arg1 = ¤t; break; 448 case TextureStage::SOURCE_DIFFUSE: arg1 = &diffuse; break; 449 case TextureStage::SOURCE_SPECULAR: arg1 = &specular; break; 450 case TextureStage::SOURCE_TEMP: arg1 = &temp; break; 451 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break; 452 default: 453 ASSERT(false); 454 } 455 456 switch(textureStage.secondArgument) 457 { 458 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break; 459 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break; 460 case TextureStage::SOURCE_CURRENT: arg2 = ¤t; break; 461 case TextureStage::SOURCE_DIFFUSE: arg2 = &diffuse; break; 462 case TextureStage::SOURCE_SPECULAR: arg2 = &specular; break; 463 case TextureStage::SOURCE_TEMP: arg2 = &temp; break; 464 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break; 465 default: 466 ASSERT(false); 467 } 468 469 switch(textureStage.thirdArgument) 470 { 471 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break; 472 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break; 473 case TextureStage::SOURCE_CURRENT: arg3 = ¤t; break; 474 case TextureStage::SOURCE_DIFFUSE: arg3 = &diffuse; break; 475 case TextureStage::SOURCE_SPECULAR: arg3 = &specular; break; 476 case TextureStage::SOURCE_TEMP: arg3 = &temp; break; 477 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break; 478 default: 479 ASSERT(false); 480 } 481 482 Vector4s mod1; 483 Vector4s mod2; 484 Vector4s mod3; 485 486 switch(textureStage.firstModifier) 487 { 488 case TextureStage::MODIFIER_COLOR: 489 break; 490 case TextureStage::MODIFIER_INVCOLOR: 491 mod1.x = SubSat(Short4(0x1000), arg1->x); 492 mod1.y = SubSat(Short4(0x1000), arg1->y); 493 mod1.z = SubSat(Short4(0x1000), arg1->z); 494 mod1.w = SubSat(Short4(0x1000), arg1->w); 495 496 arg1 = &mod1; 497 break; 498 case TextureStage::MODIFIER_ALPHA: 499 mod1.x = arg1->w; 500 mod1.y = arg1->w; 501 mod1.z = arg1->w; 502 mod1.w = arg1->w; 503 504 arg1 = &mod1; 505 break; 506 case TextureStage::MODIFIER_INVALPHA: 507 mod1.x = SubSat(Short4(0x1000), arg1->w); 508 mod1.y = SubSat(Short4(0x1000), arg1->w); 509 mod1.z = SubSat(Short4(0x1000), arg1->w); 510 mod1.w = SubSat(Short4(0x1000), arg1->w); 511 512 arg1 = &mod1; 513 break; 514 default: 515 ASSERT(false); 516 } 517 518 switch(textureStage.secondModifier) 519 { 520 case TextureStage::MODIFIER_COLOR: 521 break; 522 case TextureStage::MODIFIER_INVCOLOR: 523 mod2.x = SubSat(Short4(0x1000), arg2->x); 524 mod2.y = SubSat(Short4(0x1000), arg2->y); 525 mod2.z = SubSat(Short4(0x1000), arg2->z); 526 mod2.w = SubSat(Short4(0x1000), arg2->w); 527 528 arg2 = &mod2; 529 break; 530 case TextureStage::MODIFIER_ALPHA: 531 mod2.x = arg2->w; 532 mod2.y = arg2->w; 533 mod2.z = arg2->w; 534 mod2.w = arg2->w; 535 536 arg2 = &mod2; 537 break; 538 case TextureStage::MODIFIER_INVALPHA: 539 mod2.x = SubSat(Short4(0x1000), arg2->w); 540 mod2.y = SubSat(Short4(0x1000), arg2->w); 541 mod2.z = SubSat(Short4(0x1000), arg2->w); 542 mod2.w = SubSat(Short4(0x1000), arg2->w); 543 544 arg2 = &mod2; 545 break; 546 default: 547 ASSERT(false); 548 } 549 550 switch(textureStage.thirdModifier) 551 { 552 case TextureStage::MODIFIER_COLOR: 553 break; 554 case TextureStage::MODIFIER_INVCOLOR: 555 mod3.x = SubSat(Short4(0x1000), arg3->x); 556 mod3.y = SubSat(Short4(0x1000), arg3->y); 557 mod3.z = SubSat(Short4(0x1000), arg3->z); 558 mod3.w = SubSat(Short4(0x1000), arg3->w); 559 560 arg3 = &mod3; 561 break; 562 case TextureStage::MODIFIER_ALPHA: 563 mod3.x = arg3->w; 564 mod3.y = arg3->w; 565 mod3.z = arg3->w; 566 mod3.w = arg3->w; 567 568 arg3 = &mod3; 569 break; 570 case TextureStage::MODIFIER_INVALPHA: 571 mod3.x = SubSat(Short4(0x1000), arg3->w); 572 mod3.y = SubSat(Short4(0x1000), arg3->w); 573 mod3.z = SubSat(Short4(0x1000), arg3->w); 574 mod3.w = SubSat(Short4(0x1000), arg3->w); 575 576 arg3 = &mod3; 577 break; 578 default: 579 ASSERT(false); 580 } 581 582 switch(textureStage.stageOperation) 583 { 584 case TextureStage::STAGE_DISABLE: 585 break; 586 case TextureStage::STAGE_SELECTARG1: // Arg1 587 res.x = arg1->x; 588 res.y = arg1->y; 589 res.z = arg1->z; 590 break; 591 case TextureStage::STAGE_SELECTARG2: // Arg2 592 res.x = arg2->x; 593 res.y = arg2->y; 594 res.z = arg2->z; 595 break; 596 case TextureStage::STAGE_SELECTARG3: // Arg3 597 res.x = arg3->x; 598 res.y = arg3->y; 599 res.z = arg3->z; 600 break; 601 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2 602 res.x = MulHigh(arg1->x, arg2->x) << 4; 603 res.y = MulHigh(arg1->y, arg2->y) << 4; 604 res.z = MulHigh(arg1->z, arg2->z) << 4; 605 break; 606 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2 607 res.x = MulHigh(arg1->x, arg2->x) << 5; 608 res.y = MulHigh(arg1->y, arg2->y) << 5; 609 res.z = MulHigh(arg1->z, arg2->z) << 5; 610 break; 611 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4 612 res.x = MulHigh(arg1->x, arg2->x) << 6; 613 res.y = MulHigh(arg1->y, arg2->y) << 6; 614 res.z = MulHigh(arg1->z, arg2->z) << 6; 615 break; 616 case TextureStage::STAGE_ADD: // Arg1 + Arg2 617 res.x = AddSat(arg1->x, arg2->x); 618 res.y = AddSat(arg1->y, arg2->y); 619 res.z = AddSat(arg1->z, arg2->z); 620 break; 621 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5 622 res.x = AddSat(arg1->x, arg2->x); 623 res.y = AddSat(arg1->y, arg2->y); 624 res.z = AddSat(arg1->z, arg2->z); 625 626 res.x = SubSat(res.x, Short4(0x0800)); 627 res.y = SubSat(res.y, Short4(0x0800)); 628 res.z = SubSat(res.z, Short4(0x0800)); 629 break; 630 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1 631 res.x = AddSat(arg1->x, arg2->x); 632 res.y = AddSat(arg1->y, arg2->y); 633 res.z = AddSat(arg1->z, arg2->z); 634 635 res.x = SubSat(res.x, Short4(0x0800)); 636 res.y = SubSat(res.y, Short4(0x0800)); 637 res.z = SubSat(res.z, Short4(0x0800)); 638 639 res.x = AddSat(res.x, res.x); 640 res.y = AddSat(res.y, res.y); 641 res.z = AddSat(res.z, res.z); 642 break; 643 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2 644 res.x = SubSat(arg1->x, arg2->x); 645 res.y = SubSat(arg1->y, arg2->y); 646 res.z = SubSat(arg1->z, arg2->z); 647 break; 648 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2 649 { 650 Short4 tmp; 651 652 tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(arg1->x, arg2->x); res.x = SubSat(res.x, tmp); 653 tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(arg1->y, arg2->y); res.y = SubSat(res.y, tmp); 654 tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(arg1->z, arg2->z); res.z = SubSat(res.z, tmp); 655 } 656 break; 657 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2 658 res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg3->x); 659 res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg3->y); 660 res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg3->z); 661 break; 662 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2 663 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, arg3->x) << 4; res.x = AddSat(res.x, arg2->x); 664 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, arg3->y) << 4; res.y = AddSat(res.y, arg2->y); 665 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, arg3->z) << 4; res.z = AddSat(res.z, arg2->z); 666 break; 667 case TextureStage::STAGE_DOT3: // 2 * (Arg1.x - 0.5) * 2 * (Arg2.x - 0.5) + 2 * (Arg1.y - 0.5) * 2 * (Arg2.y - 0.5) + 2 * (Arg1.z - 0.5) * 2 * (Arg2.z - 0.5) 668 { 669 Short4 tmp; 670 671 res.x = SubSat(arg1->x, Short4(0x0800)); tmp = SubSat(arg2->x, Short4(0x0800)); res.x = MulHigh(res.x, tmp); 672 res.y = SubSat(arg1->y, Short4(0x0800)); tmp = SubSat(arg2->y, Short4(0x0800)); res.y = MulHigh(res.y, tmp); 673 res.z = SubSat(arg1->z, Short4(0x0800)); tmp = SubSat(arg2->z, Short4(0x0800)); res.z = MulHigh(res.z, tmp); 674 675 res.x = res.x << 6; 676 res.y = res.y << 6; 677 res.z = res.z << 6; 678 679 res.x = AddSat(res.x, res.y); 680 res.x = AddSat(res.x, res.z); 681 682 // Clamp to [0, 1] 683 res.x = Max(res.x, Short4(0x0000)); 684 res.x = Min(res.x, Short4(0x1000)); 685 686 res.y = res.x; 687 res.z = res.x; 688 res.w = res.x; 689 } 690 break; 691 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2 692 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, current.w) << 4; res.x = AddSat(res.x, arg2->x); 693 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, current.w) << 4; res.y = AddSat(res.y, arg2->y); 694 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, current.w) << 4; res.z = AddSat(res.z, arg2->z); 695 break; 696 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Alpha * (Arg1 - Arg2) + Arg2 697 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, diffuse.w) << 4; res.x = AddSat(res.x, arg2->x); 698 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, diffuse.w) << 4; res.y = AddSat(res.y, arg2->y); 699 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, diffuse.w) << 4; res.z = AddSat(res.z, arg2->z); 700 break; 701 case TextureStage::STAGE_BLENDFACTORALPHA: // Alpha * (Arg1 - Arg2) + Arg2 702 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.x = AddSat(res.x, arg2->x); 703 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.y = AddSat(res.y, arg2->y); 704 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.z = AddSat(res.z, arg2->z); 705 break; 706 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Alpha * (Arg1 - Arg2) + Arg2 707 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, texture.w) << 4; res.x = AddSat(res.x, arg2->x); 708 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, texture.w) << 4; res.y = AddSat(res.y, arg2->y); 709 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, texture.w) << 4; res.z = AddSat(res.z, arg2->z); 710 break; 711 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha) 712 res.x = SubSat(Short4(0x1000), texture.w); res.x = MulHigh(res.x, arg2->x) << 4; res.x = AddSat(res.x, arg1->x); 713 res.y = SubSat(Short4(0x1000), texture.w); res.y = MulHigh(res.y, arg2->y) << 4; res.y = AddSat(res.y, arg1->y); 714 res.z = SubSat(Short4(0x1000), texture.w); res.z = MulHigh(res.z, arg2->z) << 4; res.z = AddSat(res.z, arg1->z); 715 break; 716 case TextureStage::STAGE_PREMODULATE: 717 res.x = arg1->x; 718 res.y = arg1->y; 719 res.z = arg1->z; 720 break; 721 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: // Arg1 + Arg1.w * Arg2 722 res.x = MulHigh(arg1->w, arg2->x) << 4; res.x = AddSat(res.x, arg1->x); 723 res.y = MulHigh(arg1->w, arg2->y) << 4; res.y = AddSat(res.y, arg1->y); 724 res.z = MulHigh(arg1->w, arg2->z) << 4; res.z = AddSat(res.z, arg1->z); 725 break; 726 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: // Arg1 * Arg2 + Arg1.w 727 res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg1->w); 728 res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg1->w); 729 res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg1->w); 730 break; 731 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: // (1 - Arg1.w) * Arg2 + Arg1 732 { 733 Short4 tmp; 734 735 res.x = AddSat(arg1->x, arg2->x); tmp = MulHigh(arg1->w, arg2->x) << 4; res.x = SubSat(res.x, tmp); 736 res.y = AddSat(arg1->y, arg2->y); tmp = MulHigh(arg1->w, arg2->y) << 4; res.y = SubSat(res.y, tmp); 737 res.z = AddSat(arg1->z, arg2->z); tmp = MulHigh(arg1->w, arg2->z) << 4; res.z = SubSat(res.z, tmp); 738 } 739 break; 740 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: // (1 - Arg1) * Arg2 + Arg1.w 741 { 742 Short4 tmp; 743 744 res.x = AddSat(arg1->w, arg2->x); tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = SubSat(res.x, tmp); 745 res.y = AddSat(arg1->w, arg2->y); tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = SubSat(res.y, tmp); 746 res.z = AddSat(arg1->w, arg2->z); tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = SubSat(res.z, tmp); 747 } 748 break; 749 case TextureStage::STAGE_BUMPENVMAP: 750 { 751 du = Float4(texture.x) * Float4(1.0f / 0x0FE0); 752 dv = Float4(texture.y) * Float4(1.0f / 0x0FE0); 753 754 Float4 du2; 755 Float4 dv2; 756 757 du2 = du; 758 dv2 = dv; 759 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0])); 760 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0])); 761 du += dv2; 762 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1])); 763 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1])); 764 dv += du2; 765 766 perturbate = true; 767 768 res.x = current.x; 769 res.y = current.y; 770 res.z = current.z; 771 res.w = current.w; 772 } 773 break; 774 case TextureStage::STAGE_BUMPENVMAPLUMINANCE: 775 { 776 du = Float4(texture.x) * Float4(1.0f / 0x0FE0); 777 dv = Float4(texture.y) * Float4(1.0f / 0x0FE0); 778 779 Float4 du2; 780 Float4 dv2; 781 782 du2 = du; 783 dv2 = dv; 784 785 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0])); 786 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0])); 787 du += dv2; 788 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1])); 789 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1])); 790 dv += du2; 791 792 perturbate = true; 793 794 L = texture.z; 795 L = MulHigh(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceScale4))); 796 L = L << 4; 797 L = AddSat(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceOffset4))); 798 L = Max(L, Short4(0x0000)); 799 L = Min(L, Short4(0x1000)); 800 801 luminance = true; 802 803 res.x = current.x; 804 res.y = current.y; 805 res.z = current.z; 806 res.w = current.w; 807 } 808 break; 809 default: 810 ASSERT(false); 811 } 812 813 if(textureStage.stageOperation != TextureStage::STAGE_DOT3) 814 { 815 switch(textureStage.firstArgumentAlpha) 816 { 817 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break; 818 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break; 819 case TextureStage::SOURCE_CURRENT: arg1 = ¤t; break; 820 case TextureStage::SOURCE_DIFFUSE: arg1 = &diffuse; break; 821 case TextureStage::SOURCE_SPECULAR: arg1 = &specular; break; 822 case TextureStage::SOURCE_TEMP: arg1 = &temp; break; 823 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break; 824 default: 825 ASSERT(false); 826 } 827 828 switch(textureStage.secondArgumentAlpha) 829 { 830 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break; 831 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break; 832 case TextureStage::SOURCE_CURRENT: arg2 = ¤t; break; 833 case TextureStage::SOURCE_DIFFUSE: arg2 = &diffuse; break; 834 case TextureStage::SOURCE_SPECULAR: arg2 = &specular; break; 835 case TextureStage::SOURCE_TEMP: arg2 = &temp; break; 836 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break; 837 default: 838 ASSERT(false); 839 } 840 841 switch(textureStage.thirdArgumentAlpha) 842 { 843 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break; 844 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break; 845 case TextureStage::SOURCE_CURRENT: arg3 = ¤t; break; 846 case TextureStage::SOURCE_DIFFUSE: arg3 = &diffuse; break; 847 case TextureStage::SOURCE_SPECULAR: arg3 = &specular; break; 848 case TextureStage::SOURCE_TEMP: arg3 = &temp; break; 849 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break; 850 default: 851 ASSERT(false); 852 } 853 854 switch(textureStage.firstModifierAlpha) // FIXME: Check if actually used 855 { 856 case TextureStage::MODIFIER_COLOR: 857 break; 858 case TextureStage::MODIFIER_INVCOLOR: 859 mod1.w = SubSat(Short4(0x1000), arg1->w); 860 861 arg1 = &mod1; 862 break; 863 case TextureStage::MODIFIER_ALPHA: 864 // Redudant 865 break; 866 case TextureStage::MODIFIER_INVALPHA: 867 mod1.w = SubSat(Short4(0x1000), arg1->w); 868 869 arg1 = &mod1; 870 break; 871 default: 872 ASSERT(false); 873 } 874 875 switch(textureStage.secondModifierAlpha) // FIXME: Check if actually used 876 { 877 case TextureStage::MODIFIER_COLOR: 878 break; 879 case TextureStage::MODIFIER_INVCOLOR: 880 mod2.w = SubSat(Short4(0x1000), arg2->w); 881 882 arg2 = &mod2; 883 break; 884 case TextureStage::MODIFIER_ALPHA: 885 // Redudant 886 break; 887 case TextureStage::MODIFIER_INVALPHA: 888 mod2.w = SubSat(Short4(0x1000), arg2->w); 889 890 arg2 = &mod2; 891 break; 892 default: 893 ASSERT(false); 894 } 895 896 switch(textureStage.thirdModifierAlpha) // FIXME: Check if actually used 897 { 898 case TextureStage::MODIFIER_COLOR: 899 break; 900 case TextureStage::MODIFIER_INVCOLOR: 901 mod3.w = SubSat(Short4(0x1000), arg3->w); 902 903 arg3 = &mod3; 904 break; 905 case TextureStage::MODIFIER_ALPHA: 906 // Redudant 907 break; 908 case TextureStage::MODIFIER_INVALPHA: 909 mod3.w = SubSat(Short4(0x1000), arg3->w); 910 911 arg3 = &mod3; 912 break; 913 default: 914 ASSERT(false); 915 } 916 917 switch(textureStage.stageOperationAlpha) 918 { 919 case TextureStage::STAGE_DISABLE: 920 break; 921 case TextureStage::STAGE_SELECTARG1: // Arg1 922 res.w = arg1->w; 923 break; 924 case TextureStage::STAGE_SELECTARG2: // Arg2 925 res.w = arg2->w; 926 break; 927 case TextureStage::STAGE_SELECTARG3: // Arg3 928 res.w = arg3->w; 929 break; 930 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2 931 res.w = MulHigh(arg1->w, arg2->w) << 4; 932 break; 933 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2 934 res.w = MulHigh(arg1->w, arg2->w) << 5; 935 break; 936 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4 937 res.w = MulHigh(arg1->w, arg2->w) << 6; 938 break; 939 case TextureStage::STAGE_ADD: // Arg1 + Arg2 940 res.w = AddSat(arg1->w, arg2->w); 941 break; 942 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5 943 res.w = AddSat(arg1->w, arg2->w); 944 res.w = SubSat(res.w, Short4(0x0800)); 945 break; 946 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1 947 res.w = AddSat(arg1->w, arg2->w); 948 res.w = SubSat(res.w, Short4(0x0800)); 949 res.w = AddSat(res.w, res.w); 950 break; 951 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2 952 res.w = SubSat(arg1->w, arg2->w); 953 break; 954 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2 955 { 956 Short4 tmp; 957 958 tmp = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(arg1->w, arg2->w); res.w = SubSat(res.w, tmp); 959 } 960 break; 961 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2 962 res.w = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(res.w, arg3->w); 963 break; 964 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2 965 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, arg3->w) << 4; res.w = AddSat(res.w, arg2->w); 966 break; 967 case TextureStage::STAGE_DOT3: 968 break; // Already computed in color channel 969 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2 970 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, current.w) << 4; res.w = AddSat(res.w, arg2->w); 971 break; 972 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha) 973 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, diffuse.w) << 4; res.w = AddSat(res.w, arg2->w); 974 break; 975 case TextureStage::STAGE_BLENDFACTORALPHA: 976 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.w = AddSat(res.w, arg2->w); 977 break; 978 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha) 979 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, texture.w) << 4; res.w = AddSat(res.w, arg2->w); 980 break; 981 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha) 982 res.w = SubSat(Short4(0x1000), texture.w); res.w = MulHigh(res.w, arg2->w) << 4; res.w = AddSat(res.w, arg1->w); 983 break; 984 case TextureStage::STAGE_PREMODULATE: 985 res.w = arg1->w; 986 break; 987 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: 988 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: 989 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: 990 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: 991 case TextureStage::STAGE_BUMPENVMAP: 992 case TextureStage::STAGE_BUMPENVMAPLUMINANCE: 993 break; // Invalid alpha operations 994 default: 995 ASSERT(false); 996 } 997 } 998 999 // Clamp result to [0, 1] 1000 1001 switch(textureStage.stageOperation) 1002 { 1003 case TextureStage::STAGE_DISABLE: 1004 case TextureStage::STAGE_SELECTARG1: 1005 case TextureStage::STAGE_SELECTARG2: 1006 case TextureStage::STAGE_SELECTARG3: 1007 case TextureStage::STAGE_MODULATE: 1008 case TextureStage::STAGE_MODULATE2X: 1009 case TextureStage::STAGE_MODULATE4X: 1010 case TextureStage::STAGE_ADD: 1011 case TextureStage::STAGE_MULTIPLYADD: 1012 case TextureStage::STAGE_LERP: 1013 case TextureStage::STAGE_BLENDCURRENTALPHA: 1014 case TextureStage::STAGE_BLENDDIFFUSEALPHA: 1015 case TextureStage::STAGE_BLENDFACTORALPHA: 1016 case TextureStage::STAGE_BLENDTEXTUREALPHA: 1017 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: 1018 case TextureStage::STAGE_DOT3: // Already clamped 1019 case TextureStage::STAGE_PREMODULATE: 1020 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: 1021 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: 1022 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: 1023 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: 1024 case TextureStage::STAGE_BUMPENVMAP: 1025 case TextureStage::STAGE_BUMPENVMAPLUMINANCE: 1026 if(state.textureStage[stage].cantUnderflow) 1027 { 1028 break; // Can't go below zero 1029 } 1030 case TextureStage::STAGE_ADDSIGNED: 1031 case TextureStage::STAGE_ADDSIGNED2X: 1032 case TextureStage::STAGE_SUBTRACT: 1033 case TextureStage::STAGE_ADDSMOOTH: 1034 res.x = Max(res.x, Short4(0x0000)); 1035 res.y = Max(res.y, Short4(0x0000)); 1036 res.z = Max(res.z, Short4(0x0000)); 1037 break; 1038 default: 1039 ASSERT(false); 1040 } 1041 1042 switch(textureStage.stageOperationAlpha) 1043 { 1044 case TextureStage::STAGE_DISABLE: 1045 case TextureStage::STAGE_SELECTARG1: 1046 case TextureStage::STAGE_SELECTARG2: 1047 case TextureStage::STAGE_SELECTARG3: 1048 case TextureStage::STAGE_MODULATE: 1049 case TextureStage::STAGE_MODULATE2X: 1050 case TextureStage::STAGE_MODULATE4X: 1051 case TextureStage::STAGE_ADD: 1052 case TextureStage::STAGE_MULTIPLYADD: 1053 case TextureStage::STAGE_LERP: 1054 case TextureStage::STAGE_BLENDCURRENTALPHA: 1055 case TextureStage::STAGE_BLENDDIFFUSEALPHA: 1056 case TextureStage::STAGE_BLENDFACTORALPHA: 1057 case TextureStage::STAGE_BLENDTEXTUREALPHA: 1058 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: 1059 case TextureStage::STAGE_DOT3: // Already clamped 1060 case TextureStage::STAGE_PREMODULATE: 1061 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: 1062 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: 1063 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: 1064 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: 1065 case TextureStage::STAGE_BUMPENVMAP: 1066 case TextureStage::STAGE_BUMPENVMAPLUMINANCE: 1067 if(state.textureStage[stage].cantUnderflow) 1068 { 1069 break; // Can't go below zero 1070 } 1071 case TextureStage::STAGE_ADDSIGNED: 1072 case TextureStage::STAGE_ADDSIGNED2X: 1073 case TextureStage::STAGE_SUBTRACT: 1074 case TextureStage::STAGE_ADDSMOOTH: 1075 res.w = Max(res.w, Short4(0x0000)); 1076 break; 1077 default: 1078 ASSERT(false); 1079 } 1080 1081 switch(textureStage.stageOperation) 1082 { 1083 case TextureStage::STAGE_DISABLE: 1084 case TextureStage::STAGE_SELECTARG1: 1085 case TextureStage::STAGE_SELECTARG2: 1086 case TextureStage::STAGE_SELECTARG3: 1087 case TextureStage::STAGE_MODULATE: 1088 case TextureStage::STAGE_SUBTRACT: 1089 case TextureStage::STAGE_ADDSMOOTH: 1090 case TextureStage::STAGE_LERP: 1091 case TextureStage::STAGE_BLENDCURRENTALPHA: 1092 case TextureStage::STAGE_BLENDDIFFUSEALPHA: 1093 case TextureStage::STAGE_BLENDFACTORALPHA: 1094 case TextureStage::STAGE_BLENDTEXTUREALPHA: 1095 case TextureStage::STAGE_DOT3: // Already clamped 1096 case TextureStage::STAGE_PREMODULATE: 1097 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: 1098 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: 1099 case TextureStage::STAGE_BUMPENVMAP: 1100 case TextureStage::STAGE_BUMPENVMAPLUMINANCE: 1101 break; // Can't go above one 1102 case TextureStage::STAGE_MODULATE2X: 1103 case TextureStage::STAGE_MODULATE4X: 1104 case TextureStage::STAGE_ADD: 1105 case TextureStage::STAGE_ADDSIGNED: 1106 case TextureStage::STAGE_ADDSIGNED2X: 1107 case TextureStage::STAGE_MULTIPLYADD: 1108 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: 1109 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: 1110 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: 1111 res.x = Min(res.x, Short4(0x1000)); 1112 res.y = Min(res.y, Short4(0x1000)); 1113 res.z = Min(res.z, Short4(0x1000)); 1114 break; 1115 default: 1116 ASSERT(false); 1117 } 1118 1119 switch(textureStage.stageOperationAlpha) 1120 { 1121 case TextureStage::STAGE_DISABLE: 1122 case TextureStage::STAGE_SELECTARG1: 1123 case TextureStage::STAGE_SELECTARG2: 1124 case TextureStage::STAGE_SELECTARG3: 1125 case TextureStage::STAGE_MODULATE: 1126 case TextureStage::STAGE_SUBTRACT: 1127 case TextureStage::STAGE_ADDSMOOTH: 1128 case TextureStage::STAGE_LERP: 1129 case TextureStage::STAGE_BLENDCURRENTALPHA: 1130 case TextureStage::STAGE_BLENDDIFFUSEALPHA: 1131 case TextureStage::STAGE_BLENDFACTORALPHA: 1132 case TextureStage::STAGE_BLENDTEXTUREALPHA: 1133 case TextureStage::STAGE_DOT3: // Already clamped 1134 case TextureStage::STAGE_PREMODULATE: 1135 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: 1136 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: 1137 case TextureStage::STAGE_BUMPENVMAP: 1138 case TextureStage::STAGE_BUMPENVMAPLUMINANCE: 1139 break; // Can't go above one 1140 case TextureStage::STAGE_MODULATE2X: 1141 case TextureStage::STAGE_MODULATE4X: 1142 case TextureStage::STAGE_ADD: 1143 case TextureStage::STAGE_ADDSIGNED: 1144 case TextureStage::STAGE_ADDSIGNED2X: 1145 case TextureStage::STAGE_MULTIPLYADD: 1146 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: 1147 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: 1148 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: 1149 res.w = Min(res.w, Short4(0x1000)); 1150 break; 1151 default: 1152 ASSERT(false); 1153 } 1154 1155 switch(textureStage.destinationArgument) 1156 { 1157 case TextureStage::DESTINATION_CURRENT: 1158 current.x = res.x; 1159 current.y = res.y; 1160 current.z = res.z; 1161 current.w = res.w; 1162 break; 1163 case TextureStage::DESTINATION_TEMP: 1164 temp.x = res.x; 1165 temp.y = res.y; 1166 temp.z = res.z; 1167 temp.w = res.w; 1168 break; 1169 default: 1170 ASSERT(false); 1171 } 1172 } 1173 fogBlend(Vector4s & current,Float4 & f)1174 void PixelPipeline::fogBlend(Vector4s ¤t, Float4 &f) 1175 { 1176 if(!state.fogActive) 1177 { 1178 return; 1179 } 1180 1181 if(state.pixelFogMode != FOG_NONE) 1182 { 1183 pixelFog(f); 1184 } 1185 1186 UShort4 fog = convertFixed16(f, true); 1187 1188 current.x = As<Short4>(MulHigh(As<UShort4>(current.x), fog)); 1189 current.y = As<Short4>(MulHigh(As<UShort4>(current.y), fog)); 1190 current.z = As<Short4>(MulHigh(As<UShort4>(current.z), fog)); 1191 1192 UShort4 invFog = UShort4(0xFFFFu) - fog; 1193 1194 current.x += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[0])))); 1195 current.y += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[1])))); 1196 current.z += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[2])))); 1197 } 1198 specularPixel(Vector4s & current,Vector4s & specular)1199 void PixelPipeline::specularPixel(Vector4s ¤t, Vector4s &specular) 1200 { 1201 if(!state.specularAdd) 1202 { 1203 return; 1204 } 1205 1206 current.x = AddSat(current.x, specular.x); 1207 current.y = AddSat(current.y, specular.y); 1208 current.z = AddSat(current.z, specular.z); 1209 } 1210 sampleTexture(int coordinates,int stage,bool project)1211 Vector4s PixelPipeline::sampleTexture(int coordinates, int stage, bool project) 1212 { 1213 Float4 x = v[2 + coordinates].x; 1214 Float4 y = v[2 + coordinates].y; 1215 Float4 z = v[2 + coordinates].z; 1216 Float4 w = v[2 + coordinates].w; 1217 1218 if(perturbate) 1219 { 1220 x += du; 1221 y += dv; 1222 1223 perturbate = false; 1224 } 1225 1226 return sampleTexture(stage, x, y, z, w, project); 1227 } 1228 sampleTexture(int stage,Float4 & u,Float4 & v,Float4 & w,Float4 & q,bool project)1229 Vector4s PixelPipeline::sampleTexture(int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project) 1230 { 1231 Vector4s c; 1232 1233 #if PERF_PROFILE 1234 Long texTime = Ticks(); 1235 #endif 1236 1237 Vector4f dsx; 1238 Vector4f dsy; 1239 1240 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + stage * sizeof(Texture); 1241 1242 if(!project) 1243 { 1244 c = SamplerCore(constants, state.sampler[stage]).sampleTexture(texture, u, v, w, q, q, dsx, dsy); 1245 } 1246 else 1247 { 1248 Float4 rq = reciprocal(q); 1249 1250 Float4 u_q = u * rq; 1251 Float4 v_q = v * rq; 1252 Float4 w_q = w * rq; 1253 1254 c = SamplerCore(constants, state.sampler[stage]).sampleTexture(texture, u_q, v_q, w_q, q, q, dsx, dsy); 1255 } 1256 1257 #if PERF_PROFILE 1258 cycles[PERF_TEX] += Ticks() - texTime; 1259 #endif 1260 1261 return c; 1262 } 1263 convertFixed12(RValue<Float4> cf)1264 Short4 PixelPipeline::convertFixed12(RValue<Float4> cf) 1265 { 1266 return RoundShort4(cf * Float4(0x1000)); 1267 } 1268 convertFixed12(Vector4s & cs,Vector4f & cf)1269 void PixelPipeline::convertFixed12(Vector4s &cs, Vector4f &cf) 1270 { 1271 cs.x = convertFixed12(cf.x); 1272 cs.y = convertFixed12(cf.y); 1273 cs.z = convertFixed12(cf.z); 1274 cs.w = convertFixed12(cf.w); 1275 } 1276 convertSigned12(Short4 & cs)1277 Float4 PixelPipeline::convertSigned12(Short4 &cs) 1278 { 1279 return Float4(cs) * Float4(1.0f / 0x0FFE); 1280 } 1281 convertSigned12(Vector4f & cf,Vector4s & cs)1282 void PixelPipeline::convertSigned12(Vector4f &cf, Vector4s &cs) 1283 { 1284 cf.x = convertSigned12(cs.x); 1285 cf.y = convertSigned12(cs.y); 1286 cf.z = convertSigned12(cs.z); 1287 cf.w = convertSigned12(cs.w); 1288 } 1289 writeDestination(Vector4s & d,const Dst & dst)1290 void PixelPipeline::writeDestination(Vector4s &d, const Dst &dst) 1291 { 1292 switch(dst.type) 1293 { 1294 case Shader::PARAMETER_TEMP: 1295 if(dst.mask & 0x1) rs[dst.index].x = d.x; 1296 if(dst.mask & 0x2) rs[dst.index].y = d.y; 1297 if(dst.mask & 0x4) rs[dst.index].z = d.z; 1298 if(dst.mask & 0x8) rs[dst.index].w = d.w; 1299 break; 1300 case Shader::PARAMETER_INPUT: 1301 if(dst.mask & 0x1) vs[dst.index].x = d.x; 1302 if(dst.mask & 0x2) vs[dst.index].y = d.y; 1303 if(dst.mask & 0x4) vs[dst.index].z = d.z; 1304 if(dst.mask & 0x8) vs[dst.index].w = d.w; 1305 break; 1306 case Shader::PARAMETER_CONST: ASSERT(false); break; 1307 case Shader::PARAMETER_TEXTURE: 1308 if(dst.mask & 0x1) ts[dst.index].x = d.x; 1309 if(dst.mask & 0x2) ts[dst.index].y = d.y; 1310 if(dst.mask & 0x4) ts[dst.index].z = d.z; 1311 if(dst.mask & 0x8) ts[dst.index].w = d.w; 1312 break; 1313 case Shader::PARAMETER_COLOROUT: 1314 if(dst.mask & 0x1) vs[dst.index].x = d.x; 1315 if(dst.mask & 0x2) vs[dst.index].y = d.y; 1316 if(dst.mask & 0x4) vs[dst.index].z = d.z; 1317 if(dst.mask & 0x8) vs[dst.index].w = d.w; 1318 break; 1319 default: 1320 ASSERT(false); 1321 } 1322 } 1323 fetchRegister(const Src & src)1324 Vector4s PixelPipeline::fetchRegister(const Src &src) 1325 { 1326 Vector4s *reg; 1327 int i = src.index; 1328 1329 Vector4s c; 1330 1331 if(src.type == Shader::PARAMETER_CONST) 1332 { 1333 c.x = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][0])); 1334 c.y = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][1])); 1335 c.z = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][2])); 1336 c.w = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][3])); 1337 } 1338 1339 switch(src.type) 1340 { 1341 case Shader::PARAMETER_TEMP: reg = &rs[i]; break; 1342 case Shader::PARAMETER_INPUT: reg = &vs[i]; break; 1343 case Shader::PARAMETER_CONST: reg = &c; break; 1344 case Shader::PARAMETER_TEXTURE: reg = &ts[i]; break; 1345 case Shader::PARAMETER_VOID: return rs[0]; // Dummy 1346 case Shader::PARAMETER_FLOAT4LITERAL: return rs[0]; // Dummy 1347 default: ASSERT(false); return rs[0]; 1348 } 1349 1350 const Short4 &x = (*reg)[(src.swizzle >> 0) & 0x3]; 1351 const Short4 &y = (*reg)[(src.swizzle >> 2) & 0x3]; 1352 const Short4 &z = (*reg)[(src.swizzle >> 4) & 0x3]; 1353 const Short4 &w = (*reg)[(src.swizzle >> 6) & 0x3]; 1354 1355 Vector4s mod; 1356 1357 switch(src.modifier) 1358 { 1359 case Shader::MODIFIER_NONE: 1360 mod.x = x; 1361 mod.y = y; 1362 mod.z = z; 1363 mod.w = w; 1364 break; 1365 case Shader::MODIFIER_BIAS: 1366 mod.x = SubSat(x, Short4(0x0800)); 1367 mod.y = SubSat(y, Short4(0x0800)); 1368 mod.z = SubSat(z, Short4(0x0800)); 1369 mod.w = SubSat(w, Short4(0x0800)); 1370 break; 1371 case Shader::MODIFIER_BIAS_NEGATE: 1372 mod.x = SubSat(Short4(0x0800), x); 1373 mod.y = SubSat(Short4(0x0800), y); 1374 mod.z = SubSat(Short4(0x0800), z); 1375 mod.w = SubSat(Short4(0x0800), w); 1376 break; 1377 case Shader::MODIFIER_COMPLEMENT: 1378 mod.x = SubSat(Short4(0x1000), x); 1379 mod.y = SubSat(Short4(0x1000), y); 1380 mod.z = SubSat(Short4(0x1000), z); 1381 mod.w = SubSat(Short4(0x1000), w); 1382 break; 1383 case Shader::MODIFIER_NEGATE: 1384 mod.x = -x; 1385 mod.y = -y; 1386 mod.z = -z; 1387 mod.w = -w; 1388 break; 1389 case Shader::MODIFIER_X2: 1390 mod.x = AddSat(x, x); 1391 mod.y = AddSat(y, y); 1392 mod.z = AddSat(z, z); 1393 mod.w = AddSat(w, w); 1394 break; 1395 case Shader::MODIFIER_X2_NEGATE: 1396 mod.x = -AddSat(x, x); 1397 mod.y = -AddSat(y, y); 1398 mod.z = -AddSat(z, z); 1399 mod.w = -AddSat(w, w); 1400 break; 1401 case Shader::MODIFIER_SIGN: 1402 mod.x = SubSat(x, Short4(0x0800)); 1403 mod.y = SubSat(y, Short4(0x0800)); 1404 mod.z = SubSat(z, Short4(0x0800)); 1405 mod.w = SubSat(w, Short4(0x0800)); 1406 mod.x = AddSat(mod.x, mod.x); 1407 mod.y = AddSat(mod.y, mod.y); 1408 mod.z = AddSat(mod.z, mod.z); 1409 mod.w = AddSat(mod.w, mod.w); 1410 break; 1411 case Shader::MODIFIER_SIGN_NEGATE: 1412 mod.x = SubSat(Short4(0x0800), x); 1413 mod.y = SubSat(Short4(0x0800), y); 1414 mod.z = SubSat(Short4(0x0800), z); 1415 mod.w = SubSat(Short4(0x0800), w); 1416 mod.x = AddSat(mod.x, mod.x); 1417 mod.y = AddSat(mod.y, mod.y); 1418 mod.z = AddSat(mod.z, mod.z); 1419 mod.w = AddSat(mod.w, mod.w); 1420 break; 1421 case Shader::MODIFIER_DZ: 1422 mod.x = x; 1423 mod.y = y; 1424 mod.z = z; 1425 mod.w = w; 1426 // Projection performed by texture sampler 1427 break; 1428 case Shader::MODIFIER_DW: 1429 mod.x = x; 1430 mod.y = y; 1431 mod.z = z; 1432 mod.w = w; 1433 // Projection performed by texture sampler 1434 break; 1435 default: 1436 ASSERT(false); 1437 } 1438 1439 if(src.type == Shader::PARAMETER_CONST && (src.modifier == Shader::MODIFIER_X2 || src.modifier == Shader::MODIFIER_X2_NEGATE)) 1440 { 1441 mod.x = Min(mod.x, Short4(0x1000)); mod.x = Max(mod.x, Short4(-0x1000)); 1442 mod.y = Min(mod.y, Short4(0x1000)); mod.y = Max(mod.y, Short4(-0x1000)); 1443 mod.z = Min(mod.z, Short4(0x1000)); mod.z = Max(mod.z, Short4(-0x1000)); 1444 mod.w = Min(mod.w, Short4(0x1000)); mod.w = Max(mod.w, Short4(-0x1000)); 1445 } 1446 1447 return mod; 1448 } 1449 MOV(Vector4s & dst,Vector4s & src0)1450 void PixelPipeline::MOV(Vector4s &dst, Vector4s &src0) 1451 { 1452 dst.x = src0.x; 1453 dst.y = src0.y; 1454 dst.z = src0.z; 1455 dst.w = src0.w; 1456 } 1457 ADD(Vector4s & dst,Vector4s & src0,Vector4s & src1)1458 void PixelPipeline::ADD(Vector4s &dst, Vector4s &src0, Vector4s &src1) 1459 { 1460 dst.x = AddSat(src0.x, src1.x); 1461 dst.y = AddSat(src0.y, src1.y); 1462 dst.z = AddSat(src0.z, src1.z); 1463 dst.w = AddSat(src0.w, src1.w); 1464 } 1465 SUB(Vector4s & dst,Vector4s & src0,Vector4s & src1)1466 void PixelPipeline::SUB(Vector4s &dst, Vector4s &src0, Vector4s &src1) 1467 { 1468 dst.x = SubSat(src0.x, src1.x); 1469 dst.y = SubSat(src0.y, src1.y); 1470 dst.z = SubSat(src0.z, src1.z); 1471 dst.w = SubSat(src0.w, src1.w); 1472 } 1473 MAD(Vector4s & dst,Vector4s & src0,Vector4s & src1,Vector4s & src2)1474 void PixelPipeline::MAD(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2) 1475 { 1476 // FIXME: Long fixed-point multiply fixup 1477 { dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); } 1478 { dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y); } 1479 { dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); } 1480 { dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); } 1481 } 1482 MUL(Vector4s & dst,Vector4s & src0,Vector4s & src1)1483 void PixelPipeline::MUL(Vector4s &dst, Vector4s &src0, Vector4s &src1) 1484 { 1485 // FIXME: Long fixed-point multiply fixup 1486 { dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); } 1487 { dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); } 1488 { dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); } 1489 { dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); } 1490 } 1491 DP3(Vector4s & dst,Vector4s & src0,Vector4s & src1)1492 void PixelPipeline::DP3(Vector4s &dst, Vector4s &src0, Vector4s &src1) 1493 { 1494 Short4 t0; 1495 Short4 t1; 1496 1497 // FIXME: Long fixed-point multiply fixup 1498 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); 1499 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); 1500 t0 = AddSat(t0, t1); 1501 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); 1502 t0 = AddSat(t0, t1); 1503 1504 dst.x = t0; 1505 dst.y = t0; 1506 dst.z = t0; 1507 dst.w = t0; 1508 } 1509 DP4(Vector4s & dst,Vector4s & src0,Vector4s & src1)1510 void PixelPipeline::DP4(Vector4s &dst, Vector4s &src0, Vector4s &src1) 1511 { 1512 Short4 t0; 1513 Short4 t1; 1514 1515 // FIXME: Long fixed-point multiply fixup 1516 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); 1517 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); 1518 t0 = AddSat(t0, t1); 1519 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); 1520 t0 = AddSat(t0, t1); 1521 t1 = MulHigh(src0.w, src1.w); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); 1522 t0 = AddSat(t0, t1); 1523 1524 dst.x = t0; 1525 dst.y = t0; 1526 dst.z = t0; 1527 dst.w = t0; 1528 } 1529 LRP(Vector4s & dst,Vector4s & src0,Vector4s & src1,Vector4s & src2)1530 void PixelPipeline::LRP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2) 1531 { 1532 // FIXME: Long fixed-point multiply fixup 1533 { dst.x = SubSat(src1.x, src2.x); dst.x = MulHigh(dst.x, src0.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); } 1534 { 1535 dst.y = SubSat(src1.y, src2.y); dst.y = MulHigh(dst.y, src0.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y); 1536 } 1537 {dst.z = SubSat(src1.z, src2.z); dst.z = MulHigh(dst.z, src0.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); } 1538 {dst.w = SubSat(src1.w, src2.w); dst.w = MulHigh(dst.w, src0.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); } 1539 } 1540 TEXCOORD(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,int coordinate)1541 void PixelPipeline::TEXCOORD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate) 1542 { 1543 Float4 uw; 1544 Float4 vw; 1545 Float4 sw; 1546 1547 if(state.interpolant[2 + coordinate].component & 0x01) 1548 { 1549 uw = Max(u, Float4(0.0f)); 1550 uw = Min(uw, Float4(1.0f)); 1551 dst.x = convertFixed12(uw); 1552 } 1553 else 1554 { 1555 dst.x = Short4(0x0000); 1556 } 1557 1558 if(state.interpolant[2 + coordinate].component & 0x02) 1559 { 1560 vw = Max(v, Float4(0.0f)); 1561 vw = Min(vw, Float4(1.0f)); 1562 dst.y = convertFixed12(vw); 1563 } 1564 else 1565 { 1566 dst.y = Short4(0x0000); 1567 } 1568 1569 if(state.interpolant[2 + coordinate].component & 0x04) 1570 { 1571 sw = Max(s, Float4(0.0f)); 1572 sw = Min(sw, Float4(1.0f)); 1573 dst.z = convertFixed12(sw); 1574 } 1575 else 1576 { 1577 dst.z = Short4(0x0000); 1578 } 1579 1580 dst.w = Short4(0x1000); 1581 } 1582 TEXCRD(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,int coordinate,bool project)1583 void PixelPipeline::TEXCRD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project) 1584 { 1585 Float4 uw = u; 1586 Float4 vw = v; 1587 Float4 sw = s; 1588 1589 if(project) 1590 { 1591 uw *= Rcp_pp(s); 1592 vw *= Rcp_pp(s); 1593 } 1594 1595 if(state.interpolant[2 + coordinate].component & 0x01) 1596 { 1597 uw *= Float4(0x1000); 1598 uw = Max(uw, Float4(-0x8000)); 1599 uw = Min(uw, Float4(0x7FFF)); 1600 dst.x = RoundShort4(uw); 1601 } 1602 else 1603 { 1604 dst.x = Short4(0x0000); 1605 } 1606 1607 if(state.interpolant[2 + coordinate].component & 0x02) 1608 { 1609 vw *= Float4(0x1000); 1610 vw = Max(vw, Float4(-0x8000)); 1611 vw = Min(vw, Float4(0x7FFF)); 1612 dst.y = RoundShort4(vw); 1613 } 1614 else 1615 { 1616 dst.y = Short4(0x0000); 1617 } 1618 1619 if(state.interpolant[2 + coordinate].component & 0x04) 1620 { 1621 sw *= Float4(0x1000); 1622 sw = Max(sw, Float4(-0x8000)); 1623 sw = Min(sw, Float4(0x7FFF)); 1624 dst.z = RoundShort4(sw); 1625 } 1626 else 1627 { 1628 dst.z = Short4(0x0000); 1629 } 1630 } 1631 TEXDP3(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,Vector4s & src)1632 void PixelPipeline::TEXDP3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src) 1633 { 1634 TEXM3X3PAD(u, v, s, src, 0, false); 1635 1636 Short4 t0 = RoundShort4(u_ * Float4(0x1000)); 1637 1638 dst.x = t0; 1639 dst.y = t0; 1640 dst.z = t0; 1641 dst.w = t0; 1642 } 1643 TEXDP3TEX(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,int stage,Vector4s & src0)1644 void PixelPipeline::TEXDP3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0) 1645 { 1646 TEXM3X3PAD(u, v, s, src0, 0, false); 1647 1648 v_ = Float4(0.0f); 1649 w_ = Float4(0.0f); 1650 1651 dst = sampleTexture(stage, u_, v_, w_, w_); 1652 } 1653 TEXKILL(Int cMask[4],Float4 & u,Float4 & v,Float4 & s)1654 void PixelPipeline::TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s) 1655 { 1656 Int kill = SignMask(CmpNLT(u, Float4(0.0f))) & 1657 SignMask(CmpNLT(v, Float4(0.0f))) & 1658 SignMask(CmpNLT(s, Float4(0.0f))); 1659 1660 for(unsigned int q = 0; q < state.multiSample; q++) 1661 { 1662 cMask[q] &= kill; 1663 } 1664 } 1665 TEXKILL(Int cMask[4],Vector4s & src)1666 void PixelPipeline::TEXKILL(Int cMask[4], Vector4s &src) 1667 { 1668 Short4 test = src.x | src.y | src.z; 1669 Int kill = SignMask(PackSigned(test, test)) ^ 0x0000000F; 1670 1671 for(unsigned int q = 0; q < state.multiSample; q++) 1672 { 1673 cMask[q] &= kill; 1674 } 1675 } 1676 TEX(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,int sampler,bool project)1677 void PixelPipeline::TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int sampler, bool project) 1678 { 1679 dst = sampleTexture(sampler, u, v, s, s, project); 1680 } 1681 TEXLD(Vector4s & dst,Vector4s & src,int sampler,bool project)1682 void PixelPipeline::TEXLD(Vector4s &dst, Vector4s &src, int sampler, bool project) 1683 { 1684 Float4 u = Float4(src.x) * Float4(1.0f / 0x0FFE); 1685 Float4 v = Float4(src.y) * Float4(1.0f / 0x0FFE); 1686 Float4 s = Float4(src.z) * Float4(1.0f / 0x0FFE); 1687 1688 dst = sampleTexture(sampler, u, v, s, s, project); 1689 } 1690 TEXBEM(Vector4s & dst,Vector4s & src,Float4 & u,Float4 & v,Float4 & s,int stage)1691 void PixelPipeline::TEXBEM(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage) 1692 { 1693 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE); 1694 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE); 1695 1696 Float4 du2 = du; 1697 Float4 dv2 = dv; 1698 1699 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0])); 1700 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0])); 1701 du += dv2; 1702 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1])); 1703 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1])); 1704 dv += du2; 1705 1706 Float4 u_ = u + du; 1707 Float4 v_ = v + dv; 1708 1709 dst = sampleTexture(stage, u_, v_, s, s); 1710 } 1711 TEXBEML(Vector4s & dst,Vector4s & src,Float4 & u,Float4 & v,Float4 & s,int stage)1712 void PixelPipeline::TEXBEML(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage) 1713 { 1714 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE); 1715 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE); 1716 1717 Float4 du2 = du; 1718 Float4 dv2 = dv; 1719 1720 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0])); 1721 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0])); 1722 du += dv2; 1723 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1])); 1724 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1])); 1725 dv += du2; 1726 1727 Float4 u_ = u + du; 1728 Float4 v_ = v + dv; 1729 1730 dst = sampleTexture(stage, u_, v_, s, s); 1731 1732 Short4 L; 1733 1734 L = src.z; 1735 L = MulHigh(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceScale4))); 1736 L = L << 4; 1737 L = AddSat(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceOffset4))); 1738 L = Max(L, Short4(0x0000)); 1739 L = Min(L, Short4(0x1000)); 1740 1741 dst.x = MulHigh(dst.x, L); dst.x = dst.x << 4; 1742 dst.y = MulHigh(dst.y, L); dst.y = dst.y << 4; 1743 dst.z = MulHigh(dst.z, L); dst.z = dst.z << 4; 1744 } 1745 TEXREG2AR(Vector4s & dst,Vector4s & src0,int stage)1746 void PixelPipeline::TEXREG2AR(Vector4s &dst, Vector4s &src0, int stage) 1747 { 1748 Float4 u = Float4(src0.w) * Float4(1.0f / 0x0FFE); 1749 Float4 v = Float4(src0.x) * Float4(1.0f / 0x0FFE); 1750 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE); 1751 1752 dst = sampleTexture(stage, u, v, s, s); 1753 } 1754 TEXREG2GB(Vector4s & dst,Vector4s & src0,int stage)1755 void PixelPipeline::TEXREG2GB(Vector4s &dst, Vector4s &src0, int stage) 1756 { 1757 Float4 u = Float4(src0.y) * Float4(1.0f / 0x0FFE); 1758 Float4 v = Float4(src0.z) * Float4(1.0f / 0x0FFE); 1759 Float4 s = v; 1760 1761 dst = sampleTexture(stage, u, v, s, s); 1762 } 1763 TEXREG2RGB(Vector4s & dst,Vector4s & src0,int stage)1764 void PixelPipeline::TEXREG2RGB(Vector4s &dst, Vector4s &src0, int stage) 1765 { 1766 Float4 u = Float4(src0.x) * Float4(1.0f / 0x0FFE); 1767 Float4 v = Float4(src0.y) * Float4(1.0f / 0x0FFE); 1768 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE); 1769 1770 dst = sampleTexture(stage, u, v, s, s); 1771 } 1772 TEXM3X2DEPTH(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,Vector4s & src,bool signedScaling)1773 void PixelPipeline::TEXM3X2DEPTH(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src, bool signedScaling) 1774 { 1775 TEXM3X2PAD(u, v, s, src, 1, signedScaling); 1776 1777 // z / w 1778 u_ *= Rcp_pp(v_); // FIXME: Set result to 1.0 when division by zero 1779 1780 oDepth = u_; 1781 } 1782 TEXM3X2PAD(Float4 & u,Float4 & v,Float4 & s,Vector4s & src0,int component,bool signedScaling)1783 void PixelPipeline::TEXM3X2PAD(Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling) 1784 { 1785 TEXM3X3PAD(u, v, s, src0, component, signedScaling); 1786 } 1787 TEXM3X2TEX(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,int stage,Vector4s & src0,bool signedScaling)1788 void PixelPipeline::TEXM3X2TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling) 1789 { 1790 TEXM3X2PAD(u, v, s, src0, 1, signedScaling); 1791 1792 w_ = Float4(0.0f); 1793 1794 dst = sampleTexture(stage, u_, v_, w_, w_); 1795 } 1796 TEXM3X3(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,Vector4s & src0,bool signedScaling)1797 void PixelPipeline::TEXM3X3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, bool signedScaling) 1798 { 1799 TEXM3X3PAD(u, v, s, src0, 2, signedScaling); 1800 1801 dst.x = RoundShort4(u_ * Float4(0x1000)); 1802 dst.y = RoundShort4(v_ * Float4(0x1000)); 1803 dst.z = RoundShort4(w_ * Float4(0x1000)); 1804 dst.w = Short4(0x1000); 1805 } 1806 TEXM3X3PAD(Float4 & u,Float4 & v,Float4 & s,Vector4s & src0,int component,bool signedScaling)1807 void PixelPipeline::TEXM3X3PAD(Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling) 1808 { 1809 if(component == 0 || previousScaling != signedScaling) // FIXME: Other source modifiers? 1810 { 1811 U = Float4(src0.x); 1812 V = Float4(src0.y); 1813 W = Float4(src0.z); 1814 1815 previousScaling = signedScaling; 1816 } 1817 1818 Float4 x = U * u + V * v + W * s; 1819 1820 x *= Float4(1.0f / 0x1000); 1821 1822 switch(component) 1823 { 1824 case 0: u_ = x; break; 1825 case 1: v_ = x; break; 1826 case 2: w_ = x; break; 1827 default: ASSERT(false); 1828 } 1829 } 1830 TEXM3X3SPEC(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,int stage,Vector4s & src0,Vector4s & src1)1831 void PixelPipeline::TEXM3X3SPEC(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, Vector4s &src1) 1832 { 1833 TEXM3X3PAD(u, v, s, src0, 2, false); 1834 1835 Float4 E[3]; // Eye vector 1836 1837 E[0] = Float4(src1.x) * Float4(1.0f / 0x0FFE); 1838 E[1] = Float4(src1.y) * Float4(1.0f / 0x0FFE); 1839 E[2] = Float4(src1.z) * Float4(1.0f / 0x0FFE); 1840 1841 // Reflection 1842 Float4 u__; 1843 Float4 v__; 1844 Float4 w__; 1845 1846 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N) 1847 u__ = u_ * E[0]; 1848 v__ = v_ * E[1]; 1849 w__ = w_ * E[2]; 1850 u__ += v__ + w__; 1851 u__ += u__; 1852 v__ = u__; 1853 w__ = u__; 1854 u__ *= u_; 1855 v__ *= v_; 1856 w__ *= w_; 1857 u_ *= u_; 1858 v_ *= v_; 1859 w_ *= w_; 1860 u_ += v_ + w_; 1861 u__ -= E[0] * u_; 1862 v__ -= E[1] * u_; 1863 w__ -= E[2] * u_; 1864 1865 dst = sampleTexture(stage, u__, v__, w__, w__); 1866 } 1867 TEXM3X3TEX(Vector4s & dst,Float4 & u,Float4 & v,Float4 & s,int stage,Vector4s & src0,bool signedScaling)1868 void PixelPipeline::TEXM3X3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling) 1869 { 1870 TEXM3X3PAD(u, v, s, src0, 2, signedScaling); 1871 1872 dst = sampleTexture(stage, u_, v_, w_, w_); 1873 } 1874 TEXM3X3VSPEC(Vector4s & dst,Float4 & x,Float4 & y,Float4 & z,int stage,Vector4s & src0)1875 void PixelPipeline::TEXM3X3VSPEC(Vector4s &dst, Float4 &x, Float4 &y, Float4 &z, int stage, Vector4s &src0) 1876 { 1877 TEXM3X3PAD(x, y, z, src0, 2, false); 1878 1879 Float4 E[3]; // Eye vector 1880 1881 E[0] = v[2 + stage - 2].w; 1882 E[1] = v[2 + stage - 1].w; 1883 E[2] = v[2 + stage - 0].w; 1884 1885 // Reflection 1886 Float4 u__; 1887 Float4 v__; 1888 Float4 w__; 1889 1890 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N) 1891 u__ = u_ * E[0]; 1892 v__ = v_ * E[1]; 1893 w__ = w_ * E[2]; 1894 u__ += v__ + w__; 1895 u__ += u__; 1896 v__ = u__; 1897 w__ = u__; 1898 u__ *= u_; 1899 v__ *= v_; 1900 w__ *= w_; 1901 u_ *= u_; 1902 v_ *= v_; 1903 w_ *= w_; 1904 u_ += v_ + w_; 1905 u__ -= E[0] * u_; 1906 v__ -= E[1] * u_; 1907 w__ -= E[2] * u_; 1908 1909 dst = sampleTexture(stage, u__, v__, w__, w__); 1910 } 1911 TEXDEPTH()1912 void PixelPipeline::TEXDEPTH() 1913 { 1914 u_ = Float4(rs[5].x); 1915 v_ = Float4(rs[5].y); 1916 1917 // z / w 1918 u_ *= Rcp_pp(v_); // FIXME: Set result to 1.0 when division by zero 1919 1920 oDepth = u_; 1921 } 1922 CND(Vector4s & dst,Vector4s & src0,Vector4s & src1,Vector4s & src2)1923 void PixelPipeline::CND(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2) 1924 { 1925 {Short4 t0; t0 = src0.x; t0 = CmpGT(t0, Short4(0x0800)); Short4 t1; t1 = src1.x; t1 = t1 & t0; t0 = ~t0 & src2.x; t0 = t0 | t1; dst.x = t0; }; 1926 {Short4 t0; t0 = src0.y; t0 = CmpGT(t0, Short4(0x0800)); Short4 t1; t1 = src1.y; t1 = t1 & t0; t0 = ~t0 & src2.y; t0 = t0 | t1; dst.y = t0; }; 1927 {Short4 t0; t0 = src0.z; t0 = CmpGT(t0, Short4(0x0800)); Short4 t1; t1 = src1.z; t1 = t1 & t0; t0 = ~t0 & src2.z; t0 = t0 | t1; dst.z = t0; }; 1928 {Short4 t0; t0 = src0.w; t0 = CmpGT(t0, Short4(0x0800)); Short4 t1; t1 = src1.w; t1 = t1 & t0; t0 = ~t0 & src2.w; t0 = t0 | t1; dst.w = t0; }; 1929 } 1930 CMP(Vector4s & dst,Vector4s & src0,Vector4s & src1,Vector4s & src2)1931 void PixelPipeline::CMP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2) 1932 { 1933 {Short4 t0 = CmpGT(Short4(0x0000), src0.x); Short4 t1; t1 = src2.x; t1 &= t0; t0 = ~t0 & src1.x; t0 |= t1; dst.x = t0; }; 1934 {Short4 t0 = CmpGT(Short4(0x0000), src0.y); Short4 t1; t1 = src2.y; t1 &= t0; t0 = ~t0 & src1.y; t0 |= t1; dst.y = t0; }; 1935 {Short4 t0 = CmpGT(Short4(0x0000), src0.z); Short4 t1; t1 = src2.z; t1 &= t0; t0 = ~t0 & src1.z; t0 |= t1; dst.z = t0; }; 1936 {Short4 t0 = CmpGT(Short4(0x0000), src0.w); Short4 t1; t1 = src2.w; t1 &= t0; t0 = ~t0 & src1.w; t0 |= t1; dst.w = t0; }; 1937 } 1938 BEM(Vector4s & dst,Vector4s & src0,Vector4s & src1,int stage)1939 void PixelPipeline::BEM(Vector4s &dst, Vector4s &src0, Vector4s &src1, int stage) 1940 { 1941 Short4 t0; 1942 Short4 t1; 1943 1944 // dst.x = src0.x + BUMPENVMAT00(stage) * src1.x + BUMPENVMAT10(stage) * src1.y 1945 t0 = MulHigh(src1.x, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][0]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard. 1946 t1 = MulHigh(src1.y, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][0]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard. 1947 t0 = AddSat(t0, t1); 1948 t0 = AddSat(t0, src0.x); 1949 dst.x = t0; 1950 1951 // dst.y = src0.y + BUMPENVMAT01(stage) * src1.x + BUMPENVMAT11(stage) * src1.y 1952 t0 = MulHigh(src1.x, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][1]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard. 1953 t1 = MulHigh(src1.y, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][1]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard. 1954 t0 = AddSat(t0, t1); 1955 t0 = AddSat(t0, src0.y); 1956 dst.y = t0; 1957 } 1958 } 1959 1960