1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "VertexPipeline.hpp" 16 17 #include "Vertex.hpp" 18 #include "Renderer.hpp" 19 #include "Debug.hpp" 20 21 #include <string.h> 22 #include <stdlib.h> 23 #include <stdio.h> 24 25 #undef max 26 #undef min 27 28 namespace sw 29 { 30 extern bool secondaryColor; 31 VertexPipeline(const VertexProcessor::State & state)32 VertexPipeline::VertexPipeline(const VertexProcessor::State &state) : VertexRoutine(state, 0) 33 { 34 } 35 ~VertexPipeline()36 VertexPipeline::~VertexPipeline() 37 { 38 } 39 transformBlend(const Register & src,const Pointer<Byte> & matrix,bool homogeneous)40 Vector4f VertexPipeline::transformBlend(const Register &src, const Pointer<Byte> &matrix, bool homogeneous) 41 { 42 Vector4f dst; 43 44 if(state.vertexBlendMatrixCount == 0) 45 { 46 dst = transform(src, matrix, homogeneous); 47 } 48 else 49 { 50 UInt index0[4]; 51 UInt index1[4]; 52 UInt index2[4]; 53 UInt index3[4]; 54 55 if(state.indexedVertexBlendEnable) 56 { 57 for(int i = 0; i < 4; i++) 58 { 59 Float4 B = v[BlendIndices].x; 60 UInt indices; 61 62 switch(i) 63 { 64 case 0: indices = As<UInt>(Float(B.x)); break; 65 case 1: indices = As<UInt>(Float(B.y)); break; 66 case 2: indices = As<UInt>(Float(B.z)); break; 67 case 3: indices = As<UInt>(Float(B.w)); break; 68 } 69 70 index0[i] = (indices & 0x000000FF) << 6; 71 index1[i] = (indices & 0x0000FF00) >> 2; 72 index2[i] = (indices & 0x00FF0000) >> 10; 73 index3[i] = (indices & 0xFF000000) >> 18; 74 } 75 } 76 else 77 { 78 for(int i = 0; i < 4; i++) 79 { 80 index0[i] = 0 * 64; 81 index1[i] = 1 * 64; 82 index2[i] = 2 * 64; 83 index3[i] = 3 * 64; 84 } 85 } 86 87 Float4 weight0; 88 Float4 weight1; 89 Float4 weight2; 90 Float4 weight3; 91 92 switch(state.vertexBlendMatrixCount) 93 { 94 case 4: weight2 = v[BlendWeight].z; 95 case 3: weight1 = v[BlendWeight].y; 96 case 2: weight0 = v[BlendWeight].x; 97 case 1: 98 break; 99 } 100 101 if(state.vertexBlendMatrixCount == 1) 102 { 103 dst = transform(src, matrix, index0, homogeneous); 104 } 105 else if(state.vertexBlendMatrixCount == 2) 106 { 107 weight1 = Float4(1.0f) - weight0; 108 109 Vector4f pos0; 110 Vector4f pos1; 111 112 pos0 = transform(src, matrix, index0, homogeneous); 113 pos1 = transform(src, matrix, index1, homogeneous); 114 115 dst.x = pos0.x * weight0 + pos1.x * weight1; // FIXME: Vector4f operators 116 dst.y = pos0.y * weight0 + pos1.y * weight1; 117 dst.z = pos0.z * weight0 + pos1.z * weight1; 118 dst.w = pos0.w * weight0 + pos1.w * weight1; 119 } 120 else if(state.vertexBlendMatrixCount == 3) 121 { 122 weight2 = Float4(1.0f) - (weight0 + weight1); 123 124 Vector4f pos0; 125 Vector4f pos1; 126 Vector4f pos2; 127 128 pos0 = transform(src, matrix, index0, homogeneous); 129 pos1 = transform(src, matrix, index1, homogeneous); 130 pos2 = transform(src, matrix, index2, homogeneous); 131 132 dst.x = pos0.x * weight0 + pos1.x * weight1 + pos2.x * weight2; 133 dst.y = pos0.y * weight0 + pos1.y * weight1 + pos2.y * weight2; 134 dst.z = pos0.z * weight0 + pos1.z * weight1 + pos2.z * weight2; 135 dst.w = pos0.w * weight0 + pos1.w * weight1 + pos2.w * weight2; 136 } 137 else if(state.vertexBlendMatrixCount == 4) 138 { 139 weight3 = Float4(1.0f) - (weight0 + weight1 + weight2); 140 141 Vector4f pos0; 142 Vector4f pos1; 143 Vector4f pos2; 144 Vector4f pos3; 145 146 pos0 = transform(src, matrix, index0, homogeneous); 147 pos1 = transform(src, matrix, index1, homogeneous); 148 pos2 = transform(src, matrix, index2, homogeneous); 149 pos3 = transform(src, matrix, index3, homogeneous); 150 151 dst.x = pos0.x * weight0 + pos1.x * weight1 + pos2.x * weight2 + pos3.x * weight3; 152 dst.y = pos0.y * weight0 + pos1.y * weight1 + pos2.y * weight2 + pos3.y * weight3; 153 dst.z = pos0.z * weight0 + pos1.z * weight1 + pos2.z * weight2 + pos3.z * weight3; 154 dst.w = pos0.w * weight0 + pos1.w * weight1 + pos2.w * weight2 + pos3.w * weight3; 155 } 156 } 157 158 return dst; 159 } 160 pipeline(UInt & index)161 void VertexPipeline::pipeline(UInt &index) 162 { 163 Vector4f position; 164 Vector4f normal; 165 166 if(!state.preTransformed) 167 { 168 position = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.transformT)), true); 169 } 170 else 171 { 172 position = v[PositionT]; 173 } 174 175 o[Pos].x = position.x; 176 o[Pos].y = position.y; 177 o[Pos].z = position.z; 178 o[Pos].w = position.w; 179 180 Vector4f vertexPosition = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); 181 182 if(state.vertexNormalActive) 183 { 184 normal = transformBlend(v[Normal], Pointer<Byte>(data + OFFSET(DrawData,ff.normalTransformT)), false); 185 186 if(state.normalizeNormals) 187 { 188 normal = normalize(normal); 189 } 190 } 191 192 if(!state.vertexLightingActive) 193 { 194 // FIXME: Don't process if not used at all 195 if(state.diffuseActive && state.input[Color0]) 196 { 197 Vector4f diffuse = v[Color0]; 198 199 o[C0].x = diffuse.x; 200 o[C0].y = diffuse.y; 201 o[C0].z = diffuse.z; 202 o[C0].w = diffuse.w; 203 } 204 else 205 { 206 o[C0].x = Float4(1.0f); 207 o[C0].y = Float4(1.0f); 208 o[C0].z = Float4(1.0f); 209 o[C0].w = Float4(1.0f); 210 } 211 212 // FIXME: Don't process if not used at all 213 if(state.specularActive && state.input[Color1]) 214 { 215 Vector4f specular = v[Color1]; 216 217 o[C1].x = specular.x; 218 o[C1].y = specular.y; 219 o[C1].z = specular.z; 220 o[C1].w = specular.w; 221 } 222 else 223 { 224 o[C1].x = Float4(0.0f); 225 o[C1].y = Float4(0.0f); 226 o[C1].z = Float4(0.0f); 227 o[C1].w = Float4(1.0f); 228 } 229 } 230 else 231 { 232 o[C0].x = Float4(0.0f); 233 o[C0].y = Float4(0.0f); 234 o[C0].z = Float4(0.0f); 235 o[C0].w = Float4(0.0f); 236 237 o[C1].x = Float4(0.0f); 238 o[C1].y = Float4(0.0f); 239 o[C1].z = Float4(0.0f); 240 o[C1].w = Float4(0.0f); 241 242 Vector4f ambient; 243 Float4 globalAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.globalAmbient)); // FIXME: Unpack 244 245 ambient.x = globalAmbient.x; 246 ambient.y = globalAmbient.y; 247 ambient.z = globalAmbient.z; 248 249 for(int i = 0; i < 8; i++) 250 { 251 if(!(state.vertexLightActive & (1 << i))) 252 { 253 continue; 254 } 255 256 Vector4f L; // Light vector 257 Float4 att; // Attenuation 258 259 // Attenuation 260 { 261 Float4 d; // Distance 262 263 L.x = L.y = L.z = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightPosition[i])); // FIXME: Unpack 264 L.x = L.x.xxxx; 265 L.y = L.y.yyyy; 266 L.z = L.z.zzzz; 267 268 L.x -= vertexPosition.x; 269 L.y -= vertexPosition.y; 270 L.z -= vertexPosition.z; 271 d = dot3(L, L); 272 d = RcpSqrt_pp(d); // FIXME: Sufficient precision? 273 L.x *= d; 274 L.y *= d; 275 L.z *= d; 276 d = Rcp_pp(d); // FIXME: Sufficient precision? 277 278 Float4 q = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationQuadratic[i])); 279 Float4 l = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationLinear[i])); 280 Float4 c = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationConstant[i])); 281 282 att = Rcp_pp((q * d + l) * d + c); 283 } 284 285 // Ambient per light 286 { 287 Float4 lightAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightAmbient[i])); // FIXME: Unpack 288 289 ambient.x = ambient.x + lightAmbient.x * att; 290 ambient.y = ambient.y + lightAmbient.y * att; 291 ambient.z = ambient.z + lightAmbient.z * att; 292 } 293 294 // Diffuse 295 if(state.vertexNormalActive) 296 { 297 Float4 dot; 298 299 dot = dot3(L, normal); 300 dot = Max(dot, Float4(0.0f)); 301 dot *= att; 302 303 Vector4f diff; 304 305 if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL) 306 { 307 diff.x = diff.y = diff.z = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialDiffuse)); // FIXME: Unpack 308 diff.x = diff.x.xxxx; 309 diff.y = diff.y.yyyy; 310 diff.z = diff.z.zzzz; 311 } 312 else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1) 313 { 314 diff = v[Color0]; 315 } 316 else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2) 317 { 318 diff = v[Color1]; 319 } 320 else ASSERT(false); 321 322 Float4 lightDiffuse = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightDiffuse[i])); 323 324 o[C0].x = o[C0].x + diff.x * dot * lightDiffuse.x; // FIXME: Clamp first? 325 o[C0].y = o[C0].y + diff.y * dot * lightDiffuse.y; // FIXME: Clamp first? 326 o[C0].z = o[C0].z + diff.z * dot * lightDiffuse.z; // FIXME: Clamp first? 327 } 328 329 // Specular 330 if(state.vertexSpecularActive) 331 { 332 Vector4f S; 333 Vector4f C; // Camera vector 334 Float4 pow; 335 336 pow = *Pointer<Float>(data + OFFSET(DrawData,ff.materialShininess)); 337 338 S.x = Float4(0.0f) - vertexPosition.x; 339 S.y = Float4(0.0f) - vertexPosition.y; 340 S.z = Float4(0.0f) - vertexPosition.z; 341 C = normalize(S); 342 343 S.x = L.x + C.x; 344 S.y = L.y + C.y; 345 S.z = L.z + C.z; 346 C = normalize(S); 347 348 Float4 dot = Max(dot3(C, normal), Float4(0.0f)); // FIXME: max(dot3(C, normal), 0) 349 350 Float4 P = power(dot, pow); 351 P *= att; 352 353 Vector4f spec; 354 355 if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL) 356 { 357 Float4 materialSpecular = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialSpecular)); // FIXME: Unpack 358 359 spec.x = materialSpecular.x; 360 spec.y = materialSpecular.y; 361 spec.z = materialSpecular.z; 362 } 363 else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1) 364 { 365 spec = v[Color0]; 366 } 367 else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2) 368 { 369 spec = v[Color1]; 370 } 371 else ASSERT(false); 372 373 Float4 lightSpecular = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightSpecular[i])); 374 375 spec.x *= lightSpecular.x; 376 spec.y *= lightSpecular.y; 377 spec.z *= lightSpecular.z; 378 379 spec.x *= P; 380 spec.y *= P; 381 spec.z *= P; 382 383 spec.x = Max(spec.x, Float4(0.0f)); 384 spec.y = Max(spec.y, Float4(0.0f)); 385 spec.z = Max(spec.z, Float4(0.0f)); 386 387 if(secondaryColor) 388 { 389 o[C1].x = o[C1].x + spec.x; 390 o[C1].y = o[C1].y + spec.y; 391 o[C1].z = o[C1].z + spec.z; 392 } 393 else 394 { 395 o[C0].x = o[C0].x + spec.x; 396 o[C0].y = o[C0].y + spec.y; 397 o[C0].z = o[C0].z + spec.z; 398 } 399 } 400 } 401 402 if(state.vertexAmbientMaterialSourceActive == MATERIAL_MATERIAL) 403 { 404 Float4 materialAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialAmbient)); // FIXME: Unpack 405 406 ambient.x = ambient.x * materialAmbient.x; 407 ambient.y = ambient.y * materialAmbient.y; 408 ambient.z = ambient.z * materialAmbient.z; 409 } 410 else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR1) 411 { 412 Vector4f materialDiffuse = v[Color0]; 413 414 ambient.x = ambient.x * materialDiffuse.x; 415 ambient.y = ambient.y * materialDiffuse.y; 416 ambient.z = ambient.z * materialDiffuse.z; 417 } 418 else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR2) 419 { 420 Vector4f materialSpecular = v[Color1]; 421 422 ambient.x = ambient.x * materialSpecular.x; 423 ambient.y = ambient.y * materialSpecular.y; 424 ambient.z = ambient.z * materialSpecular.z; 425 } 426 else ASSERT(false); 427 428 o[C0].x = o[C0].x + ambient.x; 429 o[C0].y = o[C0].y + ambient.y; 430 o[C0].z = o[C0].z + ambient.z; 431 432 // Emissive 433 if(state.vertexEmissiveMaterialSourceActive == MATERIAL_MATERIAL) 434 { 435 Float4 materialEmission = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialEmission)); // FIXME: Unpack 436 437 o[C0].x = o[C0].x + materialEmission.x; 438 o[C0].y = o[C0].y + materialEmission.y; 439 o[C0].z = o[C0].z + materialEmission.z; 440 } 441 else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR1) 442 { 443 Vector4f materialSpecular = v[Color0]; 444 445 o[C0].x = o[C0].x + materialSpecular.x; 446 o[C0].y = o[C0].y + materialSpecular.y; 447 o[C0].z = o[C0].z + materialSpecular.z; 448 } 449 else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR2) 450 { 451 Vector4f materialSpecular = v[Color1]; 452 453 o[C0].x = o[C0].x + materialSpecular.x; 454 o[C0].y = o[C0].y + materialSpecular.y; 455 o[C0].z = o[C0].z + materialSpecular.z; 456 } 457 else ASSERT(false); 458 459 // Diffuse alpha component 460 if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL) 461 { 462 o[C0].w = Float4(*Pointer<Float4>(data + OFFSET(DrawData,ff.materialDiffuse[0]))).wwww; // FIXME: Unpack 463 } 464 else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1) 465 { 466 Vector4f alpha = v[Color0]; 467 o[C0].w = alpha.w; 468 } 469 else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2) 470 { 471 Vector4f alpha = v[Color1]; 472 o[C0].w = alpha.w; 473 } 474 else ASSERT(false); 475 476 if(state.vertexSpecularActive) 477 { 478 // Specular alpha component 479 if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL) 480 { 481 o[C1].w = Float4(*Pointer<Float4>(data + OFFSET(DrawData,ff.materialSpecular[3]))).wwww; // FIXME: Unpack 482 } 483 else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1) 484 { 485 Vector4f alpha = v[Color0]; 486 o[C1].w = alpha.w; 487 } 488 else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2) 489 { 490 Vector4f alpha = v[Color1]; 491 o[C1].w = alpha.w; 492 } 493 else ASSERT(false); 494 } 495 } 496 497 if(state.fogActive) 498 { 499 Float4 f; 500 501 if(!state.rangeFogActive) 502 { 503 f = Abs(vertexPosition.z); 504 } 505 else 506 { 507 f = Sqrt(dot3(vertexPosition, vertexPosition)); // FIXME: f = length(vertexPosition); 508 } 509 510 switch(state.vertexFogMode) 511 { 512 case FOG_NONE: 513 if(state.specularActive) 514 { 515 o[Fog].x = o[C1].w; 516 } 517 else 518 { 519 o[Fog].x = Float4(0.0f); 520 } 521 break; 522 case FOG_LINEAR: 523 o[Fog].x = f * *Pointer<Float4>(data + OFFSET(DrawData,fog.scale)) + *Pointer<Float4>(data + OFFSET(DrawData,fog.offset)); 524 break; 525 case FOG_EXP: 526 o[Fog].x = exponential2(f * *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE)), true); 527 break; 528 case FOG_EXP2: 529 o[Fog].x = exponential2((f * f) * *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E)), true); 530 break; 531 default: 532 ASSERT(false); 533 } 534 } 535 536 for(int stage = 0; stage < 8; stage++) 537 { 538 processTextureCoordinate(stage, normal, position); 539 } 540 541 processPointSize(); 542 } 543 processTextureCoordinate(int stage,Vector4f & normal,Vector4f & position)544 void VertexPipeline::processTextureCoordinate(int stage, Vector4f &normal, Vector4f &position) 545 { 546 if(state.output[T0 + stage].write) 547 { 548 int i = state.textureState[stage].texCoordIndexActive; 549 550 switch(state.textureState[stage].texGenActive) 551 { 552 case TEXGEN_NONE: 553 { 554 Vector4f &&varying = v[TexCoord0 + i]; 555 556 o[T0 + stage].x = varying.x; 557 o[T0 + stage].y = varying.y; 558 o[T0 + stage].z = varying.z; 559 o[T0 + stage].w = varying.w; 560 } 561 break; 562 case TEXGEN_PASSTHRU: 563 { 564 Vector4f &&varying = v[TexCoord0 + i]; 565 566 o[T0 + stage].x = varying.x; 567 o[T0 + stage].y = varying.y; 568 o[T0 + stage].z = varying.z; 569 o[T0 + stage].w = varying.w; 570 571 if(state.input[TexCoord0 + i]) 572 { 573 switch(state.input[TexCoord0 + i].count) 574 { 575 case 1: 576 o[T0 + stage].y = Float4(1.0f); 577 o[T0 + stage].z = Float4(0.0f); 578 o[T0 + stage].w = Float4(0.0f); 579 break; 580 case 2: 581 o[T0 + stage].z = Float4(1.0f); 582 o[T0 + stage].w = Float4(0.0f); 583 break; 584 case 3: 585 o[T0 + stage].w = Float4(1.0f); 586 break; 587 case 4: 588 break; 589 default: 590 ASSERT(false); 591 } 592 } 593 } 594 break; 595 case TEXGEN_NORMAL: 596 { 597 Vector4f Nc; // Normal vector in camera space 598 599 if(state.vertexNormalActive) 600 { 601 Nc = normal; 602 } 603 else 604 { 605 Nc.x = Float4(0.0f); 606 Nc.y = Float4(0.0f); 607 Nc.z = Float4(0.0f); 608 } 609 610 Nc.w = Float4(1.0f); 611 612 o[T0 + stage].x = Nc.x; 613 o[T0 + stage].y = Nc.y; 614 o[T0 + stage].z = Nc.z; 615 o[T0 + stage].w = Nc.w; 616 } 617 break; 618 case TEXGEN_POSITION: 619 { 620 Vector4f Pn = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); // Position in camera space 621 622 Pn.w = Float4(1.0f); 623 624 o[T0 + stage].x = Pn.x; 625 o[T0 + stage].y = Pn.y; 626 o[T0 + stage].z = Pn.z; 627 o[T0 + stage].w = Pn.w; 628 } 629 break; 630 case TEXGEN_REFLECTION: 631 { 632 Vector4f R; // Reflection vector 633 634 if(state.vertexNormalActive) 635 { 636 Vector4f Nc; // Normal vector in camera space 637 638 Nc = normal; 639 640 if(state.localViewerActive) 641 { 642 Vector4f Ec; // Eye vector in camera space 643 Vector4f N2; 644 645 Ec = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); 646 Ec = normalize(Ec); 647 648 // R = E - 2 * N * (E . N) 649 Float4 dot = Float4(2.0f) * dot3(Ec, Nc); 650 651 R.x = Ec.x - Nc.x * dot; 652 R.y = Ec.y - Nc.y * dot; 653 R.z = Ec.z - Nc.z * dot; 654 } 655 else 656 { 657 // u = -2 * Nz * Nx 658 // v = -2 * Nz * Ny 659 // w = 1 - 2 * Nz * Nz 660 661 R.x = -Float4(2.0f) * Nc.z * Nc.x; 662 R.y = -Float4(2.0f) * Nc.z * Nc.y; 663 R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z; 664 } 665 } 666 else 667 { 668 R.x = Float4(0.0f); 669 R.y = Float4(0.0f); 670 R.z = Float4(0.0f); 671 } 672 673 R.w = Float4(1.0f); 674 675 o[T0 + stage].x = R.x; 676 o[T0 + stage].y = R.y; 677 o[T0 + stage].z = R.z; 678 o[T0 + stage].w = R.w; 679 } 680 break; 681 case TEXGEN_SPHEREMAP: 682 { 683 Vector4f R; // Reflection vector 684 685 if(state.vertexNormalActive) 686 { 687 Vector4f Nc; // Normal vector in camera space 688 689 Nc = normal; 690 691 if(state.localViewerActive) 692 { 693 Vector4f Ec; // Eye vector in camera space 694 Vector4f N2; 695 696 Ec = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); 697 Ec = normalize(Ec); 698 699 // R = E - 2 * N * (E . N) 700 Float4 dot = Float4(2.0f) * dot3(Ec, Nc); 701 702 R.x = Ec.x - Nc.x * dot; 703 R.y = Ec.y - Nc.y * dot; 704 R.z = Ec.z - Nc.z * dot; 705 } 706 else 707 { 708 // u = -2 * Nz * Nx 709 // v = -2 * Nz * Ny 710 // w = 1 - 2 * Nz * Nz 711 712 R.x = -Float4(2.0f) * Nc.z * Nc.x; 713 R.y = -Float4(2.0f) * Nc.z * Nc.y; 714 R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z; 715 } 716 } 717 else 718 { 719 R.x = Float4(0.0f); 720 R.y = Float4(0.0f); 721 R.z = Float4(0.0f); 722 } 723 724 R.z -= Float4(1.0f); 725 R = normalize(R); 726 R.x = Float4(0.5f) * R.x + Float4(0.5f); 727 R.y = Float4(0.5f) * R.y + Float4(0.5f); 728 729 R.z = Float4(1.0f); 730 R.w = Float4(0.0f); 731 732 o[T0 + stage].x = R.x; 733 o[T0 + stage].y = R.y; 734 o[T0 + stage].z = R.z; 735 o[T0 + stage].w = R.w; 736 } 737 break; 738 default: 739 ASSERT(false); 740 } 741 742 Vector4f texTrans0; 743 Vector4f texTrans1; 744 Vector4f texTrans2; 745 Vector4f texTrans3; 746 747 Vector4f T; 748 Vector4f t; 749 750 T.x = o[T0 + stage].x; 751 T.y = o[T0 + stage].y; 752 T.z = o[T0 + stage].z; 753 T.w = o[T0 + stage].w; 754 755 switch(state.textureState[stage].textureTransformCountActive) 756 { 757 case 4: 758 texTrans3.x = texTrans3.y = texTrans3.z = texTrans3.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][3])); // FIXME: Unpack 759 texTrans3.x = texTrans3.x.xxxx; 760 texTrans3.y = texTrans3.y.yyyy; 761 texTrans3.z = texTrans3.z.zzzz; 762 texTrans3.w = texTrans3.w.wwww; 763 t.w = dot4(T, texTrans3); 764 case 3: 765 texTrans2.x = texTrans2.y = texTrans2.z = texTrans2.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][2])); // FIXME: Unpack 766 texTrans2.x = texTrans2.x.xxxx; 767 texTrans2.y = texTrans2.y.yyyy; 768 texTrans2.z = texTrans2.z.zzzz; 769 texTrans2.w = texTrans2.w.wwww; 770 t.z = dot4(T, texTrans2); 771 case 2: 772 texTrans1.x = texTrans1.y = texTrans1.z = texTrans1.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][1])); // FIXME: Unpack 773 texTrans1.x = texTrans1.x.xxxx; 774 texTrans1.y = texTrans1.y.yyyy; 775 texTrans1.z = texTrans1.z.zzzz; 776 texTrans1.w = texTrans1.w.wwww; 777 t.y = dot4(T, texTrans1); 778 case 1: 779 texTrans0.x = texTrans0.y = texTrans0.z = texTrans0.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][0])); // FIXME: Unpack 780 texTrans0.x = texTrans0.x.xxxx; 781 texTrans0.y = texTrans0.y.yyyy; 782 texTrans0.z = texTrans0.z.zzzz; 783 texTrans0.w = texTrans0.w.wwww; 784 t.x = dot4(T, texTrans0); 785 786 o[T0 + stage].x = t.x; 787 o[T0 + stage].y = t.y; 788 o[T0 + stage].z = t.z; 789 o[T0 + stage].w = t.w; 790 case 0: 791 break; 792 default: 793 ASSERT(false); 794 } 795 } 796 } 797 processPointSize()798 void VertexPipeline::processPointSize() 799 { 800 if(!state.pointSizeActive) 801 { 802 return; // Use global pointsize 803 } 804 805 if(state.input[PointSize]) 806 { 807 o[Pts].y = v[PointSize].x; 808 } 809 else 810 { 811 o[Pts].y = *Pointer<Float4>(data + OFFSET(DrawData,point.pointSize)); 812 } 813 814 if(state.pointScaleActive && !state.preTransformed) 815 { 816 Vector4f p = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); 817 818 Float4 d = Sqrt(dot3(p, p)); // FIXME: length(p); 819 820 Float4 A = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleA)); // FIXME: Unpack 821 Float4 B = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleB)); // FIXME: Unpack 822 Float4 C = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleC)); // FIXME: Unpack 823 824 A = RcpSqrt_pp(A + d * (B + d * C)); 825 826 o[Pts].y = o[Pts].y * Float4(*Pointer<Float>(data + OFFSET(DrawData,viewportHeight))) * A; // FIXME: Unpack 827 } 828 } 829 transform(const Register & src,const Pointer<Byte> & matrix,bool homogeneous)830 Vector4f VertexPipeline::transform(const Register &src, const Pointer<Byte> &matrix, bool homogeneous) 831 { 832 Vector4f dst; 833 834 if(homogeneous) 835 { 836 Float4 m[4][4]; 837 838 for(int j = 0; j < 4; j++) 839 { 840 for(int i = 0; i < 4; i++) 841 { 842 m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j); 843 m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j); 844 m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j); 845 m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j); 846 } 847 } 848 849 dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2] + src.w * m[0][3]; 850 dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2] + src.w * m[1][3]; 851 dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2] + src.w * m[2][3]; 852 dst.w = src.x * m[3][0] + src.y * m[3][1] + src.z * m[3][2] + src.w * m[3][3]; 853 } 854 else 855 { 856 Float4 m[3][3]; 857 858 for(int j = 0; j < 3; j++) 859 { 860 for(int i = 0; i < 3; i++) 861 { 862 m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j); 863 m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j); 864 m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j); 865 m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j); 866 } 867 } 868 869 dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2]; 870 dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2]; 871 dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2]; 872 } 873 874 return dst; 875 } 876 transform(const Register & src,const Pointer<Byte> & matrix,UInt index[4],bool homogeneous)877 Vector4f VertexPipeline::transform(const Register &src, const Pointer<Byte> &matrix, UInt index[4], bool homogeneous) 878 { 879 Vector4f dst; 880 881 if(homogeneous) 882 { 883 Float4 m[4][4]; 884 885 for(int j = 0; j < 4; j++) 886 { 887 for(int i = 0; i < 4; i++) 888 { 889 m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j + index[0]); 890 m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j + index[1]); 891 m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j + index[2]); 892 m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j + index[3]); 893 } 894 } 895 896 dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2] + m[0][3]; 897 dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2] + m[1][3]; 898 dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2] + m[2][3]; 899 dst.w = src.x * m[3][0] + src.y * m[3][1] + src.z * m[3][2] + m[3][3]; 900 } 901 else 902 { 903 Float4 m[3][3]; 904 905 for(int j = 0; j < 3; j++) 906 { 907 for(int i = 0; i < 3; i++) 908 { 909 m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j + index[0]); 910 m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j + index[1]); 911 m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j + index[2]); 912 m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j + index[3]); 913 } 914 } 915 916 dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2]; 917 dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2]; 918 dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2]; 919 } 920 921 return dst; 922 } 923 normalize(Vector4f & src)924 Vector4f VertexPipeline::normalize(Vector4f &src) 925 { 926 Vector4f dst; 927 928 Float4 rcpLength = RcpSqrt_pp(dot3(src, src)); 929 930 dst.x = src.x * rcpLength; 931 dst.y = src.y * rcpLength; 932 dst.z = src.z * rcpLength; 933 934 return dst; 935 } 936 power(Float4 & src0,Float4 & src1)937 Float4 VertexPipeline::power(Float4 &src0, Float4 &src1) 938 { 939 Float4 dst = src0; 940 941 dst = dst * dst; 942 dst = dst * dst; 943 dst = Float4(As<Int4>(dst) - As<Int4>(Float4(1.0f))); 944 945 dst *= src1; 946 947 dst = As<Float4>(Int4(dst) + As<Int4>(Float4(1.0f))); 948 dst = RcpSqrt_pp(dst); 949 dst = RcpSqrt_pp(dst); 950 951 return dst; 952 } 953 } 954