• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "VertexPipeline.hpp"
16 
17 #include "Renderer/Vertex.hpp"
18 #include "Renderer/Renderer.hpp"
19 #include "Common/Debug.hpp"
20 
21 #include <string.h>
22 #include <stdlib.h>
23 #include <stdio.h>
24 
25 #undef max
26 #undef min
27 
28 namespace sw
29 {
30 	extern bool secondaryColor;
31 
VertexPipeline(const VertexProcessor::State & state)32 	VertexPipeline::VertexPipeline(const VertexProcessor::State &state) : VertexRoutine(state, 0)
33 	{
34 	}
35 
~VertexPipeline()36 	VertexPipeline::~VertexPipeline()
37 	{
38 	}
39 
transformBlend(const Register & src,const Pointer<Byte> & matrix,bool homogeneous)40 	Vector4f VertexPipeline::transformBlend(const Register &src, const Pointer<Byte> &matrix, bool homogeneous)
41 	{
42 		Vector4f dst;
43 
44 		if(state.vertexBlendMatrixCount == 0)
45 		{
46 			dst = transform(src, matrix, homogeneous);
47 		}
48 		else
49 		{
50 			UInt index0[4];
51 			UInt index1[4];
52 			UInt index2[4];
53 			UInt index3[4];
54 
55 			if(state.indexedVertexBlendEnable)
56 			{
57 				for(int i = 0; i < 4; i++)
58 				{
59 					Float4 B = v[BlendIndices].x;
60 					UInt indices;
61 
62 					switch(i)
63 					{
64 					case 0: indices = As<UInt>(Float(B.x)); break;
65 					case 1: indices = As<UInt>(Float(B.y)); break;
66 					case 2: indices = As<UInt>(Float(B.z)); break;
67 					case 3: indices = As<UInt>(Float(B.w)); break;
68 					}
69 
70 					index0[i] = (indices & 0x000000FF) << 6;
71 					index1[i] = (indices & 0x0000FF00) >> 2;
72 					index2[i] = (indices & 0x00FF0000) >> 10;
73 					index3[i] = (indices & 0xFF000000) >> 18;
74 				}
75 			}
76 			else
77 			{
78 				for(int i = 0; i < 4; i++)
79 				{
80 					index0[i] = 0 * 64;
81 					index1[i] = 1 * 64;
82 					index2[i] = 2 * 64;
83 					index3[i] = 3 * 64;
84 				}
85 			}
86 
87 			Float4 weight0;
88 			Float4 weight1;
89 			Float4 weight2;
90 			Float4 weight3;
91 
92 			switch(state.vertexBlendMatrixCount)
93 			{
94 			case 4: weight2 = v[BlendWeight].z;
95 			case 3: weight1 = v[BlendWeight].y;
96 			case 2: weight0 = v[BlendWeight].x;
97 			case 1:
98 				break;
99 			}
100 
101 			if(state.vertexBlendMatrixCount == 1)
102 			{
103 				dst = transform(src, matrix, index0, homogeneous);
104 			}
105 			else if(state.vertexBlendMatrixCount == 2)
106 			{
107 				weight1 = Float4(1.0f) - weight0;
108 
109 				Vector4f pos0;
110 				Vector4f pos1;
111 
112 				pos0 = transform(src, matrix, index0, homogeneous);
113 				pos1 = transform(src, matrix, index1, homogeneous);
114 
115 				dst.x = pos0.x * weight0 + pos1.x * weight1;   // FIXME: Vector4f operators
116 				dst.y = pos0.y * weight0 + pos1.y * weight1;
117 				dst.z = pos0.z * weight0 + pos1.z * weight1;
118 				dst.w = pos0.w * weight0 + pos1.w * weight1;
119 			}
120 			else if(state.vertexBlendMatrixCount == 3)
121 			{
122 				weight2 = Float4(1.0f) - (weight0 + weight1);
123 
124 				Vector4f pos0;
125 				Vector4f pos1;
126 				Vector4f pos2;
127 
128 				pos0 = transform(src, matrix, index0, homogeneous);
129 				pos1 = transform(src, matrix, index1, homogeneous);
130 				pos2 = transform(src, matrix, index2, homogeneous);
131 
132 				dst.x = pos0.x * weight0 + pos1.x * weight1 + pos2.x * weight2;
133 				dst.y = pos0.y * weight0 + pos1.y * weight1 + pos2.y * weight2;
134 				dst.z = pos0.z * weight0 + pos1.z * weight1 + pos2.z * weight2;
135 				dst.w = pos0.w * weight0 + pos1.w * weight1 + pos2.w * weight2;
136 			}
137 			else if(state.vertexBlendMatrixCount == 4)
138 			{
139 				weight3 = Float4(1.0f) - (weight0 + weight1 + weight2);
140 
141 				Vector4f pos0;
142 				Vector4f pos1;
143 				Vector4f pos2;
144 				Vector4f pos3;
145 
146 				pos0 = transform(src, matrix, index0, homogeneous);
147 				pos1 = transform(src, matrix, index1, homogeneous);
148 				pos2 = transform(src, matrix, index2, homogeneous);
149 				pos3 = transform(src, matrix, index3, homogeneous);
150 
151 				dst.x = pos0.x * weight0 + pos1.x * weight1 + pos2.x * weight2 + pos3.x * weight3;
152 				dst.y = pos0.y * weight0 + pos1.y * weight1 + pos2.y * weight2 + pos3.y * weight3;
153 				dst.z = pos0.z * weight0 + pos1.z * weight1 + pos2.z * weight2 + pos3.z * weight3;
154 				dst.w = pos0.w * weight0 + pos1.w * weight1 + pos2.w * weight2 + pos3.w * weight3;
155 			}
156 		}
157 
158 		return dst;
159 	}
160 
pipeline(UInt & index)161 	void VertexPipeline::pipeline(UInt &index)
162 	{
163 		Vector4f position;
164 		Vector4f normal;
165 
166 		if(!state.preTransformed)
167 		{
168 			position = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.transformT)), true);
169 		}
170 		else
171 		{
172 			position = v[PositionT];
173 		}
174 
175 		o[Pos].x = position.x;
176 		o[Pos].y = position.y;
177 		o[Pos].z = position.z;
178 		o[Pos].w = position.w;
179 
180 		Vector4f vertexPosition = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);
181 
182 		if(state.vertexNormalActive)
183 		{
184 			normal = transformBlend(v[Normal], Pointer<Byte>(data + OFFSET(DrawData,ff.normalTransformT)), false);
185 
186 			if(state.normalizeNormals)
187 			{
188 				normal = normalize(normal);
189 			}
190 		}
191 
192 		if(!state.vertexLightingActive)
193 		{
194 			// FIXME: Don't process if not used at all
195 			if(state.diffuseActive && state.input[Color0])
196 			{
197 				Vector4f diffuse = v[Color0];
198 
199 				o[C0].x = diffuse.x;
200 				o[C0].y = diffuse.y;
201 				o[C0].z = diffuse.z;
202 				o[C0].w = diffuse.w;
203 			}
204 			else
205 			{
206 				o[C0].x = Float4(1.0f);
207 				o[C0].y = Float4(1.0f);
208 				o[C0].z = Float4(1.0f);
209 				o[C0].w = Float4(1.0f);
210 			}
211 
212 			// FIXME: Don't process if not used at all
213 			if(state.specularActive && state.input[Color1])
214 			{
215 				Vector4f specular = v[Color1];
216 
217 				o[C1].x = specular.x;
218 				o[C1].y = specular.y;
219 				o[C1].z = specular.z;
220 				o[C1].w = specular.w;
221 			}
222 			else
223 			{
224 				o[C1].x = Float4(0.0f);
225 				o[C1].y = Float4(0.0f);
226 				o[C1].z = Float4(0.0f);
227 				o[C1].w = Float4(1.0f);
228 			}
229 		}
230 		else
231 		{
232 			o[C0].x = Float4(0.0f);
233 			o[C0].y = Float4(0.0f);
234 			o[C0].z = Float4(0.0f);
235 			o[C0].w = Float4(0.0f);
236 
237 			o[C1].x = Float4(0.0f);
238 			o[C1].y = Float4(0.0f);
239 			o[C1].z = Float4(0.0f);
240 			o[C1].w = Float4(0.0f);
241 
242 			Vector4f ambient;
243 			Float4 globalAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.globalAmbient));   // FIXME: Unpack
244 
245 			ambient.x = globalAmbient.x;
246 			ambient.y = globalAmbient.y;
247 			ambient.z = globalAmbient.z;
248 
249 			for(int i = 0; i < 8; i++)
250 			{
251 				if(!(state.vertexLightActive & (1 << i)))
252 				{
253 					continue;
254 				}
255 
256 				Vector4f L;    // Light vector
257 				Float4 att;   // Attenuation
258 
259 				// Attenuation
260 				{
261 					Float4 d;   // Distance
262 
263 					L.x = L.y = L.z = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightPosition[i]));   // FIXME: Unpack
264 					L.x = L.x.xxxx;
265 					L.y = L.y.yyyy;
266 					L.z = L.z.zzzz;
267 
268 					L.x -= vertexPosition.x;
269 					L.y -= vertexPosition.y;
270 					L.z -= vertexPosition.z;
271 					d = dot3(L, L);
272 					d = RcpSqrt_pp(d);     // FIXME: Sufficient precision?
273 					L.x *= d;
274 					L.y *= d;
275 					L.z *= d;
276 					d = Rcp_pp(d);       // FIXME: Sufficient precision?
277 
278 					Float4 q = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationQuadratic[i]));
279 					Float4 l = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationLinear[i]));
280 					Float4 c = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationConstant[i]));
281 
282 					att = Rcp_pp((q * d + l) * d + c);
283 				}
284 
285 				// Ambient per light
286 				{
287 					Float4 lightAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightAmbient[i]));   // FIXME: Unpack
288 
289 					ambient.x = ambient.x + lightAmbient.x * att;
290 					ambient.y = ambient.y + lightAmbient.y * att;
291 					ambient.z = ambient.z + lightAmbient.z * att;
292 				}
293 
294 				// Diffuse
295 				if(state.vertexNormalActive)
296 				{
297 					Float4 dot;
298 
299 					dot = dot3(L, normal);
300 					dot = Max(dot, Float4(0.0f));
301 					dot *= att;
302 
303 					Vector4f diff;
304 
305 					if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL)
306 					{
307 						diff.x = diff.y = diff.z = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialDiffuse));   // FIXME: Unpack
308 						diff.x = diff.x.xxxx;
309 						diff.y = diff.y.yyyy;
310 						diff.z = diff.z.zzzz;
311 					}
312 					else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1)
313 					{
314 						diff = v[Color0];
315 					}
316 					else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2)
317 					{
318 						diff = v[Color1];
319 					}
320 					else ASSERT(false);
321 
322 					Float4 lightDiffuse = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightDiffuse[i]));
323 
324 					o[C0].x = o[C0].x + diff.x * dot * lightDiffuse.x;   // FIXME: Clamp first?
325 					o[C0].y = o[C0].y + diff.y * dot * lightDiffuse.y;   // FIXME: Clamp first?
326 					o[C0].z = o[C0].z + diff.z * dot * lightDiffuse.z;   // FIXME: Clamp first?
327 				}
328 
329 				// Specular
330 				if(state.vertexSpecularActive)
331 				{
332 					Vector4f S;
333 					Vector4f C;   // Camera vector
334 					Float4 pow;
335 
336 					pow = *Pointer<Float>(data + OFFSET(DrawData,ff.materialShininess));
337 
338 					S.x = Float4(0.0f) - vertexPosition.x;
339 					S.y = Float4(0.0f) - vertexPosition.y;
340 					S.z = Float4(0.0f) - vertexPosition.z;
341 					C = normalize(S);
342 
343 					S.x = L.x + C.x;
344 					S.y = L.y + C.y;
345 					S.z = L.z + C.z;
346 					C = normalize(S);
347 
348 					Float4 dot = Max(dot3(C, normal), Float4(0.0f));   // FIXME: max(dot3(C, normal), 0)
349 
350 					Float4 P = power(dot, pow);
351 					P *= att;
352 
353 					Vector4f spec;
354 
355 					if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL)
356 					{
357 						Float4 materialSpecular = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialSpecular));   // FIXME: Unpack
358 
359 						spec.x = materialSpecular.x;
360 						spec.y = materialSpecular.y;
361 						spec.z = materialSpecular.z;
362 					}
363 					else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1)
364 					{
365 						spec = v[Color0];
366 					}
367 					else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2)
368 					{
369 						spec = v[Color1];
370 					}
371 					else ASSERT(false);
372 
373 					Float4 lightSpecular = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightSpecular[i]));
374 
375 					spec.x *= lightSpecular.x;
376 					spec.y *= lightSpecular.y;
377 					spec.z *= lightSpecular.z;
378 
379 					spec.x *= P;
380 					spec.y *= P;
381 					spec.z *= P;
382 
383 					spec.x = Max(spec.x, Float4(0.0f));
384 					spec.y = Max(spec.y, Float4(0.0f));
385 					spec.z = Max(spec.z, Float4(0.0f));
386 
387 					if(secondaryColor)
388 					{
389 						o[C1].x = o[C1].x + spec.x;
390 						o[C1].y = o[C1].y + spec.y;
391 						o[C1].z = o[C1].z + spec.z;
392 					}
393 					else
394 					{
395 						o[C0].x = o[C0].x + spec.x;
396 						o[C0].y = o[C0].y + spec.y;
397 						o[C0].z = o[C0].z + spec.z;
398 					}
399 				}
400 			}
401 
402 			if(state.vertexAmbientMaterialSourceActive == MATERIAL_MATERIAL)
403 			{
404 				Float4 materialAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialAmbient));   // FIXME: Unpack
405 
406 				ambient.x = ambient.x * materialAmbient.x;
407 				ambient.y = ambient.y * materialAmbient.y;
408 				ambient.z = ambient.z * materialAmbient.z;
409 			}
410 			else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR1)
411 			{
412 				Vector4f materialDiffuse = v[Color0];
413 
414 				ambient.x = ambient.x * materialDiffuse.x;
415 				ambient.y = ambient.y * materialDiffuse.y;
416 				ambient.z = ambient.z * materialDiffuse.z;
417 			}
418 			else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR2)
419 			{
420 				Vector4f materialSpecular = v[Color1];
421 
422 				ambient.x = ambient.x * materialSpecular.x;
423 				ambient.y = ambient.y * materialSpecular.y;
424 				ambient.z = ambient.z * materialSpecular.z;
425 			}
426 			else ASSERT(false);
427 
428 			o[C0].x = o[C0].x + ambient.x;
429 			o[C0].y = o[C0].y + ambient.y;
430 			o[C0].z = o[C0].z + ambient.z;
431 
432 			// Emissive
433 			if(state.vertexEmissiveMaterialSourceActive == MATERIAL_MATERIAL)
434 			{
435 				Float4 materialEmission = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialEmission));   // FIXME: Unpack
436 
437 				o[C0].x = o[C0].x + materialEmission.x;
438 				o[C0].y = o[C0].y + materialEmission.y;
439 				o[C0].z = o[C0].z + materialEmission.z;
440 			}
441 			else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR1)
442 			{
443 				Vector4f materialSpecular = v[Color0];
444 
445 				o[C0].x = o[C0].x + materialSpecular.x;
446 				o[C0].y = o[C0].y + materialSpecular.y;
447 				o[C0].z = o[C0].z + materialSpecular.z;
448 			}
449 			else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR2)
450 			{
451 				Vector4f materialSpecular = v[Color1];
452 
453 				o[C0].x = o[C0].x + materialSpecular.x;
454 				o[C0].y = o[C0].y + materialSpecular.y;
455 				o[C0].z = o[C0].z + materialSpecular.z;
456 			}
457 			else ASSERT(false);
458 
459 			// Diffuse alpha component
460 			if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL)
461 			{
462 				o[C0].w = Float4(*Pointer<Float4>(data + OFFSET(DrawData,ff.materialDiffuse[0]))).wwww;   // FIXME: Unpack
463 			}
464 			else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1)
465 			{
466 				Vector4f alpha = v[Color0];
467 				o[C0].w = alpha.w;
468 			}
469 			else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2)
470 			{
471 				Vector4f alpha = v[Color1];
472 				o[C0].w = alpha.w;
473 			}
474 			else ASSERT(false);
475 
476 			if(state.vertexSpecularActive)
477 			{
478 				// Specular alpha component
479 				if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL)
480 				{
481 					o[C1].w = Float4(*Pointer<Float4>(data + OFFSET(DrawData,ff.materialSpecular[3]))).wwww;   // FIXME: Unpack
482 				}
483 				else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1)
484 				{
485 					Vector4f alpha = v[Color0];
486 					o[C1].w = alpha.w;
487 				}
488 				else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2)
489 				{
490 					Vector4f alpha = v[Color1];
491 					o[C1].w = alpha.w;
492 				}
493 				else ASSERT(false);
494 			}
495 		}
496 
497 		if(state.fogActive)
498 		{
499 			Float4 f;
500 
501 			if(!state.rangeFogActive)
502 			{
503 				f = Abs(vertexPosition.z);
504 			}
505 			else
506 			{
507 				f = Sqrt(dot3(vertexPosition, vertexPosition));   // FIXME: f = length(vertexPosition);
508 			}
509 
510 			switch(state.vertexFogMode)
511 			{
512 			case FOG_NONE:
513 				if(state.specularActive)
514 				{
515 					o[Fog].x = o[C1].w;
516 				}
517 				else
518 				{
519 					o[Fog].x = Float4(0.0f);
520 				}
521 				break;
522 			case FOG_LINEAR:
523 				o[Fog].x = f * *Pointer<Float4>(data + OFFSET(DrawData,fog.scale)) + *Pointer<Float4>(data + OFFSET(DrawData,fog.offset));
524 				break;
525 			case FOG_EXP:
526 				o[Fog].x = exponential2(f * *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE)), true);
527 				break;
528 			case FOG_EXP2:
529 				o[Fog].x = exponential2((f * f) * *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E)), true);
530 				break;
531 			default:
532 				ASSERT(false);
533 			}
534 		}
535 
536 		for(int stage = 0; stage < 8; stage++)
537 		{
538 			processTextureCoordinate(stage, normal, position);
539 		}
540 
541 		processPointSize();
542 	}
543 
processTextureCoordinate(int stage,Vector4f & normal,Vector4f & position)544 	void VertexPipeline::processTextureCoordinate(int stage, Vector4f &normal, Vector4f &position)
545 	{
546 		if(state.output[T0 + stage].write)
547 		{
548 			int i = state.textureState[stage].texCoordIndexActive;
549 
550 			switch(state.textureState[stage].texGenActive)
551 			{
552 			case TEXGEN_NONE:
553 				{
554 					Vector4f &&varying = v[TexCoord0 + i];
555 
556 					o[T0 + stage].x = varying.x;
557 					o[T0 + stage].y = varying.y;
558 					o[T0 + stage].z = varying.z;
559 					o[T0 + stage].w = varying.w;
560 				}
561 				break;
562 			case TEXGEN_PASSTHRU:
563 				{
564 					Vector4f &&varying = v[TexCoord0 + i];
565 
566 					o[T0 + stage].x = varying.x;
567 					o[T0 + stage].y = varying.y;
568 					o[T0 + stage].z = varying.z;
569 					o[T0 + stage].w = varying.w;
570 
571 					if(state.input[TexCoord0 + i])
572 					{
573 						switch(state.input[TexCoord0 + i].count)
574 						{
575 						case 1:
576 							o[T0 + stage].y = Float4(1.0f);
577 							o[T0 + stage].z = Float4(0.0f);
578 							o[T0 + stage].w = Float4(0.0f);
579 							break;
580 						case 2:
581 							o[T0 + stage].z = Float4(1.0f);
582 							o[T0 + stage].w = Float4(0.0f);
583 							break;
584 						case 3:
585 							o[T0 + stage].w = Float4(1.0f);
586 							break;
587 						case 4:
588 							break;
589 						default:
590 							ASSERT(false);
591 						}
592 					}
593 				}
594 				break;
595 			case TEXGEN_NORMAL:
596 				{
597 					Vector4f Nc;   // Normal vector in camera space
598 
599 					if(state.vertexNormalActive)
600 					{
601 						Nc = normal;
602 					}
603 					else
604 					{
605 						Nc.x = Float4(0.0f);
606 						Nc.y = Float4(0.0f);
607 						Nc.z = Float4(0.0f);
608 					}
609 
610 					Nc.w = Float4(1.0f);
611 
612 					o[T0 + stage].x = Nc.x;
613 					o[T0 + stage].y = Nc.y;
614 					o[T0 + stage].z = Nc.z;
615 					o[T0 + stage].w = Nc.w;
616 				}
617 				break;
618 			case TEXGEN_POSITION:
619 				{
620 					Vector4f Pn = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);   // Position in camera space
621 
622 					Pn.w = Float4(1.0f);
623 
624 					o[T0 + stage].x = Pn.x;
625 					o[T0 + stage].y = Pn.y;
626 					o[T0 + stage].z = Pn.z;
627 					o[T0 + stage].w = Pn.w;
628 				}
629 				break;
630 			case TEXGEN_REFLECTION:
631 				{
632 					Vector4f R;   // Reflection vector
633 
634 					if(state.vertexNormalActive)
635 					{
636 						Vector4f Nc;   // Normal vector in camera space
637 
638 						Nc = normal;
639 
640 						if(state.localViewerActive)
641 						{
642 							Vector4f Ec;   // Eye vector in camera space
643 							Vector4f N2;
644 
645 							Ec = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);
646 							Ec = normalize(Ec);
647 
648 							// R = E - 2 * N * (E . N)
649 							Float4 dot = Float4(2.0f) * dot3(Ec, Nc);
650 
651 							R.x = Ec.x - Nc.x * dot;
652 							R.y = Ec.y - Nc.y * dot;
653 							R.z = Ec.z - Nc.z * dot;
654 						}
655 						else
656 						{
657 							// u = -2 * Nz * Nx
658 							// v = -2 * Nz * Ny
659 							// w = 1 - 2 * Nz * Nz
660 
661 							R.x = -Float4(2.0f) * Nc.z * Nc.x;
662 							R.y = -Float4(2.0f) * Nc.z * Nc.y;
663 							R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z;
664 						}
665 					}
666 					else
667 					{
668 						R.x = Float4(0.0f);
669 						R.y = Float4(0.0f);
670 						R.z = Float4(0.0f);
671 					}
672 
673 					R.w = Float4(1.0f);
674 
675 					o[T0 + stage].x = R.x;
676 					o[T0 + stage].y = R.y;
677 					o[T0 + stage].z = R.z;
678 					o[T0 + stage].w = R.w;
679 				}
680 				break;
681 			case TEXGEN_SPHEREMAP:
682 				{
683 					Vector4f R;   // Reflection vector
684 
685 					if(state.vertexNormalActive)
686 					{
687 						Vector4f Nc;   // Normal vector in camera space
688 
689 						Nc = normal;
690 
691 						if(state.localViewerActive)
692 						{
693 							Vector4f Ec;   // Eye vector in camera space
694 							Vector4f N2;
695 
696 							Ec = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);
697 							Ec = normalize(Ec);
698 
699 							// R = E - 2 * N * (E . N)
700 							Float4 dot = Float4(2.0f) * dot3(Ec, Nc);
701 
702 							R.x = Ec.x - Nc.x * dot;
703 							R.y = Ec.y - Nc.y * dot;
704 							R.z = Ec.z - Nc.z * dot;
705 						}
706 						else
707 						{
708 							// u = -2 * Nz * Nx
709 							// v = -2 * Nz * Ny
710 							// w = 1 - 2 * Nz * Nz
711 
712 							R.x = -Float4(2.0f) * Nc.z * Nc.x;
713 							R.y = -Float4(2.0f) * Nc.z * Nc.y;
714 							R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z;
715 						}
716 					}
717 					else
718 					{
719 						R.x = Float4(0.0f);
720 						R.y = Float4(0.0f);
721 						R.z = Float4(0.0f);
722 					}
723 
724 					R.z -= Float4(1.0f);
725 					R = normalize(R);
726 					R.x = Float4(0.5f) * R.x + Float4(0.5f);
727 					R.y = Float4(0.5f) * R.y + Float4(0.5f);
728 
729 					R.z = Float4(1.0f);
730 					R.w = Float4(0.0f);
731 
732 					o[T0 + stage].x = R.x;
733 					o[T0 + stage].y = R.y;
734 					o[T0 + stage].z = R.z;
735 					o[T0 + stage].w = R.w;
736 				}
737 				break;
738 			default:
739 				ASSERT(false);
740 			}
741 
742 			Vector4f texTrans0;
743 			Vector4f texTrans1;
744 			Vector4f texTrans2;
745 			Vector4f texTrans3;
746 
747 			Vector4f T;
748 			Vector4f t;
749 
750 			T.x = o[T0 + stage].x;
751 			T.y = o[T0 + stage].y;
752 			T.z = o[T0 + stage].z;
753 			T.w = o[T0 + stage].w;
754 
755 			switch(state.textureState[stage].textureTransformCountActive)
756 			{
757 			case 4:
758 				texTrans3.x = texTrans3.y = texTrans3.z = texTrans3.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][3]));   // FIXME: Unpack
759 				texTrans3.x = texTrans3.x.xxxx;
760 				texTrans3.y = texTrans3.y.yyyy;
761 				texTrans3.z = texTrans3.z.zzzz;
762 				texTrans3.w = texTrans3.w.wwww;
763 				t.w = dot4(T, texTrans3);
764 			case 3:
765 				texTrans2.x = texTrans2.y = texTrans2.z = texTrans2.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][2]));   // FIXME: Unpack
766 				texTrans2.x = texTrans2.x.xxxx;
767 				texTrans2.y = texTrans2.y.yyyy;
768 				texTrans2.z = texTrans2.z.zzzz;
769 				texTrans2.w = texTrans2.w.wwww;
770 				t.z = dot4(T, texTrans2);
771 			case 2:
772 				texTrans1.x = texTrans1.y = texTrans1.z = texTrans1.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][1]));   // FIXME: Unpack
773 				texTrans1.x = texTrans1.x.xxxx;
774 				texTrans1.y = texTrans1.y.yyyy;
775 				texTrans1.z = texTrans1.z.zzzz;
776 				texTrans1.w = texTrans1.w.wwww;
777 				t.y = dot4(T, texTrans1);
778 			case 1:
779 				texTrans0.x = texTrans0.y = texTrans0.z = texTrans0.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][0]));   // FIXME: Unpack
780 				texTrans0.x = texTrans0.x.xxxx;
781 				texTrans0.y = texTrans0.y.yyyy;
782 				texTrans0.z = texTrans0.z.zzzz;
783 				texTrans0.w = texTrans0.w.wwww;
784 				t.x = dot4(T, texTrans0);
785 
786 				o[T0 + stage].x = t.x;
787 				o[T0 + stage].y = t.y;
788 				o[T0 + stage].z = t.z;
789 				o[T0 + stage].w = t.w;
790 			case 0:
791 				break;
792 			default:
793 				ASSERT(false);
794 			}
795 		}
796 	}
797 
processPointSize()798 	void VertexPipeline::processPointSize()
799 	{
800 		if(!state.pointSizeActive)
801 		{
802 			return;   // Use global pointsize
803 		}
804 
805 		if(state.input[PointSize])
806 		{
807 			o[Pts].y = v[PointSize].x;
808 		}
809 		else
810 		{
811 			o[Pts].y = *Pointer<Float4>(data + OFFSET(DrawData,point.pointSize));
812 		}
813 
814 		if(state.pointScaleActive && !state.preTransformed)
815 		{
816 			Vector4f p = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);
817 
818 			Float4 d = Sqrt(dot3(p, p));   // FIXME: length(p);
819 
820 			Float4 A = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleA));   // FIXME: Unpack
821 			Float4 B = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleB));   // FIXME: Unpack
822 			Float4 C = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleC));   // FIXME: Unpack
823 
824 			A = RcpSqrt_pp(A + d * (B + d * C));
825 
826 			o[Pts].y = o[Pts].y * Float4(*Pointer<Float>(data + OFFSET(DrawData,viewportHeight))) * A;   // FIXME: Unpack
827 		}
828 	}
829 
transform(const Register & src,const Pointer<Byte> & matrix,bool homogeneous)830 	Vector4f VertexPipeline::transform(const Register &src, const Pointer<Byte> &matrix, bool homogeneous)
831 	{
832 		Vector4f dst;
833 
834 		if(homogeneous)
835 		{
836 			Float4 m[4][4];
837 
838 			for(int j = 0; j < 4; j++)
839 			{
840 				for(int i = 0; i < 4; i++)
841 				{
842 					m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j);
843 					m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j);
844 					m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j);
845 					m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j);
846 				}
847 			}
848 
849 			dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2] + src.w * m[0][3];
850 			dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2] + src.w * m[1][3];
851 			dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2] + src.w * m[2][3];
852 			dst.w = src.x * m[3][0] + src.y * m[3][1] + src.z * m[3][2] + src.w * m[3][3];
853 		}
854 		else
855 		{
856 			Float4 m[3][3];
857 
858 			for(int j = 0; j < 3; j++)
859 			{
860 				for(int i = 0; i < 3; i++)
861 				{
862 					m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j);
863 					m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j);
864 					m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j);
865 					m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j);
866 				}
867 			}
868 
869 			dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2];
870 			dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2];
871 			dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2];
872 		}
873 
874 		return dst;
875 	}
876 
transform(const Register & src,const Pointer<Byte> & matrix,UInt index[4],bool homogeneous)877 	Vector4f VertexPipeline::transform(const Register &src, const Pointer<Byte> &matrix, UInt index[4], bool homogeneous)
878 	{
879 		Vector4f dst;
880 
881 		if(homogeneous)
882 		{
883 			Float4 m[4][4];
884 
885 			for(int j = 0; j < 4; j++)
886 			{
887 				for(int i = 0; i < 4; i++)
888 				{
889 					m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j + index[0]);
890 					m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j + index[1]);
891 					m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j + index[2]);
892 					m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j + index[3]);
893 				}
894 			}
895 
896 			dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2] + m[0][3];
897 			dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2] + m[1][3];
898 			dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2] + m[2][3];
899 			dst.w = src.x * m[3][0] + src.y * m[3][1] + src.z * m[3][2] + m[3][3];
900 		}
901 		else
902 		{
903 			Float4 m[3][3];
904 
905 			for(int j = 0; j < 3; j++)
906 			{
907 				for(int i = 0; i < 3; i++)
908 				{
909 					m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j + index[0]);
910 					m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j + index[1]);
911 					m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j + index[2]);
912 					m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j + index[3]);
913 				}
914 			}
915 
916 			dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2];
917 			dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2];
918 			dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2];
919 		}
920 
921 		return dst;
922 	}
923 
normalize(Vector4f & src)924 	Vector4f VertexPipeline::normalize(Vector4f &src)
925 	{
926 		Vector4f dst;
927 
928 		Float4 rcpLength = RcpSqrt_pp(dot3(src, src));
929 
930 		dst.x = src.x * rcpLength;
931 		dst.y = src.y * rcpLength;
932 		dst.z = src.z * rcpLength;
933 
934 		return dst;
935 	}
936 
power(Float4 & src0,Float4 & src1)937 	Float4 VertexPipeline::power(Float4 &src0, Float4 &src1)
938 	{
939 		Float4 dst = src0;
940 
941 		dst = dst * dst;
942 		dst = dst * dst;
943 		dst = Float4(As<Int4>(dst) - As<Int4>(Float4(1.0f)));
944 
945 		dst *= src1;
946 
947 		dst = As<Float4>(Int4(dst) + As<Int4>(Float4(1.0f)));
948 		dst = RcpSqrt_pp(dst);
949 		dst = RcpSqrt_pp(dst);
950 
951 		return dst;
952 	}
953 }
954