• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
#include "Shader.hpp"

#include "VertexShader.hpp"
#include "PixelShader.hpp"
#include "Math.hpp"
#include "Debug.hpp"

#include <set>
#include <fstream>
#include <sstream>
#include <stdarg.h>
#include <string.h>
26 
27 namespace sw
28 {
	// Out-of-class definition of the static Shader::serialCounter member; its initial value is 1.
	volatile int Shader::serialCounter = 1;
30 
OPCODE_DP(int i)31 	Shader::Opcode Shader::OPCODE_DP(int i)
32 	{
33 		switch(i)
34 		{
35 		default: ASSERT(false);
36 		case 1: return OPCODE_DP1;
37 		case 2: return OPCODE_DP2;
38 		case 3: return OPCODE_DP3;
39 		case 4: return OPCODE_DP4;
40 		}
41 	}
42 
OPCODE_LEN(int i)43 	Shader::Opcode Shader::OPCODE_LEN(int i)
44 	{
45 		switch(i)
46 		{
47 		default: ASSERT(false);
48 		case 1: return OPCODE_ABS;
49 		case 2: return OPCODE_LEN2;
50 		case 3: return OPCODE_LEN3;
51 		case 4: return OPCODE_LEN4;
52 		}
53 	}
54 
OPCODE_DIST(int i)55 	Shader::Opcode Shader::OPCODE_DIST(int i)
56 	{
57 		switch(i)
58 		{
59 		default: ASSERT(false);
60 		case 1: return OPCODE_DIST1;
61 		case 2: return OPCODE_DIST2;
62 		case 3: return OPCODE_DIST3;
63 		case 4: return OPCODE_DIST4;
64 		}
65 	}
66 
OPCODE_NRM(int i)67 	Shader::Opcode Shader::OPCODE_NRM(int i)
68 	{
69 		switch(i)
70 		{
71 		default: ASSERT(false);
72 		case 1: return OPCODE_SGN;
73 		case 2: return OPCODE_NRM2;
74 		case 3: return OPCODE_NRM3;
75 		case 4: return OPCODE_NRM4;
76 		}
77 	}
78 
OPCODE_FORWARD(int i)79 	Shader::Opcode Shader::OPCODE_FORWARD(int i)
80 	{
81 		switch(i)
82 		{
83 		default: ASSERT(false);
84 		case 1: return OPCODE_FORWARD1;
85 		case 2: return OPCODE_FORWARD2;
86 		case 3: return OPCODE_FORWARD3;
87 		case 4: return OPCODE_FORWARD4;
88 		}
89 	}
90 
OPCODE_REFLECT(int i)91 	Shader::Opcode Shader::OPCODE_REFLECT(int i)
92 	{
93 		switch(i)
94 		{
95 		default: ASSERT(false);
96 		case 1: return OPCODE_REFLECT1;
97 		case 2: return OPCODE_REFLECT2;
98 		case 3: return OPCODE_REFLECT3;
99 		case 4: return OPCODE_REFLECT4;
100 		}
101 	}
102 
OPCODE_REFRACT(int i)103 	Shader::Opcode Shader::OPCODE_REFRACT(int i)
104 	{
105 		switch(i)
106 		{
107 		default: ASSERT(false);
108 		case 1: return OPCODE_REFRACT1;
109 		case 2: return OPCODE_REFRACT2;
110 		case 3: return OPCODE_REFRACT3;
111 		case 4: return OPCODE_REFRACT4;
112 		}
113 	}
114 
Instruction(Opcode opcode)115 	Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0)
116 	{
117 		control = CONTROL_RESERVED0;
118 
119 		predicate = false;
120 		predicateNot = false;
121 		predicateSwizzle = 0xE4;
122 
123 		coissue = false;
124 		samplerType = SAMPLER_UNKNOWN;
125 		usage = USAGE_POSITION;
126 		usageIndex = 0;
127 	}
128 
Instruction(const unsigned long * token,int size,unsigned char majorVersion)129 	Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0)
130 	{
131 		parseOperationToken(*token++, majorVersion);
132 
133 		samplerType = SAMPLER_UNKNOWN;
134 		usage = USAGE_POSITION;
135 		usageIndex = 0;
136 
137 		if(opcode == OPCODE_IF ||
138 		   opcode == OPCODE_IFC ||
139 		   opcode == OPCODE_LOOP ||
140 		   opcode == OPCODE_REP ||
141 		   opcode == OPCODE_BREAKC ||
142 		   opcode == OPCODE_BREAKP)   // No destination operand
143 		{
144 			if(size > 0) parseSourceToken(0, token++, majorVersion);
145 			if(size > 1) parseSourceToken(1, token++, majorVersion);
146 			if(size > 2) parseSourceToken(2, token++, majorVersion);
147 			if(size > 3) ASSERT(false);
148 		}
149 		else if(opcode == OPCODE_DCL)
150 		{
151 			parseDeclarationToken(*token++);
152 			parseDestinationToken(token++, majorVersion);
153 		}
154 		else
155 		{
156 			if(size > 0)
157 			{
158 				parseDestinationToken(token, majorVersion);
159 
160 				if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3)
161 				{
162 					token++;
163 					size--;
164 				}
165 
166 				token++;
167 				size--;
168 			}
169 
170 			if(predicate)
171 			{
172 				ASSERT(size != 0);
173 
174 				predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT;
175 				predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16);
176 
177 				token++;
178 				size--;
179 			}
180 
181 			for(int i = 0; size > 0; i++)
182 			{
183 				parseSourceToken(i, token, majorVersion);
184 
185 				token++;
186 				size--;
187 
188 				if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2)
189 				{
190 					token++;
191 					size--;
192 				}
193 			}
194 		}
195 	}
196 
	// Intentionally empty: the destructor body performs no explicit work.
	Shader::Instruction::~Instruction()
	{
	}
200 
string(ShaderType shaderType,unsigned short version) const201 	std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const
202 	{
203 		std::string instructionString;
204 
205 		if(opcode != OPCODE_DCL)
206 		{
207 			instructionString += coissue ? "+ " : "";
208 
209 			if(predicate)
210 			{
211 				instructionString += predicateNot ? "(!p0" : "(p0";
212 				instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle);
213 				instructionString += ") ";
214 			}
215 
216 			instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString();
217 
218 			if(dst.type != PARAMETER_VOID)
219 			{
220 				instructionString += " " + dst.string(shaderType, version) +
221 				                           dst.relativeString() +
222 				                           dst.maskString();
223 			}
224 
225 			for(int i = 0; i < 4; i++)
226 			{
227 				if(src[i].type != PARAMETER_VOID)
228 				{
229 					instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " ";
230 					instructionString += src[i].preModifierString() +
231 										 src[i].string(shaderType, version) +
232 										 src[i].relativeString() +
233 										 src[i].postModifierString() +
234 										 src[i].swizzleString();
235 				}
236 			}
237 		}
238 		else   // DCL
239 		{
240 			instructionString += "dcl";
241 
242 			if(dst.type == PARAMETER_SAMPLER)
243 			{
244 				switch(samplerType)
245 				{
246 				case SAMPLER_UNKNOWN: instructionString += " ";        break;
247 				case SAMPLER_1D:      instructionString += "_1d ";     break;
248 				case SAMPLER_2D:      instructionString += "_2d ";     break;
249 				case SAMPLER_CUBE:    instructionString += "_cube ";   break;
250 				case SAMPLER_VOLUME:  instructionString += "_volume "; break;
251 				default:
252 					ASSERT(false);
253 				}
254 
255 				instructionString += dst.string(shaderType, version);
256 			}
257 			else if(dst.type == PARAMETER_INPUT ||
258 				    dst.type == PARAMETER_OUTPUT ||
259 				    dst.type == PARAMETER_TEXTURE)
260 			{
261 				if(version >= 0x0300)
262 				{
263 					switch(usage)
264 					{
265 					case USAGE_POSITION:     instructionString += "_position";     break;
266 					case USAGE_BLENDWEIGHT:  instructionString += "_blendweight";  break;
267 					case USAGE_BLENDINDICES: instructionString += "_blendindices"; break;
268 					case USAGE_NORMAL:       instructionString += "_normal";       break;
269 					case USAGE_PSIZE:        instructionString += "_psize";        break;
270 					case USAGE_TEXCOORD:     instructionString += "_texcoord";     break;
271 					case USAGE_TANGENT:      instructionString += "_tangent";      break;
272 					case USAGE_BINORMAL:     instructionString += "_binormal";     break;
273 					case USAGE_TESSFACTOR:   instructionString += "_tessfactor";   break;
274 					case USAGE_POSITIONT:    instructionString += "_positiont";    break;
275 					case USAGE_COLOR:        instructionString += "_color";        break;
276 					case USAGE_FOG:          instructionString += "_fog";          break;
277 					case USAGE_DEPTH:        instructionString += "_depth";        break;
278 					case USAGE_SAMPLE:       instructionString += "_sample";       break;
279 					default:
280 						ASSERT(false);
281 					}
282 
283 					if(usageIndex > 0)
284 					{
285 						std::ostringstream buffer;
286 
287 						buffer << (int)usageIndex;
288 
289 						instructionString += buffer.str();
290 					}
291 				}
292 				else ASSERT(dst.type != PARAMETER_OUTPUT);
293 
294 				instructionString += " ";
295 
296 				instructionString += dst.string(shaderType, version);
297 				instructionString += dst.maskString();
298 			}
299 			else if(dst.type == PARAMETER_MISCTYPE)   // vPos and vFace
300 			{
301 				instructionString += " ";
302 
303 				instructionString += dst.string(shaderType, version);
304 			}
305 			else ASSERT(false);
306 		}
307 
308 		return instructionString;
309 	}
310 
modifierString() const311 	std::string Shader::DestinationParameter::modifierString() const
312 	{
313 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
314 		{
315 			return "";
316 		}
317 
318 		std::string modifierString;
319 
320 		if(integer)
321 		{
322 			modifierString += "_int";
323 		}
324 
325 		if(saturate)
326 		{
327 			modifierString += "_sat";
328 		}
329 
330 		if(partialPrecision)
331 		{
332 			modifierString += "_pp";
333 		}
334 
335 		if(centroid)
336 		{
337 			modifierString += "_centroid";
338 		}
339 
340 		return modifierString;
341 	}
342 
shiftString() const343 	std::string Shader::DestinationParameter::shiftString() const
344 	{
345 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
346 		{
347 			return "";
348 		}
349 
350 		switch(shift)
351 		{
352 		case 0:		return "";
353 		case 1:		return "_x2";
354 		case 2:		return "_x4";
355 		case 3:		return "_x8";
356 		case -1:	return "_d2";
357 		case -2:	return "_d4";
358 		case -3:	return "_d8";
359 		default:
360 			return "";
361 		//	ASSERT(false);   // FIXME
362 		}
363 	}
364 
maskString() const365 	std::string Shader::DestinationParameter::maskString() const
366 	{
367 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
368 		{
369 			return "";
370 		}
371 
372 		switch(mask)
373 		{
374 		case 0x0:	return "";
375 		case 0x1:	return ".x";
376 		case 0x2:	return ".y";
377 		case 0x3:	return ".xy";
378 		case 0x4:	return ".z";
379 		case 0x5:	return ".xz";
380 		case 0x6:	return ".yz";
381 		case 0x7:	return ".xyz";
382 		case 0x8:	return ".w";
383 		case 0x9:	return ".xw";
384 		case 0xA:	return ".yw";
385 		case 0xB:	return ".xyw";
386 		case 0xC:	return ".zw";
387 		case 0xD:	return ".xzw";
388 		case 0xE:	return ".yzw";
389 		case 0xF:	return "";
390 		default:
391 			ASSERT(false);
392 		}
393 
394 		return "";
395 	}
396 
preModifierString() const397 	std::string Shader::SourceParameter::preModifierString() const
398 	{
399 		if(type == PARAMETER_VOID)
400 		{
401 			return "";
402 		}
403 
404 		switch(modifier)
405 		{
406 		case MODIFIER_NONE:			return "";
407 		case MODIFIER_NEGATE:		return "-";
408 		case MODIFIER_BIAS:			return "";
409 		case MODIFIER_BIAS_NEGATE:	return "-";
410 		case MODIFIER_SIGN:			return "";
411 		case MODIFIER_SIGN_NEGATE:	return "-";
412 		case MODIFIER_COMPLEMENT:	return "1-";
413 		case MODIFIER_X2:			return "";
414 		case MODIFIER_X2_NEGATE:	return "-";
415 		case MODIFIER_DZ:			return "";
416 		case MODIFIER_DW:			return "";
417 		case MODIFIER_ABS:			return "";
418 		case MODIFIER_ABS_NEGATE:	return "-";
419 		case MODIFIER_NOT:			return "!";
420 		default:
421 			ASSERT(false);
422 		}
423 
424 		return "";
425 	}
426 
relativeString() const427 	std::string Shader::Parameter::relativeString() const
428 	{
429 		if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP)
430 		{
431 			if(rel.type == PARAMETER_VOID)
432 			{
433 				return "";
434 			}
435 			else if(rel.type == PARAMETER_ADDR)
436 			{
437 				switch(rel.swizzle & 0x03)
438 				{
439 				case 0: return "[a0.x]";
440 				case 1: return "[a0.y]";
441 				case 2: return "[a0.z]";
442 				case 3: return "[a0.w]";
443 				}
444 			}
445 			else if(rel.type == PARAMETER_TEMP)
446 			{
447 				std::ostringstream buffer;
448 				buffer << rel.index;
449 
450 				switch(rel.swizzle & 0x03)
451 				{
452 				case 0: return "[r" + buffer.str() + ".x]";
453 				case 1: return "[r" + buffer.str() + ".y]";
454 				case 2: return "[r" + buffer.str() + ".z]";
455 				case 3: return "[r" + buffer.str() + ".w]";
456 				}
457 			}
458 			else if(rel.type == PARAMETER_LOOP)
459 			{
460 				return "[aL]";
461 			}
462 			else if(rel.type == PARAMETER_CONST)
463 			{
464 				std::ostringstream buffer;
465 				buffer << rel.index;
466 
467 				switch(rel.swizzle & 0x03)
468 				{
469 				case 0: return "[c" + buffer.str() + ".x]";
470 				case 1: return "[c" + buffer.str() + ".y]";
471 				case 2: return "[c" + buffer.str() + ".z]";
472 				case 3: return "[c" + buffer.str() + ".w]";
473 				}
474 			}
475 			else ASSERT(false);
476 		}
477 
478 		return "";
479 	}
480 
postModifierString() const481 	std::string Shader::SourceParameter::postModifierString() const
482 	{
483 		if(type == PARAMETER_VOID)
484 		{
485 			return "";
486 		}
487 
488 		switch(modifier)
489 		{
490 		case MODIFIER_NONE:			return "";
491 		case MODIFIER_NEGATE:		return "";
492 		case MODIFIER_BIAS:			return "_bias";
493 		case MODIFIER_BIAS_NEGATE:	return "_bias";
494 		case MODIFIER_SIGN:			return "_bx2";
495 		case MODIFIER_SIGN_NEGATE:	return "_bx2";
496 		case MODIFIER_COMPLEMENT:	return "";
497 		case MODIFIER_X2:			return "_x2";
498 		case MODIFIER_X2_NEGATE:	return "_x2";
499 		case MODIFIER_DZ:			return "_dz";
500 		case MODIFIER_DW:			return "_dw";
501 		case MODIFIER_ABS:			return "_abs";
502 		case MODIFIER_ABS_NEGATE:	return "_abs";
503 		case MODIFIER_NOT:			return "";
504 		default:
505 			ASSERT(false);
506 		}
507 
508 		return "";
509 	}
510 
	// Formats this source parameter's swizzle by delegating to the static
	// Instruction::swizzleString() with the parameter's own type and swizzle.
	std::string Shader::SourceParameter::swizzleString() const
	{
		return Instruction::swizzleString(type, swizzle);
	}
515 
parseOperationToken(unsigned long token,unsigned char majorVersion)516 	void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion)
517 	{
518 		if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000)   // Version token
519 		{
520 			opcode = (Opcode)token;
521 
522 			control = CONTROL_RESERVED0;
523 			predicate = false;
524 			coissue = false;
525 		}
526 		else
527 		{
528 			opcode = (Opcode)(token & 0x0000FFFF);
529 			control = (Control)((token & 0x00FF0000) >> 16);
530 
531 			int size = (token & 0x0F000000) >> 24;
532 
533 			predicate = (token & 0x10000000) != 0x00000000;
534 			coissue = (token & 0x40000000) != 0x00000000;
535 
536 			if(majorVersion < 2)
537 			{
538 				if(size != 0)
539 				{
540 					ASSERT(false);   // Reserved
541 				}
542 			}
543 
544 			if(majorVersion < 2)
545 			{
546 				if(predicate)
547 				{
548 					ASSERT(false);
549 				}
550 			}
551 
552 			if((token & 0x20000000) != 0x00000000)
553 			{
554 				ASSERT(false);   // Reserved
555 			}
556 
557 			if(majorVersion >= 2)
558 			{
559 				if(coissue)
560 				{
561 					ASSERT(false);   // Reserved
562 				}
563 			}
564 
565 			if((token & 0x80000000) != 0x00000000)
566 			{
567 				ASSERT(false);
568 			}
569 		}
570 	}
571 
parseDeclarationToken(unsigned long token)572 	void Shader::Instruction::parseDeclarationToken(unsigned long token)
573 	{
574 		samplerType = (SamplerType)((token & 0x78000000) >> 27);
575 		usage = (Usage)(token & 0x0000001F);
576 		usageIndex = (unsigned char)((token & 0x000F0000) >> 16);
577 	}
578 
parseDestinationToken(const unsigned long * token,unsigned char majorVersion)579 	void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion)
580 	{
581 		dst.index = (unsigned short)(token[0] & 0x000007FF);
582 		dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
583 
584 		// TODO: Check type and index range
585 
586 		bool relative = (token[0] & 0x00002000) != 0x00000000;
587 		dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
588 		dst.rel.swizzle = 0x00;
589 		dst.rel.scale = 1;
590 
591 		if(relative && majorVersion >= 3)
592 		{
593 			dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
594 			dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
595 		}
596 		else if(relative) ASSERT(false);   // Reserved
597 
598 		if((token[0] & 0x0000C000) != 0x00000000)
599 		{
600 			ASSERT(false);   // Reserved
601 		}
602 
603 		dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16);
604 		dst.saturate = (token[0] & 0x00100000) != 0;
605 		dst.partialPrecision = (token[0] & 0x00200000) != 0;
606 		dst.centroid = (token[0] & 0x00400000) != 0;
607 		dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4;
608 
609 		if(majorVersion >= 2)
610 		{
611 			if(dst.shift)
612 			{
613 				ASSERT(false);   // Reserved
614 			}
615 		}
616 
617 		if((token[0] & 0x80000000) != 0x80000000)
618 		{
619 			ASSERT(false);
620 		}
621 	}
622 
parseSourceToken(int i,const unsigned long * token,unsigned char majorVersion)623 	void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion)
624 	{
625 		// Defaults
626 		src[i].index = 0;
627 		src[i].type = PARAMETER_VOID;
628 		src[i].modifier = MODIFIER_NONE;
629 		src[i].swizzle = 0xE4;
630 		src[i].rel.type = PARAMETER_VOID;
631 		src[i].rel.swizzle = 0x00;
632 		src[i].rel.scale = 1;
633 
634 		switch(opcode)
635 		{
636 		case OPCODE_DEF:
637 			src[0].type = PARAMETER_FLOAT4LITERAL;
638 			src[0].value[i] = *(float*)token;
639 			break;
640 		case OPCODE_DEFB:
641 			src[0].type = PARAMETER_BOOL1LITERAL;
642 			src[0].boolean[0] = *(int*)token;
643 			break;
644 		case OPCODE_DEFI:
645 			src[0].type = PARAMETER_INT4LITERAL;
646 			src[0].integer[i] = *(int*)token;
647 			break;
648 		default:
649 			src[i].index = (unsigned short)(token[0] & 0x000007FF);
650 			src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
651 
652 			// FIXME: Check type and index range
653 
654 			bool relative = (token[0] & 0x00002000) != 0x00000000;
655 			src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
656 
657 			if((token[0] & 0x0000C000) != 0x00000000)
658 			{
659 				if(opcode != OPCODE_DEF &&
660 				   opcode != OPCODE_DEFI &&
661 				   opcode != OPCODE_DEFB)
662 				{
663 					ASSERT(false);
664 				}
665 			}
666 
667 			src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16);
668 			src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24);
669 
670 			if((token[0] & 0x80000000) != 0x80000000)
671 			{
672 				if(opcode != OPCODE_DEF &&
673 				   opcode != OPCODE_DEFI &&
674 				   opcode != OPCODE_DEFB)
675 				{
676 					ASSERT(false);
677 				}
678 			}
679 
680 			if(relative && majorVersion >= 2)
681 			{
682 				src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
683 				src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
684 			}
685 		}
686 	}
687 
swizzleString(ParameterType type,unsigned char swizzle)688 	std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle)
689 	{
690 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4)
691 		{
692 			return "";
693 		}
694 
695 		int x = (swizzle & 0x03) >> 0;
696 		int y = (swizzle & 0x0C) >> 2;
697 		int z = (swizzle & 0x30) >> 4;
698 		int w = (swizzle & 0xC0) >> 6;
699 
700 		std::string swizzleString = ".";
701 
702 		switch(x)
703 		{
704 		case 0: swizzleString += "x"; break;
705 		case 1: swizzleString += "y"; break;
706 		case 2: swizzleString += "z"; break;
707 		case 3: swizzleString += "w"; break;
708 		}
709 
710 		if(!(x == y && y == z && z == w))
711 		{
712 			switch(y)
713 			{
714 			case 0: swizzleString += "x"; break;
715 			case 1: swizzleString += "y"; break;
716 			case 2: swizzleString += "z"; break;
717 			case 3: swizzleString += "w"; break;
718 			}
719 
720 			if(!(y == z && z == w))
721 			{
722 				switch(z)
723 				{
724 				case 0: swizzleString += "x"; break;
725 				case 1: swizzleString += "y"; break;
726 				case 2: swizzleString += "z"; break;
727 				case 3: swizzleString += "w"; break;
728 				}
729 
730 				if(!(z == w))
731 				{
732 					switch(w)
733 					{
734 					case 0: swizzleString += "x"; break;
735 					case 1: swizzleString += "y"; break;
736 					case 2: swizzleString += "z"; break;
737 					case 3: swizzleString += "w"; break;
738 					}
739 				}
740 			}
741 		}
742 
743 		return swizzleString;
744 	}
745 
operationString(unsigned short version) const746 	std::string Shader::Instruction::operationString(unsigned short version) const
747 	{
748 		switch(opcode)
749 		{
750 		case OPCODE_NULL:			return "null";
751 		case OPCODE_NOP:			return "nop";
752 		case OPCODE_MOV:			return "mov";
753 		case OPCODE_ADD:			return "add";
754 		case OPCODE_IADD:			return "iadd";
755 		case OPCODE_SUB:			return "sub";
756 		case OPCODE_ISUB:			return "isub";
757 		case OPCODE_MAD:			return "mad";
758 		case OPCODE_IMAD:			return "imad";
759 		case OPCODE_MUL:			return "mul";
760 		case OPCODE_IMUL:			return "imul";
761 		case OPCODE_RCPX:			return "rcpx";
762 		case OPCODE_DIV:			return "div";
763 		case OPCODE_IDIV:			return "idiv";
764 		case OPCODE_UDIV:			return "udiv";
765 		case OPCODE_MOD:			return "mod";
766 		case OPCODE_IMOD:			return "imod";
767 		case OPCODE_UMOD:			return "umod";
768 		case OPCODE_SHL:			return "shl";
769 		case OPCODE_ISHR:			return "ishr";
770 		case OPCODE_USHR:			return "ushr";
771 		case OPCODE_RSQX:			return "rsqx";
772 		case OPCODE_SQRT:			return "sqrt";
773 		case OPCODE_RSQ:			return "rsq";
774 		case OPCODE_LEN2:			return "len2";
775 		case OPCODE_LEN3:			return "len3";
776 		case OPCODE_LEN4:			return "len4";
777 		case OPCODE_DIST1:			return "dist1";
778 		case OPCODE_DIST2:			return "dist2";
779 		case OPCODE_DIST3:			return "dist3";
780 		case OPCODE_DIST4:			return "dist4";
781 		case OPCODE_DP3:			return "dp3";
782 		case OPCODE_DP4:			return "dp4";
783 		case OPCODE_DET2:			return "det2";
784 		case OPCODE_DET3:			return "det3";
785 		case OPCODE_DET4:			return "det4";
786 		case OPCODE_MIN:			return "min";
787 		case OPCODE_IMIN:			return "imin";
788 		case OPCODE_UMIN:			return "umin";
789 		case OPCODE_MAX:			return "max";
790 		case OPCODE_IMAX:			return "imax";
791 		case OPCODE_UMAX:			return "umax";
792 		case OPCODE_SLT:			return "slt";
793 		case OPCODE_SGE:			return "sge";
794 		case OPCODE_EXP2X:			return "exp2x";
795 		case OPCODE_LOG2X:			return "log2x";
796 		case OPCODE_LIT:			return "lit";
797 		case OPCODE_ATT:			return "att";
798 		case OPCODE_LRP:			return "lrp";
799 		case OPCODE_STEP:			return "step";
800 		case OPCODE_SMOOTH:			return "smooth";
801 		case OPCODE_FLOATBITSTOINT:	 return "floatBitsToInt";
802 		case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt";
803 		case OPCODE_INTBITSTOFLOAT:	 return "intBitsToFloat";
804 		case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat";
805 		case OPCODE_PACKSNORM2x16:	 return "packSnorm2x16";
806 		case OPCODE_PACKUNORM2x16:	 return "packUnorm2x16";
807 		case OPCODE_PACKHALF2x16:	 return "packHalf2x16";
808 		case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16";
809 		case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16";
810 		case OPCODE_UNPACKHALF2x16:	 return "unpackHalf2x16";
811 		case OPCODE_FRC:			return "frc";
812 		case OPCODE_M4X4:			return "m4x4";
813 		case OPCODE_M4X3:			return "m4x3";
814 		case OPCODE_M3X4:			return "m3x4";
815 		case OPCODE_M3X3:			return "m3x3";
816 		case OPCODE_M3X2:			return "m3x2";
817 		case OPCODE_CALL:			return "call";
818 		case OPCODE_CALLNZ:			return "callnz";
819 		case OPCODE_LOOP:			return "loop";
820 		case OPCODE_RET:			return "ret";
821 		case OPCODE_ENDLOOP:		return "endloop";
822 		case OPCODE_LABEL:			return "label";
823 		case OPCODE_DCL:			return "dcl";
824 		case OPCODE_POWX:			return "powx";
825 		case OPCODE_CRS:			return "crs";
826 		case OPCODE_SGN:			return "sgn";
827 		case OPCODE_ISGN:			return "isgn";
828 		case OPCODE_ABS:			return "abs";
829 		case OPCODE_IABS:			return "iabs";
830 		case OPCODE_NRM2:			return "nrm2";
831 		case OPCODE_NRM3:			return "nrm3";
832 		case OPCODE_NRM4:			return "nrm4";
833 		case OPCODE_SINCOS:			return "sincos";
834 		case OPCODE_REP:			return "rep";
835 		case OPCODE_ENDREP:			return "endrep";
836 		case OPCODE_IF:				return "if";
837 		case OPCODE_IFC:			return "ifc";
838 		case OPCODE_ELSE:			return "else";
839 		case OPCODE_ENDIF:			return "endif";
840 		case OPCODE_BREAK:			return "break";
841 		case OPCODE_BREAKC:			return "breakc";
842 		case OPCODE_MOVA:			return "mova";
843 		case OPCODE_DEFB:			return "defb";
844 		case OPCODE_DEFI:			return "defi";
845 		case OPCODE_TEXCOORD:		return "texcoord";
846 		case OPCODE_TEXKILL:		return "texkill";
847 		case OPCODE_DISCARD:		return "discard";
848 		case OPCODE_TEX:
849 			if(version < 0x0104)	return "tex";
850 			else					return "texld";
851 		case OPCODE_TEXBEM:			return "texbem";
852 		case OPCODE_TEXBEML:		return "texbeml";
853 		case OPCODE_TEXREG2AR:		return "texreg2ar";
854 		case OPCODE_TEXREG2GB:		return "texreg2gb";
855 		case OPCODE_TEXM3X2PAD:		return "texm3x2pad";
856 		case OPCODE_TEXM3X2TEX:		return "texm3x2tex";
857 		case OPCODE_TEXM3X3PAD:		return "texm3x3pad";
858 		case OPCODE_TEXM3X3TEX:		return "texm3x3tex";
859 		case OPCODE_RESERVED0:		return "reserved0";
860 		case OPCODE_TEXM3X3SPEC:	return "texm3x3spec";
861 		case OPCODE_TEXM3X3VSPEC:	return "texm3x3vspec";
862 		case OPCODE_EXPP:			return "expp";
863 		case OPCODE_LOGP:			return "logp";
864 		case OPCODE_CND:			return "cnd";
865 		case OPCODE_DEF:			return "def";
866 		case OPCODE_TEXREG2RGB:		return "texreg2rgb";
867 		case OPCODE_TEXDP3TEX:		return "texdp3tex";
868 		case OPCODE_TEXM3X2DEPTH:	return "texm3x2depth";
869 		case OPCODE_TEXDP3:			return "texdp3";
870 		case OPCODE_TEXM3X3:		return "texm3x3";
871 		case OPCODE_TEXDEPTH:		return "texdepth";
872 		case OPCODE_CMP0:			return "cmp0";
873 		case OPCODE_ICMP:			return "icmp";
874 		case OPCODE_UCMP:			return "ucmp";
875 		case OPCODE_SELECT:			return "select";
876 		case OPCODE_EXTRACT:		return "extract";
877 		case OPCODE_INSERT:			return "insert";
878 		case OPCODE_BEM:			return "bem";
879 		case OPCODE_DP2ADD:			return "dp2add";
880 		case OPCODE_DFDX:			return "dFdx";
881 		case OPCODE_DFDY:			return "dFdy";
882 		case OPCODE_FWIDTH:			return "fwidth";
883 		case OPCODE_TEXLDD:			return "texldd";
884 		case OPCODE_CMP:			return "cmp";
885 		case OPCODE_TEXLDL:			return "texldl";
886 		case OPCODE_TEXOFFSET:		return "texoffset";
887 		case OPCODE_TEXLDLOFFSET:	return "texldloffset";
888 		case OPCODE_TEXELFETCH:		return "texelfetch";
889 		case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset";
890 		case OPCODE_TEXGRAD:		return "texgrad";
891 		case OPCODE_TEXGRADOFFSET:	return "texgradoffset";
892 		case OPCODE_BREAKP:			return "breakp";
893 		case OPCODE_TEXSIZE:        return "texsize";
894 		case OPCODE_PHASE:			return "phase";
895 		case OPCODE_COMMENT:		return "comment";
896 		case OPCODE_END:			return "end";
897 		case OPCODE_PS_1_0:			return "ps_1_0";
898 		case OPCODE_PS_1_1:			return "ps_1_1";
899 		case OPCODE_PS_1_2:			return "ps_1_2";
900 		case OPCODE_PS_1_3:			return "ps_1_3";
901 		case OPCODE_PS_1_4:			return "ps_1_4";
902 		case OPCODE_PS_2_0:			return "ps_2_0";
903 		case OPCODE_PS_2_x:			return "ps_2_x";
904 		case OPCODE_PS_3_0:			return "ps_3_0";
905 		case OPCODE_VS_1_0:			return "vs_1_0";
906 		case OPCODE_VS_1_1:			return "vs_1_1";
907 		case OPCODE_VS_2_0:			return "vs_2_0";
908 		case OPCODE_VS_2_x:			return "vs_2_x";
909 		case OPCODE_VS_2_sw:		return "vs_2_sw";
910 		case OPCODE_VS_3_0:			return "vs_3_0";
911 		case OPCODE_VS_3_sw:		return "vs_3_sw";
912 		case OPCODE_WHILE:          return "while";
913 		case OPCODE_ENDWHILE:       return "endwhile";
914 		case OPCODE_COS:            return "cos";
915 		case OPCODE_SIN:            return "sin";
916 		case OPCODE_TAN:            return "tan";
917 		case OPCODE_ACOS:           return "acos";
918 		case OPCODE_ASIN:           return "asin";
919 		case OPCODE_ATAN:           return "atan";
920 		case OPCODE_ATAN2:          return "atan2";
921 		case OPCODE_COSH:           return "cosh";
922 		case OPCODE_SINH:           return "sinh";
923 		case OPCODE_TANH:           return "tanh";
924 		case OPCODE_ACOSH:          return "acosh";
925 		case OPCODE_ASINH:          return "asinh";
926 		case OPCODE_ATANH:          return "atanh";
927 		case OPCODE_DP1:            return "dp1";
928 		case OPCODE_DP2:            return "dp2";
929 		case OPCODE_TRUNC:          return "trunc";
930 		case OPCODE_FLOOR:          return "floor";
931 		case OPCODE_ROUND:          return "round";
932 		case OPCODE_ROUNDEVEN:      return "roundEven";
933 		case OPCODE_CEIL:           return "ceil";
934 		case OPCODE_EXP2:           return "exp2";
935 		case OPCODE_LOG2:           return "log2";
936 		case OPCODE_EXP:            return "exp";
937 		case OPCODE_LOG:            return "log";
938 		case OPCODE_POW:            return "pow";
939 		case OPCODE_F2B:            return "f2b";
940 		case OPCODE_B2F:            return "b2f";
941 		case OPCODE_F2I:            return "f2i";
942 		case OPCODE_I2F:            return "i2f";
943 		case OPCODE_F2U:            return "f2u";
944 		case OPCODE_U2F:            return "u2f";
945 		case OPCODE_B2I:            return "b2i";
946 		case OPCODE_I2B:            return "i2b";
947 		case OPCODE_ALL:            return "all";
948 		case OPCODE_ANY:            return "any";
949 		case OPCODE_NEG:            return "neg";
950 		case OPCODE_INEG:           return "ineg";
951 		case OPCODE_ISNAN:          return "isnan";
952 		case OPCODE_ISINF:          return "isinf";
953 		case OPCODE_NOT:            return "not";
954 		case OPCODE_OR:             return "or";
955 		case OPCODE_XOR:            return "xor";
956 		case OPCODE_AND:            return "and";
957 		case OPCODE_EQ:             return "eq";
958 		case OPCODE_NE:             return "neq";
959 		case OPCODE_FORWARD1:       return "forward1";
960 		case OPCODE_FORWARD2:       return "forward2";
961 		case OPCODE_FORWARD3:       return "forward3";
962 		case OPCODE_FORWARD4:       return "forward4";
963 		case OPCODE_REFLECT1:       return "reflect1";
964 		case OPCODE_REFLECT2:       return "reflect2";
965 		case OPCODE_REFLECT3:       return "reflect3";
966 		case OPCODE_REFLECT4:       return "reflect4";
967 		case OPCODE_REFRACT1:       return "refract1";
968 		case OPCODE_REFRACT2:       return "refract2";
969 		case OPCODE_REFRACT3:       return "refract3";
970 		case OPCODE_REFRACT4:       return "refract4";
971 		case OPCODE_LEAVE:          return "leave";
972 		case OPCODE_CONTINUE:       return "continue";
973 		case OPCODE_TEST:           return "test";
974 		case OPCODE_SWITCH:         return "switch";
975 		case OPCODE_ENDSWITCH:      return "endswitch";
976 		default:
977 			ASSERT(false);
978 		}
979 
980 		return "<unknown>";
981 	}
982 
controlString() const983 	std::string Shader::Instruction::controlString() const
984 	{
985 		if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP)
986 		{
987 			if(project) return "p";
988 
989 			if(bias) return "b";
990 
991 			// FIXME: LOD
992 		}
993 
994 		switch(control)
995 		{
996 		case 1: return "_gt";
997 		case 2: return "_eq";
998 		case 3: return "_ge";
999 		case 4: return "_lt";
1000 		case 5: return "_ne";
1001 		case 6: return "_le";
1002 		default:
1003 			return "";
1004 		//	ASSERT(false);   // FIXME
1005 		}
1006 	}
1007 
string(ShaderType shaderType,unsigned short version) const1008 	std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const
1009 	{
1010 		std::ostringstream buffer;
1011 
1012 		if(type == PARAMETER_FLOAT4LITERAL)
1013 		{
1014 			buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}';
1015 
1016 			return buffer.str();
1017 		}
1018 		else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE)
1019 		{
1020 			buffer << index;
1021 
1022 			return typeString(shaderType, version) + buffer.str();
1023 		}
1024 		else
1025 		{
1026 			return typeString(shaderType, version);
1027 		}
1028 	}
1029 
typeString(ShaderType shaderType,unsigned short version) const1030 	std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const
1031 	{
1032 		switch(type)
1033 		{
1034 		case PARAMETER_TEMP:			return "r";
1035 		case PARAMETER_INPUT:			return "v";
1036 		case PARAMETER_CONST:			return "c";
1037 		case PARAMETER_TEXTURE:
1038 	//	case PARAMETER_ADDR:
1039 			if(shaderType == SHADER_PIXEL)	return "t";
1040 			else							return "a0";
1041 		case PARAMETER_RASTOUT:
1042 			if(index == 0)              return "oPos";
1043 			else if(index == 1)         return "oFog";
1044 			else if(index == 2)         return "oPts";
1045 			else                        ASSERT(false);
1046 		case PARAMETER_ATTROUT:			return "oD";
1047 		case PARAMETER_TEXCRDOUT:
1048 	//	case PARAMETER_OUTPUT:			return "";
1049 			if(version < 0x0300)		return "oT";
1050 			else						return "o";
1051 		case PARAMETER_CONSTINT:		return "i";
1052 		case PARAMETER_COLOROUT:		return "oC";
1053 		case PARAMETER_DEPTHOUT:		return "oDepth";
1054 		case PARAMETER_SAMPLER:			return "s";
1055 	//	case PARAMETER_CONST2:			return "";
1056 	//	case PARAMETER_CONST3:			return "";
1057 	//	case PARAMETER_CONST4:			return "";
1058 		case PARAMETER_CONSTBOOL:		return "b";
1059 		case PARAMETER_LOOP:			return "aL";
1060 	//	case PARAMETER_TEMPFLOAT16:		return "";
1061 		case PARAMETER_MISCTYPE:
1062 			switch(index)
1063 			{
1064 			case VPosIndex:				return "vPos";
1065 			case VFaceIndex:			return "vFace";
1066 			case InstanceIDIndex:		return "iID";
1067 			case VertexIDIndex:			return "vID";
1068 			default: ASSERT(false);
1069 			}
1070 		case PARAMETER_LABEL:			return "l";
1071 		case PARAMETER_PREDICATE:		return "p0";
1072 		case PARAMETER_FLOAT4LITERAL:	return "";
1073 		case PARAMETER_BOOL1LITERAL:	return "";
1074 		case PARAMETER_INT4LITERAL:		return "";
1075 	//	case PARAMETER_VOID:			return "";
1076 		default:
1077 			ASSERT(false);
1078 		}
1079 
1080 		return "";
1081 	}
1082 
isBranch() const1083 	bool Shader::Instruction::isBranch() const
1084 	{
1085 		return opcode == OPCODE_IF || opcode == OPCODE_IFC;
1086 	}
1087 
isCall() const1088 	bool Shader::Instruction::isCall() const
1089 	{
1090 		return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ;
1091 	}
1092 
isBreak() const1093 	bool Shader::Instruction::isBreak() const
1094 	{
1095 		return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP;
1096 	}
1097 
isLoop() const1098 	bool Shader::Instruction::isLoop() const
1099 	{
1100 		return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE;
1101 	}
1102 
isEndLoop() const1103 	bool Shader::Instruction::isEndLoop() const
1104 	{
1105 		return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE;
1106 	}
1107 
isPredicated() const1108 	bool Shader::Instruction::isPredicated() const
1109 	{
1110 		return predicate ||
1111 		       analysisBranch ||
1112 		       analysisBreak ||
1113 		       analysisContinue ||
1114 		       analysisLeave;
1115 	}
1116 
Shader()1117 	Shader::Shader() : serialID(serialCounter++)
1118 	{
1119 		usedSamplers = 0;
1120 	}
1121 
~Shader()1122 	Shader::~Shader()
1123 	{
1124 		for(unsigned int i = 0; i < instruction.size(); i++)
1125 		{
1126 			delete instruction[i];
1127 			instruction[i] = 0;
1128 		}
1129 	}
1130 
parse(const unsigned long * token)1131 	void Shader::parse(const unsigned long *token)
1132 	{
1133 		minorVersion = (unsigned char)(token[0] & 0x000000FF);
1134 		majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8);
1135 		shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16);
1136 
1137 		int length = 0;
1138 
1139 		if(shaderType == SHADER_VERTEX)
1140 		{
1141 			length = VertexShader::validate(token);
1142 		}
1143 		else if(shaderType == SHADER_PIXEL)
1144 		{
1145 			length = PixelShader::validate(token);
1146 		}
1147 		else ASSERT(false);
1148 
1149 		ASSERT(length != 0);
1150 		instruction.resize(length);
1151 
1152 		for(int i = 0; i < length; i++)
1153 		{
1154 			while((*token & 0x0000FFFF) == 0x0000FFFE)   // Comment token
1155 			{
1156 				int length = (*token & 0x7FFF0000) >> 16;
1157 
1158 				token += length + 1;
1159 			}
1160 
1161 			int tokenCount = size(*token);
1162 
1163 			instruction[i] = new Instruction(token, tokenCount, majorVersion);
1164 
1165 			token += 1 + tokenCount;
1166 		}
1167 	}
1168 
size(unsigned long opcode) const1169 	int Shader::size(unsigned long opcode) const
1170 	{
1171 		return size(opcode, version);
1172 	}
1173 
size(unsigned long opcode,unsigned short version)1174 	int Shader::size(unsigned long opcode, unsigned short version)
1175 	{
1176 		if(version > 0x0300)
1177 		{
1178 			ASSERT(false);
1179 		}
1180 
1181 		static const signed char size[] =
1182 		{
1183 			0,   // NOP = 0
1184 			2,   // MOV
1185 			3,   // ADD
1186 			3,   // SUB
1187 			4,   // MAD
1188 			3,   // MUL
1189 			2,   // RCP
1190 			2,   // RSQ
1191 			3,   // DP3
1192 			3,   // DP4
1193 			3,   // MIN
1194 			3,   // MAX
1195 			3,   // SLT
1196 			3,   // SGE
1197 			2,   // EXP
1198 			2,   // LOG
1199 			2,   // LIT
1200 			3,   // DST
1201 			4,   // LRP
1202 			2,   // FRC
1203 			3,   // M4x4
1204 			3,   // M4x3
1205 			3,   // M3x4
1206 			3,   // M3x3
1207 			3,   // M3x2
1208 			1,   // CALL
1209 			2,   // CALLNZ
1210 			2,   // LOOP
1211 			0,   // RET
1212 			0,   // ENDLOOP
1213 			1,   // LABEL
1214 			2,   // DCL
1215 			3,   // POW
1216 			3,   // CRS
1217 			4,   // SGN
1218 			2,   // ABS
1219 			2,   // NRM
1220 			4,   // SINCOS
1221 			1,   // REP
1222 			0,   // ENDREP
1223 			1,   // IF
1224 			2,   // IFC
1225 			0,   // ELSE
1226 			0,   // ENDIF
1227 			0,   // BREAK
1228 			2,   // BREAKC
1229 			2,   // MOVA
1230 			2,   // DEFB
1231 			5,   // DEFI
1232 			-1,  // 49
1233 			-1,  // 50
1234 			-1,  // 51
1235 			-1,  // 52
1236 			-1,  // 53
1237 			-1,  // 54
1238 			-1,  // 55
1239 			-1,  // 56
1240 			-1,  // 57
1241 			-1,  // 58
1242 			-1,  // 59
1243 			-1,  // 60
1244 			-1,  // 61
1245 			-1,  // 62
1246 			-1,  // 63
1247 			1,   // TEXCOORD = 64
1248 			1,   // TEXKILL
1249 			1,   // TEX
1250 			2,   // TEXBEM
1251 			2,   // TEXBEML
1252 			2,   // TEXREG2AR
1253 			2,   // TEXREG2GB
1254 			2,   // TEXM3x2PAD
1255 			2,   // TEXM3x2TEX
1256 			2,   // TEXM3x3PAD
1257 			2,   // TEXM3x3TEX
1258 			-1,  // RESERVED0
1259 			3,   // TEXM3x3SPEC
1260 			2,   // TEXM3x3VSPEC
1261 			2,   // EXPP
1262 			2,   // LOGP
1263 			4,   // CND
1264 			5,   // DEF
1265 			2,   // TEXREG2RGB
1266 			2,   // TEXDP3TEX
1267 			2,   // TEXM3x2DEPTH
1268 			2,   // TEXDP3
1269 			2,   // TEXM3x3
1270 			1,   // TEXDEPTH
1271 			4,   // CMP
1272 			3,   // BEM
1273 			4,   // DP2ADD
1274 			2,   // DSX
1275 			2,   // DSY
1276 			5,   // TEXLDD
1277 			3,   // SETP
1278 			3,   // TEXLDL
1279 			2,   // BREAKP
1280 			-1,  // 97
1281 			-1,  // 98
1282 			-1,  // 99
1283 			-1,  // 100
1284 			-1,  // 101
1285 			-1,  // 102
1286 			-1,  // 103
1287 			-1,  // 104
1288 			-1,  // 105
1289 			-1,  // 106
1290 			-1,  // 107
1291 			-1,  // 108
1292 			-1,  // 109
1293 			-1,  // 110
1294 			-1,  // 111
1295 			-1,  // 112
1296 		};
1297 
1298 		int length = 0;
1299 
1300 		if((opcode & 0x0000FFFF) == OPCODE_COMMENT)
1301 		{
1302 			return (opcode & 0x7FFF0000) >> 16;
1303 		}
1304 
1305 		if(opcode != OPCODE_PS_1_0 &&
1306 		   opcode != OPCODE_PS_1_1 &&
1307 		   opcode != OPCODE_PS_1_2 &&
1308 		   opcode != OPCODE_PS_1_3 &&
1309 		   opcode != OPCODE_PS_1_4 &&
1310 		   opcode != OPCODE_PS_2_0 &&
1311 		   opcode != OPCODE_PS_2_x &&
1312 		   opcode != OPCODE_PS_3_0 &&
1313 		   opcode != OPCODE_VS_1_0 &&
1314 		   opcode != OPCODE_VS_1_1 &&
1315 		   opcode != OPCODE_VS_2_0 &&
1316 		   opcode != OPCODE_VS_2_x &&
1317 		   opcode != OPCODE_VS_2_sw &&
1318 		   opcode != OPCODE_VS_3_0 &&
1319 		   opcode != OPCODE_VS_3_sw &&
1320 		   opcode != OPCODE_PHASE &&
1321 		   opcode != OPCODE_END)
1322 		{
1323 			if(version >= 0x0200)
1324 			{
1325 				length = (opcode & 0x0F000000) >> 24;
1326 			}
1327 			else
1328 			{
1329 				length = size[opcode & 0x0000FFFF];
1330 			}
1331 		}
1332 
1333 		if(length < 0)
1334 		{
1335 			ASSERT(false);
1336 		}
1337 
1338 		if(version == 0x0104)
1339 		{
1340 			switch(opcode & 0x0000FFFF)
1341 			{
1342 			case OPCODE_TEX:
1343 				length += 1;
1344 				break;
1345 			case OPCODE_TEXCOORD:
1346 				length += 1;
1347 				break;
1348 			default:
1349 				break;
1350 			}
1351 		}
1352 
1353 		return length;
1354 	}
1355 
maskContainsComponent(int mask,int component)1356 	bool Shader::maskContainsComponent(int mask, int component)
1357 	{
1358 		return (mask & (1 << component)) != 0;
1359 	}
1360 
swizzleContainsComponent(int swizzle,int component)1361 	bool Shader::swizzleContainsComponent(int swizzle, int component)
1362 	{
1363 		if((swizzle & 0x03) >> 0 == component) return true;
1364 		if((swizzle & 0x0C) >> 2 == component) return true;
1365 		if((swizzle & 0x30) >> 4 == component) return true;
1366 		if((swizzle & 0xC0) >> 6 == component) return true;
1367 
1368 		return false;
1369 	}
1370 
swizzleContainsComponentMasked(int swizzle,int component,int mask)1371 	bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask)
1372 	{
1373 		if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true;
1374 		if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true;
1375 		if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true;
1376 		if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true;
1377 
1378 		return false;
1379 	}
1380 
containsDynamicBranching() const1381 	bool Shader::containsDynamicBranching() const
1382 	{
1383 		return dynamicBranching;
1384 	}
1385 
containsBreakInstruction() const1386 	bool Shader::containsBreakInstruction() const
1387 	{
1388 		return containsBreak;
1389 	}
1390 
containsContinueInstruction() const1391 	bool Shader::containsContinueInstruction() const
1392 	{
1393 		return containsContinue;
1394 	}
1395 
containsLeaveInstruction() const1396 	bool Shader::containsLeaveInstruction() const
1397 	{
1398 		return containsLeave;
1399 	}
1400 
containsDefineInstruction() const1401 	bool Shader::containsDefineInstruction() const
1402 	{
1403 		return containsDefine;
1404 	}
1405 
usesSampler(int index) const1406 	bool Shader::usesSampler(int index) const
1407 	{
1408 		return (usedSamplers & (1 << index)) != 0;
1409 	}
1410 
getSerialID() const1411 	int Shader::getSerialID() const
1412 	{
1413 		return serialID;
1414 	}
1415 
getLength() const1416 	size_t Shader::getLength() const
1417 	{
1418 		return instruction.size();
1419 	}
1420 
getShaderType() const1421 	Shader::ShaderType Shader::getShaderType() const
1422 	{
1423 		return shaderType;
1424 	}
1425 
getVersion() const1426 	unsigned short Shader::getVersion() const
1427 	{
1428 		return version;
1429 	}
1430 
print(const char * fileName,...) const1431 	void Shader::print(const char *fileName, ...) const
1432 	{
1433 		char fullName[1024 + 1];
1434 
1435 		va_list vararg;
1436 		va_start(vararg, fileName);
1437 		vsnprintf(fullName, 1024, fileName, vararg);
1438 		va_end(vararg);
1439 
1440 		std::ofstream file(fullName, std::ofstream::out);
1441 
1442 		for(unsigned int i = 0; i < instruction.size(); i++)
1443 		{
1444 			file << instruction[i]->string(shaderType, version) << std::endl;
1445 		}
1446 	}
1447 
printInstruction(int index,const char * fileName) const1448 	void Shader::printInstruction(int index, const char *fileName) const
1449 	{
1450 		std::ofstream file(fileName, std::ofstream::out | std::ofstream::app);
1451 
1452 		file << instruction[index]->string(shaderType, version) << std::endl;
1453 	}
1454 
append(Instruction * instruction)1455 	void Shader::append(Instruction *instruction)
1456 	{
1457 		this->instruction.push_back(instruction);
1458 	}
1459 
declareSampler(int i)1460 	void Shader::declareSampler(int i)
1461 	{
1462 		usedSamplers |= 1 << i;
1463 	}
1464 
getInstruction(size_t i) const1465 	const Shader::Instruction *Shader::getInstruction(size_t i) const
1466 	{
1467 		ASSERT(i < instruction.size());
1468 
1469 		return instruction[i];
1470 	}
1471 
optimize()1472 	void Shader::optimize()
1473 	{
1474 		optimizeLeave();
1475 		optimizeCall();
1476 		removeNull();
1477 	}
1478 
optimizeLeave()1479 	void Shader::optimizeLeave()
1480 	{
1481 		// A return (leave) right before the end of a function or the shader can be removed
1482 		for(unsigned int i = 0; i < instruction.size(); i++)
1483 		{
1484 			if(instruction[i]->opcode == OPCODE_LEAVE)
1485 			{
1486 				if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET)
1487 				{
1488 					instruction[i]->opcode = OPCODE_NULL;
1489 				}
1490 			}
1491 		}
1492 	}
1493 
optimizeCall()1494 	void Shader::optimizeCall()
1495 	{
1496 		// Eliminate uncalled functions
1497 		std::set<int> calledFunctions;
1498 		bool rescan = true;
1499 
1500 		while(rescan)
1501 		{
1502 			calledFunctions.clear();
1503 			rescan = false;
1504 
1505 			for(unsigned int i = 0; i < instruction.size(); i++)
1506 			{
1507 				if(instruction[i]->isCall())
1508 				{
1509 					calledFunctions.insert(instruction[i]->dst.label);
1510 				}
1511 			}
1512 
1513 			if(!calledFunctions.empty())
1514 			{
1515 				for(unsigned int i = 0; i < instruction.size(); i++)
1516 				{
1517 					if(instruction[i]->opcode == OPCODE_LABEL)
1518 					{
1519 						if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end())
1520 						{
1521 							for( ; i < instruction.size(); i++)
1522 							{
1523 								Opcode oldOpcode = instruction[i]->opcode;
1524 								instruction[i]->opcode = OPCODE_NULL;
1525 
1526 								if(oldOpcode == OPCODE_RET)
1527 								{
1528 									rescan = true;
1529 									break;
1530 								}
1531 							}
1532 						}
1533 					}
1534 				}
1535 			}
1536 		}
1537 
1538 		// Optimize the entry call
1539 		if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET)
1540 		{
1541 			if(calledFunctions.size() == 1)
1542 			{
1543 				instruction[0]->opcode = OPCODE_NULL;
1544 				instruction[1]->opcode = OPCODE_NULL;
1545 
1546 				for(size_t i = 2; i < instruction.size(); i++)
1547 				{
1548 					if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET)
1549 					{
1550 						instruction[i]->opcode = OPCODE_NULL;
1551 					}
1552 				}
1553 			}
1554 		}
1555 	}
1556 
removeNull()1557 	void Shader::removeNull()
1558 	{
1559 		size_t size = 0;
1560 		for(size_t i = 0; i < instruction.size(); i++)
1561 		{
1562 			if(instruction[i]->opcode != OPCODE_NULL)
1563 			{
1564 				instruction[size] = instruction[i];
1565 				size++;
1566 			}
1567 			else
1568 			{
1569 				delete instruction[i];
1570 			}
1571 		}
1572 
1573 		instruction.resize(size);
1574 	}
1575 
analyzeDirtyConstants()1576 	void Shader::analyzeDirtyConstants()
1577 	{
1578 		dirtyConstantsF = 0;
1579 		dirtyConstantsI = 0;
1580 		dirtyConstantsB = 0;
1581 
1582 		for(unsigned int i = 0; i < instruction.size(); i++)
1583 		{
1584 			switch(instruction[i]->opcode)
1585 			{
1586 			case OPCODE_DEF:
1587 				if(instruction[i]->dst.index + 1 > dirtyConstantsF)
1588 				{
1589 					dirtyConstantsF = instruction[i]->dst.index + 1;
1590 				}
1591 				break;
1592 			case OPCODE_DEFI:
1593 				if(instruction[i]->dst.index + 1 > dirtyConstantsI)
1594 				{
1595 					dirtyConstantsI = instruction[i]->dst.index + 1;
1596 				}
1597 				break;
1598 			case OPCODE_DEFB:
1599 				if(instruction[i]->dst.index + 1 > dirtyConstantsB)
1600 				{
1601 					dirtyConstantsB = instruction[i]->dst.index + 1;
1602 				}
1603 				break;
1604 			default:
1605 				break;
1606 			}
1607 		}
1608 	}
1609 
analyzeDynamicBranching()1610 	void Shader::analyzeDynamicBranching()
1611 	{
1612 		dynamicBranching = false;
1613 		containsLeave = false;
1614 		containsBreak = false;
1615 		containsContinue = false;
1616 		containsDefine = false;
1617 
1618 		// Determine global presence of branching instructions
1619 		for(unsigned int i = 0; i < instruction.size(); i++)
1620 		{
1621 			switch(instruction[i]->opcode)
1622 			{
1623 			case OPCODE_CALLNZ:
1624 			case OPCODE_IF:
1625 			case OPCODE_IFC:
1626 			case OPCODE_BREAK:
1627 			case OPCODE_BREAKC:
1628 			case OPCODE_CMP:
1629 			case OPCODE_BREAKP:
1630 			case OPCODE_LEAVE:
1631 			case OPCODE_CONTINUE:
1632 				if(instruction[i]->src[0].type != PARAMETER_CONSTBOOL)
1633 				{
1634 					dynamicBranching = true;
1635 				}
1636 
1637 				if(instruction[i]->opcode == OPCODE_LEAVE)
1638 				{
1639 					containsLeave = true;
1640 				}
1641 
1642 				if(instruction[i]->isBreak())
1643 				{
1644 					containsBreak = true;
1645 				}
1646 
1647 				if(instruction[i]->opcode == OPCODE_CONTINUE)
1648 				{
1649 					containsContinue = true;
1650 				}
1651 			case OPCODE_DEF:
1652 			case OPCODE_DEFB:
1653 			case OPCODE_DEFI:
1654 				containsDefine = true;
1655 			default:
1656 				break;
1657 			}
1658 		}
1659 
1660 		// Conservatively determine which instructions are affected by dynamic branching
1661 		int branchDepth = 0;
1662 		int breakDepth = 0;
1663 		int continueDepth = 0;
1664 		bool leaveReturn = false;
1665 		unsigned int functionBegin = 0;
1666 
1667 		for(unsigned int i = 0; i < instruction.size(); i++)
1668 		{
1669 			// If statements
1670 			if(instruction[i]->isBranch())
1671 			{
1672 				branchDepth++;
1673 			}
1674 			else if(instruction[i]->opcode == OPCODE_ENDIF)
1675 			{
1676 				branchDepth--;
1677 			}
1678 
1679 			if(branchDepth > 0)
1680 			{
1681 				instruction[i]->analysisBranch = true;
1682 
1683 				if(instruction[i]->isCall())
1684 				{
1685 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1686 				}
1687 			}
1688 
1689 			// Break statemement
1690 			if(instruction[i]->isBreak())
1691 			{
1692 				breakDepth++;
1693 			}
1694 
1695 			if(breakDepth > 0)
1696 			{
1697 				if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH)   // Nested loop or switch, don't make the end of it disable the break execution mask
1698 				{
1699 					breakDepth++;
1700 				}
1701 				else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH)
1702 				{
1703 					breakDepth--;
1704 				}
1705 
1706 				instruction[i]->analysisBreak = true;
1707 
1708 				if(instruction[i]->isCall())
1709 				{
1710 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1711 				}
1712 			}
1713 
1714 			// Continue statement
1715 			if(instruction[i]->opcode == OPCODE_CONTINUE)
1716 			{
1717 				continueDepth++;
1718 			}
1719 
1720 			if(continueDepth > 0)
1721 			{
1722 				if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH)   // Nested loop or switch, don't make the end of it disable the break execution mask
1723 				{
1724 					continueDepth++;
1725 				}
1726 				else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH)
1727 				{
1728 					continueDepth--;
1729 				}
1730 
1731 				instruction[i]->analysisContinue = true;
1732 
1733 				if(instruction[i]->isCall())
1734 				{
1735 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE);
1736 				}
1737 			}
1738 
1739 			// Return (leave) statement
1740 			if(instruction[i]->opcode == OPCODE_LEAVE)
1741 			{
1742 				leaveReturn = true;
1743 
1744 				// Mark loop body instructions prior to the return statement
1745 				for(unsigned int l = functionBegin; l < i; l++)
1746 				{
1747 					if(instruction[l]->isLoop())
1748 					{
1749 						for(unsigned int r = l + 1; r < i; r++)
1750 						{
1751 							instruction[r]->analysisLeave = true;
1752 						}
1753 
1754 						break;
1755 					}
1756 				}
1757 			}
1758 			else if(instruction[i]->opcode == OPCODE_RET)   // End of the function
1759 			{
1760 				leaveReturn = false;
1761 			}
1762 			else if(instruction[i]->opcode == OPCODE_LABEL)
1763 			{
1764 				functionBegin = i;
1765 			}
1766 
1767 			if(leaveReturn)
1768 			{
1769 				instruction[i]->analysisLeave = true;
1770 
1771 				if(instruction[i]->isCall())
1772 				{
1773 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE);
1774 				}
1775 			}
1776 		}
1777 	}
1778 
markFunctionAnalysis(unsigned int functionLabel,Analysis flag)1779 	void Shader::markFunctionAnalysis(unsigned int functionLabel, Analysis flag)
1780 	{
1781 		bool marker = false;
1782 		for(unsigned int i = 0; i < instruction.size(); i++)
1783 		{
1784 			if(!marker)
1785 			{
1786 				if(instruction[i]->opcode == OPCODE_LABEL && instruction[i]->dst.label == functionLabel)
1787 				{
1788 					marker = true;
1789 				}
1790 			}
1791 			else
1792 			{
1793 				if(instruction[i]->opcode == OPCODE_RET)
1794 				{
1795 					break;
1796 				}
1797 				else if(instruction[i]->isCall())
1798 				{
1799 					markFunctionAnalysis(instruction[i]->dst.label, flag);
1800 				}
1801 
1802 				instruction[i]->analysis |= flag;
1803 			}
1804 		}
1805 	}
1806 
analyzeSamplers()1807 	void Shader::analyzeSamplers()
1808 	{
1809 		for(unsigned int i = 0; i < instruction.size(); i++)
1810 		{
1811 			switch(instruction[i]->opcode)
1812 			{
1813 			case OPCODE_TEX:
1814 			case OPCODE_TEXBEM:
1815 			case OPCODE_TEXBEML:
1816 			case OPCODE_TEXREG2AR:
1817 			case OPCODE_TEXREG2GB:
1818 			case OPCODE_TEXM3X2TEX:
1819 			case OPCODE_TEXM3X3TEX:
1820 			case OPCODE_TEXM3X3SPEC:
1821 			case OPCODE_TEXM3X3VSPEC:
1822 			case OPCODE_TEXREG2RGB:
1823 			case OPCODE_TEXDP3TEX:
1824 			case OPCODE_TEXM3X2DEPTH:
1825 			case OPCODE_TEXLDD:
1826 			case OPCODE_TEXLDL:
1827 			case OPCODE_TEXOFFSET:
1828 			case OPCODE_TEXLDLOFFSET:
1829 			case OPCODE_TEXELFETCH:
1830 			case OPCODE_TEXELFETCHOFFSET:
1831 			case OPCODE_TEXGRAD:
1832 			case OPCODE_TEXGRADOFFSET:
1833 				{
1834 					Parameter &dst = instruction[i]->dst;
1835 					Parameter &src1 = instruction[i]->src[1];
1836 
1837 					if(majorVersion >= 2)
1838 					{
1839 						usedSamplers |= 1 << src1.index;
1840 					}
1841 					else
1842 					{
1843 						usedSamplers |= 1 << dst.index;
1844 					}
1845 				}
1846 				break;
1847 			default:
1848 				break;
1849 			}
1850 		}
1851 	}
1852 
1853 	// Assigns a unique index to each call instruction, on a per label basis.
1854 	// This is used to know what basic block to return to.
analyzeCallSites()1855 	void Shader::analyzeCallSites()
1856 	{
1857 		int callSiteIndex[2048] = {0};
1858 
1859 		for(unsigned int i = 0; i < instruction.size(); i++)
1860 		{
1861 			if(instruction[i]->opcode == OPCODE_CALL || instruction[i]->opcode == OPCODE_CALLNZ)
1862 			{
1863 				int label = instruction[i]->dst.label;
1864 
1865 				instruction[i]->dst.callSite = callSiteIndex[label]++;
1866 			}
1867 		}
1868 	}
1869 
analyzeDynamicIndexing()1870 	void Shader::analyzeDynamicIndexing()
1871 	{
1872 		dynamicallyIndexedTemporaries = false;
1873 		dynamicallyIndexedInput = false;
1874 		dynamicallyIndexedOutput = false;
1875 
1876 		for(unsigned int i = 0; i < instruction.size(); i++)
1877 		{
1878 			if(instruction[i]->dst.rel.type == PARAMETER_ADDR ||
1879 			   instruction[i]->dst.rel.type == PARAMETER_LOOP ||
1880 			   instruction[i]->dst.rel.type == PARAMETER_TEMP ||
1881 			   instruction[i]->dst.rel.type == PARAMETER_CONST)
1882 			{
1883 				switch(instruction[i]->dst.type)
1884 				{
1885 				case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
1886 				case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
1887 				case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
1888 				default: break;
1889 				}
1890 			}
1891 
1892 			for(int j = 0; j < 3; j++)
1893 			{
1894 				if(instruction[i]->src[j].rel.type == PARAMETER_ADDR ||
1895 				   instruction[i]->src[j].rel.type == PARAMETER_LOOP ||
1896 				   instruction[i]->src[j].rel.type == PARAMETER_TEMP ||
1897 				   instruction[i]->src[j].rel.type == PARAMETER_CONST)
1898 				{
1899 					switch(instruction[i]->src[j].type)
1900 					{
1901 					case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
1902 					case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
1903 					case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
1904 					default: break;
1905 					}
1906 				}
1907 			}
1908 		}
1909 	}
1910 }
1911