• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "Shader.hpp"
16 
17 #include "VertexShader.hpp"
18 #include "PixelShader.hpp"
19 #include "Common/Math.hpp"
20 #include "Common/Debug.hpp"
21 
22 #include <set>
23 #include <fstream>
24 #include <sstream>
25 #include <stdarg.h>
26 
27 namespace sw
28 {
// Definition of the static Shader::serialCounter member; its initial value is 1.
// NOTE(review): presumably the source of per-shader serial numbers. 'volatile'
// by itself does not make concurrent updates atomic - confirm how it is modified.
29 	volatile int Shader::serialCounter = 1;
30 
OPCODE_DP(int i)31 	Shader::Opcode Shader::OPCODE_DP(int i)
32 	{
33 		switch(i)
34 		{
35 		default: ASSERT(false);
36 		case 1: return OPCODE_DP1;
37 		case 2: return OPCODE_DP2;
38 		case 3: return OPCODE_DP3;
39 		case 4: return OPCODE_DP4;
40 		}
41 	}
42 
OPCODE_LEN(int i)43 	Shader::Opcode Shader::OPCODE_LEN(int i)
44 	{
45 		switch(i)
46 		{
47 		default: ASSERT(false);
48 		case 1: return OPCODE_ABS;
49 		case 2: return OPCODE_LEN2;
50 		case 3: return OPCODE_LEN3;
51 		case 4: return OPCODE_LEN4;
52 		}
53 	}
54 
OPCODE_DIST(int i)55 	Shader::Opcode Shader::OPCODE_DIST(int i)
56 	{
57 		switch(i)
58 		{
59 		default: ASSERT(false);
60 		case 1: return OPCODE_DIST1;
61 		case 2: return OPCODE_DIST2;
62 		case 3: return OPCODE_DIST3;
63 		case 4: return OPCODE_DIST4;
64 		}
65 	}
66 
OPCODE_NRM(int i)67 	Shader::Opcode Shader::OPCODE_NRM(int i)
68 	{
69 		switch(i)
70 		{
71 		default: ASSERT(false);
72 		case 1: return OPCODE_SGN;
73 		case 2: return OPCODE_NRM2;
74 		case 3: return OPCODE_NRM3;
75 		case 4: return OPCODE_NRM4;
76 		}
77 	}
78 
OPCODE_FORWARD(int i)79 	Shader::Opcode Shader::OPCODE_FORWARD(int i)
80 	{
81 		switch(i)
82 		{
83 		default: ASSERT(false);
84 		case 1: return OPCODE_FORWARD1;
85 		case 2: return OPCODE_FORWARD2;
86 		case 3: return OPCODE_FORWARD3;
87 		case 4: return OPCODE_FORWARD4;
88 		}
89 	}
90 
OPCODE_REFLECT(int i)91 	Shader::Opcode Shader::OPCODE_REFLECT(int i)
92 	{
93 		switch(i)
94 		{
95 		default: ASSERT(false);
96 		case 1: return OPCODE_REFLECT1;
97 		case 2: return OPCODE_REFLECT2;
98 		case 3: return OPCODE_REFLECT3;
99 		case 4: return OPCODE_REFLECT4;
100 		}
101 	}
102 
OPCODE_REFRACT(int i)103 	Shader::Opcode Shader::OPCODE_REFRACT(int i)
104 	{
105 		switch(i)
106 		{
107 		default: ASSERT(false);
108 		case 1: return OPCODE_REFRACT1;
109 		case 2: return OPCODE_REFRACT2;
110 		case 3: return OPCODE_REFRACT3;
111 		case 4: return OPCODE_REFRACT4;
112 		}
113 	}
114 
Instruction(Opcode opcode)115 	Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0)
116 	{
117 		control = CONTROL_RESERVED0;
118 
119 		predicate = false;
120 		predicateNot = false;
121 		predicateSwizzle = 0xE4;
122 
123 		coissue = false;
124 		samplerType = SAMPLER_UNKNOWN;
125 		usage = USAGE_POSITION;
126 		usageIndex = 0;
127 	}
128 
Instruction(const unsigned long * token,int size,unsigned char majorVersion)129 	Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0)
130 	{
131 		parseOperationToken(*token++, majorVersion);
132 
133 		samplerType = SAMPLER_UNKNOWN;
134 		usage = USAGE_POSITION;
135 		usageIndex = 0;
136 
137 		if(opcode == OPCODE_IF ||
138 		   opcode == OPCODE_IFC ||
139 		   opcode == OPCODE_LOOP ||
140 		   opcode == OPCODE_REP ||
141 		   opcode == OPCODE_BREAKC ||
142 		   opcode == OPCODE_BREAKP)   // No destination operand
143 		{
144 			if(size > 0) parseSourceToken(0, token++, majorVersion);
145 			if(size > 1) parseSourceToken(1, token++, majorVersion);
146 			if(size > 2) parseSourceToken(2, token++, majorVersion);
147 			if(size > 3) ASSERT(false);
148 		}
149 		else if(opcode == OPCODE_DCL)
150 		{
151 			parseDeclarationToken(*token++);
152 			parseDestinationToken(token++, majorVersion);
153 		}
154 		else
155 		{
156 			if(size > 0)
157 			{
158 				parseDestinationToken(token, majorVersion);
159 
160 				if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3)
161 				{
162 					token++;
163 					size--;
164 				}
165 
166 				token++;
167 				size--;
168 			}
169 
170 			if(predicate)
171 			{
172 				ASSERT(size != 0);
173 
174 				predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT;
175 				predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16);
176 
177 				token++;
178 				size--;
179 			}
180 
181 			for(int i = 0; size > 0; i++)
182 			{
183 				parseSourceToken(i, token, majorVersion);
184 
185 				token++;
186 				size--;
187 
188 				if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2)
189 				{
190 					token++;
191 					size--;
192 				}
193 			}
194 		}
195 	}
196 
~Instruction()197 	Shader::Instruction::~Instruction()
198 	{
199 	}
200 
string(ShaderType shaderType,unsigned short version) const201 	std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const
202 	{
203 		std::string instructionString;
204 
205 		if(opcode != OPCODE_DCL)
206 		{
207 			instructionString += coissue ? "+ " : "";
208 
209 			if(predicate)
210 			{
211 				instructionString += predicateNot ? "(!p0" : "(p0";
212 				instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle);
213 				instructionString += ") ";
214 			}
215 
216 			instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString();
217 
218 			if(dst.type != PARAMETER_VOID)
219 			{
220 				instructionString += " " + dst.string(shaderType, version) +
221 				                           dst.relativeString() +
222 				                           dst.maskString();
223 			}
224 
225 			for(int i = 0; i < 4; i++)
226 			{
227 				if(src[i].type != PARAMETER_VOID)
228 				{
229 					instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " ";
230 					instructionString += src[i].preModifierString() +
231 										 src[i].string(shaderType, version) +
232 										 src[i].relativeString() +
233 										 src[i].postModifierString() +
234 										 src[i].swizzleString();
235 				}
236 			}
237 		}
238 		else   // DCL
239 		{
240 			instructionString += "dcl";
241 
242 			if(dst.type == PARAMETER_SAMPLER)
243 			{
244 				switch(samplerType)
245 				{
246 				case SAMPLER_UNKNOWN: instructionString += " ";        break;
247 				case SAMPLER_1D:      instructionString += "_1d ";     break;
248 				case SAMPLER_2D:      instructionString += "_2d ";     break;
249 				case SAMPLER_CUBE:    instructionString += "_cube ";   break;
250 				case SAMPLER_VOLUME:  instructionString += "_volume "; break;
251 				default:
252 					ASSERT(false);
253 				}
254 
255 				instructionString += dst.string(shaderType, version);
256 			}
257 			else if(dst.type == PARAMETER_INPUT ||
258 				    dst.type == PARAMETER_OUTPUT ||
259 				    dst.type == PARAMETER_TEXTURE)
260 			{
261 				if(version >= 0x0300)
262 				{
263 					switch(usage)
264 					{
265 					case USAGE_POSITION:     instructionString += "_position";     break;
266 					case USAGE_BLENDWEIGHT:  instructionString += "_blendweight";  break;
267 					case USAGE_BLENDINDICES: instructionString += "_blendindices"; break;
268 					case USAGE_NORMAL:       instructionString += "_normal";       break;
269 					case USAGE_PSIZE:        instructionString += "_psize";        break;
270 					case USAGE_TEXCOORD:     instructionString += "_texcoord";     break;
271 					case USAGE_TANGENT:      instructionString += "_tangent";      break;
272 					case USAGE_BINORMAL:     instructionString += "_binormal";     break;
273 					case USAGE_TESSFACTOR:   instructionString += "_tessfactor";   break;
274 					case USAGE_POSITIONT:    instructionString += "_positiont";    break;
275 					case USAGE_COLOR:        instructionString += "_color";        break;
276 					case USAGE_FOG:          instructionString += "_fog";          break;
277 					case USAGE_DEPTH:        instructionString += "_depth";        break;
278 					case USAGE_SAMPLE:       instructionString += "_sample";       break;
279 					default:
280 						ASSERT(false);
281 					}
282 
283 					if(usageIndex > 0)
284 					{
285 						std::ostringstream buffer;
286 
287 						buffer << (int)usageIndex;
288 
289 						instructionString += buffer.str();
290 					}
291 				}
292 				else ASSERT(dst.type != PARAMETER_OUTPUT);
293 
294 				instructionString += " ";
295 
296 				instructionString += dst.string(shaderType, version);
297 				instructionString += dst.maskString();
298 			}
299 			else if(dst.type == PARAMETER_MISCTYPE)   // vPos and vFace
300 			{
301 				instructionString += " ";
302 
303 				instructionString += dst.string(shaderType, version);
304 			}
305 			else ASSERT(false);
306 		}
307 
308 		return instructionString;
309 	}
310 
modifierString() const311 	std::string Shader::DestinationParameter::modifierString() const
312 	{
313 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
314 		{
315 			return "";
316 		}
317 
318 		std::string modifierString;
319 
320 		if(saturate)
321 		{
322 			modifierString += "_sat";
323 		}
324 
325 		if(partialPrecision)
326 		{
327 			modifierString += "_pp";
328 		}
329 
330 		if(centroid)
331 		{
332 			modifierString += "_centroid";
333 		}
334 
335 		return modifierString;
336 	}
337 
shiftString() const338 	std::string Shader::DestinationParameter::shiftString() const
339 	{
340 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
341 		{
342 			return "";
343 		}
344 
345 		switch(shift)
346 		{
347 		case 0:		return "";
348 		case 1:		return "_x2";
349 		case 2:		return "_x4";
350 		case 3:		return "_x8";
351 		case -1:	return "_d2";
352 		case -2:	return "_d4";
353 		case -3:	return "_d8";
354 		default:
355 			return "";
356 		//	ASSERT(false);   // FIXME
357 		}
358 	}
359 
maskString() const360 	std::string Shader::DestinationParameter::maskString() const
361 	{
362 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
363 		{
364 			return "";
365 		}
366 
367 		switch(mask)
368 		{
369 		case 0x0:	return "";
370 		case 0x1:	return ".x";
371 		case 0x2:	return ".y";
372 		case 0x3:	return ".xy";
373 		case 0x4:	return ".z";
374 		case 0x5:	return ".xz";
375 		case 0x6:	return ".yz";
376 		case 0x7:	return ".xyz";
377 		case 0x8:	return ".w";
378 		case 0x9:	return ".xw";
379 		case 0xA:	return ".yw";
380 		case 0xB:	return ".xyw";
381 		case 0xC:	return ".zw";
382 		case 0xD:	return ".xzw";
383 		case 0xE:	return ".yzw";
384 		case 0xF:	return "";
385 		default:
386 			ASSERT(false);
387 		}
388 
389 		return "";
390 	}
391 
preModifierString() const392 	std::string Shader::SourceParameter::preModifierString() const
393 	{
394 		if(type == PARAMETER_VOID)
395 		{
396 			return "";
397 		}
398 
399 		switch(modifier)
400 		{
401 		case MODIFIER_NONE:			return "";
402 		case MODIFIER_NEGATE:		return "-";
403 		case MODIFIER_BIAS:			return "";
404 		case MODIFIER_BIAS_NEGATE:	return "-";
405 		case MODIFIER_SIGN:			return "";
406 		case MODIFIER_SIGN_NEGATE:	return "-";
407 		case MODIFIER_COMPLEMENT:	return "1-";
408 		case MODIFIER_X2:			return "";
409 		case MODIFIER_X2_NEGATE:	return "-";
410 		case MODIFIER_DZ:			return "";
411 		case MODIFIER_DW:			return "";
412 		case MODIFIER_ABS:			return "";
413 		case MODIFIER_ABS_NEGATE:	return "-";
414 		case MODIFIER_NOT:			return "!";
415 		default:
416 			ASSERT(false);
417 		}
418 
419 		return "";
420 	}
421 
relativeString() const422 	std::string Shader::Parameter::relativeString() const
423 	{
424 		if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP)
425 		{
426 			if(rel.type == PARAMETER_VOID)
427 			{
428 				return "";
429 			}
430 			else if(rel.type == PARAMETER_ADDR)
431 			{
432 				switch(rel.swizzle & 0x03)
433 				{
434 				case 0: return "[a0.x]";
435 				case 1: return "[a0.y]";
436 				case 2: return "[a0.z]";
437 				case 3: return "[a0.w]";
438 				}
439 			}
440 			else if(rel.type == PARAMETER_TEMP)
441 			{
442 				std::ostringstream buffer;
443 				buffer << rel.index;
444 
445 				switch(rel.swizzle & 0x03)
446 				{
447 				case 0: return "[r" + buffer.str() + ".x]";
448 				case 1: return "[r" + buffer.str() + ".y]";
449 				case 2: return "[r" + buffer.str() + ".z]";
450 				case 3: return "[r" + buffer.str() + ".w]";
451 				}
452 			}
453 			else if(rel.type == PARAMETER_LOOP)
454 			{
455 				return "[aL]";
456 			}
457 			else if(rel.type == PARAMETER_CONST)
458 			{
459 				std::ostringstream buffer;
460 				buffer << rel.index;
461 
462 				switch(rel.swizzle & 0x03)
463 				{
464 				case 0: return "[c" + buffer.str() + ".x]";
465 				case 1: return "[c" + buffer.str() + ".y]";
466 				case 2: return "[c" + buffer.str() + ".z]";
467 				case 3: return "[c" + buffer.str() + ".w]";
468 				}
469 			}
470 			else ASSERT(false);
471 		}
472 
473 		return "";
474 	}
475 
postModifierString() const476 	std::string Shader::SourceParameter::postModifierString() const
477 	{
478 		if(type == PARAMETER_VOID)
479 		{
480 			return "";
481 		}
482 
483 		switch(modifier)
484 		{
485 		case MODIFIER_NONE:			return "";
486 		case MODIFIER_NEGATE:		return "";
487 		case MODIFIER_BIAS:			return "_bias";
488 		case MODIFIER_BIAS_NEGATE:	return "_bias";
489 		case MODIFIER_SIGN:			return "_bx2";
490 		case MODIFIER_SIGN_NEGATE:	return "_bx2";
491 		case MODIFIER_COMPLEMENT:	return "";
492 		case MODIFIER_X2:			return "_x2";
493 		case MODIFIER_X2_NEGATE:	return "_x2";
494 		case MODIFIER_DZ:			return "_dz";
495 		case MODIFIER_DW:			return "_dw";
496 		case MODIFIER_ABS:			return "_abs";
497 		case MODIFIER_ABS_NEGATE:	return "_abs";
498 		case MODIFIER_NOT:			return "";
499 		default:
500 			ASSERT(false);
501 		}
502 
503 		return "";
504 	}
505 
string(ShaderType shaderType,unsigned short version) const506 	std::string Shader::SourceParameter::string(ShaderType shaderType, unsigned short version) const
507 	{
508 		if(type == PARAMETER_CONST && bufferIndex >= 0)
509 		{
510 			std::ostringstream buffer;
511 			buffer << bufferIndex;
512 
513 			std::ostringstream offset;
514 			offset << index;
515 
516 			return "cb" + buffer.str() + "[" + offset.str() + "]";
517 		}
518 		else
519 		{
520 			return Parameter::string(shaderType, version);
521 		}
522 	}
523 
swizzleString() const524 	std::string Shader::SourceParameter::swizzleString() const
525 	{
526 		return Instruction::swizzleString(type, swizzle);
527 	}
528 
parseOperationToken(unsigned long token,unsigned char majorVersion)529 	void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion)
530 	{
531 		if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000)   // Version token
532 		{
533 			opcode = (Opcode)token;
534 
535 			control = CONTROL_RESERVED0;
536 			predicate = false;
537 			coissue = false;
538 		}
539 		else
540 		{
541 			opcode = (Opcode)(token & 0x0000FFFF);
542 			control = (Control)((token & 0x00FF0000) >> 16);
543 
544 			int size = (token & 0x0F000000) >> 24;
545 
546 			predicate = (token & 0x10000000) != 0x00000000;
547 			coissue = (token & 0x40000000) != 0x00000000;
548 
549 			if(majorVersion < 2)
550 			{
551 				if(size != 0)
552 				{
553 					ASSERT(false);   // Reserved
554 				}
555 			}
556 
557 			if(majorVersion < 2)
558 			{
559 				if(predicate)
560 				{
561 					ASSERT(false);
562 				}
563 			}
564 
565 			if((token & 0x20000000) != 0x00000000)
566 			{
567 				ASSERT(false);   // Reserved
568 			}
569 
570 			if(majorVersion >= 2)
571 			{
572 				if(coissue)
573 				{
574 					ASSERT(false);   // Reserved
575 				}
576 			}
577 
578 			if((token & 0x80000000) != 0x00000000)
579 			{
580 				ASSERT(false);
581 			}
582 		}
583 	}
584 
parseDeclarationToken(unsigned long token)585 	void Shader::Instruction::parseDeclarationToken(unsigned long token)
586 	{
587 		samplerType = (SamplerType)((token & 0x78000000) >> 27);
588 		usage = (Usage)(token & 0x0000001F);
589 		usageIndex = (unsigned char)((token & 0x000F0000) >> 16);
590 	}
591 
parseDestinationToken(const unsigned long * token,unsigned char majorVersion)592 	void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion)
593 	{
594 		dst.index = (unsigned short)(token[0] & 0x000007FF);
595 		dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
596 
597 		// TODO: Check type and index range
598 
599 		bool relative = (token[0] & 0x00002000) != 0x00000000;
600 		dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
601 		dst.rel.swizzle = 0x00;
602 		dst.rel.scale = 1;
603 
604 		if(relative && majorVersion >= 3)
605 		{
606 			dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
607 			dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
608 		}
609 		else if(relative) ASSERT(false);   // Reserved
610 
611 		if((token[0] & 0x0000C000) != 0x00000000)
612 		{
613 			ASSERT(false);   // Reserved
614 		}
615 
616 		dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16);
617 		dst.saturate = (token[0] & 0x00100000) != 0;
618 		dst.partialPrecision = (token[0] & 0x00200000) != 0;
619 		dst.centroid = (token[0] & 0x00400000) != 0;
620 		dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4;
621 
622 		if(majorVersion >= 2)
623 		{
624 			if(dst.shift)
625 			{
626 				ASSERT(false);   // Reserved
627 			}
628 		}
629 
630 		if((token[0] & 0x80000000) != 0x80000000)
631 		{
632 			ASSERT(false);
633 		}
634 	}
635 
parseSourceToken(int i,const unsigned long * token,unsigned char majorVersion)636 	void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion)
637 	{
638 		// Defaults
639 		src[i].index = 0;
640 		src[i].type = PARAMETER_VOID;
641 		src[i].modifier = MODIFIER_NONE;
642 		src[i].swizzle = 0xE4;
643 		src[i].rel.type = PARAMETER_VOID;
644 		src[i].rel.swizzle = 0x00;
645 		src[i].rel.scale = 1;
646 
647 		switch(opcode)
648 		{
649 		case OPCODE_DEF:
650 			src[0].type = PARAMETER_FLOAT4LITERAL;
651 			src[0].value[i] = *(float*)token;
652 			break;
653 		case OPCODE_DEFB:
654 			src[0].type = PARAMETER_BOOL1LITERAL;
655 			src[0].boolean[0] = *(int*)token;
656 			break;
657 		case OPCODE_DEFI:
658 			src[0].type = PARAMETER_INT4LITERAL;
659 			src[0].integer[i] = *(int*)token;
660 			break;
661 		default:
662 			src[i].index = (unsigned short)(token[0] & 0x000007FF);
663 			src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
664 
665 			// FIXME: Check type and index range
666 
667 			bool relative = (token[0] & 0x00002000) != 0x00000000;
668 			src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
669 
670 			if((token[0] & 0x0000C000) != 0x00000000)
671 			{
672 				if(opcode != OPCODE_DEF &&
673 				   opcode != OPCODE_DEFI &&
674 				   opcode != OPCODE_DEFB)
675 				{
676 					ASSERT(false);
677 				}
678 			}
679 
680 			src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16);
681 			src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24);
682 
683 			if((token[0] & 0x80000000) != 0x80000000)
684 			{
685 				if(opcode != OPCODE_DEF &&
686 				   opcode != OPCODE_DEFI &&
687 				   opcode != OPCODE_DEFB)
688 				{
689 					ASSERT(false);
690 				}
691 			}
692 
693 			if(relative && majorVersion >= 2)
694 			{
695 				src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
696 				src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
697 			}
698 		}
699 	}
700 
swizzleString(ParameterType type,unsigned char swizzle)701 	std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle)
702 	{
703 		if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4)
704 		{
705 			return "";
706 		}
707 
708 		int x = (swizzle & 0x03) >> 0;
709 		int y = (swizzle & 0x0C) >> 2;
710 		int z = (swizzle & 0x30) >> 4;
711 		int w = (swizzle & 0xC0) >> 6;
712 
713 		std::string swizzleString = ".";
714 
715 		switch(x)
716 		{
717 		case 0: swizzleString += "x"; break;
718 		case 1: swizzleString += "y"; break;
719 		case 2: swizzleString += "z"; break;
720 		case 3: swizzleString += "w"; break;
721 		}
722 
723 		if(!(x == y && y == z && z == w))
724 		{
725 			switch(y)
726 			{
727 			case 0: swizzleString += "x"; break;
728 			case 1: swizzleString += "y"; break;
729 			case 2: swizzleString += "z"; break;
730 			case 3: swizzleString += "w"; break;
731 			}
732 
733 			if(!(y == z && z == w))
734 			{
735 				switch(z)
736 				{
737 				case 0: swizzleString += "x"; break;
738 				case 1: swizzleString += "y"; break;
739 				case 2: swizzleString += "z"; break;
740 				case 3: swizzleString += "w"; break;
741 				}
742 
743 				if(!(z == w))
744 				{
745 					switch(w)
746 					{
747 					case 0: swizzleString += "x"; break;
748 					case 1: swizzleString += "y"; break;
749 					case 2: swizzleString += "z"; break;
750 					case 3: swizzleString += "w"; break;
751 					}
752 				}
753 			}
754 		}
755 
756 		return swizzleString;
757 	}
758 
operationString(unsigned short version) const759 	std::string Shader::Instruction::operationString(unsigned short version) const
760 	{
761 		switch(opcode)
762 		{
763 		case OPCODE_NULL:            return "null";
764 		case OPCODE_NOP:             return "nop";
765 		case OPCODE_MOV:             return "mov";
766 		case OPCODE_ADD:             return "add";
767 		case OPCODE_IADD:            return "iadd";
768 		case OPCODE_SUB:             return "sub";
769 		case OPCODE_ISUB:            return "isub";
770 		case OPCODE_MAD:             return "mad";
771 		case OPCODE_IMAD:            return "imad";
772 		case OPCODE_MUL:             return "mul";
773 		case OPCODE_IMUL:            return "imul";
774 		case OPCODE_RCPX:            return "rcpx";
775 		case OPCODE_DIV:             return "div";
776 		case OPCODE_IDIV:            return "idiv";
777 		case OPCODE_UDIV:            return "udiv";
778 		case OPCODE_MOD:             return "mod";
779 		case OPCODE_IMOD:            return "imod";
780 		case OPCODE_UMOD:            return "umod";
781 		case OPCODE_SHL:             return "shl";
782 		case OPCODE_ISHR:            return "ishr";
783 		case OPCODE_USHR:            return "ushr";
784 		case OPCODE_RSQX:            return "rsqx";
785 		case OPCODE_SQRT:            return "sqrt";
786 		case OPCODE_RSQ:             return "rsq";
787 		case OPCODE_LEN2:            return "len2";
788 		case OPCODE_LEN3:            return "len3";
789 		case OPCODE_LEN4:            return "len4";
790 		case OPCODE_DIST1:           return "dist1";
791 		case OPCODE_DIST2:           return "dist2";
792 		case OPCODE_DIST3:           return "dist3";
793 		case OPCODE_DIST4:           return "dist4";
794 		case OPCODE_DP3:             return "dp3";
795 		case OPCODE_DP4:             return "dp4";
796 		case OPCODE_DET2:            return "det2";
797 		case OPCODE_DET3:            return "det3";
798 		case OPCODE_DET4:            return "det4";
799 		case OPCODE_MIN:             return "min";
800 		case OPCODE_IMIN:            return "imin";
801 		case OPCODE_UMIN:            return "umin";
802 		case OPCODE_MAX:             return "max";
803 		case OPCODE_IMAX:            return "imax";
804 		case OPCODE_UMAX:            return "umax";
805 		case OPCODE_SLT:             return "slt";
806 		case OPCODE_SGE:             return "sge";
807 		case OPCODE_EXP2X:           return "exp2x";
808 		case OPCODE_LOG2X:           return "log2x";
809 		case OPCODE_LIT:             return "lit";
810 		case OPCODE_ATT:             return "att";
811 		case OPCODE_LRP:             return "lrp";
812 		case OPCODE_STEP:            return "step";
813 		case OPCODE_SMOOTH:          return "smooth";
814 		case OPCODE_FLOATBITSTOINT:  return "floatBitsToInt";
815 		case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt";
816 		case OPCODE_INTBITSTOFLOAT:  return "intBitsToFloat";
817 		case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat";
818 		case OPCODE_PACKSNORM2x16:   return "packSnorm2x16";
819 		case OPCODE_PACKUNORM2x16:   return "packUnorm2x16";
820 		case OPCODE_PACKHALF2x16:    return "packHalf2x16";
821 		case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16";
822 		case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16";
823 		case OPCODE_UNPACKHALF2x16:  return "unpackHalf2x16";
824 		case OPCODE_FRC:             return "frc";
825 		case OPCODE_M4X4:            return "m4x4";
826 		case OPCODE_M4X3:            return "m4x3";
827 		case OPCODE_M3X4:            return "m3x4";
828 		case OPCODE_M3X3:            return "m3x3";
829 		case OPCODE_M3X2:            return "m3x2";
830 		case OPCODE_CALL:            return "call";
831 		case OPCODE_CALLNZ:          return "callnz";
832 		case OPCODE_LOOP:            return "loop";
833 		case OPCODE_RET:             return "ret";
834 		case OPCODE_ENDLOOP:         return "endloop";
835 		case OPCODE_LABEL:           return "label";
836 		case OPCODE_DCL:             return "dcl";
837 		case OPCODE_POWX:            return "powx";
838 		case OPCODE_CRS:             return "crs";
839 		case OPCODE_SGN:             return "sgn";
840 		case OPCODE_ISGN:            return "isgn";
841 		case OPCODE_ABS:             return "abs";
842 		case OPCODE_IABS:            return "iabs";
843 		case OPCODE_NRM2:            return "nrm2";
844 		case OPCODE_NRM3:            return "nrm3";
845 		case OPCODE_NRM4:            return "nrm4";
846 		case OPCODE_SINCOS:          return "sincos";
847 		case OPCODE_REP:             return "rep";
848 		case OPCODE_ENDREP:          return "endrep";
849 		case OPCODE_IF:              return "if";
850 		case OPCODE_IFC:             return "ifc";
851 		case OPCODE_ELSE:            return "else";
852 		case OPCODE_ENDIF:           return "endif";
853 		case OPCODE_BREAK:           return "break";
854 		case OPCODE_BREAKC:          return "breakc";
855 		case OPCODE_MOVA:            return "mova";
856 		case OPCODE_DEFB:            return "defb";
857 		case OPCODE_DEFI:            return "defi";
858 		case OPCODE_TEXCOORD:        return "texcoord";
859 		case OPCODE_TEXKILL:         return "texkill";
860 		case OPCODE_DISCARD:         return "discard";
861 		case OPCODE_TEX:
862 			if(version < 0x0104)     return "tex";
863 			else                     return "texld";
864 		case OPCODE_TEXBEM:          return "texbem";
865 		case OPCODE_TEXBEML:         return "texbeml";
866 		case OPCODE_TEXREG2AR:       return "texreg2ar";
867 		case OPCODE_TEXREG2GB:       return "texreg2gb";
868 		case OPCODE_TEXM3X2PAD:      return "texm3x2pad";
869 		case OPCODE_TEXM3X2TEX:      return "texm3x2tex";
870 		case OPCODE_TEXM3X3PAD:      return "texm3x3pad";
871 		case OPCODE_TEXM3X3TEX:      return "texm3x3tex";
872 		case OPCODE_RESERVED0:       return "reserved0";
873 		case OPCODE_TEXM3X3SPEC:     return "texm3x3spec";
874 		case OPCODE_TEXM3X3VSPEC:    return "texm3x3vspec";
875 		case OPCODE_EXPP:            return "expp";
876 		case OPCODE_LOGP:            return "logp";
877 		case OPCODE_CND:             return "cnd";
878 		case OPCODE_DEF:             return "def";
879 		case OPCODE_TEXREG2RGB:      return "texreg2rgb";
880 		case OPCODE_TEXDP3TEX:       return "texdp3tex";
881 		case OPCODE_TEXM3X2DEPTH:    return "texm3x2depth";
882 		case OPCODE_TEXDP3:          return "texdp3";
883 		case OPCODE_TEXM3X3:         return "texm3x3";
884 		case OPCODE_TEXDEPTH:        return "texdepth";
885 		case OPCODE_CMP0:            return "cmp0";
886 		case OPCODE_ICMP:            return "icmp";
887 		case OPCODE_UCMP:            return "ucmp";
888 		case OPCODE_SELECT:          return "select";
889 		case OPCODE_EXTRACT:         return "extract";
890 		case OPCODE_INSERT:          return "insert";
891 		case OPCODE_BEM:             return "bem";
892 		case OPCODE_DP2ADD:          return "dp2add";
893 		case OPCODE_DFDX:            return "dFdx";
894 		case OPCODE_DFDY:            return "dFdy";
895 		case OPCODE_FWIDTH:          return "fwidth";
896 		case OPCODE_TEXLDD:          return "texldd";
897 		case OPCODE_CMP:             return "cmp";
898 		case OPCODE_TEXLDL:          return "texldl";
899 		case OPCODE_TEXBIAS:         return "texbias";
900 		case OPCODE_TEXOFFSET:       return "texoffset";
901 		case OPCODE_TEXOFFSETBIAS:   return "texoffsetbias";
902 		case OPCODE_TEXLODOFFSET:    return "texlodoffset";
903 		case OPCODE_TEXELFETCH:      return "texelfetch";
904 		case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset";
905 		case OPCODE_TEXGRAD:         return "texgrad";
906 		case OPCODE_TEXGRADOFFSET:   return "texgradoffset";
907 		case OPCODE_BREAKP:          return "breakp";
908 		case OPCODE_TEXSIZE:         return "texsize";
909 		case OPCODE_PHASE:           return "phase";
910 		case OPCODE_COMMENT:         return "comment";
911 		case OPCODE_END:             return "end";
912 		case OPCODE_PS_1_0:          return "ps_1_0";
913 		case OPCODE_PS_1_1:          return "ps_1_1";
914 		case OPCODE_PS_1_2:          return "ps_1_2";
915 		case OPCODE_PS_1_3:          return "ps_1_3";
916 		case OPCODE_PS_1_4:          return "ps_1_4";
917 		case OPCODE_PS_2_0:          return "ps_2_0";
918 		case OPCODE_PS_2_x:          return "ps_2_x";
919 		case OPCODE_PS_3_0:          return "ps_3_0";
920 		case OPCODE_VS_1_0:          return "vs_1_0";
921 		case OPCODE_VS_1_1:          return "vs_1_1";
922 		case OPCODE_VS_2_0:          return "vs_2_0";
923 		case OPCODE_VS_2_x:          return "vs_2_x";
924 		case OPCODE_VS_2_sw:         return "vs_2_sw";
925 		case OPCODE_VS_3_0:          return "vs_3_0";
926 		case OPCODE_VS_3_sw:         return "vs_3_sw";
927 		case OPCODE_WHILE:           return "while";
928 		case OPCODE_ENDWHILE:        return "endwhile";
929 		case OPCODE_COS:             return "cos";
930 		case OPCODE_SIN:             return "sin";
931 		case OPCODE_TAN:             return "tan";
932 		case OPCODE_ACOS:            return "acos";
933 		case OPCODE_ASIN:            return "asin";
934 		case OPCODE_ATAN:            return "atan";
935 		case OPCODE_ATAN2:           return "atan2";
936 		case OPCODE_COSH:            return "cosh";
937 		case OPCODE_SINH:            return "sinh";
938 		case OPCODE_TANH:            return "tanh";
939 		case OPCODE_ACOSH:           return "acosh";
940 		case OPCODE_ASINH:           return "asinh";
941 		case OPCODE_ATANH:           return "atanh";
942 		case OPCODE_DP1:             return "dp1";
943 		case OPCODE_DP2:             return "dp2";
944 		case OPCODE_TRUNC:           return "trunc";
945 		case OPCODE_FLOOR:           return "floor";
946 		case OPCODE_ROUND:           return "round";
947 		case OPCODE_ROUNDEVEN:       return "roundEven";
948 		case OPCODE_CEIL:            return "ceil";
949 		case OPCODE_EXP2:            return "exp2";
950 		case OPCODE_LOG2:            return "log2";
951 		case OPCODE_EXP:             return "exp";
952 		case OPCODE_LOG:             return "log";
953 		case OPCODE_POW:             return "pow";
954 		case OPCODE_F2B:             return "f2b";
955 		case OPCODE_B2F:             return "b2f";
956 		case OPCODE_F2I:             return "f2i";
957 		case OPCODE_I2F:             return "i2f";
958 		case OPCODE_F2U:             return "f2u";
959 		case OPCODE_U2F:             return "u2f";
960 		case OPCODE_B2I:             return "b2i";
961 		case OPCODE_I2B:             return "i2b";
962 		case OPCODE_ALL:             return "all";
963 		case OPCODE_ANY:             return "any";
964 		case OPCODE_NEG:             return "neg";
965 		case OPCODE_INEG:            return "ineg";
966 		case OPCODE_ISNAN:           return "isnan";
967 		case OPCODE_ISINF:           return "isinf";
968 		case OPCODE_NOT:             return "not";
969 		case OPCODE_OR:              return "or";
970 		case OPCODE_XOR:             return "xor";
971 		case OPCODE_AND:             return "and";
972 		case OPCODE_EQ:              return "eq";
973 		case OPCODE_NE:              return "neq";
974 		case OPCODE_FORWARD1:        return "forward1";
975 		case OPCODE_FORWARD2:        return "forward2";
976 		case OPCODE_FORWARD3:        return "forward3";
977 		case OPCODE_FORWARD4:        return "forward4";
978 		case OPCODE_REFLECT1:        return "reflect1";
979 		case OPCODE_REFLECT2:        return "reflect2";
980 		case OPCODE_REFLECT3:        return "reflect3";
981 		case OPCODE_REFLECT4:        return "reflect4";
982 		case OPCODE_REFRACT1:        return "refract1";
983 		case OPCODE_REFRACT2:        return "refract2";
984 		case OPCODE_REFRACT3:        return "refract3";
985 		case OPCODE_REFRACT4:        return "refract4";
986 		case OPCODE_LEAVE:           return "leave";
987 		case OPCODE_CONTINUE:        return "continue";
988 		case OPCODE_TEST:            return "test";
989 		case OPCODE_SWITCH:          return "switch";
990 		case OPCODE_ENDSWITCH:       return "endswitch";
991 		default:
992 			ASSERT(false);
993 		}
994 
995 		return "<unknown>";
996 	}
997 
controlString() const998 	std::string Shader::Instruction::controlString() const
999 	{
1000 		if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP)
1001 		{
1002 			if(project) return "p";
1003 
1004 			if(bias) return "b";
1005 
1006 			// FIXME: LOD
1007 		}
1008 
1009 		switch(control)
1010 		{
1011 		case 1: return "_gt";
1012 		case 2: return "_eq";
1013 		case 3: return "_ge";
1014 		case 4: return "_lt";
1015 		case 5: return "_ne";
1016 		case 6: return "_le";
1017 		default:
1018 			return "";
1019 		//	ASSERT(false);   // FIXME
1020 		}
1021 	}
1022 
string(ShaderType shaderType,unsigned short version) const1023 	std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const
1024 	{
1025 		std::ostringstream buffer;
1026 
1027 		if(type == PARAMETER_FLOAT4LITERAL)
1028 		{
1029 			buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}';
1030 
1031 			return buffer.str();
1032 		}
1033 		else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE)
1034 		{
1035 			buffer << index;
1036 
1037 			return typeString(shaderType, version) + buffer.str();
1038 		}
1039 		else
1040 		{
1041 			return typeString(shaderType, version);
1042 		}
1043 	}
1044 
typeString(ShaderType shaderType,unsigned short version) const1045 	std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const
1046 	{
1047 		switch(type)
1048 		{
1049 		case PARAMETER_TEMP:			return "r";
1050 		case PARAMETER_INPUT:			return "v";
1051 		case PARAMETER_CONST:			return "c";
1052 		case PARAMETER_TEXTURE:
1053 	//	case PARAMETER_ADDR:
1054 			if(shaderType == SHADER_PIXEL)	return "t";
1055 			else							return "a0";
1056 		case PARAMETER_RASTOUT:
1057 			if(index == 0)              return "oPos";
1058 			else if(index == 1)         return "oFog";
1059 			else if(index == 2)         return "oPts";
1060 			else                        ASSERT(false);
1061 		case PARAMETER_ATTROUT:			return "oD";
1062 		case PARAMETER_TEXCRDOUT:
1063 	//	case PARAMETER_OUTPUT:			return "";
1064 			if(version < 0x0300)		return "oT";
1065 			else						return "o";
1066 		case PARAMETER_CONSTINT:		return "i";
1067 		case PARAMETER_COLOROUT:		return "oC";
1068 		case PARAMETER_DEPTHOUT:		return "oDepth";
1069 		case PARAMETER_SAMPLER:			return "s";
1070 	//	case PARAMETER_CONST2:			return "";
1071 	//	case PARAMETER_CONST3:			return "";
1072 	//	case PARAMETER_CONST4:			return "";
1073 		case PARAMETER_CONSTBOOL:		return "b";
1074 		case PARAMETER_LOOP:			return "aL";
1075 	//	case PARAMETER_TEMPFLOAT16:		return "";
1076 		case PARAMETER_MISCTYPE:
1077 			switch(index)
1078 			{
1079 			case VPosIndex:				return "vPos";
1080 			case VFaceIndex:			return "vFace";
1081 			case InstanceIDIndex:		return "iID";
1082 			case VertexIDIndex:			return "vID";
1083 			default: ASSERT(false);
1084 			}
1085 		case PARAMETER_LABEL:			return "l";
1086 		case PARAMETER_PREDICATE:		return "p0";
1087 		case PARAMETER_FLOAT4LITERAL:	return "";
1088 		case PARAMETER_BOOL1LITERAL:	return "";
1089 		case PARAMETER_INT4LITERAL:		return "";
1090 	//	case PARAMETER_VOID:			return "";
1091 		default:
1092 			ASSERT(false);
1093 		}
1094 
1095 		return "";
1096 	}
1097 
isBranch() const1098 	bool Shader::Instruction::isBranch() const
1099 	{
1100 		return opcode == OPCODE_IF || opcode == OPCODE_IFC;
1101 	}
1102 
isCall() const1103 	bool Shader::Instruction::isCall() const
1104 	{
1105 		return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ;
1106 	}
1107 
isBreak() const1108 	bool Shader::Instruction::isBreak() const
1109 	{
1110 		return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP;
1111 	}
1112 
isLoop() const1113 	bool Shader::Instruction::isLoop() const
1114 	{
1115 		return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE;
1116 	}
1117 
isEndLoop() const1118 	bool Shader::Instruction::isEndLoop() const
1119 	{
1120 		return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE;
1121 	}
1122 
isPredicated() const1123 	bool Shader::Instruction::isPredicated() const
1124 	{
1125 		return predicate ||
1126 		       analysisBranch ||
1127 		       analysisBreak ||
1128 		       analysisContinue ||
1129 		       analysisLeave;
1130 	}
1131 
Shader()1132 	Shader::Shader() : serialID(serialCounter++)
1133 	{
1134 		usedSamplers = 0;
1135 	}
1136 
~Shader()1137 	Shader::~Shader()
1138 	{
1139 		for(auto &inst : instruction)
1140 		{
1141 			delete inst;
1142 			inst = 0;
1143 		}
1144 	}
1145 
parse(const unsigned long * token)1146 	void Shader::parse(const unsigned long *token)
1147 	{
1148 		minorVersion = (unsigned char)(token[0] & 0x000000FF);
1149 		majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8);
1150 		shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16);
1151 
1152 		int length = 0;
1153 
1154 		if(shaderType == SHADER_VERTEX)
1155 		{
1156 			length = VertexShader::validate(token);
1157 		}
1158 		else if(shaderType == SHADER_PIXEL)
1159 		{
1160 			length = PixelShader::validate(token);
1161 		}
1162 		else ASSERT(false);
1163 
1164 		ASSERT(length != 0);
1165 		instruction.resize(length);
1166 
1167 		for(int i = 0; i < length; i++)
1168 		{
1169 			while((*token & 0x0000FFFF) == 0x0000FFFE)   // Comment token
1170 			{
1171 				int length = (*token & 0x7FFF0000) >> 16;
1172 
1173 				token += length + 1;
1174 			}
1175 
1176 			int tokenCount = size(*token);
1177 
1178 			instruction[i] = new Instruction(token, tokenCount, majorVersion);
1179 
1180 			token += 1 + tokenCount;
1181 		}
1182 	}
1183 
size(unsigned long opcode) const1184 	int Shader::size(unsigned long opcode) const
1185 	{
1186 		return size(opcode, shaderModel);
1187 	}
1188 
size(unsigned long opcode,unsigned short shaderModel)1189 	int Shader::size(unsigned long opcode, unsigned short shaderModel)
1190 	{
1191 		if(shaderModel > 0x0300)
1192 		{
1193 			ASSERT(false);
1194 		}
1195 
1196 		static const signed char size[] =
1197 		{
1198 			0,   // NOP = 0
1199 			2,   // MOV
1200 			3,   // ADD
1201 			3,   // SUB
1202 			4,   // MAD
1203 			3,   // MUL
1204 			2,   // RCP
1205 			2,   // RSQ
1206 			3,   // DP3
1207 			3,   // DP4
1208 			3,   // MIN
1209 			3,   // MAX
1210 			3,   // SLT
1211 			3,   // SGE
1212 			2,   // EXP
1213 			2,   // LOG
1214 			2,   // LIT
1215 			3,   // DST
1216 			4,   // LRP
1217 			2,   // FRC
1218 			3,   // M4x4
1219 			3,   // M4x3
1220 			3,   // M3x4
1221 			3,   // M3x3
1222 			3,   // M3x2
1223 			1,   // CALL
1224 			2,   // CALLNZ
1225 			2,   // LOOP
1226 			0,   // RET
1227 			0,   // ENDLOOP
1228 			1,   // LABEL
1229 			2,   // DCL
1230 			3,   // POW
1231 			3,   // CRS
1232 			4,   // SGN
1233 			2,   // ABS
1234 			2,   // NRM
1235 			4,   // SINCOS
1236 			1,   // REP
1237 			0,   // ENDREP
1238 			1,   // IF
1239 			2,   // IFC
1240 			0,   // ELSE
1241 			0,   // ENDIF
1242 			0,   // BREAK
1243 			2,   // BREAKC
1244 			2,   // MOVA
1245 			2,   // DEFB
1246 			5,   // DEFI
1247 			-1,  // 49
1248 			-1,  // 50
1249 			-1,  // 51
1250 			-1,  // 52
1251 			-1,  // 53
1252 			-1,  // 54
1253 			-1,  // 55
1254 			-1,  // 56
1255 			-1,  // 57
1256 			-1,  // 58
1257 			-1,  // 59
1258 			-1,  // 60
1259 			-1,  // 61
1260 			-1,  // 62
1261 			-1,  // 63
1262 			1,   // TEXCOORD = 64
1263 			1,   // TEXKILL
1264 			1,   // TEX
1265 			2,   // TEXBEM
1266 			2,   // TEXBEML
1267 			2,   // TEXREG2AR
1268 			2,   // TEXREG2GB
1269 			2,   // TEXM3x2PAD
1270 			2,   // TEXM3x2TEX
1271 			2,   // TEXM3x3PAD
1272 			2,   // TEXM3x3TEX
1273 			-1,  // RESERVED0
1274 			3,   // TEXM3x3SPEC
1275 			2,   // TEXM3x3VSPEC
1276 			2,   // EXPP
1277 			2,   // LOGP
1278 			4,   // CND
1279 			5,   // DEF
1280 			2,   // TEXREG2RGB
1281 			2,   // TEXDP3TEX
1282 			2,   // TEXM3x2DEPTH
1283 			2,   // TEXDP3
1284 			2,   // TEXM3x3
1285 			1,   // TEXDEPTH
1286 			4,   // CMP
1287 			3,   // BEM
1288 			4,   // DP2ADD
1289 			2,   // DSX
1290 			2,   // DSY
1291 			5,   // TEXLDD
1292 			3,   // SETP
1293 			3,   // TEXLDL
1294 			2,   // BREAKP
1295 			-1,  // 97
1296 			-1,  // 98
1297 			-1,  // 99
1298 			-1,  // 100
1299 			-1,  // 101
1300 			-1,  // 102
1301 			-1,  // 103
1302 			-1,  // 104
1303 			-1,  // 105
1304 			-1,  // 106
1305 			-1,  // 107
1306 			-1,  // 108
1307 			-1,  // 109
1308 			-1,  // 110
1309 			-1,  // 111
1310 			-1,  // 112
1311 		};
1312 
1313 		int length = 0;
1314 
1315 		if((opcode & 0x0000FFFF) == OPCODE_COMMENT)
1316 		{
1317 			return (opcode & 0x7FFF0000) >> 16;
1318 		}
1319 
1320 		if(opcode != OPCODE_PS_1_0 &&
1321 		   opcode != OPCODE_PS_1_1 &&
1322 		   opcode != OPCODE_PS_1_2 &&
1323 		   opcode != OPCODE_PS_1_3 &&
1324 		   opcode != OPCODE_PS_1_4 &&
1325 		   opcode != OPCODE_PS_2_0 &&
1326 		   opcode != OPCODE_PS_2_x &&
1327 		   opcode != OPCODE_PS_3_0 &&
1328 		   opcode != OPCODE_VS_1_0 &&
1329 		   opcode != OPCODE_VS_1_1 &&
1330 		   opcode != OPCODE_VS_2_0 &&
1331 		   opcode != OPCODE_VS_2_x &&
1332 		   opcode != OPCODE_VS_2_sw &&
1333 		   opcode != OPCODE_VS_3_0 &&
1334 		   opcode != OPCODE_VS_3_sw &&
1335 		   opcode != OPCODE_PHASE &&
1336 		   opcode != OPCODE_END)
1337 		{
1338 			if(shaderModel >= 0x0200)
1339 			{
1340 				length = (opcode & 0x0F000000) >> 24;
1341 			}
1342 			else
1343 			{
1344 				length = size[opcode & 0x0000FFFF];
1345 			}
1346 		}
1347 
1348 		if(length < 0)
1349 		{
1350 			ASSERT(false);
1351 		}
1352 
1353 		if(shaderModel == 0x0104)
1354 		{
1355 			switch(opcode & 0x0000FFFF)
1356 			{
1357 			case OPCODE_TEX:
1358 				length += 1;
1359 				break;
1360 			case OPCODE_TEXCOORD:
1361 				length += 1;
1362 				break;
1363 			default:
1364 				break;
1365 			}
1366 		}
1367 
1368 		return length;
1369 	}
1370 
maskContainsComponent(int mask,int component)1371 	bool Shader::maskContainsComponent(int mask, int component)
1372 	{
1373 		return (mask & (1 << component)) != 0;
1374 	}
1375 
swizzleContainsComponent(int swizzle,int component)1376 	bool Shader::swizzleContainsComponent(int swizzle, int component)
1377 	{
1378 		if((swizzle & 0x03) >> 0 == component) return true;
1379 		if((swizzle & 0x0C) >> 2 == component) return true;
1380 		if((swizzle & 0x30) >> 4 == component) return true;
1381 		if((swizzle & 0xC0) >> 6 == component) return true;
1382 
1383 		return false;
1384 	}
1385 
swizzleContainsComponentMasked(int swizzle,int component,int mask)1386 	bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask)
1387 	{
1388 		if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true;
1389 		if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true;
1390 		if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true;
1391 		if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true;
1392 
1393 		return false;
1394 	}
1395 
containsDynamicBranching() const1396 	bool Shader::containsDynamicBranching() const
1397 	{
1398 		return dynamicBranching;
1399 	}
1400 
containsBreakInstruction() const1401 	bool Shader::containsBreakInstruction() const
1402 	{
1403 		return containsBreak;
1404 	}
1405 
containsContinueInstruction() const1406 	bool Shader::containsContinueInstruction() const
1407 	{
1408 		return containsContinue;
1409 	}
1410 
containsLeaveInstruction() const1411 	bool Shader::containsLeaveInstruction() const
1412 	{
1413 		return containsLeave;
1414 	}
1415 
containsDefineInstruction() const1416 	bool Shader::containsDefineInstruction() const
1417 	{
1418 		return containsDefine;
1419 	}
1420 
usesSampler(int index) const1421 	bool Shader::usesSampler(int index) const
1422 	{
1423 		return (usedSamplers & (1 << index)) != 0;
1424 	}
1425 
getSerialID() const1426 	int Shader::getSerialID() const
1427 	{
1428 		return serialID;
1429 	}
1430 
getLength() const1431 	size_t Shader::getLength() const
1432 	{
1433 		return instruction.size();
1434 	}
1435 
getShaderType() const1436 	Shader::ShaderType Shader::getShaderType() const
1437 	{
1438 		return shaderType;
1439 	}
1440 
getShaderModel() const1441 	unsigned short Shader::getShaderModel() const
1442 	{
1443 		return shaderModel;
1444 	}
1445 
print(const char * fileName,...) const1446 	void Shader::print(const char *fileName, ...) const
1447 	{
1448 		char fullName[1024 + 1];
1449 
1450 		va_list vararg;
1451 		va_start(vararg, fileName);
1452 		vsnprintf(fullName, 1024, fileName, vararg);
1453 		va_end(vararg);
1454 
1455 		std::ofstream file(fullName, std::ofstream::out);
1456 
1457 		for(const auto &inst : instruction)
1458 		{
1459 			file << inst->string(shaderType, shaderModel) << std::endl;
1460 		}
1461 	}
1462 
printInstruction(int index,const char * fileName) const1463 	void Shader::printInstruction(int index, const char *fileName) const
1464 	{
1465 		std::ofstream file(fileName, std::ofstream::out | std::ofstream::app);
1466 
1467 		file << instruction[index]->string(shaderType, shaderModel) << std::endl;
1468 	}
1469 
append(Instruction * instruction)1470 	void Shader::append(Instruction *instruction)
1471 	{
1472 		this->instruction.push_back(instruction);
1473 	}
1474 
declareSampler(int i)1475 	void Shader::declareSampler(int i)
1476 	{
1477 		if(i >= 0 && i < 16)
1478 		{
1479 			usedSamplers |= 1 << i;
1480 		}
1481 	}
1482 
getInstruction(size_t i) const1483 	const Shader::Instruction *Shader::getInstruction(size_t i) const
1484 	{
1485 		ASSERT(i < instruction.size());
1486 
1487 		return instruction[i];
1488 	}
1489 
optimize()1490 	void Shader::optimize()
1491 	{
1492 		optimizeLeave();
1493 		optimizeCall();
1494 		removeNull();
1495 	}
1496 
optimizeLeave()1497 	void Shader::optimizeLeave()
1498 	{
1499 		// A return (leave) right before the end of a function or the shader can be removed
1500 		for(unsigned int i = 0; i < instruction.size(); i++)
1501 		{
1502 			if(instruction[i]->opcode == OPCODE_LEAVE)
1503 			{
1504 				if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET)
1505 				{
1506 					instruction[i]->opcode = OPCODE_NULL;
1507 				}
1508 			}
1509 		}
1510 	}
1511 
optimizeCall()1512 	void Shader::optimizeCall()
1513 	{
1514 		// Eliminate uncalled functions
1515 		std::set<int> calledFunctions;
1516 		bool rescan = true;
1517 
1518 		while(rescan)
1519 		{
1520 			calledFunctions.clear();
1521 			rescan = false;
1522 
1523 			for(const auto &inst : instruction)
1524 			{
1525 				if(inst->isCall())
1526 				{
1527 					calledFunctions.insert(inst->dst.label);
1528 				}
1529 			}
1530 
1531 			if(!calledFunctions.empty())
1532 			{
1533 				for(unsigned int i = 0; i < instruction.size(); i++)
1534 				{
1535 					if(instruction[i]->opcode == OPCODE_LABEL)
1536 					{
1537 						if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end())
1538 						{
1539 							for( ; i < instruction.size(); i++)
1540 							{
1541 								Opcode oldOpcode = instruction[i]->opcode;
1542 								instruction[i]->opcode = OPCODE_NULL;
1543 
1544 								if(oldOpcode == OPCODE_RET)
1545 								{
1546 									rescan = true;
1547 									break;
1548 								}
1549 							}
1550 						}
1551 					}
1552 				}
1553 			}
1554 		}
1555 
1556 		// Optimize the entry call
1557 		if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET)
1558 		{
1559 			if(calledFunctions.size() == 1)
1560 			{
1561 				instruction[0]->opcode = OPCODE_NULL;
1562 				instruction[1]->opcode = OPCODE_NULL;
1563 
1564 				for(size_t i = 2; i < instruction.size(); i++)
1565 				{
1566 					if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET)
1567 					{
1568 						instruction[i]->opcode = OPCODE_NULL;
1569 					}
1570 				}
1571 			}
1572 		}
1573 	}
1574 
removeNull()1575 	void Shader::removeNull()
1576 	{
1577 		size_t size = 0;
1578 		for(size_t i = 0; i < instruction.size(); i++)
1579 		{
1580 			if(instruction[i]->opcode != OPCODE_NULL)
1581 			{
1582 				instruction[size] = instruction[i];
1583 				size++;
1584 			}
1585 			else
1586 			{
1587 				delete instruction[i];
1588 			}
1589 		}
1590 
1591 		instruction.resize(size);
1592 	}
1593 
analyzeDirtyConstants()1594 	void Shader::analyzeDirtyConstants()
1595 	{
1596 		dirtyConstantsF = 0;
1597 		dirtyConstantsI = 0;
1598 		dirtyConstantsB = 0;
1599 
1600 		for(const auto &inst : instruction)
1601 		{
1602 			switch(inst->opcode)
1603 			{
1604 			case OPCODE_DEF:
1605 				if(inst->dst.index + 1 > dirtyConstantsF)
1606 				{
1607 					dirtyConstantsF = inst->dst.index + 1;
1608 				}
1609 				break;
1610 			case OPCODE_DEFI:
1611 				if(inst->dst.index + 1 > dirtyConstantsI)
1612 				{
1613 					dirtyConstantsI = inst->dst.index + 1;
1614 				}
1615 				break;
1616 			case OPCODE_DEFB:
1617 				if(inst->dst.index + 1 > dirtyConstantsB)
1618 				{
1619 					dirtyConstantsB = inst->dst.index + 1;
1620 				}
1621 				break;
1622 			default:
1623 				break;
1624 			}
1625 		}
1626 	}
1627 
analyzeDynamicBranching()1628 	void Shader::analyzeDynamicBranching()
1629 	{
1630 		dynamicBranching = false;
1631 		containsLeave = false;
1632 		containsBreak = false;
1633 		containsContinue = false;
1634 		containsDefine = false;
1635 
1636 		// Determine global presence of branching instructions
1637 		for(const auto &inst : instruction)
1638 		{
1639 			switch(inst->opcode)
1640 			{
1641 			case OPCODE_CALLNZ:
1642 			case OPCODE_IF:
1643 			case OPCODE_IFC:
1644 			case OPCODE_BREAK:
1645 			case OPCODE_BREAKC:
1646 			case OPCODE_CMP:
1647 			case OPCODE_BREAKP:
1648 			case OPCODE_LEAVE:
1649 			case OPCODE_CONTINUE:
1650 				if(inst->src[0].type != PARAMETER_CONSTBOOL)
1651 				{
1652 					dynamicBranching = true;
1653 				}
1654 
1655 				if(inst->opcode == OPCODE_LEAVE)
1656 				{
1657 					containsLeave = true;
1658 				}
1659 
1660 				if(inst->isBreak())
1661 				{
1662 					containsBreak = true;
1663 				}
1664 
1665 				if(inst->opcode == OPCODE_CONTINUE)
1666 				{
1667 					containsContinue = true;
1668 				}
1669 			case OPCODE_DEF:
1670 			case OPCODE_DEFB:
1671 			case OPCODE_DEFI:
1672 				containsDefine = true;
1673 			default:
1674 				break;
1675 			}
1676 		}
1677 
1678 		// Conservatively determine which instructions are affected by dynamic branching
1679 		int branchDepth = 0;
1680 		int breakDepth = 0;
1681 		int continueDepth = 0;
1682 		bool leaveReturn = false;
1683 		unsigned int functionBegin = 0;
1684 
1685 		for(unsigned int i = 0; i < instruction.size(); i++)
1686 		{
1687 			// If statements and loops
1688 			if(instruction[i]->isBranch() || instruction[i]->isLoop())
1689 			{
1690 				branchDepth++;
1691 			}
1692 			else if(instruction[i]->opcode == OPCODE_ENDIF || instruction[i]->isEndLoop())
1693 			{
1694 				branchDepth--;
1695 			}
1696 
1697 			if(branchDepth > 0)
1698 			{
1699 				instruction[i]->analysisBranch = true;
1700 
1701 				if(instruction[i]->isCall())
1702 				{
1703 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1704 				}
1705 			}
1706 
1707 			// Break statemement
1708 			if(instruction[i]->isBreak())
1709 			{
1710 				breakDepth++;
1711 			}
1712 
1713 			if(breakDepth > 0)
1714 			{
1715 				if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH)   // Nested loop or switch, don't make the end of it disable the break execution mask
1716 				{
1717 					breakDepth++;
1718 				}
1719 				else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH)
1720 				{
1721 					breakDepth--;
1722 				}
1723 
1724 				instruction[i]->analysisBreak = true;
1725 
1726 				if(instruction[i]->isCall())
1727 				{
1728 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1729 				}
1730 			}
1731 
1732 			// Continue statement
1733 			if(instruction[i]->opcode == OPCODE_CONTINUE)
1734 			{
1735 				continueDepth++;
1736 			}
1737 
1738 			if(continueDepth > 0)
1739 			{
1740 				if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH)   // Nested loop or switch, don't make the end of it disable the break execution mask
1741 				{
1742 					continueDepth++;
1743 				}
1744 				else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH)
1745 				{
1746 					continueDepth--;
1747 				}
1748 
1749 				instruction[i]->analysisContinue = true;
1750 
1751 				if(instruction[i]->isCall())
1752 				{
1753 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE);
1754 				}
1755 			}
1756 
1757 			// Return (leave) statement
1758 			if(instruction[i]->opcode == OPCODE_LEAVE)
1759 			{
1760 				leaveReturn = true;
1761 
1762 				// Mark loop body instructions prior to the return statement
1763 				for(unsigned int l = functionBegin; l < i; l++)
1764 				{
1765 					if(instruction[l]->isLoop())
1766 					{
1767 						for(unsigned int r = l + 1; r < i; r++)
1768 						{
1769 							instruction[r]->analysisLeave = true;
1770 						}
1771 
1772 						break;
1773 					}
1774 				}
1775 			}
1776 			else if(instruction[i]->opcode == OPCODE_RET)   // End of the function
1777 			{
1778 				leaveReturn = false;
1779 			}
1780 			else if(instruction[i]->opcode == OPCODE_LABEL)
1781 			{
1782 				functionBegin = i;
1783 			}
1784 
1785 			if(leaveReturn)
1786 			{
1787 				instruction[i]->analysisLeave = true;
1788 
1789 				if(instruction[i]->isCall())
1790 				{
1791 					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE);
1792 				}
1793 			}
1794 		}
1795 	}
1796 
markFunctionAnalysis(unsigned int functionLabel,Analysis flag)1797 	void Shader::markFunctionAnalysis(unsigned int functionLabel, Analysis flag)
1798 	{
1799 		bool marker = false;
1800 		for(auto &inst : instruction)
1801 		{
1802 			if(!marker)
1803 			{
1804 				if(inst->opcode == OPCODE_LABEL && inst->dst.label == functionLabel)
1805 				{
1806 					marker = true;
1807 				}
1808 			}
1809 			else
1810 			{
1811 				if(inst->opcode == OPCODE_RET)
1812 				{
1813 					break;
1814 				}
1815 				else if(inst->isCall())
1816 				{
1817 					markFunctionAnalysis(inst->dst.label, flag);
1818 				}
1819 
1820 				inst->analysis |= flag;
1821 			}
1822 		}
1823 	}
1824 
analyzeSamplers()1825 	void Shader::analyzeSamplers()
1826 	{
1827 		for(const auto &inst : instruction)
1828 		{
1829 			switch(inst->opcode)
1830 			{
1831 			case OPCODE_TEX:
1832 			case OPCODE_TEXBEM:
1833 			case OPCODE_TEXBEML:
1834 			case OPCODE_TEXREG2AR:
1835 			case OPCODE_TEXREG2GB:
1836 			case OPCODE_TEXM3X2TEX:
1837 			case OPCODE_TEXM3X3TEX:
1838 			case OPCODE_TEXM3X3SPEC:
1839 			case OPCODE_TEXM3X3VSPEC:
1840 			case OPCODE_TEXREG2RGB:
1841 			case OPCODE_TEXDP3TEX:
1842 			case OPCODE_TEXM3X2DEPTH:
1843 			case OPCODE_TEXLDD:
1844 			case OPCODE_TEXLDL:
1845 			case OPCODE_TEXLOD:
1846 			case OPCODE_TEXOFFSET:
1847 			case OPCODE_TEXOFFSETBIAS:
1848 			case OPCODE_TEXLODOFFSET:
1849 			case OPCODE_TEXELFETCH:
1850 			case OPCODE_TEXELFETCHOFFSET:
1851 			case OPCODE_TEXGRAD:
1852 			case OPCODE_TEXGRADOFFSET:
1853 				{
1854 					Parameter &dst = inst->dst;
1855 					Parameter &src1 = inst->src[1];
1856 
1857 					if(majorVersion >= 2)
1858 					{
1859 						usedSamplers |= 1 << src1.index;
1860 					}
1861 					else
1862 					{
1863 						usedSamplers |= 1 << dst.index;
1864 					}
1865 				}
1866 				break;
1867 			default:
1868 				break;
1869 			}
1870 		}
1871 	}
1872 
1873 	// Assigns a unique index to each call instruction, on a per label basis.
1874 	// This is used to know what basic block to return to.
analyzeCallSites()1875 	void Shader::analyzeCallSites()
1876 	{
1877 		int callSiteIndex[2048] = {0};
1878 
1879 		for(auto &inst : instruction)
1880 		{
1881 			if(inst->opcode == OPCODE_CALL || inst->opcode == OPCODE_CALLNZ)
1882 			{
1883 				int label = inst->dst.label;
1884 
1885 				inst->dst.callSite = callSiteIndex[label]++;
1886 			}
1887 		}
1888 	}
1889 
analyzeDynamicIndexing()1890 	void Shader::analyzeDynamicIndexing()
1891 	{
1892 		dynamicallyIndexedTemporaries = false;
1893 		dynamicallyIndexedInput = false;
1894 		dynamicallyIndexedOutput = false;
1895 
1896 		for(const auto &inst : instruction)
1897 		{
1898 			if(inst->dst.rel.type == PARAMETER_ADDR ||
1899 			   inst->dst.rel.type == PARAMETER_LOOP ||
1900 			   inst->dst.rel.type == PARAMETER_TEMP ||
1901 			   inst->dst.rel.type == PARAMETER_CONST)
1902 			{
1903 				switch(inst->dst.type)
1904 				{
1905 				case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
1906 				case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
1907 				case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
1908 				default: break;
1909 				}
1910 			}
1911 
1912 			for(int j = 0; j < 3; j++)
1913 			{
1914 				if(inst->src[j].rel.type == PARAMETER_ADDR ||
1915 				   inst->src[j].rel.type == PARAMETER_LOOP ||
1916 				   inst->src[j].rel.type == PARAMETER_TEMP ||
1917 				   inst->src[j].rel.type == PARAMETER_CONST)
1918 				{
1919 					switch(inst->src[j].type)
1920 					{
1921 					case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
1922 					case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
1923 					case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
1924 					default: break;
1925 					}
1926 				}
1927 			}
1928 		}
1929 	}
1930 }
1931