• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2  //
3  // Licensed under the Apache License, Version 2.0 (the "License");
4  // you may not use this file except in compliance with the License.
5  // You may obtain a copy of the License at
6  //
7  //    http://www.apache.org/licenses/LICENSE-2.0
8  //
9  // Unless required by applicable law or agreed to in writing, software
10  // distributed under the License is distributed on an "AS IS" BASIS,
11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  // See the License for the specific language governing permissions and
13  // limitations under the License.
14  
#include "Shader.hpp"

#include "VertexShader.hpp"
#include "PixelShader.hpp"
#include "Common/Math.hpp"
#include "Common/Debug.hpp"

#include <algorithm>
#include <cstring>
#include <fstream>
#include <functional>
#include <set>
#include <sstream>
#include <stdarg.h>
#include <unordered_map>
#include <unordered_set>
30  
31  namespace sw
32  {
// Out-of-class definition of the static Shader::serialCounter member; its initial value is 1.
33  	volatile int Shader::serialCounter = 1;
34  
OPCODE_DP(int i)35  	Shader::Opcode Shader::OPCODE_DP(int i)
36  	{
37  		switch(i)
38  		{
39  		default: ASSERT(false);
40  		case 1: return OPCODE_DP1;
41  		case 2: return OPCODE_DP2;
42  		case 3: return OPCODE_DP3;
43  		case 4: return OPCODE_DP4;
44  		}
45  	}
46  
OPCODE_LEN(int i)47  	Shader::Opcode Shader::OPCODE_LEN(int i)
48  	{
49  		switch(i)
50  		{
51  		default: ASSERT(false);
52  		case 1: return OPCODE_ABS;
53  		case 2: return OPCODE_LEN2;
54  		case 3: return OPCODE_LEN3;
55  		case 4: return OPCODE_LEN4;
56  		}
57  	}
58  
OPCODE_DIST(int i)59  	Shader::Opcode Shader::OPCODE_DIST(int i)
60  	{
61  		switch(i)
62  		{
63  		default: ASSERT(false);
64  		case 1: return OPCODE_DIST1;
65  		case 2: return OPCODE_DIST2;
66  		case 3: return OPCODE_DIST3;
67  		case 4: return OPCODE_DIST4;
68  		}
69  	}
70  
OPCODE_NRM(int i)71  	Shader::Opcode Shader::OPCODE_NRM(int i)
72  	{
73  		switch(i)
74  		{
75  		default: ASSERT(false);
76  		case 1: return OPCODE_SGN;
77  		case 2: return OPCODE_NRM2;
78  		case 3: return OPCODE_NRM3;
79  		case 4: return OPCODE_NRM4;
80  		}
81  	}
82  
OPCODE_FORWARD(int i)83  	Shader::Opcode Shader::OPCODE_FORWARD(int i)
84  	{
85  		switch(i)
86  		{
87  		default: ASSERT(false);
88  		case 1: return OPCODE_FORWARD1;
89  		case 2: return OPCODE_FORWARD2;
90  		case 3: return OPCODE_FORWARD3;
91  		case 4: return OPCODE_FORWARD4;
92  		}
93  	}
94  
OPCODE_REFLECT(int i)95  	Shader::Opcode Shader::OPCODE_REFLECT(int i)
96  	{
97  		switch(i)
98  		{
99  		default: ASSERT(false);
100  		case 1: return OPCODE_REFLECT1;
101  		case 2: return OPCODE_REFLECT2;
102  		case 3: return OPCODE_REFLECT3;
103  		case 4: return OPCODE_REFLECT4;
104  		}
105  	}
106  
OPCODE_REFRACT(int i)107  	Shader::Opcode Shader::OPCODE_REFRACT(int i)
108  	{
109  		switch(i)
110  		{
111  		default: ASSERT(false);
112  		case 1: return OPCODE_REFRACT1;
113  		case 2: return OPCODE_REFRACT2;
114  		case 3: return OPCODE_REFRACT3;
115  		case 4: return OPCODE_REFRACT4;
116  		}
117  	}
118  
Instruction(Opcode opcode)119  	Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0)
120  	{
121  		control = CONTROL_RESERVED0;
122  
123  		predicate = false;
124  		predicateNot = false;
125  		predicateSwizzle = 0xE4;
126  
127  		coissue = false;
128  		samplerType = SAMPLER_UNKNOWN;
129  		usage = USAGE_POSITION;
130  		usageIndex = 0;
131  	}
132  
Instruction(const unsigned long * token,int size,unsigned char majorVersion)133  	Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0)
134  	{
135  		parseOperationToken(*token++, majorVersion);
136  
137  		samplerType = SAMPLER_UNKNOWN;
138  		usage = USAGE_POSITION;
139  		usageIndex = 0;
140  
141  		if(opcode == OPCODE_IF ||
142  		   opcode == OPCODE_IFC ||
143  		   opcode == OPCODE_LOOP ||
144  		   opcode == OPCODE_REP ||
145  		   opcode == OPCODE_BREAKC ||
146  		   opcode == OPCODE_BREAKP)   // No destination operand
147  		{
148  			if(size > 0) parseSourceToken(0, token++, majorVersion);
149  			if(size > 1) parseSourceToken(1, token++, majorVersion);
150  			if(size > 2) parseSourceToken(2, token++, majorVersion);
151  			if(size > 3) ASSERT(false);
152  		}
153  		else if(opcode == OPCODE_DCL)
154  		{
155  			parseDeclarationToken(*token++);
156  			parseDestinationToken(token++, majorVersion);
157  		}
158  		else
159  		{
160  			if(size > 0)
161  			{
162  				parseDestinationToken(token, majorVersion);
163  
164  				if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3)
165  				{
166  					token++;
167  					size--;
168  				}
169  
170  				token++;
171  				size--;
172  			}
173  
174  			if(predicate)
175  			{
176  				ASSERT(size != 0);
177  
178  				predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT;
179  				predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16);
180  
181  				token++;
182  				size--;
183  			}
184  
185  			for(int i = 0; size > 0; i++)
186  			{
187  				parseSourceToken(i, token, majorVersion);
188  
189  				token++;
190  				size--;
191  
192  				if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2)
193  				{
194  					token++;
195  					size--;
196  				}
197  			}
198  		}
199  	}
200  
~Instruction()201  	Shader::Instruction::~Instruction()
202  	{
203  	}
204  
string(ShaderType shaderType,unsigned short version) const205  	std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const
206  	{
207  		std::string instructionString;
208  
209  		if(opcode != OPCODE_DCL)
210  		{
211  			instructionString += coissue ? "+ " : "";
212  
213  			if(predicate)
214  			{
215  				instructionString += predicateNot ? "(!p0" : "(p0";
216  				instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle);
217  				instructionString += ") ";
218  			}
219  
220  			instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString();
221  
222  			if(dst.type != PARAMETER_VOID)
223  			{
224  				instructionString += " " + dst.string(shaderType, version) +
225  				                           dst.relativeString() +
226  				                           dst.maskString();
227  			}
228  
229  			for(int i = 0; i < 4; i++)
230  			{
231  				if(src[i].type != PARAMETER_VOID)
232  				{
233  					instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " ";
234  					instructionString += src[i].preModifierString() +
235  										 src[i].string(shaderType, version) +
236  										 src[i].relativeString() +
237  										 src[i].postModifierString() +
238  										 src[i].swizzleString();
239  				}
240  			}
241  		}
242  		else   // DCL
243  		{
244  			instructionString += "dcl";
245  
246  			if(dst.type == PARAMETER_SAMPLER)
247  			{
248  				switch(samplerType)
249  				{
250  				case SAMPLER_UNKNOWN: instructionString += " ";        break;
251  				case SAMPLER_1D:      instructionString += "_1d ";     break;
252  				case SAMPLER_2D:      instructionString += "_2d ";     break;
253  				case SAMPLER_CUBE:    instructionString += "_cube ";   break;
254  				case SAMPLER_VOLUME:  instructionString += "_volume "; break;
255  				default:
256  					ASSERT(false);
257  				}
258  
259  				instructionString += dst.string(shaderType, version);
260  			}
261  			else if(dst.type == PARAMETER_INPUT ||
262  				    dst.type == PARAMETER_OUTPUT ||
263  				    dst.type == PARAMETER_TEXTURE)
264  			{
265  				if(version >= 0x0300)
266  				{
267  					switch(usage)
268  					{
269  					case USAGE_POSITION:     instructionString += "_position";     break;
270  					case USAGE_BLENDWEIGHT:  instructionString += "_blendweight";  break;
271  					case USAGE_BLENDINDICES: instructionString += "_blendindices"; break;
272  					case USAGE_NORMAL:       instructionString += "_normal";       break;
273  					case USAGE_PSIZE:        instructionString += "_psize";        break;
274  					case USAGE_TEXCOORD:     instructionString += "_texcoord";     break;
275  					case USAGE_TANGENT:      instructionString += "_tangent";      break;
276  					case USAGE_BINORMAL:     instructionString += "_binormal";     break;
277  					case USAGE_TESSFACTOR:   instructionString += "_tessfactor";   break;
278  					case USAGE_POSITIONT:    instructionString += "_positiont";    break;
279  					case USAGE_COLOR:        instructionString += "_color";        break;
280  					case USAGE_FOG:          instructionString += "_fog";          break;
281  					case USAGE_DEPTH:        instructionString += "_depth";        break;
282  					case USAGE_SAMPLE:       instructionString += "_sample";       break;
283  					default:
284  						ASSERT(false);
285  					}
286  
287  					if(usageIndex > 0)
288  					{
289  						std::ostringstream buffer;
290  
291  						buffer << (int)usageIndex;
292  
293  						instructionString += buffer.str();
294  					}
295  				}
296  				else ASSERT(dst.type != PARAMETER_OUTPUT);
297  
298  				instructionString += " ";
299  
300  				instructionString += dst.string(shaderType, version);
301  				instructionString += dst.maskString();
302  			}
303  			else if(dst.type == PARAMETER_MISCTYPE)   // vPos and vFace
304  			{
305  				instructionString += " ";
306  
307  				instructionString += dst.string(shaderType, version);
308  			}
309  			else ASSERT(false);
310  		}
311  
312  		return instructionString;
313  	}
314  
modifierString() const315  	std::string Shader::DestinationParameter::modifierString() const
316  	{
317  		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
318  		{
319  			return "";
320  		}
321  
322  		std::string modifierString;
323  
324  		if(saturate)
325  		{
326  			modifierString += "_sat";
327  		}
328  
329  		if(partialPrecision)
330  		{
331  			modifierString += "_pp";
332  		}
333  
334  		if(centroid)
335  		{
336  			modifierString += "_centroid";
337  		}
338  
339  		return modifierString;
340  	}
341  
shiftString() const342  	std::string Shader::DestinationParameter::shiftString() const
343  	{
344  		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
345  		{
346  			return "";
347  		}
348  
349  		switch(shift)
350  		{
351  		case 0:		return "";
352  		case 1:		return "_x2";
353  		case 2:		return "_x4";
354  		case 3:		return "_x8";
355  		case -1:	return "_d2";
356  		case -2:	return "_d4";
357  		case -3:	return "_d8";
358  		default:
359  			return "";
360  		//	ASSERT(false);   // FIXME
361  		}
362  	}
363  
maskString() const364  	std::string Shader::DestinationParameter::maskString() const
365  	{
366  		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
367  		{
368  			return "";
369  		}
370  
371  		switch(mask)
372  		{
373  		case 0x0:	return "";
374  		case 0x1:	return ".x";
375  		case 0x2:	return ".y";
376  		case 0x3:	return ".xy";
377  		case 0x4:	return ".z";
378  		case 0x5:	return ".xz";
379  		case 0x6:	return ".yz";
380  		case 0x7:	return ".xyz";
381  		case 0x8:	return ".w";
382  		case 0x9:	return ".xw";
383  		case 0xA:	return ".yw";
384  		case 0xB:	return ".xyw";
385  		case 0xC:	return ".zw";
386  		case 0xD:	return ".xzw";
387  		case 0xE:	return ".yzw";
388  		case 0xF:	return "";
389  		default:
390  			ASSERT(false);
391  		}
392  
393  		return "";
394  	}
395  
preModifierString() const396  	std::string Shader::SourceParameter::preModifierString() const
397  	{
398  		if(type == PARAMETER_VOID)
399  		{
400  			return "";
401  		}
402  
403  		switch(modifier)
404  		{
405  		case MODIFIER_NONE:			return "";
406  		case MODIFIER_NEGATE:		return "-";
407  		case MODIFIER_BIAS:			return "";
408  		case MODIFIER_BIAS_NEGATE:	return "-";
409  		case MODIFIER_SIGN:			return "";
410  		case MODIFIER_SIGN_NEGATE:	return "-";
411  		case MODIFIER_COMPLEMENT:	return "1-";
412  		case MODIFIER_X2:			return "";
413  		case MODIFIER_X2_NEGATE:	return "-";
414  		case MODIFIER_DZ:			return "";
415  		case MODIFIER_DW:			return "";
416  		case MODIFIER_ABS:			return "";
417  		case MODIFIER_ABS_NEGATE:	return "-";
418  		case MODIFIER_NOT:			return "!";
419  		default:
420  			ASSERT(false);
421  		}
422  
423  		return "";
424  	}
425  
relativeString() const426  	std::string Shader::Parameter::relativeString() const
427  	{
428  		if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP)
429  		{
430  			if(rel.type == PARAMETER_VOID)
431  			{
432  				return "";
433  			}
434  			else if(rel.type == PARAMETER_ADDR)
435  			{
436  				switch(rel.swizzle & 0x03)
437  				{
438  				case 0: return "[a0.x]";
439  				case 1: return "[a0.y]";
440  				case 2: return "[a0.z]";
441  				case 3: return "[a0.w]";
442  				}
443  			}
444  			else if(rel.type == PARAMETER_TEMP)
445  			{
446  				std::ostringstream buffer;
447  				buffer << rel.index;
448  
449  				switch(rel.swizzle & 0x03)
450  				{
451  				case 0: return "[r" + buffer.str() + ".x]";
452  				case 1: return "[r" + buffer.str() + ".y]";
453  				case 2: return "[r" + buffer.str() + ".z]";
454  				case 3: return "[r" + buffer.str() + ".w]";
455  				}
456  			}
457  			else if(rel.type == PARAMETER_LOOP)
458  			{
459  				return "[aL]";
460  			}
461  			else if(rel.type == PARAMETER_CONST)
462  			{
463  				std::ostringstream buffer;
464  				buffer << rel.index;
465  
466  				switch(rel.swizzle & 0x03)
467  				{
468  				case 0: return "[c" + buffer.str() + ".x]";
469  				case 1: return "[c" + buffer.str() + ".y]";
470  				case 2: return "[c" + buffer.str() + ".z]";
471  				case 3: return "[c" + buffer.str() + ".w]";
472  				}
473  			}
474  			else ASSERT(false);
475  		}
476  
477  		return "";
478  	}
479  
postModifierString() const480  	std::string Shader::SourceParameter::postModifierString() const
481  	{
482  		if(type == PARAMETER_VOID)
483  		{
484  			return "";
485  		}
486  
487  		switch(modifier)
488  		{
489  		case MODIFIER_NONE:			return "";
490  		case MODIFIER_NEGATE:		return "";
491  		case MODIFIER_BIAS:			return "_bias";
492  		case MODIFIER_BIAS_NEGATE:	return "_bias";
493  		case MODIFIER_SIGN:			return "_bx2";
494  		case MODIFIER_SIGN_NEGATE:	return "_bx2";
495  		case MODIFIER_COMPLEMENT:	return "";
496  		case MODIFIER_X2:			return "_x2";
497  		case MODIFIER_X2_NEGATE:	return "_x2";
498  		case MODIFIER_DZ:			return "_dz";
499  		case MODIFIER_DW:			return "_dw";
500  		case MODIFIER_ABS:			return "_abs";
501  		case MODIFIER_ABS_NEGATE:	return "_abs";
502  		case MODIFIER_NOT:			return "";
503  		default:
504  			ASSERT(false);
505  		}
506  
507  		return "";
508  	}
509  
string(ShaderType shaderType,unsigned short version) const510  	std::string Shader::SourceParameter::string(ShaderType shaderType, unsigned short version) const
511  	{
512  		if(type == PARAMETER_CONST && bufferIndex >= 0)
513  		{
514  			std::ostringstream buffer;
515  			buffer << bufferIndex;
516  
517  			std::ostringstream offset;
518  			offset << index;
519  
520  			return "cb" + buffer.str() + "[" + offset.str() + "]";
521  		}
522  		else
523  		{
524  			return Parameter::string(shaderType, version);
525  		}
526  	}
527  
swizzleString() const528  	std::string Shader::SourceParameter::swizzleString() const
529  	{
530  		return Instruction::swizzleString(type, swizzle);
531  	}
532  
parseOperationToken(unsigned long token,unsigned char majorVersion)533  	void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion)
534  	{
535  		if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000)   // Version token
536  		{
537  			opcode = (Opcode)token;
538  
539  			control = CONTROL_RESERVED0;
540  			predicate = false;
541  			coissue = false;
542  		}
543  		else
544  		{
545  			opcode = (Opcode)(token & 0x0000FFFF);
546  			control = (Control)((token & 0x00FF0000) >> 16);
547  
548  			int size = (token & 0x0F000000) >> 24;
549  
550  			predicate = (token & 0x10000000) != 0x00000000;
551  			coissue = (token & 0x40000000) != 0x00000000;
552  
553  			if(majorVersion < 2)
554  			{
555  				if(size != 0)
556  				{
557  					ASSERT(false);   // Reserved
558  				}
559  			}
560  
561  			if(majorVersion < 2)
562  			{
563  				if(predicate)
564  				{
565  					ASSERT(false);
566  				}
567  			}
568  
569  			if((token & 0x20000000) != 0x00000000)
570  			{
571  				ASSERT(false);   // Reserved
572  			}
573  
574  			if(majorVersion >= 2)
575  			{
576  				if(coissue)
577  				{
578  					ASSERT(false);   // Reserved
579  				}
580  			}
581  
582  			if((token & 0x80000000) != 0x00000000)
583  			{
584  				ASSERT(false);
585  			}
586  		}
587  	}
588  
parseDeclarationToken(unsigned long token)589  	void Shader::Instruction::parseDeclarationToken(unsigned long token)
590  	{
591  		samplerType = (SamplerType)((token & 0x78000000) >> 27);
592  		usage = (Usage)(token & 0x0000001F);
593  		usageIndex = (unsigned char)((token & 0x000F0000) >> 16);
594  	}
595  
parseDestinationToken(const unsigned long * token,unsigned char majorVersion)596  	void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion)
597  	{
598  		dst.index = (unsigned short)(token[0] & 0x000007FF);
599  		dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
600  
601  		// TODO: Check type and index range
602  
603  		bool relative = (token[0] & 0x00002000) != 0x00000000;
604  		dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
605  		dst.rel.swizzle = 0x00;
606  		dst.rel.scale = 1;
607  
608  		if(relative && majorVersion >= 3)
609  		{
610  			dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
611  			dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
612  		}
613  		else if(relative) ASSERT(false);   // Reserved
614  
615  		if((token[0] & 0x0000C000) != 0x00000000)
616  		{
617  			ASSERT(false);   // Reserved
618  		}
619  
620  		dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16);
621  		dst.saturate = (token[0] & 0x00100000) != 0;
622  		dst.partialPrecision = (token[0] & 0x00200000) != 0;
623  		dst.centroid = (token[0] & 0x00400000) != 0;
624  		dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4;
625  
626  		if(majorVersion >= 2)
627  		{
628  			if(dst.shift)
629  			{
630  				ASSERT(false);   // Reserved
631  			}
632  		}
633  
634  		if((token[0] & 0x80000000) != 0x80000000)
635  		{
636  			ASSERT(false);
637  		}
638  	}
639  
parseSourceToken(int i,const unsigned long * token,unsigned char majorVersion)640  	void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion)
641  	{
642  		// Defaults
643  		src[i].index = 0;
644  		src[i].type = PARAMETER_VOID;
645  		src[i].modifier = MODIFIER_NONE;
646  		src[i].swizzle = 0xE4;
647  		src[i].rel.type = PARAMETER_VOID;
648  		src[i].rel.swizzle = 0x00;
649  		src[i].rel.scale = 1;
650  
651  		switch(opcode)
652  		{
653  		case OPCODE_DEF:
654  			src[0].type = PARAMETER_FLOAT4LITERAL;
655  			src[0].value[i] = *(float*)token;
656  			break;
657  		case OPCODE_DEFB:
658  			src[0].type = PARAMETER_BOOL1LITERAL;
659  			src[0].boolean[0] = *(int*)token;
660  			break;
661  		case OPCODE_DEFI:
662  			src[0].type = PARAMETER_INT4LITERAL;
663  			src[0].integer[i] = *(int*)token;
664  			break;
665  		default:
666  			src[i].index = (unsigned short)(token[0] & 0x000007FF);
667  			src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
668  
669  			// FIXME: Check type and index range
670  
671  			bool relative = (token[0] & 0x00002000) != 0x00000000;
672  			src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
673  
674  			if((token[0] & 0x0000C000) != 0x00000000)
675  			{
676  				if(opcode != OPCODE_DEF &&
677  				   opcode != OPCODE_DEFI &&
678  				   opcode != OPCODE_DEFB)
679  				{
680  					ASSERT(false);
681  				}
682  			}
683  
684  			src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16);
685  			src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24);
686  
687  			if((token[0] & 0x80000000) != 0x80000000)
688  			{
689  				if(opcode != OPCODE_DEF &&
690  				   opcode != OPCODE_DEFI &&
691  				   opcode != OPCODE_DEFB)
692  				{
693  					ASSERT(false);
694  				}
695  			}
696  
697  			if(relative && majorVersion >= 2)
698  			{
699  				src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
700  				src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
701  			}
702  		}
703  	}
704  
swizzleString(ParameterType type,unsigned char swizzle)705  	std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle)
706  	{
707  		if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4)
708  		{
709  			return "";
710  		}
711  
712  		int x = (swizzle & 0x03) >> 0;
713  		int y = (swizzle & 0x0C) >> 2;
714  		int z = (swizzle & 0x30) >> 4;
715  		int w = (swizzle & 0xC0) >> 6;
716  
717  		std::string swizzleString = ".";
718  
719  		switch(x)
720  		{
721  		case 0: swizzleString += "x"; break;
722  		case 1: swizzleString += "y"; break;
723  		case 2: swizzleString += "z"; break;
724  		case 3: swizzleString += "w"; break;
725  		}
726  
727  		if(!(x == y && y == z && z == w))
728  		{
729  			switch(y)
730  			{
731  			case 0: swizzleString += "x"; break;
732  			case 1: swizzleString += "y"; break;
733  			case 2: swizzleString += "z"; break;
734  			case 3: swizzleString += "w"; break;
735  			}
736  
737  			if(!(y == z && z == w))
738  			{
739  				switch(z)
740  				{
741  				case 0: swizzleString += "x"; break;
742  				case 1: swizzleString += "y"; break;
743  				case 2: swizzleString += "z"; break;
744  				case 3: swizzleString += "w"; break;
745  				}
746  
747  				if(!(z == w))
748  				{
749  					switch(w)
750  					{
751  					case 0: swizzleString += "x"; break;
752  					case 1: swizzleString += "y"; break;
753  					case 2: swizzleString += "z"; break;
754  					case 3: swizzleString += "w"; break;
755  					}
756  				}
757  			}
758  		}
759  
760  		return swizzleString;
761  	}
762  
operationString(unsigned short version) const763  	std::string Shader::Instruction::operationString(unsigned short version) const
764  	{
765  		switch(opcode)
766  		{
767  		case OPCODE_NULL:            return "null";
768  		case OPCODE_NOP:             return "nop";
769  		case OPCODE_MOV:             return "mov";
770  		case OPCODE_ADD:             return "add";
771  		case OPCODE_IADD:            return "iadd";
772  		case OPCODE_SUB:             return "sub";
773  		case OPCODE_ISUB:            return "isub";
774  		case OPCODE_MAD:             return "mad";
775  		case OPCODE_IMAD:            return "imad";
776  		case OPCODE_MUL:             return "mul";
777  		case OPCODE_IMUL:            return "imul";
778  		case OPCODE_RCPX:            return "rcpx";
779  		case OPCODE_DIV:             return "div";
780  		case OPCODE_IDIV:            return "idiv";
781  		case OPCODE_UDIV:            return "udiv";
782  		case OPCODE_MOD:             return "mod";
783  		case OPCODE_IMOD:            return "imod";
784  		case OPCODE_UMOD:            return "umod";
785  		case OPCODE_SHL:             return "shl";
786  		case OPCODE_ISHR:            return "ishr";
787  		case OPCODE_USHR:            return "ushr";
788  		case OPCODE_RSQX:            return "rsqx";
789  		case OPCODE_SQRT:            return "sqrt";
790  		case OPCODE_RSQ:             return "rsq";
791  		case OPCODE_LEN2:            return "len2";
792  		case OPCODE_LEN3:            return "len3";
793  		case OPCODE_LEN4:            return "len4";
794  		case OPCODE_DIST1:           return "dist1";
795  		case OPCODE_DIST2:           return "dist2";
796  		case OPCODE_DIST3:           return "dist3";
797  		case OPCODE_DIST4:           return "dist4";
798  		case OPCODE_DP3:             return "dp3";
799  		case OPCODE_DP4:             return "dp4";
800  		case OPCODE_DET2:            return "det2";
801  		case OPCODE_DET3:            return "det3";
802  		case OPCODE_DET4:            return "det4";
803  		case OPCODE_MIN:             return "min";
804  		case OPCODE_IMIN:            return "imin";
805  		case OPCODE_UMIN:            return "umin";
806  		case OPCODE_MAX:             return "max";
807  		case OPCODE_IMAX:            return "imax";
808  		case OPCODE_UMAX:            return "umax";
809  		case OPCODE_SLT:             return "slt";
810  		case OPCODE_SGE:             return "sge";
811  		case OPCODE_EXP2X:           return "exp2x";
812  		case OPCODE_LOG2X:           return "log2x";
813  		case OPCODE_LIT:             return "lit";
814  		case OPCODE_ATT:             return "att";
815  		case OPCODE_LRP:             return "lrp";
816  		case OPCODE_STEP:            return "step";
817  		case OPCODE_SMOOTH:          return "smooth";
818  		case OPCODE_FLOATBITSTOINT:  return "floatBitsToInt";
819  		case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt";
820  		case OPCODE_INTBITSTOFLOAT:  return "intBitsToFloat";
821  		case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat";
822  		case OPCODE_PACKSNORM2x16:   return "packSnorm2x16";
823  		case OPCODE_PACKUNORM2x16:   return "packUnorm2x16";
824  		case OPCODE_PACKHALF2x16:    return "packHalf2x16";
825  		case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16";
826  		case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16";
827  		case OPCODE_UNPACKHALF2x16:  return "unpackHalf2x16";
828  		case OPCODE_FRC:             return "frc";
829  		case OPCODE_M4X4:            return "m4x4";
830  		case OPCODE_M4X3:            return "m4x3";
831  		case OPCODE_M3X4:            return "m3x4";
832  		case OPCODE_M3X3:            return "m3x3";
833  		case OPCODE_M3X2:            return "m3x2";
834  		case OPCODE_CALL:            return "call";
835  		case OPCODE_CALLNZ:          return "callnz";
836  		case OPCODE_LOOP:            return "loop";
837  		case OPCODE_RET:             return "ret";
838  		case OPCODE_ENDLOOP:         return "endloop";
839  		case OPCODE_LABEL:           return "label";
840  		case OPCODE_DCL:             return "dcl";
841  		case OPCODE_POWX:            return "powx";
842  		case OPCODE_CRS:             return "crs";
843  		case OPCODE_SGN:             return "sgn";
844  		case OPCODE_ISGN:            return "isgn";
845  		case OPCODE_ABS:             return "abs";
846  		case OPCODE_IABS:            return "iabs";
847  		case OPCODE_NRM2:            return "nrm2";
848  		case OPCODE_NRM3:            return "nrm3";
849  		case OPCODE_NRM4:            return "nrm4";
850  		case OPCODE_SINCOS:          return "sincos";
851  		case OPCODE_REP:             return "rep";
852  		case OPCODE_ENDREP:          return "endrep";
853  		case OPCODE_IF:              return "if";
854  		case OPCODE_IFC:             return "ifc";
855  		case OPCODE_ELSE:            return "else";
856  		case OPCODE_ENDIF:           return "endif";
857  		case OPCODE_BREAK:           return "break";
858  		case OPCODE_BREAKC:          return "breakc";
859  		case OPCODE_MOVA:            return "mova";
860  		case OPCODE_DEFB:            return "defb";
861  		case OPCODE_DEFI:            return "defi";
862  		case OPCODE_TEXCOORD:        return "texcoord";
863  		case OPCODE_TEXKILL:         return "texkill";
864  		case OPCODE_DISCARD:         return "discard";
865  		case OPCODE_TEX:
866  			if(version < 0x0104)     return "tex";
867  			else                     return "texld";
868  		case OPCODE_TEXBEM:          return "texbem";
869  		case OPCODE_TEXBEML:         return "texbeml";
870  		case OPCODE_TEXREG2AR:       return "texreg2ar";
871  		case OPCODE_TEXREG2GB:       return "texreg2gb";
872  		case OPCODE_TEXM3X2PAD:      return "texm3x2pad";
873  		case OPCODE_TEXM3X2TEX:      return "texm3x2tex";
874  		case OPCODE_TEXM3X3PAD:      return "texm3x3pad";
875  		case OPCODE_TEXM3X3TEX:      return "texm3x3tex";
876  		case OPCODE_RESERVED0:       return "reserved0";
877  		case OPCODE_TEXM3X3SPEC:     return "texm3x3spec";
878  		case OPCODE_TEXM3X3VSPEC:    return "texm3x3vspec";
879  		case OPCODE_EXPP:            return "expp";
880  		case OPCODE_LOGP:            return "logp";
881  		case OPCODE_CND:             return "cnd";
882  		case OPCODE_DEF:             return "def";
883  		case OPCODE_TEXREG2RGB:      return "texreg2rgb";
884  		case OPCODE_TEXDP3TEX:       return "texdp3tex";
885  		case OPCODE_TEXM3X2DEPTH:    return "texm3x2depth";
886  		case OPCODE_TEXDP3:          return "texdp3";
887  		case OPCODE_TEXM3X3:         return "texm3x3";
888  		case OPCODE_TEXDEPTH:        return "texdepth";
889  		case OPCODE_CMP0:            return "cmp0";
890  		case OPCODE_ICMP:            return "icmp";
891  		case OPCODE_UCMP:            return "ucmp";
892  		case OPCODE_SELECT:          return "select";
893  		case OPCODE_EXTRACT:         return "extract";
894  		case OPCODE_INSERT:          return "insert";
895  		case OPCODE_BEM:             return "bem";
896  		case OPCODE_DP2ADD:          return "dp2add";
897  		case OPCODE_DFDX:            return "dFdx";
898  		case OPCODE_DFDY:            return "dFdy";
899  		case OPCODE_FWIDTH:          return "fwidth";
900  		case OPCODE_TEXLDD:          return "texldd";
901  		case OPCODE_CMP:             return "cmp";
902  		case OPCODE_TEXLDL:          return "texldl";
903  		case OPCODE_TEXBIAS:         return "texbias";
904  		case OPCODE_TEXOFFSET:       return "texoffset";
905  		case OPCODE_TEXOFFSETBIAS:   return "texoffsetbias";
906  		case OPCODE_TEXLOD:          return "texlod";
907  		case OPCODE_TEXLODOFFSET:    return "texlodoffset";
908  		case OPCODE_TEXELFETCH:      return "texelfetch";
909  		case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset";
910  		case OPCODE_TEXGRAD:         return "texgrad";
911  		case OPCODE_TEXGRADOFFSET:   return "texgradoffset";
912  		case OPCODE_BREAKP:          return "breakp";
913  		case OPCODE_TEXSIZE:         return "texsize";
914  		case OPCODE_PHASE:           return "phase";
915  		case OPCODE_COMMENT:         return "comment";
916  		case OPCODE_END:             return "end";
917  		case OPCODE_PS_1_0:          return "ps_1_0";
918  		case OPCODE_PS_1_1:          return "ps_1_1";
919  		case OPCODE_PS_1_2:          return "ps_1_2";
920  		case OPCODE_PS_1_3:          return "ps_1_3";
921  		case OPCODE_PS_1_4:          return "ps_1_4";
922  		case OPCODE_PS_2_0:          return "ps_2_0";
923  		case OPCODE_PS_2_x:          return "ps_2_x";
924  		case OPCODE_PS_3_0:          return "ps_3_0";
925  		case OPCODE_VS_1_0:          return "vs_1_0";
926  		case OPCODE_VS_1_1:          return "vs_1_1";
927  		case OPCODE_VS_2_0:          return "vs_2_0";
928  		case OPCODE_VS_2_x:          return "vs_2_x";
929  		case OPCODE_VS_2_sw:         return "vs_2_sw";
930  		case OPCODE_VS_3_0:          return "vs_3_0";
931  		case OPCODE_VS_3_sw:         return "vs_3_sw";
932  		case OPCODE_WHILE:           return "while";
933  		case OPCODE_ENDWHILE:        return "endwhile";
934  		case OPCODE_COS:             return "cos";
935  		case OPCODE_SIN:             return "sin";
936  		case OPCODE_TAN:             return "tan";
937  		case OPCODE_ACOS:            return "acos";
938  		case OPCODE_ASIN:            return "asin";
939  		case OPCODE_ATAN:            return "atan";
940  		case OPCODE_ATAN2:           return "atan2";
941  		case OPCODE_COSH:            return "cosh";
942  		case OPCODE_SINH:            return "sinh";
943  		case OPCODE_TANH:            return "tanh";
944  		case OPCODE_ACOSH:           return "acosh";
945  		case OPCODE_ASINH:           return "asinh";
946  		case OPCODE_ATANH:           return "atanh";
947  		case OPCODE_DP1:             return "dp1";
948  		case OPCODE_DP2:             return "dp2";
949  		case OPCODE_TRUNC:           return "trunc";
950  		case OPCODE_FLOOR:           return "floor";
951  		case OPCODE_ROUND:           return "round";
952  		case OPCODE_ROUNDEVEN:       return "roundEven";
953  		case OPCODE_CEIL:            return "ceil";
954  		case OPCODE_EXP2:            return "exp2";
955  		case OPCODE_LOG2:            return "log2";
956  		case OPCODE_EXP:             return "exp";
957  		case OPCODE_LOG:             return "log";
958  		case OPCODE_POW:             return "pow";
959  		case OPCODE_F2B:             return "f2b";
960  		case OPCODE_B2F:             return "b2f";
961  		case OPCODE_F2I:             return "f2i";
962  		case OPCODE_I2F:             return "i2f";
963  		case OPCODE_F2U:             return "f2u";
964  		case OPCODE_U2F:             return "u2f";
965  		case OPCODE_B2I:             return "b2i";
966  		case OPCODE_I2B:             return "i2b";
967  		case OPCODE_ALL:             return "all";
968  		case OPCODE_ANY:             return "any";
969  		case OPCODE_NEG:             return "neg";
970  		case OPCODE_INEG:            return "ineg";
971  		case OPCODE_ISNAN:           return "isnan";
972  		case OPCODE_ISINF:           return "isinf";
973  		case OPCODE_NOT:             return "not";
974  		case OPCODE_OR:              return "or";
975  		case OPCODE_XOR:             return "xor";
976  		case OPCODE_AND:             return "and";
977  		case OPCODE_EQ:              return "eq";
978  		case OPCODE_NE:              return "neq";
979  		case OPCODE_FORWARD1:        return "forward1";
980  		case OPCODE_FORWARD2:        return "forward2";
981  		case OPCODE_FORWARD3:        return "forward3";
982  		case OPCODE_FORWARD4:        return "forward4";
983  		case OPCODE_REFLECT1:        return "reflect1";
984  		case OPCODE_REFLECT2:        return "reflect2";
985  		case OPCODE_REFLECT3:        return "reflect3";
986  		case OPCODE_REFLECT4:        return "reflect4";
987  		case OPCODE_REFRACT1:        return "refract1";
988  		case OPCODE_REFRACT2:        return "refract2";
989  		case OPCODE_REFRACT3:        return "refract3";
990  		case OPCODE_REFRACT4:        return "refract4";
991  		case OPCODE_LEAVE:           return "leave";
992  		case OPCODE_CONTINUE:        return "continue";
993  		case OPCODE_TEST:            return "test";
994  		case OPCODE_SWITCH:          return "switch";
995  		case OPCODE_ENDSWITCH:       return "endswitch";
996  		case OPCODE_SCALAR:          return "scalar";
997  		}
998  
999  		return "<unknown>";
1000  	}
1001  
controlString() const1002  	std::string Shader::Instruction::controlString() const
1003  	{
1004  		if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP)
1005  		{
1006  			if(project) return "p";
1007  
1008  			if(bias) return "b";
1009  
1010  			// FIXME: LOD
1011  		}
1012  
1013  		switch(control)
1014  		{
1015  		case 1: return "_gt";
1016  		case 2: return "_eq";
1017  		case 3: return "_ge";
1018  		case 4: return "_lt";
1019  		case 5: return "_ne";
1020  		case 6: return "_le";
1021  		default:
1022  			return "";
1023  		//	ASSERT(false);   // FIXME
1024  		}
1025  	}
1026  
string(ShaderType shaderType,unsigned short version) const1027  	std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const
1028  	{
1029  		std::ostringstream buffer;
1030  
1031  		if(type == PARAMETER_FLOAT4LITERAL)
1032  		{
1033  			buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}';
1034  
1035  			return buffer.str();
1036  		}
1037  		else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE)
1038  		{
1039  			buffer << index;
1040  
1041  			return typeString(shaderType, version) + buffer.str();
1042  		}
1043  		else
1044  		{
1045  			return typeString(shaderType, version);
1046  		}
1047  	}
1048  
typeString(ShaderType shaderType,unsigned short version) const1049  	std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const
1050  	{
1051  		switch(type)
1052  		{
1053  		case PARAMETER_TEMP:			return "r";
1054  		case PARAMETER_INPUT:			return "v";
1055  		case PARAMETER_CONST:			return "c";
1056  		case PARAMETER_TEXTURE:
1057  	//	case PARAMETER_ADDR:
1058  			if(shaderType == SHADER_PIXEL)	return "t";
1059  			else							return "a0";
1060  		case PARAMETER_RASTOUT:
1061  			if(index == 0)              return "oPos";
1062  			else if(index == 1)         return "oFog";
1063  			else if(index == 2)         return "oPts";
1064  			else                        ASSERT(false);
1065  		case PARAMETER_ATTROUT:			return "oD";
1066  		case PARAMETER_TEXCRDOUT:
1067  	//	case PARAMETER_OUTPUT:			return "";
1068  			if(version < 0x0300)		return "oT";
1069  			else						return "o";
1070  		case PARAMETER_CONSTINT:		return "i";
1071  		case PARAMETER_COLOROUT:		return "oC";
1072  		case PARAMETER_DEPTHOUT:		return "oDepth";
1073  		case PARAMETER_SAMPLER:			return "s";
1074  	//	case PARAMETER_CONST2:			return "";
1075  	//	case PARAMETER_CONST3:			return "";
1076  	//	case PARAMETER_CONST4:			return "";
1077  		case PARAMETER_CONSTBOOL:		return "b";
1078  		case PARAMETER_LOOP:			return "aL";
1079  	//	case PARAMETER_TEMPFLOAT16:		return "";
1080  		case PARAMETER_MISCTYPE:
1081  			switch(index)
1082  			{
1083  			case VPosIndex:				return "vPos";
1084  			case VFaceIndex:			return "vFace";
1085  			case InstanceIDIndex:		return "iID";
1086  			case VertexIDIndex:			return "vID";
1087  			default: ASSERT(false);
1088  			}
1089  		case PARAMETER_LABEL:			return "l";
1090  		case PARAMETER_PREDICATE:		return "p0";
1091  		case PARAMETER_FLOAT4LITERAL:	return "";
1092  		case PARAMETER_BOOL1LITERAL:	return "";
1093  		case PARAMETER_INT4LITERAL:		return "";
1094  	//	case PARAMETER_VOID:			return "";
1095  		default:
1096  			ASSERT(false);
1097  		}
1098  
1099  		return "";
1100  	}
1101  
isBranch() const1102  	bool Shader::Instruction::isBranch() const
1103  	{
1104  		return opcode == OPCODE_IF || opcode == OPCODE_IFC;
1105  	}
1106  
isCall() const1107  	bool Shader::Instruction::isCall() const
1108  	{
1109  		return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ;
1110  	}
1111  
isBreak() const1112  	bool Shader::Instruction::isBreak() const
1113  	{
1114  		return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP;
1115  	}
1116  
isLoop() const1117  	bool Shader::Instruction::isLoop() const
1118  	{
1119  		return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE;
1120  	}
1121  
isEndLoop() const1122  	bool Shader::Instruction::isEndLoop() const
1123  	{
1124  		return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE;
1125  	}
1126  
isPredicated() const1127  	bool Shader::Instruction::isPredicated() const
1128  	{
1129  		return predicate ||
1130  		       analysisBranch ||
1131  		       analysisBreak ||
1132  		       analysisContinue ||
1133  		       analysisLeave;
1134  	}
1135  
Shader()1136  	Shader::Shader() : serialID(serialCounter++)
1137  	{
1138  		usedSamplers = 0;
1139  	}
1140  
~Shader()1141  	Shader::~Shader()
1142  	{
1143  		for(auto &inst : instruction)
1144  		{
1145  			delete inst;
1146  			inst = 0;
1147  		}
1148  	}
1149  
parse(const unsigned long * token)1150  	void Shader::parse(const unsigned long *token)
1151  	{
1152  		minorVersion = (unsigned char)(token[0] & 0x000000FF);
1153  		majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8);
1154  		shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16);
1155  
1156  		int length = 0;
1157  
1158  		if(shaderType == SHADER_VERTEX)
1159  		{
1160  			length = VertexShader::validate(token);
1161  		}
1162  		else if(shaderType == SHADER_PIXEL)
1163  		{
1164  			length = PixelShader::validate(token);
1165  		}
1166  		else ASSERT(false);
1167  
1168  		ASSERT(length != 0);
1169  		instruction.resize(length);
1170  
1171  		for(int i = 0; i < length; i++)
1172  		{
1173  			while((*token & 0x0000FFFF) == 0x0000FFFE)   // Comment token
1174  			{
1175  				int length = (*token & 0x7FFF0000) >> 16;
1176  
1177  				token += length + 1;
1178  			}
1179  
1180  			int tokenCount = size(*token);
1181  
1182  			instruction[i] = new Instruction(token, tokenCount, majorVersion);
1183  
1184  			token += 1 + tokenCount;
1185  		}
1186  	}
1187  
size(unsigned long opcode) const1188  	int Shader::size(unsigned long opcode) const
1189  	{
1190  		return size(opcode, shaderModel);
1191  	}
1192  
size(unsigned long opcode,unsigned short shaderModel)1193  	int Shader::size(unsigned long opcode, unsigned short shaderModel)
1194  	{
1195  		if(shaderModel > 0x0300)
1196  		{
1197  			ASSERT(false);
1198  		}
1199  
1200  		static const signed char size[] =
1201  		{
1202  			0,   // NOP = 0
1203  			2,   // MOV
1204  			3,   // ADD
1205  			3,   // SUB
1206  			4,   // MAD
1207  			3,   // MUL
1208  			2,   // RCP
1209  			2,   // RSQ
1210  			3,   // DP3
1211  			3,   // DP4
1212  			3,   // MIN
1213  			3,   // MAX
1214  			3,   // SLT
1215  			3,   // SGE
1216  			2,   // EXP
1217  			2,   // LOG
1218  			2,   // LIT
1219  			3,   // DST
1220  			4,   // LRP
1221  			2,   // FRC
1222  			3,   // M4x4
1223  			3,   // M4x3
1224  			3,   // M3x4
1225  			3,   // M3x3
1226  			3,   // M3x2
1227  			1,   // CALL
1228  			2,   // CALLNZ
1229  			2,   // LOOP
1230  			0,   // RET
1231  			0,   // ENDLOOP
1232  			1,   // LABEL
1233  			2,   // DCL
1234  			3,   // POW
1235  			3,   // CRS
1236  			4,   // SGN
1237  			2,   // ABS
1238  			2,   // NRM
1239  			4,   // SINCOS
1240  			1,   // REP
1241  			0,   // ENDREP
1242  			1,   // IF
1243  			2,   // IFC
1244  			0,   // ELSE
1245  			0,   // ENDIF
1246  			0,   // BREAK
1247  			2,   // BREAKC
1248  			2,   // MOVA
1249  			2,   // DEFB
1250  			5,   // DEFI
1251  			-1,  // 49
1252  			-1,  // 50
1253  			-1,  // 51
1254  			-1,  // 52
1255  			-1,  // 53
1256  			-1,  // 54
1257  			-1,  // 55
1258  			-1,  // 56
1259  			-1,  // 57
1260  			-1,  // 58
1261  			-1,  // 59
1262  			-1,  // 60
1263  			-1,  // 61
1264  			-1,  // 62
1265  			-1,  // 63
1266  			1,   // TEXCOORD = 64
1267  			1,   // TEXKILL
1268  			1,   // TEX
1269  			2,   // TEXBEM
1270  			2,   // TEXBEML
1271  			2,   // TEXREG2AR
1272  			2,   // TEXREG2GB
1273  			2,   // TEXM3x2PAD
1274  			2,   // TEXM3x2TEX
1275  			2,   // TEXM3x3PAD
1276  			2,   // TEXM3x3TEX
1277  			-1,  // RESERVED0
1278  			3,   // TEXM3x3SPEC
1279  			2,   // TEXM3x3VSPEC
1280  			2,   // EXPP
1281  			2,   // LOGP
1282  			4,   // CND
1283  			5,   // DEF
1284  			2,   // TEXREG2RGB
1285  			2,   // TEXDP3TEX
1286  			2,   // TEXM3x2DEPTH
1287  			2,   // TEXDP3
1288  			2,   // TEXM3x3
1289  			1,   // TEXDEPTH
1290  			4,   // CMP
1291  			3,   // BEM
1292  			4,   // DP2ADD
1293  			2,   // DSX
1294  			2,   // DSY
1295  			5,   // TEXLDD
1296  			3,   // SETP
1297  			3,   // TEXLDL
1298  			2,   // BREAKP
1299  			-1,  // 97
1300  			-1,  // 98
1301  			-1,  // 99
1302  			-1,  // 100
1303  			-1,  // 101
1304  			-1,  // 102
1305  			-1,  // 103
1306  			-1,  // 104
1307  			-1,  // 105
1308  			-1,  // 106
1309  			-1,  // 107
1310  			-1,  // 108
1311  			-1,  // 109
1312  			-1,  // 110
1313  			-1,  // 111
1314  			-1,  // 112
1315  		};
1316  
1317  		int length = 0;
1318  
1319  		if((opcode & 0x0000FFFF) == OPCODE_COMMENT)
1320  		{
1321  			return (opcode & 0x7FFF0000) >> 16;
1322  		}
1323  
1324  		if(opcode != OPCODE_PS_1_0 &&
1325  		   opcode != OPCODE_PS_1_1 &&
1326  		   opcode != OPCODE_PS_1_2 &&
1327  		   opcode != OPCODE_PS_1_3 &&
1328  		   opcode != OPCODE_PS_1_4 &&
1329  		   opcode != OPCODE_PS_2_0 &&
1330  		   opcode != OPCODE_PS_2_x &&
1331  		   opcode != OPCODE_PS_3_0 &&
1332  		   opcode != OPCODE_VS_1_0 &&
1333  		   opcode != OPCODE_VS_1_1 &&
1334  		   opcode != OPCODE_VS_2_0 &&
1335  		   opcode != OPCODE_VS_2_x &&
1336  		   opcode != OPCODE_VS_2_sw &&
1337  		   opcode != OPCODE_VS_3_0 &&
1338  		   opcode != OPCODE_VS_3_sw &&
1339  		   opcode != OPCODE_PHASE &&
1340  		   opcode != OPCODE_END)
1341  		{
1342  			if(shaderModel >= 0x0200)
1343  			{
1344  				length = (opcode & 0x0F000000) >> 24;
1345  			}
1346  			else
1347  			{
1348  				length = size[opcode & 0x0000FFFF];
1349  			}
1350  		}
1351  
1352  		if(length < 0)
1353  		{
1354  			ASSERT(false);
1355  		}
1356  
1357  		if(shaderModel == 0x0104)
1358  		{
1359  			switch(opcode & 0x0000FFFF)
1360  			{
1361  			case OPCODE_TEX:
1362  				length += 1;
1363  				break;
1364  			case OPCODE_TEXCOORD:
1365  				length += 1;
1366  				break;
1367  			default:
1368  				break;
1369  			}
1370  		}
1371  
1372  		return length;
1373  	}
1374  
maskContainsComponent(int mask,int component)1375  	bool Shader::maskContainsComponent(int mask, int component)
1376  	{
1377  		return (mask & (1 << component)) != 0;
1378  	}
1379  
swizzleContainsComponent(int swizzle,int component)1380  	bool Shader::swizzleContainsComponent(int swizzle, int component)
1381  	{
1382  		if((swizzle & 0x03) >> 0 == component) return true;
1383  		if((swizzle & 0x0C) >> 2 == component) return true;
1384  		if((swizzle & 0x30) >> 4 == component) return true;
1385  		if((swizzle & 0xC0) >> 6 == component) return true;
1386  
1387  		return false;
1388  	}
1389  
swizzleContainsComponentMasked(int swizzle,int component,int mask)1390  	bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask)
1391  	{
1392  		if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true;
1393  		if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true;
1394  		if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true;
1395  		if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true;
1396  
1397  		return false;
1398  	}
1399  
containsDynamicBranching() const1400  	bool Shader::containsDynamicBranching() const
1401  	{
1402  		return dynamicBranching;
1403  	}
1404  
containsBreakInstruction() const1405  	bool Shader::containsBreakInstruction() const
1406  	{
1407  		return containsBreak;
1408  	}
1409  
containsContinueInstruction() const1410  	bool Shader::containsContinueInstruction() const
1411  	{
1412  		return containsContinue;
1413  	}
1414  
containsLeaveInstruction() const1415  	bool Shader::containsLeaveInstruction() const
1416  	{
1417  		return containsLeave;
1418  	}
1419  
containsDefineInstruction() const1420  	bool Shader::containsDefineInstruction() const
1421  	{
1422  		return containsDefine;
1423  	}
1424  
usesSampler(int index) const1425  	bool Shader::usesSampler(int index) const
1426  	{
1427  		return (usedSamplers & (1 << index)) != 0;
1428  	}
1429  
getSerialID() const1430  	int Shader::getSerialID() const
1431  	{
1432  		return serialID;
1433  	}
1434  
getLength() const1435  	size_t Shader::getLength() const
1436  	{
1437  		return instruction.size();
1438  	}
1439  
getShaderType() const1440  	Shader::ShaderType Shader::getShaderType() const
1441  	{
1442  		return shaderType;
1443  	}
1444  
getShaderModel() const1445  	unsigned short Shader::getShaderModel() const
1446  	{
1447  		return shaderModel;
1448  	}
1449  
print(const char * fileName,...) const1450  	void Shader::print(const char *fileName, ...) const
1451  	{
1452  		char fullName[1024 + 1];
1453  
1454  		va_list vararg;
1455  		va_start(vararg, fileName);
1456  		vsnprintf(fullName, 1024, fileName, vararg);
1457  		va_end(vararg);
1458  
1459  		std::ofstream file(fullName, std::ofstream::out);
1460  
1461  		for(const auto &inst : instruction)
1462  		{
1463  			file << inst->string(shaderType, shaderModel) << std::endl;
1464  		}
1465  	}
1466  
printInstruction(int index,const char * fileName) const1467  	void Shader::printInstruction(int index, const char *fileName) const
1468  	{
1469  		std::ofstream file(fileName, std::ofstream::out | std::ofstream::app);
1470  
1471  		file << instruction[index]->string(shaderType, shaderModel) << std::endl;
1472  	}
1473  
append(Instruction * instruction)1474  	void Shader::append(Instruction *instruction)
1475  	{
1476  		this->instruction.push_back(instruction);
1477  	}
1478  
declareSampler(int i)1479  	void Shader::declareSampler(int i)
1480  	{
1481  		if(i >= 0 && i < 16)
1482  		{
1483  			usedSamplers |= 1 << i;
1484  		}
1485  	}
1486  
getInstruction(size_t i) const1487  	const Shader::Instruction *Shader::getInstruction(size_t i) const
1488  	{
1489  		ASSERT(i < instruction.size());
1490  
1491  		return instruction[i];
1492  	}
1493  
optimize()1494  	void Shader::optimize()
1495  	{
1496  		optimizeLeave();
1497  		optimizeCall();
1498  		removeNull();
1499  	}
1500  
optimizeLeave()1501  	void Shader::optimizeLeave()
1502  	{
1503  		// A return (leave) right before the end of a function or the shader can be removed
1504  		for(unsigned int i = 0; i < instruction.size(); i++)
1505  		{
1506  			if(instruction[i]->opcode == OPCODE_LEAVE)
1507  			{
1508  				if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET)
1509  				{
1510  					instruction[i]->opcode = OPCODE_NULL;
1511  				}
1512  			}
1513  		}
1514  	}
1515  
optimizeCall()1516  	void Shader::optimizeCall()
1517  	{
1518  		// Eliminate uncalled functions
1519  		std::set<int> calledFunctions;
1520  		bool rescan = true;
1521  
1522  		while(rescan)
1523  		{
1524  			calledFunctions.clear();
1525  			rescan = false;
1526  
1527  			for(const auto &inst : instruction)
1528  			{
1529  				if(inst->isCall())
1530  				{
1531  					calledFunctions.insert(inst->dst.label);
1532  				}
1533  			}
1534  
1535  			if(!calledFunctions.empty())
1536  			{
1537  				for(unsigned int i = 0; i < instruction.size(); i++)
1538  				{
1539  					if(instruction[i]->opcode == OPCODE_LABEL)
1540  					{
1541  						if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end())
1542  						{
1543  							for( ; i < instruction.size(); i++)
1544  							{
1545  								Opcode oldOpcode = instruction[i]->opcode;
1546  								instruction[i]->opcode = OPCODE_NULL;
1547  
1548  								if(oldOpcode == OPCODE_RET)
1549  								{
1550  									rescan = true;
1551  									break;
1552  								}
1553  							}
1554  						}
1555  					}
1556  				}
1557  			}
1558  		}
1559  
1560  		// Optimize the entry call
1561  		if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET)
1562  		{
1563  			if(calledFunctions.size() == 1)
1564  			{
1565  				instruction[0]->opcode = OPCODE_NULL;
1566  				instruction[1]->opcode = OPCODE_NULL;
1567  
1568  				for(size_t i = 2; i < instruction.size(); i++)
1569  				{
1570  					if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET)
1571  					{
1572  						instruction[i]->opcode = OPCODE_NULL;
1573  					}
1574  				}
1575  			}
1576  		}
1577  	}
1578  
removeNull()1579  	void Shader::removeNull()
1580  	{
1581  		size_t size = 0;
1582  		for(size_t i = 0; i < instruction.size(); i++)
1583  		{
1584  			if(instruction[i]->opcode != OPCODE_NULL)
1585  			{
1586  				instruction[size] = instruction[i];
1587  				size++;
1588  			}
1589  			else
1590  			{
1591  				delete instruction[i];
1592  			}
1593  		}
1594  
1595  		instruction.resize(size);
1596  	}
1597  
analyzeDirtyConstants()1598  	void Shader::analyzeDirtyConstants()
1599  	{
1600  		dirtyConstantsF = 0;
1601  		dirtyConstantsI = 0;
1602  		dirtyConstantsB = 0;
1603  
1604  		for(const auto &inst : instruction)
1605  		{
1606  			switch(inst->opcode)
1607  			{
1608  			case OPCODE_DEF:
1609  				if(inst->dst.index + 1 > dirtyConstantsF)
1610  				{
1611  					dirtyConstantsF = inst->dst.index + 1;
1612  				}
1613  				break;
1614  			case OPCODE_DEFI:
1615  				if(inst->dst.index + 1 > dirtyConstantsI)
1616  				{
1617  					dirtyConstantsI = inst->dst.index + 1;
1618  				}
1619  				break;
1620  			case OPCODE_DEFB:
1621  				if(inst->dst.index + 1 > dirtyConstantsB)
1622  				{
1623  					dirtyConstantsB = inst->dst.index + 1;
1624  				}
1625  				break;
1626  			default:
1627  				break;
1628  			}
1629  		}
1630  	}
1631  
analyzeDynamicBranching()1632  	void Shader::analyzeDynamicBranching()
1633  	{
1634  		dynamicBranching = false;
1635  		containsLeave = false;
1636  		containsBreak = false;
1637  		containsContinue = false;
1638  		containsDefine = false;
1639  
1640  		// Determine global presence of branching instructions
1641  		for(const auto &inst : instruction)
1642  		{
1643  			switch(inst->opcode)
1644  			{
1645  			case OPCODE_CALLNZ:
1646  			case OPCODE_IF:
1647  			case OPCODE_IFC:
1648  			case OPCODE_BREAK:
1649  			case OPCODE_BREAKC:
1650  			case OPCODE_CMP:
1651  			case OPCODE_BREAKP:
1652  			case OPCODE_LEAVE:
1653  			case OPCODE_CONTINUE:
1654  				if(inst->src[0].type != PARAMETER_CONSTBOOL)
1655  				{
1656  					dynamicBranching = true;
1657  				}
1658  
1659  				if(inst->opcode == OPCODE_LEAVE)
1660  				{
1661  					containsLeave = true;
1662  				}
1663  
1664  				if(inst->isBreak())
1665  				{
1666  					containsBreak = true;
1667  				}
1668  
1669  				if(inst->opcode == OPCODE_CONTINUE)
1670  				{
1671  					containsContinue = true;
1672  				}
1673  			case OPCODE_DEF:
1674  			case OPCODE_DEFB:
1675  			case OPCODE_DEFI:
1676  				containsDefine = true;
1677  			default:
1678  				break;
1679  			}
1680  		}
1681  
1682  		// Conservatively determine which instructions are affected by dynamic branching
1683  		int branchDepth = 0;
1684  		int breakDepth = 0;
1685  		int continueDepth = 0;
1686  		bool leaveReturn = false;
1687  		unsigned int functionBegin = 0;
1688  
1689  		for(unsigned int i = 0; i < instruction.size(); i++)
1690  		{
1691  			// If statements and loops
1692  			if(instruction[i]->isBranch() || instruction[i]->isLoop())
1693  			{
1694  				branchDepth++;
1695  			}
1696  			else if(instruction[i]->opcode == OPCODE_ENDIF || instruction[i]->isEndLoop())
1697  			{
1698  				branchDepth--;
1699  			}
1700  
1701  			if(branchDepth > 0)
1702  			{
1703  				instruction[i]->analysisBranch = true;
1704  
1705  				if(instruction[i]->isCall())
1706  				{
1707  					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1708  				}
1709  			}
1710  
1711  			// Break statemement
1712  			if(instruction[i]->isBreak())
1713  			{
1714  				breakDepth++;
1715  			}
1716  
1717  			if(breakDepth > 0)
1718  			{
1719  				if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH)   // Nested loop or switch, don't make the end of it disable the break execution mask
1720  				{
1721  					breakDepth++;
1722  				}
1723  				else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH)
1724  				{
1725  					breakDepth--;
1726  				}
1727  
1728  				instruction[i]->analysisBreak = true;
1729  
1730  				if(instruction[i]->isCall())
1731  				{
1732  					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1733  				}
1734  			}
1735  
1736  			// Continue statement
1737  			if(instruction[i]->opcode == OPCODE_CONTINUE)
1738  			{
1739  				continueDepth++;
1740  			}
1741  
1742  			if(continueDepth > 0)
1743  			{
1744  				if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH)   // Nested loop or switch, don't make the end of it disable the break execution mask
1745  				{
1746  					continueDepth++;
1747  				}
1748  				else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH)
1749  				{
1750  					continueDepth--;
1751  				}
1752  
1753  				instruction[i]->analysisContinue = true;
1754  
1755  				if(instruction[i]->isCall())
1756  				{
1757  					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE);
1758  				}
1759  			}
1760  
1761  			// Return (leave) statement
1762  			if(instruction[i]->opcode == OPCODE_LEAVE)
1763  			{
1764  				leaveReturn = true;
1765  
1766  				// Mark loop body instructions prior to the return statement
1767  				for(unsigned int l = functionBegin; l < i; l++)
1768  				{
1769  					if(instruction[l]->isLoop())
1770  					{
1771  						for(unsigned int r = l + 1; r < i; r++)
1772  						{
1773  							instruction[r]->analysisLeave = true;
1774  						}
1775  
1776  						break;
1777  					}
1778  				}
1779  			}
1780  			else if(instruction[i]->opcode == OPCODE_RET)   // End of the function
1781  			{
1782  				leaveReturn = false;
1783  			}
1784  			else if(instruction[i]->opcode == OPCODE_LABEL)
1785  			{
1786  				functionBegin = i;
1787  			}
1788  
1789  			if(leaveReturn)
1790  			{
1791  				instruction[i]->analysisLeave = true;
1792  
1793  				if(instruction[i]->isCall())
1794  				{
1795  					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE);
1796  				}
1797  			}
1798  		}
1799  	}
1800  
markFunctionAnalysis(unsigned int functionLabel,Analysis flag)1801  	void Shader::markFunctionAnalysis(unsigned int functionLabel, Analysis flag)
1802  	{
1803  		bool marker = false;
1804  		for(auto &inst : instruction)
1805  		{
1806  			if(!marker)
1807  			{
1808  				if(inst->opcode == OPCODE_LABEL && inst->dst.label == functionLabel)
1809  				{
1810  					marker = true;
1811  				}
1812  			}
1813  			else
1814  			{
1815  				if(inst->opcode == OPCODE_RET)
1816  				{
1817  					break;
1818  				}
1819  				else if(inst->isCall())
1820  				{
1821  					markFunctionAnalysis(inst->dst.label, flag);
1822  				}
1823  
1824  				inst->analysis |= flag;
1825  			}
1826  		}
1827  	}
1828  
analyzeSamplers()1829  	void Shader::analyzeSamplers()
1830  	{
1831  		for(const auto &inst : instruction)
1832  		{
1833  			switch(inst->opcode)
1834  			{
1835  			case OPCODE_TEX:
1836  			case OPCODE_TEXBEM:
1837  			case OPCODE_TEXBEML:
1838  			case OPCODE_TEXREG2AR:
1839  			case OPCODE_TEXREG2GB:
1840  			case OPCODE_TEXM3X2TEX:
1841  			case OPCODE_TEXM3X3TEX:
1842  			case OPCODE_TEXM3X3SPEC:
1843  			case OPCODE_TEXM3X3VSPEC:
1844  			case OPCODE_TEXREG2RGB:
1845  			case OPCODE_TEXDP3TEX:
1846  			case OPCODE_TEXM3X2DEPTH:
1847  			case OPCODE_TEXLDD:
1848  			case OPCODE_TEXLDL:
1849  			case OPCODE_TEXLOD:
1850  			case OPCODE_TEXOFFSET:
1851  			case OPCODE_TEXOFFSETBIAS:
1852  			case OPCODE_TEXLODOFFSET:
1853  			case OPCODE_TEXELFETCH:
1854  			case OPCODE_TEXELFETCHOFFSET:
1855  			case OPCODE_TEXGRAD:
1856  			case OPCODE_TEXGRADOFFSET:
1857  				{
1858  					Parameter &dst = inst->dst;
1859  					Parameter &src1 = inst->src[1];
1860  
1861  					if(majorVersion >= 2)
1862  					{
1863  						if(src1.type == PARAMETER_SAMPLER)
1864  						{
1865  							usedSamplers |= 1 << src1.index;
1866  						}
1867  					}
1868  					else
1869  					{
1870  						usedSamplers |= 1 << dst.index;
1871  					}
1872  				}
1873  				break;
1874  			default:
1875  				break;
1876  			}
1877  		}
1878  	}
1879  
1880  	// Assigns a unique index to each call instruction, on a per label basis.
1881  	// This is used to know what basic block to return to.
analyzeCallSites()1882  	void Shader::analyzeCallSites()
1883  	{
1884  		std::unordered_map<int, int> callSiteIndices;
1885  
1886  		for(auto &inst : instruction)
1887  		{
1888  			if(inst->opcode == OPCODE_CALL || inst->opcode == OPCODE_CALLNZ)
1889  			{
1890  				inst->dst.callSite = callSiteIndices[inst->dst.label]++;
1891  			}
1892  		}
1893  	}
1894  
analyzeIndirectAddressing()1895  	void Shader::analyzeIndirectAddressing()
1896  	{
1897  		indirectAddressableTemporaries = false;
1898  		indirectAddressableInput = false;
1899  		indirectAddressableOutput = false;
1900  
1901  		for(const auto &inst : instruction)
1902  		{
1903  			if(inst->dst.rel.type != PARAMETER_VOID)
1904  			{
1905  				switch(inst->dst.type)
1906  				{
1907  				case PARAMETER_TEMP:   indirectAddressableTemporaries = true; break;
1908  				case PARAMETER_INPUT:  indirectAddressableInput = true;       break;
1909  				case PARAMETER_OUTPUT: indirectAddressableOutput = true;      break;
1910  				default: break;
1911  				}
1912  			}
1913  
1914  			for(int j = 0; j < 3; j++)
1915  			{
1916  				if(inst->src[j].rel.type != PARAMETER_VOID)
1917  				{
1918  					switch(inst->src[j].type)
1919  					{
1920  					case PARAMETER_TEMP:   indirectAddressableTemporaries = true; break;
1921  					case PARAMETER_INPUT:  indirectAddressableInput = true;       break;
1922  					case PARAMETER_OUTPUT: indirectAddressableOutput = true;      break;
1923  					default: break;
1924  					}
1925  				}
1926  			}
1927  		}
1928  	}
1929  
1930  	// analyzeLimits analyzes the whole shader program to determine the deepest
1931  	// nesting of control flow blocks and function calls. These calculations
1932  	// are stored into the limits member, and is used by the programs to
1933  	// allocate stack storage variables.
analyzeLimits()1934  	void Shader::analyzeLimits()
1935  	{
1936  		typedef unsigned int FunctionID;
1937  
1938  		// Identifier of the function with the main entry point.
1939  		constexpr FunctionID MAIN_ID = 0xF0000000;
1940  
1941  		// Invalid function identifier.
1942  		constexpr FunctionID INVALID_ID = ~0U;
1943  
1944  		// Limits on a single function.
1945  		struct FunctionLimits
1946  		{
1947  			uint32_t loops = 0; // maximum nested loop and reps.
1948  			uint32_t ifs = 0; // maximum nested if statements.
1949  			uint32_t stack = 0; // maximum call depth.
1950  		};
1951  
1952  		// Information about a single function in the shader.
1953  		struct FunctionInfo
1954  		{
1955  			FunctionLimits limits;
1956  			std::unordered_set<FunctionID> calls; // What this function calls.
1957  			bool reachable; // Is this function reachable?
1958  		};
1959  
1960  		std::unordered_map<FunctionID, FunctionInfo> functions;
1961  
1962  		uint32_t maxLabel = 0; // Highest label found
1963  
1964  		// Add a definition for the main entry point.
1965  		// This starts at the beginning of the instructions and does not have
1966  		// its own label.
1967  		functions[MAIN_ID] = FunctionInfo();
1968  		functions[MAIN_ID].reachable = true;
1969  
1970  		// Begin by doing a pass over the instructions to identify all the
1971  		// functions. These start with a label and end with a ret. Note that
1972  		// functions can have labels within them.
1973  		FunctionID currentFunc = MAIN_ID;
1974  		for(auto &inst : instruction)
1975  		{
1976  			switch (inst->opcode)
1977  			{
1978  				case OPCODE_LABEL:
1979  					if (currentFunc == INVALID_ID)
1980  					{
1981  						// Start of a function.
1982  						FunctionID id = inst->dst.label;
1983  						ASSERT(id != MAIN_ID); // If this fires, we're going to have to represent main with something else.
1984  						functions[id] = FunctionInfo();
1985  					}
1986  					break;
1987  				case OPCODE_RET:
1988  					currentFunc = INVALID_ID;
1989  					break;
1990  				default:
1991  					break;
1992  			}
1993  		}
1994  
1995  		// Limits for the currently analyzed function.
1996  		FunctionLimits currentLimits;
1997  
1998  		// Now loop over the instructions gathering the limits of each of the
1999  		// functions.
2000  		currentFunc = MAIN_ID;
2001  		for(size_t i = 0; i < instruction.size(); i++)
2002  		{
2003  			const auto& inst = instruction[i];
2004  			switch (inst->opcode)
2005  			{
2006  				case OPCODE_LABEL:
2007  				{
2008  					maxLabel = std::max(maxLabel, inst->dst.label);
2009  					if (currentFunc == INVALID_ID)
2010  					{
2011  						// Start of a function.
2012  						FunctionID id = inst->dst.label;
2013  						ASSERT(functions.find(id) != functions.end()); // Sanity check
2014  						currentFunc = id;
2015  					}
2016  					break;
2017  				}
2018  				case OPCODE_CALL:
2019  				case OPCODE_CALLNZ:
2020  				{
2021  					ASSERT(currentFunc != INVALID_ID);
2022  					FunctionID id = inst->dst.label;
2023  					ASSERT(functions.find(id) != functions.end());
2024  					functions[currentFunc].calls.emplace(id);
2025  					functions[id].reachable = true;
2026  					break;
2027  				}
2028  				case OPCODE_LOOP:
2029  				case OPCODE_REP:
2030  				case OPCODE_WHILE:
2031  				case OPCODE_SWITCH: // Not a mistake - switches share loopReps.
2032  				{
2033  					ASSERT(currentFunc != INVALID_ID);
2034  					++currentLimits.loops;
2035  					auto& func = functions[currentFunc];
2036  					func.limits.loops = std::max(func.limits.loops, currentLimits.loops);
2037  					break;
2038  				}
2039  				case OPCODE_ENDLOOP:
2040  				case OPCODE_ENDREP:
2041  				case OPCODE_ENDWHILE:
2042  				case OPCODE_ENDSWITCH:
2043  				{
2044  					ASSERT(currentLimits.loops > 0);
2045  					--currentLimits.loops;
2046  					break;
2047  				}
2048  				case OPCODE_IF:
2049  				case OPCODE_IFC:
2050  				{
2051  					ASSERT(currentFunc != INVALID_ID);
2052  					++currentLimits.ifs;
2053  					auto& func = functions[currentFunc];
2054  					func.limits.ifs = std::max(func.limits.ifs, currentLimits.ifs);
2055  					break;
2056  				}
2057  				case OPCODE_ENDIF:
2058  				{
2059  					ASSERT(currentLimits.ifs > 0);
2060  					currentLimits.ifs--;
2061  					break;
2062  				}
2063  				case OPCODE_RET:
2064  				{
2065  					// Must be in a function to return.
2066  					ASSERT(currentFunc != INVALID_ID);
2067  
2068  					// All stacks should be popped before returning.
2069  					ASSERT(currentLimits.ifs == 0);
2070  					ASSERT(currentLimits.loops == 0);
2071  
2072  					currentFunc = INVALID_ID;
2073  					currentLimits = FunctionLimits();
2074  					break;
2075  				}
2076  				default:
2077  					break;
2078  			}
2079  		}
2080  
2081  #if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
2082  		// Assert that every function is reachable (these should have been
2083  		// stripped in earlier stages). Unreachable functions may be code
2084  		// generated, but their own limits are not considered below, potentially
2085  		// causing OOB indexing in later stages.
2086  		// If we ever find cases where there are unreachable functions, we can
2087  		// replace this assert with NO-OPing or stripping out the dead
2088  		// functions.
2089  		for (const auto &it : functions) { ASSERT(it.second.reachable); }
2090  #endif
2091  
2092  		// We have now gathered all the information about each of the functions
2093  		// in the shader. Traverse these functions starting from the main
2094  		// function to calculate the maximum limits across the entire shader.
2095  
2096  		std::unordered_set<FunctionID> visited;
2097  		std::function<Limits(FunctionID)> traverse;
2098  		traverse = [&](FunctionID id) -> Limits
2099  		{
2100  			const auto& func = functions[id];
2101  			ASSERT(visited.count(id) == 0); // Sanity check: Recursive functions are not allowed.
2102  			visited.insert(id);
2103  			Limits limits;
2104  			limits.stack = 1;
2105  			for (auto callee : func.calls)
2106  			{
2107  				auto calleeLimits = traverse(callee);
2108  				limits.loops = std::max(limits.loops, calleeLimits.loops);
2109  				limits.ifs = std::max(limits.ifs, calleeLimits.ifs);
2110  				limits.stack = std::max(limits.stack, calleeLimits.stack + 1);
2111  			}
2112  			visited.erase(id);
2113  
2114  			limits.loops += func.limits.loops;
2115  			limits.ifs += func.limits.ifs;
2116  			return limits;
2117  		};
2118  
2119  		limits = traverse(MAIN_ID);
2120  		limits.maxLabel = maxLabel;
2121  	}
2122  }
2123