• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
#include "OutputASM.h"
#include "Common/Math.hpp"

#include "common/debug.h"
#include "InfoSink.h"

#include "libGLESv2/Shader.h"

#include <GLES2/gl2.h>
#include <GLES2/gl2ext.h>
#include <GLES3/gl3.h>
#include <GL/glcorearb.h>
#include <GL/glext.h>

#include <stdio.h>
#include <stdlib.h>
30 
31 namespace
32 {
glVariableType(const TType & type)33 	GLenum glVariableType(const TType &type)
34 	{
35 		switch(type.getBasicType())
36 		{
37 		case EbtFloat:
38 			if(type.isScalar())
39 			{
40 				return GL_FLOAT;
41 			}
42 			else if(type.isVector())
43 			{
44 				switch(type.getNominalSize())
45 				{
46 				case 2: return GL_FLOAT_VEC2;
47 				case 3: return GL_FLOAT_VEC3;
48 				case 4: return GL_FLOAT_VEC4;
49 				default: UNREACHABLE(type.getNominalSize());
50 				}
51 			}
52 			else if(type.isMatrix())
53 			{
54 				switch(type.getNominalSize())
55 				{
56 				case 2:
57 					switch(type.getSecondarySize())
58 					{
59 					case 2: return GL_FLOAT_MAT2;
60 					case 3: return GL_FLOAT_MAT2x3;
61 					case 4: return GL_FLOAT_MAT2x4;
62 					default: UNREACHABLE(type.getSecondarySize());
63 					}
64 				case 3:
65 					switch(type.getSecondarySize())
66 					{
67 					case 2: return GL_FLOAT_MAT3x2;
68 					case 3: return GL_FLOAT_MAT3;
69 					case 4: return GL_FLOAT_MAT3x4;
70 					default: UNREACHABLE(type.getSecondarySize());
71 					}
72 				case 4:
73 					switch(type.getSecondarySize())
74 					{
75 					case 2: return GL_FLOAT_MAT4x2;
76 					case 3: return GL_FLOAT_MAT4x3;
77 					case 4: return GL_FLOAT_MAT4;
78 					default: UNREACHABLE(type.getSecondarySize());
79 					}
80 				default: UNREACHABLE(type.getNominalSize());
81 				}
82 			}
83 			else UNREACHABLE(0);
84 			break;
85 		case EbtInt:
86 			if(type.isScalar())
87 			{
88 				return GL_INT;
89 			}
90 			else if(type.isVector())
91 			{
92 				switch(type.getNominalSize())
93 				{
94 				case 2: return GL_INT_VEC2;
95 				case 3: return GL_INT_VEC3;
96 				case 4: return GL_INT_VEC4;
97 				default: UNREACHABLE(type.getNominalSize());
98 				}
99 			}
100 			else UNREACHABLE(0);
101 			break;
102 		case EbtUInt:
103 			if(type.isScalar())
104 			{
105 				return GL_UNSIGNED_INT;
106 			}
107 			else if(type.isVector())
108 			{
109 				switch(type.getNominalSize())
110 				{
111 				case 2: return GL_UNSIGNED_INT_VEC2;
112 				case 3: return GL_UNSIGNED_INT_VEC3;
113 				case 4: return GL_UNSIGNED_INT_VEC4;
114 				default: UNREACHABLE(type.getNominalSize());
115 				}
116 			}
117 			else UNREACHABLE(0);
118 			break;
119 		case EbtBool:
120 			if(type.isScalar())
121 			{
122 				return GL_BOOL;
123 			}
124 			else if(type.isVector())
125 			{
126 				switch(type.getNominalSize())
127 				{
128 				case 2: return GL_BOOL_VEC2;
129 				case 3: return GL_BOOL_VEC3;
130 				case 4: return GL_BOOL_VEC4;
131 				default: UNREACHABLE(type.getNominalSize());
132 				}
133 			}
134 			else UNREACHABLE(0);
135 			break;
136 		case EbtSampler2D:
137 			return GL_SAMPLER_2D;
138 		case EbtISampler2D:
139 			return GL_INT_SAMPLER_2D;
140 		case EbtUSampler2D:
141 			return GL_UNSIGNED_INT_SAMPLER_2D;
142 		case EbtSamplerCube:
143 			return GL_SAMPLER_CUBE;
144 		case EbtSampler2DRect:
145 			return GL_SAMPLER_2D_RECT_ARB;
146 		case EbtISamplerCube:
147 			return GL_INT_SAMPLER_CUBE;
148 		case EbtUSamplerCube:
149 			return GL_UNSIGNED_INT_SAMPLER_CUBE;
150 		case EbtSamplerExternalOES:
151 			return GL_SAMPLER_EXTERNAL_OES;
152 		case EbtSampler3D:
153 			return GL_SAMPLER_3D_OES;
154 		case EbtISampler3D:
155 			return GL_INT_SAMPLER_3D;
156 		case EbtUSampler3D:
157 			return GL_UNSIGNED_INT_SAMPLER_3D;
158 		case EbtSampler2DArray:
159 			return GL_SAMPLER_2D_ARRAY;
160 		case EbtISampler2DArray:
161 			return GL_INT_SAMPLER_2D_ARRAY;
162 		case EbtUSampler2DArray:
163 			return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY;
164 		case EbtSampler2DShadow:
165 			return GL_SAMPLER_2D_SHADOW;
166 		case EbtSamplerCubeShadow:
167 			return GL_SAMPLER_CUBE_SHADOW;
168 		case EbtSampler2DArrayShadow:
169 			return GL_SAMPLER_2D_ARRAY_SHADOW;
170 		default:
171 			UNREACHABLE(type.getBasicType());
172 			break;
173 		}
174 
175 		return GL_NONE;
176 	}
177 
glVariablePrecision(const TType & type)178 	GLenum glVariablePrecision(const TType &type)
179 	{
180 		if(type.getBasicType() == EbtFloat)
181 		{
182 			switch(type.getPrecision())
183 			{
184 			case EbpHigh:   return GL_HIGH_FLOAT;
185 			case EbpMedium: return GL_MEDIUM_FLOAT;
186 			case EbpLow:    return GL_LOW_FLOAT;
187 			case EbpUndefined:
188 				// Should be defined as the default precision by the parser
189 			default: UNREACHABLE(type.getPrecision());
190 			}
191 		}
192 		else if(type.getBasicType() == EbtInt)
193 		{
194 			switch(type.getPrecision())
195 			{
196 			case EbpHigh:   return GL_HIGH_INT;
197 			case EbpMedium: return GL_MEDIUM_INT;
198 			case EbpLow:    return GL_LOW_INT;
199 			case EbpUndefined:
200 				// Should be defined as the default precision by the parser
201 			default: UNREACHABLE(type.getPrecision());
202 			}
203 		}
204 
205 		// Other types (boolean, sampler) don't have a precision
206 		return GL_NONE;
207 	}
208 }
209 
210 namespace glsl
211 {
212 	// Integer to TString conversion
str(int i)213 	TString str(int i)
214 	{
215 		char buffer[20];
216 		sprintf(buffer, "%d", i);
217 		return buffer;
218 	}
219 
220 	class Temporary : public TIntermSymbol
221 	{
222 	public:
Temporary(OutputASM * assembler)223 		Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler)
224 		{
225 		}
226 
~Temporary()227 		~Temporary()
228 		{
229 			assembler->freeTemporary(this);
230 		}
231 
232 	private:
233 		OutputASM *const assembler;
234 	};
235 
236 	class Constant : public TIntermConstantUnion
237 	{
238 	public:
Constant(float x,float y,float z,float w)239 		Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false))
240 		{
241 			constants[0].setFConst(x);
242 			constants[1].setFConst(y);
243 			constants[2].setFConst(z);
244 			constants[3].setFConst(w);
245 		}
246 
Constant(bool b)247 		Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false))
248 		{
249 			constants[0].setBConst(b);
250 		}
251 
Constant(int i)252 		Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false))
253 		{
254 			constants[0].setIConst(i);
255 		}
256 
~Constant()257 		~Constant()
258 		{
259 		}
260 
261 	private:
262 		ConstantUnion constants[4];
263 	};
264 
ShaderVariable(const TType & type,const std::string & name,int registerIndex)265 	ShaderVariable::ShaderVariable(const TType& type, const std::string& name, int registerIndex) :
266 		type(type.isStruct() ? GL_NONE : glVariableType(type)), precision(glVariablePrecision(type)),
267 		name(name), arraySize(type.getArraySize()), registerIndex(registerIndex)
268 	{
269 		if(type.isStruct())
270 		{
271 			for(const auto& field : type.getStruct()->fields())
272 			{
273 				fields.push_back(ShaderVariable(*(field->type()), field->name().c_str(), -1));
274 			}
275 		}
276 	}
277 
Uniform(const TType & type,const std::string & name,int registerIndex,int blockId,const BlockMemberInfo & blockMemberInfo)278 	Uniform::Uniform(const TType& type, const std::string &name, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) :
279 		ShaderVariable(type, name, registerIndex), blockId(blockId), blockInfo(blockMemberInfo)
280 	{
281 	}
282 
UniformBlock(const std::string & name,unsigned int dataSize,unsigned int arraySize,TLayoutBlockStorage layout,bool isRowMajorLayout,int registerIndex,int blockId)283 	UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize,
284 	                           TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) :
285 		name(name), dataSize(dataSize), arraySize(arraySize), layout(layout),
286 		isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId)
287 	{
288 	}
289 
BlockLayoutEncoder()290 	BlockLayoutEncoder::BlockLayoutEncoder()
291 		: mCurrentOffset(0)
292 	{
293 	}
294 
encodeType(const TType & type)295 	BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type)
296 	{
297 		int arrayStride;
298 		int matrixStride;
299 
300 		bool isRowMajor = type.getLayoutQualifier().matrixPacking == EmpRowMajor;
301 		getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride);
302 
303 		const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent),
304 		                                 static_cast<int>(arrayStride * BytesPerComponent),
305 		                                 static_cast<int>(matrixStride * BytesPerComponent),
306 		                                 (matrixStride > 0) && isRowMajor);
307 
308 		advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride);
309 
310 		return memberInfo;
311 	}
312 
313 	// static
getBlockRegister(const BlockMemberInfo & info)314 	size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info)
315 	{
316 		return (info.offset / BytesPerComponent) / ComponentsPerRegister;
317 	}
318 
319 	// static
getBlockRegisterElement(const BlockMemberInfo & info)320 	size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info)
321 	{
322 		return (info.offset / BytesPerComponent) % ComponentsPerRegister;
323 	}
324 
nextRegister()325 	void BlockLayoutEncoder::nextRegister()
326 	{
327 		mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister);
328 	}
329 
Std140BlockEncoder()330 	Std140BlockEncoder::Std140BlockEncoder() : BlockLayoutEncoder()
331 	{
332 	}
333 
enterAggregateType()334 	void Std140BlockEncoder::enterAggregateType()
335 	{
336 		nextRegister();
337 	}
338 
exitAggregateType()339 	void Std140BlockEncoder::exitAggregateType()
340 	{
341 		nextRegister();
342 	}
343 
getBlockLayoutInfo(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int * arrayStrideOut,int * matrixStrideOut)344 	void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut)
345 	{
346 		size_t baseAlignment = 0;
347 		int matrixStride = 0;
348 		int arrayStride = 0;
349 
350 		if(type.isMatrix())
351 		{
352 			baseAlignment = ComponentsPerRegister;
353 			matrixStride = ComponentsPerRegister;
354 
355 			if(arraySize > 0)
356 			{
357 				const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
358 				arrayStride = ComponentsPerRegister * numRegisters;
359 			}
360 		}
361 		else if(arraySize > 0)
362 		{
363 			baseAlignment = ComponentsPerRegister;
364 			arrayStride = ComponentsPerRegister;
365 		}
366 		else
367 		{
368 			const size_t numComponents = type.getElementSize();
369 			baseAlignment = (numComponents == 3 ? 4u : numComponents);
370 		}
371 
372 		mCurrentOffset = sw::align(mCurrentOffset, baseAlignment);
373 
374 		*matrixStrideOut = matrixStride;
375 		*arrayStrideOut = arrayStride;
376 	}
377 
advanceOffset(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int arrayStride,int matrixStride)378 	void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride)
379 	{
380 		if(arraySize > 0)
381 		{
382 			mCurrentOffset += arrayStride * arraySize;
383 		}
384 		else if(type.isMatrix())
385 		{
386 			ASSERT(matrixStride == ComponentsPerRegister);
387 			const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
388 			mCurrentOffset += ComponentsPerRegister * numRegisters;
389 		}
390 		else
391 		{
392 			mCurrentOffset += type.getElementSize();
393 		}
394 	}
395 
Attribute()396 	Attribute::Attribute()
397 	{
398 		type = GL_NONE;
399 		arraySize = 0;
400 		registerIndex = 0;
401 	}
402 
Attribute(GLenum type,const std::string & name,int arraySize,int layoutLocation,int registerIndex)403 	Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int layoutLocation, int registerIndex)
404 	{
405 		this->type = type;
406 		this->name = name;
407 		this->arraySize = arraySize;
408 		this->layoutLocation = layoutLocation;
409 		this->registerIndex = registerIndex;
410 	}
411 
getPixelShader() const412 	sw::PixelShader *Shader::getPixelShader() const
413 	{
414 		return nullptr;
415 	}
416 
getVertexShader() const417 	sw::VertexShader *Shader::getVertexShader() const
418 	{
419 		return nullptr;
420 	}
421 
TextureFunction(const TString & nodeName)422 	OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false)
423 	{
424 		TString name = TFunction::unmangleName(nodeName);
425 
426 		if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D" || name == "texture2DRect")
427 		{
428 			method = IMPLICIT;
429 		}
430 		else if(name == "texture2DProj" || name == "textureProj" || name == "texture2DRectProj")
431 		{
432 			method = IMPLICIT;
433 			proj = true;
434 		}
435 		else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod")
436 		{
437 			method = LOD;
438 		}
439 		else if(name == "texture2DProjLod" || name == "textureProjLod")
440 		{
441 			method = LOD;
442 			proj = true;
443 		}
444 		else if(name == "textureSize")
445 		{
446 			method = SIZE;
447 		}
448 		else if(name == "textureOffset")
449 		{
450 			method = IMPLICIT;
451 			offset = true;
452 		}
453 		else if(name == "textureProjOffset")
454 		{
455 			method = IMPLICIT;
456 			offset = true;
457 			proj = true;
458 		}
459 		else if(name == "textureLodOffset")
460 		{
461 			method = LOD;
462 			offset = true;
463 		}
464 		else if(name == "textureProjLodOffset")
465 		{
466 			method = LOD;
467 			proj = true;
468 			offset = true;
469 		}
470 		else if(name == "texelFetch")
471 		{
472 			method = FETCH;
473 		}
474 		else if(name == "texelFetchOffset")
475 		{
476 			method = FETCH;
477 			offset = true;
478 		}
479 		else if(name == "textureGrad")
480 		{
481 			method = GRAD;
482 		}
483 		else if(name == "textureGradOffset")
484 		{
485 			method = GRAD;
486 			offset = true;
487 		}
488 		else if(name == "textureProjGrad")
489 		{
490 			method = GRAD;
491 			proj = true;
492 		}
493 		else if(name == "textureProjGradOffset")
494 		{
495 			method = GRAD;
496 			proj = true;
497 			offset = true;
498 		}
499 		else UNREACHABLE(0);
500 	}
501 
OutputASM(TParseContext & context,Shader * shaderObject)502 	OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context)
503 	{
504 		shader = nullptr;
505 		pixelShader = nullptr;
506 		vertexShader = nullptr;
507 
508 		if(shaderObject)
509 		{
510 			shader = shaderObject->getShader();
511 			pixelShader = shaderObject->getPixelShader();
512 			vertexShader = shaderObject->getVertexShader();
513 		}
514 
515 		functionArray.push_back(Function(0, "main(", nullptr, nullptr));
516 		currentFunction = 0;
517 		outputQualifier = EvqOutput;   // Initialize outputQualifier to any value other than EvqFragColor or EvqFragData
518 	}
519 
~OutputASM()520 	OutputASM::~OutputASM()
521 	{
522 	}
523 
output()524 	void OutputASM::output()
525 	{
526 		if(shader)
527 		{
528 			emitShader(GLOBAL);
529 
530 			if(functionArray.size() > 1)   // Only call main() when there are other functions
531 			{
532 				Instruction *callMain = emit(sw::Shader::OPCODE_CALL);
533 				callMain->dst.type = sw::Shader::PARAMETER_LABEL;
534 				callMain->dst.index = 0;   // main()
535 
536 				emit(sw::Shader::OPCODE_RET);
537 			}
538 
539 			emitShader(FUNCTION);
540 		}
541 	}
542 
emitShader(Scope scope)543 	void OutputASM::emitShader(Scope scope)
544 	{
545 		emitScope = scope;
546 		currentScope = GLOBAL;
547 		mContext.getTreeRoot()->traverse(this);
548 	}
549 
freeTemporary(Temporary * temporary)550 	void OutputASM::freeTemporary(Temporary *temporary)
551 	{
552 		free(temporaries, temporary);
553 	}
554 
getOpcode(sw::Shader::Opcode op,TIntermTyped * in) const555 	sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const
556 	{
557 		TBasicType baseType = in->getType().getBasicType();
558 
559 		switch(op)
560 		{
561 		case sw::Shader::OPCODE_NEG:
562 			switch(baseType)
563 			{
564 			case EbtInt:
565 			case EbtUInt:
566 				return sw::Shader::OPCODE_INEG;
567 			case EbtFloat:
568 			default:
569 				return op;
570 			}
571 		case sw::Shader::OPCODE_ABS:
572 			switch(baseType)
573 			{
574 			case EbtInt:
575 				return sw::Shader::OPCODE_IABS;
576 			case EbtFloat:
577 			default:
578 				return op;
579 			}
580 		case sw::Shader::OPCODE_SGN:
581 			switch(baseType)
582 			{
583 			case EbtInt:
584 				return sw::Shader::OPCODE_ISGN;
585 			case EbtFloat:
586 			default:
587 				return op;
588 			}
589 		case sw::Shader::OPCODE_ADD:
590 			switch(baseType)
591 			{
592 			case EbtInt:
593 			case EbtUInt:
594 				return sw::Shader::OPCODE_IADD;
595 			case EbtFloat:
596 			default:
597 				return op;
598 			}
599 		case sw::Shader::OPCODE_SUB:
600 			switch(baseType)
601 			{
602 			case EbtInt:
603 			case EbtUInt:
604 				return sw::Shader::OPCODE_ISUB;
605 			case EbtFloat:
606 			default:
607 				return op;
608 			}
609 		case sw::Shader::OPCODE_MUL:
610 			switch(baseType)
611 			{
612 			case EbtInt:
613 			case EbtUInt:
614 				return sw::Shader::OPCODE_IMUL;
615 			case EbtFloat:
616 			default:
617 				return op;
618 			}
619 		case sw::Shader::OPCODE_DIV:
620 			switch(baseType)
621 			{
622 			case EbtInt:
623 				return sw::Shader::OPCODE_IDIV;
624 			case EbtUInt:
625 				return sw::Shader::OPCODE_UDIV;
626 			case EbtFloat:
627 			default:
628 				return op;
629 			}
630 		case sw::Shader::OPCODE_IMOD:
631 			return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op;
632 		case sw::Shader::OPCODE_ISHR:
633 			return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op;
634 		case sw::Shader::OPCODE_MIN:
635 			switch(baseType)
636 			{
637 			case EbtInt:
638 				return sw::Shader::OPCODE_IMIN;
639 			case EbtUInt:
640 				return sw::Shader::OPCODE_UMIN;
641 			case EbtFloat:
642 			default:
643 				return op;
644 			}
645 		case sw::Shader::OPCODE_MAX:
646 			switch(baseType)
647 			{
648 			case EbtInt:
649 				return sw::Shader::OPCODE_IMAX;
650 			case EbtUInt:
651 				return sw::Shader::OPCODE_UMAX;
652 			case EbtFloat:
653 			default:
654 				return op;
655 			}
656 		default:
657 			return op;
658 		}
659 	}
660 
visitSymbol(TIntermSymbol * symbol)661 	void OutputASM::visitSymbol(TIntermSymbol *symbol)
662 	{
663 		// The type of vertex outputs and fragment inputs with the same name must match (validated at link time),
664 		// so declare them but don't assign a register index yet (one will be assigned when referenced in reachable code).
665 		switch(symbol->getQualifier())
666 		{
667 		case EvqVaryingIn:
668 		case EvqVaryingOut:
669 		case EvqInvariantVaryingIn:
670 		case EvqInvariantVaryingOut:
671 		case EvqVertexOut:
672 		case EvqFragmentIn:
673 			if(symbol->getBasicType() != EbtInvariant)   // Typeless declarations are not new varyings
674 			{
675 				declareVarying(symbol, -1);
676 			}
677 			break;
678 		case EvqFragmentOut:
679 			declareFragmentOutput(symbol);
680 			break;
681 		default:
682 			break;
683 		}
684 
685 		TInterfaceBlock* block = symbol->getType().getInterfaceBlock();
686 		// OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables:
687 		// "All members of a named uniform block declared with a shared or std140 layout qualifier
688 		// are considered active, even if they are not referenced in any shader in the program.
689 		// The uniform block itself is also considered active, even if no member of the block is referenced."
690 		if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140)))
691 		{
692 			uniformRegister(symbol);
693 		}
694 	}
695 
visitBinary(Visit visit,TIntermBinary * node)696 	bool OutputASM::visitBinary(Visit visit, TIntermBinary *node)
697 	{
698 		if(currentScope != emitScope)
699 		{
700 			return false;
701 		}
702 
703 		TIntermTyped *result = node;
704 		TIntermTyped *left = node->getLeft();
705 		TIntermTyped *right = node->getRight();
706 		const TType &leftType = left->getType();
707 		const TType &rightType = right->getType();
708 
709 		if(isSamplerRegister(result))
710 		{
711 			return false;   // Don't traverse, the register index is determined statically
712 		}
713 
714 		switch(node->getOp())
715 		{
716 		case EOpAssign:
717 			assert(visit == PreVisit);
718 			right->traverse(this);
719 			assignLvalue(left, right);
720 			copy(result, right);
721 			return false;
722 		case EOpInitialize:
723 			assert(visit == PreVisit);
724 			// Constant arrays go into the constant register file.
725 			if(leftType.getQualifier() == EvqConstExpr && leftType.isArray() && leftType.getArraySize() > 1)
726 			{
727 				for(int i = 0; i < left->totalRegisterCount(); i++)
728 				{
729 					emit(sw::Shader::OPCODE_DEF, left, i, right, i);
730 				}
731 			}
732 			else
733 			{
734 				right->traverse(this);
735 				copy(left, right);
736 			}
737 			return false;
738 		case EOpMatrixTimesScalarAssign:
739 			assert(visit == PreVisit);
740 			right->traverse(this);
741 			for(int i = 0; i < leftType.getNominalSize(); i++)
742 			{
743 				emit(sw::Shader::OPCODE_MUL, result, i, left, i, right);
744 			}
745 
746 			assignLvalue(left, result);
747 			return false;
748 		case EOpVectorTimesMatrixAssign:
749 			assert(visit == PreVisit);
750 			{
751 				// The left operand may contain a swizzle serving double-duty as
752 				// swizzle and writemask, so it's important that we traverse it
753 				// first. Otherwise we may end up never setting up our left
754 				// operand correctly.
755 				left->traverse(this);
756 				right->traverse(this);
757 				int size = leftType.getNominalSize();
758 
759 				for(int i = 0; i < size; i++)
760 				{
761 					Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i);
762 					dot->dst.mask = 1 << i;
763 				}
764 
765 				assignLvalue(left, result);
766 			}
767 			return false;
768 		case EOpMatrixTimesMatrixAssign:
769 			assert(visit == PreVisit);
770 			{
771 				right->traverse(this);
772 				int dim = leftType.getNominalSize();
773 
774 				for(int i = 0; i < dim; i++)
775 				{
776 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
777 					mul->src[1].swizzle = 0x00;
778 
779 					for(int j = 1; j < dim; j++)
780 					{
781 						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
782 						mad->src[1].swizzle = j * 0x55;
783 					}
784 				}
785 
786 				assignLvalue(left, result);
787 			}
788 			return false;
789 		case EOpIndexDirect:
790 		case EOpIndexIndirect:
791 		case EOpIndexDirectStruct:
792 		case EOpIndexDirectInterfaceBlock:
793 			assert(visit == PreVisit);
794 			evaluateRvalue(node);
795 			return false;
796 		case EOpVectorSwizzle:
797 			if(visit == PostVisit)
798 			{
799 				int swizzle = 0;
800 				TIntermAggregate *components = right->getAsAggregate();
801 
802 				if(components)
803 				{
804 					TIntermSequence &sequence = components->getSequence();
805 					int component = 0;
806 
807 					for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++)
808 					{
809 						TIntermConstantUnion *element = (*sit)->getAsConstantUnion();
810 
811 						if(element)
812 						{
813 							int i = element->getUnionArrayPointer()[0].getIConst();
814 							swizzle |= i << (component * 2);
815 							component++;
816 						}
817 						else UNREACHABLE(0);
818 					}
819 				}
820 				else UNREACHABLE(0);
821 
822 				Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);
823 				mov->src[0].swizzle = swizzle;
824 			}
825 			break;
826 		case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break;
827 		case EOpAdd:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right);       break;
828 		case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break;
829 		case EOpSub:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right);       break;
830 		case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break;
831 		case EOpMul:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right);       break;
832 		case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break;
833 		case EOpDiv:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right);       break;
834 		case EOpIModAssign:          if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break;
835 		case EOpIMod:                if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right);       break;
836 		case EOpBitShiftLeftAssign:  if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break;
837 		case EOpBitShiftLeft:        if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right);       break;
838 		case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break;
839 		case EOpBitShiftRight:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right);       break;
840 		case EOpBitwiseAndAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break;
841 		case EOpBitwiseAnd:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right);       break;
842 		case EOpBitwiseXorAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break;
843 		case EOpBitwiseXor:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right);       break;
844 		case EOpBitwiseOrAssign:     if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right);  break;
845 		case EOpBitwiseOr:           if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right);        break;
846 		case EOpEqual:
847 			if(visit == PostVisit)
848 			{
849 				emitBinary(sw::Shader::OPCODE_EQ, result, left, right);
850 
851 				for(int index = 1; index < left->totalRegisterCount(); index++)
852 				{
853 					Temporary equal(this);
854 					emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index);
855 					emit(sw::Shader::OPCODE_AND, result, result, &equal);
856 				}
857 			}
858 			break;
859 		case EOpNotEqual:
860 			if(visit == PostVisit)
861 			{
862 				emitBinary(sw::Shader::OPCODE_NE, result, left, right);
863 
864 				for(int index = 1; index < left->totalRegisterCount(); index++)
865 				{
866 					Temporary notEqual(this);
867 					emit(sw::Shader::OPCODE_NE, &notEqual, 0, left, index, right, index);
868 					emit(sw::Shader::OPCODE_OR, result, result, &notEqual);
869 				}
870 			}
871 			break;
872 		case EOpLessThan:                if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break;
873 		case EOpGreaterThan:             if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break;
874 		case EOpLessThanEqual:           if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break;
875 		case EOpGreaterThanEqual:        if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break;
876 		case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break;
877 		case EOpVectorTimesScalar:       if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break;
878 		case EOpMatrixTimesScalar:
879 			if(visit == PostVisit)
880 			{
881 				if(left->isMatrix())
882 				{
883 					for(int i = 0; i < leftType.getNominalSize(); i++)
884 					{
885 						emit(sw::Shader::OPCODE_MUL, result, i, left, i, right, 0);
886 					}
887 				}
888 				else if(right->isMatrix())
889 				{
890 					for(int i = 0; i < rightType.getNominalSize(); i++)
891 					{
892 						emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
893 					}
894 				}
895 				else UNREACHABLE(0);
896 			}
897 			break;
898 		case EOpVectorTimesMatrix:
899 			if(visit == PostVisit)
900 			{
901 				sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize());
902 
903 				int size = rightType.getNominalSize();
904 				for(int i = 0; i < size; i++)
905 				{
906 					Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i);
907 					dot->dst.mask = 1 << i;
908 				}
909 			}
910 			break;
911 		case EOpMatrixTimesVector:
912 			if(visit == PostVisit)
913 			{
914 				Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);
915 				mul->src[1].swizzle = 0x00;
916 
917 				int size = rightType.getNominalSize();
918 				for(int i = 1; i < size; i++)
919 				{
920 					Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result);
921 					mad->src[1].swizzle = i * 0x55;
922 				}
923 			}
924 			break;
925 		case EOpMatrixTimesMatrix:
926 			if(visit == PostVisit)
927 			{
928 				int dim = leftType.getNominalSize();
929 
930 				int size = rightType.getNominalSize();
931 				for(int i = 0; i < size; i++)
932 				{
933 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
934 					mul->src[1].swizzle = 0x00;
935 
936 					for(int j = 1; j < dim; j++)
937 					{
938 						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
939 						mad->src[1].swizzle = j * 0x55;
940 					}
941 				}
942 			}
943 			break;
944 		case EOpLogicalOr:
945 			if(trivial(right, 6))
946 			{
947 				if(visit == PostVisit)
948 				{
949 					emit(sw::Shader::OPCODE_OR, result, left, right);
950 				}
951 			}
952 			else   // Short-circuit evaluation
953 			{
954 				if(visit == InVisit)
955 				{
956 					emit(sw::Shader::OPCODE_MOV, result, left);
957 					Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result);
958 					ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT;
959 				}
960 				else if(visit == PostVisit)
961 				{
962 					emit(sw::Shader::OPCODE_MOV, result, right);
963 					emit(sw::Shader::OPCODE_ENDIF);
964 				}
965 			}
966 			break;
967 		case EOpLogicalXor:        if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break;
968 		case EOpLogicalAnd:
969 			if(trivial(right, 6))
970 			{
971 				if(visit == PostVisit)
972 				{
973 					emit(sw::Shader::OPCODE_AND, result, left, right);
974 				}
975 			}
976 			else   // Short-circuit evaluation
977 			{
978 				if(visit == InVisit)
979 				{
980 					emit(sw::Shader::OPCODE_MOV, result, left);
981 					emit(sw::Shader::OPCODE_IF, 0, result);
982 				}
983 				else if(visit == PostVisit)
984 				{
985 					emit(sw::Shader::OPCODE_MOV, result, right);
986 					emit(sw::Shader::OPCODE_ENDIF);
987 				}
988 			}
989 			break;
990 		default: UNREACHABLE(node->getOp());
991 		}
992 
993 		return true;
994 	}
995 
emitDeterminant(TIntermTyped * result,TIntermTyped * arg,int size,int col,int row,int outCol,int outRow)996 	void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow)
997 	{
998 		switch(size)
999 		{
1000 		case 1: // Used for cofactor computation only
1001 			{
1002 				// For a 2x2 matrix, the cofactor is simply a transposed move or negate
1003 				bool isMov = (row == col);
1004 				sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG;
1005 				Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row);
1006 				mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col);
1007 				mov->dst.mask = 1 << outRow;
1008 			}
1009 			break;
1010 		case 2:
1011 			{
1012 				static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy
1013 
1014 				bool isCofactor = (col >= 0) && (row >= 0);
1015 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
1016 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
1017 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
1018 
1019 				Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1);
1020 				det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2];
1021 				det->dst.mask = 1 << outRow;
1022 			}
1023 			break;
1024 		case 3:
1025 			{
1026 				static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw
1027 
1028 				bool isCofactor = (col >= 0) && (row >= 0);
1029 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
1030 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
1031 				int col2 = (isCofactor && (col <= 2)) ? 3 : 2;
1032 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
1033 
1034 				Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2);
1035 				det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3];
1036 				det->dst.mask = 1 << outRow;
1037 			}
1038 			break;
1039 		case 4:
1040 			{
1041 				Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3);
1042 				det->dst.mask = 1 << outRow;
1043 			}
1044 			break;
1045 		default:
1046 			UNREACHABLE(size);
1047 			break;
1048 		}
1049 	}
1050 
visitUnary(Visit visit,TIntermUnary * node)1051 	bool OutputASM::visitUnary(Visit visit, TIntermUnary *node)
1052 	{
1053 		if(currentScope != emitScope)
1054 		{
1055 			return false;
1056 		}
1057 
1058 		TIntermTyped *result = node;
1059 		TIntermTyped *arg = node->getOperand();
1060 		TBasicType basicType = arg->getType().getBasicType();
1061 
1062 		union
1063 		{
1064 			float f;
1065 			int i;
1066 		} one_value;
1067 
1068 		if(basicType == EbtInt || basicType == EbtUInt)
1069 		{
1070 			one_value.i = 1;
1071 		}
1072 		else
1073 		{
1074 			one_value.f = 1.0f;
1075 		}
1076 
1077 		Constant one(one_value.f, one_value.f, one_value.f, one_value.f);
1078 		Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f);
1079 		Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f);
1080 
1081 		switch(node->getOp())
1082 		{
1083 		case EOpNegative:
1084 			if(visit == PostVisit)
1085 			{
1086 				sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg);
1087 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1088 				{
1089 					emit(negOpcode, result, index, arg, index);
1090 				}
1091 			}
1092 			break;
1093 		case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
1094 		case EOpLogicalNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
1095 		case EOpBitwiseNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
1096 		case EOpPostIncrement:
1097 			if(visit == PostVisit)
1098 			{
1099 				copy(result, arg);
1100 
1101 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
1102 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1103 				{
1104 					emit(addOpcode, arg, index, arg, index, &one);
1105 				}
1106 
1107 				assignLvalue(arg, arg);
1108 			}
1109 			break;
1110 		case EOpPostDecrement:
1111 			if(visit == PostVisit)
1112 			{
1113 				copy(result, arg);
1114 
1115 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
1116 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1117 				{
1118 					emit(subOpcode, arg, index, arg, index, &one);
1119 				}
1120 
1121 				assignLvalue(arg, arg);
1122 			}
1123 			break;
1124 		case EOpPreIncrement:
1125 			if(visit == PostVisit)
1126 			{
1127 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
1128 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1129 				{
1130 					emit(addOpcode, result, index, arg, index, &one);
1131 				}
1132 
1133 				assignLvalue(arg, result);
1134 			}
1135 			break;
1136 		case EOpPreDecrement:
1137 			if(visit == PostVisit)
1138 			{
1139 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
1140 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1141 				{
1142 					emit(subOpcode, result, index, arg, index, &one);
1143 				}
1144 
1145 				assignLvalue(arg, result);
1146 			}
1147 			break;
1148 		case EOpRadians:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break;
1149 		case EOpDegrees:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &deg); break;
1150 		case EOpSin:              if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break;
1151 		case EOpCos:              if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break;
1152 		case EOpTan:              if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break;
1153 		case EOpAsin:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break;
1154 		case EOpAcos:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break;
1155 		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break;
1156 		case EOpSinh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break;
1157 		case EOpCosh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break;
1158 		case EOpTanh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break;
1159 		case EOpAsinh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break;
1160 		case EOpAcosh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break;
1161 		case EOpAtanh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break;
1162 		case EOpExp:              if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break;
1163 		case EOpLog:              if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break;
1164 		case EOpExp2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break;
1165 		case EOpLog2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break;
1166 		case EOpSqrt:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break;
1167 		case EOpInverseSqrt:      if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break;
1168 		case EOpAbs:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break;
1169 		case EOpSign:             if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break;
1170 		case EOpFloor:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break;
1171 		case EOpTrunc:            if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break;
1172 		case EOpRound:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break;
1173 		case EOpRoundEven:        if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break;
1174 		case EOpCeil:             if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break;
1175 		case EOpFract:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break;
1176 		case EOpIsNan:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break;
1177 		case EOpIsInf:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break;
1178 		case EOpLength:           if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break;
1179 		case EOpNormalize:        if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break;
1180 		case EOpDFdx:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break;
1181 		case EOpDFdy:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break;
1182 		case EOpFwidth:           if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break;
1183 		case EOpAny:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break;
1184 		case EOpAll:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break;
1185 		case EOpFloatBitsToInt:   if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break;
1186 		case EOpFloatBitsToUint:  if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break;
1187 		case EOpIntBitsToFloat:   if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break;
1188 		case EOpUintBitsToFloat:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break;
1189 		case EOpPackSnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break;
1190 		case EOpPackUnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break;
1191 		case EOpPackHalf2x16:     if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break;
1192 		case EOpUnpackSnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break;
1193 		case EOpUnpackUnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break;
1194 		case EOpUnpackHalf2x16:   if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break;
1195 		case EOpTranspose:
1196 			if(visit == PostVisit)
1197 			{
1198 				int numCols = arg->getNominalSize();
1199 				int numRows = arg->getSecondarySize();
1200 				for(int i = 0; i < numCols; ++i)
1201 				{
1202 					for(int j = 0; j < numRows; ++j)
1203 					{
1204 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i);
1205 						mov->src[0].swizzle = 0x55 * j;
1206 						mov->dst.mask = 1 << i;
1207 					}
1208 				}
1209 			}
1210 			break;
1211 		case EOpDeterminant:
1212 			if(visit == PostVisit)
1213 			{
1214 				int size = arg->getNominalSize();
1215 				ASSERT(size == arg->getSecondarySize());
1216 
1217 				emitDeterminant(result, arg, size);
1218 			}
1219 			break;
1220 		case EOpInverse:
1221 			if(visit == PostVisit)
1222 			{
1223 				int size = arg->getNominalSize();
1224 				ASSERT(size == arg->getSecondarySize());
1225 
1226 				// Compute transposed matrix of cofactors
1227 				for(int i = 0; i < size; ++i)
1228 				{
1229 					for(int j = 0; j < size; ++j)
1230 					{
1231 						// For a 2x2 matrix, the cofactor is simply a transposed move or negate
1232 						// For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant
1233 						emitDeterminant(result, arg, size - 1, j, i, i, j);
1234 					}
1235 				}
1236 
1237 				// Compute 1 / determinant
1238 				Temporary invDet(this);
1239 				emitDeterminant(&invDet, arg, size);
1240 				Constant one(1.0f, 1.0f, 1.0f, 1.0f);
1241 				Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet);
1242 				div->src[1].swizzle = 0x00; // xxxx
1243 
1244 				// Divide transposed matrix of cofactors by determinant
1245 				for(int i = 0; i < size; ++i)
1246 				{
1247 					emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet);
1248 				}
1249 			}
1250 			break;
1251 		default: UNREACHABLE(node->getOp());
1252 		}
1253 
1254 		return true;
1255 	}
1256 
	// Emits shader assembly for an aggregate node: function definitions and
	// calls, texture built-ins, constructors, and multi-operand built-ins.
	//
	//   visit  Traversal phase; most cases only emit on PostVisit, function
	//          definitions also act on PreVisit.
	//   node   The aggregate node; it doubles as the destination ('result') and
	//          its child sequence supplies the operands ('arg').
	//
	// Returns false when the node lies outside the scope being emitted (function
	// and sequence nodes are always processed), or when a called user-defined
	// function has no recorded definition (an error is reported on mContext);
	// returns true otherwise.
	bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node)
	{
		if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence)
		{
			return false;
		}

		Constant zero(0.0f, 0.0f, 0.0f, 0.0f);

		TIntermTyped *result = node;
		const TType &resultType = node->getType();
		TIntermSequence &arg = node->getSequence();
		int argumentCount = static_cast<int>(arg.size());

		switch(node->getOp())
		{
		case EOpSequence:             break;
		case EOpDeclaration:          break;
		case EOpInvariantDeclaration: break;
		case EOpPrototype:            break;
		case EOpComma:
			if(visit == PostVisit)
			{
				// The value of a comma expression is its second operand
				copy(result, arg[1]);
			}
			break;
		case EOpFunction:
			if(visit == PreVisit)
			{
				const TString &name = node->getName();

				if(emitScope == FUNCTION)
				{
					if(functionArray.size() > 1)   // No need for a label when there's only main()
					{
						Instruction *label = emit(sw::Shader::OPCODE_LABEL);
						label->dst.type = sw::Shader::PARAMETER_LABEL;

						const Function *function = findFunction(name);
						ASSERT(function);   // Should have been added during global pass
						label->dst.index = function->label;
						currentFunction = function->label;
					}
				}
				else if(emitScope == GLOBAL)
				{
					// Global pass: record every function other than main() so calls can be resolved later
					if(name != "main(")
					{
						TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence();
						functionArray.push_back(Function(functionArray.size(), name, &arguments, node));
					}
				}
				else UNREACHABLE(emitScope);

				currentScope = FUNCTION;
			}
			else if(visit == PostVisit)
			{
				if(emitScope == FUNCTION)
				{
					if(functionArray.size() > 1)   // No need to return when there's only main()
					{
						emit(sw::Shader::OPCODE_RET);
					}
				}

				currentScope = GLOBAL;
			}
			break;
		case EOpFunctionCall:
			if(visit == PostVisit)
			{
				if(node->isUserDefined())
				{
					const TString &name = node->getName();
					const Function *function = findFunction(name);

					if(!function)
					{
						mContext.error(node->getLine(), "function definition not found", name.c_str());
						return false;
					}

					TIntermSequence &arguments = *function->arg;

					// Copy the actual arguments into the callee's input parameters
					for(int i = 0; i < argumentCount; i++)
					{
						TIntermTyped *in = arguments[i]->getAsTyped();

						if(in->getQualifier() == EvqIn ||
						   in->getQualifier() == EvqInOut ||
						   in->getQualifier() == EvqConstReadOnly)
						{
							copy(in, arg[i]);
						}
					}

					Instruction *call = emit(sw::Shader::OPCODE_CALL);
					call->dst.type = sw::Shader::PARAMETER_LABEL;
					call->dst.index = function->label;

					// Fetch the return value, if the callee produces one
					if(function->ret && function->ret->getType().getBasicType() != EbtVoid)
					{
						copy(result, function->ret);
					}

					// Write the callee's output parameters back to the caller's l-values
					for(int i = 0; i < argumentCount; i++)
					{
						TIntermTyped *argument = arguments[i]->getAsTyped();
						TIntermTyped *out = arg[i]->getAsTyped();

						if(argument->getQualifier() == EvqOut ||
						   argument->getQualifier() == EvqInOut)
						{
							assignLvalue(out, argument);
						}
					}
				}
				else
				{
					// Built-in texture function: the first argument is the sampler, the second
					// is the coordinate (or the second operand of the size query)
					const TextureFunction textureFunction(node->getName());
					TIntermTyped *s = arg[0]->getAsTyped();
					TIntermTyped *t = arg[1]->getAsTyped();

					Temporary coord(this);

					if(textureFunction.proj)
					{
						// Projective lookup: scale the first three components by the reciprocal
						// of the coordinate's last component
						Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]);
						rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1);
						rcp->dst.mask = 0x7;

						Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord);
						mul->dst.mask = 0x7;

						if(IsShadowSampler(s->getBasicType()))
						{
							ASSERT(s->getBasicType() == EbtSampler2DShadow);
							Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, &coord);
							mov->src[0].swizzle = 0xA4;   // xyzz: replicate z into w
						}
					}
					else
					{
						Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, arg[1]);

						if(IsShadowSampler(s->getBasicType()) && t->getNominalSize() == 3)
						{
							ASSERT(s->getBasicType() == EbtSampler2DShadow);
							mov->src[0].swizzle = 0xA4;   // xyzz: replicate z into w
						}
					}

					// Select the sampling opcode from the lookup method, the offset flag and the argument count
					switch(textureFunction.method)
					{
					case TextureFunction::IMPLICIT:
						if(!textureFunction.offset)
						{
							if(argumentCount == 2)
							{
								emit(sw::Shader::OPCODE_TEX, result, &coord, s);
							}
							else if(argumentCount == 3)   // Bias
							{
								emit(sw::Shader::OPCODE_TEXBIAS, result, &coord, s, arg[2]);
							}
							else UNREACHABLE(argumentCount);
						}
						else   // Offset
						{
							if(argumentCount == 3)
							{
								emit(sw::Shader::OPCODE_TEXOFFSET, result, &coord, s, arg[2]);
							}
							else if(argumentCount == 4)   // Bias
							{
								emit(sw::Shader::OPCODE_TEXOFFSETBIAS, result, &coord, s, arg[2], arg[3]);
							}
							else UNREACHABLE(argumentCount);
						}
						break;
					case TextureFunction::LOD:
						if(!textureFunction.offset && argumentCount == 3)
						{
							emit(sw::Shader::OPCODE_TEXLOD, result, &coord, s, arg[2]);
						}
						else if(argumentCount == 4)   // Offset
						{
							// Note the operand order: the fourth argument precedes the third
							emit(sw::Shader::OPCODE_TEXLODOFFSET, result, &coord, s, arg[3], arg[2]);
						}
						else UNREACHABLE(argumentCount);
						break;
					case TextureFunction::FETCH:
						if(!textureFunction.offset && argumentCount == 3)
						{
							emit(sw::Shader::OPCODE_TEXELFETCH, result, &coord, s, arg[2]);
						}
						else if(argumentCount == 4)   // Offset
						{
							// Note the operand order: the fourth argument precedes the third
							emit(sw::Shader::OPCODE_TEXELFETCHOFFSET, result, &coord, s, arg[3], arg[2]);
						}
						else UNREACHABLE(argumentCount);
						break;
					case TextureFunction::GRAD:
						if(!textureFunction.offset && argumentCount == 4)
						{
							emit(sw::Shader::OPCODE_TEXGRAD, result, &coord, s, arg[2], arg[3]);
						}
						else if(argumentCount == 5)   // Offset
						{
							emit(sw::Shader::OPCODE_TEXGRADOFFSET, result, &coord, s, arg[2], arg[3], arg[4]);
						}
						else UNREACHABLE(argumentCount);
						break;
					case TextureFunction::SIZE:
						// The size query uses the second argument directly rather than 'coord'
						emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], s);
						break;
					default:
						UNREACHABLE(textureFunction.method);
					}
				}
			}
			break;
		case EOpParameters:
			break;
		case EOpConstructFloat:
		case EOpConstructVec2:
		case EOpConstructVec3:
		case EOpConstructVec4:
		case EOpConstructBool:
		case EOpConstructBVec2:
		case EOpConstructBVec3:
		case EOpConstructBVec4:
		case EOpConstructInt:
		case EOpConstructIVec2:
		case EOpConstructIVec3:
		case EOpConstructIVec4:
		case EOpConstructUInt:
		case EOpConstructUVec2:
		case EOpConstructUVec3:
		case EOpConstructUVec4:
			if(visit == PostVisit)
			{
				// 'component' counts the destination components filled so far; it is
				// split into an array element index and a component offset ('swizzle')
				// within that element
				int component = 0;
				int arrayMaxIndex = result->isArray() ? result->getArraySize() - 1 : 0;
				int arrayComponents = result->getType().getElementSize();
				for(int i = 0; i < argumentCount; i++)
				{
					TIntermTyped *argi = arg[i]->getAsTyped();
					int size = argi->getNominalSize();
					int arrayIndex = std::min(component / arrayComponents, arrayMaxIndex);
					int swizzle = component - (arrayIndex * arrayComponents);

					if(!argi->isMatrix())
					{
						// Write from component offset 'swizzle' upward, shifting the source
						// swizzle by the same number of components
						Instruction *mov = emitCast(result, arrayIndex, argi, 0);
						mov->dst.mask = (0xF << swizzle) & 0xF;
						mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);

						component += size;
					}
					else if(!result->isMatrix()) // Construct a non matrix from a matrix
					{
						Instruction *mov = emitCast(result, arrayIndex, argi, 0);
						mov->dst.mask = (0xF << swizzle) & 0xF;
						mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);

						// At most one more instruction when constructing a vec3 from a mat2 or a vec4 from a mat2/mat3
						if(result->getNominalSize() > size)
						{
							Instruction *mov = emitCast(result, arrayIndex, argi, 1);
							mov->dst.mask = (0xF << (swizzle + size)) & 0xF;
							// mat2: xxxy (0x40), mat3: xxxx (0x00)
							mov->src[0].swizzle = ((size == 2) ? 0x40 : 0x00) << (swizzle * 2);
						}

						component += size;
					}
					else   // Matrix
					{
						int column = 0;

						while(component < resultType.getNominalSize())
						{
							Instruction *mov = emitCast(result, arrayIndex, argi, column);
							mov->dst.mask = (0xF << swizzle) & 0xF;
							mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);

							column++;
							component += size;
						}
					}
				}
			}
			break;
		case EOpConstructMat2:
		case EOpConstructMat2x3:
		case EOpConstructMat2x4:
		case EOpConstructMat3x2:
		case EOpConstructMat3:
		case EOpConstructMat3x4:
		case EOpConstructMat4x2:
		case EOpConstructMat4x3:
		case EOpConstructMat4:
			if(visit == PostVisit)
			{
				TIntermTyped *arg0 = arg[0]->getAsTyped();
				const int outCols = result->getNominalSize();
				const int outRows = result->getSecondarySize();

				if(arg0->isScalar() && arg.size() == 1)   // Construct scale matrix
				{
					for(int i = 0; i < outCols; i++)
					{
						emit(sw::Shader::OPCODE_MOV, result, i, &zero);
						if (i < outRows)
						{
							// Insert the scalar value on the main diagonal.
							// For non-square matrices, Avoid emitting in
							// a column which doesn't /have/ a main diagonal
							// element, even though it would be fairly benign --
							// it's not necessarily trivial for downstream
							// passes to see that this is redundant and strip it
							// out.
							Instruction *mov = emitCast(result, i, arg0, 0);
							mov->dst.mask = 1 << i;
							ASSERT(mov->src[0].swizzle == 0x00);
						}
					}
				}
				else if(arg0->isMatrix())
				{
					// Construct from matrices: one source matrix per array element
					int arraySize = result->isArray() ? result->getArraySize() : 1;

					for(int n = 0; n < arraySize; n++)
					{
						TIntermTyped *argi = arg[n]->getAsTyped();
						const int inCols = argi->getNominalSize();
						const int inRows = argi->getSecondarySize();

						for(int i = 0; i < outCols; i++)
						{
							if(i >= inCols || outRows > inRows)
							{
								// Initialize to identity matrix
								Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f));
								emitCast(result, i + n * outCols, &col, 0);
							}

							if(i < inCols)
							{
								// Overwrite only the rows that exist in the source column
								Instruction *mov = emitCast(result, i + n * outCols, argi, i);
								mov->dst.mask = 0xF >> (4 - inRows);
							}
						}
					}
				}
				else
				{
					// Construct from a list of scalars/vectors, filling the matrix in
					// column-major order; an argument may straddle a column boundary
					int column = 0;
					int row = 0;

					for(int i = 0; i < argumentCount; i++)
					{
						TIntermTyped *argi = arg[i]->getAsTyped();
						int size = argi->getNominalSize();
						int element = 0;

						while(element < size)
						{
							Instruction *mov = emitCast(result, column, argi, 0);
							mov->dst.mask = (0xF << row) & 0xF;
							mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element;

							// Advance to the next column once the current one is full
							int end = row + size - element;
							column = end >= outRows ? column + 1 : column;
							element = element + outRows - row;
							row = end >= outRows ? 0 : end;
						}
					}
				}
			}
			break;
		case EOpConstructStruct:
			if(visit == PostVisit)
			{
				// Copy each member register by register; 'offset' is the running
				// register position within the struct
				int offset = 0;
				for(int i = 0; i < argumentCount; i++)
				{
					TIntermTyped *argi = arg[i]->getAsTyped();
					int size = argi->totalRegisterCount();

					for(int index = 0; index < size; index++)
					{
						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index);
						mov->dst.mask = writeMask(result, offset + index);
					}

					offset += size;
				}
			}
			break;
		case EOpLessThan:         if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break;
		case EOpGreaterThan:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break;
		case EOpLessThanEqual:    if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break;
		case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break;
		case EOpVectorEqual:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break;
		case EOpVectorNotEqual:   if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break;
		case EOpMod:              if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break;
		case EOpModf:
			if(visit == PostVisit)
			{
				// The whole part is stored through the second argument; the result is
				// the first argument minus that whole part
				TIntermTyped* arg1 = arg[1]->getAsTyped();
				emit(sw::Shader::OPCODE_TRUNC, arg1, arg[0]);
				assignLvalue(arg1, arg1);
				emitBinary(sw::Shader::OPCODE_SUB, result, arg[0], arg1);
			}
			break;
		case EOpPow:              if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break;
		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break;
		case EOpMin:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break;
		case EOpMax:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break;
		case EOpClamp:
			if(visit == PostVisit)
			{
				// clamp(x, lo, hi) is emitted as min(max(x, lo), hi)
				emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]);
				emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]);
			}
			break;
		case EOpMix:
			if(visit == PostVisit)
			{
				// A boolean third argument selects per component; otherwise interpolate
				if(arg[2]->getAsTyped()->getBasicType() == EbtBool)
				{
					emit(sw::Shader::OPCODE_SELECT, result, arg[2], arg[1], arg[0]);
				}
				else
				{
					emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]);
				}
			}
			break;
		case EOpStep:        if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break;
		case EOpSmoothStep:  if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break;
		case EOpDistance:    if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break;
		case EOpDot:         if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break;
		case EOpCross:       if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break;
		case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
		case EOpReflect:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break;
		case EOpRefract:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
		case EOpMul:
			if(visit == PostVisit)
			{
				// Component-wise matrix multiply: both operands must have identical dimensions
				TIntermTyped *arg0 = arg[0]->getAsTyped();
				ASSERT((arg0->getNominalSize() == arg[1]->getAsTyped()->getNominalSize()) &&
				       (arg0->getSecondarySize() == arg[1]->getAsTyped()->getSecondarySize()));

				int size = arg0->getNominalSize();
				for(int i = 0; i < size; i++)
				{
					emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i);
				}
			}
			break;
		case EOpOuterProduct:
			if(visit == PostVisit)
			{
				// Result column i is the first vector scaled by component i of the second
				for(int i = 0; i < dim(arg[1]); i++)
				{
					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]);
					mul->src[1].swizzle = 0x55 * i;
				}
			}
			break;
		default: UNREACHABLE(node->getOp());
		}

		return true;
	}
1736 
visitSelection(Visit visit,TIntermSelection * node)1737 	bool OutputASM::visitSelection(Visit visit, TIntermSelection *node)
1738 	{
1739 		if(currentScope != emitScope)
1740 		{
1741 			return false;
1742 		}
1743 
1744 		TIntermTyped *condition = node->getCondition();
1745 		TIntermNode *trueBlock = node->getTrueBlock();
1746 		TIntermNode *falseBlock = node->getFalseBlock();
1747 		TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
1748 
1749 		condition->traverse(this);
1750 
1751 		if(node->usesTernaryOperator())
1752 		{
1753 			if(constantCondition)
1754 			{
1755 				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
1756 
1757 				if(trueCondition)
1758 				{
1759 					trueBlock->traverse(this);
1760 					copy(node, trueBlock);
1761 				}
1762 				else
1763 				{
1764 					falseBlock->traverse(this);
1765 					copy(node, falseBlock);
1766 				}
1767 			}
1768 			else if(trivial(node, 6))   // Fast to compute both potential results and no side effects
1769 			{
1770 				trueBlock->traverse(this);
1771 				falseBlock->traverse(this);
1772 				emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock);
1773 			}
1774 			else
1775 			{
1776 				emit(sw::Shader::OPCODE_IF, 0, condition);
1777 
1778 				if(trueBlock)
1779 				{
1780 					trueBlock->traverse(this);
1781 					copy(node, trueBlock);
1782 				}
1783 
1784 				if(falseBlock)
1785 				{
1786 					emit(sw::Shader::OPCODE_ELSE);
1787 					falseBlock->traverse(this);
1788 					copy(node, falseBlock);
1789 				}
1790 
1791 				emit(sw::Shader::OPCODE_ENDIF);
1792 			}
1793 		}
1794 		else  // if/else statement
1795 		{
1796 			if(constantCondition)
1797 			{
1798 				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
1799 
1800 				if(trueCondition)
1801 				{
1802 					if(trueBlock)
1803 					{
1804 						trueBlock->traverse(this);
1805 					}
1806 				}
1807 				else
1808 				{
1809 					if(falseBlock)
1810 					{
1811 						falseBlock->traverse(this);
1812 					}
1813 				}
1814 			}
1815 			else
1816 			{
1817 				emit(sw::Shader::OPCODE_IF, 0, condition);
1818 
1819 				if(trueBlock)
1820 				{
1821 					trueBlock->traverse(this);
1822 				}
1823 
1824 				if(falseBlock)
1825 				{
1826 					emit(sw::Shader::OPCODE_ELSE);
1827 					falseBlock->traverse(this);
1828 				}
1829 
1830 				emit(sw::Shader::OPCODE_ENDIF);
1831 			}
1832 		}
1833 
1834 		return false;
1835 	}
1836 
visitLoop(Visit visit,TIntermLoop * node)1837 	bool OutputASM::visitLoop(Visit visit, TIntermLoop *node)
1838 	{
1839 		if(currentScope != emitScope)
1840 		{
1841 			return false;
1842 		}
1843 
1844 		LoopInfo loop(node);
1845 
1846 		if(loop.iterations == 0)
1847 		{
1848 			return false;
1849 		}
1850 
1851 		bool unroll = (loop.iterations <= 4);
1852 
1853 		TIntermNode *init = node->getInit();
1854 		TIntermTyped *condition = node->getCondition();
1855 		TIntermTyped *expression = node->getExpression();
1856 		TIntermNode *body = node->getBody();
1857 		Constant True(true);
1858 
1859 		if(loop.isDeterministic())
1860 		{
1861 			 deterministicVariables.insert(loop.index->getId());
1862 
1863 			 if(!unroll)
1864 			 {
1865 				 emit(sw::Shader::OPCODE_SCALAR);   // Unrolled loops don't have an ENDWHILE to disable scalar mode.
1866 			 }
1867 		}
1868 
1869 		if(node->getType() == ELoopDoWhile)
1870 		{
1871 			Temporary iterate(this);
1872 			emit(sw::Shader::OPCODE_MOV, &iterate, &True);
1873 
1874 			emit(sw::Shader::OPCODE_WHILE, 0, &iterate);   // FIXME: Implement real do-while
1875 
1876 			if(body)
1877 			{
1878 				body->traverse(this);
1879 			}
1880 
1881 			emit(sw::Shader::OPCODE_TEST);
1882 
1883 			condition->traverse(this);
1884 			emit(sw::Shader::OPCODE_MOV, &iterate, condition);
1885 
1886 			emit(sw::Shader::OPCODE_ENDWHILE);
1887 		}
1888 		else
1889 		{
1890 			if(init)
1891 			{
1892 				init->traverse(this);
1893 			}
1894 
1895 			if(unroll)
1896 			{
1897 				mContext.info(node->getLine(), "loop unrolled", "for");
1898 
1899 				for(unsigned int i = 0; i < loop.iterations; i++)
1900 				{
1901 				//	condition->traverse(this);   // Condition could contain statements, but not in an unrollable loop
1902 
1903 					if(body)
1904 					{
1905 						body->traverse(this);
1906 					}
1907 
1908 					if(expression)
1909 					{
1910 						expression->traverse(this);
1911 					}
1912 				}
1913 			}
1914 			else
1915 			{
1916 				if(condition)
1917 				{
1918 					condition->traverse(this);
1919 				}
1920 				else
1921 				{
1922 					condition = &True;
1923 				}
1924 
1925 				emit(sw::Shader::OPCODE_WHILE, 0, condition);
1926 
1927 				if(body)
1928 				{
1929 					body->traverse(this);
1930 				}
1931 
1932 				emit(sw::Shader::OPCODE_TEST);
1933 
1934 				if(loop.isDeterministic())
1935 				{
1936 					emit(sw::Shader::OPCODE_SCALAR);
1937 				}
1938 
1939 				if(expression)
1940 				{
1941 					expression->traverse(this);
1942 				}
1943 
1944 				if(condition)
1945 				{
1946 					condition->traverse(this);
1947 				}
1948 
1949 				emit(sw::Shader::OPCODE_ENDWHILE);
1950 			}
1951 		}
1952 
1953 		if(loop.isDeterministic())
1954 		{
1955 			 deterministicVariables.erase(loop.index->getId());
1956 		}
1957 
1958 		return false;
1959 	}
1960 
visitBranch(Visit visit,TIntermBranch * node)1961 	bool OutputASM::visitBranch(Visit visit, TIntermBranch *node)
1962 	{
1963 		if(currentScope != emitScope)
1964 		{
1965 			return false;
1966 		}
1967 
1968 		switch(node->getFlowOp())
1969 		{
1970 		case EOpKill:      if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD);  break;
1971 		case EOpBreak:     if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK);    break;
1972 		case EOpContinue:  if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break;
1973 		case EOpReturn:
1974 			if(visit == PostVisit)
1975 			{
1976 				TIntermTyped *value = node->getExpression();
1977 
1978 				if(value)
1979 				{
1980 					copy(functionArray[currentFunction].ret, value);
1981 				}
1982 
1983 				emit(sw::Shader::OPCODE_LEAVE);
1984 			}
1985 			break;
1986 		default: UNREACHABLE(node->getFlowOp());
1987 		}
1988 
1989 		return true;
1990 	}
1991 
visitSwitch(Visit visit,TIntermSwitch * node)1992 	bool OutputASM::visitSwitch(Visit visit, TIntermSwitch *node)
1993 	{
1994 		if(currentScope != emitScope)
1995 		{
1996 			return false;
1997 		}
1998 
1999 		TIntermTyped* switchValue = node->getInit();
2000 		TIntermAggregate* opList = node->getStatementList();
2001 
2002 		if(!switchValue || !opList)
2003 		{
2004 			return false;
2005 		}
2006 
2007 		switchValue->traverse(this);
2008 
2009 		emit(sw::Shader::OPCODE_SWITCH);
2010 
2011 		TIntermSequence& sequence = opList->getSequence();
2012 		TIntermSequence::iterator it = sequence.begin();
2013 		TIntermSequence::iterator defaultIt = sequence.end();
2014 		int nbCases = 0;
2015 		for(; it != sequence.end(); ++it)
2016 		{
2017 			TIntermCase* currentCase = (*it)->getAsCaseNode();
2018 			if(currentCase)
2019 			{
2020 				TIntermSequence::iterator caseIt = it;
2021 
2022 				TIntermTyped* condition = currentCase->getCondition();
2023 				if(condition) // non default case
2024 				{
2025 					if(nbCases != 0)
2026 					{
2027 						emit(sw::Shader::OPCODE_ELSE);
2028 					}
2029 
2030 					condition->traverse(this);
2031 					Temporary result(this);
2032 					emitBinary(sw::Shader::OPCODE_EQ, &result, switchValue, condition);
2033 					emit(sw::Shader::OPCODE_IF, 0, &result);
2034 					nbCases++;
2035 
2036 					// Emit the code for this case and all subsequent cases until we hit a break statement.
2037 					// TODO: This can repeat a lot of code for switches with many fall-through cases.
2038 					for(++caseIt; caseIt != sequence.end(); ++caseIt)
2039 					{
2040 						(*caseIt)->traverse(this);
2041 
2042 						// Stop if we encounter an unconditional branch (break, continue, return, or kill).
2043 						// TODO: This doesn't work if the statement is at a deeper scope level (e.g. {break;}).
2044 						// Note that this eliminates useless operations but shouldn't affect correctness.
2045 						if((*caseIt)->getAsBranchNode())
2046 						{
2047 							break;
2048 						}
2049 					}
2050 				}
2051 				else
2052 				{
2053 					defaultIt = it; // The default case might not be the last case, keep it for last
2054 				}
2055 			}
2056 		}
2057 
2058 		// If there's a default case, traverse it here
2059 		if(defaultIt != sequence.end())
2060 		{
2061 			if(nbCases != 0)
2062 			{
2063 				emit(sw::Shader::OPCODE_ELSE);
2064 			}
2065 
2066 			for(++defaultIt; defaultIt != sequence.end(); ++defaultIt)
2067 			{
2068 				(*defaultIt)->traverse(this);
2069 				if((*defaultIt)->getAsBranchNode()) // Kill, Break, Continue or Return
2070 				{
2071 					break;
2072 				}
2073 			}
2074 		}
2075 
2076 		for(int i = 0; i < nbCases; ++i)
2077 		{
2078 			emit(sw::Shader::OPCODE_ENDIF);
2079 		}
2080 
2081 		emit(sw::Shader::OPCODE_ENDSWITCH);
2082 
2083 		return false;
2084 	}
2085 
emit(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2,TIntermNode * src3,TIntermNode * src4)2086 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4)
2087 	{
2088 		return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0);
2089 	}
2090 
emit(sw::Shader::Opcode op,TIntermTyped * dst,int dstIndex,TIntermNode * src0,int index0,TIntermNode * src1,int index1,TIntermNode * src2,int index2,TIntermNode * src3,int index3,TIntermNode * src4,int index4)2091 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1,
2092 	                             TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, int index4)
2093 	{
2094 		Instruction *instruction = new Instruction(op);
2095 
2096 		if(dst)
2097 		{
2098 			destination(instruction->dst, dst, dstIndex);
2099 		}
2100 
2101 		if(src0)
2102 		{
2103 			TIntermTyped* src = src0->getAsTyped();
2104 			instruction->dst.partialPrecision = src && (src->getPrecision() <= EbpLow);
2105 		}
2106 
2107 		source(instruction->src[0], src0, index0);
2108 		source(instruction->src[1], src1, index1);
2109 		source(instruction->src[2], src2, index2);
2110 		source(instruction->src[3], src3, index3);
2111 		source(instruction->src[4], src4, index4);
2112 
2113 		shader->append(instruction);
2114 
2115 		return instruction;
2116 	}
2117 
emitCast(TIntermTyped * dst,TIntermTyped * src)2118 	Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src)
2119 	{
2120 		return emitCast(dst, 0, src, 0);
2121 	}
2122 
emitCast(TIntermTyped * dst,int dstIndex,TIntermTyped * src,int srcIndex)2123 	Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex)
2124 	{
2125 		switch(src->getBasicType())
2126 		{
2127 		case EbtBool:
2128 			switch(dst->getBasicType())
2129 			{
2130 			case EbtInt:   return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
2131 			case EbtUInt:  return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
2132 			case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex);
2133 			default:       break;
2134 			}
2135 			break;
2136 		case EbtInt:
2137 			switch(dst->getBasicType())
2138 			{
2139 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
2140 			case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex);
2141 			default:       break;
2142 			}
2143 			break;
2144 		case EbtUInt:
2145 			switch(dst->getBasicType())
2146 			{
2147 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
2148 			case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex);
2149 			default:       break;
2150 			}
2151 			break;
2152 		case EbtFloat:
2153 			switch(dst->getBasicType())
2154 			{
2155 			case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex);
2156 			case EbtInt:  return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex);
2157 			case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex);
2158 			default:      break;
2159 			}
2160 			break;
2161 		default:
2162 			break;
2163 		}
2164 
2165 		ASSERT((src->getBasicType() == dst->getBasicType()) ||
2166 		      ((src->getBasicType() == EbtInt) && (dst->getBasicType() == EbtUInt)) ||
2167 		      ((src->getBasicType() == EbtUInt) && (dst->getBasicType() == EbtInt)));
2168 
2169 		return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex);
2170 	}
2171 
emitBinary(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2)2172 	void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2)
2173 	{
2174 		for(int index = 0; index < dst->elementRegisterCount(); index++)
2175 		{
2176 			emit(op, dst, index, src0, index, src1, index, src2, index);
2177 		}
2178 	}
2179 
emitAssign(sw::Shader::Opcode op,TIntermTyped * result,TIntermTyped * lhs,TIntermTyped * src0,TIntermTyped * src1)2180 	void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1)
2181 	{
2182 		emitBinary(op, result, src0, src1);
2183 		assignLvalue(lhs, result);
2184 	}
2185 
emitCmp(sw::Shader::Control cmpOp,TIntermTyped * dst,TIntermNode * left,TIntermNode * right,int index)2186 	void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index)
2187 	{
2188 		sw::Shader::Opcode opcode;
2189 		switch(left->getAsTyped()->getBasicType())
2190 		{
2191 		case EbtBool:
2192 		case EbtInt:
2193 			opcode = sw::Shader::OPCODE_ICMP;
2194 			break;
2195 		case EbtUInt:
2196 			opcode = sw::Shader::OPCODE_UCMP;
2197 			break;
2198 		default:
2199 			opcode = sw::Shader::OPCODE_CMP;
2200 			break;
2201 		}
2202 
2203 		Instruction *cmp = emit(opcode, dst, 0, left, index, right, index);
2204 		cmp->control = cmpOp;
2205 	}
2206 
componentCount(const TType & type,int registers)2207 	int componentCount(const TType &type, int registers)
2208 	{
2209 		if(registers == 0)
2210 		{
2211 			return 0;
2212 		}
2213 
2214 		if(type.isArray() && registers >= type.elementRegisterCount())
2215 		{
2216 			int index = registers / type.elementRegisterCount();
2217 			registers -= index * type.elementRegisterCount();
2218 			return index * type.getElementSize() + componentCount(type, registers);
2219 		}
2220 
2221 		if(type.isStruct() || type.isInterfaceBlock())
2222 		{
2223 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
2224 			int elements = 0;
2225 
2226 			for(const auto &field : fields)
2227 			{
2228 				const TType &fieldType = *(field->type());
2229 
2230 				if(fieldType.totalRegisterCount() <= registers)
2231 				{
2232 					registers -= fieldType.totalRegisterCount();
2233 					elements += fieldType.getObjectSize();
2234 				}
2235 				else   // Register within this field
2236 				{
2237 					return elements + componentCount(fieldType, registers);
2238 				}
2239 			}
2240 		}
2241 		else if(type.isMatrix())
2242 		{
2243 			return registers * type.registerSize();
2244 		}
2245 
2246 		UNREACHABLE(0);
2247 		return 0;
2248 	}
2249 
registerSize(const TType & type,int registers)2250 	int registerSize(const TType &type, int registers)
2251 	{
2252 		if(registers == 0)
2253 		{
2254 			if(type.isStruct())
2255 			{
2256 				return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0);
2257 			}
2258 			else if(type.isInterfaceBlock())
2259 			{
2260 				return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0);
2261 			}
2262 
2263 			return type.registerSize();
2264 		}
2265 
2266 		if(type.isArray() && registers >= type.elementRegisterCount())
2267 		{
2268 			int index = registers / type.elementRegisterCount();
2269 			registers -= index * type.elementRegisterCount();
2270 			return registerSize(type, registers);
2271 		}
2272 
2273 		if(type.isStruct() || type.isInterfaceBlock())
2274 		{
2275 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
2276 
2277 			for(const auto &field : fields)
2278 			{
2279 				const TType &fieldType = *(field->type());
2280 
2281 				if(fieldType.totalRegisterCount() <= registers)
2282 				{
2283 					registers -= fieldType.totalRegisterCount();
2284 				}
2285 				else   // Register within this field
2286 				{
2287 					return registerSize(fieldType, registers);
2288 				}
2289 			}
2290 		}
2291 		else if(type.isMatrix())
2292 		{
2293 			return registerSize(type, 0);
2294 		}
2295 
2296 		UNREACHABLE(0);
2297 		return 0;
2298 	}
2299 
getBlockId(TIntermTyped * arg)2300 	int OutputASM::getBlockId(TIntermTyped *arg)
2301 	{
2302 		if(arg)
2303 		{
2304 			const TType &type = arg->getType();
2305 			TInterfaceBlock* block = type.getInterfaceBlock();
2306 			if(block && (type.getQualifier() == EvqUniform))
2307 			{
2308 				// Make sure the uniform block is declared
2309 				uniformRegister(arg);
2310 
2311 				const char* blockName = block->name().c_str();
2312 
2313 				// Fetch uniform block index from array of blocks
2314 				for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it)
2315 				{
2316 					if(blockName == it->name)
2317 					{
2318 						return it->blockId;
2319 					}
2320 				}
2321 
2322 				ASSERT(false);
2323 			}
2324 		}
2325 
2326 		return -1;
2327 	}
2328 
getArgumentInfo(TIntermTyped * arg,int index)2329 	OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index)
2330 	{
2331 		const TType &type = arg->getType();
2332 		int blockId = getBlockId(arg);
2333 		ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1);
2334 		if(blockId != -1)
2335 		{
2336 			argumentInfo.bufferIndex = 0;
2337 			for(int i = 0; i < blockId; ++i)
2338 			{
2339 				int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize;
2340 				argumentInfo.bufferIndex += blockArraySize > 0 ? blockArraySize : 1;
2341 			}
2342 
2343 			const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId];
2344 
2345 			BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end();
2346 			BlockDefinitionIndexMap::const_iterator it = itEnd;
2347 
2348 			argumentInfo.clampedIndex = index;
2349 			if(type.isInterfaceBlock())
2350 			{
2351 				// Offset index to the beginning of the selected instance
2352 				int blockRegisters = type.elementRegisterCount();
2353 				int bufferOffset = argumentInfo.clampedIndex / blockRegisters;
2354 				argumentInfo.bufferIndex += bufferOffset;
2355 				argumentInfo.clampedIndex -= bufferOffset * blockRegisters;
2356 			}
2357 
2358 			int regIndex = registerIndex(arg);
2359 			for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i)
2360 			{
2361 				it = blockDefinition.find(i);
2362 				if(it != itEnd)
2363 				{
2364 					argumentInfo.clampedIndex -= (i - regIndex);
2365 					break;
2366 				}
2367 			}
2368 			ASSERT(it != itEnd);
2369 
2370 			argumentInfo.typedMemberInfo = it->second;
2371 
2372 			int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount();
2373 			argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex;
2374 		}
2375 		else
2376 		{
2377 			argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index;
2378 		}
2379 
2380 		return argumentInfo;
2381 	}
2382 
source(sw::Shader::SourceParameter & parameter,TIntermNode * argument,int index)2383 	void OutputASM::source(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index)
2384 	{
2385 		if(argument)
2386 		{
2387 			TIntermTyped *arg = argument->getAsTyped();
2388 			Temporary unpackedUniform(this);
2389 
2390 			const TType& srcType = arg->getType();
2391 			TInterfaceBlock* srcBlock = srcType.getInterfaceBlock();
2392 			if(srcBlock && (srcType.getQualifier() == EvqUniform))
2393 			{
2394 				const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
2395 				const TType &memberType = argumentInfo.typedMemberInfo.type;
2396 
2397 				if(memberType.getBasicType() == EbtBool)
2398 				{
2399 					ASSERT(argumentInfo.clampedIndex < (memberType.isArray() ? memberType.getArraySize() : 1)); // index < arraySize
2400 
2401 					// Convert the packed bool, which is currently an int, to a true bool
2402 					Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B);
2403 					instruction->dst.type = sw::Shader::PARAMETER_TEMP;
2404 					instruction->dst.index = registerIndex(&unpackedUniform);
2405 					instruction->src[0].type = sw::Shader::PARAMETER_CONST;
2406 					instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
2407 					instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride;
2408 
2409 					shader->append(instruction);
2410 
2411 					arg = &unpackedUniform;
2412 					index = 0;
2413 				}
2414 				else if((memberType.getLayoutQualifier().matrixPacking == EmpRowMajor) && memberType.isMatrix())
2415 				{
2416 					int numCols = memberType.getNominalSize();
2417 					int numRows = memberType.getSecondarySize();
2418 
2419 					ASSERT(argumentInfo.clampedIndex < (numCols * (memberType.isArray() ? memberType.getArraySize() : 1))); // index < cols * arraySize
2420 
2421 					unsigned int dstIndex = registerIndex(&unpackedUniform);
2422 					unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55;
2423 					int arrayIndex = argumentInfo.clampedIndex / numCols;
2424 					int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride;
2425 
2426 					for(int j = 0; j < numRows; ++j)
2427 					{
2428 						// Transpose the row major matrix
2429 						Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV);
2430 						instruction->dst.type = sw::Shader::PARAMETER_TEMP;
2431 						instruction->dst.index = dstIndex;
2432 						instruction->dst.mask = 1 << j;
2433 						instruction->src[0].type = sw::Shader::PARAMETER_CONST;
2434 						instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
2435 						instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride;
2436 						instruction->src[0].swizzle = srcSwizzle;
2437 
2438 						shader->append(instruction);
2439 					}
2440 
2441 					arg = &unpackedUniform;
2442 					index = 0;
2443 				}
2444 			}
2445 
2446 			const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
2447 			const TType &type = argumentInfo.typedMemberInfo.type;
2448 
2449 			int size = registerSize(type, argumentInfo.clampedIndex);
2450 
2451 			parameter.type = registerType(arg);
2452 			parameter.bufferIndex = argumentInfo.bufferIndex;
2453 
2454 			if(arg->getAsConstantUnion() && arg->getAsConstantUnion()->getUnionArrayPointer())
2455 			{
2456 				int component = componentCount(type, argumentInfo.clampedIndex);
2457 				ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer();
2458 
2459 				for(int i = 0; i < 4; i++)
2460 				{
2461 					if(size == 1)   // Replicate
2462 					{
2463 						parameter.value[i] = constants[component + 0].getAsFloat();
2464 					}
2465 					else if(i < size)
2466 					{
2467 						parameter.value[i] = constants[component + i].getAsFloat();
2468 					}
2469 					else
2470 					{
2471 						parameter.value[i] = 0.0f;
2472 					}
2473 				}
2474 			}
2475 			else
2476 			{
2477 				parameter.index = registerIndex(arg) + argumentInfo.clampedIndex;
2478 
2479 				if(parameter.bufferIndex != -1)
2480 				{
2481 					int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride;
2482 					parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride;
2483 				}
2484 
2485 				if(parameter.index >= sw::NUM_TEMPORARY_REGISTERS)
2486 				{
2487 					mContext.error(arg->getLine(),
2488 						"Too many temporary registers required to compile shader",
2489 						pixelShader ? "pixel shader" : "vertex shader");
2490 				}
2491 			}
2492 
2493 			if(!IsSampler(arg->getBasicType()))
2494 			{
2495 				parameter.swizzle = readSwizzle(arg, size);
2496 			}
2497 		}
2498 	}
2499 
destination(sw::Shader::DestinationParameter & parameter,TIntermTyped * arg,int index)2500 	void OutputASM::destination(sw::Shader::DestinationParameter &parameter, TIntermTyped *arg, int index)
2501 	{
2502 		parameter.type = registerType(arg);
2503 		parameter.index = registerIndex(arg) + index;
2504 		parameter.mask = writeMask(arg, index);
2505 
2506 		if(parameter.index >= sw::NUM_TEMPORARY_REGISTERS)
2507 		{
2508 			mContext.error(arg->getLine(),
2509 				"Too many temporary registers required to compile shader",
2510 				pixelShader ? "pixel shader" : "vertex shader");
2511 		}
2512 
2513 	}
2514 
copy(TIntermTyped * dst,TIntermNode * src,int offset)2515 	void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset)
2516 	{
2517 		for(int index = 0; index < dst->totalRegisterCount(); index++)
2518 		{
2519 			emit(sw::Shader::OPCODE_MOV, dst, index, src, offset + index);
2520 		}
2521 	}
2522 
swizzleElement(int swizzle,int index)2523 	int swizzleElement(int swizzle, int index)
2524 	{
2525 		return (swizzle >> (index * 2)) & 0x03;
2526 	}
2527 
swizzleSwizzle(int leftSwizzle,int rightSwizzle)2528 	int swizzleSwizzle(int leftSwizzle, int rightSwizzle)
2529 	{
2530 		return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) |
2531 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) |
2532 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) |
2533 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6);
2534 	}
2535 
assignLvalue(TIntermTyped * dst,TIntermTyped * src)2536 	void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src)
2537 	{
2538 		if((src->isVector() && (!dst->isVector() || (src->getNominalSize() != dst->getNominalSize()))) ||
2539 		   (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize()))))
2540 		{
2541 			return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix");
2542 		}
2543 
2544 		TIntermBinary *binary = dst->getAsBinaryNode();
2545 
2546 		if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar())
2547 		{
2548 			Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT);
2549 
2550 			lvalue(insert->dst, dst);
2551 
2552 			insert->src[0].type = insert->dst.type;
2553 			insert->src[0].index = insert->dst.index;
2554 			insert->src[0].rel = insert->dst.rel;
2555 			source(insert->src[1], src);
2556 			source(insert->src[2], binary->getRight());
2557 
2558 			shader->append(insert);
2559 		}
2560 		else
2561 		{
2562 			Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV);
2563 
2564 			int swizzle = lvalue(mov1->dst, dst);
2565 
2566 			source(mov1->src[0], src);
2567 			mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle);
2568 
2569 			shader->append(mov1);
2570 
2571 			for(int offset = 1; offset < dst->totalRegisterCount(); offset++)
2572 			{
2573 				Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV);
2574 
2575 				mov->dst = mov1->dst;
2576 				mov->dst.index += offset;
2577 				mov->dst.mask = writeMask(dst, offset);
2578 
2579 				source(mov->src[0], src, offset);
2580 
2581 				shader->append(mov);
2582 			}
2583 		}
2584 	}
2585 
evaluateRvalue(TIntermTyped * node)2586 	void OutputASM::evaluateRvalue(TIntermTyped *node)
2587 	{
2588 		TIntermBinary *binary = node->getAsBinaryNode();
2589 
2590 		if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && node->isScalar())
2591 		{
2592 			Instruction *insert = new Instruction(sw::Shader::OPCODE_EXTRACT);
2593 
2594 			destination(insert->dst, node);
2595 
2596 			Temporary address(this);
2597 			unsigned char mask;
2598 			TIntermTyped *root = nullptr;
2599 			unsigned int offset = 0;
2600 			int swizzle = lvalue(root, offset, insert->src[0].rel, mask, address, node);
2601 
2602 			source(insert->src[0], root, offset);
2603 			insert->src[0].swizzle = swizzleSwizzle(insert->src[0].swizzle, swizzle);
2604 
2605 			source(insert->src[1], binary->getRight());
2606 
2607 			shader->append(insert);
2608 		}
2609 		else
2610 		{
2611 			Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV);
2612 
2613 			destination(mov1->dst, node, 0);
2614 
2615 			Temporary address(this);
2616 			unsigned char mask;
2617 			TIntermTyped *root = nullptr;
2618 			unsigned int offset = 0;
2619 			int swizzle = lvalue(root, offset, mov1->src[0].rel, mask, address, node);
2620 
2621 			source(mov1->src[0], root, offset);
2622 			mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle);
2623 
2624 			shader->append(mov1);
2625 
2626 			for(int i = 1; i < node->totalRegisterCount(); i++)
2627 			{
2628 				Instruction *mov = emit(sw::Shader::OPCODE_MOV, node, i, root, offset + i);
2629 				mov->src[0].rel = mov1->src[0].rel;
2630 			}
2631 		}
2632 	}
2633 
lvalue(sw::Shader::DestinationParameter & dst,TIntermTyped * node)2634 	int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, TIntermTyped *node)
2635 	{
2636 		Temporary address(this);
2637 		TIntermTyped *root = nullptr;
2638 		unsigned int offset = 0;
2639 		unsigned char mask = 0xF;
2640 		int swizzle = lvalue(root, offset, dst.rel, mask, address, node);
2641 
2642 		dst.type = registerType(root);
2643 		dst.index = registerIndex(root) + offset;
2644 		dst.mask = mask;
2645 
2646 		return swizzle;
2647 	}
2648 
lvalue(TIntermTyped * & root,unsigned int & offset,sw::Shader::Relative & rel,unsigned char & mask,Temporary & address,TIntermTyped * node)2649 	int OutputASM::lvalue(TIntermTyped *&root, unsigned int &offset, sw::Shader::Relative &rel, unsigned char &mask, Temporary &address, TIntermTyped *node)
2650 	{
2651 		TIntermTyped *result = node;
2652 		TIntermBinary *binary = node->getAsBinaryNode();
2653 		TIntermSymbol *symbol = node->getAsSymbolNode();
2654 
2655 		if(binary)
2656 		{
2657 			TIntermTyped *left = binary->getLeft();
2658 			TIntermTyped *right = binary->getRight();
2659 
2660 			int leftSwizzle = lvalue(root, offset, rel, mask, address, left);   // Resolve the l-value of the left side
2661 
2662 			switch(binary->getOp())
2663 			{
2664 			case EOpIndexDirect:
2665 				{
2666 					int rightIndex = right->getAsConstantUnion()->getIConst(0);
2667 
2668 					if(left->isRegister())
2669 					{
2670 						int leftMask = mask;
2671 
2672 						mask = 1;
2673 						while((leftMask & mask) == 0)
2674 						{
2675 							mask = mask << 1;
2676 						}
2677 
2678 						int element = swizzleElement(leftSwizzle, rightIndex);
2679 						mask = 1 << element;
2680 
2681 						return element;
2682 					}
2683 					else if(left->isArray() || left->isMatrix())
2684 					{
2685 						offset += rightIndex * result->totalRegisterCount();
2686 						return 0xE4;
2687 					}
2688 					else UNREACHABLE(0);
2689 				}
2690 				break;
2691 			case EOpIndexIndirect:
2692 				{
2693 					right->traverse(this);
2694 
2695 					if(left->isRegister())
2696 					{
2697 						// Requires INSERT instruction (handled by calling function)
2698 					}
2699 					else if(left->isArray() || left->isMatrix())
2700 					{
2701 						int scale = result->totalRegisterCount();
2702 
2703 						if(rel.type == sw::Shader::PARAMETER_VOID)   // Use the index register as the relative address directly
2704 						{
2705 							if(left->totalRegisterCount() > 1)
2706 							{
2707 								sw::Shader::SourceParameter relativeRegister;
2708 								source(relativeRegister, right);
2709 
2710 								int indexId = right->getAsSymbolNode() ? right->getAsSymbolNode()->getId() : 0;
2711 
2712 								rel.index = relativeRegister.index;
2713 								rel.type = relativeRegister.type;
2714 								rel.scale = scale;
2715 								rel.dynamic = (right->getQualifier() != EvqUniform) && (deterministicVariables.count(indexId) == 0);
2716 							}
2717 						}
2718 						else if(rel.index != registerIndex(&address))   // Move the previous index register to the address register
2719 						{
2720 							if(scale == 1)
2721 							{
2722 								Constant oldScale((int)rel.scale);
2723 								Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right);
2724 								mad->src[0].index = rel.index;
2725 								mad->src[0].type = rel.type;
2726 							}
2727 							else
2728 							{
2729 								Constant oldScale((int)rel.scale);
2730 								Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale);
2731 								mul->src[0].index = rel.index;
2732 								mul->src[0].type = rel.type;
2733 
2734 								Constant newScale(scale);
2735 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
2736 							}
2737 
2738 							rel.type = sw::Shader::PARAMETER_TEMP;
2739 							rel.index = registerIndex(&address);
2740 							rel.scale = 1;
2741 						}
2742 						else   // Just add the new index to the address register
2743 						{
2744 							if(scale == 1)
2745 							{
2746 								emit(sw::Shader::OPCODE_IADD, &address, &address, right);
2747 							}
2748 							else
2749 							{
2750 								Constant newScale(scale);
2751 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
2752 							}
2753 						}
2754 					}
2755 					else UNREACHABLE(0);
2756 				}
2757 				break;
2758 			case EOpIndexDirectStruct:
2759 			case EOpIndexDirectInterfaceBlock:
2760 				{
2761 					const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ?
2762 					                           left->getType().getStruct()->fields() :
2763 					                           left->getType().getInterfaceBlock()->fields();
2764 					int index = right->getAsConstantUnion()->getIConst(0);
2765 					int fieldOffset = 0;
2766 
2767 					for(int i = 0; i < index; i++)
2768 					{
2769 						fieldOffset += fields[i]->type()->totalRegisterCount();
2770 					}
2771 
2772 					offset += fieldOffset;
2773 					mask = writeMask(result);
2774 
2775 					return 0xE4;
2776 				}
2777 				break;
2778 			case EOpVectorSwizzle:
2779 				{
2780 					ASSERT(left->isRegister());
2781 
2782 					int leftMask = mask;
2783 
2784 					int swizzle = 0;
2785 					int rightMask = 0;
2786 
2787 					TIntermSequence &sequence = right->getAsAggregate()->getSequence();
2788 
2789 					for(unsigned int i = 0; i < sequence.size(); i++)
2790 					{
2791 						int index = sequence[i]->getAsConstantUnion()->getIConst(0);
2792 
2793 						int element = swizzleElement(leftSwizzle, index);
2794 						rightMask = rightMask | (1 << element);
2795 						swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2);
2796 					}
2797 
2798 					mask = leftMask & rightMask;
2799 
2800 					return swizzle;
2801 				}
2802 				break;
2803 			default:
2804 				UNREACHABLE(binary->getOp());   // Not an l-value operator
2805 				break;
2806 			}
2807 		}
2808 		else if(symbol)
2809 		{
2810 			root = symbol;
2811 			offset = 0;
2812 			mask = writeMask(symbol);
2813 
2814 			return 0xE4;
2815 		}
2816 		else
2817 		{
2818 			node->traverse(this);
2819 
2820 			root = node;
2821 			offset = 0;
2822 			mask = writeMask(node);
2823 
2824 			return 0xE4;
2825 		}
2826 
2827 		return 0xE4;
2828 	}
2829 
registerType(TIntermTyped * operand)2830 	sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand)
2831 	{
2832 		if(isSamplerRegister(operand))
2833 		{
2834 			return sw::Shader::PARAMETER_SAMPLER;
2835 		}
2836 
2837 		const TQualifier qualifier = operand->getQualifier();
2838 		if((qualifier == EvqFragColor) || (qualifier == EvqFragData))
2839 		{
2840 			if(((qualifier == EvqFragData) && (outputQualifier == EvqFragColor)) ||
2841 			   ((qualifier == EvqFragColor) && (outputQualifier == EvqFragData)))
2842 			{
2843 				mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", "");
2844 			}
2845 			outputQualifier = qualifier;
2846 		}
2847 
2848 		if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer()))
2849 		{
2850 			// Constant arrays are in the constant register file.
2851 			if(operand->isArray() && operand->getArraySize() > 1)
2852 			{
2853 				return sw::Shader::PARAMETER_CONST;
2854 			}
2855 			else
2856 			{
2857 				return sw::Shader::PARAMETER_TEMP;
2858 			}
2859 		}
2860 
2861 		switch(qualifier)
2862 		{
2863 		case EvqTemporary:           return sw::Shader::PARAMETER_TEMP;
2864 		case EvqGlobal:              return sw::Shader::PARAMETER_TEMP;
2865 		case EvqConstExpr:           return sw::Shader::PARAMETER_FLOAT4LITERAL;   // All converted to float
2866 		case EvqAttribute:           return sw::Shader::PARAMETER_INPUT;
2867 		case EvqVaryingIn:           return sw::Shader::PARAMETER_INPUT;
2868 		case EvqVaryingOut:          return sw::Shader::PARAMETER_OUTPUT;
2869 		case EvqVertexIn:            return sw::Shader::PARAMETER_INPUT;
2870 		case EvqFragmentOut:         return sw::Shader::PARAMETER_COLOROUT;
2871 		case EvqVertexOut:           return sw::Shader::PARAMETER_OUTPUT;
2872 		case EvqFragmentIn:          return sw::Shader::PARAMETER_INPUT;
2873 		case EvqInvariantVaryingIn:  return sw::Shader::PARAMETER_INPUT;    // FIXME: Guarantee invariance at the backend
2874 		case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT;   // FIXME: Guarantee invariance at the backend
2875 		case EvqSmooth:              return sw::Shader::PARAMETER_OUTPUT;
2876 		case EvqFlat:                return sw::Shader::PARAMETER_OUTPUT;
2877 		case EvqCentroidOut:         return sw::Shader::PARAMETER_OUTPUT;
2878 		case EvqSmoothIn:            return sw::Shader::PARAMETER_INPUT;
2879 		case EvqFlatIn:              return sw::Shader::PARAMETER_INPUT;
2880 		case EvqCentroidIn:          return sw::Shader::PARAMETER_INPUT;
2881 		case EvqUniform:             return sw::Shader::PARAMETER_CONST;
2882 		case EvqIn:                  return sw::Shader::PARAMETER_TEMP;
2883 		case EvqOut:                 return sw::Shader::PARAMETER_TEMP;
2884 		case EvqInOut:               return sw::Shader::PARAMETER_TEMP;
2885 		case EvqConstReadOnly:       return sw::Shader::PARAMETER_TEMP;
2886 		case EvqPosition:            return sw::Shader::PARAMETER_OUTPUT;
2887 		case EvqPointSize:           return sw::Shader::PARAMETER_OUTPUT;
2888 		case EvqInstanceID:          return sw::Shader::PARAMETER_MISCTYPE;
2889 		case EvqVertexID:            return sw::Shader::PARAMETER_MISCTYPE;
2890 		case EvqFragCoord:           return sw::Shader::PARAMETER_MISCTYPE;
2891 		case EvqFrontFacing:         return sw::Shader::PARAMETER_MISCTYPE;
2892 		case EvqPointCoord:          return sw::Shader::PARAMETER_INPUT;
2893 		case EvqFragColor:           return sw::Shader::PARAMETER_COLOROUT;
2894 		case EvqFragData:            return sw::Shader::PARAMETER_COLOROUT;
2895 		case EvqFragDepth:           return sw::Shader::PARAMETER_DEPTHOUT;
2896 		default: UNREACHABLE(qualifier);
2897 		}
2898 
2899 		return sw::Shader::PARAMETER_VOID;
2900 	}
2901 
hasFlatQualifier(TIntermTyped * operand)2902 	bool OutputASM::hasFlatQualifier(TIntermTyped *operand)
2903 	{
2904 		const TQualifier qualifier = operand->getQualifier();
2905 		return qualifier == EvqFlat || qualifier == EvqFlatOut || qualifier == EvqFlatIn;
2906 	}
2907 
registerIndex(TIntermTyped * operand)2908 	unsigned int OutputASM::registerIndex(TIntermTyped *operand)
2909 	{
2910 		if(isSamplerRegister(operand))
2911 		{
2912 			return samplerRegister(operand);
2913 		}
2914 		else if(operand->getType().totalSamplerRegisterCount() > 0) // Struct containing a sampler
2915 		{
2916 			samplerRegister(operand); // Make sure the sampler is declared
2917 		}
2918 
2919 		const TQualifier qualifier = operand->getQualifier();
2920 		if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer()))
2921 		{
2922 			// Constant arrays are in the constant register file.
2923 			if(operand->isArray() && operand->getArraySize() > 1)
2924 			{
2925 				return uniformRegister(operand);
2926 			}
2927 			else
2928 			{
2929 				return temporaryRegister(operand);
2930 			}
2931 		}
2932 
2933 		switch(operand->getQualifier())
2934 		{
2935 		case EvqTemporary:           return temporaryRegister(operand);
2936 		case EvqGlobal:              return temporaryRegister(operand);
2937 		case EvqConstExpr:           return temporaryRegister(operand);   // Unevaluated constant expression
2938 		case EvqAttribute:           return attributeRegister(operand);
2939 		case EvqVaryingIn:           return varyingRegister(operand);
2940 		case EvqVaryingOut:          return varyingRegister(operand);
2941 		case EvqVertexIn:            return attributeRegister(operand);
2942 		case EvqFragmentOut:         return fragmentOutputRegister(operand);
2943 		case EvqVertexOut:           return varyingRegister(operand);
2944 		case EvqFragmentIn:          return varyingRegister(operand);
2945 		case EvqInvariantVaryingIn:  return varyingRegister(operand);
2946 		case EvqInvariantVaryingOut: return varyingRegister(operand);
2947 		case EvqSmooth:              return varyingRegister(operand);
2948 		case EvqFlat:                return varyingRegister(operand);
2949 		case EvqCentroidOut:         return varyingRegister(operand);
2950 		case EvqSmoothIn:            return varyingRegister(operand);
2951 		case EvqFlatIn:              return varyingRegister(operand);
2952 		case EvqCentroidIn:          return varyingRegister(operand);
2953 		case EvqUniform:             return uniformRegister(operand);
2954 		case EvqIn:                  return temporaryRegister(operand);
2955 		case EvqOut:                 return temporaryRegister(operand);
2956 		case EvqInOut:               return temporaryRegister(operand);
2957 		case EvqConstReadOnly:       return temporaryRegister(operand);
2958 		case EvqPosition:            return varyingRegister(operand);
2959 		case EvqPointSize:           return varyingRegister(operand);
2960 		case EvqInstanceID:          vertexShader->declareInstanceId(); return sw::Shader::InstanceIDIndex;
2961 		case EvqVertexID:            vertexShader->declareVertexId(); return sw::Shader::VertexIDIndex;
2962 		case EvqFragCoord:           pixelShader->declareVPos();  return sw::Shader::VPosIndex;
2963 		case EvqFrontFacing:         pixelShader->declareVFace(); return sw::Shader::VFaceIndex;
2964 		case EvqPointCoord:          return varyingRegister(operand);
2965 		case EvqFragColor:           return 0;
2966 		case EvqFragData:            return fragmentOutputRegister(operand);
2967 		case EvqFragDepth:           return 0;
2968 		default: UNREACHABLE(operand->getQualifier());
2969 		}
2970 
2971 		return 0;
2972 	}
2973 
writeMask(TIntermTyped * destination,int index)2974 	int OutputASM::writeMask(TIntermTyped *destination, int index)
2975 	{
2976 		if(destination->getQualifier() == EvqPointSize)
2977 		{
2978 			return 0x2;   // Point size stored in the y component
2979 		}
2980 
2981 		return 0xF >> (4 - registerSize(destination->getType(), index));
2982 	}
2983 
readSwizzle(TIntermTyped * argument,int size)2984 	int OutputASM::readSwizzle(TIntermTyped *argument, int size)
2985 	{
2986 		if(argument->getQualifier() == EvqPointSize)
2987 		{
2988 			return 0x55;   // Point size stored in the y component
2989 		}
2990 
2991 		static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4};   // (void), xxxx, xyyy, xyzz, xyzw
2992 
2993 		return swizzleSize[size];
2994 	}
2995 
2996 	// Conservatively checks whether an expression is fast to compute and has no side effects
trivial(TIntermTyped * expression,int budget)2997 	bool OutputASM::trivial(TIntermTyped *expression, int budget)
2998 	{
2999 		if(!expression->isRegister())
3000 		{
3001 			return false;
3002 		}
3003 
3004 		return cost(expression, budget) >= 0;
3005 	}
3006 
3007 	// Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects)
cost(TIntermNode * expression,int budget)3008 	int OutputASM::cost(TIntermNode *expression, int budget)
3009 	{
3010 		if(budget < 0)
3011 		{
3012 			return budget;
3013 		}
3014 
3015 		if(expression->getAsSymbolNode())
3016 		{
3017 			return budget;
3018 		}
3019 		else if(expression->getAsConstantUnion())
3020 		{
3021 			return budget;
3022 		}
3023 		else if(expression->getAsBinaryNode())
3024 		{
3025 			TIntermBinary *binary = expression->getAsBinaryNode();
3026 
3027 			switch(binary->getOp())
3028 			{
3029 			case EOpVectorSwizzle:
3030 			case EOpIndexDirect:
3031 			case EOpIndexDirectStruct:
3032 			case EOpIndexDirectInterfaceBlock:
3033 				return cost(binary->getLeft(), budget - 0);
3034 			case EOpAdd:
3035 			case EOpSub:
3036 			case EOpMul:
3037 				return cost(binary->getLeft(), cost(binary->getRight(), budget - 1));
3038 			default:
3039 				return -1;
3040 			}
3041 		}
3042 		else if(expression->getAsUnaryNode())
3043 		{
3044 			TIntermUnary *unary = expression->getAsUnaryNode();
3045 
3046 			switch(unary->getOp())
3047 			{
3048 			case EOpAbs:
3049 			case EOpNegative:
3050 				return cost(unary->getOperand(), budget - 1);
3051 			default:
3052 				return -1;
3053 			}
3054 		}
3055 		else if(expression->getAsSelectionNode())
3056 		{
3057 			TIntermSelection *selection = expression->getAsSelectionNode();
3058 
3059 			if(selection->usesTernaryOperator())
3060 			{
3061 				TIntermTyped *condition = selection->getCondition();
3062 				TIntermNode *trueBlock = selection->getTrueBlock();
3063 				TIntermNode *falseBlock = selection->getFalseBlock();
3064 				TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
3065 
3066 				if(constantCondition)
3067 				{
3068 					bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
3069 
3070 					if(trueCondition)
3071 					{
3072 						return cost(trueBlock, budget - 0);
3073 					}
3074 					else
3075 					{
3076 						return cost(falseBlock, budget - 0);
3077 					}
3078 				}
3079 				else
3080 				{
3081 					return cost(trueBlock, cost(falseBlock, budget - 2));
3082 				}
3083 			}
3084 		}
3085 
3086 		return -1;
3087 	}
3088 
findFunction(const TString & name)3089 	const Function *OutputASM::findFunction(const TString &name)
3090 	{
3091 		for(unsigned int f = 0; f < functionArray.size(); f++)
3092 		{
3093 			if(functionArray[f].name == name)
3094 			{
3095 				return &functionArray[f];
3096 			}
3097 		}
3098 
3099 		return 0;
3100 	}
3101 
temporaryRegister(TIntermTyped * temporary)3102 	int OutputASM::temporaryRegister(TIntermTyped *temporary)
3103 	{
3104 		int index = allocate(temporaries, temporary);
3105 		if(index >= sw::NUM_TEMPORARY_REGISTERS)
3106 		{
3107 			mContext.error(temporary->getLine(),
3108 				"Too many temporary registers required to compile shader",
3109 				pixelShader ? "pixel shader" : "vertex shader");
3110 		}
3111 		return index;
3112 	}
3113 
setPixelShaderInputs(const TType & type,int var,bool flat)3114 	void OutputASM::setPixelShaderInputs(const TType& type, int var, bool flat)
3115 	{
3116 		if(type.isStruct())
3117 		{
3118 			const TFieldList &fields = type.getStruct()->fields();
3119 			int fieldVar = var;
3120 			for(const auto &field : fields)
3121 			{
3122 				const TType& fieldType = *(field->type());
3123 				setPixelShaderInputs(fieldType, fieldVar, flat);
3124 				fieldVar += fieldType.totalRegisterCount();
3125 			}
3126 		}
3127 		else
3128 		{
3129 			for(int i = 0; i < type.totalRegisterCount(); i++)
3130 			{
3131 				pixelShader->setInput(var + i, type.registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i, flat));
3132 			}
3133 		}
3134 	}
3135 
varyingRegister(TIntermTyped * varying)3136 	int OutputASM::varyingRegister(TIntermTyped *varying)
3137 	{
3138 		int var = lookup(varyings, varying);
3139 
3140 		if(var == -1)
3141 		{
3142 			var = allocate(varyings, varying);
3143 			if (var == -1)
3144 			{
3145 				return 0;
3146 			}
3147 			int registerCount = varying->totalRegisterCount();
3148 
3149 			if(pixelShader)
3150 			{
3151 				if((var + registerCount) > sw::MAX_FRAGMENT_INPUTS)
3152 				{
3153 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader");
3154 					return 0;
3155 				}
3156 
3157 				if(varying->getQualifier() == EvqPointCoord)
3158 				{
3159 					ASSERT(varying->isRegister());
3160 					pixelShader->setInput(var, varying->registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var));
3161 				}
3162 				else
3163 				{
3164 					setPixelShaderInputs(varying->getType(), var, hasFlatQualifier(varying));
3165 				}
3166 			}
3167 			else if(vertexShader)
3168 			{
3169 				if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS)
3170 				{
3171 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader");
3172 					return 0;
3173 				}
3174 
3175 				if(varying->getQualifier() == EvqPosition)
3176 				{
3177 					ASSERT(varying->isRegister());
3178 					vertexShader->setPositionRegister(var);
3179 				}
3180 				else if(varying->getQualifier() == EvqPointSize)
3181 				{
3182 					ASSERT(varying->isRegister());
3183 					vertexShader->setPointSizeRegister(var);
3184 				}
3185 				else
3186 				{
3187 					// Semantic indexes for user varyings will be assigned during program link to match the pixel shader
3188 				}
3189 			}
3190 			else UNREACHABLE(0);
3191 
3192 			declareVarying(varying, var);
3193 		}
3194 
3195 		return var;
3196 	}
3197 
declareVarying(TIntermTyped * varying,int reg)3198 	void OutputASM::declareVarying(TIntermTyped *varying, int reg)
3199 	{
3200 		if(varying->getQualifier() != EvqPointCoord)   // gl_PointCoord does not need linking
3201 		{
3202 			TIntermSymbol *symbol = varying->getAsSymbolNode();
3203 			declareVarying(varying->getType(), symbol->getSymbol(), reg);
3204 		}
3205 	}
3206 
declareVarying(const TType & type,const TString & varyingName,int registerIndex)3207 	void OutputASM::declareVarying(const TType &type, const TString &varyingName, int registerIndex)
3208 	{
3209 		const char *name = varyingName.c_str();
3210 		VaryingList &activeVaryings = shaderObject->varyings;
3211 
3212 		TStructure* structure = type.getStruct();
3213 		if(structure)
3214 		{
3215 			int fieldRegisterIndex = registerIndex;
3216 
3217 			const TFieldList &fields = type.getStruct()->fields();
3218 			for(const auto &field : fields)
3219 			{
3220 				const TType& fieldType = *(field->type());
3221 				declareVarying(fieldType, varyingName + "." + field->name(), fieldRegisterIndex);
3222 				if(fieldRegisterIndex >= 0)
3223 				{
3224 					fieldRegisterIndex += fieldType.totalRegisterCount();
3225 				}
3226 			}
3227 		}
3228 		else
3229 		{
3230 			// Check if this varying has been declared before without having a register assigned
3231 			for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++)
3232 			{
3233 				if(v->name == name)
3234 				{
3235 					if(registerIndex >= 0)
3236 					{
3237 						ASSERT(v->registerIndex < 0 || v->registerIndex == registerIndex);
3238 						v->registerIndex = registerIndex;
3239 					}
3240 
3241 					return;
3242 				}
3243 			}
3244 
3245 			activeVaryings.push_back(glsl::Varying(type, name, registerIndex, 0));
3246 		}
3247 	}
3248 
declareFragmentOutput(TIntermTyped * fragmentOutput)3249 	void OutputASM::declareFragmentOutput(TIntermTyped *fragmentOutput)
3250 	{
3251 		int requestedLocation = fragmentOutput->getType().getLayoutQualifier().location;
3252 		int registerCount = fragmentOutput->totalRegisterCount();
3253 		if(requestedLocation < 0)
3254 		{
3255 			ASSERT(requestedLocation == -1); // All other negative values would have been prevented in TParseContext::parseLayoutQualifier
3256 			return; // No requested location
3257 		}
3258 		else if((requestedLocation + registerCount) > sw::RENDERTARGETS)
3259 		{
3260 			mContext.error(fragmentOutput->getLine(), "Fragment output location larger or equal to MAX_DRAW_BUFFERS", "fragment shader");
3261 		}
3262 		else
3263 		{
3264 			int currentIndex = lookup(fragmentOutputs, fragmentOutput);
3265 			if(requestedLocation != currentIndex)
3266 			{
3267 				if(currentIndex != -1)
3268 				{
3269 					mContext.error(fragmentOutput->getLine(), "Multiple locations for fragment output", "fragment shader");
3270 				}
3271 				else
3272 				{
3273 					if(fragmentOutputs.size() <= (size_t)requestedLocation)
3274 					{
3275 						while(fragmentOutputs.size() < (size_t)requestedLocation)
3276 						{
3277 							fragmentOutputs.push_back(nullptr);
3278 						}
3279 						for(int i = 0; i < registerCount; i++)
3280 						{
3281 							fragmentOutputs.push_back(fragmentOutput);
3282 						}
3283 					}
3284 					else
3285 					{
3286 						for(int i = 0; i < registerCount; i++)
3287 						{
3288 							if(!fragmentOutputs[requestedLocation + i])
3289 							{
3290 								fragmentOutputs[requestedLocation + i] = fragmentOutput;
3291 							}
3292 							else
3293 							{
3294 								mContext.error(fragmentOutput->getLine(), "Fragment output location aliasing", "fragment shader");
3295 								return;
3296 							}
3297 						}
3298 					}
3299 				}
3300 			}
3301 		}
3302 	}
3303 
uniformRegister(TIntermTyped * uniform)3304 	int OutputASM::uniformRegister(TIntermTyped *uniform)
3305 	{
3306 		const TType &type = uniform->getType();
3307 		ASSERT(!IsSampler(type.getBasicType()));
3308 		TInterfaceBlock *block = type.getAsInterfaceBlock();
3309 		TIntermSymbol *symbol = uniform->getAsSymbolNode();
3310 		ASSERT(symbol || block);
3311 
3312 		if(symbol || block)
3313 		{
3314 			TInterfaceBlock* parentBlock = type.getInterfaceBlock();
3315 			bool isBlockMember = (!block && parentBlock);
3316 			int index = isBlockMember ? lookup(uniforms, parentBlock) : lookup(uniforms, uniform);
3317 
3318 			if(index == -1 || isBlockMember)
3319 			{
3320 				if(index == -1)
3321 				{
3322 					index = allocate(uniforms, uniform);
3323 					if (index == -1)
3324 					{
3325 						return 0;
3326 					}
3327 				}
3328 
3329 				// Verify if the current uniform is a member of an already declared block
3330 				const TString &name = symbol ? symbol->getSymbol() : block->name();
3331 				int blockMemberIndex = blockMemberLookup(type, name, index);
3332 				if(blockMemberIndex == -1)
3333 				{
3334 					declareUniform(type, name, index, false);
3335 				}
3336 				else
3337 				{
3338 					index = blockMemberIndex;
3339 				}
3340 			}
3341 
3342 			return index;
3343 		}
3344 
3345 		return 0;
3346 	}
3347 
attributeRegister(TIntermTyped * attribute)3348 	int OutputASM::attributeRegister(TIntermTyped *attribute)
3349 	{
3350 		ASSERT(!attribute->isArray());
3351 
3352 		int index = lookup(attributes, attribute);
3353 
3354 		if(index == -1)
3355 		{
3356 			TIntermSymbol *symbol = attribute->getAsSymbolNode();
3357 			ASSERT(symbol);
3358 
3359 			if(symbol)
3360 			{
3361 				index = allocate(attributes, attribute);
3362 				if (index == -1)
3363 				{
3364 					return -1;
3365 				}
3366 				const TType &type = attribute->getType();
3367 				int registerCount = attribute->totalRegisterCount();
3368 				sw::VertexShader::AttribType attribType = sw::VertexShader::ATTRIBTYPE_FLOAT;
3369 				switch(type.getBasicType())
3370 				{
3371 				case EbtInt:
3372 					attribType = sw::VertexShader::ATTRIBTYPE_INT;
3373 					break;
3374 				case EbtUInt:
3375 					attribType = sw::VertexShader::ATTRIBTYPE_UINT;
3376 					break;
3377 				case EbtFloat:
3378 				default:
3379 					break;
3380 				}
3381 
3382 				if(vertexShader && (index + registerCount) <= sw::MAX_VERTEX_INPUTS)
3383 				{
3384 					for(int i = 0; i < registerCount; i++)
3385 					{
3386 						vertexShader->setInput(index + i, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i, false), attribType);
3387 					}
3388 				}
3389 
3390 				ActiveAttributes &activeAttributes = shaderObject->activeAttributes;
3391 
3392 				const char *name = symbol->getSymbol().c_str();
3393 				activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index));
3394 			}
3395 		}
3396 
3397 		return index;
3398 	}
3399 
fragmentOutputRegister(TIntermTyped * fragmentOutput)3400 	int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput)
3401 	{
3402 		return allocate(fragmentOutputs, fragmentOutput);
3403 	}
3404 
samplerRegister(TIntermTyped * sampler)3405 	int OutputASM::samplerRegister(TIntermTyped *sampler)
3406 	{
3407 		const TType &type = sampler->getType();
3408 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
3409 
3410 		TIntermSymbol *symbol = sampler->getAsSymbolNode();
3411 		TIntermBinary *binary = sampler->getAsBinaryNode();
3412 
3413 		if(symbol)
3414 		{
3415 			switch(type.getQualifier())
3416 			{
3417 			case EvqUniform:
3418 				return samplerRegister(symbol);
3419 			case EvqIn:
3420 			case EvqConstReadOnly:
3421 				// Function arguments are not (uniform) sampler registers
3422 				return -1;
3423 			default:
3424 				UNREACHABLE(type.getQualifier());
3425 			}
3426 		}
3427 		else if(binary)
3428 		{
3429 			TIntermTyped *left = binary->getLeft();
3430 			TIntermTyped *right = binary->getRight();
3431 			const TType &leftType = left->getType();
3432 			int index = right->getAsConstantUnion() ? right->getAsConstantUnion()->getIConst(0) : 0;
3433 			int offset = 0;
3434 
3435 			switch(binary->getOp())
3436 			{
3437 			case EOpIndexDirect:
3438 				ASSERT(left->isArray());
3439 				offset = index * leftType.samplerRegisterCount();
3440 				break;
3441 			case EOpIndexDirectStruct:
3442 				ASSERT(leftType.isStruct());
3443 				{
3444 					const TFieldList &fields = leftType.getStruct()->fields();
3445 
3446 					for(int i = 0; i < index; i++)
3447 					{
3448 						offset += fields[i]->type()->totalSamplerRegisterCount();
3449 					}
3450 				}
3451 				break;
3452 			case EOpIndexIndirect:               // Indirect indexing produces a temporary, not a sampler register
3453 				return -1;
3454 			case EOpIndexDirectInterfaceBlock:   // Interface blocks can't contain samplers
3455 			default:
3456 				UNREACHABLE(binary->getOp());
3457 				return -1;
3458 			}
3459 
3460 			int base = samplerRegister(left);
3461 
3462 			if(base < 0)
3463 			{
3464 				return -1;
3465 			}
3466 
3467 			return base + offset;
3468 		}
3469 
3470 		UNREACHABLE(0);
3471 		return -1;   // Not a (uniform) sampler register
3472 	}
3473 
samplerRegister(TIntermSymbol * sampler)3474 	int OutputASM::samplerRegister(TIntermSymbol *sampler)
3475 	{
3476 		const TType &type = sampler->getType();
3477 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
3478 
3479 		int index = lookup(samplers, sampler);
3480 
3481 		if(index == -1)
3482 		{
3483 			index = allocate(samplers, sampler, true);
3484 			if (index == -1)
3485 			{
3486 				return 0;
3487 			}
3488 
3489 			if(sampler->getQualifier() == EvqUniform)
3490 			{
3491 				const char *name = sampler->getSymbol().c_str();
3492 				declareUniform(type, name, index, true);
3493 			}
3494 		}
3495 
3496 		return index;
3497 	}
3498 
isSamplerRegister(TIntermTyped * operand)3499 	bool OutputASM::isSamplerRegister(TIntermTyped *operand)
3500 	{
3501 		return operand && IsSampler(operand->getBasicType()) && samplerRegister(operand) >= 0;
3502 	}
3503 
arrayExceedsLimits(TIntermTyped * operand)3504 	bool OutputASM::arrayExceedsLimits(TIntermTyped *operand)
3505 	{
3506 		const TVariable *maxUniformVectors = nullptr;
3507 		TString builtinName = "";
3508 		if (vertexShader)
3509 		{
3510 			builtinName = "gl_MaxVertexUniformVectors";
3511 		}
3512 		else if (pixelShader)
3513 		{
3514 			builtinName = "gl_MaxFragmentUniformVectors";
3515 		}
3516 		maxUniformVectors = static_cast<const TVariable *>(mContext.symbolTable.findBuiltIn(builtinName.c_str(), mContext.getShaderVersion()));
3517 		if (operand->getArraySize() > maxUniformVectors->getConstPointer()->getIConst())
3518 		{
3519 			std::stringstream extraInfoStream;
3520 			extraInfoStream << "Array size (" << operand->getArraySize() << ") "
3521 			                << "exceeds limit of " << builtinName
3522 			                << " (" << maxUniformVectors->getConstPointer()->getIConst() << ")";
3523 			std::string errorStr = extraInfoStream.str();
3524 			mContext.error(operand->getLine(), errorStr.c_str(),
3525 			               operand->getBasicString());
3526 			return true;
3527 		}
3528 		return false;
3529 	}
3530 
lookup(VariableArray & list,TIntermTyped * variable)3531 	int OutputASM::lookup(VariableArray &list, TIntermTyped *variable)
3532 	{
3533 		for(unsigned int i = 0; i < list.size(); i++)
3534 		{
3535 			if(list[i] == variable)
3536 			{
3537 				return i;   // Pointer match
3538 			}
3539 		}
3540 
3541 		TIntermSymbol *varSymbol = variable->getAsSymbolNode();
3542 		TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock();
3543 
3544 		if(varBlock)
3545 		{
3546 			for(unsigned int i = 0; i < list.size(); i++)
3547 			{
3548 				if(list[i])
3549 				{
3550 					TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock();
3551 
3552 					if(listBlock)
3553 					{
3554 						if(listBlock->name() == varBlock->name())
3555 						{
3556 							ASSERT(listBlock->arraySize() == varBlock->arraySize());
3557 							ASSERT(listBlock->fields() == varBlock->fields());
3558 							ASSERT(listBlock->blockStorage() == varBlock->blockStorage());
3559 							ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking());
3560 
3561 							return i;
3562 						}
3563 					}
3564 				}
3565 			}
3566 		}
3567 		else if(varSymbol)
3568 		{
3569 			for(unsigned int i = 0; i < list.size(); i++)
3570 			{
3571 				if(list[i])
3572 				{
3573 					TIntermSymbol *listSymbol = list[i]->getAsSymbolNode();
3574 
3575 					if(listSymbol)
3576 					{
3577 						if(listSymbol->getId() == varSymbol->getId())
3578 						{
3579 							ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol());
3580 							ASSERT(listSymbol->getType() == varSymbol->getType());
3581 							ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier());
3582 
3583 							return i;
3584 						}
3585 					}
3586 				}
3587 			}
3588 		}
3589 
3590 		return -1;
3591 	}
3592 
lookup(VariableArray & list,TInterfaceBlock * block)3593 	int OutputASM::lookup(VariableArray &list, TInterfaceBlock *block)
3594 	{
3595 		for(unsigned int i = 0; i < list.size(); i++)
3596 		{
3597 			if(list[i] && (list[i]->getType().getInterfaceBlock() == block))
3598 			{
3599 				return i;   // Pointer match
3600 			}
3601 		}
3602 		return -1;
3603 	}
3604 
allocate(VariableArray & list,TIntermTyped * variable,bool samplersOnly)3605 	int OutputASM::allocate(VariableArray &list, TIntermTyped *variable, bool samplersOnly)
3606 	{
3607 		int index = lookup(list, variable);
3608 
3609 		if(index == -1)
3610 		{
3611 			if (arrayExceedsLimits(variable))
3612 			{
3613 				return -1;
3614 			}
3615 			unsigned int registerCount = variable->blockRegisterCount(samplersOnly);
3616 
3617 			for(unsigned int i = 0; i < list.size(); i++)
3618 			{
3619 				if(list[i] == 0)
3620 				{
3621 					unsigned int j = 1;
3622 					for( ; j < registerCount && (i + j) < list.size(); j++)
3623 					{
3624 						if(list[i + j] != 0)
3625 						{
3626 							break;
3627 						}
3628 					}
3629 
3630 					if(j == registerCount)   // Found free slots
3631 					{
3632 						for(unsigned int j = 0; j < registerCount; j++)
3633 						{
3634 							list[i + j] = variable;
3635 						}
3636 
3637 						return i;
3638 					}
3639 				}
3640 			}
3641 
3642 			index = list.size();
3643 
3644 			for(unsigned int i = 0; i < registerCount; i++)
3645 			{
3646 				list.push_back(variable);
3647 			}
3648 		}
3649 
3650 		return index;
3651 	}
3652 
free(VariableArray & list,TIntermTyped * variable)3653 	void OutputASM::free(VariableArray &list, TIntermTyped *variable)
3654 	{
3655 		int index = lookup(list, variable);
3656 
3657 		if(index >= 0)
3658 		{
3659 			list[index] = 0;
3660 		}
3661 	}
3662 
blockMemberLookup(const TType & type,const TString & name,int registerIndex)3663 	int OutputASM::blockMemberLookup(const TType &type, const TString &name, int registerIndex)
3664 	{
3665 		const TInterfaceBlock *block = type.getInterfaceBlock();
3666 
3667 		if(block)
3668 		{
3669 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
3670 			const TFieldList& fields = block->fields();
3671 			const TString &blockName = block->name();
3672 			int fieldRegisterIndex = registerIndex;
3673 
3674 			if(!type.isInterfaceBlock())
3675 			{
3676 				// This is a uniform that's part of a block, let's see if the block is already defined
3677 				for(size_t i = 0; i < activeUniformBlocks.size(); ++i)
3678 				{
3679 					if(activeUniformBlocks[i].name == blockName.c_str())
3680 					{
3681 						// The block is already defined, find the register for the current uniform and return it
3682 						for(size_t j = 0; j < fields.size(); j++)
3683 						{
3684 							const TString &fieldName = fields[j]->name();
3685 							if(fieldName == name)
3686 							{
3687 								return fieldRegisterIndex;
3688 							}
3689 
3690 							fieldRegisterIndex += fields[j]->type()->totalRegisterCount();
3691 						}
3692 
3693 						ASSERT(false);
3694 						return fieldRegisterIndex;
3695 					}
3696 				}
3697 			}
3698 		}
3699 
3700 		return -1;
3701 	}
3702 
declareUniform(const TType & type,const TString & name,int registerIndex,bool samplersOnly,int blockId,BlockLayoutEncoder * encoder)3703 	void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, bool samplersOnly, int blockId, BlockLayoutEncoder* encoder)
3704 	{
3705 		const TStructure *structure = type.getStruct();
3706 		const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr;
3707 
3708 		if(!structure && !block)
3709 		{
3710 			ActiveUniforms &activeUniforms = shaderObject->activeUniforms;
3711 			const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo();
3712 			if(blockId >= 0)
3713 			{
3714 				blockDefinitions[blockId].insert(BlockDefinitionIndexMap::value_type(registerIndex, TypedMemberInfo(blockInfo, type)));
3715 				shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size());
3716 			}
3717 			int fieldRegisterIndex = encoder ? shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex;
3718 			bool isSampler = IsSampler(type.getBasicType());
3719 			if(isSampler && samplersOnly)
3720 			{
3721 				for(int i = 0; i < type.totalRegisterCount(); i++)
3722 				{
3723 					shader->declareSampler(fieldRegisterIndex + i);
3724 				}
3725 			}
3726 			if(isSampler == samplersOnly)
3727 			{
3728 				activeUniforms.push_back(Uniform(type, name.c_str(), fieldRegisterIndex, blockId, blockInfo));
3729 			}
3730 		}
3731 		else if(block)
3732 		{
3733 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
3734 			const TFieldList& fields = block->fields();
3735 			const TString &blockName = block->name();
3736 			int fieldRegisterIndex = registerIndex;
3737 			bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1);
3738 
3739 			blockId = activeUniformBlocks.size();
3740 			bool isRowMajor = block->matrixPacking() == EmpRowMajor;
3741 			activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(),
3742 			                                           block->blockStorage(), isRowMajor, registerIndex, blockId));
3743 			blockDefinitions.push_back(BlockDefinitionIndexMap());
3744 
3745 			Std140BlockEncoder currentBlockEncoder;
3746 			currentBlockEncoder.enterAggregateType();
3747 			for(const auto &field : fields)
3748 			{
3749 				const TType &fieldType = *(field->type());
3750 				const TString &fieldName = field->name();
3751 				if(isUniformBlockMember && (fieldName == name))
3752 				{
3753 					registerIndex = fieldRegisterIndex;
3754 				}
3755 
3756 				const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName;
3757 
3758 				declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, &currentBlockEncoder);
3759 				fieldRegisterIndex += fieldType.totalRegisterCount();
3760 			}
3761 			currentBlockEncoder.exitAggregateType();
3762 			activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize();
3763 		}
3764 		else
3765 		{
3766 			// Store struct for program link time validation
3767 			shaderObject->activeUniformStructs.push_back(Uniform(type, name.c_str(), registerIndex, -1, BlockMemberInfo::getDefaultBlockInfo()));
3768 
3769 			int fieldRegisterIndex = registerIndex;
3770 
3771 			const TFieldList& fields = structure->fields();
3772 			if(type.isArray() && (structure || type.isInterfaceBlock()))
3773 			{
3774 				for(int i = 0; i < type.getArraySize(); i++)
3775 				{
3776 					if(encoder)
3777 					{
3778 						encoder->enterAggregateType();
3779 					}
3780 					for(const auto &field : fields)
3781 					{
3782 						const TType &fieldType = *(field->type());
3783 						const TString &fieldName = field->name();
3784 						const TString uniformName = name + "[" + str(i) + "]." + fieldName;
3785 
3786 						declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder);
3787 						fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount();
3788 					}
3789 					if(encoder)
3790 					{
3791 						encoder->exitAggregateType();
3792 					}
3793 				}
3794 			}
3795 			else
3796 			{
3797 				if(encoder)
3798 				{
3799 					encoder->enterAggregateType();
3800 				}
3801 				for(const auto &field : fields)
3802 				{
3803 					const TType &fieldType = *(field->type());
3804 					const TString &fieldName = field->name();
3805 					const TString uniformName = name + "." + fieldName;
3806 
3807 					declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder);
3808 					fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount();
3809 				}
3810 				if(encoder)
3811 				{
3812 					encoder->exitAggregateType();
3813 				}
3814 			}
3815 		}
3816 	}
3817 
dim(TIntermNode * v)3818 	int OutputASM::dim(TIntermNode *v)
3819 	{
3820 		TIntermTyped *vector = v->getAsTyped();
3821 		ASSERT(vector && vector->isRegister());
3822 		return vector->getNominalSize();
3823 	}
3824 
dim2(TIntermNode * m)3825 	int OutputASM::dim2(TIntermNode *m)
3826 	{
3827 		TIntermTyped *matrix = m->getAsTyped();
3828 		ASSERT(matrix && matrix->isMatrix() && !matrix->isArray());
3829 		return matrix->getSecondarySize();
3830 	}
3831 
3832 	// Sets iterations to ~0u if no loop count could be statically determined.
LoopInfo(TIntermLoop * node)3833 	OutputASM::LoopInfo::LoopInfo(TIntermLoop *node)
3834 	{
3835 		// Parse loops of the form:
3836 		// for(int index = initial; index [comparator] limit; index [op] increment)
3837 
3838 		// Parse index name and intial value
3839 		if(node->getInit())
3840 		{
3841 			TIntermAggregate *init = node->getInit()->getAsAggregate();
3842 
3843 			if(init)
3844 			{
3845 				TIntermSequence &sequence = init->getSequence();
3846 				TIntermTyped *variable = sequence[0]->getAsTyped();
3847 
3848 				if(variable && variable->getQualifier() == EvqTemporary && variable->getBasicType() == EbtInt)
3849 				{
3850 					TIntermBinary *assign = variable->getAsBinaryNode();
3851 
3852 					if(assign && assign->getOp() == EOpInitialize)
3853 					{
3854 						TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode();
3855 						TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion();
3856 
3857 						if(symbol && constant)
3858 						{
3859 							if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3860 							{
3861 								index = symbol;
3862 								initial = constant->getUnionArrayPointer()[0].getIConst();
3863 							}
3864 						}
3865 					}
3866 				}
3867 			}
3868 		}
3869 
3870 		// Parse comparator and limit value
3871 		if(index && node->getCondition())
3872 		{
3873 			TIntermBinary *test = node->getCondition()->getAsBinaryNode();
3874 			TIntermSymbol *left = test ? test->getLeft()->getAsSymbolNode() : nullptr;
3875 
3876 			if(left && (left->getId() == index->getId()))
3877 			{
3878 				TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion();
3879 
3880 				if(constant)
3881 				{
3882 					if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3883 					{
3884 						comparator = test->getOp();
3885 						limit = constant->getUnionArrayPointer()[0].getIConst();
3886 					}
3887 				}
3888 			}
3889 		}
3890 
3891 		// Parse increment
3892 		if(index && comparator != EOpNull && node->getExpression())
3893 		{
3894 			TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode();
3895 			TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode();
3896 
3897 			if(binaryTerminal)
3898 			{
3899 				TIntermSymbol *operand = binaryTerminal->getLeft()->getAsSymbolNode();
3900 
3901 				if(operand && operand->getId() == index->getId())
3902 				{
3903 					TOperator op = binaryTerminal->getOp();
3904 					TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion();
3905 
3906 					if(constant)
3907 					{
3908 						if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3909 						{
3910 							int value = constant->getUnionArrayPointer()[0].getIConst();
3911 
3912 							switch(op)
3913 							{
3914 							case EOpAddAssign: increment = value;  break;
3915 							case EOpSubAssign: increment = -value; break;
3916 							default:           increment = 0;      break;   // Rare cases left unhandled. Treated as non-deterministic.
3917 							}
3918 						}
3919 					}
3920 				}
3921 			}
3922 			else if(unaryTerminal)
3923 			{
3924 				TIntermSymbol *operand = unaryTerminal->getOperand()->getAsSymbolNode();
3925 
3926 				if(operand && operand->getId() == index->getId())
3927 				{
3928 					TOperator op = unaryTerminal->getOp();
3929 
3930 					switch(op)
3931 					{
3932 					case EOpPostIncrement: increment = 1;  break;
3933 					case EOpPostDecrement: increment = -1; break;
3934 					case EOpPreIncrement:  increment = 1;  break;
3935 					case EOpPreDecrement:  increment = -1; break;
3936 					default:               increment = 0;  break;   // Rare cases left unhandled. Treated as non-deterministic.
3937 					}
3938 				}
3939 			}
3940 		}
3941 
3942 		if(index && comparator != EOpNull && increment != 0)
3943 		{
3944 			// Check the loop body for return statements or changes to the index variable that make it non-deterministic.
3945 			LoopUnrollable loopUnrollable;
3946 			bool unrollable = loopUnrollable.traverse(node, index->getId());
3947 
3948 			if(!unrollable)
3949 			{
3950 				iterations = ~0u;
3951 				return;
3952 			}
3953 
3954 			if(comparator == EOpLessThanEqual)
3955 			{
3956 				comparator = EOpLessThan;
3957 				limit += 1;
3958 			}
3959 			else if(comparator == EOpGreaterThanEqual)
3960 			{
3961 				comparator = EOpLessThan;
3962 				limit -= 1;
3963 				std::swap(initial, limit);
3964 				increment = -increment;
3965 			}
3966 			else if(comparator == EOpGreaterThan)
3967 			{
3968 				comparator = EOpLessThan;
3969 				std::swap(initial, limit);
3970 				increment = -increment;
3971 			}
3972 
3973 			if(comparator == EOpLessThan)
3974 			{
3975 				if(!(initial < limit))   // Never loops
3976 				{
3977 					iterations = 0;
3978 				}
3979 				else if(increment < 0)
3980 				{
3981 					iterations = ~0u;
3982 				}
3983 				else
3984 				{
3985 					iterations = (limit - initial + abs(increment) - 1) / increment;   // Ceiling division
3986 				}
3987 			}
3988 			else
3989 			{
3990 				// Rare cases left unhandled. Treated as non-deterministic.
3991 				iterations = ~0u;
3992 			}
3993 		}
3994 	}
3995 
traverse(TIntermLoop * loop,int indexId)3996 	bool LoopUnrollable::traverse(TIntermLoop *loop, int indexId)
3997 	{
3998 		loopUnrollable = true;
3999 
4000 		loopIndexId = indexId;
4001 		TIntermNode *body = loop->getBody();
4002 
4003 		if(body)
4004 		{
4005 			body->traverse(this);
4006 		}
4007 
4008 		return loopUnrollable;
4009 	}
4010 
visitSymbol(TIntermSymbol * node)4011 	void LoopUnrollable::visitSymbol(TIntermSymbol *node)
4012 	{
4013 		// Check that the loop index is not used as the argument to a function out or inout parameter.
4014 		if(node->getId() == loopIndexId)
4015 		{
4016 			if(node->getQualifier() == EvqOut || node->getQualifier() == EvqInOut)
4017 			{
4018 				loopUnrollable = false;
4019 			}
4020 		}
4021 	}
4022 
visitBinary(Visit visit,TIntermBinary * node)4023 	bool LoopUnrollable::visitBinary(Visit visit, TIntermBinary *node)
4024 	{
4025 		if(!loopUnrollable)
4026 		{
4027 			return false;
4028 		}
4029 
4030 		// Check that the loop index is not statically assigned to.
4031 		TIntermSymbol *symbol = node->getLeft()->getAsSymbolNode();
4032 		loopUnrollable = !(node->modifiesState() && symbol && (symbol->getId() == loopIndexId));
4033 
4034 		return loopUnrollable;
4035 	}
4036 
visitUnary(Visit visit,TIntermUnary * node)4037 	bool LoopUnrollable::visitUnary(Visit visit, TIntermUnary *node)
4038 	{
4039 		if(!loopUnrollable)
4040 		{
4041 			return false;
4042 		}
4043 
4044 		// Check that the loop index is not statically assigned to.
4045 		TIntermSymbol *symbol = node->getOperand()->getAsSymbolNode();
4046 		loopUnrollable = !(node->modifiesState() && symbol && (symbol->getId() == loopIndexId));
4047 
4048 		return loopUnrollable;
4049 	}
4050 
visitBranch(Visit visit,TIntermBranch * node)4051 	bool LoopUnrollable::visitBranch(Visit visit, TIntermBranch *node)
4052 	{
4053 		if(!loopUnrollable)
4054 		{
4055 			return false;
4056 		}
4057 
4058 		switch(node->getFlowOp())
4059 		{
4060 		case EOpKill:
4061 		case EOpReturn:
4062 		case EOpBreak:
4063 		case EOpContinue:
4064 			loopUnrollable = false;
4065 			break;
4066 		default: UNREACHABLE(node->getFlowOp());
4067 		}
4068 
4069 		return loopUnrollable;
4070 	}
4071 
visitAggregate(Visit visit,TIntermAggregate * node)4072 	bool LoopUnrollable::visitAggregate(Visit visit, TIntermAggregate *node)
4073 	{
4074 		return loopUnrollable;
4075 	}
4076 }
4077