• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
#include "OutputASM.h"
#include "Common/Math.hpp"

#include "common/debug.h"
#include "InfoSink.h"

#include "libGLESv2/Shader.h"

#include <GLES2/gl2.h>
#include <GLES2/gl2ext.h>
#include <GLES3/gl3.h>

#include <stdio.h>
#include <stdlib.h>
28 
29 namespace
30 {
glVariableType(const TType & type)31 	GLenum glVariableType(const TType &type)
32 	{
33 		switch(type.getBasicType())
34 		{
35 		case EbtFloat:
36 			if(type.isScalar())
37 			{
38 				return GL_FLOAT;
39 			}
40 			else if(type.isVector())
41 			{
42 				switch(type.getNominalSize())
43 				{
44 				case 2: return GL_FLOAT_VEC2;
45 				case 3: return GL_FLOAT_VEC3;
46 				case 4: return GL_FLOAT_VEC4;
47 				default: UNREACHABLE(type.getNominalSize());
48 				}
49 			}
50 			else if(type.isMatrix())
51 			{
52 				switch(type.getNominalSize())
53 				{
54 				case 2:
55 					switch(type.getSecondarySize())
56 					{
57 					case 2: return GL_FLOAT_MAT2;
58 					case 3: return GL_FLOAT_MAT2x3;
59 					case 4: return GL_FLOAT_MAT2x4;
60 					default: UNREACHABLE(type.getSecondarySize());
61 					}
62 				case 3:
63 					switch(type.getSecondarySize())
64 					{
65 					case 2: return GL_FLOAT_MAT3x2;
66 					case 3: return GL_FLOAT_MAT3;
67 					case 4: return GL_FLOAT_MAT3x4;
68 					default: UNREACHABLE(type.getSecondarySize());
69 					}
70 				case 4:
71 					switch(type.getSecondarySize())
72 					{
73 					case 2: return GL_FLOAT_MAT4x2;
74 					case 3: return GL_FLOAT_MAT4x3;
75 					case 4: return GL_FLOAT_MAT4;
76 					default: UNREACHABLE(type.getSecondarySize());
77 					}
78 				default: UNREACHABLE(type.getNominalSize());
79 				}
80 			}
81 			else UNREACHABLE(0);
82 			break;
83 		case EbtInt:
84 			if(type.isScalar())
85 			{
86 				return GL_INT;
87 			}
88 			else if(type.isVector())
89 			{
90 				switch(type.getNominalSize())
91 				{
92 				case 2: return GL_INT_VEC2;
93 				case 3: return GL_INT_VEC3;
94 				case 4: return GL_INT_VEC4;
95 				default: UNREACHABLE(type.getNominalSize());
96 				}
97 			}
98 			else UNREACHABLE(0);
99 			break;
100 		case EbtUInt:
101 			if(type.isScalar())
102 			{
103 				return GL_UNSIGNED_INT;
104 			}
105 			else if(type.isVector())
106 			{
107 				switch(type.getNominalSize())
108 				{
109 				case 2: return GL_UNSIGNED_INT_VEC2;
110 				case 3: return GL_UNSIGNED_INT_VEC3;
111 				case 4: return GL_UNSIGNED_INT_VEC4;
112 				default: UNREACHABLE(type.getNominalSize());
113 				}
114 			}
115 			else UNREACHABLE(0);
116 			break;
117 		case EbtBool:
118 			if(type.isScalar())
119 			{
120 				return GL_BOOL;
121 			}
122 			else if(type.isVector())
123 			{
124 				switch(type.getNominalSize())
125 				{
126 				case 2: return GL_BOOL_VEC2;
127 				case 3: return GL_BOOL_VEC3;
128 				case 4: return GL_BOOL_VEC4;
129 				default: UNREACHABLE(type.getNominalSize());
130 				}
131 			}
132 			else UNREACHABLE(0);
133 			break;
134 		case EbtSampler2D:
135 			return GL_SAMPLER_2D;
136 		case EbtISampler2D:
137 			return GL_INT_SAMPLER_2D;
138 		case EbtUSampler2D:
139 			return GL_UNSIGNED_INT_SAMPLER_2D;
140 		case EbtSamplerCube:
141 			return GL_SAMPLER_CUBE;
142 		case EbtSampler2DRect:
143 			return GL_SAMPLER_2D_RECT_ARB;
144 		case EbtISamplerCube:
145 			return GL_INT_SAMPLER_CUBE;
146 		case EbtUSamplerCube:
147 			return GL_UNSIGNED_INT_SAMPLER_CUBE;
148 		case EbtSamplerExternalOES:
149 			return GL_SAMPLER_EXTERNAL_OES;
150 		case EbtSampler3D:
151 			return GL_SAMPLER_3D_OES;
152 		case EbtISampler3D:
153 			return GL_INT_SAMPLER_3D;
154 		case EbtUSampler3D:
155 			return GL_UNSIGNED_INT_SAMPLER_3D;
156 		case EbtSampler2DArray:
157 			return GL_SAMPLER_2D_ARRAY;
158 		case EbtISampler2DArray:
159 			return GL_INT_SAMPLER_2D_ARRAY;
160 		case EbtUSampler2DArray:
161 			return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY;
162 		case EbtSampler2DShadow:
163 			return GL_SAMPLER_2D_SHADOW;
164 		case EbtSamplerCubeShadow:
165 			return GL_SAMPLER_CUBE_SHADOW;
166 		case EbtSampler2DArrayShadow:
167 			return GL_SAMPLER_2D_ARRAY_SHADOW;
168 		default:
169 			UNREACHABLE(type.getBasicType());
170 			break;
171 		}
172 
173 		return GL_NONE;
174 	}
175 
glVariablePrecision(const TType & type)176 	GLenum glVariablePrecision(const TType &type)
177 	{
178 		if(type.getBasicType() == EbtFloat)
179 		{
180 			switch(type.getPrecision())
181 			{
182 			case EbpHigh:   return GL_HIGH_FLOAT;
183 			case EbpMedium: return GL_MEDIUM_FLOAT;
184 			case EbpLow:    return GL_LOW_FLOAT;
185 			case EbpUndefined:
186 				// Should be defined as the default precision by the parser
187 			default: UNREACHABLE(type.getPrecision());
188 			}
189 		}
190 		else if(type.getBasicType() == EbtInt)
191 		{
192 			switch(type.getPrecision())
193 			{
194 			case EbpHigh:   return GL_HIGH_INT;
195 			case EbpMedium: return GL_MEDIUM_INT;
196 			case EbpLow:    return GL_LOW_INT;
197 			case EbpUndefined:
198 				// Should be defined as the default precision by the parser
199 			default: UNREACHABLE(type.getPrecision());
200 			}
201 		}
202 
203 		// Other types (boolean, sampler) don't have a precision
204 		return GL_NONE;
205 	}
206 }
207 
208 namespace glsl
209 {
210 	// Integer to TString conversion
str(int i)211 	TString str(int i)
212 	{
213 		char buffer[20];
214 		sprintf(buffer, "%d", i);
215 		return buffer;
216 	}
217 
218 	class Temporary : public TIntermSymbol
219 	{
220 	public:
Temporary(OutputASM * assembler)221 		Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler)
222 		{
223 		}
224 
~Temporary()225 		~Temporary()
226 		{
227 			assembler->freeTemporary(this);
228 		}
229 
230 	private:
231 		OutputASM *const assembler;
232 	};
233 
234 	class Constant : public TIntermConstantUnion
235 	{
236 	public:
Constant(float x,float y,float z,float w)237 		Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false))
238 		{
239 			constants[0].setFConst(x);
240 			constants[1].setFConst(y);
241 			constants[2].setFConst(z);
242 			constants[3].setFConst(w);
243 		}
244 
Constant(bool b)245 		Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false))
246 		{
247 			constants[0].setBConst(b);
248 		}
249 
Constant(int i)250 		Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false))
251 		{
252 			constants[0].setIConst(i);
253 		}
254 
~Constant()255 		~Constant()
256 		{
257 		}
258 
259 	private:
260 		ConstantUnion constants[4];
261 	};
262 
ShaderVariable(const TType & type,const std::string & name,int registerIndex)263 	ShaderVariable::ShaderVariable(const TType& type, const std::string& name, int registerIndex) :
264 		type(type.isStruct() ? GL_NONE : glVariableType(type)), precision(glVariablePrecision(type)),
265 		name(name), arraySize(type.getArraySize()), registerIndex(registerIndex)
266 	{
267 		if(type.isStruct())
268 		{
269 			for(const auto& field : type.getStruct()->fields())
270 			{
271 				fields.push_back(ShaderVariable(*(field->type()), field->name().c_str(), -1));
272 			}
273 		}
274 	}
275 
Uniform(const TType & type,const std::string & name,int registerIndex,int blockId,const BlockMemberInfo & blockMemberInfo)276 	Uniform::Uniform(const TType& type, const std::string &name, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) :
277 		ShaderVariable(type, name, registerIndex), blockId(blockId), blockInfo(blockMemberInfo)
278 	{
279 	}
280 
UniformBlock(const std::string & name,unsigned int dataSize,unsigned int arraySize,TLayoutBlockStorage layout,bool isRowMajorLayout,int registerIndex,int blockId)281 	UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize,
282 	                           TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) :
283 		name(name), dataSize(dataSize), arraySize(arraySize), layout(layout),
284 		isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId)
285 	{
286 	}
287 
BlockLayoutEncoder()288 	BlockLayoutEncoder::BlockLayoutEncoder()
289 		: mCurrentOffset(0)
290 	{
291 	}
292 
encodeType(const TType & type)293 	BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type)
294 	{
295 		int arrayStride;
296 		int matrixStride;
297 
298 		bool isRowMajor = type.getLayoutQualifier().matrixPacking == EmpRowMajor;
299 		getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride);
300 
301 		const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent),
302 		                                 static_cast<int>(arrayStride * BytesPerComponent),
303 		                                 static_cast<int>(matrixStride * BytesPerComponent),
304 		                                 (matrixStride > 0) && isRowMajor);
305 
306 		advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride);
307 
308 		return memberInfo;
309 	}
310 
311 	// static
getBlockRegister(const BlockMemberInfo & info)312 	size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info)
313 	{
314 		return (info.offset / BytesPerComponent) / ComponentsPerRegister;
315 	}
316 
317 	// static
getBlockRegisterElement(const BlockMemberInfo & info)318 	size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info)
319 	{
320 		return (info.offset / BytesPerComponent) % ComponentsPerRegister;
321 	}
322 
nextRegister()323 	void BlockLayoutEncoder::nextRegister()
324 	{
325 		mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister);
326 	}
327 
Std140BlockEncoder()328 	Std140BlockEncoder::Std140BlockEncoder() : BlockLayoutEncoder()
329 	{
330 	}
331 
enterAggregateType()332 	void Std140BlockEncoder::enterAggregateType()
333 	{
334 		nextRegister();
335 	}
336 
exitAggregateType()337 	void Std140BlockEncoder::exitAggregateType()
338 	{
339 		nextRegister();
340 	}
341 
getBlockLayoutInfo(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int * arrayStrideOut,int * matrixStrideOut)342 	void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut)
343 	{
344 		size_t baseAlignment = 0;
345 		int matrixStride = 0;
346 		int arrayStride = 0;
347 
348 		if(type.isMatrix())
349 		{
350 			baseAlignment = ComponentsPerRegister;
351 			matrixStride = ComponentsPerRegister;
352 
353 			if(arraySize > 0)
354 			{
355 				const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
356 				arrayStride = ComponentsPerRegister * numRegisters;
357 			}
358 		}
359 		else if(arraySize > 0)
360 		{
361 			baseAlignment = ComponentsPerRegister;
362 			arrayStride = ComponentsPerRegister;
363 		}
364 		else
365 		{
366 			const size_t numComponents = type.getElementSize();
367 			baseAlignment = (numComponents == 3 ? 4u : numComponents);
368 		}
369 
370 		mCurrentOffset = sw::align(mCurrentOffset, baseAlignment);
371 
372 		*matrixStrideOut = matrixStride;
373 		*arrayStrideOut = arrayStride;
374 	}
375 
advanceOffset(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int arrayStride,int matrixStride)376 	void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride)
377 	{
378 		if(arraySize > 0)
379 		{
380 			mCurrentOffset += arrayStride * arraySize;
381 		}
382 		else if(type.isMatrix())
383 		{
384 			ASSERT(matrixStride == ComponentsPerRegister);
385 			const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
386 			mCurrentOffset += ComponentsPerRegister * numRegisters;
387 		}
388 		else
389 		{
390 			mCurrentOffset += type.getElementSize();
391 		}
392 	}
393 
Attribute()394 	Attribute::Attribute()
395 	{
396 		type = GL_NONE;
397 		arraySize = 0;
398 		registerIndex = 0;
399 	}
400 
Attribute(GLenum type,const std::string & name,int arraySize,int location,int registerIndex)401 	Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int location, int registerIndex)
402 	{
403 		this->type = type;
404 		this->name = name;
405 		this->arraySize = arraySize;
406 		this->location = location;
407 		this->registerIndex = registerIndex;
408 	}
409 
getPixelShader() const410 	sw::PixelShader *Shader::getPixelShader() const
411 	{
412 		return nullptr;
413 	}
414 
getVertexShader() const415 	sw::VertexShader *Shader::getVertexShader() const
416 	{
417 		return nullptr;
418 	}
419 
TextureFunction(const TString & nodeName)420 	OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false)
421 	{
422 		TString name = TFunction::unmangleName(nodeName);
423 
424 		if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D" || name == "texture2DRect")
425 		{
426 			method = IMPLICIT;
427 		}
428 		else if(name == "texture2DProj" || name == "textureProj" || name == "texture2DRectProj")
429 		{
430 			method = IMPLICIT;
431 			proj = true;
432 		}
433 		else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod")
434 		{
435 			method = LOD;
436 		}
437 		else if(name == "texture2DProjLod" || name == "textureProjLod")
438 		{
439 			method = LOD;
440 			proj = true;
441 		}
442 		else if(name == "textureSize")
443 		{
444 			method = SIZE;
445 		}
446 		else if(name == "textureOffset")
447 		{
448 			method = IMPLICIT;
449 			offset = true;
450 		}
451 		else if(name == "textureProjOffset")
452 		{
453 			method = IMPLICIT;
454 			offset = true;
455 			proj = true;
456 		}
457 		else if(name == "textureLodOffset")
458 		{
459 			method = LOD;
460 			offset = true;
461 		}
462 		else if(name == "textureProjLodOffset")
463 		{
464 			method = LOD;
465 			proj = true;
466 			offset = true;
467 		}
468 		else if(name == "texelFetch")
469 		{
470 			method = FETCH;
471 		}
472 		else if(name == "texelFetchOffset")
473 		{
474 			method = FETCH;
475 			offset = true;
476 		}
477 		else if(name == "textureGrad")
478 		{
479 			method = GRAD;
480 		}
481 		else if(name == "textureGradOffset")
482 		{
483 			method = GRAD;
484 			offset = true;
485 		}
486 		else if(name == "textureProjGrad")
487 		{
488 			method = GRAD;
489 			proj = true;
490 		}
491 		else if(name == "textureProjGradOffset")
492 		{
493 			method = GRAD;
494 			proj = true;
495 			offset = true;
496 		}
497 		else UNREACHABLE(0);
498 	}
499 
OutputASM(TParseContext & context,Shader * shaderObject)500 	OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context)
501 	{
502 		shader = nullptr;
503 		pixelShader = nullptr;
504 		vertexShader = nullptr;
505 
506 		if(shaderObject)
507 		{
508 			shader = shaderObject->getShader();
509 			pixelShader = shaderObject->getPixelShader();
510 			vertexShader = shaderObject->getVertexShader();
511 		}
512 
513 		functionArray.push_back(Function(0, "main(", nullptr, nullptr));
514 		currentFunction = 0;
515 		outputQualifier = EvqOutput;   // Initialize outputQualifier to any value other than EvqFragColor or EvqFragData
516 	}
517 
~OutputASM()518 	OutputASM::~OutputASM()
519 	{
520 	}
521 
output()522 	void OutputASM::output()
523 	{
524 		if(shader)
525 		{
526 			emitShader(GLOBAL);
527 
528 			if(functionArray.size() > 1)   // Only call main() when there are other functions
529 			{
530 				Instruction *callMain = emit(sw::Shader::OPCODE_CALL);
531 				callMain->dst.type = sw::Shader::PARAMETER_LABEL;
532 				callMain->dst.index = 0;   // main()
533 
534 				emit(sw::Shader::OPCODE_RET);
535 			}
536 
537 			emitShader(FUNCTION);
538 		}
539 	}
540 
emitShader(Scope scope)541 	void OutputASM::emitShader(Scope scope)
542 	{
543 		emitScope = scope;
544 		currentScope = GLOBAL;
545 		mContext.getTreeRoot()->traverse(this);
546 	}
547 
freeTemporary(Temporary * temporary)548 	void OutputASM::freeTemporary(Temporary *temporary)
549 	{
550 		free(temporaries, temporary);
551 	}
552 
getOpcode(sw::Shader::Opcode op,TIntermTyped * in) const553 	sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const
554 	{
555 		TBasicType baseType = in->getType().getBasicType();
556 
557 		switch(op)
558 		{
559 		case sw::Shader::OPCODE_NEG:
560 			switch(baseType)
561 			{
562 			case EbtInt:
563 			case EbtUInt:
564 				return sw::Shader::OPCODE_INEG;
565 			case EbtFloat:
566 			default:
567 				return op;
568 			}
569 		case sw::Shader::OPCODE_ABS:
570 			switch(baseType)
571 			{
572 			case EbtInt:
573 				return sw::Shader::OPCODE_IABS;
574 			case EbtFloat:
575 			default:
576 				return op;
577 			}
578 		case sw::Shader::OPCODE_SGN:
579 			switch(baseType)
580 			{
581 			case EbtInt:
582 				return sw::Shader::OPCODE_ISGN;
583 			case EbtFloat:
584 			default:
585 				return op;
586 			}
587 		case sw::Shader::OPCODE_ADD:
588 			switch(baseType)
589 			{
590 			case EbtInt:
591 			case EbtUInt:
592 				return sw::Shader::OPCODE_IADD;
593 			case EbtFloat:
594 			default:
595 				return op;
596 			}
597 		case sw::Shader::OPCODE_SUB:
598 			switch(baseType)
599 			{
600 			case EbtInt:
601 			case EbtUInt:
602 				return sw::Shader::OPCODE_ISUB;
603 			case EbtFloat:
604 			default:
605 				return op;
606 			}
607 		case sw::Shader::OPCODE_MUL:
608 			switch(baseType)
609 			{
610 			case EbtInt:
611 			case EbtUInt:
612 				return sw::Shader::OPCODE_IMUL;
613 			case EbtFloat:
614 			default:
615 				return op;
616 			}
617 		case sw::Shader::OPCODE_DIV:
618 			switch(baseType)
619 			{
620 			case EbtInt:
621 				return sw::Shader::OPCODE_IDIV;
622 			case EbtUInt:
623 				return sw::Shader::OPCODE_UDIV;
624 			case EbtFloat:
625 			default:
626 				return op;
627 			}
628 		case sw::Shader::OPCODE_IMOD:
629 			return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op;
630 		case sw::Shader::OPCODE_ISHR:
631 			return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op;
632 		case sw::Shader::OPCODE_MIN:
633 			switch(baseType)
634 			{
635 			case EbtInt:
636 				return sw::Shader::OPCODE_IMIN;
637 			case EbtUInt:
638 				return sw::Shader::OPCODE_UMIN;
639 			case EbtFloat:
640 			default:
641 				return op;
642 			}
643 		case sw::Shader::OPCODE_MAX:
644 			switch(baseType)
645 			{
646 			case EbtInt:
647 				return sw::Shader::OPCODE_IMAX;
648 			case EbtUInt:
649 				return sw::Shader::OPCODE_UMAX;
650 			case EbtFloat:
651 			default:
652 				return op;
653 			}
654 		default:
655 			return op;
656 		}
657 	}
658 
visitSymbol(TIntermSymbol * symbol)659 	void OutputASM::visitSymbol(TIntermSymbol *symbol)
660 	{
661 		// The type of vertex outputs and fragment inputs with the same name must match (validated at link time),
662 		// so declare them but don't assign a register index yet (one will be assigned when referenced in reachable code).
663 		switch(symbol->getQualifier())
664 		{
665 		case EvqVaryingIn:
666 		case EvqVaryingOut:
667 		case EvqInvariantVaryingIn:
668 		case EvqInvariantVaryingOut:
669 		case EvqVertexOut:
670 		case EvqFragmentIn:
671 			if(symbol->getBasicType() != EbtInvariant)   // Typeless declarations are not new varyings
672 			{
673 				declareVarying(symbol, -1);
674 			}
675 			break;
676 		case EvqFragmentOut:
677 			declareFragmentOutput(symbol);
678 			break;
679 		default:
680 			break;
681 		}
682 
683 		TInterfaceBlock* block = symbol->getType().getInterfaceBlock();
684 		// OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables:
685 		// "All members of a named uniform block declared with a shared or std140 layout qualifier
686 		// are considered active, even if they are not referenced in any shader in the program.
687 		// The uniform block itself is also considered active, even if no member of the block is referenced."
688 		if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140)))
689 		{
690 			uniformRegister(symbol);
691 		}
692 	}
693 
visitBinary(Visit visit,TIntermBinary * node)694 	bool OutputASM::visitBinary(Visit visit, TIntermBinary *node)
695 	{
696 		if(currentScope != emitScope)
697 		{
698 			return false;
699 		}
700 
701 		TIntermTyped *result = node;
702 		TIntermTyped *left = node->getLeft();
703 		TIntermTyped *right = node->getRight();
704 		const TType &leftType = left->getType();
705 		const TType &rightType = right->getType();
706 
707 		if(isSamplerRegister(result))
708 		{
709 			return false;   // Don't traverse, the register index is determined statically
710 		}
711 
712 		switch(node->getOp())
713 		{
714 		case EOpAssign:
715 			assert(visit == PreVisit);
716 			right->traverse(this);
717 			assignLvalue(left, right);
718 			copy(result, right);
719 			return false;
720 		case EOpInitialize:
721 			assert(visit == PreVisit);
722 			// Constant arrays go into the constant register file.
723 			if(leftType.getQualifier() == EvqConstExpr && leftType.isArray() && leftType.getArraySize() > 1)
724 			{
725 				for(int i = 0; i < left->totalRegisterCount(); i++)
726 				{
727 					emit(sw::Shader::OPCODE_DEF, left, i, right, i);
728 				}
729 			}
730 			else
731 			{
732 				right->traverse(this);
733 				copy(left, right);
734 			}
735 			return false;
736 		case EOpMatrixTimesScalarAssign:
737 			assert(visit == PreVisit);
738 			right->traverse(this);
739 			for(int i = 0; i < leftType.getNominalSize(); i++)
740 			{
741 				emit(sw::Shader::OPCODE_MUL, result, i, left, i, right);
742 			}
743 
744 			assignLvalue(left, result);
745 			return false;
746 		case EOpVectorTimesMatrixAssign:
747 			assert(visit == PreVisit);
748 			{
749 				right->traverse(this);
750 				int size = leftType.getNominalSize();
751 
752 				for(int i = 0; i < size; i++)
753 				{
754 					Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i);
755 					dot->dst.mask = 1 << i;
756 				}
757 
758 				assignLvalue(left, result);
759 			}
760 			return false;
761 		case EOpMatrixTimesMatrixAssign:
762 			assert(visit == PreVisit);
763 			{
764 				right->traverse(this);
765 				int dim = leftType.getNominalSize();
766 
767 				for(int i = 0; i < dim; i++)
768 				{
769 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
770 					mul->src[1].swizzle = 0x00;
771 
772 					for(int j = 1; j < dim; j++)
773 					{
774 						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
775 						mad->src[1].swizzle = j * 0x55;
776 					}
777 				}
778 
779 				assignLvalue(left, result);
780 			}
781 			return false;
782 		case EOpIndexDirect:
783 		case EOpIndexIndirect:
784 		case EOpIndexDirectStruct:
785 		case EOpIndexDirectInterfaceBlock:
786 			assert(visit == PreVisit);
787 			evaluateRvalue(node);
788 			return false;
789 		case EOpVectorSwizzle:
790 			if(visit == PostVisit)
791 			{
792 				int swizzle = 0;
793 				TIntermAggregate *components = right->getAsAggregate();
794 
795 				if(components)
796 				{
797 					TIntermSequence &sequence = components->getSequence();
798 					int component = 0;
799 
800 					for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++)
801 					{
802 						TIntermConstantUnion *element = (*sit)->getAsConstantUnion();
803 
804 						if(element)
805 						{
806 							int i = element->getUnionArrayPointer()[0].getIConst();
807 							swizzle |= i << (component * 2);
808 							component++;
809 						}
810 						else UNREACHABLE(0);
811 					}
812 				}
813 				else UNREACHABLE(0);
814 
815 				Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);
816 				mov->src[0].swizzle = swizzle;
817 			}
818 			break;
819 		case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break;
820 		case EOpAdd:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right);       break;
821 		case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break;
822 		case EOpSub:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right);       break;
823 		case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break;
824 		case EOpMul:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right);       break;
825 		case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break;
826 		case EOpDiv:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right);       break;
827 		case EOpIModAssign:          if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break;
828 		case EOpIMod:                if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right);       break;
829 		case EOpBitShiftLeftAssign:  if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break;
830 		case EOpBitShiftLeft:        if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right);       break;
831 		case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break;
832 		case EOpBitShiftRight:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right);       break;
833 		case EOpBitwiseAndAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break;
834 		case EOpBitwiseAnd:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right);       break;
835 		case EOpBitwiseXorAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break;
836 		case EOpBitwiseXor:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right);       break;
837 		case EOpBitwiseOrAssign:     if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right);  break;
838 		case EOpBitwiseOr:           if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right);        break;
839 		case EOpEqual:
840 			if(visit == PostVisit)
841 			{
842 				emitBinary(sw::Shader::OPCODE_EQ, result, left, right);
843 
844 				for(int index = 1; index < left->totalRegisterCount(); index++)
845 				{
846 					Temporary equal(this);
847 					emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index);
848 					emit(sw::Shader::OPCODE_AND, result, result, &equal);
849 				}
850 			}
851 			break;
852 		case EOpNotEqual:
853 			if(visit == PostVisit)
854 			{
855 				emitBinary(sw::Shader::OPCODE_NE, result, left, right);
856 
857 				for(int index = 1; index < left->totalRegisterCount(); index++)
858 				{
859 					Temporary notEqual(this);
860 					emit(sw::Shader::OPCODE_NE, &notEqual, 0, left, index, right, index);
861 					emit(sw::Shader::OPCODE_OR, result, result, &notEqual);
862 				}
863 			}
864 			break;
865 		case EOpLessThan:                if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break;
866 		case EOpGreaterThan:             if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break;
867 		case EOpLessThanEqual:           if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break;
868 		case EOpGreaterThanEqual:        if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break;
869 		case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break;
870 		case EOpVectorTimesScalar:       if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break;
871 		case EOpMatrixTimesScalar:
872 			if(visit == PostVisit)
873 			{
874 				if(left->isMatrix())
875 				{
876 					for(int i = 0; i < leftType.getNominalSize(); i++)
877 					{
878 						emit(sw::Shader::OPCODE_MUL, result, i, left, i, right, 0);
879 					}
880 				}
881 				else if(right->isMatrix())
882 				{
883 					for(int i = 0; i < rightType.getNominalSize(); i++)
884 					{
885 						emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
886 					}
887 				}
888 				else UNREACHABLE(0);
889 			}
890 			break;
891 		case EOpVectorTimesMatrix:
892 			if(visit == PostVisit)
893 			{
894 				sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize());
895 
896 				int size = rightType.getNominalSize();
897 				for(int i = 0; i < size; i++)
898 				{
899 					Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i);
900 					dot->dst.mask = 1 << i;
901 				}
902 			}
903 			break;
904 		case EOpMatrixTimesVector:
905 			if(visit == PostVisit)
906 			{
907 				Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);
908 				mul->src[1].swizzle = 0x00;
909 
910 				int size = rightType.getNominalSize();
911 				for(int i = 1; i < size; i++)
912 				{
913 					Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result);
914 					mad->src[1].swizzle = i * 0x55;
915 				}
916 			}
917 			break;
918 		case EOpMatrixTimesMatrix:
919 			if(visit == PostVisit)
920 			{
921 				int dim = leftType.getNominalSize();
922 
923 				int size = rightType.getNominalSize();
924 				for(int i = 0; i < size; i++)
925 				{
926 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
927 					mul->src[1].swizzle = 0x00;
928 
929 					for(int j = 1; j < dim; j++)
930 					{
931 						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
932 						mad->src[1].swizzle = j * 0x55;
933 					}
934 				}
935 			}
936 			break;
937 		case EOpLogicalOr:
938 			if(trivial(right, 6))
939 			{
940 				if(visit == PostVisit)
941 				{
942 					emit(sw::Shader::OPCODE_OR, result, left, right);
943 				}
944 			}
945 			else   // Short-circuit evaluation
946 			{
947 				if(visit == InVisit)
948 				{
949 					emit(sw::Shader::OPCODE_MOV, result, left);
950 					Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result);
951 					ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT;
952 				}
953 				else if(visit == PostVisit)
954 				{
955 					emit(sw::Shader::OPCODE_MOV, result, right);
956 					emit(sw::Shader::OPCODE_ENDIF);
957 				}
958 			}
959 			break;
960 		case EOpLogicalXor:        if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break;
961 		case EOpLogicalAnd:
962 			if(trivial(right, 6))
963 			{
964 				if(visit == PostVisit)
965 				{
966 					emit(sw::Shader::OPCODE_AND, result, left, right);
967 				}
968 			}
969 			else   // Short-circuit evaluation
970 			{
971 				if(visit == InVisit)
972 				{
973 					emit(sw::Shader::OPCODE_MOV, result, left);
974 					emit(sw::Shader::OPCODE_IF, 0, result);
975 				}
976 				else if(visit == PostVisit)
977 				{
978 					emit(sw::Shader::OPCODE_MOV, result, right);
979 					emit(sw::Shader::OPCODE_ENDIF);
980 				}
981 			}
982 			break;
983 		default: UNREACHABLE(node->getOp());
984 		}
985 
986 		return true;
987 	}
988 
emitDeterminant(TIntermTyped * result,TIntermTyped * arg,int size,int col,int row,int outCol,int outRow)989 	void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow)
990 	{
991 		switch(size)
992 		{
993 		case 1: // Used for cofactor computation only
994 			{
995 				// For a 2x2 matrix, the cofactor is simply a transposed move or negate
996 				bool isMov = (row == col);
997 				sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG;
998 				Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row);
999 				mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col);
1000 				mov->dst.mask = 1 << outRow;
1001 			}
1002 			break;
1003 		case 2:
1004 			{
1005 				static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy
1006 
1007 				bool isCofactor = (col >= 0) && (row >= 0);
1008 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
1009 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
1010 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
1011 
1012 				Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1);
1013 				det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2];
1014 				det->dst.mask = 1 << outRow;
1015 			}
1016 			break;
1017 		case 3:
1018 			{
1019 				static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw
1020 
1021 				bool isCofactor = (col >= 0) && (row >= 0);
1022 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
1023 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
1024 				int col2 = (isCofactor && (col <= 2)) ? 3 : 2;
1025 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
1026 
1027 				Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2);
1028 				det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3];
1029 				det->dst.mask = 1 << outRow;
1030 			}
1031 			break;
1032 		case 4:
1033 			{
1034 				Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3);
1035 				det->dst.mask = 1 << outRow;
1036 			}
1037 			break;
1038 		default:
1039 			UNREACHABLE(size);
1040 			break;
1041 		}
1042 	}
1043 
visitUnary(Visit visit,TIntermUnary * node)1044 	bool OutputASM::visitUnary(Visit visit, TIntermUnary *node)
1045 	{
1046 		if(currentScope != emitScope)
1047 		{
1048 			return false;
1049 		}
1050 
1051 		TIntermTyped *result = node;
1052 		TIntermTyped *arg = node->getOperand();
1053 		TBasicType basicType = arg->getType().getBasicType();
1054 
1055 		union
1056 		{
1057 			float f;
1058 			int i;
1059 		} one_value;
1060 
1061 		if(basicType == EbtInt || basicType == EbtUInt)
1062 		{
1063 			one_value.i = 1;
1064 		}
1065 		else
1066 		{
1067 			one_value.f = 1.0f;
1068 		}
1069 
1070 		Constant one(one_value.f, one_value.f, one_value.f, one_value.f);
1071 		Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f);
1072 		Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f);
1073 
1074 		switch(node->getOp())
1075 		{
1076 		case EOpNegative:
1077 			if(visit == PostVisit)
1078 			{
1079 				sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg);
1080 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1081 				{
1082 					emit(negOpcode, result, index, arg, index);
1083 				}
1084 			}
1085 			break;
1086 		case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
1087 		case EOpLogicalNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
1088 		case EOpBitwiseNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
1089 		case EOpPostIncrement:
1090 			if(visit == PostVisit)
1091 			{
1092 				copy(result, arg);
1093 
1094 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
1095 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1096 				{
1097 					emit(addOpcode, arg, index, arg, index, &one);
1098 				}
1099 
1100 				assignLvalue(arg, arg);
1101 			}
1102 			break;
1103 		case EOpPostDecrement:
1104 			if(visit == PostVisit)
1105 			{
1106 				copy(result, arg);
1107 
1108 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
1109 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1110 				{
1111 					emit(subOpcode, arg, index, arg, index, &one);
1112 				}
1113 
1114 				assignLvalue(arg, arg);
1115 			}
1116 			break;
1117 		case EOpPreIncrement:
1118 			if(visit == PostVisit)
1119 			{
1120 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
1121 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1122 				{
1123 					emit(addOpcode, result, index, arg, index, &one);
1124 				}
1125 
1126 				assignLvalue(arg, result);
1127 			}
1128 			break;
1129 		case EOpPreDecrement:
1130 			if(visit == PostVisit)
1131 			{
1132 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
1133 				for(int index = 0; index < arg->totalRegisterCount(); index++)
1134 				{
1135 					emit(subOpcode, result, index, arg, index, &one);
1136 				}
1137 
1138 				assignLvalue(arg, result);
1139 			}
1140 			break;
1141 		case EOpRadians:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break;
1142 		case EOpDegrees:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &deg); break;
1143 		case EOpSin:              if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break;
1144 		case EOpCos:              if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break;
1145 		case EOpTan:              if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break;
1146 		case EOpAsin:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break;
1147 		case EOpAcos:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break;
1148 		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break;
1149 		case EOpSinh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break;
1150 		case EOpCosh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break;
1151 		case EOpTanh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break;
1152 		case EOpAsinh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break;
1153 		case EOpAcosh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break;
1154 		case EOpAtanh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break;
1155 		case EOpExp:              if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break;
1156 		case EOpLog:              if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break;
1157 		case EOpExp2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break;
1158 		case EOpLog2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break;
1159 		case EOpSqrt:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break;
1160 		case EOpInverseSqrt:      if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break;
1161 		case EOpAbs:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break;
1162 		case EOpSign:             if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break;
1163 		case EOpFloor:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break;
1164 		case EOpTrunc:            if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break;
1165 		case EOpRound:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break;
1166 		case EOpRoundEven:        if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break;
1167 		case EOpCeil:             if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break;
1168 		case EOpFract:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break;
1169 		case EOpIsNan:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break;
1170 		case EOpIsInf:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break;
1171 		case EOpLength:           if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break;
1172 		case EOpNormalize:        if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break;
1173 		case EOpDFdx:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break;
1174 		case EOpDFdy:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break;
1175 		case EOpFwidth:           if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break;
1176 		case EOpAny:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break;
1177 		case EOpAll:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break;
1178 		case EOpFloatBitsToInt:   if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break;
1179 		case EOpFloatBitsToUint:  if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break;
1180 		case EOpIntBitsToFloat:   if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break;
1181 		case EOpUintBitsToFloat:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break;
1182 		case EOpPackSnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break;
1183 		case EOpPackUnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break;
1184 		case EOpPackHalf2x16:     if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break;
1185 		case EOpUnpackSnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break;
1186 		case EOpUnpackUnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break;
1187 		case EOpUnpackHalf2x16:   if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break;
1188 		case EOpTranspose:
1189 			if(visit == PostVisit)
1190 			{
1191 				int numCols = arg->getNominalSize();
1192 				int numRows = arg->getSecondarySize();
1193 				for(int i = 0; i < numCols; ++i)
1194 				{
1195 					for(int j = 0; j < numRows; ++j)
1196 					{
1197 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i);
1198 						mov->src[0].swizzle = 0x55 * j;
1199 						mov->dst.mask = 1 << i;
1200 					}
1201 				}
1202 			}
1203 			break;
1204 		case EOpDeterminant:
1205 			if(visit == PostVisit)
1206 			{
1207 				int size = arg->getNominalSize();
1208 				ASSERT(size == arg->getSecondarySize());
1209 
1210 				emitDeterminant(result, arg, size);
1211 			}
1212 			break;
1213 		case EOpInverse:
1214 			if(visit == PostVisit)
1215 			{
1216 				int size = arg->getNominalSize();
1217 				ASSERT(size == arg->getSecondarySize());
1218 
1219 				// Compute transposed matrix of cofactors
1220 				for(int i = 0; i < size; ++i)
1221 				{
1222 					for(int j = 0; j < size; ++j)
1223 					{
1224 						// For a 2x2 matrix, the cofactor is simply a transposed move or negate
1225 						// For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant
1226 						emitDeterminant(result, arg, size - 1, j, i, i, j);
1227 					}
1228 				}
1229 
1230 				// Compute 1 / determinant
1231 				Temporary invDet(this);
1232 				emitDeterminant(&invDet, arg, size);
1233 				Constant one(1.0f, 1.0f, 1.0f, 1.0f);
1234 				Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet);
1235 				div->src[1].swizzle = 0x00; // xxxx
1236 
1237 				// Divide transposed matrix of cofactors by determinant
1238 				for(int i = 0; i < size; ++i)
1239 				{
1240 					emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet);
1241 				}
1242 			}
1243 			break;
1244 		default: UNREACHABLE(node->getOp());
1245 		}
1246 
1247 		return true;
1248 	}
1249 
visitAggregate(Visit visit,TIntermAggregate * node)1250 	bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node)
1251 	{
1252 		if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence)
1253 		{
1254 			return false;
1255 		}
1256 
1257 		Constant zero(0.0f, 0.0f, 0.0f, 0.0f);
1258 
1259 		TIntermTyped *result = node;
1260 		const TType &resultType = node->getType();
1261 		TIntermSequence &arg = node->getSequence();
1262 		size_t argumentCount = arg.size();
1263 
1264 		switch(node->getOp())
1265 		{
1266 		case EOpSequence:             break;
1267 		case EOpDeclaration:          break;
1268 		case EOpInvariantDeclaration: break;
1269 		case EOpPrototype:            break;
1270 		case EOpComma:
1271 			if(visit == PostVisit)
1272 			{
1273 				copy(result, arg[1]);
1274 			}
1275 			break;
1276 		case EOpFunction:
1277 			if(visit == PreVisit)
1278 			{
1279 				const TString &name = node->getName();
1280 
1281 				if(emitScope == FUNCTION)
1282 				{
1283 					if(functionArray.size() > 1)   // No need for a label when there's only main()
1284 					{
1285 						Instruction *label = emit(sw::Shader::OPCODE_LABEL);
1286 						label->dst.type = sw::Shader::PARAMETER_LABEL;
1287 
1288 						const Function *function = findFunction(name);
1289 						ASSERT(function);   // Should have been added during global pass
1290 						label->dst.index = function->label;
1291 						currentFunction = function->label;
1292 					}
1293 				}
1294 				else if(emitScope == GLOBAL)
1295 				{
1296 					if(name != "main(")
1297 					{
1298 						TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence();
1299 						functionArray.push_back(Function(functionArray.size(), name, &arguments, node));
1300 					}
1301 				}
1302 				else UNREACHABLE(emitScope);
1303 
1304 				currentScope = FUNCTION;
1305 			}
1306 			else if(visit == PostVisit)
1307 			{
1308 				if(emitScope == FUNCTION)
1309 				{
1310 					if(functionArray.size() > 1)   // No need to return when there's only main()
1311 					{
1312 						emit(sw::Shader::OPCODE_RET);
1313 					}
1314 				}
1315 
1316 				currentScope = GLOBAL;
1317 			}
1318 			break;
1319 		case EOpFunctionCall:
1320 			if(visit == PostVisit)
1321 			{
1322 				if(node->isUserDefined())
1323 				{
1324 					const TString &name = node->getName();
1325 					const Function *function = findFunction(name);
1326 
1327 					if(!function)
1328 					{
1329 						mContext.error(node->getLine(), "function definition not found", name.c_str());
1330 						return false;
1331 					}
1332 
1333 					TIntermSequence &arguments = *function->arg;
1334 
1335 					for(size_t i = 0; i < argumentCount; i++)
1336 					{
1337 						TIntermTyped *in = arguments[i]->getAsTyped();
1338 
1339 						if(in->getQualifier() == EvqIn ||
1340 						   in->getQualifier() == EvqInOut ||
1341 						   in->getQualifier() == EvqConstReadOnly)
1342 						{
1343 							copy(in, arg[i]);
1344 						}
1345 					}
1346 
1347 					Instruction *call = emit(sw::Shader::OPCODE_CALL);
1348 					call->dst.type = sw::Shader::PARAMETER_LABEL;
1349 					call->dst.index = function->label;
1350 
1351 					if(function->ret && function->ret->getType().getBasicType() != EbtVoid)
1352 					{
1353 						copy(result, function->ret);
1354 					}
1355 
1356 					for(size_t i = 0; i < argumentCount; i++)
1357 					{
1358 						TIntermTyped *argument = arguments[i]->getAsTyped();
1359 						TIntermTyped *out = arg[i]->getAsTyped();
1360 
1361 						if(argument->getQualifier() == EvqOut ||
1362 						   argument->getQualifier() == EvqInOut)
1363 						{
1364 							assignLvalue(out, argument);
1365 						}
1366 					}
1367 				}
1368 				else
1369 				{
1370 					const TextureFunction textureFunction(node->getName());
1371 					TIntermTyped *s = arg[0]->getAsTyped();
1372 					TIntermTyped *t = arg[1]->getAsTyped();
1373 
1374 					Temporary coord(this);
1375 
1376 					if(textureFunction.proj)
1377 					{
1378 						Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]);
1379 						rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1);
1380 						rcp->dst.mask = 0x7;
1381 
1382 						Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord);
1383 						mul->dst.mask = 0x7;
1384 
1385 						if(IsShadowSampler(s->getBasicType()))
1386 						{
1387 							ASSERT(s->getBasicType() == EbtSampler2DShadow);
1388 							Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, &coord);
1389 							mov->src[0].swizzle = 0xA4;
1390 						}
1391 					}
1392 					else
1393 					{
1394 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, &coord, arg[1]);
1395 
1396 						if(IsShadowSampler(s->getBasicType()) && t->getNominalSize() == 3)
1397 						{
1398 							ASSERT(s->getBasicType() == EbtSampler2DShadow);
1399 							mov->src[0].swizzle = 0xA4;
1400 						}
1401 					}
1402 
1403 					switch(textureFunction.method)
1404 					{
1405 					case TextureFunction::IMPLICIT:
1406 						if(!textureFunction.offset)
1407 						{
1408 							if(argumentCount == 2)
1409 							{
1410 								emit(sw::Shader::OPCODE_TEX, result, &coord, s);
1411 							}
1412 							else if(argumentCount == 3)   // Bias
1413 							{
1414 								emit(sw::Shader::OPCODE_TEXBIAS, result, &coord, s, arg[2]);
1415 							}
1416 							else UNREACHABLE(argumentCount);
1417 						}
1418 						else   // Offset
1419 						{
1420 							if(argumentCount == 3)
1421 							{
1422 								emit(sw::Shader::OPCODE_TEXOFFSET, result, &coord, s, arg[2]);
1423 							}
1424 							else if(argumentCount == 4)   // Bias
1425 							{
1426 								emit(sw::Shader::OPCODE_TEXOFFSETBIAS, result, &coord, s, arg[2], arg[3]);
1427 							}
1428 							else UNREACHABLE(argumentCount);
1429 						}
1430 						break;
1431 					case TextureFunction::LOD:
1432 						if(!textureFunction.offset && argumentCount == 3)
1433 						{
1434 							emit(sw::Shader::OPCODE_TEXLOD, result, &coord, s, arg[2]);
1435 						}
1436 						else if(argumentCount == 4)   // Offset
1437 						{
1438 							emit(sw::Shader::OPCODE_TEXLODOFFSET, result, &coord, s, arg[3], arg[2]);
1439 						}
1440 						else UNREACHABLE(argumentCount);
1441 						break;
1442 					case TextureFunction::FETCH:
1443 						if(!textureFunction.offset && argumentCount == 3)
1444 						{
1445 							emit(sw::Shader::OPCODE_TEXELFETCH, result, &coord, s, arg[2]);
1446 						}
1447 						else if(argumentCount == 4)   // Offset
1448 						{
1449 							emit(sw::Shader::OPCODE_TEXELFETCHOFFSET, result, &coord, s, arg[3], arg[2]);
1450 						}
1451 						else UNREACHABLE(argumentCount);
1452 						break;
1453 					case TextureFunction::GRAD:
1454 						if(!textureFunction.offset && argumentCount == 4)
1455 						{
1456 							emit(sw::Shader::OPCODE_TEXGRAD, result, &coord, s, arg[2], arg[3]);
1457 						}
1458 						else if(argumentCount == 5)   // Offset
1459 						{
1460 							emit(sw::Shader::OPCODE_TEXGRADOFFSET, result, &coord, s, arg[2], arg[3], arg[4]);
1461 						}
1462 						else UNREACHABLE(argumentCount);
1463 						break;
1464 					case TextureFunction::SIZE:
1465 						emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], s);
1466 						break;
1467 					default:
1468 						UNREACHABLE(textureFunction.method);
1469 					}
1470 				}
1471 			}
1472 			break;
1473 		case EOpParameters:
1474 			break;
1475 		case EOpConstructFloat:
1476 		case EOpConstructVec2:
1477 		case EOpConstructVec3:
1478 		case EOpConstructVec4:
1479 		case EOpConstructBool:
1480 		case EOpConstructBVec2:
1481 		case EOpConstructBVec3:
1482 		case EOpConstructBVec4:
1483 		case EOpConstructInt:
1484 		case EOpConstructIVec2:
1485 		case EOpConstructIVec3:
1486 		case EOpConstructIVec4:
1487 		case EOpConstructUInt:
1488 		case EOpConstructUVec2:
1489 		case EOpConstructUVec3:
1490 		case EOpConstructUVec4:
1491 			if(visit == PostVisit)
1492 			{
1493 				int component = 0;
1494 				int arrayMaxIndex = result->isArray() ? result->getArraySize() - 1 : 0;
1495 				int arrayComponents = result->getType().getElementSize();
1496 				for(size_t i = 0; i < argumentCount; i++)
1497 				{
1498 					TIntermTyped *argi = arg[i]->getAsTyped();
1499 					int size = argi->getNominalSize();
1500 					int arrayIndex = std::min(component / arrayComponents, arrayMaxIndex);
1501 					int swizzle = component - (arrayIndex * arrayComponents);
1502 
1503 					if(!argi->isMatrix())
1504 					{
1505 						Instruction *mov = emitCast(result, arrayIndex, argi, 0);
1506 						mov->dst.mask = (0xF << swizzle) & 0xF;
1507 						mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);
1508 
1509 						component += size;
1510 					}
1511 					else if(!result->isMatrix()) // Construct a non matrix from a matrix
1512 					{
1513 						Instruction *mov = emitCast(result, arrayIndex, argi, 0);
1514 						mov->dst.mask = (0xF << swizzle) & 0xF;
1515 						mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);
1516 
1517 						// At most one more instruction when constructing a vec3 from a mat2 or a vec4 from a mat2/mat3
1518 						if(result->getNominalSize() > size)
1519 						{
1520 							Instruction *mov = emitCast(result, arrayIndex, argi, 1);
1521 							mov->dst.mask = (0xF << (swizzle + size)) & 0xF;
1522 							// mat2: xxxy (0x40), mat3: xxxx (0x00)
1523 							mov->src[0].swizzle = ((size == 2) ? 0x40 : 0x00) << (swizzle * 2);
1524 						}
1525 
1526 						component += size;
1527 					}
1528 					else   // Matrix
1529 					{
1530 						int column = 0;
1531 
1532 						while(component < resultType.getNominalSize())
1533 						{
1534 							Instruction *mov = emitCast(result, arrayIndex, argi, column);
1535 							mov->dst.mask = (0xF << swizzle) & 0xF;
1536 							mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);
1537 
1538 							column++;
1539 							component += size;
1540 						}
1541 					}
1542 				}
1543 			}
1544 			break;
1545 		case EOpConstructMat2:
1546 		case EOpConstructMat2x3:
1547 		case EOpConstructMat2x4:
1548 		case EOpConstructMat3x2:
1549 		case EOpConstructMat3:
1550 		case EOpConstructMat3x4:
1551 		case EOpConstructMat4x2:
1552 		case EOpConstructMat4x3:
1553 		case EOpConstructMat4:
1554 			if(visit == PostVisit)
1555 			{
1556 				TIntermTyped *arg0 = arg[0]->getAsTyped();
1557 				const int outCols = result->getNominalSize();
1558 				const int outRows = result->getSecondarySize();
1559 
1560 				if(arg0->isScalar() && arg.size() == 1)   // Construct scale matrix
1561 				{
1562 					for(int i = 0; i < outCols; i++)
1563 					{
1564 						emit(sw::Shader::OPCODE_MOV, result, i, &zero);
1565 						Instruction *mov = emitCast(result, i, arg0, 0);
1566 						mov->dst.mask = 1 << i;
1567 						ASSERT(mov->src[0].swizzle == 0x00);
1568 					}
1569 				}
1570 				else if(arg0->isMatrix())
1571 				{
1572 					int arraySize = result->isArray() ? result->getArraySize() : 1;
1573 
1574 					for(int n = 0; n < arraySize; n++)
1575 					{
1576 						TIntermTyped *argi = arg[n]->getAsTyped();
1577 						const int inCols = argi->getNominalSize();
1578 						const int inRows = argi->getSecondarySize();
1579 
1580 						for(int i = 0; i < outCols; i++)
1581 						{
1582 							if(i >= inCols || outRows > inRows)
1583 							{
1584 								// Initialize to identity matrix
1585 								Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f));
1586 								emitCast(result, i + n * outCols, &col, 0);
1587 							}
1588 
1589 							if(i < inCols)
1590 							{
1591 								Instruction *mov = emitCast(result, i + n * outCols, argi, i);
1592 								mov->dst.mask = 0xF >> (4 - inRows);
1593 							}
1594 						}
1595 					}
1596 				}
1597 				else
1598 				{
1599 					int column = 0;
1600 					int row = 0;
1601 
1602 					for(size_t i = 0; i < argumentCount; i++)
1603 					{
1604 						TIntermTyped *argi = arg[i]->getAsTyped();
1605 						int size = argi->getNominalSize();
1606 						int element = 0;
1607 
1608 						while(element < size)
1609 						{
1610 							Instruction *mov = emitCast(result, column, argi, 0);
1611 							mov->dst.mask = (0xF << row) & 0xF;
1612 							mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element;
1613 
1614 							int end = row + size - element;
1615 							column = end >= outRows ? column + 1 : column;
1616 							element = element + outRows - row;
1617 							row = end >= outRows ? 0 : end;
1618 						}
1619 					}
1620 				}
1621 			}
1622 			break;
1623 		case EOpConstructStruct:
1624 			if(visit == PostVisit)
1625 			{
1626 				int offset = 0;
1627 				for(size_t i = 0; i < argumentCount; i++)
1628 				{
1629 					TIntermTyped *argi = arg[i]->getAsTyped();
1630 					int size = argi->totalRegisterCount();
1631 
1632 					for(int index = 0; index < size; index++)
1633 					{
1634 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index);
1635 						mov->dst.mask = writeMask(result, offset + index);
1636 					}
1637 
1638 					offset += size;
1639 				}
1640 			}
1641 			break;
1642 		case EOpLessThan:         if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break;
1643 		case EOpGreaterThan:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break;
1644 		case EOpLessThanEqual:    if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break;
1645 		case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break;
1646 		case EOpVectorEqual:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break;
1647 		case EOpVectorNotEqual:   if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break;
1648 		case EOpMod:              if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break;
1649 		case EOpModf:
1650 			if(visit == PostVisit)
1651 			{
1652 				TIntermTyped* arg1 = arg[1]->getAsTyped();
1653 				emit(sw::Shader::OPCODE_TRUNC, arg1, arg[0]);
1654 				assignLvalue(arg1, arg1);
1655 				emitBinary(sw::Shader::OPCODE_SUB, result, arg[0], arg1);
1656 			}
1657 			break;
1658 		case EOpPow:              if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break;
1659 		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break;
1660 		case EOpMin:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break;
1661 		case EOpMax:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break;
1662 		case EOpClamp:
1663 			if(visit == PostVisit)
1664 			{
1665 				emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]);
1666 				emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]);
1667 			}
1668 			break;
1669 		case EOpMix:
1670 			if(visit == PostVisit)
1671 			{
1672 				if(arg[2]->getAsTyped()->getBasicType() == EbtBool)
1673 				{
1674 					emit(sw::Shader::OPCODE_SELECT, result, arg[2], arg[1], arg[0]);
1675 				}
1676 				else
1677 				{
1678 					emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]);
1679 				}
1680 			}
1681 			break;
1682 		case EOpStep:        if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break;
1683 		case EOpSmoothStep:  if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break;
1684 		case EOpDistance:    if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break;
1685 		case EOpDot:         if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break;
1686 		case EOpCross:       if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break;
1687 		case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
1688 		case EOpReflect:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break;
1689 		case EOpRefract:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
1690 		case EOpMul:
1691 			if(visit == PostVisit)
1692 			{
1693 				TIntermTyped *arg0 = arg[0]->getAsTyped();
1694 				ASSERT((arg0->getNominalSize() == arg[1]->getAsTyped()->getNominalSize()) &&
1695 				       (arg0->getSecondarySize() == arg[1]->getAsTyped()->getSecondarySize()));
1696 
1697 				int size = arg0->getNominalSize();
1698 				for(int i = 0; i < size; i++)
1699 				{
1700 					emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i);
1701 				}
1702 			}
1703 			break;
1704 		case EOpOuterProduct:
1705 			if(visit == PostVisit)
1706 			{
1707 				for(int i = 0; i < dim(arg[1]); i++)
1708 				{
1709 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]);
1710 					mul->src[1].swizzle = 0x55 * i;
1711 				}
1712 			}
1713 			break;
1714 		default: UNREACHABLE(node->getOp());
1715 		}
1716 
1717 		return true;
1718 	}
1719 
visitSelection(Visit visit,TIntermSelection * node)1720 	bool OutputASM::visitSelection(Visit visit, TIntermSelection *node)
1721 	{
1722 		if(currentScope != emitScope)
1723 		{
1724 			return false;
1725 		}
1726 
1727 		TIntermTyped *condition = node->getCondition();
1728 		TIntermNode *trueBlock = node->getTrueBlock();
1729 		TIntermNode *falseBlock = node->getFalseBlock();
1730 		TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
1731 
1732 		condition->traverse(this);
1733 
1734 		if(node->usesTernaryOperator())
1735 		{
1736 			if(constantCondition)
1737 			{
1738 				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
1739 
1740 				if(trueCondition)
1741 				{
1742 					trueBlock->traverse(this);
1743 					copy(node, trueBlock);
1744 				}
1745 				else
1746 				{
1747 					falseBlock->traverse(this);
1748 					copy(node, falseBlock);
1749 				}
1750 			}
1751 			else if(trivial(node, 6))   // Fast to compute both potential results and no side effects
1752 			{
1753 				trueBlock->traverse(this);
1754 				falseBlock->traverse(this);
1755 				emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock);
1756 			}
1757 			else
1758 			{
1759 				emit(sw::Shader::OPCODE_IF, 0, condition);
1760 
1761 				if(trueBlock)
1762 				{
1763 					trueBlock->traverse(this);
1764 					copy(node, trueBlock);
1765 				}
1766 
1767 				if(falseBlock)
1768 				{
1769 					emit(sw::Shader::OPCODE_ELSE);
1770 					falseBlock->traverse(this);
1771 					copy(node, falseBlock);
1772 				}
1773 
1774 				emit(sw::Shader::OPCODE_ENDIF);
1775 			}
1776 		}
1777 		else  // if/else statement
1778 		{
1779 			if(constantCondition)
1780 			{
1781 				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
1782 
1783 				if(trueCondition)
1784 				{
1785 					if(trueBlock)
1786 					{
1787 						trueBlock->traverse(this);
1788 					}
1789 				}
1790 				else
1791 				{
1792 					if(falseBlock)
1793 					{
1794 						falseBlock->traverse(this);
1795 					}
1796 				}
1797 			}
1798 			else
1799 			{
1800 				emit(sw::Shader::OPCODE_IF, 0, condition);
1801 
1802 				if(trueBlock)
1803 				{
1804 					trueBlock->traverse(this);
1805 				}
1806 
1807 				if(falseBlock)
1808 				{
1809 					emit(sw::Shader::OPCODE_ELSE);
1810 					falseBlock->traverse(this);
1811 				}
1812 
1813 				emit(sw::Shader::OPCODE_ENDIF);
1814 			}
1815 		}
1816 
1817 		return false;
1818 	}
1819 
visitLoop(Visit visit,TIntermLoop * node)1820 	bool OutputASM::visitLoop(Visit visit, TIntermLoop *node)
1821 	{
1822 		if(currentScope != emitScope)
1823 		{
1824 			return false;
1825 		}
1826 
1827 		unsigned int iterations = loopCount(node);
1828 
1829 		if(iterations == 0)
1830 		{
1831 			return false;
1832 		}
1833 
1834 		bool unroll = (iterations <= 4);
1835 
1836 		if(unroll)
1837 		{
1838 			LoopUnrollable loopUnrollable;
1839 			unroll = loopUnrollable.traverse(node);
1840 		}
1841 
1842 		TIntermNode *init = node->getInit();
1843 		TIntermTyped *condition = node->getCondition();
1844 		TIntermTyped *expression = node->getExpression();
1845 		TIntermNode *body = node->getBody();
1846 		Constant True(true);
1847 
1848 		if(node->getType() == ELoopDoWhile)
1849 		{
1850 			Temporary iterate(this);
1851 			emit(sw::Shader::OPCODE_MOV, &iterate, &True);
1852 
1853 			emit(sw::Shader::OPCODE_WHILE, 0, &iterate);   // FIXME: Implement real do-while
1854 
1855 			if(body)
1856 			{
1857 				body->traverse(this);
1858 			}
1859 
1860 			emit(sw::Shader::OPCODE_TEST);
1861 
1862 			condition->traverse(this);
1863 			emit(sw::Shader::OPCODE_MOV, &iterate, condition);
1864 
1865 			emit(sw::Shader::OPCODE_ENDWHILE);
1866 		}
1867 		else
1868 		{
1869 			if(init)
1870 			{
1871 				init->traverse(this);
1872 			}
1873 
1874 			if(unroll)
1875 			{
1876 				for(unsigned int i = 0; i < iterations; i++)
1877 				{
1878 				//	condition->traverse(this);   // Condition could contain statements, but not in an unrollable loop
1879 
1880 					if(body)
1881 					{
1882 						body->traverse(this);
1883 					}
1884 
1885 					if(expression)
1886 					{
1887 						expression->traverse(this);
1888 					}
1889 				}
1890 			}
1891 			else
1892 			{
1893 				if(condition)
1894 				{
1895 					condition->traverse(this);
1896 				}
1897 				else
1898 				{
1899 					condition = &True;
1900 				}
1901 
1902 				emit(sw::Shader::OPCODE_WHILE, 0, condition);
1903 
1904 				if(body)
1905 				{
1906 					body->traverse(this);
1907 				}
1908 
1909 				emit(sw::Shader::OPCODE_TEST);
1910 
1911 				if(expression)
1912 				{
1913 					expression->traverse(this);
1914 				}
1915 
1916 				if(condition)
1917 				{
1918 					condition->traverse(this);
1919 				}
1920 
1921 				emit(sw::Shader::OPCODE_ENDWHILE);
1922 			}
1923 		}
1924 
1925 		return false;
1926 	}
1927 
visitBranch(Visit visit,TIntermBranch * node)1928 	bool OutputASM::visitBranch(Visit visit, TIntermBranch *node)
1929 	{
1930 		if(currentScope != emitScope)
1931 		{
1932 			return false;
1933 		}
1934 
1935 		switch(node->getFlowOp())
1936 		{
1937 		case EOpKill:      if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD);  break;
1938 		case EOpBreak:     if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK);    break;
1939 		case EOpContinue:  if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break;
1940 		case EOpReturn:
1941 			if(visit == PostVisit)
1942 			{
1943 				TIntermTyped *value = node->getExpression();
1944 
1945 				if(value)
1946 				{
1947 					copy(functionArray[currentFunction].ret, value);
1948 				}
1949 
1950 				emit(sw::Shader::OPCODE_LEAVE);
1951 			}
1952 			break;
1953 		default: UNREACHABLE(node->getFlowOp());
1954 		}
1955 
1956 		return true;
1957 	}
1958 
visitSwitch(Visit visit,TIntermSwitch * node)1959 	bool OutputASM::visitSwitch(Visit visit, TIntermSwitch *node)
1960 	{
1961 		if(currentScope != emitScope)
1962 		{
1963 			return false;
1964 		}
1965 
1966 		TIntermTyped* switchValue = node->getInit();
1967 		TIntermAggregate* opList = node->getStatementList();
1968 
1969 		if(!switchValue || !opList)
1970 		{
1971 			return false;
1972 		}
1973 
1974 		switchValue->traverse(this);
1975 
1976 		emit(sw::Shader::OPCODE_SWITCH);
1977 
1978 		TIntermSequence& sequence = opList->getSequence();
1979 		TIntermSequence::iterator it = sequence.begin();
1980 		TIntermSequence::iterator defaultIt = sequence.end();
1981 		int nbCases = 0;
1982 		for(; it != sequence.end(); ++it)
1983 		{
1984 			TIntermCase* currentCase = (*it)->getAsCaseNode();
1985 			if(currentCase)
1986 			{
1987 				TIntermSequence::iterator caseIt = it;
1988 
1989 				TIntermTyped* condition = currentCase->getCondition();
1990 				if(condition) // non default case
1991 				{
1992 					if(nbCases != 0)
1993 					{
1994 						emit(sw::Shader::OPCODE_ELSE);
1995 					}
1996 
1997 					condition->traverse(this);
1998 					Temporary result(this);
1999 					emitBinary(sw::Shader::OPCODE_EQ, &result, switchValue, condition);
2000 					emit(sw::Shader::OPCODE_IF, 0, &result);
2001 					nbCases++;
2002 
2003 					// Emit the code for this case and all subsequent cases until we hit a break statement.
2004 					// TODO: This can repeat a lot of code for switches with many fall-through cases.
2005 					for(++caseIt; caseIt != sequence.end(); ++caseIt)
2006 					{
2007 						(*caseIt)->traverse(this);
2008 
2009 						// Stop if we encounter an unconditional branch (break, continue, return, or kill).
2010 						// TODO: This doesn't work if the statement is at a deeper scope level (e.g. {break;}).
2011 						// Note that this eliminates useless operations but shouldn't affect correctness.
2012 						if((*caseIt)->getAsBranchNode())
2013 						{
2014 							break;
2015 						}
2016 					}
2017 				}
2018 				else
2019 				{
2020 					defaultIt = it; // The default case might not be the last case, keep it for last
2021 				}
2022 			}
2023 		}
2024 
2025 		// If there's a default case, traverse it here
2026 		if(defaultIt != sequence.end())
2027 		{
2028 			emit(sw::Shader::OPCODE_ELSE);
2029 			for(++defaultIt; defaultIt != sequence.end(); ++defaultIt)
2030 			{
2031 				(*defaultIt)->traverse(this);
2032 				if((*defaultIt)->getAsBranchNode()) // Kill, Break, Continue or Return
2033 				{
2034 					break;
2035 				}
2036 			}
2037 		}
2038 
2039 		for(int i = 0; i < nbCases; ++i)
2040 		{
2041 			emit(sw::Shader::OPCODE_ENDIF);
2042 		}
2043 
2044 		emit(sw::Shader::OPCODE_ENDSWITCH);
2045 
2046 		return false;
2047 	}
2048 
emit(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2,TIntermNode * src3,TIntermNode * src4)2049 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4)
2050 	{
2051 		return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0);
2052 	}
2053 
emit(sw::Shader::Opcode op,TIntermTyped * dst,int dstIndex,TIntermNode * src0,int index0,TIntermNode * src1,int index1,TIntermNode * src2,int index2,TIntermNode * src3,int index3,TIntermNode * src4,int index4)2054 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1,
2055 	                             TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, int index4)
2056 	{
2057 		Instruction *instruction = new Instruction(op);
2058 
2059 		if(dst)
2060 		{
2061 			destination(instruction->dst, dst, dstIndex);
2062 		}
2063 
2064 		if(src0)
2065 		{
2066 			TIntermTyped* src = src0->getAsTyped();
2067 			instruction->dst.partialPrecision = src && (src->getPrecision() <= EbpLow);
2068 		}
2069 
2070 		source(instruction->src[0], src0, index0);
2071 		source(instruction->src[1], src1, index1);
2072 		source(instruction->src[2], src2, index2);
2073 		source(instruction->src[3], src3, index3);
2074 		source(instruction->src[4], src4, index4);
2075 
2076 		shader->append(instruction);
2077 
2078 		return instruction;
2079 	}
2080 
emitCast(TIntermTyped * dst,TIntermTyped * src)2081 	Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src)
2082 	{
2083 		return emitCast(dst, 0, src, 0);
2084 	}
2085 
emitCast(TIntermTyped * dst,int dstIndex,TIntermTyped * src,int srcIndex)2086 	Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex)
2087 	{
2088 		switch(src->getBasicType())
2089 		{
2090 		case EbtBool:
2091 			switch(dst->getBasicType())
2092 			{
2093 			case EbtInt:   return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
2094 			case EbtUInt:  return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
2095 			case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex);
2096 			default:       break;
2097 			}
2098 			break;
2099 		case EbtInt:
2100 			switch(dst->getBasicType())
2101 			{
2102 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
2103 			case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex);
2104 			default:       break;
2105 			}
2106 			break;
2107 		case EbtUInt:
2108 			switch(dst->getBasicType())
2109 			{
2110 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
2111 			case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex);
2112 			default:       break;
2113 			}
2114 			break;
2115 		case EbtFloat:
2116 			switch(dst->getBasicType())
2117 			{
2118 			case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex);
2119 			case EbtInt:  return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex);
2120 			case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex);
2121 			default:      break;
2122 			}
2123 			break;
2124 		default:
2125 			break;
2126 		}
2127 
2128 		ASSERT((src->getBasicType() == dst->getBasicType()) ||
2129 		      ((src->getBasicType() == EbtInt) && (dst->getBasicType() == EbtUInt)) ||
2130 		      ((src->getBasicType() == EbtUInt) && (dst->getBasicType() == EbtInt)));
2131 
2132 		return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex);
2133 	}
2134 
emitBinary(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2)2135 	void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2)
2136 	{
2137 		for(int index = 0; index < dst->elementRegisterCount(); index++)
2138 		{
2139 			emit(op, dst, index, src0, index, src1, index, src2, index);
2140 		}
2141 	}
2142 
emitAssign(sw::Shader::Opcode op,TIntermTyped * result,TIntermTyped * lhs,TIntermTyped * src0,TIntermTyped * src1)2143 	void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1)
2144 	{
2145 		emitBinary(op, result, src0, src1);
2146 		assignLvalue(lhs, result);
2147 	}
2148 
emitCmp(sw::Shader::Control cmpOp,TIntermTyped * dst,TIntermNode * left,TIntermNode * right,int index)2149 	void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index)
2150 	{
2151 		sw::Shader::Opcode opcode;
2152 		switch(left->getAsTyped()->getBasicType())
2153 		{
2154 		case EbtBool:
2155 		case EbtInt:
2156 			opcode = sw::Shader::OPCODE_ICMP;
2157 			break;
2158 		case EbtUInt:
2159 			opcode = sw::Shader::OPCODE_UCMP;
2160 			break;
2161 		default:
2162 			opcode = sw::Shader::OPCODE_CMP;
2163 			break;
2164 		}
2165 
2166 		Instruction *cmp = emit(opcode, dst, 0, left, index, right, index);
2167 		cmp->control = cmpOp;
2168 	}
2169 
componentCount(const TType & type,int registers)2170 	int componentCount(const TType &type, int registers)
2171 	{
2172 		if(registers == 0)
2173 		{
2174 			return 0;
2175 		}
2176 
2177 		if(type.isArray() && registers >= type.elementRegisterCount())
2178 		{
2179 			int index = registers / type.elementRegisterCount();
2180 			registers -= index * type.elementRegisterCount();
2181 			return index * type.getElementSize() + componentCount(type, registers);
2182 		}
2183 
2184 		if(type.isStruct() || type.isInterfaceBlock())
2185 		{
2186 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
2187 			int elements = 0;
2188 
2189 			for(const auto &field : fields)
2190 			{
2191 				const TType &fieldType = *(field->type());
2192 
2193 				if(fieldType.totalRegisterCount() <= registers)
2194 				{
2195 					registers -= fieldType.totalRegisterCount();
2196 					elements += fieldType.getObjectSize();
2197 				}
2198 				else   // Register within this field
2199 				{
2200 					return elements + componentCount(fieldType, registers);
2201 				}
2202 			}
2203 		}
2204 		else if(type.isMatrix())
2205 		{
2206 			return registers * type.registerSize();
2207 		}
2208 
2209 		UNREACHABLE(0);
2210 		return 0;
2211 	}
2212 
registerSize(const TType & type,int registers)2213 	int registerSize(const TType &type, int registers)
2214 	{
2215 		if(registers == 0)
2216 		{
2217 			if(type.isStruct())
2218 			{
2219 				return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0);
2220 			}
2221 			else if(type.isInterfaceBlock())
2222 			{
2223 				return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0);
2224 			}
2225 
2226 			return type.registerSize();
2227 		}
2228 
2229 		if(type.isArray() && registers >= type.elementRegisterCount())
2230 		{
2231 			int index = registers / type.elementRegisterCount();
2232 			registers -= index * type.elementRegisterCount();
2233 			return registerSize(type, registers);
2234 		}
2235 
2236 		if(type.isStruct() || type.isInterfaceBlock())
2237 		{
2238 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
2239 			int elements = 0;
2240 
2241 			for(const auto &field : fields)
2242 			{
2243 				const TType &fieldType = *(field->type());
2244 
2245 				if(fieldType.totalRegisterCount() <= registers)
2246 				{
2247 					registers -= fieldType.totalRegisterCount();
2248 					elements += fieldType.getObjectSize();
2249 				}
2250 				else   // Register within this field
2251 				{
2252 					return registerSize(fieldType, registers);
2253 				}
2254 			}
2255 		}
2256 		else if(type.isMatrix())
2257 		{
2258 			return registerSize(type, 0);
2259 		}
2260 
2261 		UNREACHABLE(0);
2262 		return 0;
2263 	}
2264 
getBlockId(TIntermTyped * arg)2265 	int OutputASM::getBlockId(TIntermTyped *arg)
2266 	{
2267 		if(arg)
2268 		{
2269 			const TType &type = arg->getType();
2270 			TInterfaceBlock* block = type.getInterfaceBlock();
2271 			if(block && (type.getQualifier() == EvqUniform))
2272 			{
2273 				// Make sure the uniform block is declared
2274 				uniformRegister(arg);
2275 
2276 				const char* blockName = block->name().c_str();
2277 
2278 				// Fetch uniform block index from array of blocks
2279 				for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it)
2280 				{
2281 					if(blockName == it->name)
2282 					{
2283 						return it->blockId;
2284 					}
2285 				}
2286 
2287 				ASSERT(false);
2288 			}
2289 		}
2290 
2291 		return -1;
2292 	}
2293 
getArgumentInfo(TIntermTyped * arg,int index)2294 	OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index)
2295 	{
2296 		const TType &type = arg->getType();
2297 		int blockId = getBlockId(arg);
2298 		ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1);
2299 		if(blockId != -1)
2300 		{
2301 			argumentInfo.bufferIndex = 0;
2302 			for(int i = 0; i < blockId; ++i)
2303 			{
2304 				int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize;
2305 				argumentInfo.bufferIndex += blockArraySize > 0 ? blockArraySize : 1;
2306 			}
2307 
2308 			const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId];
2309 
2310 			BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end();
2311 			BlockDefinitionIndexMap::const_iterator it = itEnd;
2312 
2313 			argumentInfo.clampedIndex = index;
2314 			if(type.isInterfaceBlock())
2315 			{
2316 				// Offset index to the beginning of the selected instance
2317 				int blockRegisters = type.elementRegisterCount();
2318 				int bufferOffset = argumentInfo.clampedIndex / blockRegisters;
2319 				argumentInfo.bufferIndex += bufferOffset;
2320 				argumentInfo.clampedIndex -= bufferOffset * blockRegisters;
2321 			}
2322 
2323 			int regIndex = registerIndex(arg);
2324 			for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i)
2325 			{
2326 				it = blockDefinition.find(i);
2327 				if(it != itEnd)
2328 				{
2329 					argumentInfo.clampedIndex -= (i - regIndex);
2330 					break;
2331 				}
2332 			}
2333 			ASSERT(it != itEnd);
2334 
2335 			argumentInfo.typedMemberInfo = it->second;
2336 
2337 			int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount();
2338 			argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex;
2339 		}
2340 		else
2341 		{
2342 			argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index;
2343 		}
2344 
2345 		return argumentInfo;
2346 	}
2347 
source(sw::Shader::SourceParameter & parameter,TIntermNode * argument,int index)2348 	void OutputASM::source(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index)
2349 	{
2350 		if(argument)
2351 		{
2352 			TIntermTyped *arg = argument->getAsTyped();
2353 			Temporary unpackedUniform(this);
2354 
2355 			const TType& srcType = arg->getType();
2356 			TInterfaceBlock* srcBlock = srcType.getInterfaceBlock();
2357 			if(srcBlock && (srcType.getQualifier() == EvqUniform))
2358 			{
2359 				const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
2360 				const TType &memberType = argumentInfo.typedMemberInfo.type;
2361 
2362 				if(memberType.getBasicType() == EbtBool)
2363 				{
2364 					ASSERT(argumentInfo.clampedIndex < (memberType.isArray() ? memberType.getArraySize() : 1)); // index < arraySize
2365 
2366 					// Convert the packed bool, which is currently an int, to a true bool
2367 					Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B);
2368 					instruction->dst.type = sw::Shader::PARAMETER_TEMP;
2369 					instruction->dst.index = registerIndex(&unpackedUniform);
2370 					instruction->src[0].type = sw::Shader::PARAMETER_CONST;
2371 					instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
2372 					instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride;
2373 
2374 					shader->append(instruction);
2375 
2376 					arg = &unpackedUniform;
2377 					index = 0;
2378 				}
2379 				else if((memberType.getLayoutQualifier().matrixPacking == EmpRowMajor) && memberType.isMatrix())
2380 				{
2381 					int numCols = memberType.getNominalSize();
2382 					int numRows = memberType.getSecondarySize();
2383 
2384 					ASSERT(argumentInfo.clampedIndex < (numCols * (memberType.isArray() ? memberType.getArraySize() : 1))); // index < cols * arraySize
2385 
2386 					unsigned int dstIndex = registerIndex(&unpackedUniform);
2387 					unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55;
2388 					int arrayIndex = argumentInfo.clampedIndex / numCols;
2389 					int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride;
2390 
2391 					for(int j = 0; j < numRows; ++j)
2392 					{
2393 						// Transpose the row major matrix
2394 						Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV);
2395 						instruction->dst.type = sw::Shader::PARAMETER_TEMP;
2396 						instruction->dst.index = dstIndex;
2397 						instruction->dst.mask = 1 << j;
2398 						instruction->src[0].type = sw::Shader::PARAMETER_CONST;
2399 						instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
2400 						instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride;
2401 						instruction->src[0].swizzle = srcSwizzle;
2402 
2403 						shader->append(instruction);
2404 					}
2405 
2406 					arg = &unpackedUniform;
2407 					index = 0;
2408 				}
2409 			}
2410 
2411 			const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
2412 			const TType &type = argumentInfo.typedMemberInfo.type;
2413 
2414 			int size = registerSize(type, argumentInfo.clampedIndex);
2415 
2416 			parameter.type = registerType(arg);
2417 			parameter.bufferIndex = argumentInfo.bufferIndex;
2418 
2419 			if(arg->getAsConstantUnion() && arg->getAsConstantUnion()->getUnionArrayPointer())
2420 			{
2421 				int component = componentCount(type, argumentInfo.clampedIndex);
2422 				ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer();
2423 
2424 				for(int i = 0; i < 4; i++)
2425 				{
2426 					if(size == 1)   // Replicate
2427 					{
2428 						parameter.value[i] = constants[component + 0].getAsFloat();
2429 					}
2430 					else if(i < size)
2431 					{
2432 						parameter.value[i] = constants[component + i].getAsFloat();
2433 					}
2434 					else
2435 					{
2436 						parameter.value[i] = 0.0f;
2437 					}
2438 				}
2439 			}
2440 			else
2441 			{
2442 				parameter.index = registerIndex(arg) + argumentInfo.clampedIndex;
2443 
2444 				if(parameter.bufferIndex != -1)
2445 				{
2446 					int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride;
2447 					parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride;
2448 				}
2449 			}
2450 
2451 			if(!IsSampler(arg->getBasicType()))
2452 			{
2453 				parameter.swizzle = readSwizzle(arg, size);
2454 			}
2455 		}
2456 	}
2457 
destination(sw::Shader::DestinationParameter & parameter,TIntermTyped * arg,int index)2458 	void OutputASM::destination(sw::Shader::DestinationParameter &parameter, TIntermTyped *arg, int index)
2459 	{
2460 		parameter.type = registerType(arg);
2461 		parameter.index = registerIndex(arg) + index;
2462 		parameter.mask = writeMask(arg, index);
2463 	}
2464 
copy(TIntermTyped * dst,TIntermNode * src,int offset)2465 	void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset)
2466 	{
2467 		for(int index = 0; index < dst->totalRegisterCount(); index++)
2468 		{
2469 			Instruction *mov = emit(sw::Shader::OPCODE_MOV, dst, index, src, offset + index);
2470 		}
2471 	}
2472 
// Extracts the 2-bit component selector stored at position 'index' of a
// packed swizzle (position 0 occupies the two least significant bits).
int swizzleElement(int swizzle, int index)
{
	const int shift = 2 * index;
	const int selector = swizzle >> shift;

	return selector & 3;
}
2477 
swizzleSwizzle(int leftSwizzle,int rightSwizzle)2478 	int swizzleSwizzle(int leftSwizzle, int rightSwizzle)
2479 	{
2480 		return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) |
2481 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) |
2482 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) |
2483 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6);
2484 	}
2485 
assignLvalue(TIntermTyped * dst,TIntermTyped * src)2486 	void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src)
2487 	{
2488 		if((src->isVector() && (!dst->isVector() || (src->getNominalSize() != dst->getNominalSize()))) ||
2489 		   (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize()))))
2490 		{
2491 			return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix");
2492 		}
2493 
2494 		TIntermBinary *binary = dst->getAsBinaryNode();
2495 
2496 		if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar())
2497 		{
2498 			Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT);
2499 
2500 			lvalue(insert->dst, dst);
2501 
2502 			insert->src[0].type = insert->dst.type;
2503 			insert->src[0].index = insert->dst.index;
2504 			insert->src[0].rel = insert->dst.rel;
2505 			source(insert->src[1], src);
2506 			source(insert->src[2], binary->getRight());
2507 
2508 			shader->append(insert);
2509 		}
2510 		else
2511 		{
2512 			Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV);
2513 
2514 			int swizzle = lvalue(mov1->dst, dst);
2515 
2516 			source(mov1->src[0], src);
2517 			mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle);
2518 
2519 			shader->append(mov1);
2520 
2521 			for(int offset = 1; offset < dst->totalRegisterCount(); offset++)
2522 			{
2523 				Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV);
2524 
2525 				mov->dst = mov1->dst;
2526 				mov->dst.index += offset;
2527 				mov->dst.mask = writeMask(dst, offset);
2528 
2529 				source(mov->src[0], src, offset);
2530 
2531 				shader->append(mov);
2532 			}
2533 		}
2534 	}
2535 
evaluateRvalue(TIntermTyped * node)2536 	void OutputASM::evaluateRvalue(TIntermTyped *node)
2537 	{
2538 		TIntermBinary *binary = node->getAsBinaryNode();
2539 
2540 		if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && node->isScalar())
2541 		{
2542 			Instruction *insert = new Instruction(sw::Shader::OPCODE_EXTRACT);
2543 
2544 			destination(insert->dst, node);
2545 
2546 			Temporary address(this);
2547 			unsigned char mask;
2548 			TIntermTyped *root = nullptr;
2549 			unsigned int offset = 0;
2550 			int swizzle = lvalue(root, offset, insert->src[0].rel, mask, address, node);
2551 
2552 			source(insert->src[0], root, offset);
2553 			insert->src[0].swizzle = swizzleSwizzle(insert->src[0].swizzle, swizzle);
2554 
2555 			source(insert->src[1], binary->getRight());
2556 
2557 			shader->append(insert);
2558 		}
2559 		else
2560 		{
2561 			Instruction *mov1 = new Instruction(sw::Shader::OPCODE_MOV);
2562 
2563 			destination(mov1->dst, node, 0);
2564 
2565 			Temporary address(this);
2566 			unsigned char mask;
2567 			TIntermTyped *root = nullptr;
2568 			unsigned int offset = 0;
2569 			int swizzle = lvalue(root, offset, mov1->src[0].rel, mask, address, node);
2570 
2571 			source(mov1->src[0], root, offset);
2572 			mov1->src[0].swizzle = swizzleSwizzle(mov1->src[0].swizzle, swizzle);
2573 
2574 			shader->append(mov1);
2575 
2576 			for(int i = 1; i < node->totalRegisterCount(); i++)
2577 			{
2578 				Instruction *mov = emit(sw::Shader::OPCODE_MOV, node, i, root, offset + i);
2579 				mov->src[0].rel = mov1->src[0].rel;
2580 			}
2581 		}
2582 	}
2583 
lvalue(sw::Shader::DestinationParameter & dst,TIntermTyped * node)2584 	int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, TIntermTyped *node)
2585 	{
2586 		Temporary address(this);
2587 		TIntermTyped *root = nullptr;
2588 		unsigned int offset = 0;
2589 		unsigned char mask = 0xF;
2590 		int swizzle = lvalue(root, offset, dst.rel, mask, address, node);
2591 
2592 		dst.type = registerType(root);
2593 		dst.index = registerIndex(root) + offset;
2594 		dst.mask = mask;
2595 
2596 		return swizzle;
2597 	}
2598 
lvalue(TIntermTyped * & root,unsigned int & offset,sw::Shader::Relative & rel,unsigned char & mask,Temporary & address,TIntermTyped * node)2599 	int OutputASM::lvalue(TIntermTyped *&root, unsigned int &offset, sw::Shader::Relative &rel, unsigned char &mask, Temporary &address, TIntermTyped *node)
2600 	{
2601 		TIntermTyped *result = node;
2602 		TIntermBinary *binary = node->getAsBinaryNode();
2603 		TIntermSymbol *symbol = node->getAsSymbolNode();
2604 
2605 		if(binary)
2606 		{
2607 			TIntermTyped *left = binary->getLeft();
2608 			TIntermTyped *right = binary->getRight();
2609 
2610 			int leftSwizzle = lvalue(root, offset, rel, mask, address, left);   // Resolve the l-value of the left side
2611 
2612 			switch(binary->getOp())
2613 			{
2614 			case EOpIndexDirect:
2615 				{
2616 					int rightIndex = right->getAsConstantUnion()->getIConst(0);
2617 
2618 					if(left->isRegister())
2619 					{
2620 						int leftMask = mask;
2621 
2622 						mask = 1;
2623 						while((leftMask & mask) == 0)
2624 						{
2625 							mask = mask << 1;
2626 						}
2627 
2628 						int element = swizzleElement(leftSwizzle, rightIndex);
2629 						mask = 1 << element;
2630 
2631 						return element;
2632 					}
2633 					else if(left->isArray() || left->isMatrix())
2634 					{
2635 						offset += rightIndex * result->totalRegisterCount();
2636 						return 0xE4;
2637 					}
2638 					else UNREACHABLE(0);
2639 				}
2640 				break;
2641 			case EOpIndexIndirect:
2642 				{
2643 					right->traverse(this);
2644 
2645 					if(left->isRegister())
2646 					{
2647 						// Requires INSERT instruction (handled by calling function)
2648 					}
2649 					else if(left->isArray() || left->isMatrix())
2650 					{
2651 						int scale = result->totalRegisterCount();
2652 
2653 						if(rel.type == sw::Shader::PARAMETER_VOID)   // Use the index register as the relative address directly
2654 						{
2655 							if(left->totalRegisterCount() > 1)
2656 							{
2657 								sw::Shader::SourceParameter relativeRegister;
2658 								source(relativeRegister, right);
2659 
2660 								rel.index = relativeRegister.index;
2661 								rel.type = relativeRegister.type;
2662 								rel.scale = scale;
2663 								rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);
2664 							}
2665 						}
2666 						else if(rel.index != registerIndex(&address))   // Move the previous index register to the address register
2667 						{
2668 							if(scale == 1)
2669 							{
2670 								Constant oldScale((int)rel.scale);
2671 								Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right);
2672 								mad->src[0].index = rel.index;
2673 								mad->src[0].type = rel.type;
2674 							}
2675 							else
2676 							{
2677 								Constant oldScale((int)rel.scale);
2678 								Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale);
2679 								mul->src[0].index = rel.index;
2680 								mul->src[0].type = rel.type;
2681 
2682 								Constant newScale(scale);
2683 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
2684 							}
2685 
2686 							rel.type = sw::Shader::PARAMETER_TEMP;
2687 							rel.index = registerIndex(&address);
2688 							rel.scale = 1;
2689 						}
2690 						else   // Just add the new index to the address register
2691 						{
2692 							if(scale == 1)
2693 							{
2694 								emit(sw::Shader::OPCODE_IADD, &address, &address, right);
2695 							}
2696 							else
2697 							{
2698 								Constant newScale(scale);
2699 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
2700 							}
2701 						}
2702 					}
2703 					else UNREACHABLE(0);
2704 				}
2705 				break;
2706 			case EOpIndexDirectStruct:
2707 			case EOpIndexDirectInterfaceBlock:
2708 				{
2709 					const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ?
2710 					                           left->getType().getStruct()->fields() :
2711 					                           left->getType().getInterfaceBlock()->fields();
2712 					int index = right->getAsConstantUnion()->getIConst(0);
2713 					int fieldOffset = 0;
2714 
2715 					for(int i = 0; i < index; i++)
2716 					{
2717 						fieldOffset += fields[i]->type()->totalRegisterCount();
2718 					}
2719 
2720 					offset += fieldOffset;
2721 					mask = writeMask(result);
2722 
2723 					return 0xE4;
2724 				}
2725 				break;
2726 			case EOpVectorSwizzle:
2727 				{
2728 					ASSERT(left->isRegister());
2729 
2730 					int leftMask = mask;
2731 
2732 					int swizzle = 0;
2733 					int rightMask = 0;
2734 
2735 					TIntermSequence &sequence = right->getAsAggregate()->getSequence();
2736 
2737 					for(unsigned int i = 0; i < sequence.size(); i++)
2738 					{
2739 						int index = sequence[i]->getAsConstantUnion()->getIConst(0);
2740 
2741 						int element = swizzleElement(leftSwizzle, index);
2742 						rightMask = rightMask | (1 << element);
2743 						swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2);
2744 					}
2745 
2746 					mask = leftMask & rightMask;
2747 
2748 					return swizzle;
2749 				}
2750 				break;
2751 			default:
2752 				UNREACHABLE(binary->getOp());   // Not an l-value operator
2753 				break;
2754 			}
2755 		}
2756 		else if(symbol)
2757 		{
2758 			root = symbol;
2759 			offset = 0;
2760 			mask = writeMask(symbol);
2761 
2762 			return 0xE4;
2763 		}
2764 		else
2765 		{
2766 			node->traverse(this);
2767 
2768 			root = node;
2769 			offset = 0;
2770 			mask = writeMask(node);
2771 
2772 			return 0xE4;
2773 		}
2774 
2775 		return 0xE4;
2776 	}
2777 
registerType(TIntermTyped * operand)2778 	sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand)
2779 	{
2780 		if(isSamplerRegister(operand))
2781 		{
2782 			return sw::Shader::PARAMETER_SAMPLER;
2783 		}
2784 
2785 		const TQualifier qualifier = operand->getQualifier();
2786 		if((qualifier == EvqFragColor) || (qualifier == EvqFragData))
2787 		{
2788 			if(((qualifier == EvqFragData) && (outputQualifier == EvqFragColor)) ||
2789 			   ((qualifier == EvqFragColor) && (outputQualifier == EvqFragData)))
2790 			{
2791 				mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", "");
2792 			}
2793 			outputQualifier = qualifier;
2794 		}
2795 
2796 		if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer()))
2797 		{
2798 			// Constant arrays are in the constant register file.
2799 			if(operand->isArray() && operand->getArraySize() > 1)
2800 			{
2801 				return sw::Shader::PARAMETER_CONST;
2802 			}
2803 			else
2804 			{
2805 				return sw::Shader::PARAMETER_TEMP;
2806 			}
2807 		}
2808 
2809 		switch(qualifier)
2810 		{
2811 		case EvqTemporary:           return sw::Shader::PARAMETER_TEMP;
2812 		case EvqGlobal:              return sw::Shader::PARAMETER_TEMP;
2813 		case EvqConstExpr:           return sw::Shader::PARAMETER_FLOAT4LITERAL;   // All converted to float
2814 		case EvqAttribute:           return sw::Shader::PARAMETER_INPUT;
2815 		case EvqVaryingIn:           return sw::Shader::PARAMETER_INPUT;
2816 		case EvqVaryingOut:          return sw::Shader::PARAMETER_OUTPUT;
2817 		case EvqVertexIn:            return sw::Shader::PARAMETER_INPUT;
2818 		case EvqFragmentOut:         return sw::Shader::PARAMETER_COLOROUT;
2819 		case EvqVertexOut:           return sw::Shader::PARAMETER_OUTPUT;
2820 		case EvqFragmentIn:          return sw::Shader::PARAMETER_INPUT;
2821 		case EvqInvariantVaryingIn:  return sw::Shader::PARAMETER_INPUT;    // FIXME: Guarantee invariance at the backend
2822 		case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT;   // FIXME: Guarantee invariance at the backend
2823 		case EvqSmooth:              return sw::Shader::PARAMETER_OUTPUT;
2824 		case EvqFlat:                return sw::Shader::PARAMETER_OUTPUT;
2825 		case EvqCentroidOut:         return sw::Shader::PARAMETER_OUTPUT;
2826 		case EvqSmoothIn:            return sw::Shader::PARAMETER_INPUT;
2827 		case EvqFlatIn:              return sw::Shader::PARAMETER_INPUT;
2828 		case EvqCentroidIn:          return sw::Shader::PARAMETER_INPUT;
2829 		case EvqUniform:             return sw::Shader::PARAMETER_CONST;
2830 		case EvqIn:                  return sw::Shader::PARAMETER_TEMP;
2831 		case EvqOut:                 return sw::Shader::PARAMETER_TEMP;
2832 		case EvqInOut:               return sw::Shader::PARAMETER_TEMP;
2833 		case EvqConstReadOnly:       return sw::Shader::PARAMETER_TEMP;
2834 		case EvqPosition:            return sw::Shader::PARAMETER_OUTPUT;
2835 		case EvqPointSize:           return sw::Shader::PARAMETER_OUTPUT;
2836 		case EvqInstanceID:          return sw::Shader::PARAMETER_MISCTYPE;
2837 		case EvqVertexID:            return sw::Shader::PARAMETER_MISCTYPE;
2838 		case EvqFragCoord:           return sw::Shader::PARAMETER_MISCTYPE;
2839 		case EvqFrontFacing:         return sw::Shader::PARAMETER_MISCTYPE;
2840 		case EvqPointCoord:          return sw::Shader::PARAMETER_INPUT;
2841 		case EvqFragColor:           return sw::Shader::PARAMETER_COLOROUT;
2842 		case EvqFragData:            return sw::Shader::PARAMETER_COLOROUT;
2843 		case EvqFragDepth:           return sw::Shader::PARAMETER_DEPTHOUT;
2844 		default: UNREACHABLE(qualifier);
2845 		}
2846 
2847 		return sw::Shader::PARAMETER_VOID;
2848 	}
2849 
hasFlatQualifier(TIntermTyped * operand)2850 	bool OutputASM::hasFlatQualifier(TIntermTyped *operand)
2851 	{
2852 		const TQualifier qualifier = operand->getQualifier();
2853 		return qualifier == EvqFlat || qualifier == EvqFlatOut || qualifier == EvqFlatIn;
2854 	}
2855 
registerIndex(TIntermTyped * operand)2856 	unsigned int OutputASM::registerIndex(TIntermTyped *operand)
2857 	{
2858 		if(isSamplerRegister(operand))
2859 		{
2860 			return samplerRegister(operand);
2861 		}
2862 
2863 		switch(operand->getQualifier())
2864 		{
2865 		case EvqTemporary:           return temporaryRegister(operand);
2866 		case EvqGlobal:              return temporaryRegister(operand);
2867 		case EvqConstExpr:           return temporaryRegister(operand);   // Unevaluated constant expression
2868 		case EvqAttribute:           return attributeRegister(operand);
2869 		case EvqVaryingIn:           return varyingRegister(operand);
2870 		case EvqVaryingOut:          return varyingRegister(operand);
2871 		case EvqVertexIn:            return attributeRegister(operand);
2872 		case EvqFragmentOut:         return fragmentOutputRegister(operand);
2873 		case EvqVertexOut:           return varyingRegister(operand);
2874 		case EvqFragmentIn:          return varyingRegister(operand);
2875 		case EvqInvariantVaryingIn:  return varyingRegister(operand);
2876 		case EvqInvariantVaryingOut: return varyingRegister(operand);
2877 		case EvqSmooth:              return varyingRegister(operand);
2878 		case EvqFlat:                return varyingRegister(operand);
2879 		case EvqCentroidOut:         return varyingRegister(operand);
2880 		case EvqSmoothIn:            return varyingRegister(operand);
2881 		case EvqFlatIn:              return varyingRegister(operand);
2882 		case EvqCentroidIn:          return varyingRegister(operand);
2883 		case EvqUniform:             return uniformRegister(operand);
2884 		case EvqIn:                  return temporaryRegister(operand);
2885 		case EvqOut:                 return temporaryRegister(operand);
2886 		case EvqInOut:               return temporaryRegister(operand);
2887 		case EvqConstReadOnly:       return temporaryRegister(operand);
2888 		case EvqPosition:            return varyingRegister(operand);
2889 		case EvqPointSize:           return varyingRegister(operand);
2890 		case EvqInstanceID:          vertexShader->declareInstanceId(); return sw::Shader::InstanceIDIndex;
2891 		case EvqVertexID:            vertexShader->declareVertexId(); return sw::Shader::VertexIDIndex;
2892 		case EvqFragCoord:           pixelShader->declareVPos();  return sw::Shader::VPosIndex;
2893 		case EvqFrontFacing:         pixelShader->declareVFace(); return sw::Shader::VFaceIndex;
2894 		case EvqPointCoord:          return varyingRegister(operand);
2895 		case EvqFragColor:           return 0;
2896 		case EvqFragData:            return fragmentOutputRegister(operand);
2897 		case EvqFragDepth:           return 0;
2898 		default: UNREACHABLE(operand->getQualifier());
2899 		}
2900 
2901 		return 0;
2902 	}
2903 
writeMask(TIntermTyped * destination,int index)2904 	int OutputASM::writeMask(TIntermTyped *destination, int index)
2905 	{
2906 		if(destination->getQualifier() == EvqPointSize)
2907 		{
2908 			return 0x2;   // Point size stored in the y component
2909 		}
2910 
2911 		return 0xF >> (4 - registerSize(destination->getType(), index));
2912 	}
2913 
readSwizzle(TIntermTyped * argument,int size)2914 	int OutputASM::readSwizzle(TIntermTyped *argument, int size)
2915 	{
2916 		if(argument->getQualifier() == EvqPointSize)
2917 		{
2918 			return 0x55;   // Point size stored in the y component
2919 		}
2920 
2921 		static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4};   // (void), xxxx, xyyy, xyzz, xyzw
2922 
2923 		return swizzleSize[size];
2924 	}
2925 
2926 	// Conservatively checks whether an expression is fast to compute and has no side effects
trivial(TIntermTyped * expression,int budget)2927 	bool OutputASM::trivial(TIntermTyped *expression, int budget)
2928 	{
2929 		if(!expression->isRegister())
2930 		{
2931 			return false;
2932 		}
2933 
2934 		return cost(expression, budget) >= 0;
2935 	}
2936 
2937 	// Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects)
cost(TIntermNode * expression,int budget)2938 	int OutputASM::cost(TIntermNode *expression, int budget)
2939 	{
2940 		if(budget < 0)
2941 		{
2942 			return budget;
2943 		}
2944 
2945 		if(expression->getAsSymbolNode())
2946 		{
2947 			return budget;
2948 		}
2949 		else if(expression->getAsConstantUnion())
2950 		{
2951 			return budget;
2952 		}
2953 		else if(expression->getAsBinaryNode())
2954 		{
2955 			TIntermBinary *binary = expression->getAsBinaryNode();
2956 
2957 			switch(binary->getOp())
2958 			{
2959 			case EOpVectorSwizzle:
2960 			case EOpIndexDirect:
2961 			case EOpIndexDirectStruct:
2962 			case EOpIndexDirectInterfaceBlock:
2963 				return cost(binary->getLeft(), budget - 0);
2964 			case EOpAdd:
2965 			case EOpSub:
2966 			case EOpMul:
2967 				return cost(binary->getLeft(), cost(binary->getRight(), budget - 1));
2968 			default:
2969 				return -1;
2970 			}
2971 		}
2972 		else if(expression->getAsUnaryNode())
2973 		{
2974 			TIntermUnary *unary = expression->getAsUnaryNode();
2975 
2976 			switch(unary->getOp())
2977 			{
2978 			case EOpAbs:
2979 			case EOpNegative:
2980 				return cost(unary->getOperand(), budget - 1);
2981 			default:
2982 				return -1;
2983 			}
2984 		}
2985 		else if(expression->getAsSelectionNode())
2986 		{
2987 			TIntermSelection *selection = expression->getAsSelectionNode();
2988 
2989 			if(selection->usesTernaryOperator())
2990 			{
2991 				TIntermTyped *condition = selection->getCondition();
2992 				TIntermNode *trueBlock = selection->getTrueBlock();
2993 				TIntermNode *falseBlock = selection->getFalseBlock();
2994 				TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
2995 
2996 				if(constantCondition)
2997 				{
2998 					bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
2999 
3000 					if(trueCondition)
3001 					{
3002 						return cost(trueBlock, budget - 0);
3003 					}
3004 					else
3005 					{
3006 						return cost(falseBlock, budget - 0);
3007 					}
3008 				}
3009 				else
3010 				{
3011 					return cost(trueBlock, cost(falseBlock, budget - 2));
3012 				}
3013 			}
3014 		}
3015 
3016 		return -1;
3017 	}
3018 
findFunction(const TString & name)3019 	const Function *OutputASM::findFunction(const TString &name)
3020 	{
3021 		for(unsigned int f = 0; f < functionArray.size(); f++)
3022 		{
3023 			if(functionArray[f].name == name)
3024 			{
3025 				return &functionArray[f];
3026 			}
3027 		}
3028 
3029 		return 0;
3030 	}
3031 
temporaryRegister(TIntermTyped * temporary)3032 	int OutputASM::temporaryRegister(TIntermTyped *temporary)
3033 	{
3034 		return allocate(temporaries, temporary);
3035 	}
3036 
setPixelShaderInputs(const TType & type,int var,bool flat)3037 	void OutputASM::setPixelShaderInputs(const TType& type, int var, bool flat)
3038 	{
3039 		if(type.isStruct())
3040 		{
3041 			const TFieldList &fields = type.getStruct()->fields();
3042 			int fieldVar = var;
3043 			for(const auto &field : fields)
3044 			{
3045 				const TType& fieldType = *(field->type());
3046 				setPixelShaderInputs(fieldType, fieldVar, flat);
3047 				fieldVar += fieldType.totalRegisterCount();
3048 			}
3049 		}
3050 		else
3051 		{
3052 			for(int i = 0; i < type.totalRegisterCount(); i++)
3053 			{
3054 				pixelShader->setInput(var + i, type.registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i, flat));
3055 			}
3056 		}
3057 	}
3058 
varyingRegister(TIntermTyped * varying)3059 	int OutputASM::varyingRegister(TIntermTyped *varying)
3060 	{
3061 		int var = lookup(varyings, varying);
3062 
3063 		if(var == -1)
3064 		{
3065 			var = allocate(varyings, varying);
3066 			int registerCount = varying->totalRegisterCount();
3067 
3068 			if(pixelShader)
3069 			{
3070 				if((var + registerCount) > sw::MAX_FRAGMENT_INPUTS)
3071 				{
3072 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader");
3073 					return 0;
3074 				}
3075 
3076 				if(varying->getQualifier() == EvqPointCoord)
3077 				{
3078 					ASSERT(varying->isRegister());
3079 					pixelShader->setInput(var, varying->registerSize(), sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var));
3080 				}
3081 				else
3082 				{
3083 					setPixelShaderInputs(varying->getType(), var, hasFlatQualifier(varying));
3084 				}
3085 			}
3086 			else if(vertexShader)
3087 			{
3088 				if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS)
3089 				{
3090 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader");
3091 					return 0;
3092 				}
3093 
3094 				if(varying->getQualifier() == EvqPosition)
3095 				{
3096 					ASSERT(varying->isRegister());
3097 					vertexShader->setPositionRegister(var);
3098 				}
3099 				else if(varying->getQualifier() == EvqPointSize)
3100 				{
3101 					ASSERT(varying->isRegister());
3102 					vertexShader->setPointSizeRegister(var);
3103 				}
3104 				else
3105 				{
3106 					// Semantic indexes for user varyings will be assigned during program link to match the pixel shader
3107 				}
3108 			}
3109 			else UNREACHABLE(0);
3110 
3111 			declareVarying(varying, var);
3112 		}
3113 
3114 		return var;
3115 	}
3116 
declareVarying(TIntermTyped * varying,int reg)3117 	void OutputASM::declareVarying(TIntermTyped *varying, int reg)
3118 	{
3119 		if(varying->getQualifier() != EvqPointCoord)   // gl_PointCoord does not need linking
3120 		{
3121 			TIntermSymbol *symbol = varying->getAsSymbolNode();
3122 			declareVarying(varying->getType(), symbol->getSymbol(), reg);
3123 		}
3124 	}
3125 
declareVarying(const TType & type,const TString & varyingName,int registerIndex)3126 	void OutputASM::declareVarying(const TType &type, const TString &varyingName, int registerIndex)
3127 	{
3128 		const char *name = varyingName.c_str();
3129 		VaryingList &activeVaryings = shaderObject->varyings;
3130 
3131 		TStructure* structure = type.getStruct();
3132 		if(structure)
3133 		{
3134 			int fieldRegisterIndex = registerIndex;
3135 
3136 			const TFieldList &fields = type.getStruct()->fields();
3137 			for(const auto &field : fields)
3138 			{
3139 				const TType& fieldType = *(field->type());
3140 				declareVarying(fieldType, varyingName + "." + field->name(), fieldRegisterIndex);
3141 				if(fieldRegisterIndex >= 0)
3142 				{
3143 					fieldRegisterIndex += fieldType.totalRegisterCount();
3144 				}
3145 			}
3146 		}
3147 		else
3148 		{
3149 			// Check if this varying has been declared before without having a register assigned
3150 			for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++)
3151 			{
3152 				if(v->name == name)
3153 				{
3154 					if(registerIndex >= 0)
3155 					{
3156 						ASSERT(v->registerIndex < 0 || v->registerIndex == registerIndex);
3157 						v->registerIndex = registerIndex;
3158 					}
3159 
3160 					return;
3161 				}
3162 			}
3163 
3164 			activeVaryings.push_back(glsl::Varying(type, name, registerIndex, 0));
3165 		}
3166 	}
3167 
declareFragmentOutput(TIntermTyped * fragmentOutput)3168 	void OutputASM::declareFragmentOutput(TIntermTyped *fragmentOutput)
3169 	{
3170 		int requestedLocation = fragmentOutput->getType().getLayoutQualifier().location;
3171 		int registerCount = fragmentOutput->totalRegisterCount();
3172 		if(requestedLocation < 0)
3173 		{
3174 			ASSERT(requestedLocation == -1); // All other negative values would have been prevented in TParseContext::parseLayoutQualifier
3175 			return; // No requested location
3176 		}
3177 		else if((requestedLocation + registerCount) > sw::RENDERTARGETS)
3178 		{
3179 			mContext.error(fragmentOutput->getLine(), "Fragment output location larger or equal to MAX_DRAW_BUFFERS", "fragment shader");
3180 		}
3181 		else
3182 		{
3183 			int currentIndex = lookup(fragmentOutputs, fragmentOutput);
3184 			if(requestedLocation != currentIndex)
3185 			{
3186 				if(currentIndex != -1)
3187 				{
3188 					mContext.error(fragmentOutput->getLine(), "Multiple locations for fragment output", "fragment shader");
3189 				}
3190 				else
3191 				{
3192 					if(fragmentOutputs.size() <= (size_t)requestedLocation)
3193 					{
3194 						while(fragmentOutputs.size() < (size_t)requestedLocation)
3195 						{
3196 							fragmentOutputs.push_back(nullptr);
3197 						}
3198 						for(int i = 0; i < registerCount; i++)
3199 						{
3200 							fragmentOutputs.push_back(fragmentOutput);
3201 						}
3202 					}
3203 					else
3204 					{
3205 						for(int i = 0; i < registerCount; i++)
3206 						{
3207 							if(!fragmentOutputs[requestedLocation + i])
3208 							{
3209 								fragmentOutputs[requestedLocation + i] = fragmentOutput;
3210 							}
3211 							else
3212 							{
3213 								mContext.error(fragmentOutput->getLine(), "Fragment output location aliasing", "fragment shader");
3214 								return;
3215 							}
3216 						}
3217 					}
3218 				}
3219 			}
3220 		}
3221 	}
3222 
uniformRegister(TIntermTyped * uniform)3223 	int OutputASM::uniformRegister(TIntermTyped *uniform)
3224 	{
3225 		const TType &type = uniform->getType();
3226 		ASSERT(!IsSampler(type.getBasicType()));
3227 		TInterfaceBlock *block = type.getAsInterfaceBlock();
3228 		TIntermSymbol *symbol = uniform->getAsSymbolNode();
3229 		ASSERT(symbol || block);
3230 
3231 		if(symbol || block)
3232 		{
3233 			TInterfaceBlock* parentBlock = type.getInterfaceBlock();
3234 			bool isBlockMember = (!block && parentBlock);
3235 			int index = isBlockMember ? lookup(uniforms, parentBlock) : lookup(uniforms, uniform);
3236 
3237 			if(index == -1 || isBlockMember)
3238 			{
3239 				if(index == -1)
3240 				{
3241 					index = allocate(uniforms, uniform);
3242 				}
3243 
3244 				// Verify if the current uniform is a member of an already declared block
3245 				const TString &name = symbol ? symbol->getSymbol() : block->name();
3246 				int blockMemberIndex = blockMemberLookup(type, name, index);
3247 				if(blockMemberIndex == -1)
3248 				{
3249 					declareUniform(type, name, index, false);
3250 				}
3251 				else
3252 				{
3253 					index = blockMemberIndex;
3254 				}
3255 			}
3256 
3257 			return index;
3258 		}
3259 
3260 		return 0;
3261 	}
3262 
attributeRegister(TIntermTyped * attribute)3263 	int OutputASM::attributeRegister(TIntermTyped *attribute)
3264 	{
3265 		ASSERT(!attribute->isArray());
3266 
3267 		int index = lookup(attributes, attribute);
3268 
3269 		if(index == -1)
3270 		{
3271 			TIntermSymbol *symbol = attribute->getAsSymbolNode();
3272 			ASSERT(symbol);
3273 
3274 			if(symbol)
3275 			{
3276 				index = allocate(attributes, attribute);
3277 				const TType &type = attribute->getType();
3278 				int registerCount = attribute->totalRegisterCount();
3279 				sw::VertexShader::AttribType attribType = sw::VertexShader::ATTRIBTYPE_FLOAT;
3280 				switch(type.getBasicType())
3281 				{
3282 				case EbtInt:
3283 					attribType = sw::VertexShader::ATTRIBTYPE_INT;
3284 					break;
3285 				case EbtUInt:
3286 					attribType = sw::VertexShader::ATTRIBTYPE_UINT;
3287 					break;
3288 				case EbtFloat:
3289 				default:
3290 					break;
3291 				}
3292 
3293 				if(vertexShader && (index + registerCount) <= sw::MAX_VERTEX_INPUTS)
3294 				{
3295 					for(int i = 0; i < registerCount; i++)
3296 					{
3297 						vertexShader->setInput(index + i, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i, false), attribType);
3298 					}
3299 				}
3300 
3301 				ActiveAttributes &activeAttributes = shaderObject->activeAttributes;
3302 
3303 				const char *name = symbol->getSymbol().c_str();
3304 				activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index));
3305 			}
3306 		}
3307 
3308 		return index;
3309 	}
3310 
fragmentOutputRegister(TIntermTyped * fragmentOutput)3311 	int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput)
3312 	{
3313 		return allocate(fragmentOutputs, fragmentOutput);
3314 	}
3315 
samplerRegister(TIntermTyped * sampler)3316 	int OutputASM::samplerRegister(TIntermTyped *sampler)
3317 	{
3318 		const TType &type = sampler->getType();
3319 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
3320 
3321 		TIntermSymbol *symbol = sampler->getAsSymbolNode();
3322 		TIntermBinary *binary = sampler->getAsBinaryNode();
3323 
3324 		if(symbol)
3325 		{
3326 			switch(type.getQualifier())
3327 			{
3328 			case EvqUniform:
3329 				return samplerRegister(symbol);
3330 			case EvqIn:
3331 			case EvqConstReadOnly:
3332 				// Function arguments are not (uniform) sampler registers
3333 				return -1;
3334 			default:
3335 				UNREACHABLE(type.getQualifier());
3336 			}
3337 		}
3338 		else if(binary)
3339 		{
3340 			TIntermTyped *left = binary->getLeft();
3341 			TIntermTyped *right = binary->getRight();
3342 			const TType &leftType = left->getType();
3343 			int index = right->getAsConstantUnion() ? right->getAsConstantUnion()->getIConst(0) : 0;
3344 			int offset = 0;
3345 
3346 			switch(binary->getOp())
3347 			{
3348 			case EOpIndexDirect:
3349 				ASSERT(left->isArray());
3350 				offset = index * leftType.samplerRegisterCount();
3351 				break;
3352 			case EOpIndexDirectStruct:
3353 				ASSERT(leftType.isStruct());
3354 				{
3355 					const TFieldList &fields = leftType.getStruct()->fields();
3356 
3357 					for(int i = 0; i < index; i++)
3358 					{
3359 						offset += fields[i]->type()->totalSamplerRegisterCount();
3360 					}
3361 				}
3362 				break;
3363 			case EOpIndexIndirect:               // Indirect indexing produces a temporary, not a sampler register
3364 				return -1;
3365 			case EOpIndexDirectInterfaceBlock:   // Interface blocks can't contain samplers
3366 			default:
3367 				UNREACHABLE(binary->getOp());
3368 				return -1;
3369 			}
3370 
3371 			int base = samplerRegister(left);
3372 
3373 			if(base < 0)
3374 			{
3375 				return -1;
3376 			}
3377 
3378 			return base + offset;
3379 		}
3380 
3381 		UNREACHABLE(0);
3382 		return -1;   // Not a (uniform) sampler register
3383 	}
3384 
samplerRegister(TIntermSymbol * sampler)3385 	int OutputASM::samplerRegister(TIntermSymbol *sampler)
3386 	{
3387 		const TType &type = sampler->getType();
3388 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
3389 
3390 		int index = lookup(samplers, sampler);
3391 
3392 		if(index == -1)
3393 		{
3394 			index = allocate(samplers, sampler, true);
3395 
3396 			if(sampler->getQualifier() == EvqUniform)
3397 			{
3398 				const char *name = sampler->getSymbol().c_str();
3399 				declareUniform(type, name, index, true);
3400 			}
3401 		}
3402 
3403 		return index;
3404 	}
3405 
isSamplerRegister(TIntermTyped * operand)3406 	bool OutputASM::isSamplerRegister(TIntermTyped *operand)
3407 	{
3408 		return operand && IsSampler(operand->getBasicType()) && samplerRegister(operand) >= 0;
3409 	}
3410 
lookup(VariableArray & list,TIntermTyped * variable)3411 	int OutputASM::lookup(VariableArray &list, TIntermTyped *variable)
3412 	{
3413 		for(unsigned int i = 0; i < list.size(); i++)
3414 		{
3415 			if(list[i] == variable)
3416 			{
3417 				return i;   // Pointer match
3418 			}
3419 		}
3420 
3421 		TIntermSymbol *varSymbol = variable->getAsSymbolNode();
3422 		TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock();
3423 
3424 		if(varBlock)
3425 		{
3426 			for(unsigned int i = 0; i < list.size(); i++)
3427 			{
3428 				if(list[i])
3429 				{
3430 					TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock();
3431 
3432 					if(listBlock)
3433 					{
3434 						if(listBlock->name() == varBlock->name())
3435 						{
3436 							ASSERT(listBlock->arraySize() == varBlock->arraySize());
3437 							ASSERT(listBlock->fields() == varBlock->fields());
3438 							ASSERT(listBlock->blockStorage() == varBlock->blockStorage());
3439 							ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking());
3440 
3441 							return i;
3442 						}
3443 					}
3444 				}
3445 			}
3446 		}
3447 		else if(varSymbol)
3448 		{
3449 			for(unsigned int i = 0; i < list.size(); i++)
3450 			{
3451 				if(list[i])
3452 				{
3453 					TIntermSymbol *listSymbol = list[i]->getAsSymbolNode();
3454 
3455 					if(listSymbol)
3456 					{
3457 						if(listSymbol->getId() == varSymbol->getId())
3458 						{
3459 							ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol());
3460 							ASSERT(listSymbol->getType() == varSymbol->getType());
3461 							ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier());
3462 
3463 							return i;
3464 						}
3465 					}
3466 				}
3467 			}
3468 		}
3469 
3470 		return -1;
3471 	}
3472 
lookup(VariableArray & list,TInterfaceBlock * block)3473 	int OutputASM::lookup(VariableArray &list, TInterfaceBlock *block)
3474 	{
3475 		for(unsigned int i = 0; i < list.size(); i++)
3476 		{
3477 			if(list[i] && (list[i]->getType().getInterfaceBlock() == block))
3478 			{
3479 				return i;   // Pointer match
3480 			}
3481 		}
3482 		return -1;
3483 	}
3484 
allocate(VariableArray & list,TIntermTyped * variable,bool samplersOnly)3485 	int OutputASM::allocate(VariableArray &list, TIntermTyped *variable, bool samplersOnly)
3486 	{
3487 		int index = lookup(list, variable);
3488 
3489 		if(index == -1)
3490 		{
3491 			unsigned int registerCount = variable->blockRegisterCount(samplersOnly);
3492 
3493 			for(unsigned int i = 0; i < list.size(); i++)
3494 			{
3495 				if(list[i] == 0)
3496 				{
3497 					unsigned int j = 1;
3498 					for( ; j < registerCount && (i + j) < list.size(); j++)
3499 					{
3500 						if(list[i + j] != 0)
3501 						{
3502 							break;
3503 						}
3504 					}
3505 
3506 					if(j == registerCount)   // Found free slots
3507 					{
3508 						for(unsigned int j = 0; j < registerCount; j++)
3509 						{
3510 							list[i + j] = variable;
3511 						}
3512 
3513 						return i;
3514 					}
3515 				}
3516 			}
3517 
3518 			index = list.size();
3519 
3520 			for(unsigned int i = 0; i < registerCount; i++)
3521 			{
3522 				list.push_back(variable);
3523 			}
3524 		}
3525 
3526 		return index;
3527 	}
3528 
free(VariableArray & list,TIntermTyped * variable)3529 	void OutputASM::free(VariableArray &list, TIntermTyped *variable)
3530 	{
3531 		int index = lookup(list, variable);
3532 
3533 		if(index >= 0)
3534 		{
3535 			list[index] = 0;
3536 		}
3537 	}
3538 
blockMemberLookup(const TType & type,const TString & name,int registerIndex)3539 	int OutputASM::blockMemberLookup(const TType &type, const TString &name, int registerIndex)
3540 	{
3541 		const TInterfaceBlock *block = type.getInterfaceBlock();
3542 
3543 		if(block)
3544 		{
3545 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
3546 			const TFieldList& fields = block->fields();
3547 			const TString &blockName = block->name();
3548 			int fieldRegisterIndex = registerIndex;
3549 
3550 			if(!type.isInterfaceBlock())
3551 			{
3552 				// This is a uniform that's part of a block, let's see if the block is already defined
3553 				for(size_t i = 0; i < activeUniformBlocks.size(); ++i)
3554 				{
3555 					if(activeUniformBlocks[i].name == blockName.c_str())
3556 					{
3557 						// The block is already defined, find the register for the current uniform and return it
3558 						for(size_t j = 0; j < fields.size(); j++)
3559 						{
3560 							const TString &fieldName = fields[j]->name();
3561 							if(fieldName == name)
3562 							{
3563 								return fieldRegisterIndex;
3564 							}
3565 
3566 							fieldRegisterIndex += fields[j]->type()->totalRegisterCount();
3567 						}
3568 
3569 						ASSERT(false);
3570 						return fieldRegisterIndex;
3571 					}
3572 				}
3573 			}
3574 		}
3575 
3576 		return -1;
3577 	}
3578 
declareUniform(const TType & type,const TString & name,int registerIndex,bool samplersOnly,int blockId,BlockLayoutEncoder * encoder)3579 	void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, bool samplersOnly, int blockId, BlockLayoutEncoder* encoder)
3580 	{
3581 		const TStructure *structure = type.getStruct();
3582 		const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr;
3583 
3584 		if(!structure && !block)
3585 		{
3586 			ActiveUniforms &activeUniforms = shaderObject->activeUniforms;
3587 			const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo();
3588 			if(blockId >= 0)
3589 			{
3590 				blockDefinitions[blockId].insert(BlockDefinitionIndexMap::value_type(registerIndex, TypedMemberInfo(blockInfo, type)));
3591 				shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size());
3592 			}
3593 			int fieldRegisterIndex = encoder ? shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex;
3594 			bool isSampler = IsSampler(type.getBasicType());
3595 			if(isSampler && samplersOnly)
3596 			{
3597 				for(int i = 0; i < type.totalRegisterCount(); i++)
3598 				{
3599 					shader->declareSampler(fieldRegisterIndex + i);
3600 				}
3601 			}
3602 			if(isSampler == samplersOnly)
3603 			{
3604 				activeUniforms.push_back(Uniform(type, name.c_str(), fieldRegisterIndex, blockId, blockInfo));
3605 			}
3606 		}
3607 		else if(block)
3608 		{
3609 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
3610 			const TFieldList& fields = block->fields();
3611 			const TString &blockName = block->name();
3612 			int fieldRegisterIndex = registerIndex;
3613 			bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1);
3614 
3615 			blockId = activeUniformBlocks.size();
3616 			bool isRowMajor = block->matrixPacking() == EmpRowMajor;
3617 			activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(),
3618 			                                           block->blockStorage(), isRowMajor, registerIndex, blockId));
3619 			blockDefinitions.push_back(BlockDefinitionIndexMap());
3620 
3621 			Std140BlockEncoder currentBlockEncoder;
3622 			currentBlockEncoder.enterAggregateType();
3623 			for(const auto &field : fields)
3624 			{
3625 				const TType &fieldType = *(field->type());
3626 				const TString &fieldName = field->name();
3627 				if(isUniformBlockMember && (fieldName == name))
3628 				{
3629 					registerIndex = fieldRegisterIndex;
3630 				}
3631 
3632 				const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName;
3633 
3634 				declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, &currentBlockEncoder);
3635 				fieldRegisterIndex += fieldType.totalRegisterCount();
3636 			}
3637 			currentBlockEncoder.exitAggregateType();
3638 			activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize();
3639 		}
3640 		else
3641 		{
3642 			// Store struct for program link time validation
3643 			shaderObject->activeUniformStructs.push_back(Uniform(type, name.c_str(), registerIndex, -1, BlockMemberInfo::getDefaultBlockInfo()));
3644 
3645 			int fieldRegisterIndex = registerIndex;
3646 
3647 			const TFieldList& fields = structure->fields();
3648 			if(type.isArray() && (structure || type.isInterfaceBlock()))
3649 			{
3650 				for(int i = 0; i < type.getArraySize(); i++)
3651 				{
3652 					if(encoder)
3653 					{
3654 						encoder->enterAggregateType();
3655 					}
3656 					for(const auto &field : fields)
3657 					{
3658 						const TType &fieldType = *(field->type());
3659 						const TString &fieldName = field->name();
3660 						const TString uniformName = name + "[" + str(i) + "]." + fieldName;
3661 
3662 						declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder);
3663 						fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount();
3664 					}
3665 					if(encoder)
3666 					{
3667 						encoder->exitAggregateType();
3668 					}
3669 				}
3670 			}
3671 			else
3672 			{
3673 				if(encoder)
3674 				{
3675 					encoder->enterAggregateType();
3676 				}
3677 				for(const auto &field : fields)
3678 				{
3679 					const TType &fieldType = *(field->type());
3680 					const TString &fieldName = field->name();
3681 					const TString uniformName = name + "." + fieldName;
3682 
3683 					declareUniform(fieldType, uniformName, fieldRegisterIndex, samplersOnly, blockId, encoder);
3684 					fieldRegisterIndex += samplersOnly ? fieldType.totalSamplerRegisterCount() : fieldType.totalRegisterCount();
3685 				}
3686 				if(encoder)
3687 				{
3688 					encoder->exitAggregateType();
3689 				}
3690 			}
3691 		}
3692 	}
3693 
dim(TIntermNode * v)3694 	int OutputASM::dim(TIntermNode *v)
3695 	{
3696 		TIntermTyped *vector = v->getAsTyped();
3697 		ASSERT(vector && vector->isRegister());
3698 		return vector->getNominalSize();
3699 	}
3700 
dim2(TIntermNode * m)3701 	int OutputASM::dim2(TIntermNode *m)
3702 	{
3703 		TIntermTyped *matrix = m->getAsTyped();
3704 		ASSERT(matrix && matrix->isMatrix() && !matrix->isArray());
3705 		return matrix->getSecondarySize();
3706 	}
3707 
3708 	// Returns ~0u if no loop count could be determined
loopCount(TIntermLoop * node)3709 	unsigned int OutputASM::loopCount(TIntermLoop *node)
3710 	{
3711 		// Parse loops of the form:
3712 		// for(int index = initial; index [comparator] limit; index += increment)
3713 		TIntermSymbol *index = 0;
3714 		TOperator comparator = EOpNull;
3715 		int initial = 0;
3716 		int limit = 0;
3717 		int increment = 0;
3718 
3719 		// Parse index name and intial value
3720 		if(node->getInit())
3721 		{
3722 			TIntermAggregate *init = node->getInit()->getAsAggregate();
3723 
3724 			if(init)
3725 			{
3726 				TIntermSequence &sequence = init->getSequence();
3727 				TIntermTyped *variable = sequence[0]->getAsTyped();
3728 
3729 				if(variable && variable->getQualifier() == EvqTemporary && variable->getBasicType() == EbtInt)
3730 				{
3731 					TIntermBinary *assign = variable->getAsBinaryNode();
3732 
3733 					if(assign && assign->getOp() == EOpInitialize)
3734 					{
3735 						TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode();
3736 						TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion();
3737 
3738 						if(symbol && constant)
3739 						{
3740 							if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3741 							{
3742 								index = symbol;
3743 								initial = constant->getUnionArrayPointer()[0].getIConst();
3744 							}
3745 						}
3746 					}
3747 				}
3748 			}
3749 		}
3750 
3751 		// Parse comparator and limit value
3752 		if(index && node->getCondition())
3753 		{
3754 			TIntermBinary *test = node->getCondition()->getAsBinaryNode();
3755 			TIntermSymbol *left = test ? test->getLeft()->getAsSymbolNode() : nullptr;
3756 
3757 			if(left && (left->getId() == index->getId()))
3758 			{
3759 				TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion();
3760 
3761 				if(constant)
3762 				{
3763 					if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3764 					{
3765 						comparator = test->getOp();
3766 						limit = constant->getUnionArrayPointer()[0].getIConst();
3767 					}
3768 				}
3769 			}
3770 		}
3771 
3772 		// Parse increment
3773 		if(index && comparator != EOpNull && node->getExpression())
3774 		{
3775 			TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode();
3776 			TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode();
3777 
3778 			if(binaryTerminal)
3779 			{
3780 				TOperator op = binaryTerminal->getOp();
3781 				TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion();
3782 
3783 				if(constant)
3784 				{
3785 					if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3786 					{
3787 						int value = constant->getUnionArrayPointer()[0].getIConst();
3788 
3789 						switch(op)
3790 						{
3791 						case EOpAddAssign: increment = value;  break;
3792 						case EOpSubAssign: increment = -value; break;
3793 						default: UNIMPLEMENTED();
3794 						}
3795 					}
3796 				}
3797 			}
3798 			else if(unaryTerminal)
3799 			{
3800 				TOperator op = unaryTerminal->getOp();
3801 
3802 				switch(op)
3803 				{
3804 				case EOpPostIncrement: increment = 1;  break;
3805 				case EOpPostDecrement: increment = -1; break;
3806 				case EOpPreIncrement:  increment = 1;  break;
3807 				case EOpPreDecrement:  increment = -1; break;
3808 				default: UNIMPLEMENTED();
3809 				}
3810 			}
3811 		}
3812 
3813 		if(index && comparator != EOpNull && increment != 0)
3814 		{
3815 			if(comparator == EOpLessThanEqual)
3816 			{
3817 				comparator = EOpLessThan;
3818 				limit += 1;
3819 			}
3820 			else if(comparator == EOpGreaterThanEqual)
3821 			{
3822 				comparator = EOpLessThan;
3823 				limit -= 1;
3824 				std::swap(initial, limit);
3825 				increment = -increment;
3826 			}
3827 			else if(comparator == EOpGreaterThan)
3828 			{
3829 				comparator = EOpLessThan;
3830 				std::swap(initial, limit);
3831 				increment = -increment;
3832 			}
3833 
3834 			if(comparator == EOpLessThan)
3835 			{
3836 				if(!(initial < limit))   // Never loops
3837 				{
3838 					return 0;
3839 				}
3840 
3841 				int iterations = (limit - initial + abs(increment) - 1) / increment;   // Ceiling division
3842 
3843 				if(iterations < 0)
3844 				{
3845 					return ~0u;
3846 				}
3847 
3848 				return iterations;
3849 			}
3850 			else UNIMPLEMENTED();   // Falls through
3851 		}
3852 
3853 		return ~0u;
3854 	}
3855 
traverse(TIntermNode * node)3856 	bool LoopUnrollable::traverse(TIntermNode *node)
3857 	{
3858 		loopDepth = 0;
3859 		loopUnrollable = true;
3860 
3861 		node->traverse(this);
3862 
3863 		return loopUnrollable;
3864 	}
3865 
visitLoop(Visit visit,TIntermLoop * loop)3866 	bool LoopUnrollable::visitLoop(Visit visit, TIntermLoop *loop)
3867 	{
3868 		if(visit == PreVisit)
3869 		{
3870 			loopDepth++;
3871 		}
3872 		else if(visit == PostVisit)
3873 		{
3874 			loopDepth++;
3875 		}
3876 
3877 		return true;
3878 	}
3879 
visitBranch(Visit visit,TIntermBranch * node)3880 	bool LoopUnrollable::visitBranch(Visit visit, TIntermBranch *node)
3881 	{
3882 		if(!loopUnrollable)
3883 		{
3884 			return false;
3885 		}
3886 
3887 		if(!loopDepth)
3888 		{
3889 			return true;
3890 		}
3891 
3892 		switch(node->getFlowOp())
3893 		{
3894 		case EOpKill:
3895 		case EOpReturn:
3896 			break;
3897 		case EOpBreak:
3898 		case EOpContinue:
3899 			loopUnrollable = false;
3900 			break;
3901 		default: UNREACHABLE(node->getFlowOp());
3902 		}
3903 
3904 		return loopUnrollable;
3905 	}
3906 
visitAggregate(Visit visit,TIntermAggregate * node)3907 	bool LoopUnrollable::visitAggregate(Visit visit, TIntermAggregate *node)
3908 	{
3909 		return loopUnrollable;
3910 	}
3911 }
3912