• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "OutputASM.h"
16 #include "Common/Math.hpp"
17 
18 #include "common/debug.h"
19 #include "InfoSink.h"
20 
21 #include "libGLESv2/Shader.h"
22 
23 #include <GLES2/gl2.h>
24 #include <GLES2/gl2ext.h>
25 #include <GLES3/gl3.h>
26 
27 #include <stdlib.h>
28 
29 namespace glsl
30 {
31 	// Integer to TString conversion
str(int i)32 	TString str(int i)
33 	{
34 		char buffer[20];
35 		sprintf(buffer, "%d", i);
36 		return buffer;
37 	}
38 
39 	class Temporary : public TIntermSymbol
40 	{
41 	public:
Temporary(OutputASM * assembler)42 		Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler)
43 		{
44 		}
45 
~Temporary()46 		~Temporary()
47 		{
48 			assembler->freeTemporary(this);
49 		}
50 
51 	private:
52 		OutputASM *const assembler;
53 	};
54 
55 	class Constant : public TIntermConstantUnion
56 	{
57 	public:
Constant(float x,float y,float z,float w)58 		Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false))
59 		{
60 			constants[0].setFConst(x);
61 			constants[1].setFConst(y);
62 			constants[2].setFConst(z);
63 			constants[3].setFConst(w);
64 		}
65 
Constant(bool b)66 		Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false))
67 		{
68 			constants[0].setBConst(b);
69 		}
70 
Constant(int i)71 		Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false))
72 		{
73 			constants[0].setIConst(i);
74 		}
75 
~Constant()76 		~Constant()
77 		{
78 		}
79 
80 	private:
81 		ConstantUnion constants[4];
82 	};
83 
Uniform(GLenum type,GLenum precision,const std::string & name,int arraySize,int registerIndex,int blockId,const BlockMemberInfo & blockMemberInfo)84 	Uniform::Uniform(GLenum type, GLenum precision, const std::string &name, int arraySize, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) :
85 		type(type), precision(precision), name(name), arraySize(arraySize), registerIndex(registerIndex), blockId(blockId), blockInfo(blockMemberInfo)
86 	{
87 	}
88 
UniformBlock(const std::string & name,unsigned int dataSize,unsigned int arraySize,TLayoutBlockStorage layout,bool isRowMajorLayout,int registerIndex,int blockId)89 	UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize,
90 	                           TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) :
91 		name(name), dataSize(dataSize), arraySize(arraySize), layout(layout),
92 		isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId)
93 	{
94 	}
95 
BlockLayoutEncoder(bool rowMajor)96 	BlockLayoutEncoder::BlockLayoutEncoder(bool rowMajor)
97 		: mCurrentOffset(0), isRowMajor(rowMajor)
98 	{
99 	}
100 
encodeType(const TType & type)101 	BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type)
102 	{
103 		int arrayStride;
104 		int matrixStride;
105 
106 		getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride);
107 
108 		const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent),
109 		                                 static_cast<int>(arrayStride * BytesPerComponent),
110 		                                 static_cast<int>(matrixStride * BytesPerComponent),
111 		                                 (matrixStride > 0) && isRowMajor);
112 
113 		advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride);
114 
115 		return memberInfo;
116 	}
117 
118 	// static
getBlockRegister(const BlockMemberInfo & info)119 	size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info)
120 	{
121 		return (info.offset / BytesPerComponent) / ComponentsPerRegister;
122 	}
123 
124 	// static
getBlockRegisterElement(const BlockMemberInfo & info)125 	size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info)
126 	{
127 		return (info.offset / BytesPerComponent) % ComponentsPerRegister;
128 	}
129 
nextRegister()130 	void BlockLayoutEncoder::nextRegister()
131 	{
132 		mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister);
133 	}
134 
Std140BlockEncoder(bool rowMajor)135 	Std140BlockEncoder::Std140BlockEncoder(bool rowMajor) : BlockLayoutEncoder(rowMajor)
136 	{
137 	}
138 
enterAggregateType()139 	void Std140BlockEncoder::enterAggregateType()
140 	{
141 		nextRegister();
142 	}
143 
exitAggregateType()144 	void Std140BlockEncoder::exitAggregateType()
145 	{
146 		nextRegister();
147 	}
148 
getBlockLayoutInfo(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int * arrayStrideOut,int * matrixStrideOut)149 	void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut)
150 	{
151 		size_t baseAlignment = 0;
152 		int matrixStride = 0;
153 		int arrayStride = 0;
154 
155 		if(type.isMatrix())
156 		{
157 			baseAlignment = ComponentsPerRegister;
158 			matrixStride = ComponentsPerRegister;
159 
160 			if(arraySize > 0)
161 			{
162 				const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
163 				arrayStride = ComponentsPerRegister * numRegisters;
164 			}
165 		}
166 		else if(arraySize > 0)
167 		{
168 			baseAlignment = ComponentsPerRegister;
169 			arrayStride = ComponentsPerRegister;
170 		}
171 		else
172 		{
173 			const size_t numComponents = type.getElementSize();
174 			baseAlignment = (numComponents == 3 ? 4u : numComponents);
175 		}
176 
177 		mCurrentOffset = sw::align(mCurrentOffset, baseAlignment);
178 
179 		*matrixStrideOut = matrixStride;
180 		*arrayStrideOut = arrayStride;
181 	}
182 
advanceOffset(const TType & type,unsigned int arraySize,bool isRowMajorMatrix,int arrayStride,int matrixStride)183 	void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride)
184 	{
185 		if(arraySize > 0)
186 		{
187 			mCurrentOffset += arrayStride * arraySize;
188 		}
189 		else if(type.isMatrix())
190 		{
191 			ASSERT(matrixStride == ComponentsPerRegister);
192 			const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
193 			mCurrentOffset += ComponentsPerRegister * numRegisters;
194 		}
195 		else
196 		{
197 			mCurrentOffset += type.getElementSize();
198 		}
199 	}
200 
Attribute()201 	Attribute::Attribute()
202 	{
203 		type = GL_NONE;
204 		arraySize = 0;
205 		registerIndex = 0;
206 	}
207 
Attribute(GLenum type,const std::string & name,int arraySize,int location,int registerIndex)208 	Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int location, int registerIndex)
209 	{
210 		this->type = type;
211 		this->name = name;
212 		this->arraySize = arraySize;
213 		this->location = location;
214 		this->registerIndex = registerIndex;
215 	}
216 
getPixelShader() const217 	sw::PixelShader *Shader::getPixelShader() const
218 	{
219 		return 0;
220 	}
221 
getVertexShader() const222 	sw::VertexShader *Shader::getVertexShader() const
223 	{
224 		return 0;
225 	}
226 
TextureFunction(const TString & nodeName)227 	OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false)
228 	{
229 		TString name = TFunction::unmangleName(nodeName);
230 
231 		if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D")
232 		{
233 			method = IMPLICIT;
234 		}
235 		else if(name == "texture2DProj" || name == "textureProj")
236 		{
237 			method = IMPLICIT;
238 			proj = true;
239 		}
240 		else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod")
241 		{
242 			method = LOD;
243 		}
244 		else if(name == "texture2DProjLod" || name == "textureProjLod")
245 		{
246 			method = LOD;
247 			proj = true;
248 		}
249 		else if(name == "textureSize")
250 		{
251 			method = SIZE;
252 		}
253 		else if(name == "textureOffset")
254 		{
255 			method = IMPLICIT;
256 			offset = true;
257 		}
258 		else if(name == "textureProjOffset")
259 		{
260 			method = IMPLICIT;
261 			offset = true;
262 			proj = true;
263 		}
264 		else if(name == "textureLodOffset")
265 		{
266 			method = LOD;
267 			offset = true;
268 		}
269 		else if(name == "textureProjLodOffset")
270 		{
271 			method = LOD;
272 			proj = true;
273 			offset = true;
274 		}
275 		else if(name == "texelFetch")
276 		{
277 			method = FETCH;
278 		}
279 		else if(name == "texelFetchOffset")
280 		{
281 			method = FETCH;
282 			offset = true;
283 		}
284 		else if(name == "textureGrad")
285 		{
286 			method = GRAD;
287 		}
288 		else if(name == "textureGradOffset")
289 		{
290 			method = GRAD;
291 			offset = true;
292 		}
293 		else if(name == "textureProjGrad")
294 		{
295 			method = GRAD;
296 			proj = true;
297 		}
298 		else if(name == "textureProjGradOffset")
299 		{
300 			method = GRAD;
301 			proj = true;
302 			offset = true;
303 		}
304 		else UNREACHABLE(0);
305 	}
306 
OutputASM(TParseContext & context,Shader * shaderObject)307 	OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context)
308 	{
309 		shader = 0;
310 		pixelShader = 0;
311 		vertexShader = 0;
312 
313 		if(shaderObject)
314 		{
315 			shader = shaderObject->getShader();
316 			pixelShader = shaderObject->getPixelShader();
317 			vertexShader = shaderObject->getVertexShader();
318 		}
319 
320 		functionArray.push_back(Function(0, "main(", 0, 0));
321 		currentFunction = 0;
322 		outputQualifier = EvqOutput; // Set outputQualifier to any value other than EvqFragColor or EvqFragData
323 	}
324 
~OutputASM()325 	OutputASM::~OutputASM()
326 	{
327 	}
328 
output()329 	void OutputASM::output()
330 	{
331 		if(shader)
332 		{
333 			emitShader(GLOBAL);
334 
335 			if(functionArray.size() > 1)   // Only call main() when there are other functions
336 			{
337 				Instruction *callMain = emit(sw::Shader::OPCODE_CALL);
338 				callMain->dst.type = sw::Shader::PARAMETER_LABEL;
339 				callMain->dst.index = 0;   // main()
340 
341 				emit(sw::Shader::OPCODE_RET);
342 			}
343 
344 			emitShader(FUNCTION);
345 		}
346 	}
347 
emitShader(Scope scope)348 	void OutputASM::emitShader(Scope scope)
349 	{
350 		emitScope = scope;
351 		currentScope = GLOBAL;
352 		mContext.getTreeRoot()->traverse(this);
353 	}
354 
freeTemporary(Temporary * temporary)355 	void OutputASM::freeTemporary(Temporary *temporary)
356 	{
357 		free(temporaries, temporary);
358 	}
359 
getOpcode(sw::Shader::Opcode op,TIntermTyped * in) const360 	sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const
361 	{
362 		TBasicType baseType = in->getType().getBasicType();
363 
364 		switch(op)
365 		{
366 		case sw::Shader::OPCODE_NEG:
367 			switch(baseType)
368 			{
369 			case EbtInt:
370 			case EbtUInt:
371 				return sw::Shader::OPCODE_INEG;
372 			case EbtFloat:
373 			default:
374 				return op;
375 			}
376 		case sw::Shader::OPCODE_ABS:
377 			switch(baseType)
378 			{
379 			case EbtInt:
380 				return sw::Shader::OPCODE_IABS;
381 			case EbtFloat:
382 			default:
383 				return op;
384 			}
385 		case sw::Shader::OPCODE_SGN:
386 			switch(baseType)
387 			{
388 			case EbtInt:
389 				return sw::Shader::OPCODE_ISGN;
390 			case EbtFloat:
391 			default:
392 				return op;
393 			}
394 		case sw::Shader::OPCODE_ADD:
395 			switch(baseType)
396 			{
397 			case EbtInt:
398 			case EbtUInt:
399 				return sw::Shader::OPCODE_IADD;
400 			case EbtFloat:
401 			default:
402 				return op;
403 			}
404 		case sw::Shader::OPCODE_SUB:
405 			switch(baseType)
406 			{
407 			case EbtInt:
408 			case EbtUInt:
409 				return sw::Shader::OPCODE_ISUB;
410 			case EbtFloat:
411 			default:
412 				return op;
413 			}
414 		case sw::Shader::OPCODE_MUL:
415 			switch(baseType)
416 			{
417 			case EbtInt:
418 			case EbtUInt:
419 				return sw::Shader::OPCODE_IMUL;
420 			case EbtFloat:
421 			default:
422 				return op;
423 			}
424 		case sw::Shader::OPCODE_DIV:
425 			switch(baseType)
426 			{
427 			case EbtInt:
428 				return sw::Shader::OPCODE_IDIV;
429 			case EbtUInt:
430 				return sw::Shader::OPCODE_UDIV;
431 			case EbtFloat:
432 			default:
433 				return op;
434 			}
435 		case sw::Shader::OPCODE_IMOD:
436 			return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op;
437 		case sw::Shader::OPCODE_ISHR:
438 			return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op;
439 		case sw::Shader::OPCODE_MIN:
440 			switch(baseType)
441 			{
442 			case EbtInt:
443 				return sw::Shader::OPCODE_IMIN;
444 			case EbtUInt:
445 				return sw::Shader::OPCODE_UMIN;
446 			case EbtFloat:
447 			default:
448 				return op;
449 			}
450 		case sw::Shader::OPCODE_MAX:
451 			switch(baseType)
452 			{
453 			case EbtInt:
454 				return sw::Shader::OPCODE_IMAX;
455 			case EbtUInt:
456 				return sw::Shader::OPCODE_UMAX;
457 			case EbtFloat:
458 			default:
459 				return op;
460 			}
461 		default:
462 			return op;
463 		}
464 	}
465 
visitSymbol(TIntermSymbol * symbol)466 	void OutputASM::visitSymbol(TIntermSymbol *symbol)
467 	{
468 		// Vertex varyings don't have to be actively used to successfully link
469 		// against pixel shaders that use them. So make sure they're declared.
470 		if(symbol->getQualifier() == EvqVaryingOut || symbol->getQualifier() == EvqInvariantVaryingOut || symbol->getQualifier() == EvqVertexOut)
471 		{
472 			if(symbol->getBasicType() != EbtInvariant)   // Typeless declarations are not new varyings
473 			{
474 				declareVarying(symbol, -1);
475 			}
476 		}
477 
478 		TInterfaceBlock* block = symbol->getType().getInterfaceBlock();
479 		// OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables:
480 		// "All members of a named uniform block declared with a shared or std140 layout qualifier
481 		// are considered active, even if they are not referenced in any shader in the program.
482 		// The uniform block itself is also considered active, even if no member of the block is referenced."
483 		if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140)))
484 		{
485 			uniformRegister(symbol);
486 		}
487 	}
488 
visitBinary(Visit visit,TIntermBinary * node)489 	bool OutputASM::visitBinary(Visit visit, TIntermBinary *node)
490 	{
491 		if(currentScope != emitScope)
492 		{
493 			return false;
494 		}
495 
496 		TIntermTyped *result = node;
497 		TIntermTyped *left = node->getLeft();
498 		TIntermTyped *right = node->getRight();
499 		const TType &leftType = left->getType();
500 		const TType &rightType = right->getType();
501 
502 		if(isSamplerRegister(result))
503 		{
504 			return false;   // Don't traverse, the register index is determined statically
505 		}
506 
507 		switch(node->getOp())
508 		{
509 		case EOpAssign:
510 			if(visit == PostVisit)
511 			{
512 				assignLvalue(left, right);
513 				copy(result, right);
514 			}
515 			break;
516 		case EOpInitialize:
517 			if(visit == PostVisit)
518 			{
519 				copy(left, right);
520 			}
521 			break;
522 		case EOpMatrixTimesScalarAssign:
523 			if(visit == PostVisit)
524 			{
525 				for(int i = 0; i < leftType.getNominalSize(); i++)
526 				{
527 					emit(sw::Shader::OPCODE_MUL, result, i, left, i, right);
528 				}
529 
530 				assignLvalue(left, result);
531 			}
532 			break;
533 		case EOpVectorTimesMatrixAssign:
534 			if(visit == PostVisit)
535 			{
536 				int size = leftType.getNominalSize();
537 
538 				for(int i = 0; i < size; i++)
539 				{
540 					Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i);
541 					dot->dst.mask = 1 << i;
542 				}
543 
544 				assignLvalue(left, result);
545 			}
546 			break;
547 		case EOpMatrixTimesMatrixAssign:
548 			if(visit == PostVisit)
549 			{
550 				int dim = leftType.getNominalSize();
551 
552 				for(int i = 0; i < dim; i++)
553 				{
554 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
555 					mul->src[1].swizzle = 0x00;
556 
557 					for(int j = 1; j < dim; j++)
558 					{
559 						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
560 						mad->src[1].swizzle = j * 0x55;
561 					}
562 				}
563 
564 				assignLvalue(left, result);
565 			}
566 			break;
567 		case EOpIndexDirect:
568 			if(visit == PostVisit)
569 			{
570 				int index = right->getAsConstantUnion()->getIConst(0);
571 
572 				if(result->isMatrix() || result->isStruct() || result->isInterfaceBlock())
573 				{
574 					ASSERT(left->isArray());
575 					copy(result, left, index * left->elementRegisterCount());
576 				}
577 				else if(result->isRegister())
578 				{
579 					int srcIndex = 0;
580 					if(left->isRegister())
581 					{
582 						srcIndex = 0;
583 					}
584 					else if(left->isArray())
585 					{
586 						srcIndex = index * left->elementRegisterCount();
587 					}
588 					else if(left->isMatrix())
589 					{
590 						ASSERT(index < left->getNominalSize());   // FIXME: Report semantic error
591 						srcIndex = index;
592 					}
593 					else UNREACHABLE(0);
594 
595 					Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, 0, left, srcIndex);
596 
597 					if(left->isRegister())
598 					{
599 						mov->src[0].swizzle = index;
600 					}
601 				}
602 				else UNREACHABLE(0);
603 			}
604 			break;
605 		case EOpIndexIndirect:
606 			if(visit == PostVisit)
607 			{
608 				if(left->isArray() || left->isMatrix())
609 				{
610 					for(int index = 0; index < result->totalRegisterCount(); index++)
611 					{
612 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index, left, index);
613 						mov->dst.mask = writeMask(result, index);
614 
615 						if(left->totalRegisterCount() > 1)
616 						{
617 							sw::Shader::SourceParameter relativeRegister;
618 							argument(relativeRegister, right);
619 
620 							mov->src[0].rel.type = relativeRegister.type;
621 							mov->src[0].rel.index = relativeRegister.index;
622 							mov->src[0].rel.scale =	result->totalRegisterCount();
623 							mov->src[0].rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);
624 						}
625 					}
626 				}
627 				else if(left->isRegister())
628 				{
629 					emit(sw::Shader::OPCODE_EXTRACT, result, left, right);
630 				}
631 				else UNREACHABLE(0);
632 			}
633 			break;
634 		case EOpIndexDirectStruct:
635 		case EOpIndexDirectInterfaceBlock:
636 			if(visit == PostVisit)
637 			{
638 				ASSERT(leftType.isStruct() || (leftType.isInterfaceBlock()));
639 
640 				const TFieldList& fields = (node->getOp() == EOpIndexDirectStruct) ?
641 				                           leftType.getStruct()->fields() :
642 				                           leftType.getInterfaceBlock()->fields();
643 				int index = right->getAsConstantUnion()->getIConst(0);
644 				int fieldOffset = 0;
645 
646 				for(int i = 0; i < index; i++)
647 				{
648 					fieldOffset += fields[i]->type()->totalRegisterCount();
649 				}
650 
651 				copy(result, left, fieldOffset);
652 			}
653 			break;
654 		case EOpVectorSwizzle:
655 			if(visit == PostVisit)
656 			{
657 				int swizzle = 0;
658 				TIntermAggregate *components = right->getAsAggregate();
659 
660 				if(components)
661 				{
662 					TIntermSequence &sequence = components->getSequence();
663 					int component = 0;
664 
665 					for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++)
666 					{
667 						TIntermConstantUnion *element = (*sit)->getAsConstantUnion();
668 
669 						if(element)
670 						{
671 							int i = element->getUnionArrayPointer()[0].getIConst();
672 							swizzle |= i << (component * 2);
673 							component++;
674 						}
675 						else UNREACHABLE(0);
676 					}
677 				}
678 				else UNREACHABLE(0);
679 
680 				Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);
681 				mov->src[0].swizzle = swizzle;
682 			}
683 			break;
684 		case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break;
685 		case EOpAdd:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right);       break;
686 		case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break;
687 		case EOpSub:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right);       break;
688 		case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break;
689 		case EOpMul:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right);       break;
690 		case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break;
691 		case EOpDiv:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right);       break;
692 		case EOpIModAssign:          if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break;
693 		case EOpIMod:                if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right);       break;
694 		case EOpBitShiftLeftAssign:  if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break;
695 		case EOpBitShiftLeft:        if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right);       break;
696 		case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break;
697 		case EOpBitShiftRight:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right);       break;
698 		case EOpBitwiseAndAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break;
699 		case EOpBitwiseAnd:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right);       break;
700 		case EOpBitwiseXorAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break;
701 		case EOpBitwiseXor:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right);       break;
702 		case EOpBitwiseOrAssign:     if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right);  break;
703 		case EOpBitwiseOr:           if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right);        break;
704 		case EOpEqual:
705 			if(visit == PostVisit)
706 			{
707 				emitBinary(sw::Shader::OPCODE_EQ, result, left, right);
708 
709 				for(int index = 1; index < left->totalRegisterCount(); index++)
710 				{
711 					Temporary equal(this);
712 					emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index);
713 					emit(sw::Shader::OPCODE_AND, result, result, &equal);
714 				}
715 			}
716 			break;
717 		case EOpNotEqual:
718 			if(visit == PostVisit)
719 			{
720 				emitBinary(sw::Shader::OPCODE_NE, result, left, right);
721 
722 				for(int index = 1; index < left->totalRegisterCount(); index++)
723 				{
724 					Temporary notEqual(this);
725 					emit(sw::Shader::OPCODE_NE, &notEqual, 0, left, index, right, index);
726 					emit(sw::Shader::OPCODE_OR, result, result, &notEqual);
727 				}
728 			}
729 			break;
730 		case EOpLessThan:                if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break;
731 		case EOpGreaterThan:             if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break;
732 		case EOpLessThanEqual:           if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break;
733 		case EOpGreaterThanEqual:        if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break;
734 		case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break;
735 		case EOpVectorTimesScalar:       if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break;
736 		case EOpMatrixTimesScalar:
737 			if(visit == PostVisit)
738 			{
739 				if(left->isMatrix())
740 				{
741 					for(int i = 0; i < leftType.getNominalSize(); i++)
742 					{
743 						emit(sw::Shader::OPCODE_MUL, result, i, left, i, right, 0);
744 					}
745 				}
746 				else if(right->isMatrix())
747 				{
748 					for(int i = 0; i < rightType.getNominalSize(); i++)
749 					{
750 						emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
751 					}
752 				}
753 				else UNREACHABLE(0);
754 			}
755 			break;
756 		case EOpVectorTimesMatrix:
757 			if(visit == PostVisit)
758 			{
759 				sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize());
760 
761 				int size = rightType.getNominalSize();
762 				for(int i = 0; i < size; i++)
763 				{
764 					Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i);
765 					dot->dst.mask = 1 << i;
766 				}
767 			}
768 			break;
769 		case EOpMatrixTimesVector:
770 			if(visit == PostVisit)
771 			{
772 				Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);
773 				mul->src[1].swizzle = 0x00;
774 
775 				int size = rightType.getNominalSize();
776 				for(int i = 1; i < size; i++)
777 				{
778 					Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result);
779 					mad->src[1].swizzle = i * 0x55;
780 				}
781 			}
782 			break;
783 		case EOpMatrixTimesMatrix:
784 			if(visit == PostVisit)
785 			{
786 				int dim = leftType.getNominalSize();
787 
788 				int size = rightType.getNominalSize();
789 				for(int i = 0; i < size; i++)
790 				{
791 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
792 					mul->src[1].swizzle = 0x00;
793 
794 					for(int j = 1; j < dim; j++)
795 					{
796 						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
797 						mad->src[1].swizzle = j * 0x55;
798 					}
799 				}
800 			}
801 			break;
802 		case EOpLogicalOr:
803 			if(trivial(right, 6))
804 			{
805 				if(visit == PostVisit)
806 				{
807 					emit(sw::Shader::OPCODE_OR, result, left, right);
808 				}
809 			}
810 			else   // Short-circuit evaluation
811 			{
812 				if(visit == InVisit)
813 				{
814 					emit(sw::Shader::OPCODE_MOV, result, left);
815 					Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result);
816 					ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT;
817 				}
818 				else if(visit == PostVisit)
819 				{
820 					emit(sw::Shader::OPCODE_MOV, result, right);
821 					emit(sw::Shader::OPCODE_ENDIF);
822 				}
823 			}
824 			break;
825 		case EOpLogicalXor:        if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break;
826 		case EOpLogicalAnd:
827 			if(trivial(right, 6))
828 			{
829 				if(visit == PostVisit)
830 				{
831 					emit(sw::Shader::OPCODE_AND, result, left, right);
832 				}
833 			}
834 			else   // Short-circuit evaluation
835 			{
836 				if(visit == InVisit)
837 				{
838 					emit(sw::Shader::OPCODE_MOV, result, left);
839 					emit(sw::Shader::OPCODE_IF, 0, result);
840 				}
841 				else if(visit == PostVisit)
842 				{
843 					emit(sw::Shader::OPCODE_MOV, result, right);
844 					emit(sw::Shader::OPCODE_ENDIF);
845 				}
846 			}
847 			break;
848 		default: UNREACHABLE(node->getOp());
849 		}
850 
851 		return true;
852 	}
853 
emitDeterminant(TIntermTyped * result,TIntermTyped * arg,int size,int col,int row,int outCol,int outRow)854 	void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow)
855 	{
856 		switch(size)
857 		{
858 		case 1: // Used for cofactor computation only
859 			{
860 				// For a 2x2 matrix, the cofactor is simply a transposed move or negate
861 				bool isMov = (row == col);
862 				sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG;
863 				Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row);
864 				mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col);
865 				mov->dst.mask = 1 << outRow;
866 			}
867 			break;
868 		case 2:
869 			{
870 				static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy
871 
872 				bool isCofactor = (col >= 0) && (row >= 0);
873 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
874 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
875 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
876 
877 				Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1);
878 				det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2];
879 				det->dst.mask = 1 << outRow;
880 			}
881 			break;
882 		case 3:
883 			{
884 				static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw
885 
886 				bool isCofactor = (col >= 0) && (row >= 0);
887 				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
888 				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
889 				int col2 = (isCofactor && (col <= 2)) ? 3 : 2;
890 				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
891 
892 				Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2);
893 				det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3];
894 				det->dst.mask = 1 << outRow;
895 			}
896 			break;
897 		case 4:
898 			{
899 				Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3);
900 				det->dst.mask = 1 << outRow;
901 			}
902 			break;
903 		default:
904 			UNREACHABLE(size);
905 			break;
906 		}
907 	}
908 
visitUnary(Visit visit,TIntermUnary * node)909 	bool OutputASM::visitUnary(Visit visit, TIntermUnary *node)
910 	{
911 		if(currentScope != emitScope)
912 		{
913 			return false;
914 		}
915 
916 		TIntermTyped *result = node;
917 		TIntermTyped *arg = node->getOperand();
918 		TBasicType basicType = arg->getType().getBasicType();
919 
920 		union
921 		{
922 			float f;
923 			int i;
924 		} one_value;
925 
926 		if(basicType == EbtInt || basicType == EbtUInt)
927 		{
928 			one_value.i = 1;
929 		}
930 		else
931 		{
932 			one_value.f = 1.0f;
933 		}
934 
935 		Constant one(one_value.f, one_value.f, one_value.f, one_value.f);
936 		Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f);
937 		Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f);
938 
939 		switch(node->getOp())
940 		{
941 		case EOpNegative:
942 			if(visit == PostVisit)
943 			{
944 				sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg);
945 				for(int index = 0; index < arg->totalRegisterCount(); index++)
946 				{
947 					emit(negOpcode, result, index, arg, index);
948 				}
949 			}
950 			break;
951 		case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
952 		case EOpLogicalNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
953 		case EOpBitwiseNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
954 		case EOpPostIncrement:
955 			if(visit == PostVisit)
956 			{
957 				copy(result, arg);
958 
959 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
960 				for(int index = 0; index < arg->totalRegisterCount(); index++)
961 				{
962 					emit(addOpcode, arg, index, arg, index, &one);
963 				}
964 
965 				assignLvalue(arg, arg);
966 			}
967 			break;
968 		case EOpPostDecrement:
969 			if(visit == PostVisit)
970 			{
971 				copy(result, arg);
972 
973 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
974 				for(int index = 0; index < arg->totalRegisterCount(); index++)
975 				{
976 					emit(subOpcode, arg, index, arg, index, &one);
977 				}
978 
979 				assignLvalue(arg, arg);
980 			}
981 			break;
982 		case EOpPreIncrement:
983 			if(visit == PostVisit)
984 			{
985 				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
986 				for(int index = 0; index < arg->totalRegisterCount(); index++)
987 				{
988 					emit(addOpcode, result, index, arg, index, &one);
989 				}
990 
991 				assignLvalue(arg, result);
992 			}
993 			break;
994 		case EOpPreDecrement:
995 			if(visit == PostVisit)
996 			{
997 				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
998 				for(int index = 0; index < arg->totalRegisterCount(); index++)
999 				{
1000 					emit(subOpcode, result, index, arg, index, &one);
1001 				}
1002 
1003 				assignLvalue(arg, result);
1004 			}
1005 			break;
1006 		case EOpRadians:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break;
1007 		case EOpDegrees:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &deg); break;
1008 		case EOpSin:              if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break;
1009 		case EOpCos:              if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break;
1010 		case EOpTan:              if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break;
1011 		case EOpAsin:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break;
1012 		case EOpAcos:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break;
1013 		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break;
1014 		case EOpSinh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break;
1015 		case EOpCosh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break;
1016 		case EOpTanh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break;
1017 		case EOpAsinh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break;
1018 		case EOpAcosh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break;
1019 		case EOpAtanh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break;
1020 		case EOpExp:              if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break;
1021 		case EOpLog:              if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break;
1022 		case EOpExp2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break;
1023 		case EOpLog2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break;
1024 		case EOpSqrt:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break;
1025 		case EOpInverseSqrt:      if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break;
1026 		case EOpAbs:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break;
1027 		case EOpSign:             if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break;
1028 		case EOpFloor:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break;
1029 		case EOpTrunc:            if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break;
1030 		case EOpRound:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break;
1031 		case EOpRoundEven:        if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break;
1032 		case EOpCeil:             if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break;
1033 		case EOpFract:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break;
1034 		case EOpIsNan:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break;
1035 		case EOpIsInf:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break;
1036 		case EOpLength:           if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break;
1037 		case EOpNormalize:        if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break;
1038 		case EOpDFdx:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break;
1039 		case EOpDFdy:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break;
1040 		case EOpFwidth:           if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break;
1041 		case EOpAny:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break;
1042 		case EOpAll:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break;
1043 		case EOpFloatBitsToInt:   if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break;
1044 		case EOpFloatBitsToUint:  if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break;
1045 		case EOpIntBitsToFloat:   if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break;
1046 		case EOpUintBitsToFloat:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break;
1047 		case EOpPackSnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break;
1048 		case EOpPackUnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break;
1049 		case EOpPackHalf2x16:     if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break;
1050 		case EOpUnpackSnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break;
1051 		case EOpUnpackUnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break;
1052 		case EOpUnpackHalf2x16:   if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break;
1053 		case EOpTranspose:
1054 			if(visit == PostVisit)
1055 			{
1056 				int numCols = arg->getNominalSize();
1057 				int numRows = arg->getSecondarySize();
1058 				for(int i = 0; i < numCols; ++i)
1059 				{
1060 					for(int j = 0; j < numRows; ++j)
1061 					{
1062 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i);
1063 						mov->src[0].swizzle = 0x55 * j;
1064 						mov->dst.mask = 1 << i;
1065 					}
1066 				}
1067 			}
1068 			break;
1069 		case EOpDeterminant:
1070 			if(visit == PostVisit)
1071 			{
1072 				int size = arg->getNominalSize();
1073 				ASSERT(size == arg->getSecondarySize());
1074 
1075 				emitDeterminant(result, arg, size);
1076 			}
1077 			break;
1078 		case EOpInverse:
1079 			if(visit == PostVisit)
1080 			{
1081 				int size = arg->getNominalSize();
1082 				ASSERT(size == arg->getSecondarySize());
1083 
1084 				// Compute transposed matrix of cofactors
1085 				for(int i = 0; i < size; ++i)
1086 				{
1087 					for(int j = 0; j < size; ++j)
1088 					{
1089 						// For a 2x2 matrix, the cofactor is simply a transposed move or negate
1090 						// For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant
1091 						emitDeterminant(result, arg, size - 1, j, i, i, j);
1092 					}
1093 				}
1094 
1095 				// Compute 1 / determinant
1096 				Temporary invDet(this);
1097 				emitDeterminant(&invDet, arg, size);
1098 				Constant one(1.0f, 1.0f, 1.0f, 1.0f);
1099 				Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet);
1100 				div->src[1].swizzle = 0x00; // xxxx
1101 
1102 				// Divide transposed matrix of cofactors by determinant
1103 				for(int i = 0; i < size; ++i)
1104 				{
1105 					emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet);
1106 				}
1107 			}
1108 			break;
1109 		default: UNREACHABLE(node->getOp());
1110 		}
1111 
1112 		return true;
1113 	}
1114 
visitAggregate(Visit visit,TIntermAggregate * node)1115 	bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node)
1116 	{
1117 		if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence)
1118 		{
1119 			return false;
1120 		}
1121 
1122 		Constant zero(0.0f, 0.0f, 0.0f, 0.0f);
1123 
1124 		TIntermTyped *result = node;
1125 		const TType &resultType = node->getType();
1126 		TIntermSequence &arg = node->getSequence();
1127 		size_t argumentCount = arg.size();
1128 
1129 		switch(node->getOp())
1130 		{
1131 		case EOpSequence:             break;
1132 		case EOpDeclaration:          break;
1133 		case EOpInvariantDeclaration: break;
1134 		case EOpPrototype:            break;
1135 		case EOpComma:
1136 			if(visit == PostVisit)
1137 			{
1138 				copy(result, arg[1]);
1139 			}
1140 			break;
1141 		case EOpFunction:
1142 			if(visit == PreVisit)
1143 			{
1144 				const TString &name = node->getName();
1145 
1146 				if(emitScope == FUNCTION)
1147 				{
1148 					if(functionArray.size() > 1)   // No need for a label when there's only main()
1149 					{
1150 						Instruction *label = emit(sw::Shader::OPCODE_LABEL);
1151 						label->dst.type = sw::Shader::PARAMETER_LABEL;
1152 
1153 						const Function *function = findFunction(name);
1154 						ASSERT(function);   // Should have been added during global pass
1155 						label->dst.index = function->label;
1156 						currentFunction = function->label;
1157 					}
1158 				}
1159 				else if(emitScope == GLOBAL)
1160 				{
1161 					if(name != "main(")
1162 					{
1163 						TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence();
1164 						functionArray.push_back(Function(functionArray.size(), name, &arguments, node));
1165 					}
1166 				}
1167 				else UNREACHABLE(emitScope);
1168 
1169 				currentScope = FUNCTION;
1170 			}
1171 			else if(visit == PostVisit)
1172 			{
1173 				if(emitScope == FUNCTION)
1174 				{
1175 					if(functionArray.size() > 1)   // No need to return when there's only main()
1176 					{
1177 						emit(sw::Shader::OPCODE_RET);
1178 					}
1179 				}
1180 
1181 				currentScope = GLOBAL;
1182 			}
1183 			break;
1184 		case EOpFunctionCall:
1185 			if(visit == PostVisit)
1186 			{
1187 				if(node->isUserDefined())
1188 				{
1189 					const TString &name = node->getName();
1190 					const Function *function = findFunction(name);
1191 
1192 					if(!function)
1193 					{
1194 						mContext.error(node->getLine(), "function definition not found", name.c_str());
1195 						return false;
1196 					}
1197 
1198 					TIntermSequence &arguments = *function->arg;
1199 
1200 					for(size_t i = 0; i < argumentCount; i++)
1201 					{
1202 						TIntermTyped *in = arguments[i]->getAsTyped();
1203 
1204 						if(in->getQualifier() == EvqIn ||
1205 						   in->getQualifier() == EvqInOut ||
1206 						   in->getQualifier() == EvqConstReadOnly)
1207 						{
1208 							copy(in, arg[i]);
1209 						}
1210 					}
1211 
1212 					Instruction *call = emit(sw::Shader::OPCODE_CALL);
1213 					call->dst.type = sw::Shader::PARAMETER_LABEL;
1214 					call->dst.index = function->label;
1215 
1216 					if(function->ret && function->ret->getType().getBasicType() != EbtVoid)
1217 					{
1218 						copy(result, function->ret);
1219 					}
1220 
1221 					for(size_t i = 0; i < argumentCount; i++)
1222 					{
1223 						TIntermTyped *argument = arguments[i]->getAsTyped();
1224 						TIntermTyped *out = arg[i]->getAsTyped();
1225 
1226 						if(argument->getQualifier() == EvqOut ||
1227 						   argument->getQualifier() == EvqInOut)
1228 						{
1229 							assignLvalue(out, argument);
1230 						}
1231 					}
1232 				}
1233 				else
1234 				{
1235 					const TextureFunction textureFunction(node->getName());
1236 					TIntermTyped *t = arg[1]->getAsTyped();
1237 
1238 					Temporary coord(this);
1239 
1240 					if(textureFunction.proj)
1241 					{
1242 						Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]);
1243 						rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1);
1244 						rcp->dst.mask = 0x7;
1245 
1246 						Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord);
1247 						mul->dst.mask = 0x7;
1248 					}
1249 					else
1250 					{
1251 						emit(sw::Shader::OPCODE_MOV, &coord, arg[1]);
1252 					}
1253 
1254 					switch(textureFunction.method)
1255 					{
1256 					case TextureFunction::IMPLICIT:
1257 						{
1258 							TIntermNode* offset = textureFunction.offset ? arg[2] : 0;
1259 
1260 							if(argumentCount == 2 || (textureFunction.offset && argumentCount == 3))
1261 							{
1262 								emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX,
1263 								     result, &coord, arg[0], offset);
1264 							}
1265 							else if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4))   // bias
1266 							{
1267 								Instruction *bias = emit(sw::Shader::OPCODE_MOV, &coord, arg[textureFunction.offset ? 3 : 2]);
1268 								bias->dst.mask = 0x8;
1269 
1270 								Instruction *tex = emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX,
1271 								                        result, &coord, arg[0], offset); // FIXME: Implement an efficient TEXLDB instruction
1272 								tex->bias = true;
1273 							}
1274 							else UNREACHABLE(argumentCount);
1275 						}
1276 						break;
1277 					case TextureFunction::LOD:
1278 						{
1279 							Instruction *lod = emit(sw::Shader::OPCODE_MOV, &coord, arg[2]);
1280 							lod->dst.mask = 0x8;
1281 
1282 							emit(textureFunction.offset ? sw::Shader::OPCODE_TEXLDLOFFSET : sw::Shader::OPCODE_TEXLDL,
1283 							     result, &coord, arg[0], textureFunction.offset ? arg[3] : nullptr);
1284 						}
1285 						break;
1286 					case TextureFunction::FETCH:
1287 						{
1288 							if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4))
1289 							{
1290 								Instruction *lod = emit(sw::Shader::OPCODE_MOV, &coord, arg[2]);
1291 								lod->dst.mask = 0x8;
1292 
1293 								TIntermNode *offset = textureFunction.offset ? arg[3] : nullptr;
1294 
1295 								emit(textureFunction.offset ? sw::Shader::OPCODE_TEXELFETCHOFFSET : sw::Shader::OPCODE_TEXELFETCH,
1296 								     result, &coord, arg[0], offset);
1297 							}
1298 							else UNREACHABLE(argumentCount);
1299 						}
1300 						break;
1301 					case TextureFunction::GRAD:
1302 						{
1303 							if(argumentCount == 4 || (textureFunction.offset && argumentCount == 5))
1304 							{
1305 								TIntermNode *offset = textureFunction.offset ? arg[4] : nullptr;
1306 
1307 								emit(textureFunction.offset ? sw::Shader::OPCODE_TEXGRADOFFSET : sw::Shader::OPCODE_TEXGRAD,
1308 								     result, &coord, arg[0], arg[2], arg[3], offset);
1309 							}
1310 							else UNREACHABLE(argumentCount);
1311 						}
1312 						break;
1313 					case TextureFunction::SIZE:
1314 						emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], arg[0]);
1315 						break;
1316 					default:
1317 						UNREACHABLE(textureFunction.method);
1318 					}
1319 				}
1320 			}
1321 			break;
1322 		case EOpParameters:
1323 			break;
1324 		case EOpConstructFloat:
1325 		case EOpConstructVec2:
1326 		case EOpConstructVec3:
1327 		case EOpConstructVec4:
1328 		case EOpConstructBool:
1329 		case EOpConstructBVec2:
1330 		case EOpConstructBVec3:
1331 		case EOpConstructBVec4:
1332 		case EOpConstructInt:
1333 		case EOpConstructIVec2:
1334 		case EOpConstructIVec3:
1335 		case EOpConstructIVec4:
1336 		case EOpConstructUInt:
1337 		case EOpConstructUVec2:
1338 		case EOpConstructUVec3:
1339 		case EOpConstructUVec4:
1340 			if(visit == PostVisit)
1341 			{
1342 				int component = 0;
1343 				int arrayMaxIndex = result->isArray() ? result->getArraySize() - 1 : 0;
1344 				int arrayComponents = result->getType().getElementSize();
1345 				for(size_t i = 0; i < argumentCount; i++)
1346 				{
1347 					TIntermTyped *argi = arg[i]->getAsTyped();
1348 					int size = argi->getNominalSize();
1349 					int arrayIndex = std::min(component / arrayComponents, arrayMaxIndex);
1350 					int swizzle = component - (arrayIndex * arrayComponents);
1351 
1352 					if(!argi->isMatrix())
1353 					{
1354 						Instruction *mov = emitCast(result, arrayIndex, argi, 0);
1355 						mov->dst.mask = (0xF << swizzle) & 0xF;
1356 						mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);
1357 
1358 						component += size;
1359 					}
1360 					else   // Matrix
1361 					{
1362 						int column = 0;
1363 
1364 						while(component < resultType.getNominalSize())
1365 						{
1366 							Instruction *mov = emitCast(result, arrayIndex, argi, column);
1367 							mov->dst.mask = (0xF << swizzle) & 0xF;
1368 							mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);
1369 
1370 							column++;
1371 							component += size;
1372 						}
1373 					}
1374 				}
1375 			}
1376 			break;
1377 		case EOpConstructMat2:
1378 		case EOpConstructMat2x3:
1379 		case EOpConstructMat2x4:
1380 		case EOpConstructMat3x2:
1381 		case EOpConstructMat3:
1382 		case EOpConstructMat3x4:
1383 		case EOpConstructMat4x2:
1384 		case EOpConstructMat4x3:
1385 		case EOpConstructMat4:
1386 			if(visit == PostVisit)
1387 			{
1388 				TIntermTyped *arg0 = arg[0]->getAsTyped();
1389 				const int outCols = result->getNominalSize();
1390 				const int outRows = result->getSecondarySize();
1391 
1392 				if(arg0->isScalar() && arg.size() == 1)   // Construct scale matrix
1393 				{
1394 					for(int i = 0; i < outCols; i++)
1395 					{
1396 						emit(sw::Shader::OPCODE_MOV, result, i, &zero);
1397 						Instruction *mov = emitCast(result, i, arg0, 0);
1398 						mov->dst.mask = 1 << i;
1399 						ASSERT(mov->src[0].swizzle == 0x00);
1400 					}
1401 				}
1402 				else if(arg0->isMatrix())
1403 				{
1404 					int arraySize = result->isArray() ? result->getArraySize() : 1;
1405 
1406 					for(int n = 0; n < arraySize; n++)
1407 					{
1408 						TIntermTyped *argi = arg[n]->getAsTyped();
1409 						const int inCols = argi->getNominalSize();
1410 						const int inRows = argi->getSecondarySize();
1411 
1412 						for(int i = 0; i < outCols; i++)
1413 						{
1414 							if(i >= inCols || outRows > inRows)
1415 							{
1416 								// Initialize to identity matrix
1417 								Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f));
1418 								emitCast(result, i + n * outCols, &col, 0);
1419 							}
1420 
1421 							if(i < inCols)
1422 							{
1423 								Instruction *mov = emitCast(result, i + n * outCols, argi, i);
1424 								mov->dst.mask = 0xF >> (4 - inRows);
1425 							}
1426 						}
1427 					}
1428 				}
1429 				else
1430 				{
1431 					int column = 0;
1432 					int row = 0;
1433 
1434 					for(size_t i = 0; i < argumentCount; i++)
1435 					{
1436 						TIntermTyped *argi = arg[i]->getAsTyped();
1437 						int size = argi->getNominalSize();
1438 						int element = 0;
1439 
1440 						while(element < size)
1441 						{
1442 							Instruction *mov = emitCast(result, column, argi, 0);
1443 							mov->dst.mask = (0xF << row) & 0xF;
1444 							mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element;
1445 
1446 							int end = row + size - element;
1447 							column = end >= outRows ? column + 1 : column;
1448 							element = element + outRows - row;
1449 							row = end >= outRows ? 0 : end;
1450 						}
1451 					}
1452 				}
1453 			}
1454 			break;
1455 		case EOpConstructStruct:
1456 			if(visit == PostVisit)
1457 			{
1458 				int offset = 0;
1459 				for(size_t i = 0; i < argumentCount; i++)
1460 				{
1461 					TIntermTyped *argi = arg[i]->getAsTyped();
1462 					int size = argi->totalRegisterCount();
1463 
1464 					for(int index = 0; index < size; index++)
1465 					{
1466 						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index);
1467 						mov->dst.mask = writeMask(result, offset + index);
1468 					}
1469 
1470 					offset += size;
1471 				}
1472 			}
1473 			break;
1474 		case EOpLessThan:         if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break;
1475 		case EOpGreaterThan:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break;
1476 		case EOpLessThanEqual:    if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break;
1477 		case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break;
1478 		case EOpVectorEqual:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break;
1479 		case EOpVectorNotEqual:   if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break;
1480 		case EOpMod:              if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break;
1481 		case EOpModf:
1482 			if(visit == PostVisit)
1483 			{
1484 				TIntermTyped* arg1 = arg[1]->getAsTyped();
1485 				emit(sw::Shader::OPCODE_TRUNC, arg1, arg[0]);
1486 				assignLvalue(arg1, arg1);
1487 				emitBinary(sw::Shader::OPCODE_SUB, result, arg[0], arg1);
1488 			}
1489 			break;
1490 		case EOpPow:              if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break;
1491 		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break;
1492 		case EOpMin:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break;
1493 		case EOpMax:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break;
1494 		case EOpClamp:
1495 			if(visit == PostVisit)
1496 			{
1497 				emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]);
1498 				emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]);
1499 			}
1500 			break;
1501 		case EOpMix:         if(visit == PostVisit) emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]); break;
1502 		case EOpStep:        if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break;
1503 		case EOpSmoothStep:  if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break;
1504 		case EOpDistance:    if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break;
1505 		case EOpDot:         if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break;
1506 		case EOpCross:       if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break;
1507 		case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
1508 		case EOpReflect:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break;
1509 		case EOpRefract:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
1510 		case EOpMul:
1511 			if(visit == PostVisit)
1512 			{
1513 				TIntermTyped *arg0 = arg[0]->getAsTyped();
1514 				ASSERT((arg0->getNominalSize() == arg[1]->getAsTyped()->getNominalSize()) &&
1515 				       (arg0->getSecondarySize() == arg[1]->getAsTyped()->getSecondarySize()));
1516 
1517 				int size = arg0->getNominalSize();
1518 				for(int i = 0; i < size; i++)
1519 				{
1520 					emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i);
1521 				}
1522 			}
1523 			break;
1524 		case EOpOuterProduct:
1525 			if(visit == PostVisit)
1526 			{
1527 				for(int i = 0; i < dim(arg[1]); i++)
1528 				{
1529 					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]);
1530 					mul->src[1].swizzle = 0x55 * i;
1531 				}
1532 			}
1533 			break;
1534 		default: UNREACHABLE(node->getOp());
1535 		}
1536 
1537 		return true;
1538 	}
1539 
visitSelection(Visit visit,TIntermSelection * node)1540 	bool OutputASM::visitSelection(Visit visit, TIntermSelection *node)
1541 	{
1542 		if(currentScope != emitScope)
1543 		{
1544 			return false;
1545 		}
1546 
1547 		TIntermTyped *condition = node->getCondition();
1548 		TIntermNode *trueBlock = node->getTrueBlock();
1549 		TIntermNode *falseBlock = node->getFalseBlock();
1550 		TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
1551 
1552 		condition->traverse(this);
1553 
1554 		if(node->usesTernaryOperator())
1555 		{
1556 			if(constantCondition)
1557 			{
1558 				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
1559 
1560 				if(trueCondition)
1561 				{
1562 					trueBlock->traverse(this);
1563 					copy(node, trueBlock);
1564 				}
1565 				else
1566 				{
1567 					falseBlock->traverse(this);
1568 					copy(node, falseBlock);
1569 				}
1570 			}
1571 			else if(trivial(node, 6))   // Fast to compute both potential results and no side effects
1572 			{
1573 				trueBlock->traverse(this);
1574 				falseBlock->traverse(this);
1575 				emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock);
1576 			}
1577 			else
1578 			{
1579 				emit(sw::Shader::OPCODE_IF, 0, condition);
1580 
1581 				if(trueBlock)
1582 				{
1583 					trueBlock->traverse(this);
1584 					copy(node, trueBlock);
1585 				}
1586 
1587 				if(falseBlock)
1588 				{
1589 					emit(sw::Shader::OPCODE_ELSE);
1590 					falseBlock->traverse(this);
1591 					copy(node, falseBlock);
1592 				}
1593 
1594 				emit(sw::Shader::OPCODE_ENDIF);
1595 			}
1596 		}
1597 		else  // if/else statement
1598 		{
1599 			if(constantCondition)
1600 			{
1601 				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
1602 
1603 				if(trueCondition)
1604 				{
1605 					if(trueBlock)
1606 					{
1607 						trueBlock->traverse(this);
1608 					}
1609 				}
1610 				else
1611 				{
1612 					if(falseBlock)
1613 					{
1614 						falseBlock->traverse(this);
1615 					}
1616 				}
1617 			}
1618 			else
1619 			{
1620 				emit(sw::Shader::OPCODE_IF, 0, condition);
1621 
1622 				if(trueBlock)
1623 				{
1624 					trueBlock->traverse(this);
1625 				}
1626 
1627 				if(falseBlock)
1628 				{
1629 					emit(sw::Shader::OPCODE_ELSE);
1630 					falseBlock->traverse(this);
1631 				}
1632 
1633 				emit(sw::Shader::OPCODE_ENDIF);
1634 			}
1635 		}
1636 
1637 		return false;
1638 	}
1639 
visitLoop(Visit visit,TIntermLoop * node)1640 	bool OutputASM::visitLoop(Visit visit, TIntermLoop *node)
1641 	{
1642 		if(currentScope != emitScope)
1643 		{
1644 			return false;
1645 		}
1646 
1647 		unsigned int iterations = loopCount(node);
1648 
1649 		if(iterations == 0)
1650 		{
1651 			return false;
1652 		}
1653 
1654 		bool unroll = (iterations <= 4);
1655 
1656 		if(unroll)
1657 		{
1658 			LoopUnrollable loopUnrollable;
1659 			unroll = loopUnrollable.traverse(node);
1660 		}
1661 
1662 		TIntermNode *init = node->getInit();
1663 		TIntermTyped *condition = node->getCondition();
1664 		TIntermTyped *expression = node->getExpression();
1665 		TIntermNode *body = node->getBody();
1666 		Constant True(true);
1667 
1668 		if(node->getType() == ELoopDoWhile)
1669 		{
1670 			Temporary iterate(this);
1671 			emit(sw::Shader::OPCODE_MOV, &iterate, &True);
1672 
1673 			emit(sw::Shader::OPCODE_WHILE, 0, &iterate);   // FIXME: Implement real do-while
1674 
1675 			if(body)
1676 			{
1677 				body->traverse(this);
1678 			}
1679 
1680 			emit(sw::Shader::OPCODE_TEST);
1681 
1682 			condition->traverse(this);
1683 			emit(sw::Shader::OPCODE_MOV, &iterate, condition);
1684 
1685 			emit(sw::Shader::OPCODE_ENDWHILE);
1686 		}
1687 		else
1688 		{
1689 			if(init)
1690 			{
1691 				init->traverse(this);
1692 			}
1693 
1694 			if(unroll)
1695 			{
1696 				for(unsigned int i = 0; i < iterations; i++)
1697 				{
1698 				//	condition->traverse(this);   // Condition could contain statements, but not in an unrollable loop
1699 
1700 					if(body)
1701 					{
1702 						body->traverse(this);
1703 					}
1704 
1705 					if(expression)
1706 					{
1707 						expression->traverse(this);
1708 					}
1709 				}
1710 			}
1711 			else
1712 			{
1713 				if(condition)
1714 				{
1715 					condition->traverse(this);
1716 				}
1717 				else
1718 				{
1719 					condition = &True;
1720 				}
1721 
1722 				emit(sw::Shader::OPCODE_WHILE, 0, condition);
1723 
1724 				if(body)
1725 				{
1726 					body->traverse(this);
1727 				}
1728 
1729 				emit(sw::Shader::OPCODE_TEST);
1730 
1731 				if(expression)
1732 				{
1733 					expression->traverse(this);
1734 				}
1735 
1736 				if(condition)
1737 				{
1738 					condition->traverse(this);
1739 				}
1740 
1741 				emit(sw::Shader::OPCODE_ENDWHILE);
1742 			}
1743 		}
1744 
1745 		return false;
1746 	}
1747 
visitBranch(Visit visit,TIntermBranch * node)1748 	bool OutputASM::visitBranch(Visit visit, TIntermBranch *node)
1749 	{
1750 		if(currentScope != emitScope)
1751 		{
1752 			return false;
1753 		}
1754 
1755 		switch(node->getFlowOp())
1756 		{
1757 		case EOpKill:      if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD);  break;
1758 		case EOpBreak:     if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK);    break;
1759 		case EOpContinue:  if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break;
1760 		case EOpReturn:
1761 			if(visit == PostVisit)
1762 			{
1763 				TIntermTyped *value = node->getExpression();
1764 
1765 				if(value)
1766 				{
1767 					copy(functionArray[currentFunction].ret, value);
1768 				}
1769 
1770 				emit(sw::Shader::OPCODE_LEAVE);
1771 			}
1772 			break;
1773 		default: UNREACHABLE(node->getFlowOp());
1774 		}
1775 
1776 		return true;
1777 	}
1778 
visitSwitch(Visit visit,TIntermSwitch * node)1779 	bool OutputASM::visitSwitch(Visit visit, TIntermSwitch *node)
1780 	{
1781 		if(currentScope != emitScope)
1782 		{
1783 			return false;
1784 		}
1785 
1786 		TIntermTyped* switchValue = node->getInit();
1787 		TIntermAggregate* opList = node->getStatementList();
1788 
1789 		if(!switchValue || !opList)
1790 		{
1791 			return false;
1792 		}
1793 
1794 		switchValue->traverse(this);
1795 
1796 		emit(sw::Shader::OPCODE_SWITCH);
1797 
1798 		TIntermSequence& sequence = opList->getSequence();
1799 		TIntermSequence::iterator it = sequence.begin();
1800 		TIntermSequence::iterator defaultIt = sequence.end();
1801 		int nbCases = 0;
1802 		for(; it != sequence.end(); ++it)
1803 		{
1804 			TIntermCase* currentCase = (*it)->getAsCaseNode();
1805 			if(currentCase)
1806 			{
1807 				TIntermSequence::iterator caseIt = it;
1808 
1809 				TIntermTyped* condition = currentCase->getCondition();
1810 				if(condition) // non default case
1811 				{
1812 					if(nbCases != 0)
1813 					{
1814 						emit(sw::Shader::OPCODE_ELSE);
1815 					}
1816 
1817 					condition->traverse(this);
1818 					Temporary result(this);
1819 					emitBinary(sw::Shader::OPCODE_EQ, &result, switchValue, condition);
1820 					emit(sw::Shader::OPCODE_IF, 0, &result);
1821 					nbCases++;
1822 
1823 					for(++caseIt; caseIt != sequence.end(); ++caseIt)
1824 					{
1825 						(*caseIt)->traverse(this);
1826 						if((*caseIt)->getAsBranchNode()) // Kill, Break, Continue or Return
1827 						{
1828 							break;
1829 						}
1830 					}
1831 				}
1832 				else
1833 				{
1834 					defaultIt = it; // The default case might not be the last case, keep it for last
1835 				}
1836 			}
1837 		}
1838 
1839 		// If there's a default case, traverse it here
1840 		if(defaultIt != sequence.end())
1841 		{
1842 			emit(sw::Shader::OPCODE_ELSE);
1843 			for(++defaultIt; defaultIt != sequence.end(); ++defaultIt)
1844 			{
1845 				(*defaultIt)->traverse(this);
1846 				if((*defaultIt)->getAsBranchNode()) // Kill, Break, Continue or Return
1847 				{
1848 					break;
1849 				}
1850 			}
1851 		}
1852 
1853 		for(int i = 0; i < nbCases; ++i)
1854 		{
1855 			emit(sw::Shader::OPCODE_ENDIF);
1856 		}
1857 
1858 		emit(sw::Shader::OPCODE_ENDSWITCH);
1859 
1860 		return false;
1861 	}
1862 
emit(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2,TIntermNode * src3,TIntermNode * src4)1863 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4)
1864 	{
1865 		return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0);
1866 	}
1867 
emit(sw::Shader::Opcode op,TIntermTyped * dst,int dstIndex,TIntermNode * src0,int index0,TIntermNode * src1,int index1,TIntermNode * src2,int index2,TIntermNode * src3,int index3,TIntermNode * src4,int index4)1868 	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1,
1869 	                             TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, int index4)
1870 	{
1871 		Instruction *instruction = new Instruction(op);
1872 
1873 		if(dst)
1874 		{
1875 			instruction->dst.type = registerType(dst);
1876 			instruction->dst.index = registerIndex(dst) + dstIndex;
1877 			instruction->dst.mask = writeMask(dst);
1878 			instruction->dst.integer = (dst->getBasicType() == EbtInt);
1879 		}
1880 
1881 		argument(instruction->src[0], src0, index0);
1882 		argument(instruction->src[1], src1, index1);
1883 		argument(instruction->src[2], src2, index2);
1884 		argument(instruction->src[3], src3, index3);
1885 		argument(instruction->src[4], src4, index4);
1886 
1887 		shader->append(instruction);
1888 
1889 		return instruction;
1890 	}
1891 
emitCast(TIntermTyped * dst,TIntermTyped * src)1892 	Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src)
1893 	{
1894 		return emitCast(dst, 0, src, 0);
1895 	}
1896 
emitCast(TIntermTyped * dst,int dstIndex,TIntermTyped * src,int srcIndex)1897 	Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex)
1898 	{
1899 		switch(src->getBasicType())
1900 		{
1901 		case EbtBool:
1902 			switch(dst->getBasicType())
1903 			{
1904 			case EbtInt:   return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
1905 			case EbtUInt:  return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
1906 			case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex);
1907 			default:       break;
1908 			}
1909 			break;
1910 		case EbtInt:
1911 			switch(dst->getBasicType())
1912 			{
1913 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
1914 			case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex);
1915 			default:       break;
1916 			}
1917 			break;
1918 		case EbtUInt:
1919 			switch(dst->getBasicType())
1920 			{
1921 			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
1922 			case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex);
1923 			default:       break;
1924 			}
1925 			break;
1926 		case EbtFloat:
1927 			switch(dst->getBasicType())
1928 			{
1929 			case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex);
1930 			case EbtInt:  return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex);
1931 			case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex);
1932 			default:      break;
1933 			}
1934 			break;
1935 		default:
1936 			break;
1937 		}
1938 
1939 		ASSERT((src->getBasicType() == dst->getBasicType()) ||
1940 		      ((src->getBasicType() == EbtInt) && (dst->getBasicType() == EbtUInt)) ||
1941 		      ((src->getBasicType() == EbtUInt) && (dst->getBasicType() == EbtInt)));
1942 
1943 		return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex);
1944 	}
1945 
emitBinary(sw::Shader::Opcode op,TIntermTyped * dst,TIntermNode * src0,TIntermNode * src1,TIntermNode * src2)1946 	void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2)
1947 	{
1948 		for(int index = 0; index < dst->elementRegisterCount(); index++)
1949 		{
1950 			emit(op, dst, index, src0, index, src1, index, src2, index);
1951 		}
1952 	}
1953 
emitAssign(sw::Shader::Opcode op,TIntermTyped * result,TIntermTyped * lhs,TIntermTyped * src0,TIntermTyped * src1)1954 	void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1)
1955 	{
1956 		emitBinary(op, result, src0, src1);
1957 		assignLvalue(lhs, result);
1958 	}
1959 
emitCmp(sw::Shader::Control cmpOp,TIntermTyped * dst,TIntermNode * left,TIntermNode * right,int index)1960 	void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index)
1961 	{
1962 		sw::Shader::Opcode opcode;
1963 		switch(left->getAsTyped()->getBasicType())
1964 		{
1965 		case EbtBool:
1966 		case EbtInt:
1967 			opcode = sw::Shader::OPCODE_ICMP;
1968 			break;
1969 		case EbtUInt:
1970 			opcode = sw::Shader::OPCODE_UCMP;
1971 			break;
1972 		default:
1973 			opcode = sw::Shader::OPCODE_CMP;
1974 			break;
1975 		}
1976 
1977 		Instruction *cmp = emit(opcode, dst, 0, left, index, right, index);
1978 		cmp->control = cmpOp;
1979 	}
1980 
componentCount(const TType & type,int registers)1981 	int componentCount(const TType &type, int registers)
1982 	{
1983 		if(registers == 0)
1984 		{
1985 			return 0;
1986 		}
1987 
1988 		if(type.isArray() && registers >= type.elementRegisterCount())
1989 		{
1990 			int index = registers / type.elementRegisterCount();
1991 			registers -= index * type.elementRegisterCount();
1992 			return index * type.getElementSize() + componentCount(type, registers);
1993 		}
1994 
1995 		if(type.isStruct() || type.isInterfaceBlock())
1996 		{
1997 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
1998 			int elements = 0;
1999 
2000 			for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++)
2001 			{
2002 				const TType &fieldType = *((*field)->type());
2003 
2004 				if(fieldType.totalRegisterCount() <= registers)
2005 				{
2006 					registers -= fieldType.totalRegisterCount();
2007 					elements += fieldType.getObjectSize();
2008 				}
2009 				else   // Register within this field
2010 				{
2011 					return elements + componentCount(fieldType, registers);
2012 				}
2013 			}
2014 		}
2015 		else if(type.isMatrix())
2016 		{
2017 			return registers * type.registerSize();
2018 		}
2019 
2020 		UNREACHABLE(0);
2021 		return 0;
2022 	}
2023 
registerSize(const TType & type,int registers)2024 	int registerSize(const TType &type, int registers)
2025 	{
2026 		if(registers == 0)
2027 		{
2028 			if(type.isStruct())
2029 			{
2030 				return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0);
2031 			}
2032 			else if(type.isInterfaceBlock())
2033 			{
2034 				return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0);
2035 			}
2036 
2037 			return type.registerSize();
2038 		}
2039 
2040 		if(type.isArray() && registers >= type.elementRegisterCount())
2041 		{
2042 			int index = registers / type.elementRegisterCount();
2043 			registers -= index * type.elementRegisterCount();
2044 			return registerSize(type, registers);
2045 		}
2046 
2047 		if(type.isStruct() || type.isInterfaceBlock())
2048 		{
2049 			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
2050 			int elements = 0;
2051 
2052 			for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++)
2053 			{
2054 				const TType &fieldType = *((*field)->type());
2055 
2056 				if(fieldType.totalRegisterCount() <= registers)
2057 				{
2058 					registers -= fieldType.totalRegisterCount();
2059 					elements += fieldType.getObjectSize();
2060 				}
2061 				else   // Register within this field
2062 				{
2063 					return registerSize(fieldType, registers);
2064 				}
2065 			}
2066 		}
2067 		else if(type.isMatrix())
2068 		{
2069 			return registerSize(type, 0);
2070 		}
2071 
2072 		UNREACHABLE(0);
2073 		return 0;
2074 	}
2075 
getBlockId(TIntermTyped * arg)2076 	int OutputASM::getBlockId(TIntermTyped *arg)
2077 	{
2078 		if(arg)
2079 		{
2080 			const TType &type = arg->getType();
2081 			TInterfaceBlock* block = type.getInterfaceBlock();
2082 			if(block && (type.getQualifier() == EvqUniform))
2083 			{
2084 				// Make sure the uniform block is declared
2085 				uniformRegister(arg);
2086 
2087 				const char* blockName = block->name().c_str();
2088 
2089 				// Fetch uniform block index from array of blocks
2090 				for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it)
2091 				{
2092 					if(blockName == it->name)
2093 					{
2094 						return it->blockId;
2095 					}
2096 				}
2097 
2098 				ASSERT(false);
2099 			}
2100 		}
2101 
2102 		return -1;
2103 	}
2104 
getArgumentInfo(TIntermTyped * arg,int index)2105 	OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index)
2106 	{
2107 		const TType &type = arg->getType();
2108 		int blockId = getBlockId(arg);
2109 		ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1);
2110 		if(blockId != -1)
2111 		{
2112 			argumentInfo.bufferIndex = 0;
2113 			for(int i = 0; i < blockId; ++i)
2114 			{
2115 				int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize;
2116 				argumentInfo.bufferIndex += blockArraySize > 0 ? blockArraySize : 1;
2117 			}
2118 
2119 			const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId];
2120 
2121 			BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end();
2122 			BlockDefinitionIndexMap::const_iterator it = itEnd;
2123 
2124 			argumentInfo.clampedIndex = index;
2125 			if(type.isInterfaceBlock())
2126 			{
2127 				// Offset index to the beginning of the selected instance
2128 				int blockRegisters = type.elementRegisterCount();
2129 				int bufferOffset = argumentInfo.clampedIndex / blockRegisters;
2130 				argumentInfo.bufferIndex += bufferOffset;
2131 				argumentInfo.clampedIndex -= bufferOffset * blockRegisters;
2132 			}
2133 
2134 			int regIndex = registerIndex(arg);
2135 			for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i)
2136 			{
2137 				it = blockDefinition.find(i);
2138 				if(it != itEnd)
2139 				{
2140 					argumentInfo.clampedIndex -= (i - regIndex);
2141 					break;
2142 				}
2143 			}
2144 			ASSERT(it != itEnd);
2145 
2146 			argumentInfo.typedMemberInfo = it->second;
2147 
2148 			int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount();
2149 			argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex;
2150 		}
2151 		else
2152 		{
2153 			argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index;
2154 		}
2155 
2156 		return argumentInfo;
2157 	}
2158 
argument(sw::Shader::SourceParameter & parameter,TIntermNode * argument,int index)2159 	void OutputASM::argument(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index)
2160 	{
2161 		if(argument)
2162 		{
2163 			TIntermTyped *arg = argument->getAsTyped();
2164 			Temporary unpackedUniform(this);
2165 
2166 			const TType& srcType = arg->getType();
2167 			TInterfaceBlock* srcBlock = srcType.getInterfaceBlock();
2168 			if(srcBlock && (srcType.getQualifier() == EvqUniform))
2169 			{
2170 				const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
2171 				const TType &memberType = argumentInfo.typedMemberInfo.type;
2172 
2173 				if(memberType.getBasicType() == EbtBool)
2174 				{
2175 					ASSERT(argumentInfo.clampedIndex < (memberType.isArray() ? memberType.getArraySize() : 1)); // index < arraySize
2176 
2177 					// Convert the packed bool, which is currently an int, to a true bool
2178 					Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B);
2179 					instruction->dst.type = sw::Shader::PARAMETER_TEMP;
2180 					instruction->dst.index = registerIndex(&unpackedUniform);
2181 					instruction->src[0].type = sw::Shader::PARAMETER_CONST;
2182 					instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
2183 					instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride;
2184 
2185 					shader->append(instruction);
2186 
2187 					arg = &unpackedUniform;
2188 					index = 0;
2189 				}
2190 				else if((srcBlock->matrixPacking() == EmpRowMajor) && memberType.isMatrix())
2191 				{
2192 					int numCols = memberType.getNominalSize();
2193 					int numRows = memberType.getSecondarySize();
2194 
2195 					ASSERT(argumentInfo.clampedIndex < (numCols * (memberType.isArray() ? memberType.getArraySize() : 1))); // index < cols * arraySize
2196 
2197 					unsigned int dstIndex = registerIndex(&unpackedUniform);
2198 					unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55;
2199 					int arrayIndex = argumentInfo.clampedIndex / numCols;
2200 					int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride;
2201 
2202 					for(int j = 0; j < numRows; ++j)
2203 					{
2204 						// Transpose the row major matrix
2205 						Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV);
2206 						instruction->dst.type = sw::Shader::PARAMETER_TEMP;
2207 						instruction->dst.index = dstIndex;
2208 						instruction->dst.mask = 1 << j;
2209 						instruction->src[0].type = sw::Shader::PARAMETER_CONST;
2210 						instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
2211 						instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride;
2212 						instruction->src[0].swizzle = srcSwizzle;
2213 
2214 						shader->append(instruction);
2215 					}
2216 
2217 					arg = &unpackedUniform;
2218 					index = 0;
2219 				}
2220 			}
2221 
2222 			const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
2223 			const TType &type = argumentInfo.typedMemberInfo.type;
2224 
2225 			int size = registerSize(type, argumentInfo.clampedIndex);
2226 
2227 			parameter.type = registerType(arg);
2228 			parameter.bufferIndex = argumentInfo.bufferIndex;
2229 
2230 			if(arg->getAsConstantUnion() && arg->getAsConstantUnion()->getUnionArrayPointer())
2231 			{
2232 				int component = componentCount(type, argumentInfo.clampedIndex);
2233 				ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer();
2234 
2235 				for(int i = 0; i < 4; i++)
2236 				{
2237 					if(size == 1)   // Replicate
2238 					{
2239 						parameter.value[i] = constants[component + 0].getAsFloat();
2240 					}
2241 					else if(i < size)
2242 					{
2243 						parameter.value[i] = constants[component + i].getAsFloat();
2244 					}
2245 					else
2246 					{
2247 						parameter.value[i] = 0.0f;
2248 					}
2249 				}
2250 			}
2251 			else
2252 			{
2253 				parameter.index = registerIndex(arg) + argumentInfo.clampedIndex;
2254 
2255 				if(parameter.bufferIndex != -1)
2256 				{
2257 					int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride;
2258 					parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride;
2259 				}
2260 			}
2261 
2262 			if(!IsSampler(arg->getBasicType()))
2263 			{
2264 				parameter.swizzle = readSwizzle(arg, size);
2265 			}
2266 		}
2267 	}
2268 
copy(TIntermTyped * dst,TIntermNode * src,int offset)2269 	void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset)
2270 	{
2271 		for(int index = 0; index < dst->totalRegisterCount(); index++)
2272 		{
2273 			Instruction *mov = emit(sw::Shader::OPCODE_MOV, dst, index, src, offset + index);
2274 			mov->dst.mask = writeMask(dst, index);
2275 		}
2276 	}
2277 
swizzleElement(int swizzle,int index)2278 	int swizzleElement(int swizzle, int index)
2279 	{
2280 		return (swizzle >> (index * 2)) & 0x03;
2281 	}
2282 
swizzleSwizzle(int leftSwizzle,int rightSwizzle)2283 	int swizzleSwizzle(int leftSwizzle, int rightSwizzle)
2284 	{
2285 		return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) |
2286 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) |
2287 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) |
2288 		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6);
2289 	}
2290 
assignLvalue(TIntermTyped * dst,TIntermTyped * src)2291 	void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src)
2292 	{
2293 		if(src &&
2294 			((src->isVector() && (!dst->isVector() || (src->getNominalSize() != dst->getNominalSize()))) ||
2295 			 (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize())))))
2296 		{
2297 			return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix");
2298 		}
2299 
2300 		TIntermBinary *binary = dst->getAsBinaryNode();
2301 
2302 		if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar())
2303 		{
2304 			Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT);
2305 
2306 			Temporary address(this);
2307 			lvalue(insert->dst, address, dst);
2308 
2309 			insert->src[0].type = insert->dst.type;
2310 			insert->src[0].index = insert->dst.index;
2311 			insert->src[0].rel = insert->dst.rel;
2312 			argument(insert->src[1], src);
2313 			argument(insert->src[2], binary->getRight());
2314 
2315 			shader->append(insert);
2316 		}
2317 		else
2318 		{
2319 			for(int offset = 0; offset < dst->totalRegisterCount(); offset++)
2320 			{
2321 				Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV);
2322 
2323 				Temporary address(this);
2324 				int swizzle = lvalue(mov->dst, address, dst);
2325 				mov->dst.index += offset;
2326 
2327 				if(offset > 0)
2328 				{
2329 					mov->dst.mask = writeMask(dst, offset);
2330 				}
2331 
2332 				argument(mov->src[0], src, offset);
2333 				mov->src[0].swizzle = swizzleSwizzle(mov->src[0].swizzle, swizzle);
2334 
2335 				shader->append(mov);
2336 			}
2337 		}
2338 	}
2339 
lvalue(sw::Shader::DestinationParameter & dst,Temporary & address,TIntermTyped * node)2340 	int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, Temporary &address, TIntermTyped *node)
2341 	{
2342 		TIntermTyped *result = node;
2343 		TIntermBinary *binary = node->getAsBinaryNode();
2344 		TIntermSymbol *symbol = node->getAsSymbolNode();
2345 
2346 		if(binary)
2347 		{
2348 			TIntermTyped *left = binary->getLeft();
2349 			TIntermTyped *right = binary->getRight();
2350 
2351 			int leftSwizzle = lvalue(dst, address, left);   // Resolve the l-value of the left side
2352 
2353 			switch(binary->getOp())
2354 			{
2355 			case EOpIndexDirect:
2356 				{
2357 					int rightIndex = right->getAsConstantUnion()->getIConst(0);
2358 
2359 					if(left->isRegister())
2360 					{
2361 						int leftMask = dst.mask;
2362 
2363 						dst.mask = 1;
2364 						while((leftMask & dst.mask) == 0)
2365 						{
2366 							dst.mask = dst.mask << 1;
2367 						}
2368 
2369 						int element = swizzleElement(leftSwizzle, rightIndex);
2370 						dst.mask = 1 << element;
2371 
2372 						return element;
2373 					}
2374 					else if(left->isArray() || left->isMatrix())
2375 					{
2376 						dst.index += rightIndex * result->totalRegisterCount();
2377 						return 0xE4;
2378 					}
2379 					else UNREACHABLE(0);
2380 				}
2381 				break;
2382 			case EOpIndexIndirect:
2383 				{
2384 					if(left->isRegister())
2385 					{
2386 						// Requires INSERT instruction (handled by calling function)
2387 					}
2388 					else if(left->isArray() || left->isMatrix())
2389 					{
2390 						int scale = result->totalRegisterCount();
2391 
2392 						if(dst.rel.type == sw::Shader::PARAMETER_VOID)   // Use the index register as the relative address directly
2393 						{
2394 							if(left->totalRegisterCount() > 1)
2395 							{
2396 								sw::Shader::SourceParameter relativeRegister;
2397 								argument(relativeRegister, right);
2398 
2399 								dst.rel.index = relativeRegister.index;
2400 								dst.rel.type = relativeRegister.type;
2401 								dst.rel.scale = scale;
2402 								dst.rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);
2403 							}
2404 						}
2405 						else if(dst.rel.index != registerIndex(&address))   // Move the previous index register to the address register
2406 						{
2407 							if(scale == 1)
2408 							{
2409 								Constant oldScale((int)dst.rel.scale);
2410 								Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right);
2411 								mad->src[0].index = dst.rel.index;
2412 								mad->src[0].type = dst.rel.type;
2413 							}
2414 							else
2415 							{
2416 								Constant oldScale((int)dst.rel.scale);
2417 								Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale);
2418 								mul->src[0].index = dst.rel.index;
2419 								mul->src[0].type = dst.rel.type;
2420 
2421 								Constant newScale(scale);
2422 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
2423 							}
2424 
2425 							dst.rel.type = sw::Shader::PARAMETER_TEMP;
2426 							dst.rel.index = registerIndex(&address);
2427 							dst.rel.scale = 1;
2428 						}
2429 						else   // Just add the new index to the address register
2430 						{
2431 							if(scale == 1)
2432 							{
2433 								emit(sw::Shader::OPCODE_IADD, &address, &address, right);
2434 							}
2435 							else
2436 							{
2437 								Constant newScale(scale);
2438 								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
2439 							}
2440 						}
2441 					}
2442 					else UNREACHABLE(0);
2443 				}
2444 				break;
2445 			case EOpIndexDirectStruct:
2446 			case EOpIndexDirectInterfaceBlock:
2447 				{
2448 					const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ?
2449 					                           left->getType().getStruct()->fields() :
2450 					                           left->getType().getInterfaceBlock()->fields();
2451 					int index = right->getAsConstantUnion()->getIConst(0);
2452 					int fieldOffset = 0;
2453 
2454 					for(int i = 0; i < index; i++)
2455 					{
2456 						fieldOffset += fields[i]->type()->totalRegisterCount();
2457 					}
2458 
2459 					dst.type = registerType(left);
2460 					dst.index += fieldOffset;
2461 					dst.mask = writeMask(result);
2462 
2463 					return 0xE4;
2464 				}
2465 				break;
2466 			case EOpVectorSwizzle:
2467 				{
2468 					ASSERT(left->isRegister());
2469 
2470 					int leftMask = dst.mask;
2471 
2472 					int swizzle = 0;
2473 					int rightMask = 0;
2474 
2475 					TIntermSequence &sequence = right->getAsAggregate()->getSequence();
2476 
2477 					for(unsigned int i = 0; i < sequence.size(); i++)
2478 					{
2479 						int index = sequence[i]->getAsConstantUnion()->getIConst(0);
2480 
2481 						int element = swizzleElement(leftSwizzle, index);
2482 						rightMask = rightMask | (1 << element);
2483 						swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2);
2484 					}
2485 
2486 					dst.mask = leftMask & rightMask;
2487 
2488 					return swizzle;
2489 				}
2490 				break;
2491 			default:
2492 				UNREACHABLE(binary->getOp());   // Not an l-value operator
2493 				break;
2494 			}
2495 		}
2496 		else if(symbol)
2497 		{
2498 			dst.type = registerType(symbol);
2499 			dst.index = registerIndex(symbol);
2500 			dst.mask = writeMask(symbol);
2501 			return 0xE4;
2502 		}
2503 
2504 		return 0xE4;
2505 	}
2506 
registerType(TIntermTyped * operand)2507 	sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand)
2508 	{
2509 		if(isSamplerRegister(operand))
2510 		{
2511 			return sw::Shader::PARAMETER_SAMPLER;
2512 		}
2513 
2514 		const TQualifier qualifier = operand->getQualifier();
2515 		if((EvqFragColor == qualifier) || (EvqFragData == qualifier))
2516 		{
2517 			if(((EvqFragData == qualifier) && (EvqFragColor == outputQualifier)) ||
2518 			   ((EvqFragColor == qualifier) && (EvqFragData == outputQualifier)))
2519 			{
2520 				mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", "");
2521 			}
2522 			outputQualifier = qualifier;
2523 		}
2524 
2525 		if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer()))
2526 		{
2527 			return sw::Shader::PARAMETER_TEMP;
2528 		}
2529 
2530 		switch(qualifier)
2531 		{
2532 		case EvqTemporary:           return sw::Shader::PARAMETER_TEMP;
2533 		case EvqGlobal:              return sw::Shader::PARAMETER_TEMP;
2534 		case EvqConstExpr:           return sw::Shader::PARAMETER_FLOAT4LITERAL;   // All converted to float
2535 		case EvqAttribute:           return sw::Shader::PARAMETER_INPUT;
2536 		case EvqVaryingIn:           return sw::Shader::PARAMETER_INPUT;
2537 		case EvqVaryingOut:          return sw::Shader::PARAMETER_OUTPUT;
2538 		case EvqVertexIn:            return sw::Shader::PARAMETER_INPUT;
2539 		case EvqFragmentOut:         return sw::Shader::PARAMETER_COLOROUT;
2540 		case EvqVertexOut:           return sw::Shader::PARAMETER_OUTPUT;
2541 		case EvqFragmentIn:          return sw::Shader::PARAMETER_INPUT;
2542 		case EvqInvariantVaryingIn:  return sw::Shader::PARAMETER_INPUT;    // FIXME: Guarantee invariance at the backend
2543 		case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT;   // FIXME: Guarantee invariance at the backend
2544 		case EvqSmooth:              return sw::Shader::PARAMETER_OUTPUT;
2545 		case EvqFlat:                return sw::Shader::PARAMETER_OUTPUT;
2546 		case EvqCentroidOut:         return sw::Shader::PARAMETER_OUTPUT;
2547 		case EvqSmoothIn:            return sw::Shader::PARAMETER_INPUT;
2548 		case EvqFlatIn:              return sw::Shader::PARAMETER_INPUT;
2549 		case EvqCentroidIn:          return sw::Shader::PARAMETER_INPUT;
2550 		case EvqUniform:             return sw::Shader::PARAMETER_CONST;
2551 		case EvqIn:                  return sw::Shader::PARAMETER_TEMP;
2552 		case EvqOut:                 return sw::Shader::PARAMETER_TEMP;
2553 		case EvqInOut:               return sw::Shader::PARAMETER_TEMP;
2554 		case EvqConstReadOnly:       return sw::Shader::PARAMETER_TEMP;
2555 		case EvqPosition:            return sw::Shader::PARAMETER_OUTPUT;
2556 		case EvqPointSize:           return sw::Shader::PARAMETER_OUTPUT;
2557 		case EvqInstanceID:          return sw::Shader::PARAMETER_MISCTYPE;
2558 		case EvqVertexID:            return sw::Shader::PARAMETER_MISCTYPE;
2559 		case EvqFragCoord:           return sw::Shader::PARAMETER_MISCTYPE;
2560 		case EvqFrontFacing:         return sw::Shader::PARAMETER_MISCTYPE;
2561 		case EvqPointCoord:          return sw::Shader::PARAMETER_INPUT;
2562 		case EvqFragColor:           return sw::Shader::PARAMETER_COLOROUT;
2563 		case EvqFragData:            return sw::Shader::PARAMETER_COLOROUT;
2564 		case EvqFragDepth:           return sw::Shader::PARAMETER_DEPTHOUT;
2565 		default: UNREACHABLE(qualifier);
2566 		}
2567 
2568 		return sw::Shader::PARAMETER_VOID;
2569 	}
2570 
hasFlatQualifier(TIntermTyped * operand)2571 	bool OutputASM::hasFlatQualifier(TIntermTyped *operand)
2572 	{
2573 		const TQualifier qualifier = operand->getQualifier();
2574 		return qualifier == EvqFlat || qualifier == EvqFlatOut || qualifier == EvqFlatIn;
2575 	}
2576 
registerIndex(TIntermTyped * operand)2577 	unsigned int OutputASM::registerIndex(TIntermTyped *operand)
2578 	{
2579 		if(isSamplerRegister(operand))
2580 		{
2581 			return samplerRegister(operand);
2582 		}
2583 
2584 		switch(operand->getQualifier())
2585 		{
2586 		case EvqTemporary:           return temporaryRegister(operand);
2587 		case EvqGlobal:              return temporaryRegister(operand);
2588 		case EvqConstExpr:           return temporaryRegister(operand);   // Unevaluated constant expression
2589 		case EvqAttribute:           return attributeRegister(operand);
2590 		case EvqVaryingIn:           return varyingRegister(operand);
2591 		case EvqVaryingOut:          return varyingRegister(operand);
2592 		case EvqVertexIn:            return attributeRegister(operand);
2593 		case EvqFragmentOut:         return fragmentOutputRegister(operand);
2594 		case EvqVertexOut:           return varyingRegister(operand);
2595 		case EvqFragmentIn:          return varyingRegister(operand);
2596 		case EvqInvariantVaryingIn:  return varyingRegister(operand);
2597 		case EvqInvariantVaryingOut: return varyingRegister(operand);
2598 		case EvqSmooth:              return varyingRegister(operand);
2599 		case EvqFlat:                return varyingRegister(operand);
2600 		case EvqCentroidOut:         return varyingRegister(operand);
2601 		case EvqSmoothIn:            return varyingRegister(operand);
2602 		case EvqFlatIn:              return varyingRegister(operand);
2603 		case EvqCentroidIn:          return varyingRegister(operand);
2604 		case EvqUniform:             return uniformRegister(operand);
2605 		case EvqIn:                  return temporaryRegister(operand);
2606 		case EvqOut:                 return temporaryRegister(operand);
2607 		case EvqInOut:               return temporaryRegister(operand);
2608 		case EvqConstReadOnly:       return temporaryRegister(operand);
2609 		case EvqPosition:            return varyingRegister(operand);
2610 		case EvqPointSize:           return varyingRegister(operand);
2611 		case EvqInstanceID:          vertexShader->declareInstanceId(); return sw::Shader::InstanceIDIndex;
2612 		case EvqVertexID:            vertexShader->declareVertexId(); return sw::Shader::VertexIDIndex;
2613 		case EvqFragCoord:           pixelShader->declareVPos();  return sw::Shader::VPosIndex;
2614 		case EvqFrontFacing:         pixelShader->declareVFace(); return sw::Shader::VFaceIndex;
2615 		case EvqPointCoord:          return varyingRegister(operand);
2616 		case EvqFragColor:           return 0;
2617 		case EvqFragData:            return fragmentOutputRegister(operand);
2618 		case EvqFragDepth:           return 0;
2619 		default: UNREACHABLE(operand->getQualifier());
2620 		}
2621 
2622 		return 0;
2623 	}
2624 
writeMask(TIntermTyped * destination,int index)2625 	int OutputASM::writeMask(TIntermTyped *destination, int index)
2626 	{
2627 		if(destination->getQualifier() == EvqPointSize)
2628 		{
2629 			return 0x2;   // Point size stored in the y component
2630 		}
2631 
2632 		return 0xF >> (4 - registerSize(destination->getType(), index));
2633 	}
2634 
readSwizzle(TIntermTyped * argument,int size)2635 	int OutputASM::readSwizzle(TIntermTyped *argument, int size)
2636 	{
2637 		if(argument->getQualifier() == EvqPointSize)
2638 		{
2639 			return 0x55;   // Point size stored in the y component
2640 		}
2641 
2642 		static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4};   // (void), xxxx, xyyy, xyzz, xyzw
2643 
2644 		return swizzleSize[size];
2645 	}
2646 
2647 	// Conservatively checks whether an expression is fast to compute and has no side effects
trivial(TIntermTyped * expression,int budget)2648 	bool OutputASM::trivial(TIntermTyped *expression, int budget)
2649 	{
2650 		if(!expression->isRegister())
2651 		{
2652 			return false;
2653 		}
2654 
2655 		return cost(expression, budget) >= 0;
2656 	}
2657 
2658 	// Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects)
cost(TIntermNode * expression,int budget)2659 	int OutputASM::cost(TIntermNode *expression, int budget)
2660 	{
2661 		if(budget < 0)
2662 		{
2663 			return budget;
2664 		}
2665 
2666 		if(expression->getAsSymbolNode())
2667 		{
2668 			return budget;
2669 		}
2670 		else if(expression->getAsConstantUnion())
2671 		{
2672 			return budget;
2673 		}
2674 		else if(expression->getAsBinaryNode())
2675 		{
2676 			TIntermBinary *binary = expression->getAsBinaryNode();
2677 
2678 			switch(binary->getOp())
2679 			{
2680 			case EOpVectorSwizzle:
2681 			case EOpIndexDirect:
2682 			case EOpIndexDirectStruct:
2683 			case EOpIndexDirectInterfaceBlock:
2684 				return cost(binary->getLeft(), budget - 0);
2685 			case EOpAdd:
2686 			case EOpSub:
2687 			case EOpMul:
2688 				return cost(binary->getLeft(), cost(binary->getRight(), budget - 1));
2689 			default:
2690 				return -1;
2691 			}
2692 		}
2693 		else if(expression->getAsUnaryNode())
2694 		{
2695 			TIntermUnary *unary = expression->getAsUnaryNode();
2696 
2697 			switch(unary->getOp())
2698 			{
2699 			case EOpAbs:
2700 			case EOpNegative:
2701 				return cost(unary->getOperand(), budget - 1);
2702 			default:
2703 				return -1;
2704 			}
2705 		}
2706 		else if(expression->getAsSelectionNode())
2707 		{
2708 			TIntermSelection *selection = expression->getAsSelectionNode();
2709 
2710 			if(selection->usesTernaryOperator())
2711 			{
2712 				TIntermTyped *condition = selection->getCondition();
2713 				TIntermNode *trueBlock = selection->getTrueBlock();
2714 				TIntermNode *falseBlock = selection->getFalseBlock();
2715 				TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
2716 
2717 				if(constantCondition)
2718 				{
2719 					bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
2720 
2721 					if(trueCondition)
2722 					{
2723 						return cost(trueBlock, budget - 0);
2724 					}
2725 					else
2726 					{
2727 						return cost(falseBlock, budget - 0);
2728 					}
2729 				}
2730 				else
2731 				{
2732 					return cost(trueBlock, cost(falseBlock, budget - 2));
2733 				}
2734 			}
2735 		}
2736 
2737 		return -1;
2738 	}
2739 
findFunction(const TString & name)2740 	const Function *OutputASM::findFunction(const TString &name)
2741 	{
2742 		for(unsigned int f = 0; f < functionArray.size(); f++)
2743 		{
2744 			if(functionArray[f].name == name)
2745 			{
2746 				return &functionArray[f];
2747 			}
2748 		}
2749 
2750 		return 0;
2751 	}
2752 
temporaryRegister(TIntermTyped * temporary)2753 	int OutputASM::temporaryRegister(TIntermTyped *temporary)
2754 	{
2755 		return allocate(temporaries, temporary);
2756 	}
2757 
varyingRegister(TIntermTyped * varying)2758 	int OutputASM::varyingRegister(TIntermTyped *varying)
2759 	{
2760 		int var = lookup(varyings, varying);
2761 
2762 		if(var == -1)
2763 		{
2764 			var = allocate(varyings, varying);
2765 			int componentCount = varying->registerSize();
2766 			int registerCount = varying->totalRegisterCount();
2767 
2768 			if(pixelShader)
2769 			{
2770 				if((var + registerCount) > sw::MAX_FRAGMENT_INPUTS)
2771 				{
2772 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader");
2773 					return 0;
2774 				}
2775 
2776 				if(varying->getQualifier() == EvqPointCoord)
2777 				{
2778 					ASSERT(varying->isRegister());
2779 					pixelShader->setInput(var, componentCount, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var));
2780 				}
2781 				else
2782 				{
2783 					for(int i = 0; i < varying->totalRegisterCount(); i++)
2784 					{
2785 						bool flat = hasFlatQualifier(varying);
2786 
2787 						pixelShader->setInput(var + i, componentCount, sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i, flat));
2788 					}
2789 				}
2790 			}
2791 			else if(vertexShader)
2792 			{
2793 				if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS)
2794 				{
2795 					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader");
2796 					return 0;
2797 				}
2798 
2799 				if(varying->getQualifier() == EvqPosition)
2800 				{
2801 					ASSERT(varying->isRegister());
2802 					vertexShader->setPositionRegister(var);
2803 				}
2804 				else if(varying->getQualifier() == EvqPointSize)
2805 				{
2806 					ASSERT(varying->isRegister());
2807 					vertexShader->setPointSizeRegister(var);
2808 				}
2809 				else
2810 				{
2811 					// Semantic indexes for user varyings will be assigned during program link to match the pixel shader
2812 				}
2813 			}
2814 			else UNREACHABLE(0);
2815 
2816 			declareVarying(varying, var);
2817 		}
2818 
2819 		return var;
2820 	}
2821 
declareVarying(TIntermTyped * varying,int reg)2822 	void OutputASM::declareVarying(TIntermTyped *varying, int reg)
2823 	{
2824 		if(varying->getQualifier() != EvqPointCoord)   // gl_PointCoord does not need linking
2825 		{
2826 			const TType &type = varying->getType();
2827 			const char *name = varying->getAsSymbolNode()->getSymbol().c_str();
2828 			VaryingList &activeVaryings = shaderObject->varyings;
2829 
2830 			// Check if this varying has been declared before without having a register assigned
2831 			for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++)
2832 			{
2833 				if(v->name == name)
2834 				{
2835 					if(reg >= 0)
2836 					{
2837 						ASSERT(v->reg < 0 || v->reg == reg);
2838 						v->reg = reg;
2839 					}
2840 
2841 					return;
2842 				}
2843 			}
2844 
2845 			activeVaryings.push_back(glsl::Varying(glVariableType(type), name, varying->getArraySize(), reg, 0));
2846 		}
2847 	}
2848 
uniformRegister(TIntermTyped * uniform)2849 	int OutputASM::uniformRegister(TIntermTyped *uniform)
2850 	{
2851 		const TType &type = uniform->getType();
2852 		ASSERT(!IsSampler(type.getBasicType()));
2853 		TInterfaceBlock *block = type.getAsInterfaceBlock();
2854 		TIntermSymbol *symbol = uniform->getAsSymbolNode();
2855 		ASSERT(symbol || block);
2856 
2857 		if(symbol || block)
2858 		{
2859 			TInterfaceBlock* parentBlock = type.getInterfaceBlock();
2860 			bool isBlockMember = (!block && parentBlock);
2861 			int index = isBlockMember ? lookup(uniforms, parentBlock) : lookup(uniforms, uniform);
2862 
2863 			if(index == -1 || isBlockMember)
2864 			{
2865 				if(index == -1)
2866 				{
2867 					index = allocate(uniforms, uniform);
2868 				}
2869 
2870 				// Verify if the current uniform is a member of an already declared block
2871 				const TString &name = symbol ? symbol->getSymbol() : block->name();
2872 				int blockMemberIndex = blockMemberLookup(type, name, index);
2873 				if(blockMemberIndex == -1)
2874 				{
2875 					declareUniform(type, name, index);
2876 				}
2877 				else
2878 				{
2879 					index = blockMemberIndex;
2880 				}
2881 			}
2882 
2883 			return index;
2884 		}
2885 
2886 		return 0;
2887 	}
2888 
attributeRegister(TIntermTyped * attribute)2889 	int OutputASM::attributeRegister(TIntermTyped *attribute)
2890 	{
2891 		ASSERT(!attribute->isArray());
2892 
2893 		int index = lookup(attributes, attribute);
2894 
2895 		if(index == -1)
2896 		{
2897 			TIntermSymbol *symbol = attribute->getAsSymbolNode();
2898 			ASSERT(symbol);
2899 
2900 			if(symbol)
2901 			{
2902 				index = allocate(attributes, attribute);
2903 				const TType &type = attribute->getType();
2904 				int registerCount = attribute->totalRegisterCount();
2905 				sw::VertexShader::AttribType attribType = sw::VertexShader::ATTRIBTYPE_FLOAT;
2906 				switch(type.getBasicType())
2907 				{
2908 				case EbtInt:
2909 					attribType = sw::VertexShader::ATTRIBTYPE_INT;
2910 					break;
2911 				case EbtUInt:
2912 					attribType = sw::VertexShader::ATTRIBTYPE_UINT;
2913 					break;
2914 				case EbtFloat:
2915 				default:
2916 					break;
2917 				}
2918 
2919 				if(vertexShader && (index + registerCount) <= sw::MAX_VERTEX_INPUTS)
2920 				{
2921 					for(int i = 0; i < registerCount; i++)
2922 					{
2923 						vertexShader->setInput(index + i, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i, false), attribType);
2924 					}
2925 				}
2926 
2927 				ActiveAttributes &activeAttributes = shaderObject->activeAttributes;
2928 
2929 				const char *name = symbol->getSymbol().c_str();
2930 				activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index));
2931 			}
2932 		}
2933 
2934 		return index;
2935 	}
2936 
fragmentOutputRegister(TIntermTyped * fragmentOutput)2937 	int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput)
2938 	{
2939 		return allocate(fragmentOutputs, fragmentOutput);
2940 	}
2941 
samplerRegister(TIntermTyped * sampler)2942 	int OutputASM::samplerRegister(TIntermTyped *sampler)
2943 	{
2944 		const TType &type = sampler->getType();
2945 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
2946 
2947 		TIntermSymbol *symbol = sampler->getAsSymbolNode();
2948 		TIntermBinary *binary = sampler->getAsBinaryNode();
2949 
2950 		if(symbol)
2951 		{
2952 			switch(type.getQualifier())
2953 			{
2954 			case EvqUniform:
2955 				return samplerRegister(symbol);
2956 			case EvqIn:
2957 			case EvqConstReadOnly:
2958 				// Function arguments are not (uniform) sampler registers
2959 				return -1;
2960 			default:
2961 				UNREACHABLE(type.getQualifier());
2962 			}
2963 		}
2964 		else if(binary)
2965 		{
2966 			TIntermTyped *left = binary->getLeft();
2967 			TIntermTyped *right = binary->getRight();
2968 			const TType &leftType = left->getType();
2969 			int index = right->getAsConstantUnion() ? right->getAsConstantUnion()->getIConst(0) : 0;
2970 			int offset = 0;
2971 
2972 			switch(binary->getOp())
2973 			{
2974 			case EOpIndexDirect:
2975 				ASSERT(left->isArray());
2976 				offset = index * leftType.elementRegisterCount();
2977 				break;
2978 			case EOpIndexDirectStruct:
2979 				ASSERT(leftType.isStruct());
2980 				{
2981 					const TFieldList &fields = leftType.getStruct()->fields();
2982 
2983 					for(int i = 0; i < index; i++)
2984 					{
2985 						offset += fields[i]->type()->totalRegisterCount();
2986 					}
2987 				}
2988 				break;
2989 			case EOpIndexIndirect:               // Indirect indexing produces a temporary, not a sampler register
2990 				return -1;
2991 			case EOpIndexDirectInterfaceBlock:   // Interface blocks can't contain samplers
2992 			default:
2993 				UNREACHABLE(binary->getOp());
2994 				return -1;
2995 			}
2996 
2997 			int base = samplerRegister(left);
2998 
2999 			if(base < 0)
3000 			{
3001 				return -1;
3002 			}
3003 
3004 			return base + offset;
3005 		}
3006 
3007 		UNREACHABLE(0);
3008 		return -1;   // Not a (uniform) sampler register
3009 	}
3010 
samplerRegister(TIntermSymbol * sampler)3011 	int OutputASM::samplerRegister(TIntermSymbol *sampler)
3012 	{
3013 		const TType &type = sampler->getType();
3014 		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
3015 
3016 		int index = lookup(samplers, sampler);
3017 
3018 		if(index == -1)
3019 		{
3020 			index = allocate(samplers, sampler);
3021 
3022 			if(sampler->getQualifier() == EvqUniform)
3023 			{
3024 				const char *name = sampler->getSymbol().c_str();
3025 				declareUniform(type, name, index);
3026 			}
3027 		}
3028 
3029 		return index;
3030 	}
3031 
isSamplerRegister(TIntermTyped * operand)3032 	bool OutputASM::isSamplerRegister(TIntermTyped *operand)
3033 	{
3034 		return operand && IsSampler(operand->getBasicType()) && samplerRegister(operand) >= 0;
3035 	}
3036 
lookup(VariableArray & list,TIntermTyped * variable)3037 	int OutputASM::lookup(VariableArray &list, TIntermTyped *variable)
3038 	{
3039 		for(unsigned int i = 0; i < list.size(); i++)
3040 		{
3041 			if(list[i] == variable)
3042 			{
3043 				return i;   // Pointer match
3044 			}
3045 		}
3046 
3047 		TIntermSymbol *varSymbol = variable->getAsSymbolNode();
3048 		TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock();
3049 
3050 		if(varBlock)
3051 		{
3052 			for(unsigned int i = 0; i < list.size(); i++)
3053 			{
3054 				if(list[i])
3055 				{
3056 					TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock();
3057 
3058 					if(listBlock)
3059 					{
3060 						if(listBlock->name() == varBlock->name())
3061 						{
3062 							ASSERT(listBlock->arraySize() == varBlock->arraySize());
3063 							ASSERT(listBlock->fields() == varBlock->fields());
3064 							ASSERT(listBlock->blockStorage() == varBlock->blockStorage());
3065 							ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking());
3066 
3067 							return i;
3068 						}
3069 					}
3070 				}
3071 			}
3072 		}
3073 		else if(varSymbol)
3074 		{
3075 			for(unsigned int i = 0; i < list.size(); i++)
3076 			{
3077 				if(list[i])
3078 				{
3079 					TIntermSymbol *listSymbol = list[i]->getAsSymbolNode();
3080 
3081 					if(listSymbol)
3082 					{
3083 						if(listSymbol->getId() == varSymbol->getId())
3084 						{
3085 							ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol());
3086 							ASSERT(listSymbol->getType() == varSymbol->getType());
3087 							ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier());
3088 
3089 							return i;
3090 						}
3091 					}
3092 				}
3093 			}
3094 		}
3095 
3096 		return -1;
3097 	}
3098 
lookup(VariableArray & list,TInterfaceBlock * block)3099 	int OutputASM::lookup(VariableArray &list, TInterfaceBlock *block)
3100 	{
3101 		for(unsigned int i = 0; i < list.size(); i++)
3102 		{
3103 			if(list[i] && (list[i]->getType().getInterfaceBlock() == block))
3104 			{
3105 				return i;   // Pointer match
3106 			}
3107 		}
3108 		return -1;
3109 	}
3110 
allocate(VariableArray & list,TIntermTyped * variable)3111 	int OutputASM::allocate(VariableArray &list, TIntermTyped *variable)
3112 	{
3113 		int index = lookup(list, variable);
3114 
3115 		if(index == -1)
3116 		{
3117 			unsigned int registerCount = variable->blockRegisterCount();
3118 
3119 			for(unsigned int i = 0; i < list.size(); i++)
3120 			{
3121 				if(list[i] == 0)
3122 				{
3123 					unsigned int j = 1;
3124 					for( ; j < registerCount && (i + j) < list.size(); j++)
3125 					{
3126 						if(list[i + j] != 0)
3127 						{
3128 							break;
3129 						}
3130 					}
3131 
3132 					if(j == registerCount)   // Found free slots
3133 					{
3134 						for(unsigned int j = 0; j < registerCount; j++)
3135 						{
3136 							list[i + j] = variable;
3137 						}
3138 
3139 						return i;
3140 					}
3141 				}
3142 			}
3143 
3144 			index = list.size();
3145 
3146 			for(unsigned int i = 0; i < registerCount; i++)
3147 			{
3148 				list.push_back(variable);
3149 			}
3150 		}
3151 
3152 		return index;
3153 	}
3154 
free(VariableArray & list,TIntermTyped * variable)3155 	void OutputASM::free(VariableArray &list, TIntermTyped *variable)
3156 	{
3157 		int index = lookup(list, variable);
3158 
3159 		if(index >= 0)
3160 		{
3161 			list[index] = 0;
3162 		}
3163 	}
3164 
blockMemberLookup(const TType & type,const TString & name,int registerIndex)3165 	int OutputASM::blockMemberLookup(const TType &type, const TString &name, int registerIndex)
3166 	{
3167 		const TInterfaceBlock *block = type.getInterfaceBlock();
3168 
3169 		if(block)
3170 		{
3171 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
3172 			const TFieldList& fields = block->fields();
3173 			const TString &blockName = block->name();
3174 			int fieldRegisterIndex = registerIndex;
3175 
3176 			if(!type.isInterfaceBlock())
3177 			{
3178 				// This is a uniform that's part of a block, let's see if the block is already defined
3179 				for(size_t i = 0; i < activeUniformBlocks.size(); ++i)
3180 				{
3181 					if(activeUniformBlocks[i].name == blockName.c_str())
3182 					{
3183 						// The block is already defined, find the register for the current uniform and return it
3184 						for(size_t j = 0; j < fields.size(); j++)
3185 						{
3186 							const TString &fieldName = fields[j]->name();
3187 							if(fieldName == name)
3188 							{
3189 								return fieldRegisterIndex;
3190 							}
3191 
3192 							fieldRegisterIndex += fields[j]->type()->totalRegisterCount();
3193 						}
3194 
3195 						ASSERT(false);
3196 						return fieldRegisterIndex;
3197 					}
3198 				}
3199 			}
3200 		}
3201 
3202 		return -1;
3203 	}
3204 
declareUniform(const TType & type,const TString & name,int registerIndex,int blockId,BlockLayoutEncoder * encoder)3205 	void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, int blockId, BlockLayoutEncoder* encoder)
3206 	{
3207 		const TStructure *structure = type.getStruct();
3208 		const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr;
3209 
3210 		if(!structure && !block)
3211 		{
3212 			ActiveUniforms &activeUniforms = shaderObject->activeUniforms;
3213 			const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo();
3214 			if(blockId >= 0)
3215 			{
3216 				blockDefinitions[blockId][registerIndex] = TypedMemberInfo(blockInfo, type);
3217 				shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size());
3218 			}
3219 			int fieldRegisterIndex = encoder ? shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex;
3220 			activeUniforms.push_back(Uniform(glVariableType(type), glVariablePrecision(type), name.c_str(), type.getArraySize(),
3221 			                                 fieldRegisterIndex, blockId, blockInfo));
3222 			if(IsSampler(type.getBasicType()))
3223 			{
3224 				for(int i = 0; i < type.totalRegisterCount(); i++)
3225 				{
3226 					shader->declareSampler(fieldRegisterIndex + i);
3227 				}
3228 			}
3229 		}
3230 		else if(block)
3231 		{
3232 			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
3233 			const TFieldList& fields = block->fields();
3234 			const TString &blockName = block->name();
3235 			int fieldRegisterIndex = registerIndex;
3236 			bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1);
3237 
3238 			blockId = activeUniformBlocks.size();
3239 			bool isRowMajor = block->matrixPacking() == EmpRowMajor;
3240 			activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(),
3241 			                                           block->blockStorage(), isRowMajor, registerIndex, blockId));
3242 			blockDefinitions.push_back(BlockDefinitionIndexMap());
3243 
3244 			Std140BlockEncoder currentBlockEncoder(isRowMajor);
3245 			currentBlockEncoder.enterAggregateType();
3246 			for(size_t i = 0; i < fields.size(); i++)
3247 			{
3248 				const TType &fieldType = *(fields[i]->type());
3249 				const TString &fieldName = fields[i]->name();
3250 				if(isUniformBlockMember && (fieldName == name))
3251 				{
3252 					registerIndex = fieldRegisterIndex;
3253 				}
3254 
3255 				const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName;
3256 
3257 				declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, &currentBlockEncoder);
3258 				fieldRegisterIndex += fieldType.totalRegisterCount();
3259 			}
3260 			currentBlockEncoder.exitAggregateType();
3261 			activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize();
3262 		}
3263 		else
3264 		{
3265 			int fieldRegisterIndex = registerIndex;
3266 
3267 			const TFieldList& fields = structure->fields();
3268 			if(type.isArray() && (structure || type.isInterfaceBlock()))
3269 			{
3270 				for(int i = 0; i < type.getArraySize(); i++)
3271 				{
3272 					if(encoder)
3273 					{
3274 						encoder->enterAggregateType();
3275 					}
3276 					for(size_t j = 0; j < fields.size(); j++)
3277 					{
3278 						const TType &fieldType = *(fields[j]->type());
3279 						const TString &fieldName = fields[j]->name();
3280 						const TString uniformName = name + "[" + str(i) + "]." + fieldName;
3281 
3282 						declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder);
3283 						fieldRegisterIndex += fieldType.totalRegisterCount();
3284 					}
3285 					if(encoder)
3286 					{
3287 						encoder->exitAggregateType();
3288 					}
3289 				}
3290 			}
3291 			else
3292 			{
3293 				if(encoder)
3294 				{
3295 					encoder->enterAggregateType();
3296 				}
3297 				for(size_t i = 0; i < fields.size(); i++)
3298 				{
3299 					const TType &fieldType = *(fields[i]->type());
3300 					const TString &fieldName = fields[i]->name();
3301 					const TString uniformName = name + "." + fieldName;
3302 
3303 					declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder);
3304 					fieldRegisterIndex += fieldType.totalRegisterCount();
3305 				}
3306 				if(encoder)
3307 				{
3308 					encoder->exitAggregateType();
3309 				}
3310 			}
3311 		}
3312 	}
3313 
glVariableType(const TType & type)3314 	GLenum OutputASM::glVariableType(const TType &type)
3315 	{
3316 		switch(type.getBasicType())
3317 		{
3318 		case EbtFloat:
3319 			if(type.isScalar())
3320 			{
3321 				return GL_FLOAT;
3322 			}
3323 			else if(type.isVector())
3324 			{
3325 				switch(type.getNominalSize())
3326 				{
3327 				case 2: return GL_FLOAT_VEC2;
3328 				case 3: return GL_FLOAT_VEC3;
3329 				case 4: return GL_FLOAT_VEC4;
3330 				default: UNREACHABLE(type.getNominalSize());
3331 				}
3332 			}
3333 			else if(type.isMatrix())
3334 			{
3335 				switch(type.getNominalSize())
3336 				{
3337 				case 2:
3338 					switch(type.getSecondarySize())
3339 					{
3340 					case 2: return GL_FLOAT_MAT2;
3341 					case 3: return GL_FLOAT_MAT2x3;
3342 					case 4: return GL_FLOAT_MAT2x4;
3343 					default: UNREACHABLE(type.getSecondarySize());
3344 					}
3345 				case 3:
3346 					switch(type.getSecondarySize())
3347 					{
3348 					case 2: return GL_FLOAT_MAT3x2;
3349 					case 3: return GL_FLOAT_MAT3;
3350 					case 4: return GL_FLOAT_MAT3x4;
3351 					default: UNREACHABLE(type.getSecondarySize());
3352 					}
3353 				case 4:
3354 					switch(type.getSecondarySize())
3355 					{
3356 					case 2: return GL_FLOAT_MAT4x2;
3357 					case 3: return GL_FLOAT_MAT4x3;
3358 					case 4: return GL_FLOAT_MAT4;
3359 					default: UNREACHABLE(type.getSecondarySize());
3360 					}
3361 				default: UNREACHABLE(type.getNominalSize());
3362 				}
3363 			}
3364 			else UNREACHABLE(0);
3365 			break;
3366 		case EbtInt:
3367 			if(type.isScalar())
3368 			{
3369 				return GL_INT;
3370 			}
3371 			else if(type.isVector())
3372 			{
3373 				switch(type.getNominalSize())
3374 				{
3375 				case 2: return GL_INT_VEC2;
3376 				case 3: return GL_INT_VEC3;
3377 				case 4: return GL_INT_VEC4;
3378 				default: UNREACHABLE(type.getNominalSize());
3379 				}
3380 			}
3381 			else UNREACHABLE(0);
3382 			break;
3383 		case EbtUInt:
3384 			if(type.isScalar())
3385 			{
3386 				return GL_UNSIGNED_INT;
3387 			}
3388 			else if(type.isVector())
3389 			{
3390 				switch(type.getNominalSize())
3391 				{
3392 				case 2: return GL_UNSIGNED_INT_VEC2;
3393 				case 3: return GL_UNSIGNED_INT_VEC3;
3394 				case 4: return GL_UNSIGNED_INT_VEC4;
3395 				default: UNREACHABLE(type.getNominalSize());
3396 				}
3397 			}
3398 			else UNREACHABLE(0);
3399 			break;
3400 		case EbtBool:
3401 			if(type.isScalar())
3402 			{
3403 				return GL_BOOL;
3404 			}
3405 			else if(type.isVector())
3406 			{
3407 				switch(type.getNominalSize())
3408 				{
3409 				case 2: return GL_BOOL_VEC2;
3410 				case 3: return GL_BOOL_VEC3;
3411 				case 4: return GL_BOOL_VEC4;
3412 				default: UNREACHABLE(type.getNominalSize());
3413 				}
3414 			}
3415 			else UNREACHABLE(0);
3416 			break;
3417 		case EbtSampler2D:
3418 			return GL_SAMPLER_2D;
3419 		case EbtISampler2D:
3420 			return GL_INT_SAMPLER_2D;
3421 		case EbtUSampler2D:
3422 			return GL_UNSIGNED_INT_SAMPLER_2D;
3423 		case EbtSamplerCube:
3424 			return GL_SAMPLER_CUBE;
3425 		case EbtISamplerCube:
3426 			return GL_INT_SAMPLER_CUBE;
3427 		case EbtUSamplerCube:
3428 			return GL_UNSIGNED_INT_SAMPLER_CUBE;
3429 		case EbtSamplerExternalOES:
3430 			return GL_SAMPLER_EXTERNAL_OES;
3431 		case EbtSampler3D:
3432 			return GL_SAMPLER_3D_OES;
3433 		case EbtISampler3D:
3434 			return GL_INT_SAMPLER_3D;
3435 		case EbtUSampler3D:
3436 			return GL_UNSIGNED_INT_SAMPLER_3D;
3437 		case EbtSampler2DArray:
3438 			return GL_SAMPLER_2D_ARRAY;
3439 		case EbtISampler2DArray:
3440 			return GL_INT_SAMPLER_2D_ARRAY;
3441 		case EbtUSampler2DArray:
3442 			return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY;
3443 		case EbtSampler2DShadow:
3444 			return GL_SAMPLER_2D_SHADOW;
3445 		case EbtSamplerCubeShadow:
3446 			return GL_SAMPLER_CUBE_SHADOW;
3447 		case EbtSampler2DArrayShadow:
3448 			return GL_SAMPLER_2D_ARRAY_SHADOW;
3449 		default:
3450 			UNREACHABLE(type.getBasicType());
3451 			break;
3452 		}
3453 
3454 		return GL_NONE;
3455 	}
3456 
glVariablePrecision(const TType & type)3457 	GLenum OutputASM::glVariablePrecision(const TType &type)
3458 	{
3459 		if(type.getBasicType() == EbtFloat)
3460 		{
3461 			switch(type.getPrecision())
3462 			{
3463 			case EbpHigh:   return GL_HIGH_FLOAT;
3464 			case EbpMedium: return GL_MEDIUM_FLOAT;
3465 			case EbpLow:    return GL_LOW_FLOAT;
3466 			case EbpUndefined:
3467 				// Should be defined as the default precision by the parser
3468 			default: UNREACHABLE(type.getPrecision());
3469 			}
3470 		}
3471 		else if(type.getBasicType() == EbtInt)
3472 		{
3473 			switch(type.getPrecision())
3474 			{
3475 			case EbpHigh:   return GL_HIGH_INT;
3476 			case EbpMedium: return GL_MEDIUM_INT;
3477 			case EbpLow:    return GL_LOW_INT;
3478 			case EbpUndefined:
3479 				// Should be defined as the default precision by the parser
3480 			default: UNREACHABLE(type.getPrecision());
3481 			}
3482 		}
3483 
3484 		// Other types (boolean, sampler) don't have a precision
3485 		return GL_NONE;
3486 	}
3487 
dim(TIntermNode * v)3488 	int OutputASM::dim(TIntermNode *v)
3489 	{
3490 		TIntermTyped *vector = v->getAsTyped();
3491 		ASSERT(vector && vector->isRegister());
3492 		return vector->getNominalSize();
3493 	}
3494 
dim2(TIntermNode * m)3495 	int OutputASM::dim2(TIntermNode *m)
3496 	{
3497 		TIntermTyped *matrix = m->getAsTyped();
3498 		ASSERT(matrix && matrix->isMatrix() && !matrix->isArray());
3499 		return matrix->getSecondarySize();
3500 	}
3501 
3502 	// Returns ~0u if no loop count could be determined
loopCount(TIntermLoop * node)3503 	unsigned int OutputASM::loopCount(TIntermLoop *node)
3504 	{
3505 		// Parse loops of the form:
3506 		// for(int index = initial; index [comparator] limit; index += increment)
3507 		TIntermSymbol *index = 0;
3508 		TOperator comparator = EOpNull;
3509 		int initial = 0;
3510 		int limit = 0;
3511 		int increment = 0;
3512 
3513 		// Parse index name and intial value
3514 		if(node->getInit())
3515 		{
3516 			TIntermAggregate *init = node->getInit()->getAsAggregate();
3517 
3518 			if(init)
3519 			{
3520 				TIntermSequence &sequence = init->getSequence();
3521 				TIntermTyped *variable = sequence[0]->getAsTyped();
3522 
3523 				if(variable && variable->getQualifier() == EvqTemporary && variable->getBasicType() == EbtInt)
3524 				{
3525 					TIntermBinary *assign = variable->getAsBinaryNode();
3526 
3527 					if(assign && assign->getOp() == EOpInitialize)
3528 					{
3529 						TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode();
3530 						TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion();
3531 
3532 						if(symbol && constant)
3533 						{
3534 							if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3535 							{
3536 								index = symbol;
3537 								initial = constant->getUnionArrayPointer()[0].getIConst();
3538 							}
3539 						}
3540 					}
3541 				}
3542 			}
3543 		}
3544 
3545 		// Parse comparator and limit value
3546 		if(index && node->getCondition())
3547 		{
3548 			TIntermBinary *test = node->getCondition()->getAsBinaryNode();
3549 			TIntermSymbol *left = test ? test->getLeft()->getAsSymbolNode() : nullptr;
3550 
3551 			if(left && (left->getId() == index->getId()))
3552 			{
3553 				TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion();
3554 
3555 				if(constant)
3556 				{
3557 					if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3558 					{
3559 						comparator = test->getOp();
3560 						limit = constant->getUnionArrayPointer()[0].getIConst();
3561 					}
3562 				}
3563 			}
3564 		}
3565 
3566 		// Parse increment
3567 		if(index && comparator != EOpNull && node->getExpression())
3568 		{
3569 			TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode();
3570 			TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode();
3571 
3572 			if(binaryTerminal)
3573 			{
3574 				TOperator op = binaryTerminal->getOp();
3575 				TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion();
3576 
3577 				if(constant)
3578 				{
3579 					if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
3580 					{
3581 						int value = constant->getUnionArrayPointer()[0].getIConst();
3582 
3583 						switch(op)
3584 						{
3585 						case EOpAddAssign: increment = value;  break;
3586 						case EOpSubAssign: increment = -value; break;
3587 						default: UNIMPLEMENTED();
3588 						}
3589 					}
3590 				}
3591 			}
3592 			else if(unaryTerminal)
3593 			{
3594 				TOperator op = unaryTerminal->getOp();
3595 
3596 				switch(op)
3597 				{
3598 				case EOpPostIncrement: increment = 1;  break;
3599 				case EOpPostDecrement: increment = -1; break;
3600 				case EOpPreIncrement:  increment = 1;  break;
3601 				case EOpPreDecrement:  increment = -1; break;
3602 				default: UNIMPLEMENTED();
3603 				}
3604 			}
3605 		}
3606 
3607 		if(index && comparator != EOpNull && increment != 0)
3608 		{
3609 			if(comparator == EOpLessThanEqual)
3610 			{
3611 				comparator = EOpLessThan;
3612 				limit += 1;
3613 			}
3614 			else if(comparator == EOpGreaterThanEqual)
3615 			{
3616 				comparator = EOpLessThan;
3617 				limit -= 1;
3618 				std::swap(initial, limit);
3619 				increment = -increment;
3620 			}
3621 			else if(comparator == EOpGreaterThan)
3622 			{
3623 				comparator = EOpLessThan;
3624 				std::swap(initial, limit);
3625 				increment = -increment;
3626 			}
3627 
3628 			if(comparator == EOpLessThan)
3629 			{
3630 				if(!(initial < limit))   // Never loops
3631 				{
3632 					return 0;
3633 				}
3634 
3635 				int iterations = (limit - initial + abs(increment) - 1) / increment;   // Ceiling division
3636 
3637 				if(iterations < 0)
3638 				{
3639 					return ~0u;
3640 				}
3641 
3642 				return iterations;
3643 			}
3644 			else UNIMPLEMENTED();   // Falls through
3645 		}
3646 
3647 		return ~0u;
3648 	}
3649 
traverse(TIntermNode * node)3650 	bool LoopUnrollable::traverse(TIntermNode *node)
3651 	{
3652 		loopDepth = 0;
3653 		loopUnrollable = true;
3654 
3655 		node->traverse(this);
3656 
3657 		return loopUnrollable;
3658 	}
3659 
visitLoop(Visit visit,TIntermLoop * loop)3660 	bool LoopUnrollable::visitLoop(Visit visit, TIntermLoop *loop)
3661 	{
3662 		if(visit == PreVisit)
3663 		{
3664 			loopDepth++;
3665 		}
3666 		else if(visit == PostVisit)
3667 		{
3668 			loopDepth++;
3669 		}
3670 
3671 		return true;
3672 	}
3673 
visitBranch(Visit visit,TIntermBranch * node)3674 	bool LoopUnrollable::visitBranch(Visit visit, TIntermBranch *node)
3675 	{
3676 		if(!loopUnrollable)
3677 		{
3678 			return false;
3679 		}
3680 
3681 		if(!loopDepth)
3682 		{
3683 			return true;
3684 		}
3685 
3686 		switch(node->getFlowOp())
3687 		{
3688 		case EOpKill:
3689 		case EOpReturn:
3690 			break;
3691 		case EOpBreak:
3692 		case EOpContinue:
3693 			loopUnrollable = false;
3694 			break;
3695 		default: UNREACHABLE(node->getFlowOp());
3696 		}
3697 
3698 		return loopUnrollable;
3699 	}
3700 
visitAggregate(Visit visit,TIntermAggregate * node)3701 	bool LoopUnrollable::visitAggregate(Visit visit, TIntermAggregate *node)
3702 	{
3703 		return loopUnrollable;
3704 	}
3705 }
3706