• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2018 The Khronos Group Inc.
6  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7  * Copyright (c) 2016 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Precision and range tests for builtins and types.
24  *
25  *//*--------------------------------------------------------------------*/
26 
27 #include "vktShaderBuiltinPrecisionTests.hpp"
28 #include "vktShaderExecutor.hpp"
29 #include "amber/vktAmberTestCase.hpp"
30 
31 #include "deMath.h"
32 #include "deMemory.h"
33 #include "deFloat16.h"
34 #include "deDefs.hpp"
35 #include "deRandom.hpp"
36 #include "deSTLUtil.hpp"
37 #include "deStringUtil.hpp"
38 #include "deUniquePtr.hpp"
39 #include "deSharedPtr.hpp"
40 #include "deArrayUtil.hpp"
41 
42 #include "tcuCommandLine.hpp"
43 #include "tcuFloatFormat.hpp"
44 #include "tcuInterval.hpp"
45 #include "tcuTestLog.hpp"
46 #include "tcuVector.hpp"
47 #include "tcuMatrix.hpp"
48 #include "tcuResultCollector.hpp"
49 #include "tcuMaybe.hpp"
50 
51 #include "gluContextInfo.hpp"
52 #include "gluVarType.hpp"
53 #include "gluRenderContext.hpp"
54 #include "glwDefs.hpp"
55 
#include <cmath>
#include <cstring>
#include <string>
#include <sstream>
#include <iostream>
#include <map>
#include <utility>
#include <limits>
63 
64 // Uncomment this to get evaluation trace dumps to std::cerr
65 // #define GLS_ENABLE_TRACE
66 
67 // set this to true to dump even passing results
68 #define GLS_LOG_ALL_RESULTS false
69 
70 #define FLOAT16_1_0		0x3C00 //1.0 float16bit
71 #define FLOAT16_180_0	0x59A0 //180.0 float16bit
72 #define FLOAT16_2_0		0x4000 //2.0 float16bit
73 #define FLOAT16_3_0		0x4200 //3.0 float16bit
74 #define FLOAT16_0_5		0x3800 //0.5 float16bit
75 #define FLOAT16_0_0		0x0000 //0.0 float16bit
76 
77 
78 using tcu::Vector;
79 typedef Vector<deFloat16, 1>	Vec1_16Bit;
80 typedef Vector<deFloat16, 2>	Vec2_16Bit;
81 typedef Vector<deFloat16, 3>	Vec3_16Bit;
82 typedef Vector<deFloat16, 4>	Vec4_16Bit;
83 
84 typedef Vector<double, 1>		Vec1_64Bit;
85 typedef Vector<double, 2>		Vec2_64Bit;
86 typedef Vector<double, 3>		Vec3_64Bit;
87 typedef Vector<double, 4>		Vec4_64Bit;
88 
89 enum
90 {
91 	// Computing reference intervals can take a non-trivial amount of time, especially on
92 	// platforms where toggling floating-point rounding mode is slow (emulated arm on x86).
93 	// As a workaround watchdog is kept happy by touching it periodically during reference
94 	// interval computation.
95 	TOUCH_WATCHDOG_VALUE_FREQUENCY	= 512
96 };
97 
98 namespace vkt
99 {
100 namespace shaderexecutor
101 {
102 
103 using std::string;
104 using std::map;
105 using std::ostream;
106 using std::ostringstream;
107 using std::pair;
108 using std::vector;
109 using std::set;
110 
111 using de::MovePtr;
112 using de::Random;
113 using de::SharedPtr;
114 using de::UniquePtr;
115 using tcu::Interval;
116 using tcu::FloatFormat;
117 using tcu::MessageBuilder;
118 using tcu::TestLog;
119 using tcu::Vector;
120 using tcu::Matrix;
121 using glu::Precision;
122 using glu::VarType;
123 using glu::DataType;
124 using glu::ShaderType;
125 
126 enum PrecisionTestFeatureBits
127 {
128 	PRECISION_TEST_FEATURES_NONE									= 0u,
129 	PRECISION_TEST_FEATURES_16BIT_BUFFER_ACCESS						= (1u << 1),
130 	PRECISION_TEST_FEATURES_16BIT_UNIFORM_AND_STORAGE_BUFFER_ACCESS	= (1u << 2),
131 	PRECISION_TEST_FEATURES_16BIT_PUSH_CONSTANT						= (1u << 3),
132 	PRECISION_TEST_FEATURES_16BIT_INPUT_OUTPUT						= (1u << 4),
133 	PRECISION_TEST_FEATURES_16BIT_SHADER_FLOAT						= (1u << 5),
134 	PRECISION_TEST_FEATURES_64BIT_SHADER_FLOAT						= (1u << 6),
135 };
136 typedef deUint32 PrecisionTestFeatures;
137 
138 
areFeaturesSupported(const Context & context,deUint32 toCheck)139 void areFeaturesSupported (const Context& context, deUint32 toCheck)
140 {
141 	if (toCheck == PRECISION_TEST_FEATURES_NONE) return;
142 
143 	const vk::VkPhysicalDevice16BitStorageFeatures& extensionFeatures = context.get16BitStorageFeatures();
144 
145 	if ((toCheck & PRECISION_TEST_FEATURES_16BIT_BUFFER_ACCESS) != 0 && extensionFeatures.storageBuffer16BitAccess == VK_FALSE)
146 		TCU_THROW(NotSupportedError, "Requested 16bit storage features not supported");
147 
148 	if ((toCheck & PRECISION_TEST_FEATURES_16BIT_UNIFORM_AND_STORAGE_BUFFER_ACCESS) != 0 && extensionFeatures.uniformAndStorageBuffer16BitAccess == VK_FALSE)
149 		TCU_THROW(NotSupportedError, "Requested 16bit storage features not supported");
150 
151 	if ((toCheck & PRECISION_TEST_FEATURES_16BIT_PUSH_CONSTANT) != 0 && extensionFeatures.storagePushConstant16 == VK_FALSE)
152 		TCU_THROW(NotSupportedError, "Requested 16bit storage features not supported");
153 
154 	if ((toCheck & PRECISION_TEST_FEATURES_16BIT_INPUT_OUTPUT) != 0 && extensionFeatures.storageInputOutput16 == VK_FALSE)
155 		TCU_THROW(NotSupportedError, "Requested 16bit storage features not supported");
156 
157 	if ((toCheck & PRECISION_TEST_FEATURES_16BIT_SHADER_FLOAT) != 0 && context.getShaderFloat16Int8Features().shaderFloat16 == VK_FALSE)
158 		TCU_THROW(NotSupportedError, "Requested 16-bit floats (halfs) are not supported in shader code");
159 
160 	if ((toCheck & PRECISION_TEST_FEATURES_64BIT_SHADER_FLOAT) != 0 && context.getDeviceFeatures().shaderFloat64 == VK_FALSE)
161 		TCU_THROW(NotSupportedError, "Requested 64-bit floats are not supported in shader code");
162 }
163 
/*--------------------------------------------------------------------*//*!
 * \brief Generic singleton accessor.
 *
 * instance<T>() hands out a reference to one shared, value-initialized,
 * immutable object of type T; every call with the same T yields the same
 * object. The GLSL function implementations rely on this: each function is
 * modelled by an object of its own distinct class, and keeping a dedicated
 * context object holding one instance of every such class would be
 * extremely toilsome, so global singletons are used instead.
 *
 *//*--------------------------------------------------------------------*/
template <typename T>
const T& instance (void)
{
	// Function-local static: created on first use, then reused by every later call.
	static const T s_singleton = T();

	return s_singleton;
}
181 
182 /*--------------------------------------------------------------------*//*!
183  * \brief Dummy placeholder type for unused template parameters.
184  *
185  * In the precision tests we are dealing with functions of different arities.
186  * To minimize code duplication, we only define templates with the maximum
187  * number of arguments, currently four. If a function's arity is less than the
188  * maximum, Void us used as the type for unused arguments.
189  *
190  * Although Voids are not used at run-time, they still must be compilable, so
191  * they must support all operations that other types do.
192  *
193  *//*--------------------------------------------------------------------*/
194 struct Void
195 {
196 	typedef	Void		Element;
197 	enum
198 	{
199 		SIZE = 0,
200 	};
201 
202 	template <typename T>
Voidvkt::shaderexecutor::Void203 	explicit			Void			(const T&)		{}
Voidvkt::shaderexecutor::Void204 						Void			(void)			{}
operator doublevkt::shaderexecutor::Void205 						operator double	(void)	const	{ return TCU_NAN; }
206 
207 	// These are used to make Voids usable as containers in container-generic code.
operator []vkt::shaderexecutor::Void208 	Void&				operator[]		(int)			{ return *this; }
operator []vkt::shaderexecutor::Void209 	const Void&			operator[]		(int)	const	{ return *this; }
210 };
211 
operator <<(ostream & os,Void)212 ostream& operator<< (ostream& os, Void) { return os << "()"; }
213 
214 //! Returns true for all other types except Void
isTypeValid(void)215 template <typename T>	bool isTypeValid		(void)	{ return true;	}
isTypeValid(void)216 template <>				bool isTypeValid<Void>	(void)	{ return false;	}
217 
isInteger(void)218 template <typename T>	bool isInteger				(void)	{ return false;	}
isInteger(void)219 template <>				bool isInteger<int>			(void)	{ return true;	}
isInteger(void)220 template <>				bool isInteger<tcu::IVec2>	(void)	{ return true;	}
isInteger(void)221 template <>				bool isInteger<tcu::IVec3>	(void)	{ return true; }
isInteger(void)222 template <>				bool isInteger<tcu::IVec4>	(void)	{ return true; }
223 
224 //! Utility function for getting the name of a data type.
225 //! This is used in vector and matrix constructors.
226 template <typename T>
dataTypeNameOf(void)227 const char* dataTypeNameOf (void)
228 {
229 	return glu::getDataTypeName(glu::dataTypeOf<T>());
230 }
231 
232 template <>
dataTypeNameOf(void)233 const char* dataTypeNameOf<Void> (void)
234 {
235 	DE_FATAL("Impossible");
236 	return DE_NULL;
237 }
238 
239 template <typename T>
getVarTypeOf(Precision prec=glu::PRECISION_LAST)240 VarType getVarTypeOf (Precision prec = glu::PRECISION_LAST)
241 {
242 	return glu::varTypeOf<T>(prec);
243 }
244 
245 //! A hack to get Void support for VarType.
246 template <>
getVarTypeOf(Precision)247 VarType getVarTypeOf<Void> (Precision)
248 {
249 	DE_FATAL("Impossible");
250 	return VarType();
251 }
252 
253 /*--------------------------------------------------------------------*//*!
254  * \brief Type traits for generalized interval types.
255  *
256  * We are trying to compute sets of acceptable values not only for
257  * float-valued expressions but also for compound values: vectors and
258  * matrices. We approximate a set of vectors as a vector of intervals and
259  * likewise for matrices.
260  *
261  * We now need generalized operations for each type and its interval
262  * approximation. These are given in the type Traits<T>.
263  *
264  * The type Traits<T>::IVal is the approximation of T: it is `Interval` for
265  * scalar types, and a vector or matrix of intervals for container types.
266  *
267  * To allow template inference to take place, there are function wrappers for
268  * the actual operations in Traits<T>. Hence we can just use:
269  *
270  * makeIVal(someFloat)
271  *
272  * instead of:
273  *
274  * Traits<float>::doMakeIVal(value)
275  *
276  *//*--------------------------------------------------------------------*/
277 
278 template <typename T> struct Traits;
279 
280 //! Create container from elementwise singleton values.
281 template <typename T>
makeIVal(const T & value)282 typename Traits<T>::IVal makeIVal (const T& value)
283 {
284 	return Traits<T>::doMakeIVal(value);
285 }
286 
287 //! Elementwise union of intervals.
288 template <typename T>
unionIVal(const typename Traits<T>::IVal & a,const typename Traits<T>::IVal & b)289 typename Traits<T>::IVal unionIVal (const typename Traits<T>::IVal& a,
290 									const typename Traits<T>::IVal& b)
291 {
292 	return Traits<T>::doUnion(a, b);
293 }
294 
295 //! Returns true iff every element of `ival` contains the corresponding element of `value`.
296 template <typename T, typename U = Void>
contains(const typename Traits<T>::IVal & ival,const T & value,bool is16Bit=false,const tcu::Maybe<U> & modularDivisor=tcu::Nothing)297 bool contains (const typename Traits<T>::IVal& ival, const T& value, bool is16Bit = false, const tcu::Maybe<U>& modularDivisor = tcu::Nothing)
298 {
299 	return Traits<T>::doContains(ival, value, is16Bit, modularDivisor);
300 }
301 
302 //! Print out an interval with the precision of `fmt`.
303 template <typename T>
printIVal(const FloatFormat & fmt,const typename Traits<T>::IVal & ival,ostream & os)304 void printIVal (const FloatFormat& fmt, const typename Traits<T>::IVal& ival, ostream& os)
305 {
306 	Traits<T>::doPrintIVal(fmt, ival, os);
307 }
308 
309 template <typename T>
intervalToString(const FloatFormat & fmt,const typename Traits<T>::IVal & ival)310 string intervalToString (const FloatFormat& fmt, const typename Traits<T>::IVal& ival)
311 {
312 	ostringstream oss;
313 	printIVal<T>(fmt, ival, oss);
314 	return oss.str();
315 }
316 
317 //! Print out a value with the precision of `fmt`.
318 template <typename T>
printValue16(const FloatFormat & fmt,const T & value,ostream & os)319 void printValue16 (const FloatFormat& fmt, const T& value, ostream& os)
320 {
321 	Traits<T>::doPrintValue16(fmt, value, os);
322 }
323 
324 template <typename T>
value16ToString(const FloatFormat & fmt,const T & val)325 string value16ToString(const FloatFormat& fmt, const T& val)
326 {
327 	ostringstream oss;
328 	printValue16(fmt, val, oss);
329 	return oss.str();
330 }
331 
getComparisonOperation(const int ndx)332 const std::string getComparisonOperation(const int ndx)
333 {
334 	const int operationCount = 10;
335 	DE_ASSERT(de::inBounds(ndx, 0, operationCount));
336 	const std::string operations[operationCount] =
337 	{
338 		"OpFOrdEqual\t\t\t",
339 		"OpFOrdGreaterThan\t",
340 		"OpFOrdLessThan\t\t",
341 		"OpFOrdGreaterThanEqual",
342 		"OpFOrdLessThanEqual\t",
343 		"OpFUnordEqual\t\t",
344 		"OpFUnordGreaterThan\t",
345 		"OpFUnordLessThan\t",
346 		"OpFUnordGreaterThanEqual",
347 		"OpFUnordLessThanEqual"
348 	};
349 	return operations[ndx];
350 }
351 
352 template <typename T>
comparisonMessage(const T & val)353 string comparisonMessage(const T& val)
354 {
355 	DE_UNREF(val);
356 	return "";
357 }
358 
359 template <>
comparisonMessage(const int & val)360 string comparisonMessage(const int& val)
361 {
362 	ostringstream oss;
363 
364 	int flags = val;
365 	for(int ndx = 0; ndx < 10; ++ndx)
366 	{
367 		oss << getComparisonOperation(ndx) << "\t:\t" << ((flags & 1) == 1 ? "TRUE" : "FALSE") << "\n";
368 		flags = flags >> 1;
369 	}
370 	return oss.str();
371 }
372 
373 template <>
comparisonMessage(const tcu::IVec2 & val)374 string comparisonMessage(const tcu::IVec2& val)
375 {
376 	ostringstream oss;
377 	tcu::IVec2 flags = val;
378 	for (int ndx = 0; ndx < 10; ++ndx)
379 	{
380 		oss << getComparisonOperation(ndx) << "\t:\t" << ((flags.x() & 1) == 1 ? "TRUE" : "FALSE") << "\t" << ((flags.y() & 1) == 1 ? "TRUE" : "FALSE") << "\n";
381 		flags.x() = flags.x() >> 1;
382 		flags.y() = flags.y() >> 1;
383 	}
384 	return oss.str();
385 }
386 
387 template <>
comparisonMessage(const tcu::IVec3 & val)388 string comparisonMessage(const tcu::IVec3& val)
389 {
390 	ostringstream oss;
391 	tcu::IVec3 flags = val;
392 	for (int ndx = 0; ndx < 10; ++ndx)
393 	{
394 		oss << getComparisonOperation(ndx) << "\t:\t" << ((flags.x() & 1) == 1 ? "TRUE" : "FALSE") << "\t"
395 								<< ((flags.y() & 1) == 1 ? "TRUE" : "FALSE") << "\t"
396 								<< ((flags.z() & 1) == 1 ? "TRUE" : "FALSE") << "\n";
397 		flags.x() = flags.x() >> 1;
398 		flags.y() = flags.y() >> 1;
399 		flags.z() = flags.z() >> 1;
400 	}
401 	return oss.str();
402 }
403 
404 template <>
comparisonMessage(const tcu::IVec4 & val)405 string comparisonMessage(const tcu::IVec4& val)
406 {
407 	ostringstream oss;
408 	tcu::IVec4 flags = val;
409 	for (int ndx = 0; ndx < 10; ++ndx)
410 	{
411 		oss << getComparisonOperation(ndx) << "\t:\t" << ((flags.x() & 1) == 1 ? "TRUE" : "FALSE") << "\t"
412 			<< ((flags.y() & 1) == 1 ? "TRUE" : "FALSE") << "\t"
413 			<< ((flags.z() & 1) == 1 ? "TRUE" : "FALSE") << "\t"
414 			<< ((flags.w() & 1) == 1 ? "TRUE" : "FALSE") << "\n";
415 		flags.x() = flags.x() >> 1;
416 		flags.y() = flags.y() >> 1;
417 		flags.z() = flags.z() >> 1;
418 		flags.w() = flags.z() >> 1;
419 	}
420 	return oss.str();
421 }
422 //! Print out a value with the precision of `fmt`.
423 template <typename T>
printValue32(const FloatFormat & fmt,const T & value,ostream & os)424 void printValue32 (const FloatFormat& fmt, const T& value, ostream& os)
425 {
426 	Traits<T>::doPrintValue32(fmt, value, os);
427 }
428 
429 template <typename T>
value32ToString(const FloatFormat & fmt,const T & val)430 string value32ToString (const FloatFormat& fmt, const T& val)
431 {
432 	ostringstream oss;
433 	printValue32(fmt, val, oss);
434 	return oss.str();
435 }
436 
437 template <typename T>
printValue64(const FloatFormat & fmt,const T & value,ostream & os)438 void printValue64 (const FloatFormat& fmt, const T& value, ostream& os)
439 {
440 	Traits<T>::doPrintValue64(fmt, value, os);
441 }
442 
443 template <typename T>
value64ToString(const FloatFormat & fmt,const T & val)444 string value64ToString (const FloatFormat& fmt, const T& val)
445 {
446 	ostringstream oss;
447 	printValue64(fmt, val, oss);
448 	return oss.str();
449 }
450 
451 //! Approximate `value` elementwise to the float precision defined in `fmt`.
452 //! The resulting interval might not be a singleton if rounding in both
453 //! directions is allowed.
454 template <typename T>
round(const FloatFormat & fmt,const T & value)455 typename Traits<T>::IVal round (const FloatFormat& fmt, const T& value)
456 {
457 	return Traits<T>::doRound(fmt, value);
458 }
459 
460 template <typename T>
convert(const FloatFormat & fmt,const typename Traits<T>::IVal & value)461 typename Traits<T>::IVal convert (const FloatFormat&				fmt,
462 								  const typename Traits<T>::IVal&	value)
463 {
464 	return Traits<T>::doConvert(fmt, value);
465 }
466 
467 // Matching input and output types. We may be in a modulo case and modularDivisor may have an actual value.
468 template <typename T>
intervalContains(const Interval & interval,T value,const tcu::Maybe<T> & modularDivisor)469 bool intervalContains (const Interval& interval, T value, const tcu::Maybe<T>& modularDivisor)
470 {
471 	bool contained = interval.contains(value);
472 
473 	if (!contained && modularDivisor)
474 	{
475 		const T divisor = modularDivisor.get();
476 
477 		// In a modulo operation, if the calculated answer contains the divisor, allow exactly 0.0 as a replacement. Alternatively,
478 		// if the calculated answer contains 0.0, allow exactly the divisor as a replacement.
479 		if (interval.contains(static_cast<double>(divisor)))
480 			contained |= (value == 0.0);
481 		if (interval.contains(0.0))
482 			contained |= (value == divisor);
483 	}
484 	return contained;
485 }
486 
487 // When the input and output types do not match, we are not in a real modulo operation. Do not take the divisor into account. This
488 // version is provided for syntactical compatibility only.
489 template <typename T, typename U>
intervalContains(const Interval & interval,T value,const tcu::Maybe<U> & modularDivisor)490 bool intervalContains (const Interval& interval, T value, const tcu::Maybe<U>& modularDivisor)
491 {
492 	DE_UNREF(modularDivisor);		// For release builds.
493 	DE_ASSERT(!modularDivisor);
494 	return interval.contains(value);
495 }
496 
497 //! Common traits for scalar types.
498 template <typename T>
499 struct ScalarTraits
500 {
501 	typedef				Interval		IVal;
502 
doMakeIValvkt::shaderexecutor::ScalarTraits503 	static Interval		doMakeIVal		(const T& value)
504 	{
505 		// Thankfully all scalar types have a well-defined conversion to `double`,
506 		// hence Interval can represent their ranges without problems.
507 		return Interval(double(value));
508 	}
509 
doUnionvkt::shaderexecutor::ScalarTraits510 	static Interval		doUnion			(const Interval& a, const Interval& b)
511 	{
512 		return a | b;
513 	}
514 
doContainsvkt::shaderexecutor::ScalarTraits515 	static bool			doContains		(const Interval& a, T value)
516 	{
517 		return a.contains(double(value));
518 	}
519 
doConvertvkt::shaderexecutor::ScalarTraits520 	static Interval		doConvert		(const FloatFormat& fmt, const IVal& ival)
521 	{
522 		return fmt.convert(ival);
523 	}
524 
doConvertvkt::shaderexecutor::ScalarTraits525 	static Interval		doConvert		(const FloatFormat& fmt, const IVal& ival, bool is16Bit)
526 	{
527 		DE_UNREF(is16Bit);
528 		return fmt.convert(ival);
529 	}
530 
doRoundvkt::shaderexecutor::ScalarTraits531 	static Interval		doRound			(const FloatFormat& fmt, T value)
532 	{
533 		return fmt.roundOut(double(value), false);
534 	}
535 };
536 
537 template <>
538 struct ScalarTraits<deUint16>
539 {
540 	typedef				Interval		IVal;
541 
doMakeIValvkt::shaderexecutor::ScalarTraits542 	static Interval		doMakeIVal		(const deUint16& value)
543 	{
544 		// Thankfully all scalar types have a well-defined conversion to `double`,
545 		// hence Interval can represent their ranges without problems.
546 		return Interval(double(deFloat16To32(value)));
547 	}
548 
doUnionvkt::shaderexecutor::ScalarTraits549 	static Interval		doUnion			(const Interval& a, const Interval& b)
550 	{
551 		return a | b;
552 	}
553 
doConvertvkt::shaderexecutor::ScalarTraits554 	static Interval		doConvert		(const FloatFormat& fmt, const IVal& ival)
555 	{
556 		return fmt.convert(ival);
557 	}
558 
doRoundvkt::shaderexecutor::ScalarTraits559 	static Interval		doRound			(const FloatFormat& fmt, deUint16 value)
560 	{
561 		return fmt.roundOut(double(deFloat16To32(value)), false);
562 	}
563 };
564 
565 template<>
566 struct Traits<float> : ScalarTraits<float>
567 {
doPrintIValvkt::shaderexecutor::Traits568 	static void			doPrintIVal		(const FloatFormat&	fmt,
569 										 const Interval&	ival,
570 										 ostream&			os)
571 	{
572 		os << fmt.intervalToHex(ival);
573 	}
574 
doPrintValue16vkt::shaderexecutor::Traits575 	static void			doPrintValue16	(const FloatFormat&	fmt,
576 										 const float&		value,
577 										 ostream&			os)
578 	{
579 		const deUint32 iRep = reinterpret_cast<const deUint32 & >(value);
580 		float res0 = deFloat16To32((deFloat16)(iRep & 0xFFFF));
581 		float res1 = deFloat16To32((deFloat16)(iRep >> 16));
582 		os << fmt.floatToHex(res0) << " " << fmt.floatToHex(res1);
583 	}
584 
doPrintValue32vkt::shaderexecutor::Traits585 	static void			doPrintValue32	(const FloatFormat&	fmt,
586 										 const float&		value,
587 										 ostream&			os)
588 	{
589 		os << fmt.floatToHex(value);
590 	}
591 
doPrintValue64vkt::shaderexecutor::Traits592 	static void			doPrintValue64	(const FloatFormat&	fmt,
593 										 const float&		value,
594 										 ostream&			os)
595 	{
596 		os << fmt.floatToHex(value);
597 	}
598 
599 	template <typename U>
doContainsvkt::shaderexecutor::Traits600 	static bool			doContains		(const Interval& a, const float& value, bool is16Bit, const tcu::Maybe<U>& modularDivisor)
601 	{
602 		if(is16Bit)
603 		{
604 			// Note: for deFloat16s packed in 32 bits, the original divisor is provided as a float to the shader in the input
605 			// buffer, so U is also float here and we call the right interlvalContains() version.
606 			const deUint32 iRep = reinterpret_cast<const deUint32&>(value);
607 			float res0 = deFloat16To32((deFloat16)(iRep & 0xFFFF));
608 			float res1 = deFloat16To32((deFloat16)(iRep >> 16));
609 			return intervalContains(a, res0, modularDivisor) && (res1 == -1.0);
610 		}
611 		return intervalContains(a, value, modularDivisor);
612 	}
613 };
614 
615 template<>
616 struct Traits<double> : ScalarTraits<double>
617 {
doPrintIValvkt::shaderexecutor::Traits618 	static void			doPrintIVal		(const FloatFormat&	fmt,
619 										 const Interval&	ival,
620 										 ostream&			os)
621 	{
622 		os << fmt.intervalToHex(ival);
623 	}
624 
doPrintValue16vkt::shaderexecutor::Traits625 	static void			doPrintValue16	(const FloatFormat&	fmt,
626 										 const double&		value,
627 										 ostream&			os)
628 	{
629 		const deUint64 iRep = reinterpret_cast<const deUint64&>(value);
630 		double byte0 = deFloat16To64((deFloat16)((iRep      ) & 0xffff));
631 		double byte1 = deFloat16To64((deFloat16)((iRep >> 16) & 0xffff));
632 		double byte2 = deFloat16To64((deFloat16)((iRep >> 32) & 0xffff));
633 		double byte3 = deFloat16To64((deFloat16)((iRep >> 48) & 0xffff));
634 		os << fmt.floatToHex(byte0) << " " << fmt.floatToHex(byte1) << " " << fmt.floatToHex(byte2) << " " << fmt.floatToHex(byte3);
635 	}
636 
doPrintValue32vkt::shaderexecutor::Traits637 	static void			doPrintValue32	(const FloatFormat&	fmt,
638 										 const double&		value,
639 										 ostream&			os)
640 	{
641 		const deUint64 iRep = reinterpret_cast<const deUint64&>(value);
642 		double res0 = static_cast<double>((float)((iRep      ) & 0xffffffff));
643 		double res1 = static_cast<double>((float)((iRep >> 32) & 0xffffffff));
644 		os << fmt.floatToHex(res0) << " " << fmt.floatToHex(res1);
645 	}
646 
doPrintValue64vkt::shaderexecutor::Traits647 	static void			doPrintValue64	(const FloatFormat&	fmt,
648 										 const double&		value,
649 										 ostream&			os)
650 	{
651 		os << fmt.floatToHex(value);
652 	}
653 
654 	template <class U>
doContainsvkt::shaderexecutor::Traits655 	static bool			doContains		(const Interval& a, const double& value, bool is16Bit, const tcu::Maybe<U>& modularDivisor)
656 	{
657 		DE_UNREF(is16Bit);
658 		DE_ASSERT(!is16Bit);
659 		return intervalContains(a, value, modularDivisor);
660 	}
661 };
662 
663 template<>
664 struct Traits<deFloat16> : ScalarTraits<deFloat16>
665 {
doPrintIValvkt::shaderexecutor::Traits666 	static void			doPrintIVal		(const FloatFormat&	fmt,
667 										 const Interval&	ival,
668 										 ostream&			os)
669 	{
670 		os << fmt.intervalToHex(ival);
671 	}
672 
doPrintValue16vkt::shaderexecutor::Traits673 	static void			doPrintValue16	(const FloatFormat&	fmt,
674 										 const deFloat16&	value,
675 										 ostream&			os)
676 	{
677 		const float res0 = deFloat16To32(value);
678 		os << fmt.floatToHex(static_cast<double>(res0));
679 	}
doPrintValue32vkt::shaderexecutor::Traits680 	static void			doPrintValue32	(const FloatFormat&	fmt,
681 										 const deFloat16&	value,
682 										 ostream&			os)
683 	{
684 		const float res0 = deFloat16To32(value);
685 		os << fmt.floatToHex(static_cast<double>(res0));
686 	}
687 
doPrintValue64vkt::shaderexecutor::Traits688 	static void			doPrintValue64	(const FloatFormat&	fmt,
689 										 const deFloat16&	value,
690 										 ostream&			os)
691 	{
692 		const double res0 = deFloat16To64(value);
693 		os << fmt.floatToHex(res0);
694 	}
695 
696 	// When the value and divisor are both deFloat16, convert both to float to call the right intervalContains version.
doContainsvkt::shaderexecutor::Traits697 	static bool			doContains		(const Interval& a, const deFloat16& value, bool is16Bit, const tcu::Maybe<deFloat16>& modularDivisor)
698 	{
699 		DE_UNREF(is16Bit);
700 		float res0 = deFloat16To32(value);
701 		const tcu::Maybe<float> convertedDivisor = (modularDivisor ? tcu::just(deFloat16To32(modularDivisor.get())) : tcu::Nothing);
702 		return intervalContains(a, res0, convertedDivisor);
703 	}
704 
705 	// If the types don't match we should not be in a modulo operation, so no conversion should take place.
706 	template <class U>
doContainsvkt::shaderexecutor::Traits707 	static bool			doContains		(const Interval& a, const deFloat16& value, bool is16Bit, const tcu::Maybe<U>& modularDivisor)
708 	{
709 		DE_UNREF(is16Bit);
710 		float res0 = deFloat16To32(value);
711 		return intervalContains(a, res0, modularDivisor);
712 	}
713 };
714 
715 template<>
716 struct Traits<bool> : ScalarTraits<bool>
717 {
doPrintValue16vkt::shaderexecutor::Traits718 	static void			doPrintValue16	(const FloatFormat&,
719 										 const float&		value,
720 										 ostream&			os)
721 	{
722 		os << (value != 0.0f ? "true" : "false");
723 	}
724 
doPrintValue32vkt::shaderexecutor::Traits725 	static void		doPrintValue32	(const			FloatFormat&,
726 									 const float&	value,
727 									 ostream&		os)
728 	{
729 		os << (value != 0.0f ? "true" : "false");
730 	}
731 
doPrintValue64vkt::shaderexecutor::Traits732 	static void		doPrintValue64	(const			FloatFormat&,
733 									 const float&	value,
734 									 ostream&		os)
735 	{
736 		os << (value != 0.0f ? "true" : "false");
737 	}
738 
doPrintIValvkt::shaderexecutor::Traits739 	static void			doPrintIVal		(const FloatFormat&,
740 										 const Interval&	ival,
741 										 ostream&			os)
742 	{
743 		os << "{";
744 		if (ival.contains(false))
745 			os << "false";
746 		if (ival.contains(false) && ival.contains(true))
747 			os << ", ";
748 		if (ival.contains(true))
749 			os << "true";
750 		os << "}";
751 	}
752 };
753 
754 template<>
755 struct Traits<int> : ScalarTraits<int>
756 {
doPrintValue16vkt::shaderexecutor::Traits757 	static void			doPrintValue16	(const FloatFormat&,
758 										 const int&			value,
759 										 ostream&			os)
760 	{
761 		int res0 = value & 0xFFFF;
762 		int res1 = value >> 16;
763 		os << res0 << " " << res1;
764 	}
765 
doPrintValue32vkt::shaderexecutor::Traits766 	static void		doPrintValue32		(const FloatFormat&,
767 										 const int&			value,
768 										 ostream&			os)
769 	{
770 		os << value;
771 	}
772 
doPrintValue64vkt::shaderexecutor::Traits773 	static void		doPrintValue64		(const FloatFormat&,
774 										 const int&			value,
775 										 ostream&			os)
776 	{
777 		os << value;
778 	}
779 
doPrintIValvkt::shaderexecutor::Traits780 	static void			doPrintIVal		(const FloatFormat&,
781 										 const Interval&	ival,
782 										 ostream&			os)
783 	{
784 		os << "[" << int(ival.lo()) << ", " << int(ival.hi()) << "]";
785 	}
786 
787 	template <typename U>
doContainsvkt::shaderexecutor::Traits788 	static bool			doContains		(const Interval& a, const int& value, bool is16Bit, const tcu::Maybe<U>& modularDivisor)
789 	{
790 		DE_UNREF(is16Bit);
791 		return intervalContains(a, value, modularDivisor);
792 	}
793 };
794 
795 //! Common traits for containers, i.e. vectors and matrices.
796 //! T is the container type itself, I is the same type with interval elements.
797 template <typename T, typename I>
798 struct ContainerTraits
799 {
800 	typedef typename	T::Element		Element;
801 	typedef				I				IVal;
802 
doMakeIValvkt::shaderexecutor::ContainerTraits803 	static IVal			doMakeIVal		(const T& value)
804 	{
805 		IVal ret;
806 
807 		for (int ndx = 0; ndx < T::SIZE; ++ndx)
808 			ret[ndx] = makeIVal(value[ndx]);
809 
810 		return ret;
811 	}
812 
doUnionvkt::shaderexecutor::ContainerTraits813 	static IVal			doUnion			(const IVal& a, const IVal& b)
814 	{
815 		IVal ret;
816 
817 		for (int ndx = 0; ndx < T::SIZE; ++ndx)
818 			ret[ndx] = unionIVal<Element>(a[ndx], b[ndx]);
819 
820 		return ret;
821 	}
822 
823 	// When the input and output types match, we may be in a modulo operation. If the divisor is provided, use each of its
824 	// components to determine if the obtained result is fine.
doContainsvkt::shaderexecutor::ContainerTraits825 	static bool			doContains		(const IVal& ival, const T& value, bool is16Bit, const tcu::Maybe<T>& modularDivisor)
826 	{
827 		using DivisorElement = typename T::Element;
828 
829 		for (int ndx = 0; ndx < T::SIZE; ++ndx)
830 		{
831 			const tcu::Maybe<DivisorElement> divisorElement = (modularDivisor ? tcu::just((*modularDivisor)[ndx]) : tcu::Nothing);
832 			if (!contains(ival[ndx], value[ndx], is16Bit, divisorElement))
833 				return false;
834 		}
835 
836 		return true;
837 	}
838 
839 	// When the input and output types do not match we should not be in a modulo operation. This version is provided for syntactical
840 	// compatibility.
841 	template <typename U>
doContainsvkt::shaderexecutor::ContainerTraits842 	static bool			doContains		(const IVal& ival, const T& value, bool is16Bit, const tcu::Maybe<U>& modularDivisor)
843 	{
844 		for (int ndx = 0; ndx < T::SIZE; ++ndx)
845 		{
846 			if (!contains(ival[ndx], value[ndx], is16Bit, modularDivisor))
847 				return false;
848 		}
849 
850 		return true;
851 	}
852 
doPrintIValvkt::shaderexecutor::ContainerTraits853 	static void			doPrintIVal		(const FloatFormat& fmt, const IVal ival, ostream& os)
854 	{
855 		os << "(";
856 
857 		for (int ndx = 0; ndx < T::SIZE; ++ndx)
858 		{
859 			if (ndx > 0)
860 				os << ", ";
861 
862 			printIVal<Element>(fmt, ival[ndx], os);
863 		}
864 
865 		os << ")";
866 	}
867 
doPrintValue16vkt::shaderexecutor::ContainerTraits868 	static void			doPrintValue16	(const FloatFormat& fmt, const T& value, ostream& os)
869 	{
870 		os << dataTypeNameOf<T>() << "(";
871 
872 		for (int ndx = 0; ndx < T::SIZE; ++ndx)
873 		{
874 			if (ndx > 0)
875 				os << ", ";
876 
877 			printValue16<Element>(fmt, value[ndx], os);
878 		}
879 
880 		os << ")";
881 	}
882 
doPrintValue32vkt::shaderexecutor::ContainerTraits883 	static void			doPrintValue32	(const FloatFormat& fmt, const T& value, ostream& os)
884 	{
885 		os << dataTypeNameOf<T>() << "(";
886 
887 		for (int ndx = 0; ndx < T::SIZE; ++ndx)
888 		{
889 			if (ndx > 0)
890 				os << ", ";
891 
892 			printValue32<Element>(fmt, value[ndx], os);
893 		}
894 
895 		os << ")";
896 	}
897 
doPrintValue64vkt::shaderexecutor::ContainerTraits898 	static void			doPrintValue64	(const FloatFormat& fmt, const T& value, ostream& os)
899 	{
900 		os << dataTypeNameOf<T>() << "(";
901 
902 		for (int ndx = 0; ndx < T::SIZE; ++ndx)
903 		{
904 			if (ndx > 0)
905 				os << ", ";
906 
907 			printValue64<Element>(fmt, value[ndx], os);
908 		}
909 
910 		os << ")";
911 	}
912 
doConvertvkt::shaderexecutor::ContainerTraits913 	static IVal			doConvert		(const FloatFormat& fmt, const IVal& value)
914 	{
915 		IVal ret;
916 
917 		for (int ndx = 0; ndx < T::SIZE; ++ndx)
918 			ret[ndx] = convert<Element>(fmt, value[ndx]);
919 
920 		return ret;
921 	}
922 
doRoundvkt::shaderexecutor::ContainerTraits923 	static IVal			doRound			(const FloatFormat& fmt, T value)
924 	{
925 		IVal ret;
926 
927 		for (int ndx = 0; ndx < T::SIZE; ++ndx)
928 			ret[ndx] = round(fmt, value[ndx]);
929 
930 		return ret;
931 	}
932 };
933 
934 template <typename T, int Size>
935 struct Traits<Vector<T, Size> > :
936 	ContainerTraits<Vector<T, Size>, Vector<typename Traits<T>::IVal, Size> >
937 {
938 };
939 
940 template <typename T, int Rows, int Cols>
941 struct Traits<Matrix<T, Rows, Cols> > :
942 	ContainerTraits<Matrix<T, Rows, Cols>, Matrix<typename Traits<T>::IVal, Rows, Cols> >
943 {
944 };
945 
946 //! Void traits. These are just dummies, but technically valid: a Void is a
947 //! unit type with a single possible value.
948 template<>
949 struct Traits<Void>
950 {
951 	typedef		Void			IVal;
952 
doMakeIValvkt::shaderexecutor::Traits953 	static Void	doMakeIVal		(const Void& value)										{ return value; }
doUnionvkt::shaderexecutor::Traits954 	static Void	doUnion			(const Void&, const Void&)								{ return Void(); }
doContainsvkt::shaderexecutor::Traits955 	static bool	doContains		(const Void&, Void)										{ return true; }
956 	template <typename U>
doContainsvkt::shaderexecutor::Traits957 	static bool	doContains		(const Void&, const Void& value, bool is16Bit, const tcu::Maybe<U>& modularDivisor) { DE_UNREF(value); DE_UNREF(is16Bit); DE_UNREF(modularDivisor); return true; }
doRoundvkt::shaderexecutor::Traits958 	static Void	doRound			(const FloatFormat&, const Void& value)					{ return value; }
doConvertvkt::shaderexecutor::Traits959 	static Void	doConvert		(const FloatFormat&, const Void& value)					{ return value; }
960 
doPrintValue16vkt::shaderexecutor::Traits961 	static void	doPrintValue16	(const FloatFormat&, const Void&, ostream& os)
962 	{
963 		os << "()";
964 	}
965 
doPrintValue32vkt::shaderexecutor::Traits966 	static void	doPrintValue32	(const FloatFormat&, const Void&, ostream& os)
967 	{
968 		os << "()";
969 	}
970 
doPrintValue64vkt::shaderexecutor::Traits971 	static void	doPrintValue64	(const FloatFormat&, const Void&, ostream& os)
972 	{
973 		os << "()";
974 	}
975 
doPrintIValvkt::shaderexecutor::Traits976 	static void	doPrintIVal		(const FloatFormat&, const Void&, ostream& os)
977 	{
978 		os << "()";
979 	}
980 };
981 
982 //! This is needed for container-generic operations.
983 //! We want a scalar type T to be its own "one-element vector".
984 template <typename T, int Size> struct ContainerOf	{ typedef Vector<T, Size>	Container; };
985 
986 template <typename T>			struct ContainerOf<T, 1>		{ typedef T		Container; };
987 template <int Size>				struct ContainerOf<Void, Size>	{ typedef Void	Container; };
988 
// This is a kludge that is only needed to get the ExprP::operator[] syntactic sugar to work.
// The primary template forwards to T::Element; the listed scalar types have no
// nested Element type, so their specializations report void instead.
template <typename T>	struct ElementOf		{ typedef	typename T::Element	Element; };
template <>				struct ElementOf<float>	{ typedef	void				Element; };
template <>				struct ElementOf<double>{ typedef	void				Element; };
template <>				struct ElementOf<bool>	{ typedef	void				Element; };
template <>				struct ElementOf<int>	{ typedef	void				Element; };
995 
996 template <typename T>
comparisonMessageInterval(const typename Traits<T>::IVal & val)997 string comparisonMessageInterval(const typename Traits<T>::IVal& val)
998 {
999 	DE_UNREF(val);
1000 	return "";
1001 }
1002 
1003 template <>
comparisonMessageInterval(const Traits<int>::IVal & val)1004 string comparisonMessageInterval<int>(const Traits<int>::IVal& val)
1005 {
1006 	return comparisonMessage(static_cast<int>(val.lo()));
1007 }
1008 
1009 template <>
comparisonMessageInterval(const Traits<float>::IVal & val)1010 string comparisonMessageInterval<float>(const Traits<float>::IVal& val)
1011 {
1012 	return comparisonMessage(static_cast<int>(val.lo()));
1013 }
1014 
1015 template <>
comparisonMessageInterval(const tcu::Vector<tcu::Interval,2> & val)1016 string comparisonMessageInterval<tcu::Vector<int, 2> >(const tcu::Vector<tcu::Interval, 2> & val)
1017 {
1018 	tcu::IVec2 result(static_cast<int>(val[0].lo()), static_cast<int>(val[1].lo()));
1019 	return comparisonMessage(result);
1020 }
1021 
1022 template <>
comparisonMessageInterval(const tcu::Vector<tcu::Interval,3> & val)1023 string comparisonMessageInterval<tcu::Vector<int, 3> >(const tcu::Vector<tcu::Interval, 3> & val)
1024 {
1025 	tcu::IVec3 result(static_cast<int>(val[0].lo()), static_cast<int>(val[1].lo()), static_cast<int>(val[2].lo()));
1026 	return comparisonMessage(result);
1027 }
1028 
1029 template <>
comparisonMessageInterval(const tcu::Vector<tcu::Interval,4> & val)1030 string comparisonMessageInterval<tcu::Vector<int, 4> >(const tcu::Vector<tcu::Interval, 4> & val)
1031 {
1032 	tcu::IVec4 result(static_cast<int>(val[0].lo()), static_cast<int>(val[1].lo()), static_cast<int>(val[2].lo()), static_cast<int>(val[3].lo()));
1033 	return comparisonMessage(result);
1034 }
1035 
1036 /*--------------------------------------------------------------------*//*!
1037  *
1038  * \name Abstract syntax for expressions and statements.
1039  *
1040  * We represent GLSL programs as syntax objects: an Expr<T> represents an
1041  * expression whose GLSL type corresponds to the C++ type T, and a Statement
1042  * represents a statement.
1043  *
1044  * To ease memory management, we use shared pointers to refer to expressions
1045  * and statements. ExprP<T> is a shared pointer to an Expr<T>, and StatementP
1046  * is a shared pointer to a Statement.
1047  *
1048  * \{
1049  *
1050  *//*--------------------------------------------------------------------*/
1051 
1052 class ExprBase;
1053 class ExpandContext;
1054 class Statement;
1055 class StatementP;
1056 class FuncBase;
1057 template <typename T> class ExprP;
1058 template <typename T> class Variable;
1059 template <typename T> class VariableP;
1060 template <typename T> class DefaultSampling;
1061 
1062 typedef set<const FuncBase*> FuncSet;
1063 
1064 template <typename T>
1065 VariableP<T>	variable			(const string& name);
1066 StatementP		compoundStatement	(const vector<StatementP>& statements);
1067 
1068 /*--------------------------------------------------------------------*//*!
1069  * \brief A variable environment.
1070  *
1071  * An Environment object maintains the mapping between variables of the
1072  * abstract syntax tree and their values.
1073  *
1074  * \todo [2014-03-28 lauri] At least run-time type safety.
1075  *
1076  *//*--------------------------------------------------------------------*/
1077 class Environment
1078 {
1079 public:
1080 	template<typename T>
bind(const Variable<T> & variable,const typename Traits<T>::IVal & value)1081 	void						bind	(const Variable<T>&					variable,
1082 										 const typename Traits<T>::IVal&	value)
1083 	{
1084 		deUint8* const data = new deUint8[sizeof(value)];
1085 
1086 		deMemcpy(data, &value, sizeof(value));
1087 		de::insert(m_map, variable.getName(), SharedPtr<deUint8>(data, de::ArrayDeleter<deUint8>()));
1088 	}
1089 
1090 	template<typename T>
lookup(const Variable<T> & variable) const1091 	typename Traits<T>::IVal&	lookup	(const Variable<T>& variable) const
1092 	{
1093 		deUint8* const data = de::lookup(m_map, variable.getName()).get();
1094 
1095 		return *reinterpret_cast<typename Traits<T>::IVal*>(data);
1096 	}
1097 
1098 private:
1099 	map<string, SharedPtr<deUint8> >	m_map;
1100 };
1101 
1102 /*--------------------------------------------------------------------*//*!
1103  * \brief Evaluation context.
1104  *
1105  * The evaluation context contains everything that separates one execution of
1106  * an expression from the next. Currently this means the desired floating
1107  * point precision and the current variable environment.
1108  *
1109  *//*--------------------------------------------------------------------*/
1110 struct EvalContext
1111 {
EvalContextvkt::shaderexecutor::EvalContext1112 	EvalContext (const FloatFormat&	format_,
1113 				 Precision			floatPrecision_,
1114 				 Environment&		env_,
1115 				 int				callDepth_)
1116 		: format				(format_)
1117 		, floatPrecision		(floatPrecision_)
1118 		, env					(env_)
1119 		, callDepth				(callDepth_) {}
1120 
1121 	FloatFormat		format;
1122 	Precision		floatPrecision;
1123 	Environment&	env;
1124 	int				callDepth;
1125 };
1126 
/*--------------------------------------------------------------------*//*!
 * \brief Simple incremental counter.
 *
 * This is used to make sure that different ExpandContexts will not produce
 * overlapping temporary names.
 *
 * Each call returns the current count and then increments it, so the first
 * call on a default-constructed Counter yields 0.
 *
 *//*--------------------------------------------------------------------*/
class Counter
{
public:
			Counter		(int count = 0) : m_count(count) {}
	int		operator()	(void) { return m_count++; }

private:
	int		m_count;
};
1143 
1144 class ExpandContext
1145 {
1146 public:
ExpandContext(Counter & symCounter)1147 						ExpandContext	(Counter& symCounter) : m_symCounter(symCounter) {}
ExpandContext(const ExpandContext & parent)1148 						ExpandContext	(const ExpandContext& parent)
1149 							: m_symCounter(parent.m_symCounter) {}
1150 
1151 	template<typename T>
genSym(const string & baseName)1152 	VariableP<T>		genSym			(const string& baseName)
1153 	{
1154 		return variable<T>(baseName + de::toString(m_symCounter()));
1155 	}
1156 
addStatement(const StatementP & stmt)1157 	void				addStatement	(const StatementP& stmt)
1158 	{
1159 		m_statements.push_back(stmt);
1160 	}
1161 
getStatements(void) const1162 	vector<StatementP>	getStatements	(void) const
1163 	{
1164 		return m_statements;
1165 	}
1166 private:
1167 	Counter&			m_symCounter;
1168 	vector<StatementP>	m_statements;
1169 };
1170 
1171 /*--------------------------------------------------------------------*//*!
1172  * \brief A statement or declaration.
1173  *
1174  * Statements have no values. Instead, they are executed for their side
1175  * effects only: the execute() method should modify at least one variable in
1176  * the environment.
1177  *
1178  * As a bit of a kludge, a Statement object can also represent a declaration:
1179  * when it is evaluated, it can add a variable binding to the environment
1180  * instead of modifying a current one.
1181  *
1182  *//*--------------------------------------------------------------------*/
1183 class Statement
1184 {
1185 public:
~Statement(void)1186 	virtual			~Statement		(void)							{								 }
1187 	//! Execute the statement, modifying the environment of `ctx`
execute(EvalContext & ctx) const1188 	void			execute			(EvalContext&	ctx)	const	{ this->doExecute(ctx);			 }
print(ostream & os) const1189 	void			print			(ostream&		os)		const	{ this->doPrint(os);			 }
1190 	//! Add the functions used in this statement to `dst`.
getUsedFuncs(FuncSet & dst) const1191 	void			getUsedFuncs	(FuncSet& dst)			const	{ this->doGetUsedFuncs(dst);	 }
failed(EvalContext & ctx) const1192 	void			failed			(EvalContext& ctx)		const	{ this->doFail(ctx);			 }
1193 
1194 protected:
1195 	virtual void	doPrint			(ostream& os)			const	= 0;
1196 	virtual void	doExecute		(EvalContext& ctx)		const	= 0;
1197 	virtual void	doGetUsedFuncs	(FuncSet& dst)			const	= 0;
doFail(EvalContext & ctx) const1198 	virtual void	doFail			(EvalContext& ctx)		const	{ DE_UNREF(ctx); }
1199 };
1200 
operator <<(ostream & os,const Statement & stmt)1201 ostream& operator<<(ostream& os, const Statement& stmt)
1202 {
1203 	stmt.print(os);
1204 	return os;
1205 }
1206 
1207 /*--------------------------------------------------------------------*//*!
1208  * \brief Smart pointer for statements (and declarations)
1209  *
1210  *//*--------------------------------------------------------------------*/
1211 class StatementP : public SharedPtr<const Statement>
1212 {
1213 public:
1214 	typedef		SharedPtr<const Statement>	Super;
1215 
StatementP(void)1216 				StatementP			(void) {}
StatementP(const Statement * ptr)1217 	explicit	StatementP			(const Statement* ptr)	: Super(ptr) {}
StatementP(const Super & ptr)1218 				StatementP			(const Super& ptr)		: Super(ptr) {}
1219 };
1220 
1221 /*--------------------------------------------------------------------*//*!
1222  * \brief
1223  *
1224  * A statement that modifies a variable or a declaration that binds a variable.
1225  *
1226  *//*--------------------------------------------------------------------*/
1227 template <typename T>
1228 class VariableStatement : public Statement
1229 {
1230 public:
VariableStatement(const VariableP<T> & variable,const ExprP<T> & value,bool isDeclaration)1231 					VariableStatement	(const VariableP<T>& variable, const ExprP<T>& value,
1232 										 bool isDeclaration)
1233 						: m_variable		(variable)
1234 						, m_value			(value)
1235 						, m_isDeclaration	(isDeclaration) {}
1236 
1237 protected:
doPrint(ostream & os) const1238 	void			doPrint				(ostream& os)							const
1239 	{
1240 		if (m_isDeclaration)
1241 			os << glu::declare(getVarTypeOf<T>(), m_variable->getName());
1242 		else
1243 			os << m_variable->getName();
1244 
1245 		os << " = ";
1246 		os<< *m_value << ";\n";
1247 	}
1248 
doExecute(EvalContext & ctx) const1249 	void			doExecute			(EvalContext& ctx)						const
1250 	{
1251 		if (m_isDeclaration)
1252 			ctx.env.bind(*m_variable, m_value->evaluate(ctx));
1253 		else
1254 			ctx.env.lookup(*m_variable) = m_value->evaluate(ctx);
1255 	}
1256 
doGetUsedFuncs(FuncSet & dst) const1257 	void			doGetUsedFuncs		(FuncSet& dst)							const
1258 	{
1259 		m_value->getUsedFuncs(dst);
1260 	}
1261 
doFail(EvalContext & ctx) const1262 	virtual void	doFail			(EvalContext& ctx)		const
1263 	{
1264 		if (m_isDeclaration)
1265 			ctx.env.bind(*m_variable, m_value->fails(ctx));
1266 		else
1267 			ctx.env.lookup(*m_variable) = m_value->fails(ctx);
1268 	}
1269 
1270 	VariableP<T>	m_variable;
1271 	ExprP<T>		m_value;
1272 	bool			m_isDeclaration;
1273 };
1274 
1275 template <typename T>
variableStatement(const VariableP<T> & variable,const ExprP<T> & value,bool isDeclaration)1276 StatementP variableStatement (const VariableP<T>&	variable,
1277 							  const ExprP<T>&		value,
1278 							  bool					isDeclaration)
1279 {
1280 	return StatementP(new VariableStatement<T>(variable, value, isDeclaration));
1281 }
1282 
1283 template <typename T>
variableDeclaration(const VariableP<T> & variable,const ExprP<T> & definiens)1284 StatementP variableDeclaration (const VariableP<T>& variable, const ExprP<T>& definiens)
1285 {
1286 	return variableStatement(variable, definiens, true);
1287 }
1288 
1289 template <typename T>
variableAssignment(const VariableP<T> & variable,const ExprP<T> & value)1290 StatementP variableAssignment (const VariableP<T>& variable, const ExprP<T>& value)
1291 {
1292 	return variableStatement(variable, value, false);
1293 }
1294 
1295 /*--------------------------------------------------------------------*//*!
1296  * \brief A compound statement, i.e. a block.
1297  *
1298  * A compound statement is executed by executing its constituent statements in
1299  * sequence.
1300  *
1301  *//*--------------------------------------------------------------------*/
1302 class CompoundStatement : public Statement
1303 {
1304 public:
CompoundStatement(const vector<StatementP> & statements)1305 						CompoundStatement	(const vector<StatementP>& statements)
1306 							: m_statements	(statements) {}
1307 
1308 protected:
doPrint(ostream & os) const1309 	void				doPrint				(ostream&		os)						const
1310 	{
1311 		os << "{\n";
1312 
1313 		for (size_t ndx = 0; ndx < m_statements.size(); ++ndx)
1314 			os << *m_statements[ndx];
1315 
1316 		os << "}\n";
1317 	}
1318 
doExecute(EvalContext & ctx) const1319 	void				doExecute			(EvalContext&	ctx)					const
1320 	{
1321 		for (size_t ndx = 0; ndx < m_statements.size(); ++ndx)
1322 			m_statements[ndx]->execute(ctx);
1323 	}
1324 
doGetUsedFuncs(FuncSet & dst) const1325 	void				doGetUsedFuncs		(FuncSet& dst)							const
1326 	{
1327 		for (size_t ndx = 0; ndx < m_statements.size(); ++ndx)
1328 			m_statements[ndx]->getUsedFuncs(dst);
1329 	}
1330 
1331 	vector<StatementP>	m_statements;
1332 };
1333 
compoundStatement(const vector<StatementP> & statements)1334 StatementP compoundStatement(const vector<StatementP>& statements)
1335 {
1336 	return StatementP(new CompoundStatement(statements));
1337 }
1338 
1339 //! Common base class for all expressions regardless of their type.
1340 class ExprBase
1341 {
1342 public:
~ExprBase(void)1343 	virtual				~ExprBase		(void)									{}
printExpr(ostream & os) const1344 	void				printExpr		(ostream& os) const { this->doPrintExpr(os); }
1345 
1346 	//! Output the functions that this expression refers to
getUsedFuncs(FuncSet & dst) const1347 	void				getUsedFuncs	(FuncSet& dst) const
1348 	{
1349 		this->doGetUsedFuncs(dst);
1350 	}
1351 
1352 protected:
doPrintExpr(ostream &) const1353 	virtual void		doPrintExpr		(ostream&)	const	{}
doGetUsedFuncs(FuncSet &) const1354 	virtual void		doGetUsedFuncs	(FuncSet&)	const	{}
1355 };
1356 
1357 //! Type-specific operations for an expression representing type T.
1358 template <typename T>
1359 class Expr : public ExprBase
1360 {
1361 public:
1362 	typedef				T				Val;
1363 	typedef typename	Traits<T>::IVal	IVal;
1364 
1365 	IVal				evaluate		(const EvalContext&	ctx) const;
fails(const EvalContext & ctx) const1366 	IVal				fails			(const EvalContext&	ctx) const	{ return this->doFails(ctx); }
1367 
1368 protected:
1369 	virtual IVal		doEvaluate		(const EvalContext&	ctx) const = 0;
doFails(const EvalContext & ctx) const1370 	virtual IVal		doFails			(const EvalContext&	ctx) const {return doEvaluate(ctx);}
1371 };
1372 
1373 //! Evaluate an expression with the given context, optionally tracing the calls to stderr.
1374 template <typename T>
evaluate(const EvalContext & ctx) const1375 typename Traits<T>::IVal Expr<T>::evaluate (const EvalContext& ctx) const
1376 {
1377 #ifdef GLS_ENABLE_TRACE
1378 	static const FloatFormat	highpFmt	(-126, 127, 23, true,
1379 											 tcu::MAYBE,
1380 											 tcu::YES,
1381 											 tcu::MAYBE);
1382 	EvalContext					newCtx		(ctx.format, ctx.floatPrecision,
1383 											 ctx.env, ctx.callDepth + 1);
1384 	const IVal					ret			= this->doEvaluate(newCtx);
1385 
1386 	if (isTypeValid<T>())
1387 	{
1388 		std::cerr << string(ctx.callDepth, ' ');
1389 		this->printExpr(std::cerr);
1390 		std::cerr << " -> " << intervalToString<T>(highpFmt, ret) << std::endl;
1391 	}
1392 	return ret;
1393 #else
1394 	return this->doEvaluate(ctx);
1395 #endif
1396 }
1397 
1398 template <typename T>
1399 class ExprPBase : public SharedPtr<const Expr<T> >
1400 {
1401 public:
1402 };
1403 
operator <<(ostream & os,const ExprBase & expr)1404 ostream& operator<< (ostream& os, const ExprBase& expr)
1405 {
1406 	expr.printExpr(os);
1407 	return os;
1408 }
1409 
1410 /*--------------------------------------------------------------------*//*!
1411  * \brief Shared pointer to an expression of a container type.
1412  *
1413  * Container types (i.e. vectors and matrices) support the subscription
1414  * operator. This class provides a bit of syntactic sugar to allow us to use
1415  * the C++ subscription operator to create a subscription expression.
1416  *//*--------------------------------------------------------------------*/
1417 template <typename T>
1418 class ContainerExprPBase : public ExprPBase<T>
1419 {
1420 public:
1421 	ExprP<typename T::Element>	operator[]	(int i) const;
1422 };
1423 
1424 template <typename T>
1425 class ExprP : public ExprPBase<T> {};
1426 
1427 // We treat Voids as containers since the dummy parameters in generalized
1428 // vector functions are represented as Voids.
1429 template <>
1430 class ExprP<Void> : public ContainerExprPBase<Void> {};
1431 
1432 template <typename T, int Size>
1433 class ExprP<Vector<T, Size> > : public ContainerExprPBase<Vector<T, Size> > {};
1434 
1435 template <typename T, int Rows, int Cols>
1436 class ExprP<Matrix<T, Rows, Cols> > : public ContainerExprPBase<Matrix<T, Rows, Cols> > {};
1437 
exprP(void)1438 template <typename T> ExprP<T> exprP (void)
1439 {
1440 	return ExprP<T>();
1441 }
1442 
1443 template <typename T>
exprP(const SharedPtr<const Expr<T>> & ptr)1444 ExprP<T> exprP (const SharedPtr<const Expr<T> >& ptr)
1445 {
1446 	ExprP<T> ret;
1447 	static_cast<SharedPtr<const Expr<T> >&>(ret) = ptr;
1448 	return ret;
1449 }
1450 
1451 template <typename T>
exprP(const Expr<T> * ptr)1452 ExprP<T> exprP (const Expr<T>* ptr)
1453 {
1454 	return exprP(SharedPtr<const Expr<T> >(ptr));
1455 }
1456 
1457 /*--------------------------------------------------------------------*//*!
1458  * \brief A shared pointer to a variable expression.
1459  *
1460  * This is just a narrowing of ExprP for the operations that require a variable
1461  * instead of an arbitrary expression.
1462  *
1463  *//*--------------------------------------------------------------------*/
1464 template <typename T>
1465 class VariableP : public SharedPtr<const Variable<T> >
1466 {
1467 public:
1468 	typedef		SharedPtr<const Variable<T> >	Super;
VariableP(const Variable<T> * ptr)1469 	explicit	VariableP	(const Variable<T>* ptr) : Super(ptr) {}
VariableP(void)1470 				VariableP	(void) {}
VariableP(const Super & ptr)1471 				VariableP	(const Super& ptr) : Super(ptr) {}
1472 
operator ExprP<T>(void) const1473 	operator	ExprP<T>	(void) const { return exprP(SharedPtr<const Expr<T> >(*this)); }
1474 };
1475 
1476 /*--------------------------------------------------------------------*//*!
1477  * \name Syntactic sugar operators for expressions.
1478  *
1479  * @{
1480  *
1481  * These operators allow the use of C++ syntax to construct GLSL expressions
1482  * containing operators: e.g. "a+b" creates an addition expression with
1483  * operands a and b, and so on.
1484  *
1485  *//*--------------------------------------------------------------------*/
1486 ExprP<float>						operator+ (const ExprP<float>&						arg0,
1487 											  const ExprP<float>&						arg1);
1488 ExprP<deFloat16>					operator+ (const ExprP<deFloat16>&					arg0,
1489 											  const ExprP<deFloat16>&					arg1);
1490 ExprP<double>						operator+ (const ExprP<double>&						arg0,
1491 											  const ExprP<double>&						arg1);
1492 template <typename T>
1493 ExprP<T>							operator- (const ExprP<T>& arg0);
1494 template <typename T>
1495 ExprP<T>							operator- (const ExprP<T>&							arg0,
1496 											  const ExprP<T>&							arg1);
1497 template<int Left, int Mid, int Right, typename T>
1498 ExprP<Matrix<T, Left, Right> >		operator* (const ExprP<Matrix<T, Left, Mid> >&		left,
1499 											   const ExprP<Matrix<T, Mid, Right> >&		right);
1500 ExprP<float>						operator* (const ExprP<float>&						arg0,
1501 											  const ExprP<float>&						arg1);
1502 ExprP<deFloat16>					operator* (const ExprP<deFloat16>&					arg0,
1503 											   const ExprP<deFloat16>&					arg1);
1504 ExprP<double>						operator* (const ExprP<double>&						arg0,
1505 											  const ExprP<double>&						arg1);
1506 template <typename T>
1507 ExprP<T>							operator/ (const ExprP<T>&							arg0,
1508 											  const ExprP<T>&							arg1);
1509 template<typename T, int Size>
1510 ExprP<Vector<T, Size> >				operator- (const ExprP<Vector<T, Size> >&			arg0);
1511 template<typename T, int Size>
1512 ExprP<Vector<T, Size> >				operator- (const ExprP<Vector<T, Size> >&			arg0,
1513 											   const ExprP<Vector<T, Size> >&			arg1);
1514 template<int Size, typename T>
1515 ExprP<Vector<T, Size> >				operator* (const ExprP<Vector<T, Size> >&			arg0,
1516 											   const ExprP<T>&							arg1);
1517 template<typename T, int Size>
1518 ExprP<Vector<T, Size> >				operator* (const ExprP<Vector<T, Size> >&			arg0,
1519 											   const ExprP<Vector<T, Size> >&			arg1);
1520 template<int Rows, int Cols, typename T>
1521 ExprP<Vector<T, Rows> >				operator* (const ExprP<Vector<T, Cols> >&			left,
1522 											   const ExprP<Matrix<T, Rows, Cols> >&		right);
1523 template<int Rows, int Cols, typename T>
1524 ExprP<Vector<T, Cols> >				operator* (const ExprP<Matrix<T, Rows, Cols> >&		left,
1525 											   const ExprP<Vector<T, Rows> >&			right);
1526 template<int Rows, int Cols, typename T>
1527 ExprP<Matrix<T, Rows, Cols> >		operator* (const ExprP<Matrix<T, Rows, Cols> >&		left,
1528 											   const ExprP<T>&							right);
1529 template<int Rows, int Cols>
1530 ExprP<Matrix<float, Rows, Cols> >	operator+ (const ExprP<Matrix<float, Rows, Cols> >&	left,
1531 											   const ExprP<Matrix<float, Rows, Cols> >&	right);
1532 template<int Rows, int Cols>
1533 ExprP<Matrix<deFloat16, Rows, Cols> >	operator+ (const ExprP<Matrix<deFloat16, Rows, Cols> >&	left,
1534 												   const ExprP<Matrix<deFloat16, Rows, Cols> >&	right);
1535 template<int Rows, int Cols>
1536 ExprP<Matrix<double, Rows, Cols> >	operator+ (const ExprP<Matrix<double, Rows, Cols> >&	left,
1537 											   const ExprP<Matrix<double, Rows, Cols> >&	right);
1538 template<typename T, int Rows, int Cols>
1539 ExprP<Matrix<T, Rows, Cols> >	operator- (const ExprP<Matrix<T, Rows, Cols> >&	mat);
1540 
1541 //! @}
1542 
1543 /*--------------------------------------------------------------------*//*!
1544  * \brief Variable expression.
1545  *
1546  * A variable is evaluated by looking up its range of possible values from an
1547  * environment.
1548  *//*--------------------------------------------------------------------*/
1549 template <typename T>
1550 class Variable : public Expr<T>
1551 {
1552 public:
1553 	typedef typename Expr<T>::IVal IVal;
1554 
Variable(const string & name)1555 					Variable	(const string& name) : m_name (name) {}
getName(void) const1556 	string			getName		(void)							const { return m_name; }
1557 
1558 protected:
doPrintExpr(ostream & os) const1559 	void			doPrintExpr	(ostream& os)					const { os << m_name; }
doEvaluate(const EvalContext & ctx) const1560 	IVal			doEvaluate	(const EvalContext& ctx)		const
1561 	{
1562 		return ctx.env.lookup<T>(*this);
1563 	}
1564 
1565 private:
1566 	string	m_name;
1567 };
1568 
1569 template <typename T>
variable(const string & name)1570 VariableP<T> variable (const string& name)
1571 {
1572 	return VariableP<T>(new Variable<T>(name));
1573 }
1574 
1575 template <typename T>
bindExpression(const string & name,ExpandContext & ctx,const ExprP<T> & expr)1576 VariableP<T> bindExpression (const string& name, ExpandContext& ctx, const ExprP<T>& expr)
1577 {
1578 	VariableP<T> var = ctx.genSym<T>(name);
1579 	ctx.addStatement(variableDeclaration(var, expr));
1580 	return var;
1581 }
1582 
1583 /*--------------------------------------------------------------------*//*!
1584  * \brief Constant expression.
1585  *
1586  * A constant is evaluated by rounding it to a set of possible values allowed
1587  * by the current floating point precision.
1588  *//*--------------------------------------------------------------------*/
1589 template <typename T>
1590 class Constant : public Expr<T>
1591 {
1592 public:
1593 	typedef typename Expr<T>::IVal IVal;
1594 
Constant(const T & value)1595 			Constant		(const T& value) : m_value(value) {}
1596 
1597 protected:
doPrintExpr(ostream & os) const1598 	void	doPrintExpr		(ostream& os) const			{ os << m_value; }
doEvaluate(const EvalContext &) const1599 	IVal	doEvaluate		(const EvalContext&) const	{ return makeIVal(m_value); }
1600 
1601 private:
1602 	T		m_value;
1603 };
1604 
1605 template <typename T>
constant(const T & value)1606 ExprP<T> constant (const T& value)
1607 {
1608 	return exprP(new Constant<T>(value));
1609 }
1610 
1611 //! Return a reference to a singleton void constant.
voidP(void)1612 const ExprP<Void>& voidP (void)
1613 {
1614 	static const ExprP<Void> singleton = constant(Void());
1615 
1616 	return singleton;
1617 }
1618 
1619 /*--------------------------------------------------------------------*//*!
1620  * \brief Four-element tuple.
1621  *
1622  * This is used for various things where we need one thing for each possible
1623  * function parameter. Currently the maximum supported number of parameters is
1624  * four.
1625  *//*--------------------------------------------------------------------*/
1626 template <typename T0 = Void, typename T1 = Void, typename T2 = Void, typename T3 = Void>
1627 struct Tuple4
1628 {
Tuple4vkt::shaderexecutor::Tuple41629 	explicit Tuple4 (const T0 e0 = T0(),
1630 					 const T1 e1 = T1(),
1631 					 const T2 e2 = T2(),
1632 					 const T3 e3 = T3())
1633 		: a	(e0)
1634 		, b	(e1)
1635 		, c	(e2)
1636 		, d	(e3)
1637 	{
1638 	}
1639 
1640 	T0 a;
1641 	T1 b;
1642 	T2 c;
1643 	T3 d;
1644 };
1645 
1646 /*--------------------------------------------------------------------*//*!
1647  * \brief Function signature.
1648  *
1649  * This is a purely compile-time structure used to bundle all types in a
1650  * function signature together. This makes passing the signature around in
1651  * templates easier, since we only need to take and pass a single Sig instead
1652  * of a bunch of parameter types and a return type.
1653  *
1654  *//*--------------------------------------------------------------------*/
1655 template <typename R,
1656 		  typename P0 = Void, typename P1 = Void,
1657 		  typename P2 = Void, typename P3 = Void>
1658 struct Signature
1659 {
1660 	typedef R							Ret;
1661 	typedef P0							Arg0;
1662 	typedef P1							Arg1;
1663 	typedef P2							Arg2;
1664 	typedef P3							Arg3;
1665 	typedef typename Traits<Ret>::IVal	IRet;
1666 	typedef typename Traits<Arg0>::IVal	IArg0;
1667 	typedef typename Traits<Arg1>::IVal	IArg1;
1668 	typedef typename Traits<Arg2>::IVal	IArg2;
1669 	typedef typename Traits<Arg3>::IVal	IArg3;
1670 
1671 	typedef Tuple4<	const Arg0&,	const Arg1&,	const Arg2&,	const Arg3&>	Args;
1672 	typedef Tuple4<	const IArg0&,	const IArg1&,	const IArg2&,	const IArg3&>	IArgs;
1673 	typedef Tuple4<	ExprP<Arg0>,	ExprP<Arg1>,	ExprP<Arg2>,	ExprP<Arg3> >	ArgExprs;
1674 };
1675 
1676 typedef vector<const ExprBase*> BaseArgExprs;
1677 
1678 /*--------------------------------------------------------------------*//*!
1679  * \brief Type-independent operations for function objects.
1680  *
1681  *//*--------------------------------------------------------------------*/
1682 class FuncBase
1683 {
1684 public:
~FuncBase(void)1685 	virtual				~FuncBase				(void)					{}
1686 	virtual string		getName					(void)					const = 0;
1687 	//! Name of extension that this function requires, or empty.
getRequiredExtension(void) const1688 	virtual string		getRequiredExtension	(void)					const { return ""; }
getInputRange(const bool is16bit) const1689 	virtual Interval	getInputRange			(const bool is16bit)	const {DE_UNREF(is16bit); return Interval(true, -TCU_INFINITY, TCU_INFINITY); }
1690 	virtual void		print					(ostream&,
1691 												 const BaseArgExprs&)	const = 0;
1692 	//! Index of output parameter, or -1 if none of the parameters is output.
getOutParamIndex(void) const1693 	virtual int			getOutParamIndex		(void)					const { return -1; }
1694 
getSpirvCase(void) const1695 	virtual SpirVCaseT	getSpirvCase			(void)					const { return SPIRV_CASETYPE_NONE; }
1696 
printDefinition(ostream & os) const1697 	void				printDefinition			(ostream& os)			const
1698 	{
1699 		doPrintDefinition(os);
1700 	}
1701 
getUsedFuncs(FuncSet & dst) const1702 	void				getUsedFuncs			(FuncSet& dst) const
1703 	{
1704 		this->doGetUsedFuncs(dst);
1705 	}
1706 
1707 protected:
1708 	virtual void		doPrintDefinition		(ostream& os)			const = 0;
1709 	virtual void		doGetUsedFuncs			(FuncSet& dst)			const = 0;
1710 };
1711 
1712 typedef Tuple4<string, string, string, string> ParamNames;
1713 
1714 /*--------------------------------------------------------------------*//*!
1715  * \brief Function objects.
1716  *
1717  * Each Func object represents a GLSL function. It can be applied to interval
1718  * arguments, and it returns the an interval that is a conservative
1719  * approximation of the image of the GLSL function over the argument
1720  * intervals. That is, it is given a set of possible arguments and it returns
1721  * the set of possible values.
1722  *
1723  *//*--------------------------------------------------------------------*/
1724 template <typename Sig_>
1725 class Func : public FuncBase
1726 {
1727 public:
1728 	typedef Sig_										Sig;
1729 	typedef typename Sig::Ret							Ret;
1730 	typedef typename Sig::Arg0							Arg0;
1731 	typedef typename Sig::Arg1							Arg1;
1732 	typedef typename Sig::Arg2							Arg2;
1733 	typedef typename Sig::Arg3							Arg3;
1734 	typedef typename Sig::IRet							IRet;
1735 	typedef typename Sig::IArg0							IArg0;
1736 	typedef typename Sig::IArg1							IArg1;
1737 	typedef typename Sig::IArg2							IArg2;
1738 	typedef typename Sig::IArg3							IArg3;
1739 	typedef typename Sig::Args							Args;
1740 	typedef typename Sig::IArgs							IArgs;
1741 	typedef typename Sig::ArgExprs						ArgExprs;
1742 
print(ostream & os,const BaseArgExprs & args) const1743 	void				print			(ostream&			os,
1744 										 const BaseArgExprs& args)				const
1745 	{
1746 		this->doPrint(os, args);
1747 	}
1748 
apply(const EvalContext & ctx,const IArg0 & arg0=IArg0 (),const IArg1 & arg1=IArg1 (),const IArg2 & arg2=IArg2 (),const IArg3 & arg3=IArg3 ()) const1749 	IRet				apply			(const EvalContext&	ctx,
1750 										 const IArg0&		arg0 = IArg0(),
1751 										 const IArg1&		arg1 = IArg1(),
1752 										 const IArg2&		arg2 = IArg2(),
1753 										 const IArg3&		arg3 = IArg3())		const
1754 	{
1755 		return this->applyArgs(ctx, IArgs(arg0, arg1, arg2, arg3));
1756 	}
1757 
fail(const EvalContext & ctx,const IArg0 & arg0=IArg0 (),const IArg1 & arg1=IArg1 (),const IArg2 & arg2=IArg2 (),const IArg3 & arg3=IArg3 ()) const1758 	IRet				fail			(const EvalContext&	ctx,
1759 										 const IArg0&		arg0 = IArg0(),
1760 										 const IArg1&		arg1 = IArg1(),
1761 										 const IArg2&		arg2 = IArg2(),
1762 										 const IArg3&		arg3 = IArg3())		const
1763 	{
1764 		return this->doFail(ctx, IArgs(arg0, arg1, arg2, arg3));
1765 	}
applyArgs(const EvalContext & ctx,const IArgs & args) const1766 	IRet				applyArgs		(const EvalContext&	ctx,
1767 										 const IArgs&		args)				const
1768 	{
1769 		return this->doApply(ctx, args);
1770 	}
1771 	ExprP<Ret>			operator()		(const ExprP<Arg0>&		arg0 = voidP(),
1772 										 const ExprP<Arg1>&		arg1 = voidP(),
1773 										 const ExprP<Arg2>&		arg2 = voidP(),
1774 										 const ExprP<Arg3>&		arg3 = voidP())		const;
1775 
getParamNames(void) const1776 	const ParamNames&	getParamNames	(void)									const
1777 	{
1778 		return this->doGetParamNames();
1779 	}
1780 
1781 protected:
1782 	virtual IRet		doApply			(const EvalContext&,
1783 										 const IArgs&)							const = 0;
doFail(const EvalContext & ctx,const IArgs & args) const1784 	virtual IRet		doFail			(const EvalContext&	ctx,
1785 										 const IArgs&		args)				const
1786 	{
1787 		return this->doApply(ctx, args);
1788 	}
doPrint(ostream & os,const BaseArgExprs & args) const1789 	virtual void		doPrint			(ostream& os, const BaseArgExprs& args)	const
1790 	{
1791 		os << getName() << "(";
1792 
1793 		if (isTypeValid<Arg0>())
1794 			os << *args[0];
1795 
1796 		if (isTypeValid<Arg1>())
1797 			os << ", " << *args[1];
1798 
1799 		if (isTypeValid<Arg2>())
1800 			os << ", " << *args[2];
1801 
1802 		if (isTypeValid<Arg3>())
1803 			os << ", " << *args[3];
1804 
1805 		os << ")";
1806 	}
1807 
doGetParamNames(void) const1808 	virtual const ParamNames&	doGetParamNames	(void)							const
1809 	{
1810 		static ParamNames	names	("a", "b", "c", "d");
1811 		return names;
1812 	}
1813 };
1814 
1815 template <typename Sig>
1816 class Apply : public Expr<typename Sig::Ret>
1817 {
1818 public:
1819 	typedef typename Sig::Ret				Ret;
1820 	typedef typename Sig::Arg0				Arg0;
1821 	typedef typename Sig::Arg1				Arg1;
1822 	typedef typename Sig::Arg2				Arg2;
1823 	typedef typename Sig::Arg3				Arg3;
1824 	typedef typename Expr<Ret>::Val			Val;
1825 	typedef typename Expr<Ret>::IVal		IVal;
1826 	typedef Func<Sig>						ApplyFunc;
1827 	typedef typename ApplyFunc::ArgExprs	ArgExprs;
1828 
Apply(const ApplyFunc & func,const ExprP<Arg0> & arg0=voidP (),const ExprP<Arg1> & arg1=voidP (),const ExprP<Arg2> & arg2=voidP (),const ExprP<Arg3> & arg3=voidP ())1829 						Apply	(const ApplyFunc&		func,
1830 								 const ExprP<Arg0>&		arg0 = voidP(),
1831 								 const ExprP<Arg1>&		arg1 = voidP(),
1832 								 const ExprP<Arg2>&		arg2 = voidP(),
1833 								 const ExprP<Arg3>&		arg3 = voidP())
1834 							: m_func	(func),
1835 							  m_args	(arg0, arg1, arg2, arg3) {}
1836 
Apply(const ApplyFunc & func,const ArgExprs & args)1837 						Apply	(const ApplyFunc&	func,
1838 								 const ArgExprs&	args)
1839 							: m_func	(func),
1840 							  m_args	(args) {}
1841 protected:
doPrintExpr(ostream & os) const1842 	void				doPrintExpr			(ostream& os) const
1843 	{
1844 		BaseArgExprs	args;
1845 		args.push_back(m_args.a.get());
1846 		args.push_back(m_args.b.get());
1847 		args.push_back(m_args.c.get());
1848 		args.push_back(m_args.d.get());
1849 		m_func.print(os, args);
1850 	}
1851 
doEvaluate(const EvalContext & ctx) const1852 	IVal				doEvaluate		(const EvalContext& ctx) const
1853 	{
1854 		return m_func.apply(ctx,
1855 							m_args.a->evaluate(ctx), m_args.b->evaluate(ctx),
1856 							m_args.c->evaluate(ctx), m_args.d->evaluate(ctx));
1857 	}
1858 
doGetUsedFuncs(FuncSet & dst) const1859 	void				doGetUsedFuncs	(FuncSet& dst) const
1860 	{
1861 		m_func.getUsedFuncs(dst);
1862 		m_args.a->getUsedFuncs(dst);
1863 		m_args.b->getUsedFuncs(dst);
1864 		m_args.c->getUsedFuncs(dst);
1865 		m_args.d->getUsedFuncs(dst);
1866 	}
1867 
1868 	const ApplyFunc&	m_func;
1869 	ArgExprs			m_args;
1870 };
1871 
1872 template<typename T>
1873 class Alternatives : public Func<Signature<T, T, T> >
1874 {
1875 public:
1876 	typedef typename	Alternatives::Sig		Sig;
1877 
1878 protected:
1879 	typedef typename	Alternatives::IRet		IRet;
1880 	typedef typename	Alternatives::IArgs		IArgs;
1881 
getName(void) const1882 	virtual string		getName				(void) const			{ return "alternatives"; }
doPrintDefinition(std::ostream &) const1883 	virtual void		doPrintDefinition	(std::ostream&) const	{}
doGetUsedFuncs(FuncSet &) const1884 	void				doGetUsedFuncs		(FuncSet&) const		{}
1885 
doApply(const EvalContext &,const IArgs & args) const1886 	virtual IRet		doApply				(const EvalContext&, const IArgs& args) const
1887 	{
1888 		return unionIVal<T>(args.a, args.b);
1889 	}
1890 
doPrint(ostream & os,const BaseArgExprs & args) const1891 	virtual void		doPrint				(ostream& os, const BaseArgExprs& args)	const
1892 	{
1893 		os << "{" << *args[0] << " | " << *args[1] << "}";
1894 	}
1895 };
1896 
1897 template <typename Sig>
createApply(const Func<Sig> & func,const typename Func<Sig>::ArgExprs & args)1898 ExprP<typename Sig::Ret> createApply (const Func<Sig>&						func,
1899 									  const typename Func<Sig>::ArgExprs&	args)
1900 {
1901 	return exprP(new Apply<Sig>(func, args));
1902 }
1903 
1904 template <typename Sig>
createApply(const Func<Sig> & func,const ExprP<typename Sig::Arg0> & arg0=voidP (),const ExprP<typename Sig::Arg1> & arg1=voidP (),const ExprP<typename Sig::Arg2> & arg2=voidP (),const ExprP<typename Sig::Arg3> & arg3=voidP ())1905 ExprP<typename Sig::Ret> createApply (
1906 	const Func<Sig>&			func,
1907 	const ExprP<typename Sig::Arg0>&	arg0 = voidP(),
1908 	const ExprP<typename Sig::Arg1>&	arg1 = voidP(),
1909 	const ExprP<typename Sig::Arg2>&	arg2 = voidP(),
1910 	const ExprP<typename Sig::Arg3>&	arg3 = voidP())
1911 {
1912 	return exprP(new Apply<Sig>(func, arg0, arg1, arg2, arg3));
1913 }
1914 
1915 template <typename Sig>
operator ()(const ExprP<typename Sig::Arg0> & arg0,const ExprP<typename Sig::Arg1> & arg1,const ExprP<typename Sig::Arg2> & arg2,const ExprP<typename Sig::Arg3> & arg3) const1916 ExprP<typename Sig::Ret> Func<Sig>::operator() (const ExprP<typename Sig::Arg0>& arg0,
1917 												const ExprP<typename Sig::Arg1>& arg1,
1918 												const ExprP<typename Sig::Arg2>& arg2,
1919 												const ExprP<typename Sig::Arg3>& arg3) const
1920 {
1921 	return createApply(*this, arg0, arg1, arg2, arg3);
1922 }
1923 
1924 template <typename F>
app(const ExprP<typename F::Arg0> & arg0=voidP (),const ExprP<typename F::Arg1> & arg1=voidP (),const ExprP<typename F::Arg2> & arg2=voidP (),const ExprP<typename F::Arg3> & arg3=voidP ())1925 ExprP<typename F::Ret> app (const ExprP<typename F::Arg0>& arg0 = voidP(),
1926 							const ExprP<typename F::Arg1>& arg1 = voidP(),
1927 							const ExprP<typename F::Arg2>& arg2 = voidP(),
1928 							const ExprP<typename F::Arg3>& arg3 = voidP())
1929 {
1930 	return createApply(instance<F>(), arg0, arg1, arg2, arg3);
1931 }
1932 
1933 template <typename F>
call(const EvalContext & ctx,const typename F::IArg0 & arg0=Void (),const typename F::IArg1 & arg1=Void (),const typename F::IArg2 & arg2=Void (),const typename F::IArg3 & arg3=Void ())1934 typename F::IRet call (const EvalContext&			ctx,
1935 					   const typename F::IArg0&		arg0 = Void(),
1936 					   const typename F::IArg1&		arg1 = Void(),
1937 					   const typename F::IArg2&		arg2 = Void(),
1938 					   const typename F::IArg3&		arg3 = Void())
1939 {
1940 	return instance<F>().apply(ctx, arg0, arg1, arg2, arg3);
1941 }
1942 
1943 template <typename T>
alternatives(const ExprP<T> & arg0,const ExprP<T> & arg1)1944 ExprP<T> alternatives (const ExprP<T>& arg0,
1945 					   const ExprP<T>& arg1)
1946 {
1947 	return createApply<typename Alternatives<T>::Sig>(instance<Alternatives<T> >(), arg0, arg1);
1948 }
1949 
1950 template <typename Sig>
1951 class ApplyVar : public Apply<Sig>
1952 {
1953 public:
1954 	typedef typename Sig::Ret				Ret;
1955 	typedef typename Sig::Arg0				Arg0;
1956 	typedef typename Sig::Arg1				Arg1;
1957 	typedef typename Sig::Arg2				Arg2;
1958 	typedef typename Sig::Arg3				Arg3;
1959 	typedef typename Expr<Ret>::Val			Val;
1960 	typedef typename Expr<Ret>::IVal		IVal;
1961 	typedef Func<Sig>						ApplyFunc;
1962 	typedef typename ApplyFunc::ArgExprs	ArgExprs;
1963 
ApplyVar(const ApplyFunc & func,const VariableP<Arg0> & arg0,const VariableP<Arg1> & arg1,const VariableP<Arg2> & arg2,const VariableP<Arg3> & arg3)1964 						ApplyVar	(const ApplyFunc&			func,
1965 									 const VariableP<Arg0>&		arg0,
1966 									 const VariableP<Arg1>&		arg1,
1967 									 const VariableP<Arg2>&		arg2,
1968 									 const VariableP<Arg3>&		arg3)
1969 							: Apply<Sig> (func, arg0, arg1, arg2, arg3) {}
1970 protected:
doEvaluate(const EvalContext & ctx) const1971 	IVal				doEvaluate		(const EvalContext& ctx) const
1972 	{
1973 		const Variable<Arg0>&	var0 = static_cast<const Variable<Arg0>&>(*this->m_args.a);
1974 		const Variable<Arg1>&	var1 = static_cast<const Variable<Arg1>&>(*this->m_args.b);
1975 		const Variable<Arg2>&	var2 = static_cast<const Variable<Arg2>&>(*this->m_args.c);
1976 		const Variable<Arg3>&	var3 = static_cast<const Variable<Arg3>&>(*this->m_args.d);
1977 		return this->m_func.apply(ctx,
1978 								  ctx.env.lookup(var0), ctx.env.lookup(var1),
1979 								  ctx.env.lookup(var2), ctx.env.lookup(var3));
1980 	}
1981 
doFails(const EvalContext & ctx) const1982 	IVal				doFails		(const EvalContext& ctx) const
1983 	{
1984 		const Variable<Arg0>&	var0 = static_cast<const Variable<Arg0>&>(*this->m_args.a);
1985 		const Variable<Arg1>&	var1 = static_cast<const Variable<Arg1>&>(*this->m_args.b);
1986 		const Variable<Arg2>&	var2 = static_cast<const Variable<Arg2>&>(*this->m_args.c);
1987 		const Variable<Arg3>&	var3 = static_cast<const Variable<Arg3>&>(*this->m_args.d);
1988 		return this->m_func.fail(ctx,
1989 								  ctx.env.lookup(var0), ctx.env.lookup(var1),
1990 								  ctx.env.lookup(var2), ctx.env.lookup(var3));
1991 	}
1992 };
1993 
1994 template <typename Sig>
applyVar(const Func<Sig> & func,const VariableP<typename Sig::Arg0> & arg0,const VariableP<typename Sig::Arg1> & arg1,const VariableP<typename Sig::Arg2> & arg2,const VariableP<typename Sig::Arg3> & arg3)1995 ExprP<typename Sig::Ret> applyVar (const Func<Sig>&						func,
1996 								   const VariableP<typename Sig::Arg0>&	arg0,
1997 								   const VariableP<typename Sig::Arg1>&	arg1,
1998 								   const VariableP<typename Sig::Arg2>&	arg2,
1999 								   const VariableP<typename Sig::Arg3>&	arg3)
2000 {
2001 	return exprP(new ApplyVar<Sig>(func, arg0, arg1, arg2, arg3));
2002 }
2003 
2004 template <typename Sig_>
2005 class DerivedFunc : public Func<Sig_>
2006 {
2007 public:
2008 	typedef typename DerivedFunc::ArgExprs		ArgExprs;
2009 	typedef typename DerivedFunc::IRet			IRet;
2010 	typedef typename DerivedFunc::IArgs			IArgs;
2011 	typedef typename DerivedFunc::Ret			Ret;
2012 	typedef typename DerivedFunc::Arg0			Arg0;
2013 	typedef typename DerivedFunc::Arg1			Arg1;
2014 	typedef typename DerivedFunc::Arg2			Arg2;
2015 	typedef typename DerivedFunc::Arg3			Arg3;
2016 	typedef typename DerivedFunc::IArg0			IArg0;
2017 	typedef typename DerivedFunc::IArg1			IArg1;
2018 	typedef typename DerivedFunc::IArg2			IArg2;
2019 	typedef typename DerivedFunc::IArg3			IArg3;
2020 
2021 protected:
doPrintDefinition(ostream & os) const2022 	void						doPrintDefinition	(ostream& os) const
2023 	{
2024 		const ParamNames&	paramNames	= this->getParamNames();
2025 
2026 		initialize();
2027 
2028 		os << dataTypeNameOf<Ret>() << " " << this->getName()
2029 			<< "(";
2030 		if (isTypeValid<Arg0>())
2031 			os << dataTypeNameOf<Arg0>() << " " << paramNames.a;
2032 		if (isTypeValid<Arg1>())
2033 			os << ", " << dataTypeNameOf<Arg1>() << " " << paramNames.b;
2034 		if (isTypeValid<Arg2>())
2035 			os << ", " << dataTypeNameOf<Arg2>() << " " << paramNames.c;
2036 		if (isTypeValid<Arg3>())
2037 			os << ", " << dataTypeNameOf<Arg3>() << " " << paramNames.d;
2038 		os << ")\n{\n";
2039 
2040 		for (size_t ndx = 0; ndx < m_body.size(); ++ndx)
2041 			os << *m_body[ndx];
2042 		os << "return " << *m_ret << ";\n";
2043 		os << "}\n";
2044 	}
2045 
doApply(const EvalContext & ctx,const IArgs & args) const2046 	IRet						doApply			(const EvalContext&	ctx,
2047 												 const IArgs&		args) const
2048 	{
2049 		Environment	funEnv;
2050 		IArgs&		mutArgs		= const_cast<IArgs&>(args);
2051 		IRet		ret;
2052 
2053 		initialize();
2054 
2055 		funEnv.bind(*m_var0, args.a);
2056 		funEnv.bind(*m_var1, args.b);
2057 		funEnv.bind(*m_var2, args.c);
2058 		funEnv.bind(*m_var3, args.d);
2059 
2060 		{
2061 			EvalContext	funCtx(ctx.format, ctx.floatPrecision, funEnv, ctx.callDepth);
2062 
2063 			for (size_t ndx = 0; ndx < m_body.size(); ++ndx)
2064 				m_body[ndx]->execute(funCtx);
2065 
2066 			ret = m_ret->evaluate(funCtx);
2067 		}
2068 
2069 		// \todo [lauri] Store references instead of values in environment
2070 		const_cast<IArg0&>(mutArgs.a) = funEnv.lookup(*m_var0);
2071 		const_cast<IArg1&>(mutArgs.b) = funEnv.lookup(*m_var1);
2072 		const_cast<IArg2&>(mutArgs.c) = funEnv.lookup(*m_var2);
2073 		const_cast<IArg3&>(mutArgs.d) = funEnv.lookup(*m_var3);
2074 
2075 		return ret;
2076 	}
2077 
doGetUsedFuncs(FuncSet & dst) const2078 	void						doGetUsedFuncs	(FuncSet& dst) const
2079 	{
2080 		initialize();
2081 		if (dst.insert(this).second)
2082 		{
2083 			for (size_t ndx = 0; ndx < m_body.size(); ++ndx)
2084 				m_body[ndx]->getUsedFuncs(dst);
2085 			m_ret->getUsedFuncs(dst);
2086 		}
2087 	}
2088 
2089 	virtual ExprP<Ret>			doExpand		(ExpandContext& ctx, const ArgExprs& args_) const = 0;
2090 
2091 	// These are transparently initialized when first needed. They cannot be
2092 	// initialized in the constructor because they depend on the doExpand
2093 	// method of the subclass.
2094 
2095 	mutable VariableP<Arg0>		m_var0;
2096 	mutable VariableP<Arg1>		m_var1;
2097 	mutable VariableP<Arg2>		m_var2;
2098 	mutable VariableP<Arg3>		m_var3;
2099 	mutable vector<StatementP>	m_body;
2100 	mutable ExprP<Ret>			m_ret;
2101 
2102 private:
2103 
initialize(void) const2104 	void				initialize		(void)	const
2105 	{
2106 		if (!m_ret)
2107 		{
2108 			const ParamNames&	paramNames	= this->getParamNames();
2109 			Counter				symCounter;
2110 			ExpandContext		ctx			(symCounter);
2111 			ArgExprs			args;
2112 
2113 			args.a	= m_var0 = variable<Arg0>(paramNames.a);
2114 			args.b	= m_var1 = variable<Arg1>(paramNames.b);
2115 			args.c	= m_var2 = variable<Arg2>(paramNames.c);
2116 			args.d	= m_var3 = variable<Arg3>(paramNames.d);
2117 
2118 			m_ret	= this->doExpand(ctx, args);
2119 			m_body	= ctx.getStatements();
2120 		}
2121 	}
2122 };
2123 
2124 template <typename Sig>
2125 class PrimitiveFunc : public Func<Sig>
2126 {
2127 public:
2128 	typedef typename PrimitiveFunc::Ret			Ret;
2129 	typedef typename PrimitiveFunc::ArgExprs	ArgExprs;
2130 
2131 protected:
doPrintDefinition(ostream &) const2132 	void	doPrintDefinition	(ostream&) const	{}
doGetUsedFuncs(FuncSet &) const2133 	void	doGetUsedFuncs		(FuncSet&) const	{}
2134 };
2135 
2136 template <typename T>
2137 class Cond : public PrimitiveFunc<Signature<T, bool, T, T> >
2138 {
2139 public:
2140 	typedef typename Cond::IArgs	IArgs;
2141 	typedef typename Cond::IRet		IRet;
2142 
getName(void) const2143 	string	getName	(void) const
2144 	{
2145 		return "_cond";
2146 	}
2147 
2148 protected:
2149 
doPrint(ostream & os,const BaseArgExprs & args) const2150 	void	doPrint	(ostream& os, const BaseArgExprs& args) const
2151 	{
2152 		os << "(" << *args[0] << " ? " << *args[1] << " : " << *args[2] << ")";
2153 	}
2154 
doApply(const EvalContext &,const IArgs & iargs) const2155 	IRet	doApply	(const EvalContext&, const IArgs& iargs)const
2156 	{
2157 		IRet	ret;
2158 
2159 		if (iargs.a.contains(true))
2160 			ret = unionIVal<T>(ret, iargs.b);
2161 
2162 		if (iargs.a.contains(false))
2163 			ret = unionIVal<T>(ret, iargs.c);
2164 
2165 		return ret;
2166 	}
2167 };
2168 
2169 template <typename T>
2170 class CompareOperator : public PrimitiveFunc<Signature<bool, T, T> >
2171 {
2172 public:
2173 	typedef typename CompareOperator::IArgs	IArgs;
2174 	typedef typename CompareOperator::IArg0	IArg0;
2175 	typedef typename CompareOperator::IArg1	IArg1;
2176 	typedef typename CompareOperator::IRet	IRet;
2177 
2178 protected:
doPrint(ostream & os,const BaseArgExprs & args) const2179 	void			doPrint	(ostream& os, const BaseArgExprs& args) const
2180 	{
2181 		os << "(" << *args[0] << getSymbol() << *args[1] << ")";
2182 	}
2183 
doApply(const EvalContext &,const IArgs & iargs) const2184 	Interval		doApply	(const EvalContext&, const IArgs& iargs) const
2185 	{
2186 		const IArg0&	arg0 = iargs.a;
2187 		const IArg1&	arg1 = iargs.b;
2188 		IRet	ret;
2189 
2190 		if (canSucceed(arg0, arg1))
2191 			ret |= true;
2192 		if (canFail(arg0, arg1))
2193 			ret |= false;
2194 
2195 		return ret;
2196 	}
2197 
2198 	virtual string	getSymbol	(void) const = 0;
2199 	virtual bool	canSucceed	(const IArg0&, const IArg1&) const = 0;
2200 	virtual bool	canFail		(const IArg0&, const IArg1&) const = 0;
2201 };
2202 
2203 template <typename T>
2204 class LessThan : public CompareOperator<T>
2205 {
2206 public:
getName(void) const2207 	string	getName		(void) const									{ return "lessThan"; }
2208 
2209 protected:
getSymbol(void) const2210 	string	getSymbol	(void) const									{ return "<";		}
2211 
canSucceed(const Interval & a,const Interval & b) const2212 	bool	canSucceed	(const Interval& a, const Interval& b) const
2213 	{
2214 		return (a.lo() < b.hi());
2215 	}
2216 
canFail(const Interval & a,const Interval & b) const2217 	bool	canFail		(const Interval& a, const Interval& b) const
2218 	{
2219 		return !(a.hi() < b.lo());
2220 	}
2221 };
2222 
2223 template <typename T>
operator <(const ExprP<T> & a,const ExprP<T> & b)2224 ExprP<bool> operator< (const ExprP<T>& a, const ExprP<T>& b)
2225 {
2226 	return app<LessThan<T> >(a, b);
2227 }
2228 
2229 template <typename T>
cond(const ExprP<bool> & test,const ExprP<T> & consequent,const ExprP<T> & alternative)2230 ExprP<T> cond (const ExprP<bool>&	test,
2231 			   const ExprP<T>&		consequent,
2232 			   const ExprP<T>&		alternative)
2233 {
2234 	return app<Cond<T> >(test, consequent, alternative);
2235 }
2236 
2237 /*--------------------------------------------------------------------*//*!
2238  *
2239  * @}
2240  *
2241  *//*--------------------------------------------------------------------*/
2242 //Proper parameters for template T
2243 //	Signature<float, float>		32bit tests
2244 //	Signature<float, deFloat16>	16bit tests
2245 //	Signature<double, double>	64bit tests
2246 template< class T>
2247 class FloatFunc1 : public PrimitiveFunc<T>
2248 {
2249 protected:
doApply(const EvalContext & ctx,const typename Signature<typename T::Ret,typename T::Arg0>::IArgs & iargs) const2250 		Interval			doApply			(const EvalContext& ctx, const typename Signature<typename T::Ret, typename T::Arg0>::IArgs& iargs) const
2251 	{
2252 		return this->applyMonotone(ctx, iargs.a);
2253 	}
2254 
applyMonotone(const EvalContext & ctx,const Interval & iarg0) const2255 	Interval			applyMonotone	(const EvalContext& ctx, const Interval& iarg0) const
2256 	{
2257 		Interval ret;
2258 
2259 		TCU_INTERVAL_APPLY_MONOTONE1(ret, arg0, iarg0, val,
2260 									 TCU_SET_INTERVAL(val, point,
2261 													  point = this->applyPoint(ctx, arg0)));
2262 
2263 		ret |= innerExtrema(ctx, iarg0);
2264 		ret &= (this->getCodomain(ctx) | TCU_NAN);
2265 
2266 		return ctx.format.convert(ret);
2267 	}
2268 
innerExtrema(const EvalContext &,const Interval &) const2269 	virtual Interval	innerExtrema	(const EvalContext&, const Interval&) const
2270 	{
2271 		return Interval(); // empty interval, i.e. no extrema
2272 	}
2273 
applyPoint(const EvalContext & ctx,double arg0) const2274 	virtual Interval	applyPoint		(const EvalContext& ctx, double arg0) const
2275 	{
2276 		const double	exact	= this->applyExact(arg0);
2277 		const double	prec	= this->precision(ctx, exact, arg0);
2278 
2279 		return exact + Interval(-prec, prec);
2280 	}
2281 
applyExact(double) const2282 	virtual double		applyExact		(double) const
2283 	{
2284 		TCU_THROW(InternalError, "Cannot apply");
2285 	}
2286 
getCodomain(const EvalContext &) const2287 	virtual Interval	getCodomain		(const EvalContext&) const
2288 	{
2289 		return Interval::unbounded(true);
2290 	}
2291 
2292 	virtual double		precision		(const EvalContext& ctx, double, double) const = 0;
2293 };
2294 
2295 /*Proper parameters for template T
2296 	Signature<double, double>	64bit tests
2297 	Signature<float, float>		32bit tests
2298 	Signature<float, deFloat16>	16bit tests*/
2299 template <class T>
2300 class CFloatFunc1 : public FloatFunc1<T>
2301 {
2302 public:
CFloatFunc1(const string & name,tcu::DoubleFunc1 & func)2303 						CFloatFunc1	(const string& name, tcu::DoubleFunc1& func)
2304 							: m_name(name), m_func(func) {}
2305 
getName(void) const2306 	string				getName		(void) const		{ return m_name; }
2307 
2308 protected:
applyExact(double x) const2309 	double				applyExact	(double x) const	{ return m_func(x); }
2310 
2311 	const string		m_name;
2312 	tcu::DoubleFunc1&	m_func;
2313 };
2314 
2315 //<Signature<float, deFloat16, deFloat16> >
2316 //<Signature<float, float, float> >
2317 //<Signature<double, double, double> >
2318 template <class T>
2319 class FloatFunc2 : public PrimitiveFunc<T>
2320 {
2321 protected:
doApply(const EvalContext & ctx,const typename Signature<typename T::Ret,typename T::Arg0,typename T::Arg1>::IArgs & iargs) const2322 	Interval			doApply			(const EvalContext&	ctx, const typename Signature<typename T::Ret, typename T::Arg0, typename T::Arg1>::IArgs& iargs) const
2323 	{
2324 		return this->applyMonotone(ctx, iargs.a, iargs.b);
2325 	}
2326 
applyMonotone(const EvalContext & ctx,const Interval & xi,const Interval & yi) const2327 	Interval			applyMonotone	(const EvalContext&	ctx,
2328 										 const Interval&	xi,
2329 										 const Interval&	yi) const
2330 	{
2331 		Interval reti;
2332 
2333 		TCU_INTERVAL_APPLY_MONOTONE2(reti, x, xi, y, yi, ret,
2334 									 TCU_SET_INTERVAL(ret, point,
2335 													  point = this->applyPoint(ctx, x, y)));
2336 		reti |= innerExtrema(ctx, xi, yi);
2337 		reti &= (this->getCodomain(ctx) | TCU_NAN);
2338 
2339 		return ctx.format.convert(reti);
2340 	}
2341 
innerExtrema(const EvalContext &,const Interval &,const Interval &) const2342 	virtual Interval	innerExtrema	(const EvalContext&,
2343 										 const Interval&,
2344 										 const Interval&) const
2345 	{
2346 		return Interval(); // empty interval, i.e. no extrema
2347 	}
2348 
applyPoint(const EvalContext & ctx,double x,double y) const2349 	virtual Interval	applyPoint		(const EvalContext&	ctx,
2350 										 double				x,
2351 										 double				y) const
2352 	{
2353 		const double exact	= this->applyExact(x, y);
2354 		const double prec	= this->precision(ctx, exact, x, y);
2355 
2356 		return exact + Interval(-prec, prec);
2357 	}
2358 
applyExact(double,double) const2359 	virtual double		applyExact		(double, double) const
2360 	{
2361 		TCU_THROW(InternalError, "Cannot apply");
2362 	}
2363 
getCodomain(const EvalContext &) const2364 	virtual Interval	getCodomain		(const EvalContext&) const
2365 	{
2366 		return Interval::unbounded(true);
2367 	}
2368 
2369 	virtual double		precision		(const EvalContext&	ctx,
2370 										 double				ret,
2371 										 double				x,
2372 										 double				y) const = 0;
2373 };
2374 
2375 template <class T>
2376 class CFloatFunc2 : public FloatFunc2<T>
2377 {
2378 public:
CFloatFunc2(const string & name,tcu::DoubleFunc2 & func)2379 						CFloatFunc2	(const string&		name,
2380 									 tcu::DoubleFunc2&	func)
2381 							: m_name(name)
2382 							, m_func(func)
2383 	{
2384 	}
2385 
getName(void) const2386 	string				getName		(void) const						{ return m_name; }
2387 
2388 protected:
applyExact(double x,double y) const2389 	double				applyExact	(double x, double y) const			{ return m_func(x, y); }
2390 
2391 	const string		m_name;
2392 	tcu::DoubleFunc2&	m_func;
2393 };
2394 
2395 template <class T>
2396 class InfixOperator : public FloatFunc2<T>
2397 {
2398 protected:
2399 	virtual string	getSymbol		(void) const = 0;
2400 
doPrint(ostream & os,const BaseArgExprs & args) const2401 	void			doPrint			(ostream& os, const BaseArgExprs& args) const
2402 	{
2403 		os << "(" << *args[0] << " " << getSymbol() << " " << *args[1] << ")";
2404 	}
2405 
applyPoint(const EvalContext & ctx,double x,double y) const2406 	Interval		applyPoint		(const EvalContext&	ctx,
2407 									 double				x,
2408 									 double				y) const
2409 	{
2410 		const double exact	= this->applyExact(x, y);
2411 
2412 		// Allow either representable number on both sides of the exact value,
2413 		// but require exactly representable values to be preserved.
2414 		return ctx.format.roundOut(exact, !deIsInf(x) && !deIsInf(y));
2415 	}
2416 
precision(const EvalContext &,double,double,double) const2417 	double			precision		(const EvalContext&, double, double, double) const
2418 	{
2419 		return 0.0;
2420 	}
2421 };
2422 
2423 class InfixOperator16Bit : public FloatFunc2 <Signature<float, deFloat16, deFloat16> >
2424 {
2425 protected:
2426 	virtual string	getSymbol		(void) const = 0;
2427 
doPrint(ostream & os,const BaseArgExprs & args) const2428 	void			doPrint			(ostream& os, const BaseArgExprs& args) const
2429 	{
2430 		os << "(" << *args[0] << " " << getSymbol() << " " << *args[1] << ")";
2431 	}
2432 
applyPoint(const EvalContext & ctx,double x,double y) const2433 	Interval		applyPoint		(const EvalContext&	ctx,
2434 									 double				x,
2435 									 double				y) const
2436 	{
2437 		const double exact	= this->applyExact(x, y);
2438 
2439 		// Allow either representable number on both sides of the exact value,
2440 		// but require exactly representable values to be preserved.
2441 		return ctx.format.roundOut(exact, !deIsInf(x) && !deIsInf(y));
2442 	}
2443 
precision(const EvalContext &,double,double,double) const2444 	double			precision		(const EvalContext&, double, double, double) const
2445 	{
2446 		return 0.0;
2447 	}
2448 };
2449 
2450 template <class T>
2451 class FloatFunc3 : public PrimitiveFunc<T>
2452 {
2453 protected:
doApply(const EvalContext & ctx,const typename Signature<typename T::Ret,typename T::Arg0,typename T::Arg1,typename T::Arg2>::IArgs & iargs) const2454 	Interval			doApply			(const EvalContext&	ctx, const typename Signature<typename T::Ret, typename T::Arg0, typename T::Arg1, typename T::Arg2>::IArgs& iargs) const
2455 	{
2456 		return this->applyMonotone(ctx, iargs.a, iargs.b, iargs.c);
2457 	}
2458 
applyMonotone(const EvalContext & ctx,const Interval & xi,const Interval & yi,const Interval & zi) const2459 	Interval			applyMonotone	(const EvalContext&	ctx,
2460 										 const Interval&	xi,
2461 										 const Interval&	yi,
2462 										 const Interval&	zi) const
2463 	{
2464 		Interval reti;
2465 		TCU_INTERVAL_APPLY_MONOTONE3(reti, x, xi, y, yi, z, zi, ret,
2466 									 TCU_SET_INTERVAL(ret, point,
2467 													  point = this->applyPoint(ctx, x, y, z)));
2468 		return ctx.format.convert(reti);
2469 	}
2470 
applyPoint(const EvalContext & ctx,double x,double y,double z) const2471 	virtual Interval	applyPoint		(const EvalContext&	ctx,
2472 										 double				x,
2473 										 double				y,
2474 										 double				z) const
2475 	{
2476 		const double exact	= this->applyExact(x, y, z);
2477 		const double prec	= this->precision(ctx, exact, x, y, z);
2478 		return exact + Interval(-prec, prec);
2479 	}
2480 
applyExact(double,double,double) const2481 	virtual double		applyExact		(double, double, double) const
2482 	{
2483 		TCU_THROW(InternalError, "Cannot apply");
2484 	}
2485 
2486 	virtual double		precision		(const EvalContext&	ctx,
2487 										 double				result,
2488 										 double				x,
2489 										 double				y,
2490 										 double				z) const = 0;
2491 };
2492 
2493 // We define syntactic sugar functions for expression constructors. Since
2494 // these have the same names as ordinary mathematical operations (sin, log
2495 // etc.), it's better to give them a dedicated namespace.
2496 namespace Functions
2497 {
2498 
2499 using namespace tcu;
2500 
2501 template <class T>
2502 class Comparison : public InfixOperator < T >
2503 {
2504 public:
getName(void) const2505 	string		getName			(void) const	{ return "comparison"; }
getSymbol(void) const2506 	string		getSymbol		(void) const	{ return ""; }
2507 
getSpirvCase() const2508 	SpirVCaseT	getSpirvCase	() const		{ return SPIRV_CASETYPE_COMPARE; }
2509 
doApply(const EvalContext & ctx,const typename Comparison<T>::IArgs & iargs) const2510 	Interval	doApply			(const EvalContext&						ctx,
2511 								 const typename Comparison<T>::IArgs&	iargs) const
2512 	{
2513 		DE_UNREF(ctx);
2514 		if (iargs.a.hasNaN() || iargs.b.hasNaN())
2515 		{
2516 			return TCU_NAN; // one of the floats is NaN: block analysis
2517 		}
2518 
2519 		int operationFlag = 1;
2520 		int result = 0;
2521 		const double a = iargs.a.midpoint();
2522 		const double b = iargs.b.midpoint();
2523 
2524 		for (int i = 0; i<2; ++i)
2525 		{
2526 			if (a == b)
2527 				result += operationFlag;
2528 			operationFlag = operationFlag << 1;
2529 
2530 			if (a > b)
2531 				result += operationFlag;
2532 			operationFlag = operationFlag << 1;
2533 
2534 			if (a < b)
2535 				result += operationFlag;
2536 			operationFlag = operationFlag << 1;
2537 
2538 			if (a >= b)
2539 				result += operationFlag;
2540 			operationFlag = operationFlag << 1;
2541 
2542 			if (a <= b)
2543 				result += operationFlag;
2544 			operationFlag = operationFlag << 1;
2545 		}
2546 		return result;
2547 	}
2548 };
2549 
2550 template <class T>
2551 class Add : public InfixOperator < T >
2552 {
2553 public:
getName(void) const2554 	string		getName		(void) const						{ return "add"; }
getSymbol(void) const2555 	string		getSymbol	(void) const						{ return "+"; }
2556 
doApply(const EvalContext & ctx,const typename Signature<typename T::Ret,typename T::Arg0,typename T::Arg1>::IArgs & iargs) const2557 	Interval	doApply		(const EvalContext&	ctx,
2558 							 const typename Signature<typename T::Ret, typename T::Arg0, typename T::Arg1>::IArgs& iargs) const
2559 	{
2560 		// Fast-path for common case
2561 		if (iargs.a.isOrdinary(ctx.format.getMaxValue()) && iargs.b.isOrdinary(ctx.format.getMaxValue()))
2562 		{
2563 			Interval ret;
2564 			TCU_SET_INTERVAL_BOUNDS(ret, sum,
2565 									sum = iargs.a.lo() + iargs.b.lo(),
2566 									sum = iargs.a.hi() + iargs.b.hi());
2567 			return ctx.format.convert(ctx.format.roundOut(ret, true));
2568 		}
2569 		return this->applyMonotone(ctx, iargs.a, iargs.b);
2570 	}
2571 
2572 protected:
applyExact(double x,double y) const2573 	double		applyExact	(double x, double y) const			{ return x + y; }
2574 };
2575 
2576 template<class T>
2577 class Mul : public InfixOperator<T>
2578 {
2579 public:
getName(void) const2580 	string		getName		(void) const									{ return "mul"; }
getSymbol(void) const2581 	string		getSymbol	(void) const									{ return "*"; }
2582 
doApply(const EvalContext & ctx,const typename Signature<typename T::Ret,typename T::Arg0,typename T::Arg1>::IArgs & iargs) const2583 	Interval	doApply		(const EvalContext&	ctx, const typename Signature<typename T::Ret, typename T::Arg0, typename T::Arg1>::IArgs& iargs) const
2584 	{
2585 		Interval a = iargs.a;
2586 		Interval b = iargs.b;
2587 
2588 		// Fast-path for common case
2589 		if (a.isOrdinary(ctx.format.getMaxValue()) && b.isOrdinary(ctx.format.getMaxValue()))
2590 		{
2591 			Interval ret;
2592 			if (a.hi() < 0)
2593 			{
2594 				a = -a;
2595 				b = -b;
2596 			}
2597 			if (a.lo() >= 0 && b.lo() >= 0)
2598 			{
2599 				TCU_SET_INTERVAL_BOUNDS(ret, prod,
2600 										prod = a.lo() * b.lo(),
2601 										prod = a.hi() * b.hi());
2602 				return ctx.format.convert(ctx.format.roundOut(ret, true));
2603 			}
2604 			if (a.lo() >= 0 && b.hi() <= 0)
2605 			{
2606 				TCU_SET_INTERVAL_BOUNDS(ret, prod,
2607 										prod = a.hi() * b.lo(),
2608 										prod = a.lo() * b.hi());
2609 				return ctx.format.convert(ctx.format.roundOut(ret, true));
2610 			}
2611 		}
2612 		return this->applyMonotone(ctx, iargs.a, iargs.b);
2613 	}
2614 
2615 protected:
applyExact(double x,double y) const2616 	double		applyExact	(double x, double y) const						{ return x * y; }
2617 
innerExtrema(const EvalContext &,const Interval & xi,const Interval & yi) const2618 	Interval	innerExtrema(const EvalContext&, const Interval& xi, const Interval& yi) const
2619 	{
2620 		if (((xi.contains(-TCU_INFINITY) || xi.contains(TCU_INFINITY)) && yi.contains(0.0)) ||
2621 			((yi.contains(-TCU_INFINITY) || yi.contains(TCU_INFINITY)) && xi.contains(0.0)))
2622 			return Interval(TCU_NAN);
2623 
2624 		return Interval();
2625 	}
2626 };
2627 
2628 template<class T>
2629 class Sub : public InfixOperator <T>
2630 {
2631 public:
getName(void) const2632 	string		getName		(void) const				{ return "sub"; }
getSymbol(void) const2633 	string		getSymbol	(void) const				{ return "-"; }
2634 
doApply(const EvalContext & ctx,const typename Signature<typename T::Ret,typename T::Arg0,typename T::Arg1>::IArgs & iargs) const2635 	Interval	doApply		(const EvalContext&	ctx, const typename Signature<typename T::Ret, typename T::Arg0, typename T::Arg1>::IArgs& iargs) const
2636 	{
2637 		// Fast-path for common case
2638 		if (iargs.a.isOrdinary(ctx.format.getMaxValue()) && iargs.b.isOrdinary(ctx.format.getMaxValue()))
2639 		{
2640 			Interval ret;
2641 
2642 			TCU_SET_INTERVAL_BOUNDS(ret, diff,
2643 									diff = iargs.a.lo() - iargs.b.hi(),
2644 									diff = iargs.a.hi() - iargs.b.lo());
2645 			return ctx.format.convert(ctx.format.roundOut(ret, true));
2646 
2647 		}
2648 		else
2649 		{
2650 			return this->applyMonotone(ctx, iargs.a, iargs.b);
2651 		}
2652 	}
2653 
2654 protected:
applyExact(double x,double y) const2655 	double		applyExact	(double x, double y) const	{ return x - y; }
2656 };
2657 
2658 template <class T>
2659 class Negate : public FloatFunc1<T>
2660 {
2661 public:
getName(void) const2662 	string	getName		(void) const									{ return "_negate"; }
doPrint(ostream & os,const BaseArgExprs & args) const2663 	void	doPrint		(ostream& os, const BaseArgExprs& args) const	{ os << "-" << *args[0]; }
2664 
2665 protected:
precision(const EvalContext &,double,double) const2666 	double	precision	(const EvalContext&, double, double) const		{ return 0.0; }
applyExact(double x) const2667 	double	applyExact	(double x) const								{ return -x; }
2668 };
2669 
2670 template <class T>
2671 class Div : public InfixOperator<T>
2672 {
2673 public:
getName(void) const2674 	string		getName			(void) const						{ return "div"; }
2675 
2676 protected:
getSymbol(void) const2677 	string		getSymbol		(void) const						{ return "/"; }
2678 
innerExtrema(const EvalContext &,const Interval & nom,const Interval & den) const2679 	Interval	innerExtrema	(const EvalContext&,
2680 								 const Interval&		nom,
2681 								 const Interval&		den) const
2682 	{
2683 		Interval ret;
2684 
2685 		if (den.contains(0.0))
2686 		{
2687 			if (nom.contains(0.0))
2688 				ret |= TCU_NAN;
2689 
2690 			if (nom.lo() < 0.0 || nom.hi() > 0.0)
2691 				ret |= Interval::unbounded();
2692 		}
2693 
2694 		return ret;
2695 	}
2696 
applyExact(double x,double y) const2697 	double		applyExact		(double x, double y) const { return x / y; }
2698 
applyPoint(const EvalContext & ctx,double x,double y) const2699 	Interval	applyPoint		(const EvalContext&	ctx, double x, double y) const
2700 	{
2701 		Interval ret = FloatFunc2<T>::applyPoint(ctx, x, y);
2702 
2703 		if (!deIsInf(x) && !deIsInf(y) && y != 0.0)
2704 		{
2705 			const Interval dst = ctx.format.convert(ret);
2706 			if (dst.contains(-TCU_INFINITY)) ret |= -ctx.format.getMaxValue();
2707 			if (dst.contains(+TCU_INFINITY)) ret |= +ctx.format.getMaxValue();
2708 		}
2709 
2710 		return ret;
2711 	}
2712 
precision(const EvalContext & ctx,double ret,double,double den) const2713 	double		precision		(const EvalContext& ctx, double ret, double, double den) const
2714 	{
2715 		const FloatFormat&	fmt		= ctx.format;
2716 
2717 		// \todo [2014-03-05 lauri] Check that the limits in GLSL 3.10 are actually correct.
2718 		// For now, we assume that division's precision is 2.5 ULP when the value is within
2719 		// [2^MINEXP, 2^MAXEXP-1]
2720 
2721 		if (den == 0.0)
2722 			return 0.0; // Result must be exactly inf
2723 		else if (de::inBounds(deAbs(den),
2724 							  deLdExp(1.0, fmt.getMinExp()),
2725 							  deLdExp(1.0, fmt.getMaxExp() - 1)))
2726 			return fmt.ulp(ret, 2.5);
2727 		else
2728 			return TCU_INFINITY; // Can be any number, but must be a number.
2729 	}
2730 };
2731 
2732 template <class T>
2733 class InverseSqrt : public FloatFunc1 <T>
2734 {
2735 public:
getName(void) const2736 	string		getName		(void) const							{ return "inversesqrt"; }
2737 
2738 protected:
applyExact(double x) const2739 	double		applyExact	(double x) const						{ return 1.0 / deSqrt(x); }
2740 
precision(const EvalContext & ctx,double ret,double x) const2741 	double		precision	(const EvalContext& ctx, double ret, double x) const
2742 	{
2743 		return x <= 0 ? TCU_NAN : ctx.format.ulp(ret, 2.0);
2744 	}
2745 
getCodomain(const EvalContext &) const2746 	Interval	getCodomain	(const EvalContext&) const
2747 	{
2748 		return Interval(0.0, TCU_INFINITY);
2749 	}
2750 };
2751 
2752 template <class T>
2753 class ExpFunc : public CFloatFunc1<T>
2754 {
2755 public:
ExpFunc(const string & name,DoubleFunc1 & func)2756 				ExpFunc		(const string& name, DoubleFunc1& func)
2757 					: CFloatFunc1<T> (name, func)
2758 				{}
2759 protected:
2760 	double		precision	(const EvalContext& ctx, double ret, double x) const;
getCodomain(const EvalContext &) const2761 	Interval	getCodomain	(const EvalContext&) const
2762 	{
2763 		return Interval(0.0, TCU_INFINITY);
2764 	}
2765 };
2766 
2767 template <>
precision(const EvalContext & ctx,double ret,double x) const2768 double ExpFunc <Signature<float, float> >::precision (const EvalContext& ctx, double ret, double x) const
2769 {
2770 	switch (ctx.floatPrecision)
2771 	{
2772 	case glu::PRECISION_HIGHP:
2773 		return ctx.format.ulp(ret, 3.0 + 2.0 * deAbs(x));
2774 	case glu::PRECISION_MEDIUMP:
2775 	case glu::PRECISION_LAST:
2776 		return ctx.format.ulp(ret, 1.0 + 2.0 * deAbs(x));
2777 	default:
2778 		DE_FATAL("Impossible");
2779 	}
2780 
2781 	return 0.0;
2782 }
2783 
2784 template <>
precision(const EvalContext & ctx,double ret,double x) const2785 double ExpFunc <Signature<deFloat16, deFloat16> >::precision(const EvalContext& ctx, double ret, double x) const
2786 {
2787 	return ctx.format.ulp(ret, 1.0 + 2.0 * deAbs(x));
2788 }
2789 
2790 template <>
precision(const EvalContext & ctx,double ret,double x) const2791 double ExpFunc <Signature<double, double> >::precision(const EvalContext& ctx, double ret, double x) const
2792 {
2793 	return ctx.format.ulp(ret, 1.0 + 2.0 * deAbs(x));
2794 }
2795 
2796 template <class T>
Exp2(void)2797 class Exp2	: public ExpFunc<T>	{ public: Exp2 (void)	: ExpFunc<T>("exp2", deExp2) {} };
2798 template <class T>
Exp(void)2799 class Exp	: public ExpFunc<T>	{ public: Exp (void)	: ExpFunc<T>("exp", deExp) {} };
2800 
2801 template <typename T>
exp2(const ExprP<T> & x)2802 ExprP<T> exp2	(const ExprP<T>& x)	{ return app<Exp2< Signature<T, T> > >(x); }
2803 template <typename T>
exp(const ExprP<T> & x)2804 ExprP<T> exp	(const ExprP<T>& x)	{ return app<Exp< Signature<T, T> > >(x); }
2805 
2806 template <class T>
2807 class LogFunc : public CFloatFunc1<T>
2808 {
2809 public:
LogFunc(const string & name,DoubleFunc1 & func)2810 				LogFunc		(const string& name, DoubleFunc1& func)
2811 					: CFloatFunc1<T>(name, func) {}
2812 
2813 protected:
2814 	double		precision	(const EvalContext& ctx, double ret, double x) const;
2815 };
2816 
2817 template <>
precision(const EvalContext & ctx,double ret,double x) const2818 double LogFunc<Signature<float, float> >::precision(const EvalContext& ctx, double ret, double x) const
2819 {
2820 	if (x <= 0)
2821 		return TCU_NAN;
2822 
2823 	switch (ctx.floatPrecision)
2824 	{
2825 	case glu::PRECISION_HIGHP:
2826 		return (0.5 <= x && x <= 2.0) ? deLdExp(1.0, -21) : ctx.format.ulp(ret, 3.0);
2827 	case glu::PRECISION_MEDIUMP:
2828 	case glu::PRECISION_LAST:
2829 		return (0.5 <= x && x <= 2.0) ? deLdExp(1.0, -7) : ctx.format.ulp(ret, 3.0);
2830 	default:
2831 		DE_FATAL("Impossible");
2832 	}
2833 
2834 	return 0;
2835 }
2836 
2837 template <>
precision(const EvalContext & ctx,double ret,double x) const2838 double LogFunc<Signature<deFloat16, deFloat16> >::precision(const EvalContext& ctx, double ret, double x) const
2839 {
2840 	if (x <= 0)
2841 		return TCU_NAN;
2842 	return (0.5 <= x && x <= 2.0) ? deLdExp(1.0, -7) : ctx.format.ulp(ret, 3.0);
2843 }
2844 
2845 // Spec: "The precision of double-precision instructions is at least that of single precision."
2846 // Lets pick float high precision as a reference.
2847 template <>
precision(const EvalContext & ctx,double ret,double x) const2848 double LogFunc<Signature<double, double> >::precision(const EvalContext& ctx, double ret, double x) const
2849 {
2850 	if (x <= 0)
2851 		return TCU_NAN;
2852 	return (0.5 <= x && x <= 2.0) ? deLdExp(1.0, -21) : ctx.format.ulp(ret, 3.0);
2853 }
2854 
2855 template <class T>
Log2(void)2856 class Log2	: public LogFunc<T>		{ public: Log2	(void) : LogFunc<T>("log2", deLog2) {} };
2857 template <class T>
Log(void)2858 class Log	: public LogFunc<T>		{ public: Log	(void) : LogFunc<T>("log", deLog) {} };
2859 
// Expression constructors for log2(x) and log(x), 32-bit float.
ExprP<float> log2 (const ExprP<float>& x)
{
	return app<Log2<Signature<float, float> > >(x);
}

ExprP<float> log (const ExprP<float>& x)
{
	return app<Log<Signature<float, float> > >(x);
}

// Expression constructors for log2(x) and log(x), 16-bit float.
ExprP<deFloat16> log2 (const ExprP<deFloat16>& x)
{
	return app<Log2<Signature<deFloat16, deFloat16> > >(x);
}

ExprP<deFloat16> log (const ExprP<deFloat16>& x)
{
	return app<Log<Signature<deFloat16, deFloat16> > >(x);
}

// Expression constructors for log2(x) and log(x), 64-bit float.
ExprP<double> log2 (const ExprP<double>& x)
{
	return app<Log2<Signature<double, double> > >(x);
}

ExprP<double> log (const ExprP<double>& x)
{
	return app<Log<Signature<double, double> > >(x);
}
2868 
// Defines the free function NAME(arg0) that builds an application of CLASS
// to one argument expression.
#define DEFINE_CONSTRUCTOR1(CLASS, TRET, NAME, T0) \
ExprP<TRET> NAME (const ExprP<T0>& arg0) \
{ \
	return app<CLASS>(arg0); \
}

// Defines a one-argument DerivedFunc subclass CLASS whose doExpand() returns
// EXPANSION, with the argument expression visible inside EXPANSION under the
// name ARG0. Also defines the matching NAME() constructor function.
#define DEFINE_DERIVED1(CLASS, TRET, NAME, T0, ARG0, EXPANSION) \
class CLASS : public DerivedFunc<Signature<TRET, T0> > /* NOLINT(CLASS) */ \
{ \
public: \
	string getName () const \
	{ \
		return #NAME; \
	} \
 \
protected: \
	ExprP<TRET> doExpand (ExpandContext&, const CLASS::ArgExprs& args_) const \
	{ \
		const ExprP<T0>& ARG0 = args_.a; \
		return EXPANSION; \
	} \
}; \
DEFINE_CONSTRUCTOR1(CLASS, TRET, NAME, T0)

// DEFINE_DERIVED1 with double argument and result.
#define DEFINE_DERIVED_DOUBLE1(CLASS, NAME, ARG0, EXPANSION) \
	DEFINE_DERIVED1(CLASS, double, NAME, double, ARG0, EXPANSION)

// DEFINE_DERIVED1 with float argument and result.
#define DEFINE_DERIVED_FLOAT1(CLASS, NAME, ARG0, EXPANSION) \
	DEFINE_DERIVED1(CLASS, float, NAME, float, ARG0, EXPANSION)
2893 
2894 
// Same as DEFINE_DERIVED1, but the generated class additionally defines
// getInputRange(), which returns INTERVAL regardless of its argument.
#define DEFINE_DERIVED1_INPUTRANGE(CLASS, TRET, NAME, T0, ARG0, EXPANSION, INTERVAL) \
class CLASS : public DerivedFunc<Signature<TRET, T0> > /* NOLINT(CLASS) */ \
{ \
public: \
	string getName () const \
	{ \
		return #NAME; \
	} \
 \
protected: \
	ExprP<TRET> doExpand (ExpandContext&, const CLASS::ArgExprs& args_) const \
	{ \
		const ExprP<T0>& ARG0 = args_.a; \
		return EXPANSION; \
	} \
 \
	Interval getInputRange (const bool /*is16bit*/) const \
	{ \
		return INTERVAL; \
	} \
}; \
DEFINE_CONSTRUCTOR1(CLASS, TRET, NAME, T0)

// DEFINE_DERIVED1_INPUTRANGE with float argument and result.
#define DEFINE_DERIVED_FLOAT1_INPUTRANGE(CLASS, NAME, ARG0, EXPANSION, INTERVAL) \
	DEFINE_DERIVED1_INPUTRANGE(CLASS, float, NAME, float, ARG0, EXPANSION, INTERVAL)

// DEFINE_DERIVED1_INPUTRANGE with double argument and result.
#define DEFINE_DERIVED_DOUBLE1_INPUTRANGE(CLASS, NAME, ARG0, EXPANSION, INTERVAL) \
	DEFINE_DERIVED1_INPUTRANGE(CLASS, double, NAME, double, ARG0, EXPANSION, INTERVAL)

// DEFINE_DERIVED1 with deFloat16 argument and result.
#define DEFINE_DERIVED_FLOAT1_16BIT(CLASS, NAME, ARG0, EXPANSION) \
	DEFINE_DERIVED1(CLASS, deFloat16, NAME, deFloat16, ARG0, EXPANSION)

// DEFINE_DERIVED1_INPUTRANGE with deFloat16 argument and result.
#define DEFINE_DERIVED_FLOAT1_INPUTRANGE_16BIT(CLASS, NAME, ARG0, EXPANSION, INTERVAL) \
	DEFINE_DERIVED1_INPUTRANGE(CLASS, deFloat16, NAME, deFloat16, ARG0, EXPANSION, INTERVAL)
2926 
// Defines the free function NAME(arg0, arg1) that builds an application of
// CLASS to two argument expressions.
#define DEFINE_CONSTRUCTOR2(CLASS, TRET, NAME, T0, T1) \
ExprP<TRET> NAME (const ExprP<T0>& arg0, const ExprP<T1>& arg1) \
{ \
	return app<CLASS>(arg0, arg1); \
}

// Defines a two-argument DerivedFunc subclass CLASS whose doExpand() returns
// EXPANSION (arguments visible as Arg0 / Arg1) and whose getSpirvCase()
// returns SPIRVCASE. Also defines the matching NAME() constructor function.
#define DEFINE_CASED_DERIVED2(CLASS, TRET, NAME, T0, Arg0, T1, Arg1, EXPANSION, SPIRVCASE) \
class CLASS : public DerivedFunc<Signature<TRET, T0, T1> > /* NOLINT(CLASS) */ \
{ \
public: \
	string getName () const \
	{ \
		return #NAME; \
	} \
 \
	SpirVCaseT getSpirvCase () const \
	{ \
		return SPIRVCASE; \
	} \
 \
protected: \
	ExprP<TRET> doExpand (ExpandContext&, const ArgExprs& args_) const \
	{ \
		const ExprP<T0>& Arg0 = args_.a; \
		const ExprP<T1>& Arg1 = args_.b; \
		return EXPANSION; \
	} \
}; \
DEFINE_CONSTRUCTOR2(CLASS, TRET, NAME, T0, T1)

// DEFINE_CASED_DERIVED2 with SPIRV_CASETYPE_NONE.
#define DEFINE_DERIVED2(CLASS, TRET, NAME, T0, Arg0, T1, Arg1, EXPANSION) \
	DEFINE_CASED_DERIVED2(CLASS, TRET, NAME, T0, Arg0, T1, Arg1, EXPANSION, SPIRV_CASETYPE_NONE)

// DEFINE_DERIVED2 with double arguments and result.
#define DEFINE_DERIVED_DOUBLE2(CLASS, NAME, Arg0, Arg1, EXPANSION) \
	DEFINE_DERIVED2(CLASS, double, NAME, double, Arg0, double, Arg1, EXPANSION)

// DEFINE_DERIVED2 with float arguments and result.
#define DEFINE_DERIVED_FLOAT2(CLASS, NAME, Arg0, Arg1, EXPANSION) \
	DEFINE_DERIVED2(CLASS, float, NAME, float, Arg0, float, Arg1, EXPANSION)

// DEFINE_DERIVED2 with deFloat16 arguments and result.
#define DEFINE_DERIVED_FLOAT2_16BIT(CLASS, NAME, Arg0, Arg1, EXPANSION) \
	DEFINE_DERIVED2(CLASS, deFloat16, NAME, deFloat16, Arg0, deFloat16, Arg1, EXPANSION)

// DEFINE_CASED_DERIVED2 with float arguments and result.
#define DEFINE_CASED_DERIVED_FLOAT2(CLASS, NAME, Arg0, Arg1, EXPANSION, SPIRVCASE) \
	DEFINE_CASED_DERIVED2(CLASS, float, NAME, float, Arg0, float, Arg1, EXPANSION, SPIRVCASE)

// DEFINE_CASED_DERIVED2 with deFloat16 arguments and result.
#define DEFINE_CASED_DERIVED_FLOAT2_16BIT(CLASS, NAME, Arg0, Arg1, EXPANSION, SPIRVCASE) \
	DEFINE_CASED_DERIVED2(CLASS, deFloat16, NAME, deFloat16, Arg0, deFloat16, Arg1, EXPANSION, SPIRVCASE)

// DEFINE_CASED_DERIVED2 with double arguments and result.
#define DEFINE_CASED_DERIVED_DOUBLE2(CLASS, NAME, Arg0, Arg1, EXPANSION, SPIRVCASE) \
	DEFINE_CASED_DERIVED2(CLASS, double, NAME, double, Arg0, double, Arg1, EXPANSION, SPIRVCASE)
2971 
// Defines the free function NAME(arg0, arg1, arg2) that builds an
// application of CLASS to three argument expressions.
#define DEFINE_CONSTRUCTOR3(CLASS, TRET, NAME, T0, T1, T2) \
ExprP<TRET> NAME (const ExprP<T0>& arg0, \
				  const ExprP<T1>& arg1, \
				  const ExprP<T2>& arg2) \
{ \
	return app<CLASS>(arg0, arg1, arg2); \
}

// Defines a three-argument DerivedFunc subclass CLASS whose doExpand()
// returns EXPANSION, with the arguments visible as ARG0 / ARG1 / ARG2.
// Also defines the matching NAME() constructor function.
#define DEFINE_DERIVED3(CLASS, TRET, NAME, T0, ARG0, T1, ARG1, T2, ARG2, EXPANSION) \
class CLASS : public DerivedFunc<Signature<TRET, T0, T1, T2> > /* NOLINT(CLASS) */ \
{ \
public: \
	string getName () const \
	{ \
		return #NAME; \
	} \
 \
protected: \
	ExprP<TRET> doExpand (ExpandContext&, const ArgExprs& args_) const \
	{ \
		const ExprP<T0>& ARG0 = args_.a; \
		const ExprP<T1>& ARG1 = args_.b; \
		const ExprP<T2>& ARG2 = args_.c; \
		return EXPANSION; \
	} \
}; \
DEFINE_CONSTRUCTOR3(CLASS, TRET, NAME, T0, T1, T2)

// DEFINE_DERIVED3 with double arguments and result.
#define DEFINE_DERIVED_DOUBLE3(CLASS, NAME, ARG0, ARG1, ARG2, EXPANSION) \
	DEFINE_DERIVED3(CLASS, double, NAME, double, ARG0, double, ARG1, double, ARG2, EXPANSION)

// DEFINE_DERIVED3 with float arguments and result.
#define DEFINE_DERIVED_FLOAT3(CLASS, NAME, ARG0, ARG1, ARG2, EXPANSION) \
	DEFINE_DERIVED3(CLASS, float, NAME, float, ARG0, float, ARG1, float, ARG2, EXPANSION)

// DEFINE_DERIVED3 with deFloat16 arguments and result.
#define DEFINE_DERIVED_FLOAT3_16BIT(CLASS, NAME, ARG0, ARG1, ARG2, EXPANSION) \
	DEFINE_DERIVED3(CLASS, deFloat16, NAME, deFloat16, ARG0, deFloat16, ARG1, deFloat16, ARG2, EXPANSION)
3003 
// Defines the free function NAME(arg0, arg1, arg2, arg3) that builds an
// application of CLASS to four argument expressions.
#define DEFINE_CONSTRUCTOR4(CLASS, TRET, NAME, T0, T1, T2, T3) \
ExprP<TRET> NAME (const ExprP<T0>& arg0, \
				  const ExprP<T1>& arg1, \
				  const ExprP<T2>& arg2, \
				  const ExprP<T3>& arg3) \
{ \
	return app<CLASS>(arg0, arg1, arg2, arg3); \
}
3010 
3011 typedef	 InverseSqrt< Signature<deFloat16, deFloat16> >	InverseSqrt16Bit;
3012 typedef	 InverseSqrt< Signature<float, float> >			InverseSqrt32Bit;
3013 typedef InverseSqrt< Signature<double, double> >		InverseSqrt64Bit;
3014 
3015 DEFINE_DERIVED_FLOAT1(Sqrt32Bit,		sqrt,		x,		constant(1.0f) / app<InverseSqrt32Bit>(x))
3016 DEFINE_DERIVED_FLOAT1_16BIT(Sqrt16Bit,	sqrt,		x,		constant((deFloat16)FLOAT16_1_0) / app<InverseSqrt16Bit>(x))
3017 DEFINE_DERIVED_DOUBLE1(Sqrt64Bit,		sqrt,		x,		constant(1.0) / app<InverseSqrt64Bit>(x))
3018 DEFINE_DERIVED_FLOAT2(Pow,				pow,		x,	y,	exp2<float>(y * log2(x)))
3019 DEFINE_DERIVED_FLOAT2_16BIT(Pow16,		pow,		x,	y,	exp2<deFloat16>(y * log2(x)))
3020 DEFINE_DERIVED_DOUBLE2(Pow64,			pow,		x,	y,	exp2<double>(y * log2(x)))
3021 DEFINE_DERIVED_FLOAT1(Radians,			radians,	d,		(constant(DE_PI) / constant(180.0f)) * d)
3022 DEFINE_DERIVED_FLOAT1_16BIT(Radians16,	radians,	d,		(constant((deFloat16)DE_PI_16BIT) / constant((deFloat16)FLOAT16_180_0)) * d)
3023 DEFINE_DERIVED_DOUBLE1(Radians64,		radians,	d,		(constant((double)(DE_PI)) / constant(180.0)) * d)
3024 DEFINE_DERIVED_FLOAT1(Degrees,			degrees,	r,		(constant(180.0f) / constant(DE_PI)) * r)
3025 DEFINE_DERIVED_FLOAT1_16BIT(Degrees16,	degrees,	r,		(constant((deFloat16)FLOAT16_180_0) / constant((deFloat16)DE_PI_16BIT)) * r)
3026 DEFINE_DERIVED_DOUBLE1(Degrees64,		degrees,	r,		(constant(180.0) / constant((double)(DE_PI))) * r)
3027 
/* Supported parameters for template T:
	Signature<float, float>			32-bit tests
	Signature<deFloat16, deFloat16>	16-bit tests
	Signature<double, double>		64-bit tests */
3031 template<class T>
3032 class TrigFunc : public CFloatFunc1<T>
3033 {
3034 public:
TrigFunc(const string & name,DoubleFunc1 & func,const Interval & loEx,const Interval & hiEx)3035 					TrigFunc		(const string&		name,
3036 									 DoubleFunc1&		func,
3037 									 const Interval&	loEx,
3038 									 const Interval&	hiEx)
3039 						: CFloatFunc1<T>	(name, func)
3040 						, m_loExtremum		(loEx)
3041 						, m_hiExtremum		(hiEx) {}
3042 
3043 protected:
innerExtrema(const EvalContext &,const Interval & angle) const3044 	Interval		innerExtrema	(const EvalContext&, const Interval& angle) const
3045 	{
3046 		const double		lo		= angle.lo();
3047 		const double		hi		= angle.hi();
3048 		const int			loSlope	= doGetSlope(lo);
3049 		const int			hiSlope	= doGetSlope(hi);
3050 
3051 		// Detect the high and low values the function can take between the
3052 		// interval endpoints.
3053 		if (angle.length() >= 2.0 * DE_PI_DOUBLE)
3054 		{
3055 			// The interval is longer than a full cycle, so it must get all possible values.
3056 			return m_hiExtremum | m_loExtremum;
3057 		}
3058 		else if (loSlope == 1 && hiSlope == -1)
3059 		{
3060 			// The slope can change from positive to negative only at the maximum value.
3061 			return m_hiExtremum;
3062 		}
3063 		else if (loSlope == -1 && hiSlope == 1)
3064 		{
3065 			// The slope can change from negative to positive only at the maximum value.
3066 			return m_loExtremum;
3067 		}
3068 		else if (loSlope == hiSlope &&
3069 				 deIntSign(CFloatFunc1<T>::applyExact(hi) - CFloatFunc1<T>::applyExact(lo)) * loSlope == -1)
3070 		{
3071 			// The slope has changed twice between the endpoints, so both extrema are included.
3072 			return m_hiExtremum | m_loExtremum;
3073 		}
3074 
3075 		return Interval();
3076 	}
3077 
getCodomain(const EvalContext &) const3078 	Interval	getCodomain				(const EvalContext&) const
3079 	{
3080 		// Ensure that result is always within [-1, 1], or NaN (for +-inf)
3081 		return Interval(-1.0, 1.0) | TCU_NAN;
3082 	}
3083 
3084 	double		precision				(const EvalContext& ctx, double ret, double arg) const;
3085 
3086 	Interval	getInputRange			(const bool is16bit) const;
3087 	virtual int	doGetSlope				(double angle) const = 0;
3088 
3089 	Interval		m_loExtremum;
3090 	Interval		m_hiExtremum;
3091 };
3092 
3093 //Only -DE_PI_DOUBLE, DE_PI_DOUBLE input range
3094 template<>
getInputRange(const bool is16bit) const3095 Interval TrigFunc<Signature<float, float> >::getInputRange(const bool is16bit) const
3096 {
3097 	DE_UNREF(is16bit);
3098 	return Interval(false, -DE_PI_DOUBLE, DE_PI_DOUBLE);
3099 }
3100 
3101 //Only -DE_PI_DOUBLE, DE_PI_DOUBLE input range
3102 template<>
getInputRange(const bool is16bit) const3103 Interval TrigFunc<Signature<deFloat16, deFloat16> >::getInputRange(const bool is16bit) const
3104 {
3105 	DE_UNREF(is16bit);
3106 	return Interval(false, -DE_PI_DOUBLE, DE_PI_DOUBLE);
3107 }
3108 
3109 //Only -DE_PI_DOUBLE, DE_PI_DOUBLE input range
3110 template<>
getInputRange(const bool is16bit) const3111 Interval TrigFunc<Signature<double, double> >::getInputRange(const bool is16bit) const
3112 {
3113 	DE_UNREF(is16bit);
3114 	return Interval(false, -DE_PI_DOUBLE, DE_PI_DOUBLE);
3115 }
3116 
3117 template<>
precision(const EvalContext & ctx,double ret,double arg) const3118 double TrigFunc<Signature<float, float> >::precision(const EvalContext& ctx, double ret, double arg) const
3119 {
3120 	DE_UNREF(ret);
3121 	if (ctx.floatPrecision == glu::PRECISION_HIGHP)
3122 	{
3123 		if (-DE_PI_DOUBLE <= arg && arg <= DE_PI_DOUBLE)
3124 			return deLdExp(1.0, -11);
3125 		else
3126 		{
3127 			// "larger otherwise", let's pick |x| * 2^-12 , which is slightly over
3128 			// 2^-11 at x == pi.
3129 			return deLdExp(deAbs(arg), -12);
3130 		}
3131 	}
3132 	else
3133 	{
3134 		DE_ASSERT(ctx.floatPrecision == glu::PRECISION_MEDIUMP || ctx.floatPrecision == glu::PRECISION_LAST);
3135 
3136 		if (-DE_PI_DOUBLE <= arg && arg <= DE_PI_DOUBLE)
3137 			return deLdExp(1.0, -7);
3138 		else
3139 		{
3140 			// |x| * 2^-8, slightly larger than 2^-7 at x == pi
3141 			return deLdExp(deAbs(arg), -8);
3142 		}
3143 	}
3144 }
3145 //
3146 /*
3147  * Half tests
3148  * From Spec:
3149  * Absolute error 2^{-7} inside the range [-pi, pi].
3150 */
3151 template<>
precision(const EvalContext & ctx,double ret,double arg) const3152 double TrigFunc<Signature<deFloat16, deFloat16> >::precision(const EvalContext& ctx, double ret, double arg) const
3153 {
3154 	DE_UNREF(ctx);
3155 	DE_UNREF(ret);
3156 	DE_UNREF(arg);
3157 	DE_ASSERT(-DE_PI_DOUBLE <= arg && arg <= DE_PI_DOUBLE && ctx.floatPrecision == glu::PRECISION_LAST);
3158 	return deLdExp(1.0, -7);
3159 }
3160 
3161 // Spec: "The precision of double-precision instructions is at least that of single precision."
3162 // Lets pick float high precision as a reference.
3163 template<>
precision(const EvalContext & ctx,double ret,double arg) const3164 double TrigFunc<Signature<double, double> >::precision(const EvalContext& ctx, double ret, double arg) const
3165 {
3166 	DE_UNREF(ctx);
3167 	DE_UNREF(ret);
3168 	if (-DE_PI_DOUBLE <= arg && arg <= DE_PI_DOUBLE)
3169 		return deLdExp(1.0, -11);
3170 	else
3171 	{
3172 		// "larger otherwise", let's pick |x| * 2^-12 , which is slightly over
3173 		// 2^-11 at x == pi.
3174 		return deLdExp(deAbs(arg), -12);
3175 	}
3176 }
3177 
/* Supported parameters for template T:
	Signature<float, float>			32-bit tests
	Signature<deFloat16, deFloat16>	16-bit tests
	Signature<double, double>		64-bit tests */
3181 template <class T>
3182 class Sin : public TrigFunc<T>
3183 {
3184 public:
Sin(void)3185 				Sin			(void) : TrigFunc<T>("sin", deSin, -1.0, 1.0) {}
3186 
3187 protected:
doGetSlope(double angle) const3188 	int			doGetSlope	(double angle) const { return deIntSign(deCos(angle)); }
3189 };
3190 
// Expression constructors for sin(x), one per supported float width.
ExprP<float> sin (const ExprP<float>& x)
{
	return app<Sin<Signature<float, float> > >(x);
}

ExprP<deFloat16> sin (const ExprP<deFloat16>& x)
{
	return app<Sin<Signature<deFloat16, deFloat16> > >(x);
}

ExprP<double> sin (const ExprP<double>& x)
{
	return app<Sin<Signature<double, double> > >(x);
}
3194 
3195 template <class T>
3196 class Cos : public TrigFunc<T>
3197 {
3198 public:
Cos(void)3199 				Cos			(void) : TrigFunc<T> ("cos", deCos, -1.0, 1.0) {}
3200 
3201 protected:
doGetSlope(double angle) const3202 	int			doGetSlope	(double angle) const { return -deIntSign(deSin(angle)); }
3203 };
3204 
// Expression constructors for cos(x), one per supported float width.
ExprP<float> cos (const ExprP<float>& x)
{
	return app<Cos<Signature<float, float> > >(x);
}

ExprP<deFloat16> cos (const ExprP<deFloat16>& x)
{
	return app<Cos<Signature<deFloat16, deFloat16> > >(x);
}

ExprP<double> cos (const ExprP<double>& x)
{
	return app<Cos<Signature<double, double> > >(x);
}
3208 
3209 DEFINE_DERIVED_FLOAT1_INPUTRANGE(Tan, tan, x, sin(x) * (constant(1.0f) / cos(x)), Interval(false, -DE_PI_DOUBLE, DE_PI_DOUBLE))
3210 DEFINE_DERIVED_FLOAT1_INPUTRANGE_16BIT(Tan16Bit, tan, x, sin(x) * (constant((deFloat16)FLOAT16_1_0) / cos(x)), Interval(false, -DE_PI_DOUBLE, DE_PI_DOUBLE))
3211 DEFINE_DERIVED_DOUBLE1_INPUTRANGE(Tan64Bit, tan, x, sin(x) * (constant(1.0) / cos(x)), Interval(false, -DE_PI_DOUBLE, DE_PI_DOUBLE))
3212 
3213 template <class T>
3214 class ATan : public CFloatFunc1<T>
3215 {
3216 public:
ATan(void)3217 			ATan		(void) : CFloatFunc1<T>	("atan", deAtanOver) {}
3218 
3219 protected:
precision(const EvalContext & ctx,double ret,double) const3220 	double	precision	(const EvalContext& ctx, double ret, double) const
3221 	{
3222 		if (ctx.floatPrecision == glu::PRECISION_HIGHP)
3223 			return ctx.format.ulp(ret, 4096.0);
3224 		else
3225 			return ctx.format.ulp(ret, 5.0);
3226 	}
3227 
getCodomain(const EvalContext & ctx) const3228 	Interval getCodomain(const EvalContext& ctx) const
3229 	{
3230 		return ctx.format.roundOut(Interval(-0.5 * DE_PI_DOUBLE, 0.5 * DE_PI_DOUBLE), true);
3231 	}
3232 };
3233 
3234 template <class T>
3235 class ATan2 : public CFloatFunc2<T>
3236 {
3237 public:
ATan2(void)3238 				ATan2			(void) : CFloatFunc2<T> ("atan", deAtan2) {}
3239 
3240 protected:
innerExtrema(const EvalContext & ctx,const Interval & yi,const Interval & xi) const3241 	Interval	innerExtrema	(const EvalContext&		ctx,
3242 								 const Interval&		yi,
3243 								 const Interval&		xi) const
3244 	{
3245 		Interval ret;
3246 
3247 		if (yi.contains(0.0))
3248 		{
3249 			if (xi.contains(0.0))
3250 				ret |= TCU_NAN;
3251 			if (xi.intersects(Interval(-TCU_INFINITY, 0.0)))
3252 				ret |= ctx.format.roundOut(Interval(-DE_PI_DOUBLE, DE_PI_DOUBLE), true);
3253 		}
3254 
3255 		if (!yi.isFinite(ctx.format.getMaxValue()) || !xi.isFinite(ctx.format.getMaxValue()))
3256 		{
3257 			// Infinities may not be supported, allow anything, including NaN
3258 			ret |= TCU_NAN;
3259 		}
3260 
3261 		return ret;
3262 	}
3263 
precision(const EvalContext & ctx,double ret,double,double) const3264 	double		precision		(const EvalContext& ctx, double ret, double, double) const
3265 	{
3266 		if (ctx.floatPrecision == glu::PRECISION_HIGHP)
3267 			return ctx.format.ulp(ret, 4096.0);
3268 		else
3269 			return ctx.format.ulp(ret, 5.0);
3270 	}
3271 
getCodomain(const EvalContext & ctx) const3272 	Interval getCodomain(const EvalContext& ctx) const
3273 	{
3274 		return ctx.format.roundOut(Interval(-DE_PI_DOUBLE, DE_PI_DOUBLE), true);
3275 	}
3276 };
3277 
//! Builds an ATan2 application for 32-bit float operands.
ExprP<float> atan2 (const ExprP<float>& x, const ExprP<float>& y)
{
	typedef ATan2<Signature<float, float, float> > ATan2Func;

	return app<ATan2Func>(x, y);
}

//! Builds an ATan2 application for 16-bit float operands.
ExprP<deFloat16> atan2 (const ExprP<deFloat16>& x, const ExprP<deFloat16>& y)
{
	typedef ATan2<Signature<deFloat16, deFloat16, deFloat16> > ATan2Func;

	return app<ATan2Func>(x, y);
}

//! Builds an ATan2 application for 64-bit float operands.
ExprP<double> atan2 (const ExprP<double>& x, const ExprP<double>& y)
{
	typedef ATan2<Signature<double, double, double> > ATan2Func;

	return app<ATan2Func>(x, y);
}
3283 
3284 
3285 DEFINE_DERIVED_FLOAT1(Sinh, sinh, x, (exp<float>(x) - exp<float>(-x)) / constant(2.0f))
3286 DEFINE_DERIVED_FLOAT1(Cosh, cosh, x, (exp<float>(x) + exp<float>(-x)) / constant(2.0f))
3287 DEFINE_DERIVED_FLOAT1(Tanh, tanh, x, sinh(x) / cosh(x))
3288 
3289 DEFINE_DERIVED_FLOAT1_16BIT(Sinh16Bit, sinh, x, (exp(x) - exp(-x)) / constant((deFloat16)FLOAT16_2_0))
3290 DEFINE_DERIVED_FLOAT1_16BIT(Cosh16Bit, cosh, x, (exp(x) + exp(-x)) / constant((deFloat16)FLOAT16_2_0))
3291 DEFINE_DERIVED_FLOAT1_16BIT(Tanh16Bit, tanh, x, sinh(x) / cosh(x))
3292 
3293 DEFINE_DERIVED_DOUBLE1(Sinh64Bit, sinh, x, (exp<double>(x) - exp<double>(-x)) / constant(2.0))
3294 DEFINE_DERIVED_DOUBLE1(Cosh64Bit, cosh, x, (exp<double>(x) + exp<double>(-x)) / constant(2.0))
3295 DEFINE_DERIVED_DOUBLE1(Tanh64Bit, tanh, x, sinh(x) / cosh(x))
3296 
3297 DEFINE_DERIVED_FLOAT1(ASin, asin, x, atan2(x, sqrt(constant(1.0f) - x * x)))
3298 DEFINE_DERIVED_FLOAT1(ACos, acos, x, atan2(sqrt(constant(1.0f) - x * x), x))
3299 DEFINE_DERIVED_FLOAT1(ASinh, asinh, x, log(x + sqrt(x * x + constant(1.0f))))
3300 DEFINE_DERIVED_FLOAT1(ACosh, acosh, x, log(x + sqrt(alternatives((x + constant(1.0f)) * (x - constant(1.0f)),
3301 																 (x * x - constant(1.0f))))))
3302 DEFINE_DERIVED_FLOAT1(ATanh, atanh, x, constant(0.5f) * log((constant(1.0f) + x) /
3303 															(constant(1.0f) - x)))
3304 
3305 DEFINE_DERIVED_FLOAT1_16BIT(ASin16Bit, asin, x, atan2(x, sqrt(constant((deFloat16)FLOAT16_1_0) - x * x)))
3306 DEFINE_DERIVED_FLOAT1_16BIT(ACos16Bit, acos, x, atan2(sqrt(constant((deFloat16)FLOAT16_1_0) - x * x), x))
3307 DEFINE_DERIVED_FLOAT1_16BIT(ASinh16Bit, asinh, x, log(x + sqrt(x * x + constant((deFloat16)FLOAT16_1_0))))
3308 DEFINE_DERIVED_FLOAT1_16BIT(ACosh16Bit, acosh, x, log(x + sqrt(alternatives((x + constant((deFloat16)FLOAT16_1_0)) * (x - constant((deFloat16)FLOAT16_1_0)),
3309 																 (x * x - constant((deFloat16)FLOAT16_1_0))))))
3310 DEFINE_DERIVED_FLOAT1_16BIT(ATanh16Bit, atanh, x, constant((deFloat16)FLOAT16_0_5) * log((constant((deFloat16)FLOAT16_1_0) + x) /
3311 															(constant((deFloat16)FLOAT16_1_0) - x)))
3312 
3313 DEFINE_DERIVED_DOUBLE1(ASin64Bit, asin, x, atan2(x, sqrt(constant(1.0) - pow(x, constant(2.0)))))
3314 DEFINE_DERIVED_DOUBLE1(ACos64Bit, acos, x, atan2(sqrt(constant(1.0) - pow(x, constant(2.0))), x))
3315 DEFINE_DERIVED_DOUBLE1(ASinh64Bit, asinh, x, log(x + sqrt(x * x + constant(1.0))))
3316 DEFINE_DERIVED_DOUBLE1(ACosh64Bit, acosh, x, log(x + sqrt(alternatives((x + constant(1.0)) * (x - constant(1.0)),
3317 																 (x * x - constant(1.0))))))
3318 DEFINE_DERIVED_DOUBLE1(ATanh64Bit, atanh, x, constant(0.5) * log((constant(1.0) + x) /
3319 															(constant(1.0) - x)))
3320 
3321 template <typename T>
3322 class GetComponent : public PrimitiveFunc<Signature<typename T::Element, T, int> >
3323 {
3324 public:
3325 	typedef		typename GetComponent::IRet	IRet;
3326 
getName(void) const3327 	string		getName		(void) const { return "_getComponent"; }
3328 
print(ostream & os,const BaseArgExprs & args) const3329 	void		print		(ostream&				os,
3330 							 const BaseArgExprs&	args) const
3331 	{
3332 		os << *args[0] << "[" << *args[1] << "]";
3333 	}
3334 
3335 protected:
doApply(const EvalContext &,const typename GetComponent::IArgs & iargs) const3336 	IRet		doApply		(const EvalContext&,
3337 							 const typename GetComponent::IArgs& iargs) const
3338 	{
3339 		IRet ret;
3340 
3341 		for (int compNdx = 0; compNdx < T::SIZE; ++compNdx)
3342 		{
3343 			if (iargs.b.contains(compNdx))
3344 				ret = unionIVal<typename T::Element>(ret, iargs.a[compNdx]);
3345 		}
3346 
3347 		return ret;
3348 	}
3349 
3350 };
3351 
3352 template <typename T>
getComponent(const ExprP<T> & container,int ndx)3353 ExprP<typename T::Element> getComponent (const ExprP<T>& container, int ndx)
3354 {
3355 	DE_ASSERT(0 <= ndx && ndx < T::SIZE);
3356 	return app<GetComponent<T> >(container, constant(ndx));
3357 }
3358 
3359 template <typename T>	string	vecNamePrefix			(void);
vecNamePrefix(void)3360 template <>				string	vecNamePrefix<float>	(void) { return ""; }
vecNamePrefix(void)3361 template <>				string	vecNamePrefix<deFloat16>(void) { return ""; }
vecNamePrefix(void)3362 template <>				string	vecNamePrefix<double>	(void) { return "d"; }
vecNamePrefix(void)3363 template <>				string	vecNamePrefix<int>		(void) { return "i"; }
vecNamePrefix(void)3364 template <>				string	vecNamePrefix<bool>		(void) { return "b"; }
3365 
3366 template <typename T, int Size>
vecName(void)3367 string vecName (void) { return vecNamePrefix<T>() + "vec" + de::toString(Size); }
3368 
3369 template <typename T, int Size> class GenVec;
3370 
3371 template <typename T>
3372 class GenVec<T, 1> : public DerivedFunc<Signature<T, T> >
3373 {
3374 public:
3375 	typedef typename GenVec<T, 1>::ArgExprs ArgExprs;
3376 
getName(void) const3377 	string		getName		(void) const
3378 	{
3379 		return "_" + vecName<T, 1>();
3380 	}
3381 
3382 protected:
3383 
doExpand(ExpandContext &,const ArgExprs & args) const3384 	ExprP<T>	doExpand	(ExpandContext&, const ArgExprs& args) const { return args.a; }
3385 };
3386 
3387 template <typename T>
3388 class GenVec<T, 2> : public PrimitiveFunc<Signature<Vector<T, 2>, T, T> >
3389 {
3390 public:
3391 	typedef typename GenVec::IRet	IRet;
3392 	typedef typename GenVec::IArgs	IArgs;
3393 
getName(void) const3394 	string		getName		(void) const
3395 	{
3396 		return vecName<T, 2>();
3397 	}
3398 
3399 protected:
doApply(const EvalContext &,const IArgs & iargs) const3400 	IRet		doApply		(const EvalContext&, const IArgs& iargs) const
3401 	{
3402 		return IRet(iargs.a, iargs.b);
3403 	}
3404 };
3405 
3406 template <typename T>
3407 class GenVec<T, 3> : public PrimitiveFunc<Signature<Vector<T, 3>, T, T, T> >
3408 {
3409 public:
3410 	typedef typename GenVec::IRet	IRet;
3411 	typedef typename GenVec::IArgs	IArgs;
3412 
getName(void) const3413 	string	getName		(void) const
3414 	{
3415 		return vecName<T, 3>();
3416 	}
3417 
3418 protected:
doApply(const EvalContext &,const IArgs & iargs) const3419 	IRet	doApply		(const EvalContext&, const IArgs& iargs) const
3420 	{
3421 		return IRet(iargs.a, iargs.b, iargs.c);
3422 	}
3423 };
3424 
3425 template <typename T>
3426 class GenVec<T, 4> : public PrimitiveFunc<Signature<Vector<T, 4>, T, T, T, T> >
3427 {
3428 public:
3429 	typedef typename GenVec::IRet	IRet;
3430 	typedef typename GenVec::IArgs	IArgs;
3431 
getName(void) const3432 	string		getName		(void) const { return vecName<T, 4>(); }
3433 
3434 protected:
doApply(const EvalContext &,const IArgs & iargs) const3435 	IRet		doApply		(const EvalContext&, const IArgs& iargs) const
3436 	{
3437 		return IRet(iargs.a, iargs.b, iargs.c, iargs.d);
3438 	}
3439 };
3440 
3441 template <typename T, int Rows, int Columns>
3442 class GenMat;
3443 
3444 template <typename T, int Rows>
3445 class GenMat<T, Rows, 2> : public PrimitiveFunc<
3446 	Signature<Matrix<T, Rows, 2>, Vector<T, Rows>, Vector<T, Rows> > >
3447 {
3448 public:
3449 	typedef typename GenMat::Ret	Ret;
3450 	typedef typename GenMat::IRet	IRet;
3451 	typedef typename GenMat::IArgs	IArgs;
3452 
getName(void) const3453 	string		getName		(void) const
3454 	{
3455 		return dataTypeNameOf<Ret>();
3456 	}
3457 
3458 protected:
3459 
doApply(const EvalContext &,const IArgs & iargs) const3460 	IRet		doApply		(const EvalContext&, const IArgs& iargs) const
3461 	{
3462 		IRet	ret;
3463 		ret[0] = iargs.a;
3464 		ret[1] = iargs.b;
3465 		return ret;
3466 	}
3467 };
3468 
3469 template <typename T, int Rows>
3470 class GenMat<T, Rows, 3> : public PrimitiveFunc<
3471 	Signature<Matrix<T, Rows, 3>, Vector<T, Rows>, Vector<T, Rows>, Vector<T, Rows> > >
3472 {
3473 public:
3474 	typedef typename GenMat::Ret	Ret;
3475 	typedef typename GenMat::IRet	IRet;
3476 	typedef typename GenMat::IArgs	IArgs;
3477 
getName(void) const3478 	string	getName	(void) const
3479 	{
3480 		return dataTypeNameOf<Ret>();
3481 	}
3482 
3483 protected:
3484 
doApply(const EvalContext &,const IArgs & iargs) const3485 	IRet	doApply	(const EvalContext&, const IArgs& iargs) const
3486 	{
3487 		IRet	ret;
3488 		ret[0] = iargs.a;
3489 		ret[1] = iargs.b;
3490 		ret[2] = iargs.c;
3491 		return ret;
3492 	}
3493 };
3494 
3495 template <typename T, int Rows>
3496 class GenMat<T, Rows, 4> : public PrimitiveFunc<
3497 	Signature<Matrix<T, Rows, 4>,
3498 			  Vector<T, Rows>, Vector<T, Rows>, Vector<T, Rows>, Vector<T, Rows> > >
3499 {
3500 public:
3501 	typedef typename GenMat::Ret	Ret;
3502 	typedef typename GenMat::IRet	IRet;
3503 	typedef typename GenMat::IArgs	IArgs;
3504 
getName(void) const3505 	string	getName	(void) const
3506 	{
3507 		return dataTypeNameOf<Ret>();
3508 	}
3509 
3510 protected:
doApply(const EvalContext &,const IArgs & iargs) const3511 	IRet	doApply	(const EvalContext&, const IArgs& iargs) const
3512 	{
3513 		IRet	ret;
3514 		ret[0] = iargs.a;
3515 		ret[1] = iargs.b;
3516 		ret[2] = iargs.c;
3517 		ret[3] = iargs.d;
3518 		return ret;
3519 	}
3520 };
3521 
3522 template <typename T, int Rows>
mat2(const ExprP<Vector<T,Rows>> & arg0,const ExprP<Vector<T,Rows>> & arg1)3523 ExprP<Matrix<T, Rows, 2> > mat2 (const ExprP<Vector<T, Rows> >& arg0,
3524 								 const ExprP<Vector<T, Rows> >& arg1)
3525 {
3526 	return app<GenMat<T, Rows, 2> >(arg0, arg1);
3527 }
3528 
3529 template <typename T, int Rows>
mat3(const ExprP<Vector<T,Rows>> & arg0,const ExprP<Vector<T,Rows>> & arg1,const ExprP<Vector<T,Rows>> & arg2)3530 ExprP<Matrix<T, Rows, 3> > mat3 (const ExprP<Vector<T, Rows> >& arg0,
3531 								 const ExprP<Vector<T, Rows> >& arg1,
3532 								 const ExprP<Vector<T, Rows> >& arg2)
3533 {
3534 	return app<GenMat<T, Rows, 3> >(arg0, arg1, arg2);
3535 }
3536 
3537 template <typename T, int Rows>
mat4(const ExprP<Vector<T,Rows>> & arg0,const ExprP<Vector<T,Rows>> & arg1,const ExprP<Vector<T,Rows>> & arg2,const ExprP<Vector<T,Rows>> & arg3)3538 ExprP<Matrix<T, Rows, 4> > mat4 (const ExprP<Vector<T, Rows> >& arg0,
3539 								 const ExprP<Vector<T, Rows> >& arg1,
3540 								 const ExprP<Vector<T, Rows> >& arg2,
3541 								 const ExprP<Vector<T, Rows> >& arg3)
3542 {
3543 	return app<GenMat<T, Rows, 4> >(arg0, arg1, arg2, arg3);
3544 }
3545 
3546 template <typename T, int Rows, int Cols>
3547 class MatNeg : public PrimitiveFunc<Signature<Matrix<T, Rows, Cols>,
3548 											  Matrix<T, Rows, Cols> > >
3549 {
3550 public:
3551 	typedef typename MatNeg::IRet		IRet;
3552 	typedef typename MatNeg::IArgs		IArgs;
3553 
getName(void) const3554 	string	getName	(void) const
3555 	{
3556 		return "_matNeg";
3557 	}
3558 
3559 protected:
doPrint(ostream & os,const BaseArgExprs & args) const3560 	void	doPrint	(ostream& os, const BaseArgExprs& args) const
3561 	{
3562 		os << "-(" << *args[0] << ")";
3563 	}
3564 
doApply(const EvalContext &,const IArgs & iargs) const3565 	IRet	doApply	(const EvalContext&, const IArgs& iargs)			const
3566 	{
3567 		IRet	ret;
3568 
3569 		for (int col = 0; col < Cols; ++col)
3570 		{
3571 			for (int row = 0; row < Rows; ++row)
3572 				ret[col][row] = -iargs.a[col][row];
3573 		}
3574 
3575 		return ret;
3576 	}
3577 };
3578 
3579 template <typename T, typename Sig>
3580 class CompWiseFunc : public PrimitiveFunc<Sig>
3581 {
3582 public:
3583 	typedef Func<Signature<T, T, T> >	ScalarFunc;
3584 
getName(void) const3585 	string				getName			(void)									const
3586 	{
3587 		return doGetScalarFunc().getName();
3588 	}
3589 protected:
doPrint(ostream & os,const BaseArgExprs & args) const3590 	void				doPrint			(ostream&				os,
3591 										 const BaseArgExprs&	args)			const
3592 	{
3593 		doGetScalarFunc().print(os, args);
3594 	}
3595 
3596 	virtual
3597 	const ScalarFunc&	doGetScalarFunc	(void)									const = 0;
3598 };
3599 
3600 template <typename T, int Rows, int Cols>
3601 class CompMatFuncBase : public CompWiseFunc<T, Signature<Matrix<T, Rows, Cols>,
3602 														 Matrix<T, Rows, Cols>,
3603 														 Matrix<T, Rows, Cols> > >
3604 {
3605 public:
3606 	typedef typename CompMatFuncBase::IRet		IRet;
3607 	typedef typename CompMatFuncBase::IArgs		IArgs;
3608 
3609 protected:
3610 
doApply(const EvalContext & ctx,const IArgs & iargs) const3611 	IRet	doApply	(const EvalContext& ctx, const IArgs& iargs) const
3612 	{
3613 		IRet			ret;
3614 
3615 		for (int col = 0; col < Cols; ++col)
3616 		{
3617 			for (int row = 0; row < Rows; ++row)
3618 				ret[col][row] = this->doGetScalarFunc().apply(ctx,
3619 															  iargs.a[col][row],
3620 															  iargs.b[col][row]);
3621 		}
3622 
3623 		return ret;
3624 	}
3625 };
3626 
3627 template <typename F, typename T, int Rows, int Cols>
3628 class CompMatFunc : public CompMatFuncBase<T, Rows, Cols>
3629 {
3630 protected:
doGetScalarFunc(void) const3631 	const typename CompMatFunc::ScalarFunc&	doGetScalarFunc	(void) const
3632 	{
3633 		return instance<F>();
3634 	}
3635 };
3636 
3637 template <class T>
3638 class ScalarMatrixCompMult : public Mul< Signature<T, T, T> >
3639 {
3640 public:
3641 
getName(void) const3642 	string	getName	(void) const
3643 	{
3644 		return "matrixCompMult";
3645 	}
3646 
doPrint(ostream & os,const BaseArgExprs & args) const3647 	void	doPrint	(ostream& os, const BaseArgExprs& args) const
3648 	{
3649 		Func<Signature<T, T, T> >::doPrint(os, args);
3650 	}
3651 };
3652 
3653 template <int Rows, int Cols, class T>
3654 class MatrixCompMult : public CompMatFunc<ScalarMatrixCompMult<T>, T, Rows, Cols>
3655 {
3656 };
3657 
3658 template <int Rows, int Cols>
3659 class ScalarMatFuncBase : public CompWiseFunc<float, Signature<Matrix<float, Rows, Cols>,
3660 															   Matrix<float, Rows, Cols>,
3661 															   float> >
3662 {
3663 public:
3664 	typedef typename ScalarMatFuncBase::IRet	IRet;
3665 	typedef typename ScalarMatFuncBase::IArgs	IArgs;
3666 
3667 protected:
3668 
doApply(const EvalContext & ctx,const IArgs & iargs) const3669 	IRet	doApply	(const EvalContext& ctx, const IArgs& iargs) const
3670 	{
3671 		IRet	ret;
3672 
3673 		for (int col = 0; col < Cols; ++col)
3674 		{
3675 			for (int row = 0; row < Rows; ++row)
3676 				ret[col][row] = this->doGetScalarFunc().apply(ctx, iargs.a[col][row], iargs.b);
3677 		}
3678 
3679 		return ret;
3680 	}
3681 };
3682 
3683 template <typename F, int Rows, int Cols>
3684 class ScalarMatFunc : public ScalarMatFuncBase<Rows, Cols>
3685 {
3686 protected:
doGetScalarFunc(void) const3687 	const typename ScalarMatFunc::ScalarFunc&	doGetScalarFunc	(void)	const
3688 	{
3689 		return instance<F>();
3690 	}
3691 };
3692 
3693 template<typename T, int Size> struct GenXType;
3694 
3695 template<typename T>
3696 struct GenXType<T, 1>
3697 {
genXTypevkt::shaderexecutor::Functions::GenXType3698 	static ExprP<T>	genXType	(const ExprP<T>& x) { return x; }
3699 };
3700 
3701 template<typename T>
3702 struct GenXType<T, 2>
3703 {
genXTypevkt::shaderexecutor::Functions::GenXType3704 	static ExprP<Vector<T, 2> >	genXType	(const ExprP<T>& x)
3705 	{
3706 		return app<GenVec<T, 2> >(x, x);
3707 	}
3708 };
3709 
3710 template<typename T>
3711 struct GenXType<T, 3>
3712 {
genXTypevkt::shaderexecutor::Functions::GenXType3713 	static ExprP<Vector<T, 3> >	genXType	(const ExprP<T>& x)
3714 	{
3715 		return app<GenVec<T, 3> >(x, x, x);
3716 	}
3717 };
3718 
3719 template<typename T>
3720 struct GenXType<T, 4>
3721 {
genXTypevkt::shaderexecutor::Functions::GenXType3722 	static ExprP<Vector<T, 4> >	genXType	(const ExprP<T>& x)
3723 	{
3724 		return app<GenVec<T, 4> >(x, x, x, x);
3725 	}
3726 };
3727 
3728 //! Returns an expression of vector of size `Size` (or scalar if Size == 1),
3729 //! with each element initialized with the expression `x`.
3730 template<typename T, int Size>
genXType(const ExprP<T> & x)3731 ExprP<typename ContainerOf<T, Size>::Container> genXType (const ExprP<T>& x)
3732 {
3733 	return GenXType<T, Size>::genXType(x);
3734 }
3735 
3736 typedef GenVec<float, 2> FloatVec2;
3737 DEFINE_CONSTRUCTOR2(FloatVec2, Vec2, vec2, float, float)
3738 
3739 typedef GenVec<deFloat16, 2> FloatVec2_16bit;
3740 DEFINE_CONSTRUCTOR2(FloatVec2_16bit, Vec2_16Bit, vec2, deFloat16, deFloat16)
3741 
3742 typedef GenVec<double, 2> DoubleVec2;
3743 DEFINE_CONSTRUCTOR2(DoubleVec2, Vec2_64Bit, vec2, double, double)
3744 
3745 typedef GenVec<float, 3> FloatVec3;
3746 DEFINE_CONSTRUCTOR3(FloatVec3, Vec3, vec3, float, float, float)
3747 
3748 typedef GenVec<deFloat16, 3> FloatVec3_16bit;
3749 DEFINE_CONSTRUCTOR3(FloatVec3_16bit, Vec3_16Bit, vec3, deFloat16, deFloat16, deFloat16)
3750 
3751 typedef GenVec<double, 3> DoubleVec3;
3752 DEFINE_CONSTRUCTOR3(DoubleVec3, Vec3_64Bit, vec3, double, double, double)
3753 
3754 typedef GenVec<float, 4> FloatVec4;
3755 DEFINE_CONSTRUCTOR4(FloatVec4, Vec4, vec4, float, float, float, float)
3756 
3757 typedef GenVec<deFloat16, 4> FloatVec4_16bit;
3758 DEFINE_CONSTRUCTOR4(FloatVec4_16bit, Vec4_16Bit, vec4, deFloat16, deFloat16, deFloat16, deFloat16)
3759 
3760 typedef GenVec<double, 4> DoubleVec4;
3761 DEFINE_CONSTRUCTOR4(DoubleVec4, Vec4_64Bit, vec4, double, double, double, double)
3762 
3763 template <class T>
3764 const ExprP<T> getConstZero(void);
3765 template <class T>
3766 const ExprP<T> getConstOne(void);
3767 template <class T>
3768 const ExprP<T> getConstTwo(void);
3769 
3770 template <>
getConstZero(void)3771 const ExprP<float> getConstZero<float>(void)
3772 {
3773 	return constant(0.0f);
3774 }
3775 
3776 template <>
getConstZero(void)3777 const ExprP<deFloat16> getConstZero<deFloat16>(void)
3778 {
3779 	return constant((deFloat16)FLOAT16_0_0);
3780 }
3781 
3782 template <>
getConstZero(void)3783 const ExprP<double> getConstZero<double>(void)
3784 {
3785 	return constant(0.0);
3786 }
3787 
3788 template <>
getConstOne(void)3789 const ExprP<float> getConstOne<float>(void)
3790 {
3791 	return constant(1.0f);
3792 }
3793 
3794 template <>
getConstOne(void)3795 const ExprP<deFloat16> getConstOne<deFloat16>(void)
3796 {
3797 	return constant((deFloat16)FLOAT16_1_0);
3798 }
3799 
3800 template <>
getConstOne(void)3801 const ExprP<double> getConstOne<double>(void)
3802 {
3803 	return constant(1.0);
3804 }
3805 
3806 template <>
getConstTwo(void)3807 const ExprP<float> getConstTwo<float>(void)
3808 {
3809 	return constant(2.0f);
3810 }
3811 
3812 template <>
getConstTwo(void)3813 const ExprP<deFloat16> getConstTwo<deFloat16>(void)
3814 {
3815 	return constant((deFloat16)FLOAT16_2_0);
3816 }
3817 
3818 template <>
getConstTwo(void)3819 const ExprP<double> getConstTwo<double>(void)
3820 {
3821 	return constant(2.0);
3822 }
3823 
3824 template <int Size, class T>
3825 class Dot : public DerivedFunc<Signature<T, Vector<T, Size>, Vector<T, Size> > >
3826 {
3827 public:
3828 	typedef typename Dot::ArgExprs ArgExprs;
3829 
getName(void) const3830 	string			getName		(void) const
3831 	{
3832 		return "dot";
3833 	}
3834 
3835 protected:
doExpand(ExpandContext &,const ArgExprs & args) const3836 	ExprP<T>	doExpand	(ExpandContext&, const ArgExprs& args) const
3837 	{
3838 		ExprP<T> op[Size];
3839 		// Precompute all products.
3840 		for (int ndx = 0; ndx < Size; ++ndx)
3841 			op[ndx] = args.a[ndx] * args.b[ndx];
3842 
3843 		int idx[Size];
3844 		//Prepare an array of indices.
3845 		for (int ndx = 0; ndx < Size; ++ndx)
3846 			idx[ndx] = ndx;
3847 
3848 		ExprP<T> res = op[0];
3849 		// Compute the first dot alternative: SUM(a[i]*b[i]), i = 0 .. Size-1
3850 		for (int ndx = 1; ndx < Size; ++ndx)
3851 			res = res + op[ndx];
3852 
3853 		// Generate all permutations of indices and
3854 		// using a permutation compute a dot alternative.
3855 		// Generates all possible variants fo summation of products in the dot product expansion expression.
3856 		do {
3857 			ExprP<T> alt = getConstZero<T>();
3858 			for (int ndx = 0; ndx < Size; ++ndx)
3859 				alt = alt + op[idx[ndx]];
3860 			res = alternatives(res, alt);
3861 		} while (std::next_permutation(idx, idx + Size));
3862 
3863 		return res;
3864 	}
3865 };
3866 
3867 template <class T>
3868 class Dot<1, T> : public DerivedFunc<Signature<T, T, T> >
3869 {
3870 public:
3871 	typedef typename DerivedFunc<Signature<T, T, T> >::ArgExprs	TArgExprs;
3872 
getName(void) const3873 	string			getName		(void) const
3874 	{
3875 		return "dot";
3876 	}
3877 
doExpand(ExpandContext &,const TArgExprs & args) const3878 	ExprP<T>	doExpand	(ExpandContext&, const TArgExprs& args) const
3879 	{
3880 		return args.a * args.b;
3881 	}
3882 };
3883 
3884 template <int Size>
dot(const ExprP<Vector<deFloat16,Size>> & x,const ExprP<Vector<deFloat16,Size>> & y)3885 ExprP<deFloat16> dot (const ExprP<Vector<deFloat16, Size> >& x, const ExprP<Vector<deFloat16, Size> >& y)
3886 {
3887 	return app<Dot<Size, deFloat16> >(x, y);
3888 }
3889 
dot(const ExprP<deFloat16> & x,const ExprP<deFloat16> & y)3890 ExprP<deFloat16> dot (const ExprP<deFloat16>& x, const ExprP<deFloat16>& y)
3891 {
3892 	return app<Dot<1, deFloat16> >(x, y);
3893 }
3894 
3895 template <int Size>
dot(const ExprP<Vector<float,Size>> & x,const ExprP<Vector<float,Size>> & y)3896 ExprP<float> dot (const ExprP<Vector<float, Size> >& x, const ExprP<Vector<float, Size> >& y)
3897 {
3898 	return app<Dot<Size, float> >(x, y);
3899 }
3900 
dot(const ExprP<float> & x,const ExprP<float> & y)3901 ExprP<float> dot (const ExprP<float>& x, const ExprP<float>& y)
3902 {
3903 	return app<Dot<1, float> >(x, y);
3904 }
3905 
3906 template <int Size>
dot(const ExprP<Vector<double,Size>> & x,const ExprP<Vector<double,Size>> & y)3907 ExprP<double> dot (const ExprP<Vector<double, Size> >& x, const ExprP<Vector<double, Size> >& y)
3908 {
3909 	return app<Dot<Size, double> >(x, y);
3910 }
3911 
dot(const ExprP<double> & x,const ExprP<double> & y)3912 ExprP<double> dot (const ExprP<double>& x, const ExprP<double>& y)
3913 {
3914 	return app<Dot<1, double> >(x, y);
3915 }
3916 
3917 template <int Size, class T>
3918 class Length : public DerivedFunc<
3919 	Signature<T, typename ContainerOf<T, Size>::Container> >
3920 {
3921 public:
3922 	typedef typename Length::ArgExprs ArgExprs;
3923 
getName(void) const3924 	string			getName		(void) const
3925 	{
3926 		return "length";
3927 	}
3928 
3929 protected:
doExpand(ExpandContext &,const ArgExprs & args) const3930 	ExprP<T>		doExpand	(ExpandContext&, const ArgExprs& args) const
3931 	{
3932 		return sqrt(dot(args.a, args.a));
3933 	}
3934 };
3935 
3936 
3937 template <class T, class TRet>
length(const ExprP<T> & x)3938 ExprP<TRet> length (const ExprP<T>& x)
3939 {
3940 	return app<Length<1, T> >(x);
3941 }
3942 
3943 template <int Size, class T, class TRet>
length(const ExprP<typename ContainerOf<T,Size>::Container> & x)3944 ExprP<TRet> length (const ExprP<typename ContainerOf<T, Size>::Container>& x)
3945 {
3946 	return app<Length<Size, T> >(x);
3947 }
3948 
3949 template <int Size, class T>
3950 class Distance : public DerivedFunc<
3951 	Signature<T,
3952 			  typename ContainerOf<T, Size>::Container,
3953 			  typename ContainerOf<T, Size>::Container> >
3954 {
3955 public:
3956 	typedef typename	Distance::Ret		Ret;
3957 	typedef typename	Distance::ArgExprs	ArgExprs;
3958 
getName(void) const3959 	string		getName		(void) const
3960 	{
3961 		return "distance";
3962 	}
3963 
3964 protected:
doExpand(ExpandContext &,const ArgExprs & args) const3965 	ExprP<Ret>	doExpand	(ExpandContext&, const ArgExprs& args) const
3966 	{
3967 		return length<Size, T, Ret>(args.a - args.b);
3968 	}
3969 };
3970 
3971 // cross
3972 
3973 class Cross : public DerivedFunc<Signature<Vec3, Vec3, Vec3> >
3974 {
3975 public:
getName(void) const3976 	string			getName		(void) const
3977 	{
3978 		return "cross";
3979 	}
3980 
3981 protected:
doExpand(ExpandContext &,const ArgExprs & x) const3982 	ExprP<Vec3>		doExpand	(ExpandContext&, const ArgExprs& x) const
3983 	{
3984 		return vec3(x.a[1] * x.b[2] - x.b[1] * x.a[2],
3985 					x.a[2] * x.b[0] - x.b[2] * x.a[0],
3986 					x.a[0] * x.b[1] - x.b[0] * x.a[1]);
3987 	}
3988 };
3989 
3990 class Cross16Bit : public DerivedFunc<Signature<Vec3_16Bit, Vec3_16Bit, Vec3_16Bit> >
3991 {
3992 public:
getName(void) const3993 	string			getName		(void) const
3994 	{
3995 		return "cross";
3996 	}
3997 
3998 protected:
doExpand(ExpandContext &,const ArgExprs & x) const3999 	ExprP<Vec3_16Bit>		doExpand	(ExpandContext&, const ArgExprs& x) const
4000 	{
4001 		return vec3(x.a[1] * x.b[2] - x.b[1] * x.a[2],
4002 					x.a[2] * x.b[0] - x.b[2] * x.a[0],
4003 					x.a[0] * x.b[1] - x.b[0] * x.a[1]);
4004 	}
4005 };
4006 
4007 class Cross64Bit : public DerivedFunc<Signature<Vec3_64Bit, Vec3_64Bit, Vec3_64Bit> >
4008 {
4009 public:
getName(void) const4010 	string			getName		(void) const
4011 	{
4012 		return "cross";
4013 	}
4014 
4015 protected:
doExpand(ExpandContext &,const ArgExprs & x) const4016 	ExprP<Vec3_64Bit>		doExpand	(ExpandContext&, const ArgExprs& x) const
4017 	{
4018 		return vec3(x.a[1] * x.b[2] - x.b[1] * x.a[2],
4019 					x.a[2] * x.b[0] - x.b[2] * x.a[0],
4020 					x.a[0] * x.b[1] - x.b[0] * x.a[1]);
4021 	}
4022 };
4023 
4024 DEFINE_CONSTRUCTOR2(Cross, Vec3, cross, Vec3, Vec3)
4025 DEFINE_CONSTRUCTOR2(Cross16Bit, Vec3_16Bit, cross, Vec3_16Bit, Vec3_16Bit)
4026 DEFINE_CONSTRUCTOR2(Cross64Bit, Vec3_64Bit, cross, Vec3_64Bit, Vec3_64Bit)
4027 
4028 template<int Size, class T>
4029 class Normalize : public DerivedFunc<
4030 	Signature<typename ContainerOf<T, Size>::Container,
4031 			  typename ContainerOf<T, Size>::Container> >
4032 {
4033 public:
4034 	typedef typename	Normalize::Ret		Ret;
4035 	typedef typename	Normalize::ArgExprs	ArgExprs;
4036 
getName(void) const4037 	string		getName		(void) const
4038 	{
4039 		return "normalize";
4040 	}
4041 
4042 protected:
doExpand(ExpandContext &,const ArgExprs & args) const4043 	ExprP<Ret>	doExpand	(ExpandContext&, const ArgExprs& args) const
4044 	{
4045 		return args.a / length<Size, T, T>(args.a);
4046 	}
4047 };
4048 
4049 template <int Size, class T>
4050 class FaceForward : public DerivedFunc<
4051 	Signature<typename ContainerOf<T, Size>::Container,
4052 			  typename ContainerOf<T, Size>::Container,
4053 			  typename ContainerOf<T, Size>::Container,
4054 			  typename ContainerOf<T, Size>::Container> >
4055 {
4056 public:
4057 	typedef typename	FaceForward::Ret		Ret;
4058 	typedef typename	FaceForward::ArgExprs	ArgExprs;
4059 
getName(void) const4060 	string		getName		(void) const
4061 	{
4062 		return "faceforward";
4063 	}
4064 
4065 protected:
doExpand(ExpandContext &,const ArgExprs & args) const4066 	ExprP<Ret>	doExpand	(ExpandContext&, const ArgExprs& args) const
4067 	{
4068 		return cond(dot(args.c, args.b) < getConstZero<T>(), args.a, -args.a);
4069 	}
4070 };
4071 
4072 template <int Size, class T>
4073 class Reflect : public DerivedFunc<
4074 	Signature<typename ContainerOf<T, Size>::Container,
4075 			  typename ContainerOf<T, Size>::Container,
4076 			  typename ContainerOf<T, Size>::Container> >
4077 {
4078 public:
4079 	typedef typename	Reflect::Ret		Ret;
4080 	typedef typename	Reflect::Arg0		Arg0;
4081 	typedef typename	Reflect::Arg1		Arg1;
4082 	typedef typename	Reflect::ArgExprs	ArgExprs;
4083 
getName(void) const4084 	string		getName		(void) const
4085 	{
4086 		return "reflect";
4087 	}
4088 
4089 protected:
doExpand(ExpandContext & ctx,const ArgExprs & args) const4090 	ExprP<Ret>	doExpand	(ExpandContext& ctx, const ArgExprs& args) const
4091 	{
4092 		const ExprP<Arg0>&	i		= args.a;
4093 		const ExprP<Arg1>&	n		= args.b;
4094 		const ExprP<T>	dotNI	= bindExpression("dotNI", ctx, dot(n, i));
4095 
4096 		return i - alternatives((n * dotNI) * getConstTwo<T>(),
4097 								   alternatives( n * (dotNI * getConstTwo<T>()),
4098 												alternatives(n * dot(i * getConstTwo<T>(), n),
4099 															 n * dot(i, n * getConstTwo<T>())
4100 												)
4101 									)
4102 								);
4103 	}
4104 };
4105 
4106 template <int Size, class T>
4107 class Refract : public DerivedFunc<
4108 	Signature<typename ContainerOf<T, Size>::Container,
4109 			  typename ContainerOf<T, Size>::Container,
4110 			  typename ContainerOf<T, Size>::Container,
4111 			  T> >
4112 {
4113 public:
4114 	typedef typename	Refract::Ret		Ret;
4115 	typedef typename	Refract::Arg0		Arg0;
4116 	typedef typename	Refract::Arg1		Arg1;
4117 	typedef typename	Refract::Arg2		Arg2;
4118 	typedef typename	Refract::ArgExprs	ArgExprs;
4119 
getName(void) const4120 	string		getName		(void) const
4121 	{
4122 		return "refract";
4123 	}
4124 
4125 protected:
doExpand(ExpandContext & ctx,const ArgExprs & args) const4126 	ExprP<Ret>	doExpand	(ExpandContext&	ctx, const ArgExprs& args) const
4127 	{
4128 		const ExprP<Arg0>&	i		= args.a;
4129 		const ExprP<Arg1>&	n		= args.b;
4130 		const ExprP<Arg2>&	eta		= args.c;
4131 		const ExprP<T>	dotNI	= bindExpression("dotNI", ctx, dot(n, i));
4132 		const ExprP<T>	k		= bindExpression("k", ctx, getConstOne<T>() - eta * eta *
4133 												 (getConstOne<T>() - dotNI * dotNI));
4134 		return cond(k < getConstZero<T>(),
4135 					genXType<T, Size>(getConstZero<T>()),
4136 					i * eta - n * (eta * dotNI + sqrt(k)));
4137 	}
4138 };
4139 
4140 template <class T>
4141 class PreciseFunc1 : public CFloatFunc1<T>
4142 {
4143 public:
PreciseFunc1(const string & name,DoubleFunc1 & func)4144 			PreciseFunc1	(const string& name, DoubleFunc1& func) : CFloatFunc1<T> (name, func) {}
4145 protected:
precision(const EvalContext &,double,double) const4146 	double	precision		(const EvalContext&, double, double) const	{ return 0.0; }
4147 };
4148 
4149 template <class T>
4150 class Abs : public PreciseFunc1<T>
4151 {
4152 public:
Abs(void)4153 	Abs (void) : PreciseFunc1<T> ("abs", deAbs) {}
4154 };
4155 
4156 template <class T>
4157 class Sign : public PreciseFunc1<T>
4158 {
4159 public:
Sign(void)4160 	Sign (void) : PreciseFunc1<T> ("sign", deSign) {}
4161 };
4162 
4163 template <class T>
4164 class Floor : public PreciseFunc1<T>
4165 {
4166 public:
Floor(void)4167 	Floor (void) : PreciseFunc1<T> ("floor", deFloor) {}
4168 };
4169 
4170 template <class T>
4171 class Trunc : public PreciseFunc1<T>
4172 {
4173 public:
Trunc(void)4174 	Trunc (void) : PreciseFunc1<T> ("trunc", deTrunc) {}
4175 };
4176 
4177 template <class T>
4178 class Round : public FloatFunc1<T>
4179 {
4180 public:
getName(void) const4181 	string		getName		(void) const								{ return "round"; }
4182 
4183 protected:
applyPoint(const EvalContext &,double x) const4184 	Interval	applyPoint	(const EvalContext&, double x) const
4185 	{
4186 		double			truncated	= 0.0;
4187 		const double	fract		= deModf(x, &truncated);
4188 		Interval		ret;
4189 
4190 		if (fabs(fract) <= 0.5)
4191 			ret |= truncated;
4192 		if (fabs(fract) >= 0.5)
4193 			ret |= truncated + deSign(fract);
4194 
4195 		return ret;
4196 	}
4197 
precision(const EvalContext &,double,double) const4198 	double		precision	(const EvalContext&, double, double) const	{ return 0.0; }
4199 };
4200 
4201 template <class T>
4202 class RoundEven : public PreciseFunc1<T>
4203 {
4204 public:
RoundEven(void)4205 	RoundEven (void) : PreciseFunc1<T> ("roundEven", deRoundEven) {}
4206 };
4207 
4208 template <class T>
4209 class Ceil : public PreciseFunc1<T>
4210 {
4211 public:
Ceil(void)4212 	Ceil (void) : PreciseFunc1<T> ("ceil", deCeil) {}
4213 };
4214 
4215 typedef Floor< Signature<float, float> > Floor32Bit;
4216 typedef Floor< Signature<deFloat16, deFloat16> > Floor16Bit;
4217 typedef Floor< Signature<double, double> > Floor64Bit;
4218 
4219 typedef Trunc< Signature<float, float> > Trunc32Bit;
4220 typedef Trunc< Signature<deFloat16, deFloat16> > Trunc16Bit;
4221 typedef Trunc< Signature<double, double> > Trunc64Bit;
4222 
4223 typedef Trunc< Signature<float, float> > Trunc32Bit;
4224 typedef Trunc< Signature<deFloat16, deFloat16> > Trunc16Bit;
4225 
4226 DEFINE_DERIVED_FLOAT1(Fract, fract, x, x - app<Floor32Bit>(x))
4227 DEFINE_DERIVED_FLOAT1_16BIT(Fract16Bit, fract, x, x - app<Floor16Bit>(x))
4228 DEFINE_DERIVED_DOUBLE1(Fract64Bit, fract, x, x - app<Floor64Bit>(x))
4229 
4230 template <class T>
4231 class PreciseFunc2 : public CFloatFunc2<T>
4232 {
4233 public:
PreciseFunc2(const string & name,DoubleFunc2 & func)4234 			PreciseFunc2	(const string& name, DoubleFunc2& func) : CFloatFunc2<T> (name, func) {}
4235 protected:
precision(const EvalContext &,double,double,double) const4236 	double	precision		(const EvalContext&, double, double, double) const { return 0.0; }
4237 };
4238 
4239 DEFINE_DERIVED_FLOAT2(Mod32Bit, mod, x, y, x - y * app<Floor32Bit>(x / y))
4240 DEFINE_DERIVED_FLOAT2_16BIT(Mod16Bit, mod, x, y, x - y * app<Floor16Bit>(x / y))
4241 DEFINE_DERIVED_DOUBLE2(Mod64Bit, mod, x, y, x - y * app<Floor64Bit>(x / y))
4242 
4243 DEFINE_CASED_DERIVED_FLOAT2(FRem32Bit, frem, x, y, x - y * app<Trunc32Bit>(x / y), SPIRV_CASETYPE_FREM)
4244 DEFINE_CASED_DERIVED_FLOAT2_16BIT(FRem16Bit, frem, x, y, x - y * app<Trunc16Bit>(x / y), SPIRV_CASETYPE_FREM)
4245 DEFINE_CASED_DERIVED_DOUBLE2(FRem64Bit, frem, x, y, x - y * app<Trunc64Bit>(x / y), SPIRV_CASETYPE_FREM)
4246 
4247 template <class T>
4248 class Modf : public PrimitiveFunc<T>
4249 {
4250 public:
4251 	typedef typename Modf<T>::IArgs	TIArgs;
4252 	typedef typename Modf<T>::IRet	TIRet;
getName(void) const4253 	string	getName				(void) const
4254 	{
4255 		return "modf";
4256 	}
4257 
4258 protected:
doApply(const EvalContext & ctx,const TIArgs & iargs) const4259 	TIRet	doApply				(const EvalContext& ctx, const TIArgs& iargs) const
4260 	{
4261 		Interval	fracIV;
4262 		Interval&	wholeIV		= const_cast<Interval&>(iargs.b);
4263 		double		intPart		= 0;
4264 
4265 		TCU_INTERVAL_APPLY_MONOTONE1(fracIV, x, iargs.a, frac, frac = deModf(x, &intPart));
4266 		TCU_INTERVAL_APPLY_MONOTONE1(wholeIV, x, iargs.a, whole,
4267 									 deModf(x, &intPart); whole = intPart);
4268 
4269 		if (!iargs.a.isFinite(ctx.format.getMaxValue()))
4270 		{
4271 			// Behavior on modf(Inf) not well-defined, allow anything as a fractional part
4272 			// See Khronos bug 13907
4273 			fracIV |= TCU_NAN;
4274 		}
4275 
4276 		return fracIV;
4277 	}
4278 
getOutParamIndex(void) const4279 	int		getOutParamIndex	(void) const
4280 	{
4281 		return 1;
4282 	}
4283 };
4284 typedef Modf< Signature<float, float, float> >				Modf32Bit;
4285 typedef Modf< Signature<deFloat16, deFloat16, deFloat16> >	Modf16Bit;
4286 typedef Modf< Signature<double, double, double> >			Modf64Bit;
4287 
4288 template <class T>
4289 class ModfStruct : public Modf<T>
4290 {
4291 public:
getName(void) const4292 	virtual string		getName			(void) const	{ return "modfstruct"; }
getSpirvCase(void) const4293 	virtual SpirVCaseT	getSpirvCase	(void) const	{ return SPIRV_CASETYPE_MODFSTRUCT; }
4294 };
4295 typedef ModfStruct< Signature<float, float, float> >				ModfStruct32Bit;
4296 typedef ModfStruct< Signature<deFloat16, deFloat16, deFloat16> >	ModfStruct16Bit;
4297 typedef ModfStruct< Signature<double, double, double> >				ModfStruct64Bit;
4298 
4299 template <class T>
Min(void)4300 class Min : public PreciseFunc2<T> { public: Min (void) : PreciseFunc2<T> ("min", deMin) {} };
4301 template <class T>
Max(void)4302 class Max : public PreciseFunc2<T> { public: Max (void) : PreciseFunc2<T> ("max", deMax) {} };
4303 
4304 template <class T>
4305 class Clamp : public FloatFunc3<T>
4306 {
4307 public:
getName(void) const4308 	string	getName		(void) const { return "clamp"; }
4309 
applyExact(double x,double minVal,double maxVal) const4310 	double	applyExact	(double x, double minVal, double maxVal) const
4311 	{
4312 		return de::min(de::max(x, minVal), maxVal);
4313 	}
4314 
precision(const EvalContext &,double,double,double minVal,double maxVal) const4315 	double	precision	(const EvalContext&, double, double, double minVal, double maxVal) const
4316 	{
4317 		return minVal > maxVal ? TCU_NAN : 0.0;
4318 	}
4319 };
4320 
// Expression builders for clamp(): one overload per float width, each applying
// the Clamp function with the matching Signature.
ExprP<deFloat16> clamp(const ExprP<deFloat16>& x, const ExprP<deFloat16>& minVal, const ExprP<deFloat16>& maxVal)
{
	return app<Clamp< Signature<deFloat16, deFloat16, deFloat16, deFloat16> > >(x, minVal, maxVal);
}

ExprP<float> clamp(const ExprP<float>& x, const ExprP<float>& minVal, const ExprP<float>& maxVal)
{
	return app<Clamp< Signature<float, float, float, float> > >(x, minVal, maxVal);
}

ExprP<double> clamp(const ExprP<double>& x, const ExprP<double>& minVal, const ExprP<double>& maxVal)
{
	return app<Clamp< Signature<double, double, double, double> > >(x, minVal, maxVal);
}
4335 
4336 template <class T>
4337 class NanIfGreaterOrEqual : public FloatFunc2<T>
4338 {
4339 public:
getName(void) const4340 	string	getName		(void) const { return "nanIfGreaterOrEqual"; }
4341 
applyExact(double edge0,double edge1) const4342 	double	applyExact	(double edge0, double edge1) const
4343 	{
4344 		return (edge0 >= edge1) ? TCU_NAN : 0.0;
4345 	}
4346 
precision(const EvalContext &,double,double edge0,double edge1) const4347 	double	precision	(const EvalContext&, double, double edge0, double edge1) const
4348 	{
4349 		return (edge0 >= edge1) ? TCU_NAN : 0.0;
4350 	}
4351 };
4352 
// Expression builders for NanIfGreaterOrEqual: one overload per float width.
ExprP<deFloat16> nanIfGreaterOrEqual(const ExprP<deFloat16>& edge0, const ExprP<deFloat16>& edge1)
{
	return app<NanIfGreaterOrEqual< Signature<deFloat16, deFloat16, deFloat16> > >(edge0, edge1);
}

ExprP<float> nanIfGreaterOrEqual(const ExprP<float>& edge0, const ExprP<float>& edge1)
{
	return app<NanIfGreaterOrEqual< Signature<float, float, float> > >(edge0, edge1);
}

ExprP<double> nanIfGreaterOrEqual(const ExprP<double>& edge0, const ExprP<double>& edge1)
{
	return app<NanIfGreaterOrEqual< Signature<double, double, double> > >(edge0, edge1);
}
4367 
4368 DEFINE_DERIVED_FLOAT3(Mix, mix, x, y, a, alternatives((x * (constant(1.0f) - a)) + y * a,
4369 													  x + (y - x) * a))
4370 
4371 DEFINE_DERIVED_FLOAT3_16BIT(Mix16Bit, mix, x, y, a, alternatives((x * (constant((deFloat16)FLOAT16_1_0) - a)) + y * a,
4372 													  x + (y - x) * a))
4373 
4374 DEFINE_DERIVED_DOUBLE3(Mix64Bit, mix, x, y, a, alternatives((x * (constant(1.0) - a)) + y * a,
4375 													  x + (y - x) * a))
4376 
// Reference implementation of step(edge, x): 0.0 when x < edge, otherwise 1.0.
// Because the comparison is false for NaN operands, a NaN input yields 1.0.
static double step (double edge, double x)
{
	return x < edge ? 0.0 : 1.0;
}
4381 
4382 template <class T>
Step(void)4383 class Step : public PreciseFunc2<T> { public: Step (void) : PreciseFunc2<T> ("step", step) {} };
4384 
4385 template <class T>
4386 class SmoothStep : public DerivedFunc<T>
4387 {
4388 public:
4389 	typedef typename SmoothStep<T>::ArgExprs	TArgExprs;
4390 	typedef typename SmoothStep<T>::Ret			TRet;
getName(void) const4391 	string		getName		(void) const
4392 	{
4393 		return "smoothstep";
4394 	}
4395 
4396 protected:
4397 
4398 	ExprP<TRet>	doExpand	(ExpandContext& ctx, const TArgExprs& args) const;
4399 };
4400 
4401 template<>
doExpand(ExpandContext & ctx,const SmoothStep<Signature<float,float,float,float>>::ArgExprs & args) const4402 ExprP<SmoothStep< Signature<float, float, float, float> >::Ret>	SmoothStep< Signature<float, float, float, float> >::doExpand (ExpandContext& ctx, const SmoothStep< Signature<float, float, float, float> >::ArgExprs& args) const
4403 {
4404 	const ExprP<float>&		edge0	= args.a;
4405 	const ExprP<float>&		edge1	= args.b;
4406 	const ExprP<float>&		x		= args.c;
4407 	const ExprP<float>		tExpr	= clamp((x - edge0) / (edge1 - edge0), constant(0.0f), constant(1.0f))
4408 									+ nanIfGreaterOrEqual(edge0, edge1); // force NaN (and non-analyzable result) for cases edge0 >= edge1
4409 	const ExprP<float>		t		= bindExpression("t", ctx, tExpr);
4410 
4411 	return (t * t * (constant(3.0f) - constant(2.0f) * t));
4412 }
4413 
4414 template<>
doExpand(ExpandContext & ctx,const TArgExprs & args) const4415 ExprP<SmoothStep< Signature<deFloat16, deFloat16, deFloat16, deFloat16> >::TRet>	SmoothStep< Signature<deFloat16, deFloat16, deFloat16, deFloat16> >::doExpand (ExpandContext& ctx, const TArgExprs& args) const
4416 {
4417 	const ExprP<deFloat16>&		edge0	= args.a;
4418 	const ExprP<deFloat16>&		edge1	= args.b;
4419 	const ExprP<deFloat16>&		x		= args.c;
4420 	const ExprP<deFloat16>		tExpr	= clamp(( x - edge0 ) / ( edge1 - edge0 ),
4421 											constant((deFloat16)FLOAT16_0_0), constant((deFloat16)FLOAT16_1_0))
4422 										+ nanIfGreaterOrEqual(edge0, edge1); // force NaN (and non-analyzable result) for cases edge0 >= edge1
4423 	const ExprP<deFloat16>		t		= bindExpression("t", ctx, tExpr);
4424 
4425 	return (t * t * (constant((deFloat16)FLOAT16_3_0) - constant((deFloat16)FLOAT16_2_0) * t));
4426 }
4427 
4428 template<>
doExpand(ExpandContext & ctx,const SmoothStep<Signature<double,double,double,double>>::ArgExprs & args) const4429 ExprP<SmoothStep< Signature<double, double, double, double> >::Ret>	SmoothStep< Signature<double, double, double, double> >::doExpand (ExpandContext& ctx, const SmoothStep< Signature<double, double, double, double> >::ArgExprs& args) const
4430 {
4431 	const ExprP<double>&	edge0	= args.a;
4432 	const ExprP<double>&	edge1	= args.b;
4433 	const ExprP<double>&	x		= args.c;
4434 	const ExprP<double>		tExpr	= clamp((x - edge0) / (edge1 - edge0), constant(0.0), constant(1.0))
4435 									+ nanIfGreaterOrEqual(edge0, edge1); // force NaN (and non-analyzable result) for cases edge0 >= edge1
4436 	const ExprP<double>		t		= bindExpression("t", ctx, tExpr);
4437 
4438 	return (t * t * (constant(3.0) - constant(2.0) * t));
4439 }
4440 
4441 //Signature<float, float, int>
4442 //Signature<float, deFloat16, int>
4443 //Signature<double, double, int>
4444 template <class T>
4445 class FrExp : public PrimitiveFunc<T>
4446 {
4447 public:
getName(void) const4448 	string	getName			(void) const
4449 	{
4450 		return "frexp";
4451 	}
4452 
4453 	typedef typename	FrExp::IRet		IRet;
4454 	typedef typename	FrExp::IArgs	IArgs;
4455 	typedef typename	FrExp::IArg0	IArg0;
4456 	typedef typename	FrExp::IArg1	IArg1;
4457 
4458 protected:
doApply(const EvalContext &,const IArgs & iargs) const4459 	IRet	doApply			(const EvalContext&, const IArgs& iargs) const
4460 	{
4461 		IRet			ret;
4462 		const IArg0&	x			= iargs.a;
4463 		IArg1&			exponent	= const_cast<IArg1&>(iargs.b);
4464 
4465 		if (x.hasNaN() || x.contains(TCU_INFINITY) || x.contains(-TCU_INFINITY))
4466 		{
4467 			// GLSL (in contrast to IEEE) says that result of applying frexp
4468 			// to infinity is undefined
4469 			ret = Interval::unbounded() | TCU_NAN;
4470 			exponent = Interval(-deLdExp(1.0, 31), deLdExp(1.0, 31)-1);
4471 		}
4472 		else if (!x.empty())
4473 		{
4474 			int				loExp	= 0;
4475 			const double	loFrac	= deFrExp(x.lo(), &loExp);
4476 			int				hiExp	= 0;
4477 			const double	hiFrac	= deFrExp(x.hi(), &hiExp);
4478 
4479 			if (deSign(loFrac) != deSign(hiFrac))
4480 			{
4481 				exponent = Interval(-TCU_INFINITY, de::max(loExp, hiExp));
4482 				ret = Interval();
4483 				if (deSign(loFrac) < 0)
4484 					ret |= Interval(-1.0 + DBL_EPSILON*0.5, 0.0);
4485 				if (deSign(hiFrac) > 0)
4486 					ret |= Interval(0.0, 1.0 - DBL_EPSILON*0.5);
4487 			}
4488 			else
4489 			{
4490 				exponent = Interval(loExp, hiExp);
4491 				if (loExp == hiExp)
4492 					ret = Interval(loFrac, hiFrac);
4493 				else
4494 					ret = deSign(loFrac) * Interval(0.5, 1.0 - DBL_EPSILON*0.5);
4495 			}
4496 		}
4497 
4498 		return ret;
4499 	}
4500 
getOutParamIndex(void) const4501 	int	getOutParamIndex	(void) const
4502 	{
4503 		return 1;
4504 	}
4505 };
4506 typedef FrExp< Signature<float, float, int> >			Frexp32Bit;
4507 typedef FrExp< Signature<deFloat16, deFloat16, int> >	Frexp16Bit;
4508 typedef FrExp< Signature<double, double, int> >			Frexp64Bit;
4509 
4510 template <class T>
4511 class FrexpStruct : public FrExp<T>
4512 {
4513 public:
getName(void) const4514 	virtual string		getName			(void) const	{ return "frexpstruct"; }
getSpirvCase(void) const4515 	virtual SpirVCaseT	getSpirvCase	(void) const	{ return SPIRV_CASETYPE_FREXPSTRUCT; }
4516 };
4517 typedef FrexpStruct< Signature<float, float, int> >				FrexpStruct32Bit;
4518 typedef FrexpStruct< Signature<deFloat16, deFloat16, int> >		FrexpStruct16Bit;
4519 typedef FrexpStruct< Signature<double, double, int> >			FrexpStruct64Bit;
4520 
4521 //Signature<float, float, int>
4522 //Signature<deFloat16, deFloat16, int>
4523 //Signature<double, double, int>
4524 template <class T>
4525 class LdExp : public PrimitiveFunc<T >
4526 {
4527 public:
4528 	typedef typename	LdExp::IRet		IRet;
4529 	typedef typename	LdExp::IArgs	IArgs;
4530 
getName(void) const4531 	string		getName			(void) const
4532 	{
4533 		return "ldexp";
4534 	}
4535 
4536 protected:
doApply(const EvalContext & ctx,const IArgs & iargs) const4537 	Interval	doApply			(const EvalContext& ctx, const IArgs& iargs) const
4538 	{
4539 		const int minExp = ctx.format.getMinExp();
4540 		const int maxExp = ctx.format.getMaxExp();
4541 		// Restrictions from the GLSL.std.450 instruction set.
4542 		// See Khronos bugzilla 11180 for rationale.
4543 		bool any = iargs.a.hasNaN() || iargs.b.hi() > (maxExp + 1);
4544 		Interval ret(any, ldexp(iargs.a.lo(), (int)iargs.b.lo()), ldexp(iargs.a.hi(), (int)iargs.b.hi()));
4545 		if (iargs.b.lo() < minExp) ret |= 0.0;
4546 		if (!ret.isFinite(ctx.format.getMaxValue())) ret |= TCU_NAN;
4547 		return ctx.format.convert(ret);
4548 	}
4549 };
4550 
4551 template <>
doApply(const EvalContext & ctx,const IArgs & iargs) const4552 Interval LdExp <Signature<double, double, int>>::doApply(const EvalContext& ctx, const IArgs& iargs) const
4553 {
4554 	const int minExp = ctx.format.getMinExp();
4555 	const int maxExp = ctx.format.getMaxExp();
4556 	// Restrictions from the GLSL.std.450 instruction set.
4557 	// See Khronos bugzilla 11180 for rationale.
4558 	bool any = iargs.a.hasNaN() || iargs.b.hi() > (maxExp + 1);
4559 	Interval ret(any, ldexp(iargs.a.lo(), (int)iargs.b.lo()), ldexp(iargs.a.hi(), (int)iargs.b.hi()));
4560 	// Add 1ULP precision tolerance to account for differing rounding modes between the GPU and deLdExp.
4561 	ret += Interval(-ctx.format.ulp(ret.lo()), ctx.format.ulp(ret.hi()));
4562 	if (iargs.b.lo() < minExp) ret |= 0.0;
4563 	if (!ret.isFinite(ctx.format.getMaxValue())) ret |= TCU_NAN;
4564 	return ctx.format.convert(ret);
4565 }
4566 
4567 template<int Rows, int Columns, class T>
4568 class Transpose : public PrimitiveFunc<Signature<Matrix<T, Rows, Columns>,
4569 												 Matrix<T, Columns, Rows> > >
4570 {
4571 public:
4572 	typedef typename Transpose::IRet	IRet;
4573 	typedef typename Transpose::IArgs	IArgs;
4574 
getName(void) const4575 	string		getName		(void) const
4576 	{
4577 		return "transpose";
4578 	}
4579 
4580 protected:
doApply(const EvalContext &,const IArgs & iargs) const4581 	IRet		doApply		(const EvalContext&, const IArgs& iargs) const
4582 	{
4583 		IRet ret;
4584 
4585 		for (int rowNdx = 0; rowNdx < Rows; ++rowNdx)
4586 		{
4587 			for (int colNdx = 0; colNdx < Columns; ++colNdx)
4588 				ret(rowNdx, colNdx) = iargs.a(colNdx, rowNdx);
4589 		}
4590 
4591 		return ret;
4592 	}
4593 };
4594 
4595 template<typename Ret, typename Arg0, typename Arg1>
4596 class MulFunc : public PrimitiveFunc<Signature<Ret, Arg0, Arg1> >
4597 {
4598 public:
getName(void) const4599 	string	getName	(void) const									{ return "mul"; }
4600 
4601 protected:
doPrint(ostream & os,const BaseArgExprs & args) const4602 	void	doPrint	(ostream& os, const BaseArgExprs& args) const
4603 	{
4604 		os << "(" << *args[0] << " * " << *args[1] << ")";
4605 	}
4606 };
4607 
4608 template<typename T, int LeftRows, int Middle, int RightCols>
4609 class MatMul : public MulFunc<Matrix<T, LeftRows, RightCols>,
4610 							  Matrix<T, LeftRows, Middle>,
4611 							  Matrix<T, Middle, RightCols> >
4612 {
4613 protected:
4614 	typedef typename MatMul::IRet	IRet;
4615 	typedef typename MatMul::IArgs	IArgs;
4616 	typedef typename MatMul::IArg0	IArg0;
4617 	typedef typename MatMul::IArg1	IArg1;
4618 
doApply(const EvalContext & ctx,const IArgs & iargs) const4619 	IRet	doApply	(const EvalContext&	ctx, const IArgs& iargs) const
4620 	{
4621 		const IArg0&	left	= iargs.a;
4622 		const IArg1&	right	= iargs.b;
4623 		IRet			ret;
4624 
4625 		for (int row = 0; row < LeftRows; ++row)
4626 		{
4627 			for (int col = 0; col < RightCols; ++col)
4628 			{
4629 				Interval	element	(0.0);
4630 
4631 				for (int ndx = 0; ndx < Middle; ++ndx)
4632 					element = call<Add< Signature<T, T, T> > >(ctx, element,
4633 										call<Mul< Signature<T, T, T> > >(ctx, left[ndx][row], right[col][ndx]));
4634 
4635 				ret[col][row] = element;
4636 			}
4637 		}
4638 
4639 		return ret;
4640 	}
4641 };
4642 
4643 template<typename T, int Rows, int Cols>
4644 class VecMatMul : public MulFunc<Vector<T, Cols>,
4645 								 Vector<T, Rows>,
4646 								 Matrix<T, Rows, Cols> >
4647 {
4648 public:
4649 	typedef typename VecMatMul::IRet	IRet;
4650 	typedef typename VecMatMul::IArgs	IArgs;
4651 	typedef typename VecMatMul::IArg0	IArg0;
4652 	typedef typename VecMatMul::IArg1	IArg1;
4653 
4654 protected:
doApply(const EvalContext & ctx,const IArgs & iargs) const4655 	IRet	doApply	(const EvalContext& ctx, const IArgs& iargs) const
4656 	{
4657 		const IArg0&	left	= iargs.a;
4658 		const IArg1&	right	= iargs.b;
4659 		IRet			ret;
4660 
4661 		for (int col = 0; col < Cols; ++col)
4662 		{
4663 			Interval	element	(0.0);
4664 
4665 			for (int row = 0; row < Rows; ++row)
4666 				element = call<Add< Signature<T, T, T> > >(ctx, element, call<Mul< Signature<T, T, T> > >(ctx, left[row], right[col][row]));
4667 
4668 			ret[col] = element;
4669 		}
4670 
4671 		return ret;
4672 	}
4673 };
4674 
4675 template<int Rows, int Cols, class T>
4676 class MatVecMul : public MulFunc<Vector<T, Rows>,
4677 								 Matrix<T, Rows, Cols>,
4678 								 Vector<T, Cols> >
4679 {
4680 public:
4681 	typedef typename MatVecMul::IRet	IRet;
4682 	typedef typename MatVecMul::IArgs	IArgs;
4683 	typedef typename MatVecMul::IArg0	IArg0;
4684 	typedef typename MatVecMul::IArg1	IArg1;
4685 
4686 protected:
doApply(const EvalContext & ctx,const IArgs & iargs) const4687 	IRet	doApply	(const EvalContext& ctx, const IArgs& iargs) const
4688 	{
4689 		const IArg0&	left	= iargs.a;
4690 		const IArg1&	right	= iargs.b;
4691 
4692 		return call<VecMatMul<T, Cols, Rows> >(ctx, right,
4693 											call<Transpose<Rows, Cols, T> >(ctx, left));
4694 	}
4695 };
4696 
4697 template<int Rows, int Cols, class T>
4698 class OuterProduct : public PrimitiveFunc<Signature<Matrix<T, Rows, Cols>,
4699 													Vector<T, Rows>,
4700 													Vector<T, Cols> > >
4701 {
4702 public:
4703 	typedef typename OuterProduct::IRet		IRet;
4704 	typedef typename OuterProduct::IArgs	IArgs;
4705 
getName(void) const4706 	string	getName	(void) const
4707 	{
4708 		return "outerProduct";
4709 	}
4710 
4711 protected:
doApply(const EvalContext & ctx,const IArgs & iargs) const4712 	IRet	doApply	(const EvalContext& ctx, const IArgs& iargs) const
4713 	{
4714 		IRet	ret;
4715 
4716 		for (int row = 0; row < Rows; ++row)
4717 		{
4718 			for (int col = 0; col < Cols; ++col)
4719 				ret[col][row] = call<Mul< Signature<T, T, T> > >(ctx, iargs.a[row], iargs.b[col]);
4720 		}
4721 
4722 		return ret;
4723 	}
4724 };
4725 
4726 template<int Rows, int Cols, class T>
outerProduct(const ExprP<Vector<T,Rows>> & left,const ExprP<Vector<T,Cols>> & right)4727 ExprP<Matrix<T, Rows, Cols> > outerProduct (const ExprP<Vector<T, Rows> >& left,
4728 												const ExprP<Vector<T, Cols> >& right)
4729 {
4730 	return app<OuterProduct<Rows, Cols, T> >(left, right);
4731 }
4732 
4733 template<class T>
4734 class DeterminantBase : public DerivedFunc<T>
4735 {
4736 public:
getName(void) const4737 	string	getName	(void) const { return "determinant"; }
4738 };
4739 
4740 template<int Size> class Determinant;
4741 template<int Size> class Determinant16bit;
4742 template<int Size> class Determinant64bit;
4743 
4744 template<int Size>
determinant(ExprP<Matrix<float,Size,Size>> mat)4745 ExprP<float> determinant (ExprP<Matrix<float, Size, Size> > mat)
4746 {
4747 	return app<Determinant<Size> >(mat);
4748 }
4749 
4750 template<int Size>
determinant(ExprP<Matrix<deFloat16,Size,Size>> mat)4751 ExprP<deFloat16> determinant (ExprP<Matrix<deFloat16, Size, Size> > mat)
4752 {
4753 	return app<Determinant16bit<Size> >(mat);
4754 }
4755 
4756 template<int Size>
determinant(ExprP<Matrix<double,Size,Size>> mat)4757 ExprP<double> determinant (ExprP<Matrix<double, Size, Size> > mat)
4758 {
4759 	return app<Determinant64bit<Size> >(mat);
4760 }
4761 
4762 template<>
4763 class Determinant<2> : public DeterminantBase<Signature<float, Matrix<float, 2, 2> > >
4764 {
4765 protected:
doExpand(ExpandContext &,const ArgExprs & args) const4766 	ExprP<Ret>	doExpand (ExpandContext&, const ArgExprs& args)	const
4767 	{
4768 		ExprP<Mat2>	mat	= args.a;
4769 
4770 		return mat[0][0] * mat[1][1] - mat[1][0] * mat[0][1];
4771 	}
4772 };
4773 
4774 template<>
4775 class Determinant<3> : public DeterminantBase<Signature<float, Matrix<float, 3, 3> > >
4776 {
4777 protected:
doExpand(ExpandContext &,const ArgExprs & args) const4778 	ExprP<Ret> doExpand (ExpandContext&, const ArgExprs& args) const
4779 	{
4780 		ExprP<Mat3>	mat	= args.a;
4781 
4782 		return (mat[0][0] * (mat[1][1] * mat[2][2] - mat[1][2] * mat[2][1]) +
4783 				mat[0][1] * (mat[1][2] * mat[2][0] - mat[1][0] * mat[2][2]) +
4784 				mat[0][2] * (mat[1][0] * mat[2][1] - mat[1][1] * mat[2][0]));
4785 	}
4786 };
4787 
4788 template<>
4789 class Determinant<4> : public DeterminantBase<Signature<float, Matrix<float, 4, 4> > >
4790 {
4791 protected:
doExpand(ExpandContext & ctx,const ArgExprs & args) const4792 	 ExprP<Ret>	doExpand	(ExpandContext& ctx, const ArgExprs& args) const
4793 	{
4794 		ExprP<Mat4>	mat	= args.a;
4795 		ExprP<Mat3>	minors[4];
4796 
4797 		for (int ndx = 0; ndx < 4; ++ndx)
4798 		{
4799 			ExprP<Vec4>		minorColumns[3];
4800 			ExprP<Vec3>		columns[3];
4801 
4802 			for (int col = 0; col < 3; ++col)
4803 				minorColumns[col] = mat[col < ndx ? col : col + 1];
4804 
4805 			for (int col = 0; col < 3; ++col)
4806 				columns[col] = vec3(minorColumns[0][col+1],
4807 									minorColumns[1][col+1],
4808 									minorColumns[2][col+1]);
4809 
4810 			minors[ndx] = bindExpression("minor", ctx,
4811 										 mat3(columns[0], columns[1], columns[2]));
4812 		}
4813 
4814 		return (mat[0][0] * determinant(minors[0]) -
4815 				mat[1][0] * determinant(minors[1]) +
4816 				mat[2][0] * determinant(minors[2]) -
4817 				mat[3][0] * determinant(minors[3]));
4818 	}
4819 };
4820 
4821 template<>
4822 class Determinant16bit<2> : public DeterminantBase<Signature<deFloat16, Matrix<deFloat16, 2, 2> > >
4823 {
4824 protected:
doExpand(ExpandContext &,const ArgExprs & args) const4825 	ExprP<Ret>	doExpand (ExpandContext&, const ArgExprs& args)	const
4826 	{
4827 		ExprP<Mat2_16b>	mat	= args.a;
4828 
4829 		return mat[0][0] * mat[1][1] - mat[1][0] * mat[0][1];
4830 	}
4831 };
4832 
4833 template<>
4834 class Determinant16bit<3> : public DeterminantBase<Signature<deFloat16, Matrix<deFloat16, 3, 3> > >
4835 {
4836 protected:
doExpand(ExpandContext &,const ArgExprs & args) const4837 	ExprP<Ret> doExpand(ExpandContext&, const ArgExprs& args) const
4838 	{
4839 		ExprP<Mat3_16b>	mat = args.a;
4840 
4841 		return (mat[0][0] * (mat[1][1] * mat[2][2] - mat[1][2] * mat[2][1]) +
4842 			mat[0][1] * (mat[1][2] * mat[2][0] - mat[1][0] * mat[2][2]) +
4843 			mat[0][2] * (mat[1][0] * mat[2][1] - mat[1][1] * mat[2][0]));
4844 	}
4845 };
4846 
4847 template<>
4848 class Determinant16bit<4> : public DeterminantBase<Signature<deFloat16, Matrix<deFloat16, 4, 4> > >
4849 {
4850 protected:
doExpand(ExpandContext & ctx,const ArgExprs & args) const4851 	ExprP<Ret>	doExpand(ExpandContext& ctx, const ArgExprs& args) const
4852 	{
4853 		ExprP<Mat4_16b>	mat = args.a;
4854 		ExprP<Mat3_16b>	minors[4];
4855 
4856 		for (int ndx = 0; ndx < 4; ++ndx)
4857 		{
4858 			ExprP<Vec4_16Bit>		minorColumns[3];
4859 			ExprP<Vec3_16Bit>		columns[3];
4860 
4861 			for (int col = 0; col < 3; ++col)
4862 				minorColumns[col] = mat[col < ndx ? col : col + 1];
4863 
4864 			for (int col = 0; col < 3; ++col)
4865 				columns[col] = vec3(minorColumns[0][col + 1],
4866 					minorColumns[1][col + 1],
4867 					minorColumns[2][col + 1]);
4868 
4869 			minors[ndx] = bindExpression("minor", ctx,
4870 				mat3(columns[0], columns[1], columns[2]));
4871 		}
4872 
4873 		return (mat[0][0] * determinant(minors[0]) -
4874 			mat[1][0] * determinant(minors[1]) +
4875 			mat[2][0] * determinant(minors[2]) -
4876 			mat[3][0] * determinant(minors[3]));
4877 	}
4878 };
4879 
4880 template<>
4881 class Determinant64bit<2> : public DeterminantBase<Signature<double, Matrix<double, 2, 2> > >
4882 {
4883 protected:
doExpand(ExpandContext &,const ArgExprs & args) const4884 	ExprP<Ret>	doExpand (ExpandContext&, const ArgExprs& args)	const
4885 	{
4886 		ExprP<Matrix2d>	mat	= args.a;
4887 
4888 		return mat[0][0] * mat[1][1] - mat[1][0] * mat[0][1];
4889 	}
4890 };
4891 
4892 template<>
4893 class Determinant64bit<3> : public DeterminantBase<Signature<double, Matrix<double, 3, 3> > >
4894 {
4895 protected:
doExpand(ExpandContext &,const ArgExprs & args) const4896 	ExprP<Ret> doExpand(ExpandContext&, const ArgExprs& args) const
4897 	{
4898 		ExprP<Matrix3d>	mat = args.a;
4899 
4900 		return (mat[0][0] * (mat[1][1] * mat[2][2] - mat[1][2] * mat[2][1]) +
4901 			mat[0][1] * (mat[1][2] * mat[2][0] - mat[1][0] * mat[2][2]) +
4902 			mat[0][2] * (mat[1][0] * mat[2][1] - mat[1][1] * mat[2][0]));
4903 	}
4904 };
4905 
4906 template<>
4907 class Determinant64bit<4> : public DeterminantBase<Signature<double, Matrix<double, 4, 4> > >
4908 {
4909 protected:
doExpand(ExpandContext & ctx,const ArgExprs & args) const4910 	ExprP<Ret>	doExpand(ExpandContext& ctx, const ArgExprs& args) const
4911 	{
4912 		ExprP<Matrix4d>	mat = args.a;
4913 		ExprP<Matrix3d>	minors[4];
4914 
4915 		for (int ndx = 0; ndx < 4; ++ndx)
4916 		{
4917 			ExprP<Vec4_64Bit>		minorColumns[3];
4918 			ExprP<Vec3_64Bit>		columns[3];
4919 
4920 			for (int col = 0; col < 3; ++col)
4921 				minorColumns[col] = mat[col < ndx ? col : col + 1];
4922 
4923 			for (int col = 0; col < 3; ++col)
4924 				columns[col] = vec3(minorColumns[0][col + 1],
4925 					minorColumns[1][col + 1],
4926 					minorColumns[2][col + 1]);
4927 
4928 			minors[ndx] = bindExpression("minor", ctx,
4929 				mat3(columns[0], columns[1], columns[2]));
4930 		}
4931 
4932 		return (mat[0][0] * determinant(minors[0]) -
4933 			mat[1][0] * determinant(minors[1]) +
4934 			mat[2][0] * determinant(minors[2]) -
4935 			mat[3][0] * determinant(minors[3]));
4936 	}
4937 };
4938 
4939 template<int Size> class Inverse;
4940 
4941 template <int Size>
inverse(ExprP<Matrix<float,Size,Size>> mat)4942 ExprP<Matrix<float, Size, Size> > inverse (ExprP<Matrix<float, Size, Size> > mat)
4943 {
4944 	return app<Inverse<Size> >(mat);
4945 }
4946 
4947 template<int Size> class Inverse16bit;
4948 
4949 template <int Size>
inverse(ExprP<Matrix<deFloat16,Size,Size>> mat)4950 ExprP<Matrix<deFloat16, Size, Size> > inverse (ExprP<Matrix<deFloat16, Size, Size> > mat)
4951 {
4952 	return app<Inverse16bit<Size> >(mat);
4953 }
4954 
4955 template<int Size> class Inverse64bit;
4956 
4957 template <int Size>
inverse(ExprP<Matrix<double,Size,Size>> mat)4958 ExprP<Matrix<double, Size, Size> > inverse (ExprP<Matrix<double, Size, Size> > mat)
4959 {
4960 	return app<Inverse64bit<Size> >(mat);
4961 }
4962 
4963 template<>
4964 class Inverse<2> : public DerivedFunc<Signature<Mat2, Mat2> >
4965 {
4966 public:
getName(void) const4967 	string		getName	(void) const
4968 	{
4969 		return "inverse";
4970 	}
4971 
4972 protected:
doExpand(ExpandContext & ctx,const ArgExprs & args) const4973 	ExprP<Ret>	doExpand (ExpandContext& ctx, const ArgExprs& args) const
4974 	{
4975 		ExprP<Mat2>		mat = args.a;
4976 		ExprP<float>	det	= bindExpression("det", ctx, determinant(mat));
4977 
4978 		return mat2(vec2(mat[1][1] / det, -mat[0][1] / det),
4979 					vec2(-mat[1][0] / det, mat[0][0] / det));
4980 	}
4981 };
4982 
4983 template<>
4984 class Inverse<3> : public DerivedFunc<Signature<Mat3, Mat3> >
4985 {
4986 public:
getName(void) const4987 	string		getName		(void) const
4988 	{
4989 		return "inverse";
4990 	}
4991 
4992 protected:
doExpand(ExpandContext & ctx,const ArgExprs & args) const4993 	ExprP<Ret>	doExpand	(ExpandContext& ctx, const ArgExprs& args)			const
4994 	{
4995 		ExprP<Mat3>		mat		= args.a;
4996 		ExprP<Mat2>		invA	= bindExpression("invA", ctx,
4997 												 inverse(mat2(vec2(mat[0][0], mat[0][1]),
4998 															  vec2(mat[1][0], mat[1][1]))));
4999 
5000 		ExprP<Vec2>		matB	= bindExpression("matB", ctx, vec2(mat[2][0], mat[2][1]));
5001 		ExprP<Vec2>		matC	= bindExpression("matC", ctx, vec2(mat[0][2], mat[1][2]));
5002 		ExprP<float>	matD	= bindExpression("matD", ctx, mat[2][2]);
5003 
5004 		ExprP<float>	schur	= bindExpression("schur", ctx,
5005 												 constant(1.0f) /
5006 												 (matD - dot(matC * invA, matB)));
5007 
5008 		ExprP<Vec2>		t1		= invA * matB;
5009 		ExprP<Vec2>		t2		= t1 * schur;
5010 		ExprP<Mat2>		t3		= outerProduct(t2, matC);
5011 		ExprP<Mat2>		t4		= t3 * invA;
5012 		ExprP<Mat2>		t5		= invA + t4;
5013 		ExprP<Mat2>		blockA	= bindExpression("blockA", ctx, t5);
5014 		ExprP<Vec2>		blockB	= bindExpression("blockB", ctx,
5015 												 (invA * matB) * -schur);
5016 		ExprP<Vec2>		blockC	= bindExpression("blockC", ctx,
5017 												 (matC * invA) * -schur);
5018 
5019 		return mat3(vec3(blockA[0][0], blockA[0][1], blockC[0]),
5020 					vec3(blockA[1][0], blockA[1][1], blockC[1]),
5021 					vec3(blockB[0], blockB[1], schur));
5022 	}
5023 };
5024 
5025 template<>
5026 class Inverse<4> : public DerivedFunc<Signature<Mat4, Mat4> >
5027 {
5028 public:
getName(void) const5029 	string		getName		(void) const { return "inverse"; }
5030 
5031 protected:
doExpand(ExpandContext & ctx,const ArgExprs & args) const5032 	ExprP<Ret>			doExpand			(ExpandContext&		ctx,
5033 											 const ArgExprs&	args)			const
5034 	{
5035 		ExprP<Mat4>	mat		= args.a;
5036 		ExprP<Mat2>	invA	= bindExpression("invA", ctx,
5037 											 inverse(mat2(vec2(mat[0][0], mat[0][1]),
5038 														  vec2(mat[1][0], mat[1][1]))));
5039 		ExprP<Mat2>	matB	= bindExpression("matB", ctx,
5040 											 mat2(vec2(mat[2][0], mat[2][1]),
5041 												  vec2(mat[3][0], mat[3][1])));
5042 		ExprP<Mat2>	matC	= bindExpression("matC", ctx,
5043 											 mat2(vec2(mat[0][2], mat[0][3]),
5044 												  vec2(mat[1][2], mat[1][3])));
5045 		ExprP<Mat2>	matD	= bindExpression("matD", ctx,
5046 											 mat2(vec2(mat[2][2], mat[2][3]),
5047 												  vec2(mat[3][2], mat[3][3])));
5048 		ExprP<Mat2>	schur	= bindExpression("schur", ctx,
5049 											 inverse(matD + -(matC * invA * matB)));
5050 		ExprP<Mat2>	blockA	= bindExpression("blockA", ctx,
5051 											 invA + (invA * matB * schur * matC * invA));
5052 		ExprP<Mat2>	blockB	= bindExpression("blockB", ctx,
5053 											 (-invA) * matB * schur);
5054 		ExprP<Mat2>	blockC	= bindExpression("blockC", ctx,
5055 											 (-schur) * matC * invA);
5056 
5057 		return mat4(vec4(blockA[0][0], blockA[0][1], blockC[0][0], blockC[0][1]),
5058 					vec4(blockA[1][0], blockA[1][1], blockC[1][0], blockC[1][1]),
5059 					vec4(blockB[0][0], blockB[0][1], schur[0][0], schur[0][1]),
5060 					vec4(blockB[1][0], blockB[1][1], schur[1][0], schur[1][1]));
5061 	}
5062 };
5063 
5064 template<>
5065 class Inverse16bit<2> : public DerivedFunc<Signature<Mat2_16b, Mat2_16b> >
5066 {
5067 public:
getName(void) const5068 	string		getName	(void) const
5069 	{
5070 		return "inverse";
5071 	}
5072 
5073 protected:
doExpand(ExpandContext & ctx,const ArgExprs & args) const5074 	ExprP<Ret>	doExpand (ExpandContext& ctx, const ArgExprs& args) const
5075 	{
5076 		ExprP<Mat2_16b>		mat = args.a;
5077 		ExprP<deFloat16>	det	= bindExpression("det", ctx, determinant(mat));
5078 
5079 		return mat2(vec2((mat[1][1] / det), (-mat[0][1] / det)),
5080 					vec2((-mat[1][0] / det), (mat[0][0] / det)));
5081 	}
5082 };
5083 
5084 template<>
5085 class Inverse16bit<3> : public DerivedFunc<Signature<Mat3_16b, Mat3_16b> >
5086 {
5087 public:
getName(void) const5088 	string		getName(void) const
5089 	{
5090 		return "inverse";
5091 	}
5092 
5093 protected:
doExpand(ExpandContext & ctx,const ArgExprs & args) const5094 	ExprP<Ret>	doExpand(ExpandContext& ctx, const ArgExprs& args)			const
5095 	{
5096 		ExprP<Mat3_16b>		mat = args.a;
5097 		ExprP<Mat2_16b>		invA = bindExpression("invA", ctx,
5098 			inverse(mat2(vec2(mat[0][0], mat[0][1]),
5099 				vec2(mat[1][0], mat[1][1]))));
5100 
5101 		ExprP<Vec2_16Bit>		matB = bindExpression("matB", ctx, vec2(mat[2][0], mat[2][1]));
5102 		ExprP<Vec2_16Bit>		matC = bindExpression("matC", ctx, vec2(mat[0][2], mat[1][2]));
5103 		ExprP<Mat3_16b::Scalar>	matD = bindExpression("matD", ctx, mat[2][2]);
5104 
5105 		ExprP<Mat3_16b::Scalar>	schur = bindExpression("schur", ctx,
5106 			constant((deFloat16)FLOAT16_1_0) /
5107 			(matD - dot(matC * invA, matB)));
5108 
5109 		ExprP<Vec2_16Bit>		t1 = invA * matB;
5110 		ExprP<Vec2_16Bit>		t2 = t1 * schur;
5111 		ExprP<Mat2_16b>		t3 = outerProduct(t2, matC);
5112 		ExprP<Mat2_16b>		t4 = t3 * invA;
5113 		ExprP<Mat2_16b>		t5 = invA + t4;
5114 		ExprP<Mat2_16b>		blockA = bindExpression("blockA", ctx, t5);
5115 		ExprP<Vec2_16Bit>		blockB = bindExpression("blockB", ctx,
5116 			(invA * matB) * -schur);
5117 		ExprP<Vec2_16Bit>		blockC = bindExpression("blockC", ctx,
5118 			(matC * invA) * -schur);
5119 
5120 		return mat3(vec3(blockA[0][0], blockA[0][1], blockC[0]),
5121 			vec3(blockA[1][0], blockA[1][1], blockC[1]),
5122 			vec3(blockB[0], blockB[1], schur));
5123 	}
5124 };
5125 
5126 template<>
5127 class Inverse16bit<4> : public DerivedFunc<Signature<Mat4_16b, Mat4_16b> >
5128 {
5129 public:
getName(void) const5130 	string		getName(void) const { return "inverse"; }
5131 
5132 protected:
doExpand(ExpandContext & ctx,const ArgExprs & args) const5133 	ExprP<Ret>			doExpand(ExpandContext&		ctx,
5134 		const ArgExprs&	args)			const
5135 	{
5136 		ExprP<Mat4_16b>	mat = args.a;
5137 		ExprP<Mat2_16b>	invA = bindExpression("invA", ctx,
5138 			inverse(mat2(vec2(mat[0][0], mat[0][1]),
5139 				vec2(mat[1][0], mat[1][1]))));
5140 		ExprP<Mat2_16b>	matB = bindExpression("matB", ctx,
5141 			mat2(vec2(mat[2][0], mat[2][1]),
5142 				vec2(mat[3][0], mat[3][1])));
5143 		ExprP<Mat2_16b>	matC = bindExpression("matC", ctx,
5144 			mat2(vec2(mat[0][2], mat[0][3]),
5145 				vec2(mat[1][2], mat[1][3])));
5146 		ExprP<Mat2_16b>	matD = bindExpression("matD", ctx,
5147 			mat2(vec2(mat[2][2], mat[2][3]),
5148 				vec2(mat[3][2], mat[3][3])));
5149 		ExprP<Mat2_16b>	schur = bindExpression("schur", ctx,
5150 			inverse(matD + -(matC * invA * matB)));
5151 		ExprP<Mat2_16b>	blockA = bindExpression("blockA", ctx,
5152 			invA + (invA * matB * schur * matC * invA));
5153 		ExprP<Mat2_16b>	blockB = bindExpression("blockB", ctx,
5154 			(-invA) * matB * schur);
5155 		ExprP<Mat2_16b>	blockC = bindExpression("blockC", ctx,
5156 			(-schur) * matC * invA);
5157 
5158 		return mat4(vec4(blockA[0][0], blockA[0][1], blockC[0][0], blockC[0][1]),
5159 			vec4(blockA[1][0], blockA[1][1], blockC[1][0], blockC[1][1]),
5160 			vec4(blockB[0][0], blockB[0][1], schur[0][0], schur[0][1]),
5161 			vec4(blockB[1][0], blockB[1][1], schur[1][0], schur[1][1]));
5162 	}
5163 };
5164 
5165 template<>
5166 class Inverse64bit<2> : public DerivedFunc<Signature<Matrix2d, Matrix2d> >
5167 {
5168 public:
getName(void) const5169 	string		getName	(void) const
5170 	{
5171 		return "inverse";
5172 	}
5173 
5174 protected:
doExpand(ExpandContext & ctx,const ArgExprs & args) const5175 	ExprP<Ret>	doExpand (ExpandContext& ctx, const ArgExprs& args) const
5176 	{
5177 		ExprP<Matrix2d>		mat = args.a;
5178 		ExprP<double>		det	= bindExpression("det", ctx, determinant(mat));
5179 
5180 		return mat2(vec2((mat[1][1] / det), (-mat[0][1] / det)),
5181 					vec2((-mat[1][0] / det), (mat[0][0] / det)));
5182 	}
5183 };
5184 
5185 template<>
5186 class Inverse64bit<3> : public DerivedFunc<Signature<Matrix3d, Matrix3d> >
5187 {
5188 public:
getName(void) const5189 	string		getName(void) const
5190 	{
5191 		return "inverse";
5192 	}
5193 
5194 protected:
doExpand(ExpandContext & ctx,const ArgExprs & args) const5195 	ExprP<Ret>	doExpand(ExpandContext& ctx, const ArgExprs& args)			const
5196 	{
5197 		ExprP<Matrix3d>		mat = args.a;
5198 		ExprP<Matrix2d>		invA = bindExpression("invA", ctx,
5199 			inverse(mat2(vec2(mat[0][0], mat[0][1]),
5200 				vec2(mat[1][0], mat[1][1]))));
5201 
5202 		ExprP<Vec2_64Bit>		matB = bindExpression("matB", ctx, vec2(mat[2][0], mat[2][1]));
5203 		ExprP<Vec2_64Bit>		matC = bindExpression("matC", ctx, vec2(mat[0][2], mat[1][2]));
5204 		ExprP<Matrix3d::Scalar>	matD = bindExpression("matD", ctx, mat[2][2]);
5205 
5206 		ExprP<Matrix3d::Scalar>	schur = bindExpression("schur", ctx,
5207 			constant(1.0) /
5208 			(matD - dot(matC * invA, matB)));
5209 
5210 		ExprP<Vec2_64Bit>		t1 = invA * matB;
5211 		ExprP<Vec2_64Bit>		t2 = t1 * schur;
5212 		ExprP<Matrix2d>			t3 = outerProduct(t2, matC);
5213 		ExprP<Matrix2d>			t4 = t3 * invA;
5214 		ExprP<Matrix2d>			t5 = invA + t4;
5215 		ExprP<Matrix2d>			blockA = bindExpression("blockA", ctx, t5);
5216 		ExprP<Vec2_64Bit>		blockB = bindExpression("blockB", ctx,
5217 			(invA * matB) * -schur);
5218 		ExprP<Vec2_64Bit>		blockC = bindExpression("blockC", ctx,
5219 			(matC * invA) * -schur);
5220 
5221 		return mat3(vec3(blockA[0][0], blockA[0][1], blockC[0]),
5222 			vec3(blockA[1][0], blockA[1][1], blockC[1]),
5223 			vec3(blockB[0], blockB[1], schur));
5224 	}
5225 };
5226 
5227 template<>
5228 class Inverse64bit<4> : public DerivedFunc<Signature<Matrix4d, Matrix4d> >
5229 {
5230 public:
getName(void) const5231 	string		getName(void) const { return "inverse"; }
5232 
5233 protected:
doExpand(ExpandContext & ctx,const ArgExprs & args) const5234 	ExprP<Ret>			doExpand(ExpandContext&		ctx,
5235 		const ArgExprs&	args)			const
5236 	{
5237 		ExprP<Matrix4d>	mat = args.a;
5238 		ExprP<Matrix2d>	invA = bindExpression("invA", ctx,
5239 			inverse(mat2(vec2(mat[0][0], mat[0][1]),
5240 				vec2(mat[1][0], mat[1][1]))));
5241 		ExprP<Matrix2d>	matB = bindExpression("matB", ctx,
5242 			mat2(vec2(mat[2][0], mat[2][1]),
5243 				vec2(mat[3][0], mat[3][1])));
5244 		ExprP<Matrix2d>	matC = bindExpression("matC", ctx,
5245 			mat2(vec2(mat[0][2], mat[0][3]),
5246 				vec2(mat[1][2], mat[1][3])));
5247 		ExprP<Matrix2d>	matD = bindExpression("matD", ctx,
5248 			mat2(vec2(mat[2][2], mat[2][3]),
5249 				vec2(mat[3][2], mat[3][3])));
5250 		ExprP<Matrix2d>	schur = bindExpression("schur", ctx,
5251 			inverse(matD + -(matC * invA * matB)));
5252 		ExprP<Matrix2d>	blockA = bindExpression("blockA", ctx,
5253 			invA + (invA * matB * schur * matC * invA));
5254 		ExprP<Matrix2d>	blockB = bindExpression("blockB", ctx,
5255 			(-invA) * matB * schur);
5256 		ExprP<Matrix2d>	blockC = bindExpression("blockC", ctx,
5257 			(-schur) * matC * invA);
5258 
5259 		return mat4(vec4(blockA[0][0], blockA[0][1], blockC[0][0], blockC[0][1]),
5260 			vec4(blockA[1][0], blockA[1][1], blockC[1][0], blockC[1][1]),
5261 			vec4(blockB[0][0], blockB[0][1], schur[0][0], schur[0][1]),
5262 			vec4(blockB[1][0], blockB[1][1], schur[1][0], schur[1][1]));
5263 	}
5264 };
5265 
5266 //Signature<float, float, float, float>
5267 //Signature<deFloat16, deFloat16, deFloat16, deFloat16>
5268 //Signature<double, double, double, double>
5269 template <class T>
5270 class Fma : public DerivedFunc<T>
5271 {
5272 public:
5273 	typedef typename	Fma::ArgExprs		ArgExprs;
5274 	typedef typename	Fma::Ret			Ret;
5275 
getName(void) const5276 	string			getName					(void) const
5277 	{
5278 		return "fma";
5279 	}
5280 
5281 protected:
doExpand(ExpandContext &,const ArgExprs & x) const5282 	ExprP<Ret>	doExpand				(ExpandContext&, const ArgExprs& x) const
5283 	{
5284 		return x.a * x.b + x.c;
5285 	}
5286 };
5287 
5288 } // Functions
5289 
5290 using namespace Functions;
5291 
5292 template <typename T>
operator [](int i) const5293 ExprP<typename T::Element> ContainerExprPBase<T>::operator[] (int i) const
5294 {
5295 	return Functions::getComponent(exprP<T>(*this), i);
5296 }
5297 
//! float + float: applies Add to the two operand expressions.
ExprP<float> operator+ (const ExprP<float>& arg0, const ExprP<float>& arg1)
{
	typedef Add< Signature<float, float, float> > AddFunc;

	return app<AddFunc>(arg0, arg1);
}
5302 
//! deFloat16 + deFloat16: applies Add to the two operand expressions.
ExprP<deFloat16> operator+ (const ExprP<deFloat16>& arg0, const ExprP<deFloat16>& arg1)
{
	typedef Add< Signature<deFloat16, deFloat16, deFloat16> > AddFunc;

	return app<AddFunc>(arg0, arg1);
}
5307 
//! double + double: applies Add to the two operand expressions.
ExprP<double> operator+ (const ExprP<double>& arg0, const ExprP<double>& arg1)
{
	typedef Add< Signature<double, double, double> > AddFunc;

	return app<AddFunc>(arg0, arg1);
}
5312 
5313 template <typename T>
operator -(const ExprP<T> & arg0,const ExprP<T> & arg1)5314 ExprP<T> operator- (const ExprP<T>& arg0, const ExprP<T>& arg1)
5315 {
5316 	return app<Sub <Signature <T,T,T> > >(arg0, arg1);
5317 }
5318 
5319 template <typename T>
operator -(const ExprP<T> & arg0)5320 ExprP<T> operator- (const ExprP<T>& arg0)
5321 {
5322 	return app<Negate< Signature<T, T> > >(arg0);
5323 }
5324 
//! float * float: applies Mul to the two operand expressions.
ExprP<float> operator* (const ExprP<float>& arg0, const ExprP<float>& arg1)
{
	typedef Mul< Signature<float, float, float> > MulFunc;

	return app<MulFunc>(arg0, arg1);
}
5329 
//! deFloat16 * deFloat16: applies Mul to the two operand expressions.
ExprP<deFloat16> operator* (const ExprP<deFloat16>& arg0, const ExprP<deFloat16>& arg1)
{
	typedef Mul< Signature<deFloat16, deFloat16, deFloat16> > MulFunc;

	return app<MulFunc>(arg0, arg1);
}
5334 
//! double * double: applies Mul to the two operand expressions.
ExprP<double> operator* (const ExprP<double>& arg0, const ExprP<double>& arg1)
{
	typedef Mul< Signature<double, double, double> > MulFunc;

	return app<MulFunc>(arg0, arg1);
}
5339 
5340 template <typename T>
operator /(const ExprP<T> & arg0,const ExprP<T> & arg1)5341 ExprP<T> operator/ (const ExprP<T>& arg0, const ExprP<T>& arg1)
5342 {
5343 	return app<Div< Signature<T, T, T> > >(arg0, arg1);
5344 }
5345 
5346 
5347 template <typename Sig_, int Size>
5348 class GenFunc : public PrimitiveFunc<Signature<
5349 	typename ContainerOf<typename Sig_::Ret, Size>::Container,
5350 	typename ContainerOf<typename Sig_::Arg0, Size>::Container,
5351 	typename ContainerOf<typename Sig_::Arg1, Size>::Container,
5352 	typename ContainerOf<typename Sig_::Arg2, Size>::Container,
5353 	typename ContainerOf<typename Sig_::Arg3, Size>::Container> >
5354 {
5355 public:
5356 	typedef typename GenFunc::IArgs		IArgs;
5357 	typedef typename GenFunc::IRet		IRet;
5358 
GenFunc(const Func<Sig_> & scalarFunc)5359 				GenFunc					(const Func<Sig_>&	scalarFunc) : m_func (scalarFunc) {}
5360 
getSpirvCase(void) const5361 	SpirVCaseT	getSpirvCase			(void) const
5362 	{
5363 		return m_func.getSpirvCase();
5364 	}
5365 
getName(void) const5366 	string		getName					(void) const
5367 	{
5368 		return m_func.getName();
5369 	}
5370 
getOutParamIndex(void) const5371 	int			getOutParamIndex		(void) const
5372 	{
5373 		return m_func.getOutParamIndex();
5374 	}
5375 
getRequiredExtension(void) const5376 	string		getRequiredExtension	(void) const
5377 	{
5378 		return m_func.getRequiredExtension();
5379 	}
5380 
getInputRange(const bool is16bit) const5381 	Interval	getInputRange			(const bool is16bit) const
5382 	{
5383 		return m_func.getInputRange(is16bit);
5384 	}
5385 
5386 protected:
doPrint(ostream & os,const BaseArgExprs & args) const5387 	void		doPrint					(ostream& os, const BaseArgExprs& args) const
5388 	{
5389 		m_func.print(os, args);
5390 	}
5391 
doApply(const EvalContext & ctx,const IArgs & iargs) const5392 	IRet		doApply					(const EvalContext& ctx, const IArgs& iargs) const
5393 	{
5394 		IRet ret;
5395 
5396 		for (int ndx = 0; ndx < Size; ++ndx)
5397 		{
5398 			ret[ndx] =
5399 				m_func.apply(ctx, iargs.a[ndx], iargs.b[ndx], iargs.c[ndx], iargs.d[ndx]);
5400 		}
5401 
5402 		return ret;
5403 	}
5404 
doFail(const EvalContext & ctx,const IArgs & iargs) const5405 	IRet		doFail					(const EvalContext& ctx, const IArgs& iargs) const
5406 	{
5407 		IRet ret;
5408 
5409 		for (int ndx = 0; ndx < Size; ++ndx)
5410 		{
5411 			ret[ndx] =
5412 				m_func.fail(ctx, iargs.a[ndx], iargs.b[ndx], iargs.c[ndx], iargs.d[ndx]);
5413 		}
5414 
5415 		return ret;
5416 	}
5417 
doGetUsedFuncs(FuncSet & dst) const5418 	void		doGetUsedFuncs			(FuncSet& dst) const
5419 	{
5420 		m_func.getUsedFuncs(dst);
5421 	}
5422 
5423 	const Func<Sig_>&	m_func;
5424 };
5425 
5426 template <typename F, int Size>
5427 class VectorizedFunc : public GenFunc<typename F::Sig, Size>
5428 {
5429 public:
VectorizedFunc(void)5430 	VectorizedFunc	(void) : GenFunc<typename F::Sig, Size>(instance<F>()) {}
5431 };
5432 
5433 template <typename Sig_, int Size>
5434 class FixedGenFunc : public PrimitiveFunc <Signature<
5435 	typename ContainerOf<typename Sig_::Ret, Size>::Container,
5436 	typename ContainerOf<typename Sig_::Arg0, Size>::Container,
5437 	typename Sig_::Arg1,
5438 	typename ContainerOf<typename Sig_::Arg2, Size>::Container,
5439 	typename ContainerOf<typename Sig_::Arg3, Size>::Container> >
5440 {
5441 public:
5442 	typedef typename FixedGenFunc::IArgs		IArgs;
5443 	typedef typename FixedGenFunc::IRet			IRet;
5444 
getName(void) const5445 	string						getName			(void) const
5446 	{
5447 		return this->doGetScalarFunc().getName();
5448 	}
5449 
getSpirvCase(void) const5450 	SpirVCaseT					getSpirvCase	(void) const
5451 	{
5452 		return this->doGetScalarFunc().getSpirvCase();
5453 	}
5454 
5455 protected:
doPrint(ostream & os,const BaseArgExprs & args) const5456 	void						doPrint			(ostream& os, const BaseArgExprs& args) const
5457 	{
5458 		this->doGetScalarFunc().print(os, args);
5459 	}
5460 
doApply(const EvalContext & ctx,const IArgs & iargs) const5461 	IRet						doApply			(const EvalContext& ctx,
5462 												 const IArgs&		iargs) const
5463 	{
5464 		IRet				ret;
5465 		const Func<Sig_>&	func	= this->doGetScalarFunc();
5466 
5467 		for (int ndx = 0; ndx < Size; ++ndx)
5468 			ret[ndx] = func.apply(ctx, iargs.a[ndx], iargs.b, iargs.c[ndx], iargs.d[ndx]);
5469 
5470 		return ret;
5471 	}
5472 
5473 	virtual const Func<Sig_>&	doGetScalarFunc	(void) const = 0;
5474 };
5475 
5476 template <typename F, int Size>
5477 class FixedVecFunc : public FixedGenFunc<typename F::Sig, Size>
5478 {
5479 protected:
doGetScalarFunc(void) const5480 	const Func<typename F::Sig>& doGetScalarFunc	(void) const { return instance<F>(); }
5481 };
5482 
5483 template<typename Sig>
5484 struct GenFuncs
5485 {
GenFuncsvkt::shaderexecutor::GenFuncs5486 	GenFuncs (const Func<Sig>&			func_,
5487 			  const GenFunc<Sig, 2>&	func2_,
5488 			  const GenFunc<Sig, 3>&	func3_,
5489 			  const GenFunc<Sig, 4>&	func4_)
5490 		: func	(func_)
5491 		, func2	(func2_)
5492 		, func3	(func3_)
5493 		, func4	(func4_)
5494 	{}
5495 
5496 	const Func<Sig>&		func;
5497 	const GenFunc<Sig, 2>&	func2;
5498 	const GenFunc<Sig, 3>&	func3;
5499 	const GenFunc<Sig, 4>&	func4;
5500 };
5501 
5502 template<typename F>
makeVectorizedFuncs(void)5503 GenFuncs<typename F::Sig> makeVectorizedFuncs (void)
5504 {
5505 	return GenFuncs<typename F::Sig>(instance<F>(),
5506 									 instance<VectorizedFunc<F, 2> >(),
5507 									 instance<VectorizedFunc<F, 3> >(),
5508 									 instance<VectorizedFunc<F, 4> >());
5509 }
5510 
5511 template<typename T, int Size>
operator /(const ExprP<Vector<T,Size>> & arg0,const ExprP<T> & arg1)5512 ExprP<Vector<T, Size> > operator/(const ExprP<Vector<T, Size> >&	arg0,
5513 									  const ExprP<T>&					arg1)
5514 {
5515 	return app<FixedVecFunc<Div< Signature<T, T, T> >, Size> >(arg0, arg1);
5516 }
5517 
5518 template<typename T, int Size>
operator -(const ExprP<Vector<T,Size>> & arg0)5519 ExprP<Vector<T, Size> > operator-(const ExprP<Vector<T, Size> >& arg0)
5520 {
5521 	return app<VectorizedFunc<Negate< Signature<T, T> >, Size> >(arg0);
5522 }
5523 
5524 template<typename T, int Size>
operator -(const ExprP<Vector<T,Size>> & arg0,const ExprP<Vector<T,Size>> & arg1)5525 ExprP<Vector<T, Size> > operator-(const ExprP<Vector<T, Size> >& arg0,
5526 									  const ExprP<Vector<T, Size> >& arg1)
5527 {
5528 	return app<VectorizedFunc<Sub<Signature<T, T, T> >, Size> >(arg0, arg1);
5529 }
5530 
5531 template<int Size, typename T>
operator *(const ExprP<Vector<T,Size>> & arg0,const ExprP<T> & arg1)5532 ExprP<Vector<T, Size> > operator*(const ExprP<Vector<T, Size> >&	arg0,
5533 								  const ExprP<T>&					arg1)
5534 {
5535 	return app<FixedVecFunc<Mul< Signature<T, T, T> >, Size> >(arg0, arg1);
5536 }
5537 
5538 template<typename T, int Size>
operator *(const ExprP<Vector<T,Size>> & arg0,const ExprP<Vector<T,Size>> & arg1)5539 ExprP<Vector<T, Size> > operator*(const ExprP<Vector<T, Size> >& arg0,
5540 								  const ExprP<Vector<T, Size> >& arg1)
5541 {
5542 	return app<VectorizedFunc<Mul< Signature<T, T, T> >, Size> >(arg0, arg1);
5543 }
5544 
5545 template<int LeftRows, int Middle, int RightCols, typename T>
5546 ExprP<Matrix<T, LeftRows, RightCols> >
operator *(const ExprP<Matrix<T,LeftRows,Middle>> & left,const ExprP<Matrix<T,Middle,RightCols>> & right)5547 operator* (const ExprP<Matrix<T, LeftRows, Middle> >&	left,
5548 		   const ExprP<Matrix<T, Middle, RightCols> >&	right)
5549 {
5550 	return app<MatMul<T, LeftRows, Middle, RightCols> >(left, right);
5551 }
5552 
5553 template<int Rows, int Cols, typename T>
operator *(const ExprP<Vector<T,Cols>> & left,const ExprP<Matrix<T,Rows,Cols>> & right)5554 ExprP<Vector<T, Rows> > operator* (const ExprP<Vector<T, Cols> >&		left,
5555 								   const ExprP<Matrix<T, Rows, Cols> >&	right)
5556 {
5557 	return app<VecMatMul<T, Rows, Cols> >(left, right);
5558 }
5559 
5560 template<int Rows, int Cols, class T>
operator *(const ExprP<Matrix<T,Rows,Cols>> & left,const ExprP<Vector<T,Rows>> & right)5561 ExprP<Vector<T, Cols> > operator* (const ExprP<Matrix<T, Rows, Cols> >&	left,
5562 								   const ExprP<Vector<T, Rows> >&		right)
5563 {
5564 	return app<MatVecMul<Rows, Cols, T> >(left, right);
5565 }
5566 
5567 template<int Rows, int Cols, typename T>
operator *(const ExprP<Matrix<T,Rows,Cols>> & left,const ExprP<T> & right)5568 ExprP<Matrix<T, Rows, Cols> > operator* (const ExprP<Matrix<T, Rows, Cols> >&	left,
5569 										 const ExprP<T>&						right)
5570 {
5571 	return app<ScalarMatFunc<Mul< Signature<T, T, T> >, Rows, Cols> >(left, right);
5572 }
5573 
5574 template<int Rows, int Cols>
operator +(const ExprP<Matrix<float,Rows,Cols>> & left,const ExprP<Matrix<float,Rows,Cols>> & right)5575 ExprP<Matrix<float, Rows, Cols> > operator+ (const ExprP<Matrix<float, Rows, Cols> >&	left,
5576 											 const ExprP<Matrix<float, Rows, Cols> >&	right)
5577 {
5578 	return app<CompMatFunc<Add< Signature<float, float, float> >,float, Rows, Cols> >(left, right);
5579 }
5580 
5581 template<int Rows, int Cols>
operator +(const ExprP<Matrix<deFloat16,Rows,Cols>> & left,const ExprP<Matrix<deFloat16,Rows,Cols>> & right)5582 ExprP<Matrix<deFloat16, Rows, Cols> > operator+ (const ExprP<Matrix<deFloat16, Rows, Cols> >&	left,
5583 												 const ExprP<Matrix<deFloat16, Rows, Cols> >&	right)
5584 {
5585 	return app<CompMatFunc<Add< Signature<deFloat16, deFloat16, deFloat16> >, deFloat16, Rows, Cols> >(left, right);
5586 }
5587 
5588 template<int Rows, int Cols>
operator +(const ExprP<Matrix<double,Rows,Cols>> & left,const ExprP<Matrix<double,Rows,Cols>> & right)5589 ExprP<Matrix<double, Rows, Cols> > operator+ (const ExprP<Matrix<double, Rows, Cols> >&	left,
5590 											  const ExprP<Matrix<double, Rows, Cols> >&	right)
5591 {
5592 	return app<CompMatFunc<Add< Signature<double, double, double> >, double, Rows, Cols> >(left, right);
5593 }
5594 
5595 template<typename T, int Rows, int Cols>
operator -(const ExprP<Matrix<T,Rows,Cols>> & mat)5596 ExprP<Matrix<T, Rows, Cols> > operator- (const ExprP<Matrix<T, Rows, Cols> >&	mat)
5597 {
5598 	return app<MatNeg<T, Rows, Cols> >(mat);
5599 }
5600 
5601 template <typename T>
5602 class Sampling
5603 {
5604 public:
genFixeds(const FloatFormat &,const Precision,vector<T> &,const Interval &) const5605 	virtual void	genFixeds			(const FloatFormat&, const Precision, vector<T>&, const Interval&)	const {}
genRandom(const FloatFormat &,const Precision,Random &,const Interval &) const5606 	virtual T		genRandom			(const FloatFormat&,const Precision, Random&, const Interval&)		const { return T(); }
removeNotInRange(vector<T> &,const Interval &,const Precision) const5607 	virtual void	removeNotInRange	(vector<T>&, const Interval&, const Precision)						const {}
5608 };
5609 
5610 template <>
5611 class DefaultSampling<Void> : public Sampling<Void>
5612 {
5613 public:
genFixeds(const FloatFormat &,const Precision,vector<Void> & dst,const Interval &) const5614 	void	genFixeds	(const FloatFormat&, const Precision, vector<Void>& dst, const Interval&) const { dst.push_back(Void()); }
5615 };
5616 
5617 template <>
5618 class DefaultSampling<bool> : public Sampling<bool>
5619 {
5620 public:
genFixeds(const FloatFormat &,const Precision,vector<bool> & dst,const Interval &) const5621 	void	genFixeds	(const FloatFormat&, const Precision, vector<bool>& dst, const Interval&) const
5622 	{
5623 		dst.push_back(true);
5624 		dst.push_back(false);
5625 	}
5626 };
5627 
5628 template <>
5629 class DefaultSampling<int> : public Sampling<int>
5630 {
5631 public:
genRandom(const FloatFormat &,const Precision prec,Random & rnd,const Interval &) const5632 	int		genRandom	(const FloatFormat&, const Precision prec, Random& rnd, const Interval&) const
5633 	{
5634 		const int	exp		= rnd.getInt(0, getNumBits(prec)-2);
5635 		const int	sign	= rnd.getBool() ? -1 : 1;
5636 
5637 		return sign * rnd.getInt(0, (deInt32)1 << exp);
5638 	}
5639 
genFixeds(const FloatFormat &,const Precision,vector<int> & dst,const Interval &) const5640 	void	genFixeds	(const FloatFormat&, const Precision, vector<int>& dst, const Interval&) const
5641 	{
5642 		dst.push_back(0);
5643 		dst.push_back(-1);
5644 		dst.push_back(1);
5645 	}
5646 
5647 private:
getNumBits(Precision prec)5648 	static inline int getNumBits (Precision prec)
5649 	{
5650 		switch (prec)
5651 		{
5652 			case glu::PRECISION_LAST:
5653 			case glu::PRECISION_MEDIUMP:	return 16;
5654 			case glu::PRECISION_HIGHP:		return 32;
5655 			default:
5656 				DE_ASSERT(false);
5657 				return 0;
5658 		}
5659 	}
5660 };
5661 
5662 template <>
5663 class DefaultSampling<float> : public Sampling<float>
5664 {
5665 public:
5666 	float	genRandom			(const FloatFormat& format, const Precision prec, Random& rnd, const Interval& inputRange)			const;
5667 	void	genFixeds			(const FloatFormat& format, const Precision prec, vector<float>& dst, const Interval& inputRange)	const;
5668 	void	removeNotInRange	(vector<float>& dst, const Interval& inputRange, const Precision prec)								const;
5669 };
5670 
5671 template <>
5672 class DefaultSampling<double> : public Sampling<double>
5673 {
5674 public:
5675 	double	genRandom			(const FloatFormat& format, const Precision prec, Random& rnd, const Interval& inputRange)			const;
5676 	void	genFixeds			(const FloatFormat& format, const Precision prec, vector<double>& dst, const Interval& inputRange)	const;
5677 	void	removeNotInRange	(vector<double>& dst, const Interval& inputRange, const Precision prec)								const;
5678 };
5679 
isDenorm16(deFloat16 v)5680 static bool isDenorm16(deFloat16 v)
5681 {
5682 	const deUint16 mantissa = 0x03FF;
5683 	const deUint16 exponent = 0x7C00;
5684 	return ((exponent & v) == 0 && (mantissa & v) != 0);
5685 }
5686 
5687 //! Generate a random double from a reasonable general-purpose distribution.
randomDouble(const FloatFormat & format,Random & rnd,const Interval & inputRange)5688 double randomDouble(const FloatFormat& format, Random& rnd, const Interval& inputRange)
5689 {
5690 	// No testing of subnormals. TODO: Could integrate float controls for some operations.
5691 	const int		minExp			= format.getMinExp();
5692 	const int		maxExp			= format.getMaxExp();
5693 	const bool		haveSubnormal	= false;
5694 	const double	midpoint		= inputRange.midpoint();
5695 
5696 	// Choose exponent so that the cumulative distribution is cubic.
5697 	// This makes the probability distribution quadratic, with the peak centered on zero.
5698 	const double	minRoot			= deCbrt(minExp - 0.5 - (haveSubnormal ? 1.0 : 0.0));
5699 	const double	maxRoot			= deCbrt(maxExp + 0.5);
5700 	const int		fractionBits	= format.getFractionBits();
5701 	const int		exp				= int(deRoundEven(dePow(rnd.getDouble(minRoot, maxRoot), 3.0)));
5702 
5703 	// Generate some occasional special numbers
5704 	switch (rnd.getInt(0, 64))
5705 	{
5706 		case 0:		return inputRange.contains(0)				? 0				: midpoint;
5707 		case 1:		return inputRange.contains(TCU_INFINITY)	? TCU_INFINITY	: midpoint;
5708 		case 2:		return inputRange.contains(-TCU_INFINITY)	? -TCU_INFINITY	: midpoint;
5709 		case 3:		return inputRange.contains(TCU_NAN)			? TCU_NAN		: midpoint;
5710 		default:	break;
5711 	}
5712 
5713 	DE_ASSERT(fractionBits < std::numeric_limits<double>::digits);
5714 
5715 	// Normal number
5716 	double base = deLdExp(1.0, exp);
5717 	double quantum = deLdExp(1.0, exp - fractionBits); // smallest representable difference in the binade
5718 	double significand = 0.0;
5719 	switch (rnd.getInt(0, 16))
5720 	{
5721 		case 0: // The highest number in this binade, significand is all bits one.
5722 			significand = base - quantum;
5723 			break;
5724 		case 1: // Significand is one.
5725 			significand = quantum;
5726 			break;
5727 		case 2: // Significand is zero.
5728 			significand = 0.0;
5729 			break;
5730 		default: // Random (evenly distributed) significand.
5731 		{
5732 			deUint64 intFraction = rnd.getUint64() & ((1 << fractionBits) - 1);
5733 			significand = double(intFraction) * quantum;
5734 		}
5735 	}
5736 
5737 	// Produce positive numbers more often than negative.
5738 	double value = (rnd.getInt(0, 3) == 0 ? -1.0 : 1.0) * (base + significand);
5739 	return inputRange.contains(value) ? value : midpoint;
5740 }
5741 
5742 //! Generate a random float from a reasonable general-purpose distribution.
genRandom(const FloatFormat & format,Precision prec,Random & rnd,const Interval & inputRange) const5743 float DefaultSampling<float>::genRandom (const FloatFormat&	format,
5744 										 Precision			prec,
5745 										 Random&			rnd,
5746 										 const Interval&	inputRange) const
5747 {
5748 	DE_UNREF(prec);
5749 	return (float)randomDouble(format, rnd, inputRange);
5750 }
5751 
5752 //! Generate a standard set of floats that should always be tested.
genFixeds(const FloatFormat & format,const Precision prec,vector<float> & dst,const Interval & inputRange) const5753 void DefaultSampling<float>::genFixeds (const FloatFormat& format, const Precision prec, vector<float>& dst, const Interval& inputRange) const
5754 {
5755 	const int			minExp			= format.getMinExp();
5756 	const int			maxExp			= format.getMaxExp();
5757 	const int			fractionBits	= format.getFractionBits();
5758 	const float			minQuantum		= deFloatLdExp(1.0f, minExp - fractionBits);
5759 	const float			minNormalized	= deFloatLdExp(1.0f, minExp);
5760 	const float			maxQuantum		= deFloatLdExp(1.0f, maxExp - fractionBits);
5761 
5762 	// NaN
5763 	dst.push_back(TCU_NAN);
5764 	// Zero
5765 	dst.push_back(0.0f);
5766 
5767 	for (int sign = -1; sign <= 1; sign += 2)
5768 	{
5769 		// Smallest normalized
5770 		dst.push_back((float)sign * minNormalized);
5771 
5772 		// Next smallest normalized
5773 		dst.push_back((float)sign * (minNormalized + minQuantum));
5774 
5775 		dst.push_back((float)sign * 0.5f);
5776 		dst.push_back((float)sign * 1.0f);
5777 		dst.push_back((float)sign * 2.0f);
5778 
5779 		// Largest number
5780 		dst.push_back((float)sign * (deFloatLdExp(1.0f, maxExp) +
5781 									(deFloatLdExp(1.0f, maxExp) - maxQuantum)));
5782 
5783 		dst.push_back((float)sign * TCU_INFINITY);
5784 	}
5785 	removeNotInRange(dst, inputRange, prec);
5786 }
5787 
removeNotInRange(vector<float> & dst,const Interval & inputRange,const Precision prec) const5788 void DefaultSampling<float>::removeNotInRange (vector<float>& dst, const Interval& inputRange, const Precision prec) const
5789 {
5790 	for (vector<float>::iterator it = dst.begin(); it < dst.end();)
5791 	{
5792 		// Remove out of range values. PRECISION_LAST means this is an FP16 test so remove any values that
5793 		// will be denorms when converted to FP16. (This is used in the precision_fp16_storage32b test group).
5794 		if ( !inputRange.contains(static_cast<double>(*it)) || (prec == glu::PRECISION_LAST && isDenorm16(deFloat32To16Round(*it, DE_ROUNDINGMODE_TO_ZERO))))
5795 			it = dst.erase(it);
5796 		else
5797 			++it;
5798 	}
5799 }
5800 
5801 //! Generate a random double from a reasonable general-purpose distribution.
genRandom(const FloatFormat & format,Precision prec,Random & rnd,const Interval & inputRange) const5802 double DefaultSampling<double>::genRandom (const FloatFormat&	format,
5803 										   Precision			prec,
5804 										   Random&				rnd,
5805 										   const Interval&		inputRange) const
5806 {
5807 	DE_UNREF(prec);
5808 	return randomDouble(format, rnd, inputRange);
5809 }
5810 
5811 //! Generate a standard set of floats that should always be tested.
genFixeds(const FloatFormat & format,const Precision prec,vector<double> & dst,const Interval & inputRange) const5812 void DefaultSampling<double>::genFixeds (const FloatFormat& format, const Precision prec, vector<double>& dst, const Interval& inputRange) const
5813 {
5814 	const int			minExp			= format.getMinExp();
5815 	const int			maxExp			= format.getMaxExp();
5816 	const int			fractionBits	= format.getFractionBits();
5817 	const double		minQuantum		= deLdExp(1.0, minExp - fractionBits);
5818 	const double		minNormalized	= deLdExp(1.0, minExp);
5819 	const double		maxQuantum		= deLdExp(1.0, maxExp - fractionBits);
5820 
5821 	// NaN
5822 	dst.push_back(TCU_NAN);
5823 	// Zero
5824 	dst.push_back(0.0);
5825 
5826 	for (int sign = -1; sign <= 1; sign += 2)
5827 	{
5828 		// Smallest normalized
5829 		dst.push_back((double)sign * minNormalized);
5830 
5831 		// Next smallest normalized
5832 		dst.push_back((double)sign * (minNormalized + minQuantum));
5833 
5834 		dst.push_back((double)sign * 0.5);
5835 		dst.push_back((double)sign * 1.0);
5836 		dst.push_back((double)sign * 2.0);
5837 
5838 		// Largest number
5839 		dst.push_back((double)sign * (deLdExp(1.0, maxExp) + (deLdExp(1.0, maxExp) - maxQuantum)));
5840 
5841 		dst.push_back((double)sign * TCU_INFINITY);
5842 	}
5843 	removeNotInRange(dst, inputRange, prec);
5844 }
5845 
removeNotInRange(vector<double> & dst,const Interval & inputRange,const Precision) const5846 void DefaultSampling<double>::removeNotInRange (vector<double>& dst, const Interval& inputRange, const Precision) const
5847 {
5848 	for (vector<double>::iterator it = dst.begin(); it < dst.end();)
5849 	{
5850 		if ( !inputRange.contains(*it) )
5851 			it = dst.erase(it);
5852 		else
5853 			++it;
5854 	}
5855 }
5856 
5857 template <>
5858 class DefaultSampling<deFloat16> : public Sampling<deFloat16>
5859 {
5860 public:
5861 	deFloat16	genRandom			(const FloatFormat& format, const Precision prec, Random& rnd, const Interval& inputRange) const;
5862 	void		genFixeds			(const FloatFormat& format, const Precision prec, vector<deFloat16>& dst, const Interval& inputRange) const;
5863 private:
5864 	void		removeNotInRange(vector<deFloat16>& dst, const Interval& inputRange, const Precision prec) const;
5865 };
5866 
5867 //! Generate a random float from a reasonable general-purpose distribution.
genRandom(const FloatFormat & format,const Precision prec,Random & rnd,const Interval & inputRange) const5868 deFloat16 DefaultSampling<deFloat16>::genRandom (const FloatFormat& format, const Precision prec,
5869 												Random& rnd, const Interval& inputRange) const
5870 {
5871 	DE_UNREF(prec);
5872 	return deFloat64To16Round(randomDouble(format, rnd, inputRange), DE_ROUNDINGMODE_TO_NEAREST_EVEN);
5873 }
5874 
5875 //! Generate a standard set of floats that should always be tested.
genFixeds(const FloatFormat & format,const Precision prec,vector<deFloat16> & dst,const Interval & inputRange) const5876 void DefaultSampling<deFloat16>::genFixeds (const FloatFormat& format, const Precision prec, vector<deFloat16>& dst, const Interval& inputRange) const
5877 {
5878 	dst.push_back(deUint16(0x3E00)); //1.5
5879 	dst.push_back(deUint16(0x3D00)); //1.25
5880 	dst.push_back(deUint16(0x3F00)); //1.75
5881 	// Zero
5882 	dst.push_back(deUint16(0x0000));
5883 	dst.push_back(deUint16(0x8000));
5884 	// Infinity
5885 	dst.push_back(deUint16(0x7c00));
5886 	dst.push_back(deUint16(0xfc00));
5887 	// SNaN
5888 	dst.push_back(deUint16(0x7c0f));
5889 	dst.push_back(deUint16(0xfc0f));
5890 	// QNaN
5891 	dst.push_back(deUint16(0x7cf0));
5892 	dst.push_back(deUint16(0xfcf0));
5893 	// Normalized
5894 	dst.push_back(deUint16(0x0401));
5895 	dst.push_back(deUint16(0x8401));
5896 	// Some normal number
5897 	dst.push_back(deUint16(0x14cb));
5898 	dst.push_back(deUint16(0x94cb));
5899 
5900 	const int			minExp			= format.getMinExp();
5901 	const int			maxExp			= format.getMaxExp();
5902 	const int			fractionBits	= format.getFractionBits();
5903 	const float			minQuantum		= deFloatLdExp(1.0f, minExp - fractionBits);
5904 	const float			minNormalized	= deFloatLdExp(1.0f, minExp);
5905 	const float			maxQuantum		= deFloatLdExp(1.0f, maxExp - fractionBits);
5906 
5907 	for (float sign = -1.0; sign <= 1.0f; sign += 2.0f)
5908 	{
5909 		// Smallest normalized
5910 		dst.push_back(deFloat32To16Round(sign * minNormalized, DE_ROUNDINGMODE_TO_NEAREST_EVEN));
5911 
5912 		// Next smallest normalized
5913 		dst.push_back(deFloat32To16Round(sign * (minNormalized + minQuantum), DE_ROUNDINGMODE_TO_NEAREST_EVEN));
5914 
5915 		dst.push_back(deFloat32To16Round(sign * 0.5f, DE_ROUNDINGMODE_TO_NEAREST_EVEN));
5916 		dst.push_back(deFloat32To16Round(sign * 1.0f, DE_ROUNDINGMODE_TO_NEAREST_EVEN));
5917 		dst.push_back(deFloat32To16Round(sign * 2.0f, DE_ROUNDINGMODE_TO_NEAREST_EVEN));
5918 
5919 		// Largest number
5920 		dst.push_back(deFloat32To16Round(sign * (deFloatLdExp(1.0f, maxExp) +
5921 									(deFloatLdExp(1.0f, maxExp) - maxQuantum)), DE_ROUNDINGMODE_TO_NEAREST_EVEN));
5922 
5923 		dst.push_back(deFloat32To16Round(sign * TCU_INFINITY, DE_ROUNDINGMODE_TO_NEAREST_EVEN));
5924 	}
5925 	removeNotInRange(dst, inputRange, prec);
5926 }
5927 
removeNotInRange(vector<deFloat16> & dst,const Interval & inputRange,const Precision) const5928 void DefaultSampling<deFloat16>::removeNotInRange(vector<deFloat16>& dst, const Interval& inputRange, const Precision) const
5929 {
5930 	for (vector<deFloat16>::iterator it = dst.begin(); it < dst.end();)
5931 	{
5932 		if (inputRange.contains(static_cast<double>(*it)))
5933 			++it;
5934 		else
5935 			it = dst.erase(it);
5936 	}
5937 }
5938 
5939 template <typename T, int Size>
5940 class DefaultSampling<Vector<T, Size> > : public Sampling<Vector<T, Size> >
5941 {
5942 public:
5943 	typedef Vector<T, Size>		Value;
5944 
genRandom(const FloatFormat & fmt,const Precision prec,Random & rnd,const Interval & inputRange) const5945 	Value	genRandom	(const FloatFormat& fmt, const Precision prec, Random& rnd, const Interval& inputRange) const
5946 	{
5947 		Value ret;
5948 
5949 		for (int ndx = 0; ndx < Size; ++ndx)
5950 			ret[ndx] = instance<DefaultSampling<T> >().genRandom(fmt, prec, rnd, inputRange);
5951 
5952 		return ret;
5953 	}
5954 
genFixeds(const FloatFormat & fmt,const Precision prec,vector<Value> & dst,const Interval & inputRange) const5955 	void	genFixeds	(const FloatFormat& fmt, const Precision prec, vector<Value>& dst, const Interval& inputRange) const
5956 	{
5957 		vector<T> scalars;
5958 
5959 		instance<DefaultSampling<T> >().genFixeds(fmt, prec, scalars, inputRange);
5960 
5961 		for (size_t scalarNdx = 0; scalarNdx < scalars.size(); ++scalarNdx)
5962 			dst.push_back(Value(scalars[scalarNdx]));
5963 	}
5964 };
5965 
5966 template <typename T, int Rows, int Columns>
5967 class DefaultSampling<Matrix<T, Rows, Columns> > : public Sampling<Matrix<T, Rows, Columns> >
5968 {
5969 public:
5970 	typedef Matrix<T, Rows, Columns>		Value;
5971 
genRandom(const FloatFormat & fmt,const Precision prec,Random & rnd,const Interval & inputRange) const5972 	Value	genRandom	(const FloatFormat& fmt, const Precision prec, Random& rnd, const Interval& inputRange) const
5973 	{
5974 		Value ret;
5975 
5976 		for (int rowNdx = 0; rowNdx < Rows; ++rowNdx)
5977 			for (int colNdx = 0; colNdx < Columns; ++colNdx)
5978 				ret(rowNdx, colNdx) = instance<DefaultSampling<T> >().genRandom(fmt, prec, rnd, inputRange);
5979 
5980 		return ret;
5981 	}
5982 
genFixeds(const FloatFormat & fmt,const Precision prec,vector<Value> & dst,const Interval & inputRange) const5983 	void	genFixeds	(const FloatFormat& fmt, const Precision prec, vector<Value>& dst, const Interval& inputRange) const
5984 	{
5985 		vector<T> scalars;
5986 
5987 		instance<DefaultSampling<T> >().genFixeds(fmt, prec, scalars, inputRange);
5988 
5989 		for (size_t scalarNdx = 0; scalarNdx < scalars.size(); ++scalarNdx)
5990 			dst.push_back(Value(scalars[scalarNdx]));
5991 
5992 		if (Columns == Rows)
5993 		{
5994 			Value	mat	(T(0.0));
5995 			T		x	= T(1.0f);
5996 			mat[0][0] = x;
5997 			for (int ndx = 0; ndx < Columns; ++ndx)
5998 			{
5999 				mat[Columns-1-ndx][ndx] = x;
6000 				x = static_cast<T>(x * static_cast<T>(2.0f));
6001 			}
6002 			dst.push_back(mat);
6003 		}
6004 	}
6005 };
6006 
6007 struct CaseContext
6008 {
CaseContextvkt::shaderexecutor::CaseContext6009 					CaseContext		(const string&							name_,
6010 									 TestContext&							testContext_,
6011 									 const FloatFormat&						floatFormat_,
6012 									 const FloatFormat&						highpFormat_,
6013 									 const Precision						precision_,
6014 									 const ShaderType						shaderType_,
6015 									 const size_t							numRandoms_,
6016 									 const PrecisionTestFeatures	precisionTestFeatures_ = PRECISION_TEST_FEATURES_NONE,
6017 									 const bool						isPackFloat16b_ = false,
6018 									 const bool						isFloat64b_ = false)
6019 						: name						(name_)
6020 						, testContext				(testContext_)
6021 						, floatFormat				(floatFormat_)
6022 						, highpFormat				(highpFormat_)
6023 						, precision					(precision_)
6024 						, shaderType				(shaderType_)
6025 						, numRandoms				(numRandoms_)
6026 						, inputRange				(-TCU_INFINITY, TCU_INFINITY)
6027 						, precisionTestFeatures		(precisionTestFeatures_)
6028 						, isPackFloat16b			(isPackFloat16b_)
6029 						, isFloat64b				(isFloat64b_)
6030 					{}
6031 
6032 	string							name;
6033 	TestContext&					testContext;
6034 	FloatFormat						floatFormat;
6035 	FloatFormat						highpFormat;
6036 	Precision						precision;
6037 	ShaderType						shaderType;
6038 	size_t							numRandoms;
6039 	Interval						inputRange;
6040 	PrecisionTestFeatures	precisionTestFeatures;
6041 	bool							isPackFloat16b;
6042 	bool					isFloat64b;
6043 };
6044 
6045 template<typename In0_ = Void, typename In1_ = Void, typename In2_ = Void, typename In3_ = Void>
6046 struct InTypes
6047 {
6048 	typedef	In0_	In0;
6049 	typedef	In1_	In1;
6050 	typedef	In2_	In2;
6051 	typedef	In3_	In3;
6052 };
6053 
6054 template <typename In>
numInputs(void)6055 int numInputs (void)
6056 {
6057 	return (!isTypeValid<typename In::In0>() ? 0 :
6058 			!isTypeValid<typename In::In1>() ? 1 :
6059 			!isTypeValid<typename In::In2>() ? 2 :
6060 			!isTypeValid<typename In::In3>() ? 3 :
6061 			4);
6062 }
6063 
6064 template<typename Out0_, typename Out1_ = Void>
6065 struct OutTypes
6066 {
6067 	typedef	Out0_	Out0;
6068 	typedef	Out1_	Out1;
6069 };
6070 
6071 template <typename Out>
numOutputs(void)6072 int numOutputs (void)
6073 {
6074 	return (!isTypeValid<typename Out::Out0>() ? 0 :
6075 			!isTypeValid<typename Out::Out1>() ? 1 :
6076 			2);
6077 }
6078 
//! Input values for a test run, stored as one array per input slot of In.
template<typename In>
struct Inputs
{
	typedef typename In::In0 Elem0;
	typedef typename In::In1 Elem1;
	typedef typename In::In2 Elem2;
	typedef typename In::In3 Elem3;

	vector<Elem0>	in0;	//!< Values for input 0.
	vector<Elem1>	in1;	//!< Values for input 1.
	vector<Elem2>	in2;	//!< Values for input 2.
	vector<Elem3>	in3;	//!< Values for input 3.
};
6087 
//! Output storage for a test run, stored as one array per output slot of Out.
template<typename Out>
struct Outputs
{
	typedef typename Out::Out0 Elem0;
	typedef typename Out::Out1 Elem1;

	//! Allocate size elements in each output array.
	Outputs	(size_t size)
		: out0	(size)
		, out1	(size)
	{
	}

	vector<Elem0>	out0;	//!< Values of output 0.
	vector<Elem1>	out1;	//!< Values of output 1.
};
6096 
6097 template<typename In, typename Out>
6098 struct Variables
6099 {
6100 	VariableP<typename In::In0>		in0;
6101 	VariableP<typename In::In1>		in1;
6102 	VariableP<typename In::In2>		in2;
6103 	VariableP<typename In::In3>		in3;
6104 	VariableP<typename Out::Out0>	out0;
6105 	VariableP<typename Out::Out1>	out1;
6106 };
6107 
6108 template<typename In>
6109 struct Samplings
6110 {
Samplingsvkt::shaderexecutor::Samplings6111 	Samplings	(const Sampling<typename In::In0>&	in0_,
6112 				 const Sampling<typename In::In1>&	in1_,
6113 				 const Sampling<typename In::In2>&	in2_,
6114 				 const Sampling<typename In::In3>&	in3_)
6115 		: in0 (in0_), in1 (in1_), in2 (in2_), in3 (in3_) {}
6116 
6117 	const Sampling<typename In::In0>&	in0;
6118 	const Sampling<typename In::In1>&	in1;
6119 	const Sampling<typename In::In2>&	in2;
6120 	const Sampling<typename In::In3>&	in3;
6121 };
6122 
6123 template<typename In>
6124 struct DefaultSamplings : Samplings<In>
6125 {
DefaultSamplingsvkt::shaderexecutor::DefaultSamplings6126 	DefaultSamplings	(void)
6127 		: Samplings<In>(instance<DefaultSampling<typename In::In0> >(),
6128 						instance<DefaultSampling<typename In::In1> >(),
6129 						instance<DefaultSampling<typename In::In2> >(),
6130 						instance<DefaultSampling<typename In::In3> >()) {}
6131 };
6132 
6133 template <typename In, typename Out>
6134 class BuiltinPrecisionCaseTestInstance : public TestInstance
6135 {
6136 public:
BuiltinPrecisionCaseTestInstance(Context & context,const CaseContext caseCtx,const ShaderSpec & shaderSpec,const Variables<In,Out> variables,const Samplings<In> & samplings,const StatementP stmt,bool modularOp=false)6137 									BuiltinPrecisionCaseTestInstance	(Context&						context,
6138 																		 const CaseContext				caseCtx,
6139 																		 const ShaderSpec&				shaderSpec,
6140 																		 const Variables<In, Out>		variables,
6141 																		 const Samplings<In>&			samplings,
6142 																		 const StatementP				stmt,
6143 																		 bool							modularOp = false)
6144 										: TestInstance	(context)
6145 										, m_caseCtx		(caseCtx)
6146 										, m_variables	(variables)
6147 										, m_samplings	(samplings)
6148 										, m_stmt		(stmt)
6149 										, m_executor	(createExecutor(context, caseCtx.shaderType, shaderSpec))
6150 										, m_modularOp	(modularOp)
6151 									{
6152 									}
6153 	virtual tcu::TestStatus			iterate								(void);
6154 
6155 protected:
6156 	CaseContext						m_caseCtx;
6157 	Variables<In, Out>				m_variables;
6158 	const Samplings<In>&			m_samplings;
6159 	StatementP						m_stmt;
6160 	de::UniquePtr<ShaderExecutor>	m_executor;
6161 	bool							m_modularOp;
6162 };
6163 
6164 template<class In, class Out>
iterate(void)6165 tcu::TestStatus BuiltinPrecisionCaseTestInstance<In, Out>::iterate (void)
6166 {
6167 	typedef typename	In::In0		In0;
6168 	typedef typename	In::In1		In1;
6169 	typedef typename	In::In2		In2;
6170 	typedef typename	In::In3		In3;
6171 	typedef typename	Out::Out0	Out0;
6172 	typedef typename	Out::Out1	Out1;
6173 
6174 	areFeaturesSupported(m_context, m_caseCtx.precisionTestFeatures);
6175 	Inputs<In>			inputs		= generateInputs(m_samplings, m_caseCtx.floatFormat, m_caseCtx.precision, m_caseCtx.numRandoms, 0xdeadbeefu + m_caseCtx.testContext.getCommandLine().getBaseSeed(), m_caseCtx.inputRange);
6176 	const FloatFormat&	fmt			= m_caseCtx.floatFormat;
6177 	const int			inCount		= numInputs<In>();
6178 	const int			outCount	= numOutputs<Out>();
6179 	const size_t		numValues	= (inCount > 0) ? inputs.in0.size() : 1;
6180 	Outputs<Out>		outputs		(numValues);
6181 	const FloatFormat	highpFmt	= m_caseCtx.highpFormat;
6182 	const int			maxMsgs		= 100;
6183 	int					numErrors	= 0;
6184 	Environment			env;		// Hoisted out of the inner loop for optimization.
6185 	ResultCollector		status;
6186 	TestLog&			testLog		= m_context.getTestContext().getLog();
6187 
6188 	// Module operations need exactly two inputs and have exactly one output.
6189 	if (m_modularOp)
6190 	{
6191 		DE_ASSERT(inCount == 2);
6192 		DE_ASSERT(outCount == 1);
6193 	}
6194 
6195 	const void*			inputArr[]	=
6196 	{
6197 		inputs.in0.data(), inputs.in1.data(), inputs.in2.data(), inputs.in3.data(),
6198 	};
6199 	void*				outputArr[]	=
6200 	{
6201 		outputs.out0.data(), outputs.out1.data(),
6202 	};
6203 
6204 	// Print out the statement and its definitions
6205 	testLog << TestLog::Message << "Statement: " << m_stmt << TestLog::EndMessage;
6206 	{
6207 		ostringstream	oss;
6208 		FuncSet			funcs;
6209 
6210 		m_stmt->getUsedFuncs(funcs);
6211 		for (FuncSet::const_iterator it = funcs.begin(); it != funcs.end(); ++it)
6212 		{
6213 			(*it)->printDefinition(oss);
6214 		}
6215 		if (!funcs.empty())
6216 			testLog << TestLog::Message << "Reference definitions:\n" << oss.str()
6217 				  << TestLog::EndMessage;
6218 	}
6219 	switch (inCount)
6220 	{
6221 		case 4:
6222 			DE_ASSERT(inputs.in3.size() == numValues);
6223 		// Fallthrough
6224 		case 3:
6225 			DE_ASSERT(inputs.in2.size() == numValues);
6226 		// Fallthrough
6227 		case 2:
6228 			DE_ASSERT(inputs.in1.size() == numValues);
6229 		// Fallthrough
6230 		case 1:
6231 			DE_ASSERT(inputs.in0.size() == numValues);
6232 		// Fallthrough
6233 		default:
6234 			break;
6235 	}
6236 
6237 	m_executor->execute(int(numValues), inputArr, outputArr);
6238 
6239 	// Initialize environment with dummy values so we don't need to bind in inner loop.
6240 	{
6241 		const typename Traits<In0>::IVal		in0;
6242 		const typename Traits<In1>::IVal		in1;
6243 		const typename Traits<In2>::IVal		in2;
6244 		const typename Traits<In3>::IVal		in3;
6245 		const typename Traits<Out0>::IVal		reference0;
6246 		const typename Traits<Out1>::IVal		reference1;
6247 
6248 		env.bind(*m_variables.in0, in0);
6249 		env.bind(*m_variables.in1, in1);
6250 		env.bind(*m_variables.in2, in2);
6251 		env.bind(*m_variables.in3, in3);
6252 		env.bind(*m_variables.out0, reference0);
6253 		env.bind(*m_variables.out1, reference1);
6254 	}
6255 
6256 	// For each input tuple, compute output reference interval and compare
6257 	// shader output to the reference.
6258 	for (size_t valueNdx = 0; valueNdx < numValues; valueNdx++)
6259 	{
6260 		bool						result			= true;
6261 		const bool					isInput16Bit	= m_executor->areInputs16Bit();
6262 		const bool					isInput64Bit	= m_executor->areInputs64Bit();
6263 
6264 		DE_ASSERT(!(isInput16Bit && isInput64Bit));
6265 
6266 		typename Traits<Out0>::IVal	reference0;
6267 		typename Traits<Out1>::IVal	reference1;
6268 
6269 		if (valueNdx % (size_t)TOUCH_WATCHDOG_VALUE_FREQUENCY == 0)
6270 			m_context.getTestContext().touchWatchdog();
6271 
6272 		env.lookup(*m_variables.in0) = convert<In0>(fmt, round(fmt, inputs.in0[valueNdx]));
6273 		env.lookup(*m_variables.in1) = convert<In1>(fmt, round(fmt, inputs.in1[valueNdx]));
6274 		env.lookup(*m_variables.in2) = convert<In2>(fmt, round(fmt, inputs.in2[valueNdx]));
6275 		env.lookup(*m_variables.in3) = convert<In3>(fmt, round(fmt, inputs.in3[valueNdx]));
6276 
6277 		{
6278 			EvalContext	ctx (fmt, m_caseCtx.precision, env, 0);
6279 			m_stmt->execute(ctx);
6280 
6281 			switch (outCount)
6282 			{
6283 				case 2:
6284 					reference1 = convert<Out1>(highpFmt, env.lookup(*m_variables.out1));
6285 					if (!status.check(contains(reference1, outputs.out1[valueNdx], m_caseCtx.isPackFloat16b), "Shader output 1 is outside acceptable range"))
6286 						result = false;
6287 				// Fallthrough
6288 				case 1:
6289 					{
6290 						// Pass b from mod(a, b) if we are in the modulo operation.
6291 						const tcu::Maybe<In1> modularDivisor = (m_modularOp ? tcu::just(inputs.in1[valueNdx]) : tcu::Nothing);
6292 
6293 						reference0 = convert<Out0>(highpFmt, env.lookup(*m_variables.out0));
6294 						if (!status.check(contains(reference0, outputs.out0[valueNdx], m_caseCtx.isPackFloat16b, modularDivisor), "Shader output 0 is outside acceptable range"))
6295 						{
6296 							m_stmt->failed(ctx);
6297 							reference0 = convert<Out0>(highpFmt, env.lookup(*m_variables.out0));
6298 							if (!status.check(contains(reference0, outputs.out0[valueNdx], m_caseCtx.isPackFloat16b, modularDivisor), "Shader output 0 is outside acceptable range"))
6299 								result = false;
6300 						}
6301 					}
6302 				// Fallthrough
6303 				default: break;
6304 			}
6305 
6306 		}
6307 		if (!result)
6308 			++numErrors;
6309 
6310 		if ((!result && numErrors <= maxMsgs) || GLS_LOG_ALL_RESULTS)
6311 		{
6312 			MessageBuilder	builder	= testLog.message();
6313 
6314 			builder << (result ? "Passed" : "Failed") << " sample:\n";
6315 
6316 			if (inCount > 0)
6317 			{
6318 				builder << "\t" << m_variables.in0->getName() << " = "
6319 						<< (isInput64Bit ? value64ToString(highpFmt, inputs.in0[valueNdx]) : (isInput16Bit ? value16ToString(highpFmt, inputs.in0[valueNdx]) : value32ToString(highpFmt, inputs.in0[valueNdx]))) << "\n";
6320 			}
6321 
6322 			if (inCount > 1)
6323 			{
6324 				builder << "\t" << m_variables.in1->getName() << " = "
6325 						<< (isInput64Bit ? value64ToString(highpFmt, inputs.in1[valueNdx]) : (isInput16Bit ? value16ToString(highpFmt, inputs.in1[valueNdx]) : value32ToString(highpFmt, inputs.in1[valueNdx]))) << "\n";
6326 			}
6327 
6328 			if (inCount > 2)
6329 			{
6330 				builder << "\t" << m_variables.in2->getName() << " = "
6331 						<< (isInput64Bit ? value64ToString(highpFmt, inputs.in2[valueNdx]) : (isInput16Bit ? value16ToString(highpFmt, inputs.in2[valueNdx]) : value32ToString(highpFmt, inputs.in2[valueNdx]))) << "\n";
6332 			}
6333 
6334 			if (inCount > 3)
6335 			{
6336 				builder << "\t" << m_variables.in3->getName() << " = "
6337 						<< (isInput64Bit ? value64ToString(highpFmt, inputs.in3[valueNdx]) : (isInput16Bit ? value16ToString(highpFmt, inputs.in3[valueNdx]) : value32ToString(highpFmt, inputs.in3[valueNdx]))) << "\n";
6338 			}
6339 
6340 			if (outCount > 0)
6341 			{
6342 				if (m_executor->spirvCase() == SPIRV_CASETYPE_COMPARE)
6343 				{
6344 					builder << "Output:\n"
6345 							<< comparisonMessage(outputs.out0[valueNdx])
6346 							<< "Expected result:\n"
6347 							<< comparisonMessageInterval<typename Out::Out0>(reference0) << "\n";
6348 				}
6349 				else
6350 				{
6351 					builder << "\t" << m_variables.out0->getName() << " = "
6352 						<< (m_executor->isOutput64Bit(0u) ? value64ToString(highpFmt, outputs.out0[valueNdx]) : (m_executor->isOutput16Bit(0u) || m_caseCtx.isPackFloat16b ? value16ToString(highpFmt, outputs.out0[valueNdx]) : value32ToString(highpFmt, outputs.out0[valueNdx]))) << "\n"
6353 						<< "\tExpected range: "
6354 						<< intervalToString<typename Out::Out0>(highpFmt, reference0) << "\n";
6355 				}
6356 			}
6357 
6358 			if (outCount > 1)
6359 			{
6360 				builder << "\t" << m_variables.out1->getName() << " = "
6361 						<< (m_executor->isOutput64Bit(1u) ? value64ToString(highpFmt, outputs.out1[valueNdx]) : (m_executor->isOutput16Bit(1u) || m_caseCtx.isPackFloat16b ? value16ToString(highpFmt, outputs.out1[valueNdx]) : value32ToString(highpFmt, outputs.out1[valueNdx]))) << "\n"
6362 						<< "\tExpected range: "
6363 						<< intervalToString<typename Out::Out1>(highpFmt, reference1) << "\n";
6364 			}
6365 
6366 			builder << TestLog::EndMessage;
6367 		}
6368 	}
6369 
6370 	if (numErrors > maxMsgs)
6371 	{
6372 		testLog << TestLog::Message << "(Skipped " << (numErrors - maxMsgs) << " messages.)"
6373 			  << TestLog::EndMessage;
6374 	}
6375 
6376 	if (numErrors == 0)
6377 	{
6378 		testLog << TestLog::Message << "All " << numValues << " inputs passed."
6379 			  << TestLog::EndMessage;
6380 	}
6381 	else
6382 	{
6383 		testLog << TestLog::Message << numErrors << "/" << numValues << " inputs failed."
6384 			  << TestLog::EndMessage;
6385 	}
6386 
6387 	if (numErrors)
6388 		return tcu::TestStatus::fail(de::toString(numErrors) + string(" test failed. Check log for the details"));
6389 	else
6390 		return tcu::TestStatus::pass("Pass");
6391 
6392 }
6393 
6394 class PrecisionCase : public TestCase
6395 {
6396 protected:
PrecisionCase(const CaseContext & context,const string & name,const Interval & inputRange,const string & extension="")6397 						PrecisionCase	(const CaseContext& context, const string& name, const Interval& inputRange, const string& extension = "")
6398 							: TestCase		(context.testContext, name.c_str(), name.c_str())
6399 							, m_ctx			(context)
6400 							, m_extension	(extension)
6401 							{
6402 								m_ctx.inputRange = inputRange;
6403 								m_spec.packFloat16Bit = context.isPackFloat16b;
6404 							}
6405 
initPrograms(vk::SourceCollections & programCollection) const6406 	virtual void		initPrograms	(vk::SourceCollections& programCollection) const
6407 	{
6408 		generateSources(m_ctx.shaderType, m_spec, programCollection);
6409 	}
6410 
getFormat(void) const6411 	const FloatFormat&	getFormat		(void) const			{ return m_ctx.floatFormat; }
6412 
6413 	template <typename In, typename Out>
6414 	void				testStatement	(const Variables<In, Out>& variables, const Statement& stmt, SpirVCaseT spirvCase);
6415 
6416 	template<typename T>
makeSymbol(const Variable<T> & variable)6417 	Symbol				makeSymbol		(const Variable<T>& variable)
6418 	{
6419 		return Symbol(variable.getName(), getVarTypeOf<T>(m_ctx.precision));
6420 	}
6421 
6422 	CaseContext			m_ctx;
6423 	const string		m_extension;
6424 	ShaderSpec			m_spec;
6425 };
6426 
6427 template <typename In, typename Out>
testStatement(const Variables<In,Out> & variables,const Statement & stmt,SpirVCaseT spirvCase)6428 void PrecisionCase::testStatement (const Variables<In, Out>& variables, const Statement& stmt, SpirVCaseT spirvCase)
6429 {
6430 	const int		inCount		= numInputs<In>();
6431 	const int		outCount	= numOutputs<Out>();
6432 	Environment		env;		// Hoisted out of the inner loop for optimization.
6433 
6434 	// Initialize ShaderSpec from precision, variables and statement.
6435 	if (m_ctx.precision != glu::PRECISION_LAST)
6436 	{
6437 		ostringstream os;
6438 		os << "precision " << glu::getPrecisionName(m_ctx.precision) << " float;\n";
6439 		m_spec.globalDeclarations = os.str();
6440 	}
6441 
6442 	if (!m_extension.empty())
6443 		m_spec.globalDeclarations = "#extension " + m_extension + " : require\n";
6444 
6445 	m_spec.inputs.resize(inCount);
6446 
6447 	switch (inCount)
6448 	{
6449 		case 4:
6450 			m_spec.inputs[3] = makeSymbol(*variables.in3);
6451 		// Fallthrough
6452 		case 3:
6453 			m_spec.inputs[2] = makeSymbol(*variables.in2);
6454 		// Fallthrough
6455 		case 2:
6456 			m_spec.inputs[1] = makeSymbol(*variables.in1);
6457 		// Fallthrough
6458 		case 1:
6459 			m_spec.inputs[0] = makeSymbol(*variables.in0);
6460 		// Fallthrough
6461 		default:
6462 			break;
6463 	}
6464 
6465 	bool inputs16Bit = false;
6466 	for (vector<Symbol>::const_iterator symIter = m_spec.inputs.begin(); symIter != m_spec.inputs.end(); ++symIter)
6467 		inputs16Bit = inputs16Bit || glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType());
6468 
6469 	if (inputs16Bit || m_spec.packFloat16Bit)
6470 		m_spec.globalDeclarations += "#extension GL_EXT_shader_explicit_arithmetic_types: require\n";
6471 
6472 	m_spec.outputs.resize(outCount);
6473 
6474 	switch (outCount)
6475 	{
6476 		case 2:
6477 			m_spec.outputs[1] = makeSymbol(*variables.out1);
6478 		// Fallthrough
6479 		case 1:
6480 			m_spec.outputs[0] = makeSymbol(*variables.out0);
6481 		// Fallthrough
6482 		default:
6483 			break;
6484 	}
6485 
6486 	m_spec.source = de::toString(stmt);
6487 	m_spec.spirvCase = spirvCase;
6488 }
6489 
//! Ordering predicate used to detect duplicate inputs. The generic version
//! simply defers to the type's operator<.
template <typename T>
struct InputLess
{
	bool operator() (const T& lhs, const T& rhs) const
	{
		const bool isLess = (lhs < rhs);

		return isLess;
	}
};
6498 
6499 template <typename T>
inputLess(const T & val1,const T & val2)6500 bool inputLess (const T& val1, const T& val2)
6501 {
6502 	return InputLess<T>()(val1, val2);
6503 }
6504 
6505 template <>
6506 struct InputLess<float>
6507 {
operator ()vkt::shaderexecutor::InputLess6508 	bool operator() (const float& val1, const float& val2) const
6509 	{
6510 		if (deIsNaN(val1))
6511 			return false;
6512 		if (deIsNaN(val2))
6513 			return true;
6514 		return val1 < val2;
6515 	}
6516 };
6517 
6518 template <typename T, int Size>
6519 struct InputLess<Vector<T, Size> >
6520 {
operator ()vkt::shaderexecutor::InputLess6521 	bool operator() (const Vector<T, Size>& vec1, const Vector<T, Size>& vec2) const
6522 	{
6523 		for (int ndx = 0; ndx < Size; ++ndx)
6524 		{
6525 			if (inputLess(vec1[ndx], vec2[ndx]))
6526 				return true;
6527 			if (inputLess(vec2[ndx], vec1[ndx]))
6528 				return false;
6529 		}
6530 
6531 		return false;
6532 	}
6533 };
6534 
6535 template <typename T, int Rows, int Cols>
6536 struct InputLess<Matrix<T, Rows, Cols> >
6537 {
operator ()vkt::shaderexecutor::InputLess6538 	bool operator() (const Matrix<T, Rows, Cols>& mat1,
6539 					 const Matrix<T, Rows, Cols>& mat2) const
6540 	{
6541 		for (int col = 0; col < Cols; ++col)
6542 		{
6543 			if (inputLess(mat1[col], mat2[col]))
6544 				return true;
6545 			if (inputLess(mat2[col], mat1[col]))
6546 				return false;
6547 		}
6548 
6549 		return false;
6550 	}
6551 };
6552 
6553 template <typename In>
6554 struct InTuple :
6555 	public Tuple4<typename In::In0, typename In::In1, typename In::In2, typename In::In3>
6556 {
InTuplevkt::shaderexecutor::InTuple6557 	InTuple	(const typename In::In0& in0,
6558 			 const typename In::In1& in1,
6559 			 const typename In::In2& in2,
6560 			 const typename In::In3& in3)
6561 		: Tuple4<typename In::In0, typename In::In1, typename In::In2, typename In::In3>
6562 		  (in0, in1, in2, in3) {}
6563 };
6564 
6565 template <typename In>
6566 struct InputLess<InTuple<In> >
6567 {
operator ()vkt::shaderexecutor::InputLess6568 	bool operator() (const InTuple<In>& in1, const InTuple<In>& in2) const
6569 	{
6570 		if (inputLess(in1.a, in2.a))
6571 			return true;
6572 		if (inputLess(in2.a, in1.a))
6573 			return false;
6574 		if (inputLess(in1.b, in2.b))
6575 			return true;
6576 		if (inputLess(in2.b, in1.b))
6577 			return false;
6578 		if (inputLess(in1.c, in2.c))
6579 			return true;
6580 		if (inputLess(in2.c, in1.c))
6581 			return false;
6582 		if (inputLess(in1.d, in2.d))
6583 			return true;
6584 		return false;
6585 	}
6586 };
6587 
6588 template<typename In>
generateInputs(const Samplings<In> & samplings,const FloatFormat & floatFormat,Precision intPrecision,size_t numSamples,deUint32 seed,const Interval & inputRange)6589 Inputs<In> generateInputs (const Samplings<In>&		samplings,
6590 						   const FloatFormat&		floatFormat,
6591 						   Precision				intPrecision,
6592 						   size_t					numSamples,
6593 						   deUint32					seed,
6594 						   const Interval&			inputRange)
6595 {
6596 	Random										rnd(seed);
6597 	Inputs<In>									ret;
6598 	Inputs<In>									fixedInputs;
6599 	set<InTuple<In>, InputLess<InTuple<In> > >	seenInputs;
6600 
6601 	samplings.in0.genFixeds(floatFormat, intPrecision, fixedInputs.in0, inputRange);
6602 	samplings.in1.genFixeds(floatFormat, intPrecision, fixedInputs.in1, inputRange);
6603 	samplings.in2.genFixeds(floatFormat, intPrecision, fixedInputs.in2, inputRange);
6604 	samplings.in3.genFixeds(floatFormat, intPrecision, fixedInputs.in3, inputRange);
6605 
6606 	for (size_t ndx0 = 0; ndx0 < fixedInputs.in0.size(); ++ndx0)
6607 	{
6608 		for (size_t ndx1 = 0; ndx1 < fixedInputs.in1.size(); ++ndx1)
6609 		{
6610 			for (size_t ndx2 = 0; ndx2 < fixedInputs.in2.size(); ++ndx2)
6611 			{
6612 				for (size_t ndx3 = 0; ndx3 < fixedInputs.in3.size(); ++ndx3)
6613 				{
6614 					const InTuple<In>	tuple	(fixedInputs.in0[ndx0],
6615 												 fixedInputs.in1[ndx1],
6616 												 fixedInputs.in2[ndx2],
6617 												 fixedInputs.in3[ndx3]);
6618 
6619 					seenInputs.insert(tuple);
6620 					ret.in0.push_back(tuple.a);
6621 					ret.in1.push_back(tuple.b);
6622 					ret.in2.push_back(tuple.c);
6623 					ret.in3.push_back(tuple.d);
6624 				}
6625 			}
6626 		}
6627 	}
6628 
6629 	for (size_t ndx = 0; ndx < numSamples; ++ndx)
6630 	{
6631 		const typename In::In0	in0		= samplings.in0.genRandom(floatFormat, intPrecision, rnd, inputRange);
6632 		const typename In::In1	in1		= samplings.in1.genRandom(floatFormat, intPrecision, rnd, inputRange);
6633 		const typename In::In2	in2		= samplings.in2.genRandom(floatFormat, intPrecision, rnd, inputRange);
6634 		const typename In::In3	in3		= samplings.in3.genRandom(floatFormat, intPrecision, rnd, inputRange);
6635 		const InTuple<In>		tuple	(in0, in1, in2, in3);
6636 
6637 		if (de::contains(seenInputs, tuple))
6638 			continue;
6639 
6640 		seenInputs.insert(tuple);
6641 		ret.in0.push_back(in0);
6642 		ret.in1.push_back(in1);
6643 		ret.in2.push_back(in2);
6644 		ret.in3.push_back(in3);
6645 	}
6646 
6647 	return ret;
6648 }
6649 
6650 class FuncCaseBase : public PrecisionCase
6651 {
6652 protected:
FuncCaseBase(const CaseContext & context,const string & name,const FuncBase & func)6653 				FuncCaseBase	(const CaseContext& context, const string& name, const FuncBase& func)
6654 									: PrecisionCase	(context, name, func.getInputRange(!context.isFloat64b && (context.precision == glu::PRECISION_LAST || context.isPackFloat16b)), func.getRequiredExtension())
6655 								{
6656 								}
6657 
6658 	StatementP	m_stmt;
6659 };
6660 
6661 template <typename Sig>
6662 class FuncCase : public FuncCaseBase
6663 {
6664 public:
6665 	typedef Func<Sig>						CaseFunc;
6666 	typedef typename Sig::Ret				Ret;
6667 	typedef typename Sig::Arg0				Arg0;
6668 	typedef typename Sig::Arg1				Arg1;
6669 	typedef typename Sig::Arg2				Arg2;
6670 	typedef typename Sig::Arg3				Arg3;
6671 	typedef InTypes<Arg0, Arg1, Arg2, Arg3>	In;
6672 	typedef OutTypes<Ret>					Out;
6673 
FuncCase(const CaseContext & context,const string & name,const CaseFunc & func,bool modularOp=false)6674 											FuncCase		(const CaseContext& context, const string& name, const CaseFunc& func, bool modularOp = false)
6675 												: FuncCaseBase	(context, name, func)
6676 												, m_func		(func)
6677 												, m_modularOp	(modularOp)
6678 												{
6679 													buildTest();
6680 												}
6681 
createInstance(Context & context) const6682 	virtual	TestInstance*					createInstance	(Context& context) const
6683 	{
6684 		return new BuiltinPrecisionCaseTestInstance<In, Out>(context, m_ctx, m_spec, m_variables, getSamplings(), m_stmt, m_modularOp);
6685 	}
6686 
6687 protected:
6688 	void									buildTest		(void);
getSamplings(void) const6689 	virtual const Samplings<In>&			getSamplings	(void) const
6690 	{
6691 		return instance<DefaultSamplings<In> >();
6692 	}
6693 
6694 private:
6695 	const CaseFunc&							m_func;
6696 	Variables<In, Out>						m_variables;
6697 	bool									m_modularOp;
6698 };
6699 
6700 template <typename Sig>
buildTest(void)6701 void FuncCase<Sig>::buildTest (void)
6702 {
6703 	m_variables.out0	= variable<Ret>("out0");
6704 	m_variables.out1	= variable<Void>("out1");
6705 	m_variables.in0		= variable<Arg0>("in0");
6706 	m_variables.in1		= variable<Arg1>("in1");
6707 	m_variables.in2		= variable<Arg2>("in2");
6708 	m_variables.in3		= variable<Arg3>("in3");
6709 
6710 	{
6711 		ExprP<Ret> expr	= applyVar(m_func, m_variables.in0, m_variables.in1, m_variables.in2, m_variables.in3);
6712 		m_stmt			= variableAssignment(m_variables.out0, expr);
6713 
6714 		this->testStatement(m_variables, *m_stmt, m_func.getSpirvCase());
6715 	}
6716 }
6717 
6718 template <typename Sig>
6719 class InOutFuncCase : public FuncCaseBase
6720 {
6721 public:
6722 	typedef Func<Sig>					CaseFunc;
6723 	typedef typename Sig::Ret			Ret;
6724 	typedef typename Sig::Arg0			Arg0;
6725 	typedef typename Sig::Arg1			Arg1;
6726 	typedef typename Sig::Arg2			Arg2;
6727 	typedef typename Sig::Arg3			Arg3;
6728 	typedef InTypes<Arg0, Arg2, Arg3>	In;
6729 	typedef OutTypes<Ret, Arg1>			Out;
6730 
InOutFuncCase(const CaseContext & context,const string & name,const CaseFunc & func,bool modularOp=false)6731 										InOutFuncCase	(const CaseContext& context, const string& name, const CaseFunc& func, bool modularOp = false)
6732 											: FuncCaseBase	(context, name, func)
6733 											, m_func		(func)
6734 											, m_modularOp	(modularOp)
6735 											{
6736 												buildTest();
6737 											}
createInstance(Context & context) const6738 	virtual TestInstance*				createInstance	(Context& context) const
6739 	{
6740 		return new BuiltinPrecisionCaseTestInstance<In, Out>(context, m_ctx, m_spec, m_variables, getSamplings(), m_stmt, m_modularOp);
6741 	}
6742 
6743 protected:
6744 	void								buildTest		(void);
getSamplings(void) const6745 	virtual const Samplings<In>&		getSamplings	(void) const
6746 	{
6747 		return instance<DefaultSamplings<In> >();
6748 	}
6749 
6750 private:
6751 	const CaseFunc&						m_func;
6752 	Variables<In, Out>					m_variables;
6753 	bool								m_modularOp;
6754 };
6755 
6756 template <typename Sig>
buildTest(void)6757 void InOutFuncCase<Sig>::buildTest (void)
6758 {
6759 	m_variables.out0	= variable<Ret>("out0");
6760 	m_variables.out1	= variable<Arg1>("out1");
6761 	m_variables.in0		= variable<Arg0>("in0");
6762 	m_variables.in1		= variable<Arg2>("in1");
6763 	m_variables.in2		= variable<Arg3>("in2");
6764 	m_variables.in3		= variable<Void>("in3");
6765 
6766 	{
6767 		ExprP<Ret> expr	= applyVar(m_func, m_variables.in0, m_variables.out1, m_variables.in1, m_variables.in2);
6768 		m_stmt			= variableAssignment(m_variables.out0, expr);
6769 
6770 		this->testStatement(m_variables, *m_stmt, m_func.getSpirvCase());
6771 	}
6772 }
6773 
6774 template <typename Sig>
createFuncCase(const CaseContext & context,const string & name,const Func<Sig> & func,bool modularOp=false)6775 PrecisionCase* createFuncCase (const CaseContext& context, const string& name, const Func<Sig>&	func, bool modularOp = false)
6776 {
6777 	switch (func.getOutParamIndex())
6778 	{
6779 		case -1:
6780 			return new FuncCase<Sig>(context, name, func, modularOp);
6781 		case 1:
6782 			return new InOutFuncCase<Sig>(context, name, func, modularOp);
6783 		default:
6784 			DE_FATAL("Impossible");
6785 	}
6786 	return DE_NULL;
6787 }
6788 
6789 class CaseFactory
6790 {
6791 public:
~CaseFactory(void)6792 	virtual						~CaseFactory	(void) {}
6793 	virtual MovePtr<TestNode>	createCase		(const CaseContext& ctx) const = 0;
6794 	virtual string				getName			(void) const = 0;
6795 	virtual string				getDesc			(void) const = 0;
6796 };
6797 
6798 class FuncCaseFactory : public CaseFactory
6799 {
6800 public:
6801 	virtual const FuncBase&		getFunc			(void) const = 0;
getName(void) const6802 	string						getName			(void) const { return de::toLower(getFunc().getName()); }
getDesc(void) const6803 	string						getDesc			(void) const { return "Function '" + getFunc().getName() + "'";	}
6804 };
6805 
6806 template <typename Sig>
6807 class GenFuncCaseFactory : public CaseFactory
6808 {
6809 public:
GenFuncCaseFactory(const GenFuncs<Sig> & funcs,const string & name,bool modularOp=false)6810 						GenFuncCaseFactory	(const GenFuncs<Sig>& funcs, const string& name, bool modularOp = false)
6811 							: m_funcs			(funcs)
6812 							, m_name			(de::toLower(name))
6813 							, m_modularOp		(modularOp)
6814 							{
6815 							}
6816 
createCase(const CaseContext & ctx) const6817 	MovePtr<TestNode>	createCase			(const CaseContext& ctx) const
6818 	{
6819 		TestCaseGroup* group = new TestCaseGroup(ctx.testContext, ctx.name.c_str(), ctx.name.c_str());
6820 
6821 		group->addChild(createFuncCase(ctx, "scalar",	m_funcs.func,	m_modularOp));
6822 		group->addChild(createFuncCase(ctx, "vec2",		m_funcs.func2,	m_modularOp));
6823 		group->addChild(createFuncCase(ctx, "vec3",		m_funcs.func3,	m_modularOp));
6824 		group->addChild(createFuncCase(ctx, "vec4",		m_funcs.func4,	m_modularOp));
6825 		return MovePtr<TestNode>(group);
6826 	}
6827 
getName(void) const6828 	string				getName				(void) const { return m_name; }
getDesc(void) const6829 	string				getDesc				(void) const { return "Function '" + m_funcs.func.getName() + "'"; }
6830 
6831 private:
6832 	const GenFuncs<Sig>	m_funcs;
6833 	string				m_name;
6834 	bool				m_modularOp;
6835 };
6836 
6837 template <template <int, class> class GenF, typename T>
6838 class TemplateFuncCaseFactory : public FuncCaseFactory
6839 {
6840 public:
createCase(const CaseContext & ctx) const6841 	MovePtr<TestNode>	createCase		(const CaseContext& ctx) const
6842 	{
6843 		TestCaseGroup*	group = new TestCaseGroup(ctx.testContext, ctx.name.c_str(), ctx.name.c_str());
6844 
6845 		group->addChild(createFuncCase(ctx, "scalar", instance<GenF<1, T> >()));
6846 		group->addChild(createFuncCase(ctx, "vec2", instance<GenF<2, T> >()));
6847 		group->addChild(createFuncCase(ctx, "vec3", instance<GenF<3, T> >()));
6848 		group->addChild(createFuncCase(ctx, "vec4", instance<GenF<4, T> >()));
6849 
6850 		return MovePtr<TestNode>(group);
6851 	}
6852 
getFunc(void) const6853 	const FuncBase&		getFunc			(void) const { return instance<GenF<1, T> >(); }
6854 };
6855 
6856 template <template <int> class GenF>
6857 class SquareMatrixFuncCaseFactory : public FuncCaseFactory
6858 {
6859 public:
createCase(const CaseContext & ctx) const6860 	MovePtr<TestNode>	createCase		(const CaseContext& ctx) const
6861 	{
6862 		TestCaseGroup* group = new TestCaseGroup(ctx.testContext, ctx.name.c_str(), ctx.name.c_str());
6863 
6864 		group->addChild(createFuncCase(ctx, "mat2", instance<GenF<2> >()));
6865 
6866 		// There is no defined precision for mediump/RelaxedPrecision in Vulkan
6867 		if (ctx.name != "mediump")
6868 		{
6869 			static const char			dataDir[]		= "builtin/precision/square_matrix";
6870 			std::string					fileName		= getFunc().getName() + "_" + ctx.name;
6871 			std::vector<std::string>	requirements;
6872 
6873 			if (ctx.name == "compute")
6874 			{
6875 				if (ctx.isFloat64b)
6876 				{
6877 					requirements.push_back("Features.shaderFloat64");
6878 					fileName += "_fp64";
6879 				}
6880 				else
6881 				{
6882 					requirements.push_back("Float16Int8Features.shaderFloat16");
6883 					fileName += "_fp16";
6884 
6885 					if (ctx.isPackFloat16b == true)
6886 					{
6887 						fileName += "_32bit";
6888 					}
6889 					else
6890 					{
6891 						requirements.push_back("Storage16BitFeatures.storageBuffer16BitAccess");
6892 					}
6893 				}
6894 			}
6895 
6896 			requirements.push_back("VK_KHR_16bit_storage");
6897 			requirements.push_back("VK_KHR_storage_buffer_storage_class");
6898 
6899 			group->addChild(cts_amber::createAmberTestCase(ctx.testContext, "mat3", "Square matrix 3x3 precision tests", dataDir, fileName + "_mat_3x3.amber", requirements));
6900 			group->addChild(cts_amber::createAmberTestCase(ctx.testContext, "mat4", "Square matrix 4x4 precision tests", dataDir, fileName + "_mat_4x4.amber", requirements));
6901 		}
6902 
6903 		return MovePtr<TestNode>(group);
6904 	}
6905 
getFunc(void) const6906 	const FuncBase&		getFunc			(void) const { return instance<GenF<2> >(); }
6907 };
6908 
6909 template <template <int, int, class> class GenF, typename T>
6910 class MatrixFuncCaseFactory : public FuncCaseFactory
6911 {
6912 public:
createCase(const CaseContext & ctx) const6913 	MovePtr<TestNode>	createCase		(const CaseContext& ctx) const
6914 	{
6915 		TestCaseGroup*	const group = new TestCaseGroup(ctx.testContext, ctx.name.c_str(), ctx.name.c_str());
6916 
6917 		this->addCase<2, 2>(ctx, group);
6918 		this->addCase<3, 2>(ctx, group);
6919 		this->addCase<4, 2>(ctx, group);
6920 		this->addCase<2, 3>(ctx, group);
6921 		this->addCase<3, 3>(ctx, group);
6922 		this->addCase<4, 3>(ctx, group);
6923 		this->addCase<2, 4>(ctx, group);
6924 		this->addCase<3, 4>(ctx, group);
6925 		this->addCase<4, 4>(ctx, group);
6926 
6927 		return MovePtr<TestNode>(group);
6928 	}
6929 
getFunc(void) const6930 	const FuncBase&		getFunc			(void) const { return instance<GenF<2,2, T> >(); }
6931 
6932 private:
6933 	template <int Rows, int Cols>
addCase(const CaseContext & ctx,TestCaseGroup * group) const6934 	void				addCase			(const CaseContext& ctx, TestCaseGroup* group) const
6935 	{
6936 		const char*	const name = dataTypeNameOf<Matrix<float, Rows, Cols> >();
6937 		group->addChild(createFuncCase(ctx, name, instance<GenF<Rows, Cols, T> >()));
6938 	}
6939 };
6940 
6941 template <typename Sig>
6942 class SimpleFuncCaseFactory : public CaseFactory
6943 {
6944 public:
SimpleFuncCaseFactory(const Func<Sig> & func)6945 						SimpleFuncCaseFactory	(const Func<Sig>& func) : m_func(func) {}
6946 
createCase(const CaseContext & ctx) const6947 	MovePtr<TestNode>	createCase				(const CaseContext& ctx) const	{ return MovePtr<TestNode>(createFuncCase(ctx, ctx.name.c_str(), m_func)); }
getName(void) const6948 	string				getName					(void) const					{ return de::toLower(m_func.getName()); }
getDesc(void) const6949 	string				getDesc					(void) const					{ return "Function '" + getName() + "'"; }
6950 
6951 private:
6952 	const Func<Sig>&	m_func;
6953 };
6954 
6955 template <typename F>
createSimpleFuncCaseFactory(void)6956 SharedPtr<SimpleFuncCaseFactory<typename F::Sig> > createSimpleFuncCaseFactory (void)
6957 {
6958 	return SharedPtr<SimpleFuncCaseFactory<typename F::Sig> >(new SimpleFuncCaseFactory<typename F::Sig>(instance<F>()));
6959 }
6960 
6961 class CaseFactories
6962 {
6963 public:
~CaseFactories(void)6964 	virtual											~CaseFactories	(void) {}
6965 	virtual const std::vector<const CaseFactory*>	getFactories	(void) const = 0;
6966 };
6967 
6968 class BuiltinFuncs : public CaseFactories
6969 {
6970 public:
getFactories(void) const6971 	const vector<const CaseFactory*>		getFactories	(void) const
6972 	{
6973 		vector<const CaseFactory*> ret;
6974 
6975 		for (size_t ndx = 0; ndx < m_factories.size(); ++ndx)
6976 			ret.push_back(m_factories[ndx].get());
6977 
6978 		return ret;
6979 	}
6980 
addFactory(SharedPtr<const CaseFactory> fact)6981 	void									addFactory		(SharedPtr<const CaseFactory> fact) { m_factories.push_back(fact); }
6982 
6983 private:
6984 	vector<SharedPtr<const CaseFactory> >	m_factories;
6985 };
6986 
6987 template <typename F>
addScalarFactory(BuiltinFuncs & funcs,string name="",bool modularOp=false)6988 void addScalarFactory (BuiltinFuncs& funcs, string name = "", bool modularOp = false)
6989 {
6990 	if (name.empty())
6991 		name = instance<F>().getName();
6992 
6993 	funcs.addFactory(SharedPtr<const CaseFactory>(new GenFuncCaseFactory<typename F::Sig>(makeVectorizedFuncs<F>(), name, modularOp)));
6994 }
6995 
createBuiltinCases()6996 MovePtr<const CaseFactories> createBuiltinCases ()
6997 {
6998 	MovePtr<BuiltinFuncs>	funcs	(new BuiltinFuncs());
6999 
7000 	// Tests for ES3 builtins
7001 	addScalarFactory<Comparison< Signature<int, float, float> > >(*funcs);
7002 	addScalarFactory<Add< Signature<float, float, float> > >(*funcs);
7003 	addScalarFactory<Sub< Signature<float, float, float> > >(*funcs);
7004 	addScalarFactory<Mul< Signature<float, float, float> > >(*funcs);
7005 	addScalarFactory<Div< Signature<float, float, float> > >(*funcs);
7006 
7007 	addScalarFactory<Radians>(*funcs);
7008 	addScalarFactory<Degrees>(*funcs);
7009 	addScalarFactory<Sin<Signature<float, float> > >(*funcs);
7010 	addScalarFactory<Cos<Signature<float, float> > >(*funcs);
7011 	addScalarFactory<Tan>(*funcs);
7012 
7013 	addScalarFactory<ASin>(*funcs);
7014 	addScalarFactory<ACos>(*funcs);
7015 	addScalarFactory<ATan2< Signature<float, float, float> > >(*funcs, "atan2");
7016 	addScalarFactory<ATan<Signature<float, float> > >(*funcs);
7017 	addScalarFactory<Sinh>(*funcs);
7018 	addScalarFactory<Cosh>(*funcs);
7019 	addScalarFactory<Tanh>(*funcs);
7020 	addScalarFactory<ASinh>(*funcs);
7021 	addScalarFactory<ACosh>(*funcs);
7022 	addScalarFactory<ATanh>(*funcs);
7023 
7024 	addScalarFactory<Pow>(*funcs);
7025 	addScalarFactory<Exp<Signature<float, float> > >(*funcs);
7026 	addScalarFactory<Log< Signature<float, float> > >(*funcs);
7027 	addScalarFactory<Exp2<Signature<float, float> > >(*funcs);
7028 	addScalarFactory<Log2< Signature<float, float> > >(*funcs);
7029 	addScalarFactory<Sqrt32Bit>(*funcs);
7030 	addScalarFactory<InverseSqrt< Signature<float, float> > >(*funcs);
7031 
7032 	addScalarFactory<Abs< Signature<float, float> > >(*funcs);
7033 	addScalarFactory<Sign< Signature<float, float> > >(*funcs);
7034 	addScalarFactory<Floor32Bit>(*funcs);
7035 	addScalarFactory<Trunc32Bit>(*funcs);
7036 	addScalarFactory<Round< Signature<float, float> > >(*funcs);
7037 	addScalarFactory<RoundEven< Signature<float, float> > >(*funcs);
7038 	addScalarFactory<Ceil< Signature<float, float> > >(*funcs);
7039 	addScalarFactory<Fract>(*funcs);
7040 
7041 	addScalarFactory<Mod32Bit>(*funcs, "mod", true);
7042 	addScalarFactory<FRem32Bit>(*funcs);
7043 
7044 	addScalarFactory<Modf32Bit>(*funcs);
7045 	addScalarFactory<ModfStruct32Bit>(*funcs);
7046 	addScalarFactory<Min< Signature<float, float, float> > >(*funcs);
7047 	addScalarFactory<Max< Signature<float, float, float> > >(*funcs);
7048 	addScalarFactory<Clamp< Signature<float, float, float, float> > >(*funcs);
7049 	addScalarFactory<Mix>(*funcs);
7050 	addScalarFactory<Step< Signature<float, float, float> > >(*funcs);
7051 	addScalarFactory<SmoothStep< Signature<float, float, float, float> > >(*funcs);
7052 
7053 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Length, float>()));
7054 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Distance, float>()));
7055 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Dot, float>()));
7056 	funcs->addFactory(createSimpleFuncCaseFactory<Cross>());
7057 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Normalize, float>()));
7058 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<FaceForward, float>()));
7059 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Reflect, float>()));
7060 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Refract, float>()));
7061 
7062 	funcs->addFactory(SharedPtr<const CaseFactory>(new MatrixFuncCaseFactory<MatrixCompMult, float>()));
7063 	funcs->addFactory(SharedPtr<const CaseFactory>(new MatrixFuncCaseFactory<OuterProduct, float>()));
7064 	funcs->addFactory(SharedPtr<const CaseFactory>(new MatrixFuncCaseFactory<Transpose, float>()));
7065 	funcs->addFactory(SharedPtr<const CaseFactory>(new SquareMatrixFuncCaseFactory<Determinant>()));
7066 	funcs->addFactory(SharedPtr<const CaseFactory>(new SquareMatrixFuncCaseFactory<Inverse>()));
7067 
7068 	addScalarFactory<Frexp32Bit>(*funcs);
7069 	addScalarFactory<FrexpStruct32Bit>(*funcs);
7070 	addScalarFactory<LdExp <Signature<float, float, int> > >(*funcs);
7071 	addScalarFactory<Fma  <Signature<float, float, float, float> > >(*funcs);
7072 
7073 	return MovePtr<const CaseFactories>(funcs.release());
7074 }
7075 
createBuiltinDoubleCases()7076 MovePtr<const CaseFactories> createBuiltinDoubleCases ()
7077 {
7078 	MovePtr<BuiltinFuncs>	funcs	(new BuiltinFuncs());
7079 
7080 	// Tests for ES3 builtins
7081 	addScalarFactory<Comparison<Signature<int, double, double>>>(*funcs);
7082 	addScalarFactory<Add<Signature<double, double, double>>>(*funcs);
7083 	addScalarFactory<Sub<Signature<double, double, double>>>(*funcs);
7084 	addScalarFactory<Mul<Signature<double, double, double>>>(*funcs);
7085 	addScalarFactory<Div<Signature<double, double, double>>>(*funcs);
7086 
7087 	// Radians, degrees, sin, cos, tan, asin, acos, atan, sinh, cosh, tanh, asinh, acosh, atanh, atan2, pow, exp, log, exp2 and log2
7088 	// only work with 16-bit and 32-bit floating point types according to the spec.
7089 #if 0
7090 	addScalarFactory<Radians64>(*funcs);
7091 	addScalarFactory<Degrees64>(*funcs);
7092 	addScalarFactory<Sin<Signature<double, double>>>(*funcs);
7093 	addScalarFactory<Cos<Signature<double, double>>>(*funcs);
7094 	addScalarFactory<Tan64Bit>(*funcs);
7095 	addScalarFactory<ASin64Bit>(*funcs);
7096 	addScalarFactory<ACos64Bit>(*funcs);
7097 	addScalarFactory<ATan2<Signature<double, double, double>>>(*funcs, "atan2");
7098 	addScalarFactory<ATan<Signature<double, double>>>(*funcs);
7099 	addScalarFactory<Sinh64Bit>(*funcs);
7100 	addScalarFactory<Cosh64Bit>(*funcs);
7101 	addScalarFactory<Tanh64Bit>(*funcs);
7102 	addScalarFactory<ASinh64Bit>(*funcs);
7103 	addScalarFactory<ACosh64Bit>(*funcs);
7104 	addScalarFactory<ATanh64Bit>(*funcs);
7105 
7106 	addScalarFactory<Pow64>(*funcs);
7107 	addScalarFactory<Exp<Signature<double, double>>>(*funcs);
7108 	addScalarFactory<Log<Signature<double, double>>>(*funcs);
7109 	addScalarFactory<Exp2<Signature<double, double>>>(*funcs);
7110 	addScalarFactory<Log2<Signature<double, double>>>(*funcs);
7111 #endif
7112 	addScalarFactory<Sqrt64Bit>(*funcs);
7113 	addScalarFactory<InverseSqrt<Signature<double, double>>>(*funcs);
7114 
7115 	addScalarFactory<Abs<Signature<double, double>>>(*funcs);
7116 	addScalarFactory<Sign<Signature<double, double>>>(*funcs);
7117 	addScalarFactory<Floor64Bit>(*funcs);
7118 	addScalarFactory<Trunc64Bit>(*funcs);
7119 	addScalarFactory<Round<Signature<double, double>>>(*funcs);
7120 	addScalarFactory<RoundEven<Signature<double, double>>>(*funcs);
7121 	addScalarFactory<Ceil<Signature<double, double>>>(*funcs);
7122 	addScalarFactory<Fract64Bit>(*funcs);
7123 
7124 	addScalarFactory<Mod64Bit>(*funcs, "mod", true);
7125 	addScalarFactory<FRem64Bit>(*funcs);
7126 
7127 	addScalarFactory<Modf64Bit>(*funcs);
7128 	addScalarFactory<ModfStruct64Bit>(*funcs);
7129 	addScalarFactory<Min<Signature<double, double, double>>>(*funcs);
7130 	addScalarFactory<Max<Signature<double, double, double>>>(*funcs);
7131 	addScalarFactory<Clamp<Signature<double, double, double, double>>>(*funcs);
7132 	addScalarFactory<Mix64Bit>(*funcs);
7133 	addScalarFactory<Step<Signature<double, double, double>>>(*funcs);
7134 	addScalarFactory<SmoothStep<Signature<double, double, double, double>>>(*funcs);
7135 
7136 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Length, double>()));
7137 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Distance, double>()));
7138 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Dot, double>()));
7139 	funcs->addFactory(createSimpleFuncCaseFactory<Cross64Bit>());
7140 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Normalize, double>()));
7141 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<FaceForward, double>()));
7142 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Reflect, double>()));
7143 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Refract, double>()));
7144 
7145 	funcs->addFactory(SharedPtr<const CaseFactory>(new MatrixFuncCaseFactory<MatrixCompMult, double>()));
7146 	funcs->addFactory(SharedPtr<const CaseFactory>(new MatrixFuncCaseFactory<OuterProduct, double>()));
7147 	funcs->addFactory(SharedPtr<const CaseFactory>(new MatrixFuncCaseFactory<Transpose, double>()));
7148 	funcs->addFactory(SharedPtr<const CaseFactory>(new SquareMatrixFuncCaseFactory<Determinant64bit>()));
7149 	funcs->addFactory(SharedPtr<const CaseFactory>(new SquareMatrixFuncCaseFactory<Inverse64bit>()));
7150 
7151 	addScalarFactory<Frexp64Bit>(*funcs);
7152 	addScalarFactory<FrexpStruct64Bit>(*funcs);
7153 	addScalarFactory<LdExp<Signature<double, double, int>>>(*funcs);
7154 	addScalarFactory<Fma<Signature<double, double, double, double>>>(*funcs);
7155 
7156 	return MovePtr<const CaseFactories>(funcs.release());
7157 }
7158 
createBuiltinCases16Bit(void)7159 MovePtr<const CaseFactories> createBuiltinCases16Bit(void)
7160 {
7161 	MovePtr<BuiltinFuncs>	funcs(new BuiltinFuncs());
7162 
7163 	addScalarFactory<Comparison< Signature<int, deFloat16, deFloat16> > >(*funcs);
7164 	addScalarFactory<Add< Signature<deFloat16, deFloat16, deFloat16> > >(*funcs);
7165 	addScalarFactory<Sub< Signature<deFloat16, deFloat16, deFloat16> > >(*funcs);
7166 	addScalarFactory<Mul< Signature<deFloat16, deFloat16, deFloat16> > >(*funcs);
7167 	addScalarFactory<Div< Signature<deFloat16, deFloat16, deFloat16> > >(*funcs);
7168 
7169 	addScalarFactory<Radians16>(*funcs);
7170 	addScalarFactory<Degrees16>(*funcs);
7171 
7172 	addScalarFactory<Sin<Signature<deFloat16, deFloat16> > >(*funcs);
7173 	addScalarFactory<Cos<Signature<deFloat16, deFloat16> > >(*funcs);
7174 	addScalarFactory<Tan16Bit>(*funcs);
7175 	addScalarFactory<ASin16Bit>(*funcs);
7176 	addScalarFactory<ACos16Bit>(*funcs);
7177 	addScalarFactory<ATan2< Signature<deFloat16, deFloat16, deFloat16> > >(*funcs, "atan2");
7178 	addScalarFactory<ATan<Signature<deFloat16, deFloat16> > >(*funcs);
7179 
7180 	addScalarFactory<Sinh16Bit>(*funcs);
7181 	addScalarFactory<Cosh16Bit>(*funcs);
7182 	addScalarFactory<Tanh16Bit>(*funcs);
7183 	addScalarFactory<ASinh16Bit>(*funcs);
7184 	addScalarFactory<ACosh16Bit>(*funcs);
7185 	addScalarFactory<ATanh16Bit>(*funcs);
7186 
7187 	addScalarFactory<Pow16>(*funcs);
7188 	addScalarFactory<Exp< Signature<deFloat16, deFloat16> > >(*funcs);
7189 	addScalarFactory<Log< Signature<deFloat16, deFloat16> > >(*funcs);
7190 	addScalarFactory<Exp2< Signature<deFloat16, deFloat16> > >(*funcs);
7191 	addScalarFactory<Log2< Signature<deFloat16, deFloat16> > >(*funcs);
7192 	addScalarFactory<Sqrt16Bit>(*funcs);
7193 	addScalarFactory<InverseSqrt16Bit>(*funcs);
7194 
7195 	addScalarFactory<Abs< Signature<deFloat16, deFloat16> > >(*funcs);
7196 	addScalarFactory<Sign< Signature<deFloat16, deFloat16> > >(*funcs);
7197 	addScalarFactory<Floor16Bit>(*funcs);
7198 	addScalarFactory<Trunc16Bit>(*funcs);
7199 	addScalarFactory<Round< Signature<deFloat16, deFloat16> > >(*funcs);
7200 	addScalarFactory<RoundEven< Signature<deFloat16, deFloat16> > >(*funcs);
7201 	addScalarFactory<Ceil< Signature<deFloat16, deFloat16> > >(*funcs);
7202 	addScalarFactory<Fract16Bit>(*funcs);
7203 
7204 	addScalarFactory<Mod16Bit>(*funcs, "mod", true);
7205 	addScalarFactory<FRem16Bit>(*funcs);
7206 
7207 	addScalarFactory<Modf16Bit>(*funcs);
7208 	addScalarFactory<ModfStruct16Bit>(*funcs);
7209 	addScalarFactory<Min< Signature<deFloat16, deFloat16, deFloat16> > >(*funcs);
7210 	addScalarFactory<Max< Signature<deFloat16, deFloat16, deFloat16> > >(*funcs);
7211 	addScalarFactory<Clamp< Signature<deFloat16, deFloat16, deFloat16, deFloat16> > >(*funcs);
7212 	addScalarFactory<Mix16Bit>(*funcs);
7213 	addScalarFactory<Step< Signature<deFloat16, deFloat16, deFloat16> > >(*funcs);
7214 	addScalarFactory<SmoothStep< Signature<deFloat16, deFloat16, deFloat16, deFloat16> > >(*funcs);
7215 
7216 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Length, deFloat16>()));
7217 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Distance, deFloat16>()));
7218 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Dot, deFloat16>()));
7219 	funcs->addFactory(createSimpleFuncCaseFactory<Cross16Bit>());
7220 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Normalize, deFloat16>()));
7221 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<FaceForward, deFloat16>()));
7222 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Reflect, deFloat16>()));
7223 	funcs->addFactory(SharedPtr<const CaseFactory>(new TemplateFuncCaseFactory<Refract, deFloat16>()));
7224 
7225 	funcs->addFactory(SharedPtr<const CaseFactory>(new MatrixFuncCaseFactory<OuterProduct, deFloat16>()));
7226 	funcs->addFactory(SharedPtr<const CaseFactory>(new MatrixFuncCaseFactory<Transpose, deFloat16>()));
7227 	funcs->addFactory(SharedPtr<const CaseFactory>(new SquareMatrixFuncCaseFactory<Determinant16bit>()));
7228 	funcs->addFactory(SharedPtr<const CaseFactory>(new SquareMatrixFuncCaseFactory<Inverse16bit>()));
7229 
7230 	addScalarFactory<Frexp16Bit>(*funcs);
7231 	addScalarFactory<FrexpStruct16Bit>(*funcs);
7232 	addScalarFactory<LdExp <Signature<deFloat16, deFloat16, int> > >(*funcs);
7233 	addScalarFactory<Fma <Signature<deFloat16, deFloat16, deFloat16, deFloat16> > >(*funcs);
7234 
7235 	return MovePtr<const CaseFactories>(funcs.release());
7236 }
7237 
createFuncGroup(TestContext & ctx,const CaseFactory & factory,int numRandoms)7238 TestCaseGroup* createFuncGroup (TestContext& ctx, const CaseFactory& factory, int numRandoms)
7239 {
7240 	TestCaseGroup* const	group	= new TestCaseGroup(ctx, factory.getName().c_str(), factory.getDesc().c_str());
7241 	const FloatFormat		highp		(-126, 127, 23, true,
7242 										 tcu::MAYBE,	// subnormals
7243 										 tcu::YES,		// infinities
7244 										 tcu::MAYBE);	// NaN
7245 	const FloatFormat       mediump		(-14, 13, 10, false, tcu::MAYBE);
7246 
7247 	for (int precNdx = glu::PRECISION_MEDIUMP; precNdx < glu::PRECISION_LAST; ++precNdx)
7248 	{
7249 		const Precision		precision	= Precision(precNdx);
7250 		const string		precName	(glu::getPrecisionName(precision));
7251 		const FloatFormat&	fmt			= precNdx == glu::PRECISION_MEDIUMP ? mediump : highp;
7252 
7253 		const CaseContext	caseCtx		(precName, ctx, fmt, highp, precision, glu::SHADERTYPE_COMPUTE, numRandoms);
7254 
7255 		group->addChild(factory.createCase(caseCtx).release());
7256 	}
7257 
7258 	return group;
7259 }
7260 
createFuncGroupDouble(TestContext & ctx,const CaseFactory & factory,int numRandoms)7261 TestCaseGroup* createFuncGroupDouble (TestContext& ctx, const CaseFactory& factory, int numRandoms)
7262 {
7263 	TestCaseGroup* const	group		= new TestCaseGroup(ctx, factory.getName().c_str(), factory.getDesc().c_str());
7264 	const Precision			precision	= Precision(glu::PRECISION_LAST);
7265 	const FloatFormat		highp		(-1022, 1023, 52, true,
7266 										 tcu::MAYBE,	// subnormals
7267 										 tcu::YES,		// infinities
7268 										 tcu::MAYBE);	// NaN
7269 
7270 	PrecisionTestFeatures precisionTestFeatures = PRECISION_TEST_FEATURES_64BIT_SHADER_FLOAT;
7271 
7272 	const CaseContext caseCtx("compute", ctx, highp, highp, precision, glu::SHADERTYPE_COMPUTE, numRandoms, precisionTestFeatures, false, true);
7273 	group->addChild(factory.createCase(caseCtx).release());
7274 
7275 	return group;
7276 }
7277 
createFuncGroup16Bit(TestContext & ctx,const CaseFactory & factory,int numRandoms,bool storage32)7278 TestCaseGroup* createFuncGroup16Bit(TestContext& ctx, const CaseFactory& factory, int numRandoms, bool storage32)
7279 {
7280 	TestCaseGroup* const	group = new TestCaseGroup(ctx, factory.getName().c_str(), factory.getDesc().c_str());
7281 	const Precision			precision = Precision(glu::PRECISION_LAST);
7282 	const FloatFormat		float16	(-14, 15, 10, true, tcu::MAYBE);
7283 
7284 	PrecisionTestFeatures precisionTestFeatures = PRECISION_TEST_FEATURES_16BIT_SHADER_FLOAT;
7285 	if (!storage32)
7286 		precisionTestFeatures |= PRECISION_TEST_FEATURES_16BIT_UNIFORM_AND_STORAGE_BUFFER_ACCESS;
7287 
7288 	const CaseContext caseCtx("compute", ctx, float16, float16, precision, glu::SHADERTYPE_COMPUTE, numRandoms, precisionTestFeatures, storage32);
7289 	group->addChild(factory.createCase(caseCtx).release());
7290 
7291 	return group;
7292 }
7293 
//! Default value for the number of randoms (16384), used when the command
//! line does not supply a positive test iteration count.
const int defRandoms = 1 << 14;
7295 
addBuiltinPrecisionTests(TestContext & ctx,TestCaseGroup & dstGroup,const bool test16Bit=false,const bool storage32Bit=false)7296 void addBuiltinPrecisionTests (TestContext&				ctx,
7297 								TestCaseGroup&			dstGroup,
7298 								const bool				test16Bit = false,
7299 								const bool				storage32Bit = false)
7300 {
7301 	const int userRandoms	= ctx.getCommandLine().getTestIterationCount();
7302 	const int numRandoms	= userRandoms > 0 ? userRandoms : defRandoms;
7303 
7304 	MovePtr<const CaseFactories> cases = (test16Bit && !storage32Bit)	? createBuiltinCases16Bit()
7305 																		: createBuiltinCases();
7306 	for (size_t ndx = 0; ndx < cases->getFactories().size(); ++ndx)
7307 	{
7308 		if (!test16Bit)
7309 			dstGroup.addChild(createFuncGroup(ctx, *cases->getFactories()[ndx], numRandoms));
7310 		else
7311 			dstGroup.addChild(createFuncGroup16Bit(ctx, *cases->getFactories()[ndx], numRandoms, storage32Bit));
7312 	}
7313 }
7314 
addBuiltinPrecisionDoubleTests(TestContext & ctx,TestCaseGroup & dstGroup)7315 void addBuiltinPrecisionDoubleTests (TestContext&		ctx,
7316 									 TestCaseGroup&		dstGroup)
7317 {
7318 	const int userRandoms	= ctx.getCommandLine().getTestIterationCount();
7319 	const int numRandoms	= userRandoms > 0 ? userRandoms : defRandoms;
7320 
7321 	MovePtr<const CaseFactories> cases = createBuiltinDoubleCases();
7322 	for (size_t ndx = 0; ndx < cases->getFactories().size(); ++ndx)
7323 	{
7324 		dstGroup.addChild(createFuncGroupDouble(ctx, *cases->getFactories()[ndx], numRandoms));
7325 	}
7326 }
7327 
BuiltinPrecisionTests(tcu::TestContext & testCtx)7328 BuiltinPrecisionTests::BuiltinPrecisionTests (tcu::TestContext& testCtx)
7329 	: tcu::TestCaseGroup(testCtx, "precision", "Builtin precision tests")
7330 {
7331 }
7332 
~BuiltinPrecisionTests(void)7333 BuiltinPrecisionTests::~BuiltinPrecisionTests (void)
7334 {
7335 }
7336 
init(void)7337 void BuiltinPrecisionTests::init (void)
7338 {
7339 	addBuiltinPrecisionTests(m_testCtx, *this);
7340 }
7341 
BuiltinPrecisionDoubleTests(tcu::TestContext & testCtx)7342 BuiltinPrecisionDoubleTests::BuiltinPrecisionDoubleTests (tcu::TestContext& testCtx)
7343 	: tcu::TestCaseGroup(testCtx, "precision_double", "Builtin precision tests")
7344 {
7345 }
7346 
~BuiltinPrecisionDoubleTests(void)7347 BuiltinPrecisionDoubleTests::~BuiltinPrecisionDoubleTests (void)
7348 {
7349 }
7350 
init(void)7351 void BuiltinPrecisionDoubleTests::init (void)
7352 {
7353 	addBuiltinPrecisionDoubleTests(m_testCtx, *this);
7354 }
7355 
BuiltinPrecision16BitTests(tcu::TestContext & testCtx)7356 BuiltinPrecision16BitTests::BuiltinPrecision16BitTests (tcu::TestContext& testCtx)
7357 	: tcu::TestCaseGroup(testCtx, "precision_fp16_storage16b", "Builtin precision tests")
7358 {
7359 }
7360 
~BuiltinPrecision16BitTests(void)7361 BuiltinPrecision16BitTests::~BuiltinPrecision16BitTests (void)
7362 {
7363 }
7364 
init(void)7365 void BuiltinPrecision16BitTests::init (void)
7366 {
7367 	addBuiltinPrecisionTests(m_testCtx, *this, true);
7368 }
7369 
BuiltinPrecision16Storage32BitTests(tcu::TestContext & testCtx)7370 BuiltinPrecision16Storage32BitTests::BuiltinPrecision16Storage32BitTests(tcu::TestContext& testCtx)
7371 	: tcu::TestCaseGroup(testCtx, "precision_fp16_storage32b", "Builtin precision tests")
7372 {
7373 }
7374 
~BuiltinPrecision16Storage32BitTests(void)7375 BuiltinPrecision16Storage32BitTests::~BuiltinPrecision16Storage32BitTests(void)
7376 {
7377 }
7378 
init(void)7379 void BuiltinPrecision16Storage32BitTests::init(void)
7380 {
7381 	addBuiltinPrecisionTests(m_testCtx, *this, true, true);
7382 }
7383 
7384 } // shaderexecutor
7385 } // vkt
7386