• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2018 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief VK_KHR_shader_float_controls tests.
22  *//*--------------------------------------------------------------------*/
23 
24 
25 #include "vktSpvAsmFloatControlsTests.hpp"
26 #include "vktSpvAsmComputeShaderCase.hpp"
27 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
28 #include "vktTestGroupUtil.hpp"
29 #include "tcuFloat.hpp"
30 #include "tcuFloatFormat.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "deUniquePtr.hpp"
33 #include "deFloat16.h"
34 #include "vkQueryUtil.hpp"
35 #include "vkRefUtil.hpp"
36 #include <cstring>
37 #include <vector>
38 #include <limits>
39 #include <fenv.h>
40 
41 namespace vkt
42 {
43 namespace SpirVAssembly
44 {
45 
46 namespace
47 {
48 
49 using namespace std;
50 using namespace tcu;
51 
// Float widths for which test cases are generated.
enum FloatType
{
	FP16 = 0,
	FP32 = 1,
	FP64 = 2
};
58 
// Describes how far the use of a float type goes in a test.
enum FloatUsage
{
	// For a 16-bit float type this level of use is already supported by
	// VK_KHR_16bit_storage.
	FLOAT_STORAGE_ONLY	= 0,

	// The float type is used in ways that go beyond VK_KHR_16bit_storage.
	FLOAT_ARITHMETIC	= 1
};
67 
// Single-bit flags, one per statement-usage category and float kind
// (generic float, fp16, fp32, fp64). Every enumerator occupies its own bit,
// from bit 0 up to bit 19, so the values can be OR-ed together.
enum FloatStatementUsageBits
{
	// bits 0..3
	B_STATEMENT_USAGE_ARGS_CONST_FLOAT		= 0x00000001,
	B_STATEMENT_USAGE_ARGS_CONST_FP16		= 0x00000002,
	B_STATEMENT_USAGE_ARGS_CONST_FP32		= 0x00000004,
	B_STATEMENT_USAGE_ARGS_CONST_FP64		= 0x00000008,

	// bits 4..7
	B_STATEMENT_USAGE_TYPES_TYPE_FLOAT		= 0x00000010,
	B_STATEMENT_USAGE_TYPES_TYPE_FP16		= 0x00000020,
	B_STATEMENT_USAGE_TYPES_TYPE_FP32		= 0x00000040,
	B_STATEMENT_USAGE_TYPES_TYPE_FP64		= 0x00000080,

	// bits 8..11
	B_STATEMENT_USAGE_CONSTS_TYPE_FLOAT		= 0x00000100,
	B_STATEMENT_USAGE_CONSTS_TYPE_FP16		= 0x00000200,
	B_STATEMENT_USAGE_CONSTS_TYPE_FP32		= 0x00000400,
	B_STATEMENT_USAGE_CONSTS_TYPE_FP64		= 0x00000800,

	// bits 12..15
	B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT	= 0x00001000,
	B_STATEMENT_USAGE_COMMANDS_CONST_FP16	= 0x00002000,
	B_STATEMENT_USAGE_COMMANDS_CONST_FP32	= 0x00004000,
	B_STATEMENT_USAGE_COMMANDS_CONST_FP64	= 0x00008000,

	// bits 16..19
	B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT	= 0x00010000,
	B_STATEMENT_USAGE_COMMANDS_TYPE_FP16	= 0x00020000,
	B_STATEMENT_USAGE_COMMANDS_TYPE_FP32	= 0x00040000,
	B_STATEMENT_USAGE_COMMANDS_TYPE_FP64	= 0x00080000,
};
91 
92 typedef deUint32 FloatStatementUsageFlags;
93 
// Float behaviors that it is possible to test. Each enumerator is a single
// bit named after the execution mode given in its trailing comment.
enum BehaviorFlagBits
{
	B_DENORM_PRESERVE	= (1 << 0),		// DenormPreserve
	B_DENORM_FLUSH		= (1 << 1),		// DenormFlushToZero
	B_ZIN_PRESERVE		= (1 << 2),		// SignedZeroInfNanPreserve
	B_RTE_ROUNDING		= (1 << 3),		// RoundingModeRTE
	B_RTZ_ROUNDING		= (1 << 4)		// RoundingModeRTZ
};
103 
104 typedef deUint32 BehaviorFlags;
105 
// Symbolic codes for every float value used by the tests, both as operation
// arguments and as expected results. Working with codes instead of concrete
// numbers lets the same test description be instantiated for different float
// types, which keeps the test implementation simpler.
enum ValueId
{
	// --- common values used as both arguments and results ---

	// V_UNUSED marks arguments that are not used by an operation, or results
	// of test cases that should be skipped
	V_UNUSED = 0,
	V_MINUS_INF,
	V_MINUS_ONE,		// -1.0
	V_MINUS_ZERO,		// -0.0
	V_ZERO,				//  0.0
	V_HALF,				//  0.5
	V_ONE,				//  1.0
	V_INF,
	V_DENORM,
	V_NAN,

	// --- arguments for rounding mode tests ---
	// (used only when arguments are passed from input)
	V_ADD_ARG_A,
	V_ADD_ARG_B,
	V_SUB_ARG_A,
	V_SUB_ARG_B,
	V_MUL_ARG_A,
	V_MUL_ARG_B,
	V_DOT_ARG_A,
	V_DOT_ARG_B,

	// --- arguments of conversion operations ---
	// (used only when arguments are passed from input)
	V_CONV_FROM_FP32_ARG,
	V_CONV_FROM_FP64_ARG,

	// --- results of the rounding mode tests (RTZ / RTE variants) ---
	V_ADD_RTZ_RESULT,
	V_ADD_RTE_RESULT,
	V_SUB_RTZ_RESULT,
	V_SUB_RTE_RESULT,
	V_MUL_RTZ_RESULT,
	V_MUL_RTE_RESULT,
	V_DOT_RTZ_RESULT,
	V_DOT_RTE_RESULT,

	// --- uncommon results of some operations (corner cases) ---
	V_MINUS_ONE_OR_CLOSE,			// used only for the fp16 subtraction result of preserved denorm and one
	V_PI_DIV_2,
	V_ZERO_OR_MINUS_ZERO,			// both +0 and -0 are accepted
	V_ZERO_OR_ONE,					// both +0 and 1 are accepted
	V_ZERO_OR_FP16_DENORM_TO_FP32,	// both 0 and the fp32 representation of the fp16 denorm are accepted
	V_ZERO_OR_FP16_DENORM_TO_FP64,
	V_ZERO_OR_FP32_DENORM_TO_FP64,
	V_DENORM_TIMES_TWO,
	V_DEGREES_DENORM,
	V_TRIG_ONE,						// 1.0 for trigonometric operations, including precision margin
	V_MINUS_INF_OR_LOG_DENORM,
	V_MINUS_INF_OR_LOG2_DENORM,
	V_ZERO_OR_SQRT_DENORM,
	V_INF_OR_INV_SQRT_DENORM,

	// --- results of conversion operations ---
	V_CONV_TO_FP16_RTZ_RESULT,
	V_CONV_TO_FP16_RTE_RESULT,
	V_CONV_TO_FP32_RTZ_RESULT,
	V_CONV_TO_FP32_RTE_RESULT,
	V_CONV_DENORM_SMALLER,			// used e.g. when converting an fp16 denorm to fp32
	V_CONV_DENORM_BIGGER,
};
170 
// Identifiers of all tested operations. Operations are defined in a generic
// way so that they can be used to generate tests operating on arguments with
// different values of a specified float type.
// The enumerator order defines the numeric values and must not be changed.
enum OperationId
{
	// ---- SPIR-V unary operations ----
	O_NEGATE = 0,
	O_COMPOSITE,
	O_COMPOSITE_INS,
	O_COPY,
	O_D_EXTRACT,
	O_D_INSERT,
	O_SHUFFLE,
	O_TRANSPOSE,
	O_CONV_FROM_FP16,
	O_CONV_FROM_FP32,
	O_CONV_FROM_FP64,
	O_SCONST_CONV_FROM_FP32_TO_FP16,
	O_SCONST_CONV_FROM_FP64_TO_FP32,
	O_SCONST_CONV_FROM_FP64_TO_FP16,
	O_RETURN_VAL,

	// ---- SPIR-V binary operations ----
	O_ADD,
	O_SUB,
	O_MUL,
	O_DIV,
	O_REM,
	O_MOD,
	O_PHI,
	O_SELECT,
	O_DOT,
	O_VEC_MUL_S,
	O_VEC_MUL_M,
	O_MAT_MUL_S,
	O_MAT_MUL_V,
	O_MAT_MUL_M,
	O_OUT_PROD,

	// comparisons, listed as ordered / unordered pairs
	O_ORD_EQ,	O_UORD_EQ,
	O_ORD_NEQ,	O_UORD_NEQ,
	O_ORD_LS,	O_UORD_LS,
	O_ORD_GT,	O_UORD_GT,
	O_ORD_LE,	O_UORD_LE,
	O_ORD_GE,	O_UORD_GE,

	// ---- GLSL unary operations ----
	O_ROUND,
	O_ROUND_EV,
	O_TRUNC,
	O_ABS,
	O_SIGN,
	O_FLOOR,
	O_CEIL,
	O_FRACT,
	O_RADIANS,
	O_DEGREES,
	O_SIN,
	O_COS,
	O_TAN,
	O_ASIN,
	O_ACOS,
	O_ATAN,
	O_SINH,
	O_COSH,
	O_TANH,
	O_ASINH,
	O_ACOSH,
	O_ATANH,
	O_EXP,
	O_LOG,
	O_EXP2,
	O_LOG2,
	O_SQRT,
	O_INV_SQRT,
	O_MODF,
	O_MODF_ST,
	O_FREXP,
	O_FREXP_ST,
	O_LENGHT,
	O_NORMALIZE,
	O_REFLECT,
	O_REFRACT,
	O_MAT_DET,
	O_MAT_INV,
	O_PH_DENORM,				// PackHalf2x16
	O_UPH_DENORM,
	O_PD_DENORM,				// PackDouble2x32
	O_UPD_DENORM_FLUSH,
	O_UPD_DENORM_PRESERVE,

	// ---- GLSL binary operations ----
	O_ATAN2,
	O_POW,
	O_MIX,
	O_FMA,
	O_MIN,
	O_MAX,
	O_CLAMP,
	O_STEP,
	O_SSTEP,
	O_DIST,
	O_CROSS,
	O_FACE_FWD,
	O_NMIN,
	O_NMAX,
	O_NCLAMP,

	// ---- rounding operations ----
	O_ORTE_ROUND,
	O_ORTZ_ROUND
};
287 
288 // Structures storing data required to test DenormPreserve and DenormFlushToZero modes.
289 // Operations are separated into binary and unary lists because binary operations can be tested with
290 // two attributes and thus denorms can be tested in combination with value, denorm, inf and nan.
291 // Unary operations are only tested with denorms.
292 struct BinaryCase
293 {
294 	OperationId	operationId;
295 	ValueId		opVarResult;
296 	ValueId		opDenormResult;
297 	ValueId		opInfResult;
298 	ValueId		opNanResult;
299 };
300 struct UnaryCase
301 {
302 	OperationId	operationId;
303 	ValueId		result;
304 };
305 
// Returns a copy of str in which every occurrence of `from` has been replaced
// with `to`. Occurrences are searched left to right and the text inserted by
// a replacement is never searched again, so `to` may itself contain `from`.
// An empty `from` leaves the string unchanged.
std::string replace(std::string str, const std::string& from, const std::string& to)
{
	// to keep spir-v code clean and easier to read parts of it are processed
	// with this method instead of StringTemplate; main usage of this method is the
	// replacement of "float_" with "f16_", "f32_" or "f64_" depending on test case

	// an empty pattern matches at every position, so without this guard
	// the loop below would keep inserting `to` and never terminate
	if (from.empty())
		return str;

	size_t searchPos = 0;
	while ((searchPos = str.find(from, searchPos)) != std::string::npos)
	{
		str.replace(searchPos, from.length(), to);
		// continue after the inserted text so it is not matched again
		searchPos += to.length();
	}
	return str;
}
321 
// Helper giving access to one storage location both as a float type and as
// an integer type, used for bit conversion int type <-> float type.
template<typename FloatT, typename UintT>
struct RawConvert
{
	union Value
	{
		FloatT	fp;		// storage viewed as the float type
		UintT	ui;		// storage viewed as the integer type
	};
};
332 
333 // Traits used to get int type that can store equivalent float type.
334 template<typename FLOAT_TYPE>
335 struct GetCoresponding
336 {
337 	typedef deUint16 uint_type;
338 };
339 template<>
340 struct GetCoresponding<float>
341 {
342 	typedef deUint32 uint_type;
343 };
344 template<>
345 struct GetCoresponding<double>
346 {
347 	typedef deUint64 uint_type;
348 };
349 
350 // All values used for arguments and operation results are stored in single map.
351 // Each float type (fp16, fp32, fp64) has its own map that is used during
352 // test setup and during verification. TypeValuesBase is interface to that map.
353 class TypeValuesBase
354 {
355 public:
356 	TypeValuesBase();
357 	virtual ~TypeValuesBase() = default;
358 
359 	virtual BufferSp	constructInputBuffer	(const ValueId* twoArguments) const = 0;
360 	virtual BufferSp	constructOutputBuffer	(ValueId result) const = 0;
361 	virtual void		fillInputData			(const ValueId* twoArguments, vector<deUint8>& bufferData, deUint32& offset) const = 0;
362 
363 protected:
364 	const double	pi;
365 };
366 
TypeValuesBase()367 TypeValuesBase::TypeValuesBase()
368 	: pi(3.14159265358979323846)
369 {
370 }
371 
372 typedef de::SharedPtr<TypeValuesBase> TypeValuesSP;
373 
374 template <typename FLOAT_TYPE>
375 class TypeValues: public TypeValuesBase
376 {
377 public:
378 	TypeValues();
379 
380 	BufferSp	constructInputBuffer	(const ValueId* twoArguments) const override;
381 	BufferSp	constructOutputBuffer	(ValueId result) const override;
382 	void		fillInputData			(const ValueId* twoArguments, vector<deUint8>& bufferData, deUint32& offset) const override;
383 
384 	FLOAT_TYPE getValue(ValueId id) const;
385 
386 	template <typename UINT_TYPE>
387 	FLOAT_TYPE exactByteEquivalent(UINT_TYPE byteValue) const;
388 
389 private:
390 	typedef map<ValueId, FLOAT_TYPE> ValueMap;
391 	ValueMap m_valueIdToFloatType;
392 };
393 
394 template <typename FLOAT_TYPE>
constructInputBuffer(const ValueId * twoArguments) const395 BufferSp TypeValues<FLOAT_TYPE>::constructInputBuffer(const ValueId* twoArguments) const
396 {
397 	std::vector<FLOAT_TYPE> inputData(2);
398 	inputData[0] = m_valueIdToFloatType.at(twoArguments[0]);
399 	inputData[1] = m_valueIdToFloatType.at(twoArguments[1]);
400 	return BufferSp(new Buffer<FLOAT_TYPE>(inputData));
401 }
402 
403 template <typename FLOAT_TYPE>
constructOutputBuffer(ValueId result) const404 BufferSp TypeValues<FLOAT_TYPE>::constructOutputBuffer(ValueId result) const
405 {
406 	// note: we are not doing maping here, ValueId is directly saved in
407 	// float type in order to be able to retireve it during verification
408 
409 	typedef typename GetCoresponding<FLOAT_TYPE>::uint_type uint_t;
410 	uint_t value = static_cast<uint_t>(result);
411 
412 	std::vector<FLOAT_TYPE> outputData(1, exactByteEquivalent<uint_t>(value));
413 	return BufferSp(new Buffer<FLOAT_TYPE>(outputData));
414 }
415 
416 template <typename FLOAT_TYPE>
fillInputData(const ValueId * twoArguments,vector<deUint8> & bufferData,deUint32 & offset) const417 void TypeValues<FLOAT_TYPE>::fillInputData(const ValueId* twoArguments, vector<deUint8>& bufferData, deUint32& offset) const
418 {
419 	deUint32 typeSize = sizeof(FLOAT_TYPE);
420 
421 	FLOAT_TYPE argA = getValue(twoArguments[0]);
422 	deMemcpy(&bufferData[offset], &argA, typeSize);
423 	offset += typeSize;
424 
425 	FLOAT_TYPE argB = getValue(twoArguments[1]);
426 	deMemcpy(&bufferData[offset], &argB, typeSize);
427 	offset += typeSize;
428 }
429 
430 template <typename FLOAT_TYPE>
getValue(ValueId id) const431 FLOAT_TYPE TypeValues<FLOAT_TYPE>::getValue(ValueId id) const
432 {
433 	return m_valueIdToFloatType.at(id);
434 }
435 
436 template <typename FLOAT_TYPE>
437 template <typename UINT_TYPE>
exactByteEquivalent(UINT_TYPE byteValue) const438 FLOAT_TYPE TypeValues<FLOAT_TYPE>::exactByteEquivalent(UINT_TYPE byteValue) const
439 {
440 	typename RawConvert<FLOAT_TYPE, UINT_TYPE>::Value value;
441 	value.ui = byteValue;
442 	return value.fp;
443 }
444 
445 template <>
TypeValues()446 TypeValues<deFloat16>::TypeValues()
447 	: TypeValuesBase()
448 {
449 	// NOTE: when updating entries in m_valueIdToFloatType make sure to
450 	// update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
451 	ValueMap& vm = m_valueIdToFloatType;
452 	vm[V_UNUSED]			= deFloat32To16(0.0f);
453 	vm[V_MINUS_INF]			= 0xfc00;
454 	vm[V_MINUS_ONE]			= deFloat32To16(-1.0f);
455 	vm[V_MINUS_ZERO]		= 0x8000;
456 	vm[V_ZERO]				= 0x0000;
457 	vm[V_HALF]				= deFloat32To16(0.5f);
458 	vm[V_ONE]				= deFloat32To16(1.0f);
459 	vm[V_INF]				= 0x7c00;
460 	vm[V_DENORM]			= 0x03f0; // this value should be the same as the result of denormBase - epsilon
461 	vm[V_NAN]				= 0x7cf0;
462 
463 	vm[V_PI_DIV_2]			= 0x3e48;
464 	vm[V_DENORM_TIMES_TWO]	= 0x07e0;
465 	vm[V_DEGREES_DENORM]	= 0x1b0c;
466 
467 	vm[V_ADD_ARG_A]					= 0x3c03;
468 	vm[V_ADD_ARG_B]					= vm[V_ONE];
469 	vm[V_SUB_ARG_A]					= vm[V_ADD_ARG_A];
470 	vm[V_SUB_ARG_B]					= 0x4203;
471 	vm[V_MUL_ARG_A]					= vm[V_ADD_ARG_A];
472 	vm[V_MUL_ARG_B]					= 0x1900;
473 	vm[V_DOT_ARG_A]					= vm[V_ADD_ARG_A];
474 	vm[V_DOT_ARG_B]					= vm[V_MUL_ARG_B];
475 	vm[V_CONV_FROM_FP32_ARG]		= vm[V_UNUSED];
476 	vm[V_CONV_FROM_FP64_ARG]		= vm[V_UNUSED];
477 
478 	vm[V_ADD_RTZ_RESULT]			= 0x4001;	// deFloat16Add(vm[V_ADD_ARG_A], vm[V_ADD_ARG_B], rtz)
479 	vm[V_SUB_RTZ_RESULT]			= 0xc001;	// deFloat16Sub(vm[V_SUB_ARG_A], vm[V_SUB_ARG_B], rtz)
480 	vm[V_MUL_RTZ_RESULT]			= 0x1903;	// deFloat16Mul(vm[V_MUL_ARG_A], vm[V_MUL_ARG_B], rtz)
481 	vm[V_DOT_RTZ_RESULT]			= 0x1d03;
482 	vm[V_CONV_TO_FP16_RTZ_RESULT]	= deFloat32To16Round(1.22334445f, DE_ROUNDINGMODE_TO_ZERO);
483 	vm[V_CONV_TO_FP32_RTZ_RESULT]	= vm[V_UNUSED];
484 
485 	vm[V_ADD_RTE_RESULT]			= 0x4002;	// deFloat16Add(vm[V_ADD_ARG_A], vm[V_ADD_ARG_B], rte)
486 	vm[V_SUB_RTE_RESULT]			= 0xc002;	// deFloat16Sub(vm[V_SUB_ARG_A], vm[V_SUB_ARG_B], rte)
487 	vm[V_MUL_RTE_RESULT]			= 0x1904;	// deFloat16Mul(vm[V_MUL_ARG_A], vm[V_MUL_ARG_B], rte)
488 	vm[V_DOT_RTE_RESULT]			= 0x1d04;
489 	vm[V_CONV_TO_FP16_RTE_RESULT]	= deFloat32To16Round(1.22334445f, DE_ROUNDINGMODE_TO_NEAREST_EVEN);
490 	vm[V_CONV_TO_FP32_RTE_RESULT]	= vm[V_UNUSED];
491 
492 	// there is no precision to store fp32 denorm nor fp64 denorm
493 	vm[V_CONV_DENORM_SMALLER]		= vm[V_ZERO];
494 	vm[V_CONV_DENORM_BIGGER]		= vm[V_ZERO];
495 }
496 
497 template <>
TypeValues()498 TypeValues<float>::TypeValues()
499 	: TypeValuesBase()
500 {
501 	// NOTE: when updating entries in m_valueIdToFloatType make sure to
502 	// update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
503 	ValueMap& vm = m_valueIdToFloatType;
504 	vm[V_UNUSED]			=  0.0f;
505 	vm[V_MINUS_INF]			= -std::numeric_limits<float>::infinity();
506 	vm[V_MINUS_ONE]			= -1.0f;
507 	vm[V_MINUS_ZERO]		= -0.0f;
508 	vm[V_ZERO]				=  0.0f;
509 	vm[V_HALF]				=  0.5f;
510 	vm[V_ONE]				=  1.0f;
511 	vm[V_INF]				=  std::numeric_limits<float>::infinity();
512 	vm[V_DENORM]			=  static_cast<float>(1.413e-42); // 0x000003f0
513 	vm[V_NAN]				=  std::numeric_limits<float>::quiet_NaN();
514 
515 	vm[V_PI_DIV_2]			=  static_cast<float>(pi / 2);
516 	vm[V_DENORM_TIMES_TWO]	=  vm[V_DENORM] + vm[V_DENORM];
517 	vm[V_DEGREES_DENORM]	=  deFloatDegrees(vm[V_DENORM]);
518 
519 	float e = std::numeric_limits<float>::epsilon();
520 	vm[V_ADD_ARG_A]					= 1.0f + 3 * e;
521 	vm[V_ADD_ARG_B]					= 1.0f;
522 	vm[V_SUB_ARG_A]					= vm[V_ADD_ARG_A];
523 	vm[V_SUB_ARG_B]					= 3.0f + 6 * e;
524 	vm[V_MUL_ARG_A]					= vm[V_ADD_ARG_A];
525 	vm[V_MUL_ARG_B]					= 5 * e;
526 	vm[V_DOT_ARG_A]					= vm[V_ADD_ARG_A];
527 	vm[V_DOT_ARG_B]					= 5 * e;
528 	vm[V_CONV_FROM_FP32_ARG]		= 1.22334445f;
529 	vm[V_CONV_FROM_FP64_ARG]		= vm[V_UNUSED];
530 
531 	int prevRound = fegetround();
532 	fesetround(FE_TOWARDZERO);
533 	vm[V_ADD_RTZ_RESULT]			= vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
534 	vm[V_SUB_RTZ_RESULT]			= vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
535 	vm[V_MUL_RTZ_RESULT]			= vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
536 	vm[V_DOT_RTZ_RESULT]			= vm[V_MUL_RTZ_RESULT] + vm[V_MUL_RTZ_RESULT];
537 	vm[V_CONV_TO_FP16_RTZ_RESULT]	= vm[V_UNUSED];
538 	vm[V_CONV_TO_FP32_RTZ_RESULT]	= exactByteEquivalent<deUint32>(0x3f9c968d); // result of conversion from double(1.22334455)
539 
540 	fesetround(FE_TONEAREST);
541 	vm[V_ADD_RTE_RESULT]			= vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
542 	vm[V_SUB_RTE_RESULT]			= vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
543 	vm[V_MUL_RTE_RESULT]			= vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
544 	vm[V_DOT_RTE_RESULT]			= vm[V_MUL_RTE_RESULT] + vm[V_MUL_RTE_RESULT];
545 	vm[V_CONV_TO_FP16_RTE_RESULT]	= vm[V_UNUSED];
546 	vm[V_CONV_TO_FP32_RTE_RESULT]	= exactByteEquivalent<deUint32>(0x3f9c968e); // result of conversion from double(1.22334455)
547 	fesetround(prevRound);
548 
549 	// there is no precision to store fp64 denorm
550 	vm[V_CONV_DENORM_SMALLER]		= exactByteEquivalent<deUint32>(0x387c0000); // fp16 denorm
551 	vm[V_CONV_DENORM_BIGGER]		= vm[V_ZERO];
552 }
553 
554 template <>
TypeValues()555 TypeValues<double>::TypeValues()
556 	: TypeValuesBase()
557 {
558 	// NOTE: when updating entries in m_valueIdToFloatType make sure to
559 	// update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
560 	ValueMap& vm = m_valueIdToFloatType;
561 	vm[V_UNUSED]			=  0.0;
562 	vm[V_MINUS_INF]			= -std::numeric_limits<double>::infinity();
563 	vm[V_MINUS_ONE]			= -1.0;
564 	vm[V_MINUS_ZERO]		= -0.0;
565 	vm[V_ZERO]				=  0.0;
566 	vm[V_HALF]				=  0.5;
567 	vm[V_ONE]				=  1.0;
568 	vm[V_INF]				=  std::numeric_limits<double>::infinity();
569 	vm[V_DENORM]			=  4.98e-321; // 0x00000000000003F0
570 	vm[V_NAN]				=  std::numeric_limits<double>::quiet_NaN();
571 
572 	vm[V_PI_DIV_2]			=  pi / 2;
573 	vm[V_DENORM_TIMES_TWO]	=  vm[V_DENORM] + vm[V_DENORM];
574 	vm[V_DEGREES_DENORM]	=  vm[V_UNUSED];
575 
576 	double e = std::numeric_limits<double>::epsilon();
577 	vm[V_ADD_ARG_A]				= 1.0 + 3 * e;
578 	vm[V_ADD_ARG_B]				= 1.0;
579 	vm[V_SUB_ARG_A]				= vm[V_ADD_ARG_A];
580 	vm[V_SUB_ARG_B]				= 3.0 + 6 * e;
581 	vm[V_MUL_ARG_A]				= vm[V_ADD_ARG_A];
582 	vm[V_MUL_ARG_B]				= 5 * e;
583 	vm[V_DOT_ARG_A]				= vm[V_ADD_ARG_A];
584 	vm[V_DOT_ARG_B]				= 5 * e;
585 	vm[V_CONV_FROM_FP32_ARG]	= vm[V_UNUSED];
586 	vm[V_CONV_FROM_FP64_ARG]	= 1.22334455;
587 
588 	int prevRound = fegetround();
589 	fesetround(FE_TOWARDZERO);
590 	vm[V_ADD_RTZ_RESULT]			= vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
591 	vm[V_SUB_RTZ_RESULT]			= vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
592 	vm[V_MUL_RTZ_RESULT]			= vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
593 	vm[V_DOT_RTZ_RESULT]			= vm[V_MUL_RTZ_RESULT] + vm[V_MUL_RTZ_RESULT];
594 	vm[V_CONV_TO_FP16_RTZ_RESULT]	= vm[V_UNUSED];
595 	vm[V_CONV_TO_FP32_RTZ_RESULT]	= vm[V_UNUSED];
596 
597 	fesetround(FE_TONEAREST);
598 	vm[V_ADD_RTE_RESULT]			= vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
599 	vm[V_SUB_RTE_RESULT]			= vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
600 	vm[V_MUL_RTE_RESULT]			= vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
601 	vm[V_DOT_RTE_RESULT]			= vm[V_MUL_RTE_RESULT] + vm[V_MUL_RTE_RESULT];
602 	vm[V_CONV_TO_FP16_RTE_RESULT]	= vm[V_UNUSED];
603 	vm[V_CONV_TO_FP32_RTE_RESULT]	= vm[V_UNUSED];
604 	fesetround(prevRound);
605 
606 	vm[V_CONV_DENORM_SMALLER]		= exactByteEquivalent<deUint64>(0x3f0f800000000000); // 0x03f0 is fp16 denorm
607 	vm[V_CONV_DENORM_BIGGER]		= exactByteEquivalent<deUint64>(0x373f800000000000); // 0x000003f0 is fp32 denorm
608 }
609 
610 // Each float type (fp16, fp32, fp64) has specific set of SPIR-V snippets
611 // that was extracted to separate template specialization. Those snippets
612 // are used to compose final test shaders. With this approach
613 // parameterization can be done just once per type and reused for many tests.
614 class TypeSnippetsBase
615 {
616 public:
617 	virtual ~TypeSnippetsBase() = default;
618 
619 protected:
620 	void updateSpirvSnippets();
621 
622 public: // Type specific data:
623 
624 	// Number of bits consumed by float type
625 	string bitWidth;
626 
627 	// Minimum positive normal
628 	string epsilon;
629 
630 	// denormBase is a normal value (found empirically) used to generate denorm value.
631 	// Denorm is generated by substracting epsilon from denormBase.
632 	// denormBase is not a denorm - it is used to create denorm.
633 	// This value is needed when operations are tested with arguments that were
634 	// generated in the code. Generated denorm should be the same as denorm
635 	// used when arguments are passed via input (m_valueIdToFloatType[V_DENORM]).
636 	// This is required as result of some operations depends on actual denorm value
637 	// e.g. OpRadians(0x0001) is 0 but OpRadians(0x03f0) is denorm.
638 	string denormBase;
639 
640 	string capabilities;
641 	string extensions;
642 	string arrayStride;
643 
644 	bool loadStoreRequiresShaderFloat16;
645 
646 public: // Type specific spir-v snippets:
647 
648 	// Common annotations
649 	string typeAnnotationsSnippet;
650 
651 	// Definitions of all types commonly used by operation tests
652 	string typeDefinitionsSnippet;
653 
654 	// Definitions of all types commonly used by settings tests
655 	string minTypeDefinitionsSnippet;
656 
657 	// Definitions of all constants commonly used by tests
658 	string constantsDefinitionsSnippet;
659 
660 	// Map that stores instructions that generate arguments of specified value.
661 	// Every test that uses generated inputod will select up to two items from this map
662 	typedef map<ValueId, string> SnippetMap;
663 	SnippetMap valueIdToSnippetArgMap;
664 
665 	// Spir-v snippets that read argument from SSBO
666 	string argumentsFromInputSnippet;
667 	string multiArgumentsFromInputSnippet;
668 
669 	// SSBO with stage input/output definitions
670 	string inputAnnotationsSnippet;
671 	string inputDefinitionsSnippet;
672 	string outputAnnotationsSnippet;
673 	string multiOutputAnnotationsSnippet;
674 	string outputDefinitionsSnippet;
675 	string multiOutputDefinitionsSnippet;
676 
677 	// Varying is required to pass result from vertex stage to fragment stage,
678 	// one of requirements was to not use SSBO writes in vertex stage so we
679 	// need to do that in fragment stage; we also cant pass operation result
680 	// directly because of interpolation, to avoid it we do a bitcast to uint
681 	string varyingsTypesSnippet;
682 	string inputVaryingsSnippet;
683 	string outputVaryingsSnippet;
684 	string storeVertexResultSnippet;
685 	string loadVertexResultSnippet;
686 
687 	string storeResultsSnippet;
688 	string multiStoreResultsSnippet;
689 };
690 
updateSpirvSnippets()691 void TypeSnippetsBase::updateSpirvSnippets()
692 {
693 	// annotations to types that are commonly used by tests
694 	const string typeAnnotationsTemplate =
695 		"OpDecorate %type_float_arr_1 ArrayStride " + arrayStride + "\n"
696 		"OpDecorate %type_float_arr_2 ArrayStride " + arrayStride + "\n";
697 
698 	// definition off all types that are commonly used by tests
699 	const string typeDefinitionsTemplate =
700 		"%type_float             = OpTypeFloat " + bitWidth + "\n"
701 		"%type_float_uptr        = OpTypePointer Uniform %type_float\n"
702 		"%type_float_fptr        = OpTypePointer Function %type_float\n"
703 		"%type_float_vec2        = OpTypeVector %type_float 2\n"
704 		"%type_float_vec3        = OpTypeVector %type_float 3\n"
705 		"%type_float_vec4        = OpTypeVector %type_float 4\n"
706 		"%type_float_vec4_iptr   = OpTypePointer Input %type_float_vec4\n"
707 		"%type_float_vec4_optr   = OpTypePointer Output %type_float_vec4\n"
708 		"%type_float_mat2x2      = OpTypeMatrix %type_float_vec2 2\n"
709 		"%type_float_arr_1       = OpTypeArray %type_float %c_i32_1\n"
710 		"%type_float_arr_2       = OpTypeArray %type_float %c_i32_2\n";
711 
712 	// minimal type definition set that is used by settings tests
713 	const string minTypeDefinitionsTemplate =
714 		"%type_float             = OpTypeFloat " + bitWidth + "\n"
715 		"%type_float_uptr        = OpTypePointer Uniform %type_float\n"
716 		"%type_float_arr_2       = OpTypeArray %type_float %c_i32_2\n";
717 
718 	// definition off all constants that are used by tests
719 	const string constantsDefinitionsTemplate =
720 		"%c_float_n1             = OpConstant %type_float -1\n"
721 		"%c_float_0              = OpConstant %type_float 0.0\n"
722 		"%c_float_0_5            = OpConstant %type_float 0.5\n"
723 		"%c_float_1              = OpConstant %type_float 1\n"
724 		"%c_float_2              = OpConstant %type_float 2\n"
725 		"%c_float_3              = OpConstant %type_float 3\n"
726 		"%c_float_4              = OpConstant %type_float 4\n"
727 		"%c_float_5              = OpConstant %type_float 5\n"
728 		"%c_float_6              = OpConstant %type_float 6\n"
729 		"%c_float_eps            = OpConstant %type_float " + epsilon + "\n"
730 		"%c_float_denorm_base    = OpConstant %type_float " + denormBase + "\n";
731 
732 	// when arguments are read from SSBO this snipped is placed in main function
733 	const string argumentsFromInputTemplate =
734 		"%arg1loc                = OpAccessChain %type_float_uptr %ssbo_in %c_i32_0 %c_i32_0\n"
735 		"%arg1                   = OpLoad %type_float %arg1loc\n"
736 		"%arg2loc                = OpAccessChain %type_float_uptr %ssbo_in %c_i32_0 %c_i32_1\n"
737 		"%arg2                   = OpLoad %type_float %arg2loc\n";
738 
739 	const string multiArgumentsFromInputTemplate =
740 		"%arg1_float_loc         = OpAccessChain %type_float_uptr %ssbo_in %c_i32_${attr} %c_i32_0\n"
741 		"%arg2_float_loc         = OpAccessChain %type_float_uptr %ssbo_in %c_i32_${attr} %c_i32_1\n"
742 		"%arg1_float             = OpLoad %type_float %arg1_float_loc\n"
743 		"%arg2_float             = OpLoad %type_float %arg2_float_loc\n";
744 
745 	// when tested shader stage reads from SSBO it has to have this snippet
746 	inputAnnotationsSnippet =
747 		"OpMemberDecorate %SSBO_in 0 Offset 0\n"
748 		"OpDecorate %SSBO_in BufferBlock\n"
749 		"OpDecorate %ssbo_in DescriptorSet 0\n"
750 		"OpDecorate %ssbo_in Binding 0\n"
751 		"OpDecorate %ssbo_in NonWritable\n";
752 
753 	const string inputDefinitionsTemplate =
754 		"%SSBO_in              = OpTypeStruct %type_float_arr_2\n"
755 		"%up_SSBO_in           = OpTypePointer Uniform %SSBO_in\n"
756 		"%ssbo_in              = OpVariable %up_SSBO_in Uniform\n";
757 
758 	outputAnnotationsSnippet =
759 		"OpMemberDecorate %SSBO_out 0 Offset 0\n"
760 		"OpDecorate %SSBO_out BufferBlock\n"
761 		"OpDecorate %ssbo_out DescriptorSet 0\n"
762 		"OpDecorate %ssbo_out Binding 1\n";
763 
764 	const string multiOutputAnnotationsTemplate =
765 		"OpMemberDecorate %SSBO_float_out 0 Offset 0\n"
766 		"OpDecorate %type_float_arr_2 ArrayStride "+ arrayStride + "\n"
767 		"OpDecorate %SSBO_float_out BufferBlock\n"
768 		"OpDecorate %ssbo_float_out DescriptorSet 0\n";
769 
770 	const string outputDefinitionsTemplate =
771 		"%SSBO_out             = OpTypeStruct %type_float_arr_1\n"
772 		"%up_SSBO_out          = OpTypePointer Uniform %SSBO_out\n"
773 		"%ssbo_out             = OpVariable %up_SSBO_out Uniform\n";
774 
775 	const string multiOutputDefinitionsTemplate =
776 		"%SSBO_float_out         = OpTypeStruct %type_float\n"
777 		"%up_SSBO_float_out      = OpTypePointer Uniform %SSBO_float_out\n"
778 		"%ssbo_float_out         = OpVariable %up_SSBO_float_out Uniform\n";
779 
780 	// this snippet is used by compute and fragment stage but not by vertex stage
781 	const string storeResultsTemplate =
782 		"%outloc               = OpAccessChain %type_float_uptr %ssbo_out %c_i32_0 %c_i32_0\n"
783 		"OpStore %outloc %result\n";
784 
785 	const string multiStoreResultsTemplate =
786 		"%outloc" + bitWidth + "             = OpAccessChain %type_float_uptr %ssbo_float_out %c_i32_0\n"
787 		"                        OpStore %outloc" + bitWidth + " %result" + bitWidth + "\n";
788 
789 	const string typeToken	= "_float";
790 	const string typeName	= "_f" + bitWidth;
791 
792 	typeAnnotationsSnippet			= replace(typeAnnotationsTemplate, typeToken, typeName);
793 	typeDefinitionsSnippet			= replace(typeDefinitionsTemplate, typeToken, typeName);
794 	minTypeDefinitionsSnippet		= replace(minTypeDefinitionsTemplate, typeToken, typeName);
795 	constantsDefinitionsSnippet		= replace(constantsDefinitionsTemplate, typeToken, typeName);
796 	argumentsFromInputSnippet		= replace(argumentsFromInputTemplate, typeToken, typeName);
797 	multiArgumentsFromInputSnippet	= replace(multiArgumentsFromInputTemplate, typeToken, typeName);
798 	inputDefinitionsSnippet			= replace(inputDefinitionsTemplate, typeToken, typeName);
799 	multiOutputAnnotationsSnippet	= replace(multiOutputAnnotationsTemplate, typeToken, typeName);
800 	outputDefinitionsSnippet		= replace(outputDefinitionsTemplate, typeToken, typeName);
801 	multiOutputDefinitionsSnippet	= replace(multiOutputDefinitionsTemplate, typeToken, typeName);
802 	storeResultsSnippet				= replace(storeResultsTemplate, typeToken, typeName);
803 	multiStoreResultsSnippet		= replace(multiStoreResultsTemplate, typeToken, typeName);
804 
805 	// NOTE: only values used as _generated_ arguments in test operations
806 	// need to be in this map, arguments that are only used by tests,
807 	// that grab arguments from input, do need to be in this map
808 	// NOTE: when updating entries in valueIdToSnippetArgMap make
809 	// sure to update also m_valueIdToFloatType for all float width
810 	SnippetMap& sm = valueIdToSnippetArgMap;
811 	sm[V_UNUSED]		= "OpFSub %type_float %c_float_0 %c_float_0\n";
812 	sm[V_MINUS_INF]		= "OpFDiv %type_float %c_float_n1 %c_float_0\n";
813 	sm[V_MINUS_ONE]		= "OpFAdd %type_float %c_float_n1 %c_float_0\n";
814 	sm[V_MINUS_ZERO]	= "OpFMul %type_float %c_float_n1 %c_float_0\n";
815 	sm[V_ZERO]			= "OpFMul %type_float %c_float_0 %c_float_0\n";
816 	sm[V_HALF]			= "OpFAdd %type_float %c_float_0_5 %c_float_0\n";
817 	sm[V_ONE]			= "OpFAdd %type_float %c_float_1 %c_float_0\n";
818 	sm[V_INF]			= "OpFDiv %type_float %c_float_1 %c_float_0\n";					// x / 0		== Inf
819 	sm[V_DENORM]		= "OpFSub %type_float %c_float_denorm_base %c_float_eps\n";
820 	sm[V_NAN]			= "OpFDiv %type_float %c_float_0 %c_float_0\n";					// 0 / 0		== Nan
821 
822 	map<ValueId, string>::iterator it;
823 	for ( it = sm.begin(); it != sm.end(); it++ )
824 		sm[it->first] = replace(it->second, typeToken, typeName);
825 }
826 
827 typedef de::SharedPtr<TypeSnippetsBase> TypeSnippetsSP;
828 
829 template<typename FLOAT_TYPE>
830 class TypeSnippets: public TypeSnippetsBase
831 {
832 public:
833 	TypeSnippets();
834 };
835 
836 template<>
TypeSnippets()837 TypeSnippets<deFloat16>::TypeSnippets()
838 {
839 	bitWidth		= "16";
840 	epsilon			= "6.104e-5";	// 2^-14 = 0x0400
841 
842 	// 1.2113e-4 is 0x07f0 which after substracting epsilon will give 0x03f0 (same as vm[V_DENORM])
843 	// NOTE: constants in SPIR-V cant be specified as exact fp16 - there is conversion from double to fp16
844 	denormBase		= "1.2113e-4";
845 
846 	capabilities	= "OpCapability StorageUniform16\n";
847 	extensions		= "OpExtension \"SPV_KHR_16bit_storage\"\n";
848 	arrayStride		= "2";
849 
850 	varyingsTypesSnippet =
851 					"%type_u32_iptr        = OpTypePointer Input %type_u32\n"
852 					"%type_u32_optr        = OpTypePointer Output %type_u32\n";
853 	inputVaryingsSnippet =
854 					"%BP_vertex_result    = OpVariable %type_u32_iptr Input\n";
855 	outputVaryingsSnippet =
856 					"%BP_vertex_result    = OpVariable %type_u32_optr Output\n";
857 	storeVertexResultSnippet =
858 					"%tmp_vec2            = OpCompositeConstruct %type_f16_vec2 %result %c_f16_0\n"
859 					"%packed_result       = OpBitcast %type_u32 %tmp_vec2\n"
860 					"OpStore %BP_vertex_result %packed_result\n";
861 	loadVertexResultSnippet =
862 					"%packed_result       = OpLoad %type_u32 %BP_vertex_result\n"
863 					"%tmp_vec2            = OpBitcast %type_f16_vec2 %packed_result\n"
864 					"%result              = OpCompositeExtract %type_f16 %tmp_vec2 0\n";
865 
866 	loadStoreRequiresShaderFloat16 = true;
867 
868 	updateSpirvSnippets();
869 }
870 
871 template<>
TypeSnippets()872 TypeSnippets<float>::TypeSnippets()
873 {
874 	bitWidth		= "32";
875 	epsilon			= "1.175494351e-38";
876 	denormBase		= "1.1756356e-38";
877 	capabilities	= "";
878 	extensions		= "";
879 	arrayStride		= "4";
880 
881 	varyingsTypesSnippet =
882 					"%type_u32_iptr        = OpTypePointer Input %type_u32\n"
883 					"%type_u32_optr        = OpTypePointer Output %type_u32\n";
884 	inputVaryingsSnippet =
885 					"%BP_vertex_result    = OpVariable %type_u32_iptr Input\n";
886 	outputVaryingsSnippet =
887 					"%BP_vertex_result    = OpVariable %type_u32_optr Output\n";
888 	storeVertexResultSnippet =
889 					"%packed_result       = OpBitcast %type_u32 %result\n"
890 					"OpStore %BP_vertex_result %packed_result\n";
891 	loadVertexResultSnippet =
892 					"%packed_result       = OpLoad %type_u32 %BP_vertex_result\n"
893 					"%result              = OpBitcast %type_f32 %packed_result\n";
894 
895 	loadStoreRequiresShaderFloat16 = false;
896 
897 	updateSpirvSnippets();
898 }
899 
900 template<>
TypeSnippets()901 TypeSnippets<double>::TypeSnippets()
902 {
903 	bitWidth		= "64";
904 	epsilon			= "2.2250738585072014e-308"; // 0x0010000000000000
905 	denormBase		= "2.2250738585076994e-308"; // 0x00100000000003F0
906 	capabilities	= "OpCapability Float64\n";
907 	extensions		= "";
908 	arrayStride		= "8";
909 
910 	varyingsTypesSnippet =
911 					"%type_u32_vec2_iptr   = OpTypePointer Input %type_u32_vec2\n"
912 					"%type_u32_vec2_optr   = OpTypePointer Output %type_u32_vec2\n";
913 	inputVaryingsSnippet =
914 					"%BP_vertex_result     = OpVariable %type_u32_vec2_iptr Input\n";
915 	outputVaryingsSnippet =
916 					"%BP_vertex_result     = OpVariable %type_u32_vec2_optr Output\n";
917 	storeVertexResultSnippet =
918 					"%packed_result        = OpBitcast %type_u32_vec2 %result\n"
919 					"OpStore %BP_vertex_result %packed_result\n";
920 	loadVertexResultSnippet =
921 					"%packed_result        = OpLoad %type_u32_vec2 %BP_vertex_result\n"
922 					"%result               = OpBitcast %type_f64 %packed_result\n";
923 
924 	loadStoreRequiresShaderFloat16 = false;
925 
926 	updateSpirvSnippets();
927 }
928 
929 class TypeTestResultsBase
930 {
931 public:
~TypeTestResultsBase()932 	virtual ~TypeTestResultsBase() {}
933 	FloatType floatType() const;
934 
935 protected:
936 	FloatType m_floatType;
937 
938 public:
939 	// Vectors containing test data for float controls
940 	vector<BinaryCase>	binaryOpFTZ;
941 	vector<UnaryCase>	unaryOpFTZ;
942 	vector<BinaryCase>	binaryOpDenormPreserve;
943 	vector<UnaryCase>	unaryOpDenormPreserve;
944 };
945 
floatType() const946 FloatType TypeTestResultsBase::floatType() const
947 {
948 	return m_floatType;
949 }
950 
951 typedef de::SharedPtr<TypeTestResultsBase> TypeTestResultsSP;
952 
953 template<typename FLOAT_TYPE>
954 class TypeTestResults: public TypeTestResultsBase
955 {
956 public:
957 	TypeTestResults();
958 };
959 
960 template<>
TypeTestResults()961 TypeTestResults<deFloat16>::TypeTestResults()
962 {
963 	m_floatType = FP16;
964 
965 	// note: there are many FTZ test cases that can produce diferent result depending
966 	// on input denorm being flushed or not; because of that FTZ tests can be limited
967 	// to those that return denorm as those are the ones affected by tested extension
968 	const BinaryCase binaryOpFTZArr[] = {
969 		//operation		den op one		den op den		den op inf		den op nan
970 		{ O_ADD,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
971 		{ O_SUB,		V_MINUS_ONE,	V_ZERO,			V_MINUS_INF,	V_UNUSED },
972 		{ O_MUL,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
973 		{ O_DIV,		V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
974 		{ O_REM,		V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
975 		{ O_MOD,		V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
976 		{ O_VEC_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
977 		{ O_VEC_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
978 		{ O_MAT_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
979 		{ O_MAT_MUL_V,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
980 		{ O_MAT_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
981 		{ O_OUT_PROD,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
982 		{ O_DOT,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
983 		{ O_ATAN2,		V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
984 		{ O_POW,		V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
985 		{ O_MIX,		V_HALF,			V_ZERO,			V_INF,			V_UNUSED },
986 		{ O_MIN,		V_ZERO,			V_ZERO,			V_ZERO,			V_UNUSED },
987 		{ O_MAX,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
988 		{ O_CLAMP,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
989 		{ O_STEP,		V_ONE,			V_ONE,			V_ONE,			V_UNUSED },
990 		{ O_SSTEP,		V_HALF,			V_ONE,			V_ZERO,			V_UNUSED },
991 		{ O_FMA,		V_HALF,			V_HALF,			V_UNUSED,		V_UNUSED },
992 		{ O_FACE_FWD,	V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE },
993 		{ O_NMIN,		V_ZERO,			V_ZERO,			V_ZERO,			V_ZERO },
994 		{ O_NMAX,		V_ONE,			V_ZERO,			V_INF,			V_ZERO },
995 		{ O_NCLAMP,		V_ONE,			V_ZERO,			V_INF,			V_ZERO },
996 		{ O_DIST,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
997 		{ O_CROSS,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
998 	};
999 
1000 	const UnaryCase unaryOpFTZArr[] = {
1001 		//operation			op den
1002 		{ O_NEGATE,			V_MINUS_ZERO },
1003 		{ O_ROUND,			V_ZERO },
1004 		{ O_ROUND_EV,		V_ZERO },
1005 		{ O_TRUNC,			V_ZERO },
1006 		{ O_ABS,			V_ZERO },
1007 		{ O_FLOOR,			V_ZERO },
1008 		{ O_CEIL,			V_ZERO_OR_ONE },
1009 		{ O_FRACT,			V_ZERO },
1010 		{ O_RADIANS,		V_ZERO },
1011 		{ O_DEGREES,		V_ZERO },
1012 		{ O_SIN,			V_ZERO },
1013 		{ O_COS,			V_TRIG_ONE },
1014 		{ O_TAN,			V_ZERO },
1015 		{ O_ASIN,			V_ZERO },
1016 		{ O_ACOS,			V_PI_DIV_2 },
1017 		{ O_ATAN,			V_ZERO },
1018 		{ O_SINH,			V_ZERO },
1019 		{ O_COSH,			V_ONE },
1020 		{ O_TANH,			V_ZERO },
1021 		{ O_ASINH,			V_ZERO },
1022 		{ O_ACOSH,			V_UNUSED },
1023 		{ O_ATANH,			V_ZERO },
1024 		{ O_EXP,			V_ONE },
1025 		{ O_LOG,			V_MINUS_INF_OR_LOG_DENORM },
1026 		{ O_EXP2,			V_ONE },
1027 		{ O_LOG2,			V_MINUS_INF_OR_LOG2_DENORM },
1028 		{ O_SQRT,			V_ZERO_OR_SQRT_DENORM },
1029 		{ O_INV_SQRT,		V_INF_OR_INV_SQRT_DENORM },
1030 		{ O_MAT_DET,		V_ZERO },
1031 		{ O_MAT_INV,		V_ZERO_OR_MINUS_ZERO },
1032 		{ O_MODF,			V_ZERO },
1033 		{ O_MODF_ST,		V_ZERO },
1034 		{ O_NORMALIZE,		V_ZERO },
1035 		{ O_REFLECT,		V_ZERO },
1036 		{ O_REFRACT,		V_ZERO },
1037 		{ O_LENGHT,			V_ZERO },
1038 	};
1039 
1040 	const BinaryCase binaryOpDenormPreserveArr[] = {
1041 		//operation			den op one				den op den				den op inf		den op nan
1042 		{ O_PHI,			V_DENORM,				V_DENORM,				V_DENORM,		V_DENORM },
1043 		{ O_SELECT,			V_DENORM,				V_DENORM,				V_DENORM,		V_DENORM },
1044 		{ O_ADD,			V_ONE,					V_DENORM_TIMES_TWO,		V_INF,			V_NAN },
1045 		{ O_SUB,			V_MINUS_ONE_OR_CLOSE,	V_ZERO,					V_MINUS_INF,	V_NAN },
1046 		{ O_MUL,			V_DENORM,				V_ZERO,					V_INF,			V_NAN },
1047 		{ O_VEC_MUL_S,		V_DENORM,				V_ZERO,					V_INF,			V_NAN },
1048 		{ O_VEC_MUL_M,		V_DENORM_TIMES_TWO,		V_ZERO,					V_INF,			V_NAN },
1049 		{ O_MAT_MUL_S,		V_DENORM,				V_ZERO,					V_INF,			V_NAN },
1050 		{ O_MAT_MUL_V,		V_DENORM_TIMES_TWO,		V_ZERO,					V_INF,			V_NAN },
1051 		{ O_MAT_MUL_M,		V_DENORM_TIMES_TWO,		V_ZERO,					V_INF,			V_NAN },
1052 		{ O_OUT_PROD,		V_DENORM,				V_ZERO,					V_INF,			V_NAN },
1053 		{ O_DOT,			V_DENORM_TIMES_TWO,		V_ZERO,					V_INF,			V_NAN },
1054 		{ O_MIX,			V_HALF,					V_DENORM,				V_INF,			V_NAN },
1055 		{ O_FMA,			V_HALF,					V_HALF,					V_INF,			V_NAN },
1056 		{ O_MIN,			V_DENORM,				V_DENORM,				V_DENORM,		V_UNUSED },
1057 		{ O_MAX,			V_ONE,					V_DENORM,				V_INF,			V_UNUSED },
1058 		{ O_CLAMP,			V_ONE,					V_DENORM,				V_INF,			V_UNUSED },
1059 		{ O_NMIN,			V_DENORM,				V_DENORM,				V_DENORM,		V_DENORM },
1060 		{ O_NMAX,			V_ONE,					V_DENORM,				V_INF,			V_DENORM },
1061 		{ O_NCLAMP,			V_ONE,					V_DENORM,				V_INF,			V_DENORM },
1062 	};
1063 
1064 	const UnaryCase unaryOpDenormPreserveArr[] = {
1065 		//operation			op den
1066 		{ O_RETURN_VAL,		V_DENORM },
1067 		{ O_D_EXTRACT,		V_DENORM },
1068 		{ O_D_INSERT,		V_DENORM },
1069 		{ O_SHUFFLE,		V_DENORM },
1070 		{ O_COMPOSITE,		V_DENORM },
1071 		{ O_COMPOSITE_INS,	V_DENORM },
1072 		{ O_COPY,			V_DENORM },
1073 		{ O_TRANSPOSE,		V_DENORM },
1074 		{ O_NEGATE,			V_DENORM },
1075 		{ O_ABS,			V_DENORM },
1076 		{ O_SIGN,			V_ONE },
1077 		{ O_RADIANS,		V_DENORM },
1078 		{ O_DEGREES,		V_DEGREES_DENORM },
1079 	};
1080 
1081 	binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1082 					   binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1083 	unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1084 					  unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1085 	binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1086 								  binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1087 	unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1088 								 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1089 }
1090 
1091 template<>
TypeTestResults()1092 TypeTestResults<float>::TypeTestResults()
1093 {
1094 	m_floatType = FP32;
1095 
1096 	const BinaryCase binaryOpFTZArr[] = {
1097 		//operation		den op one		den op den		den op inf		den op nan
1098 		{ O_ADD,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1099 		{ O_SUB,		V_MINUS_ONE,	V_ZERO,			V_MINUS_INF,	V_UNUSED },
1100 		{ O_MUL,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1101 		{ O_DIV,		V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
1102 		{ O_REM,		V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
1103 		{ O_MOD,		V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
1104 		{ O_VEC_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1105 		{ O_VEC_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1106 		{ O_MAT_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1107 		{ O_MAT_MUL_V,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1108 		{ O_MAT_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1109 		{ O_OUT_PROD,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1110 		{ O_DOT,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1111 		{ O_ATAN2,		V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
1112 		{ O_POW,		V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
1113 		{ O_MIX,		V_HALF,			V_ZERO,			V_INF,			V_UNUSED },
1114 		{ O_MIN,		V_ZERO,			V_ZERO,			V_ZERO,			V_UNUSED },
1115 		{ O_MAX,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1116 		{ O_CLAMP,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1117 		{ O_STEP,		V_ONE,			V_ONE,			V_ONE,			V_UNUSED },
1118 		{ O_SSTEP,		V_HALF,			V_ONE,			V_ZERO,			V_UNUSED },
1119 		{ O_FMA,		V_HALF,			V_HALF,			V_UNUSED,		V_UNUSED },
1120 		{ O_FACE_FWD,	V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE },
1121 		{ O_NMIN,		V_ZERO,			V_ZERO,			V_ZERO,			V_ZERO },
1122 		{ O_NMAX,		V_ONE,			V_ZERO,			V_INF,			V_ZERO },
1123 		{ O_NCLAMP,		V_ONE,			V_ZERO,			V_INF,			V_ZERO },
1124 		{ O_DIST,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1125 		{ O_CROSS,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1126 	};
1127 
1128 	const UnaryCase unaryOpFTZArr[] = {
1129 		//operation			op den
1130 		{ O_NEGATE,			V_MINUS_ZERO },
1131 		{ O_ROUND,			V_ZERO },
1132 		{ O_ROUND_EV,		V_ZERO },
1133 		{ O_TRUNC,			V_ZERO },
1134 		{ O_ABS,			V_ZERO },
1135 		{ O_FLOOR,			V_ZERO },
1136 		{ O_CEIL,			V_ZERO_OR_ONE },
1137 		{ O_FRACT,			V_ZERO },
1138 		{ O_RADIANS,		V_ZERO },
1139 		{ O_DEGREES,		V_ZERO },
1140 		{ O_SIN,			V_ZERO },
1141 		{ O_COS,			V_TRIG_ONE },
1142 		{ O_TAN,			V_ZERO },
1143 		{ O_ASIN,			V_ZERO },
1144 		{ O_ACOS,			V_PI_DIV_2 },
1145 		{ O_ATAN,			V_ZERO },
1146 		{ O_SINH,			V_ZERO },
1147 		{ O_COSH,			V_ONE },
1148 		{ O_TANH,			V_ZERO },
1149 		{ O_ASINH,			V_ZERO },
1150 		{ O_ACOSH,			V_UNUSED },
1151 		{ O_ATANH,			V_ZERO },
1152 		{ O_EXP,			V_ONE },
1153 		{ O_LOG,			V_MINUS_INF_OR_LOG_DENORM },
1154 		{ O_EXP2,			V_ONE },
1155 		{ O_LOG2,			V_MINUS_INF_OR_LOG2_DENORM },
1156 		{ O_SQRT,			V_ZERO_OR_SQRT_DENORM },
1157 		{ O_INV_SQRT,		V_INF_OR_INV_SQRT_DENORM },
1158 		{ O_MAT_DET,		V_ZERO },
1159 		{ O_MAT_INV,		V_ZERO_OR_MINUS_ZERO },
1160 		{ O_MODF,			V_ZERO },
1161 		{ O_MODF_ST,		V_ZERO },
1162 		{ O_NORMALIZE,		V_ZERO },
1163 		{ O_REFLECT,		V_ZERO },
1164 		{ O_REFRACT,		V_ZERO },
1165 		{ O_LENGHT,			V_ZERO },
1166 	};
1167 
1168 	const BinaryCase binaryOpDenormPreserveArr[] = {
1169 		//operation			den op one			den op den				den op inf		den op nan
1170 		{ O_PHI,			V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1171 		{ O_SELECT,			V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1172 		{ O_ADD,			V_ONE,				V_DENORM_TIMES_TWO,		V_INF,			V_NAN },
1173 		{ O_SUB,			V_MINUS_ONE,		V_ZERO,					V_MINUS_INF,	V_NAN },
1174 		{ O_MUL,			V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1175 		{ O_VEC_MUL_S,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1176 		{ O_VEC_MUL_M,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1177 		{ O_MAT_MUL_S,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1178 		{ O_MAT_MUL_V,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1179 		{ O_MAT_MUL_M,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1180 		{ O_OUT_PROD,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1181 		{ O_DOT,			V_DENORM_TIMES_TWO,	V_ZERO,					V_INF,			V_NAN },
1182 		{ O_MIX,			V_HALF,				V_DENORM,				V_INF,			V_NAN },
1183 		{ O_FMA,			V_HALF,				V_HALF,					V_INF,			V_NAN },
1184 		{ O_MIN,			V_DENORM,			V_DENORM,				V_DENORM,		V_UNUSED },
1185 		{ O_MAX,			V_ONE,				V_DENORM,				V_INF,			V_UNUSED },
1186 		{ O_CLAMP,			V_ONE,				V_DENORM,				V_INF,			V_UNUSED },
1187 		{ O_NMIN,			V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1188 		{ O_NMAX,			V_ONE,				V_DENORM,				V_INF,			V_DENORM },
1189 		{ O_NCLAMP,			V_ONE,				V_DENORM,				V_INF,			V_DENORM },
1190 	};
1191 
1192 	const UnaryCase unaryOpDenormPreserveArr[] = {
1193 		//operation			op den
1194 		{ O_RETURN_VAL,		V_DENORM },
1195 		{ O_D_EXTRACT,		V_DENORM },
1196 		{ O_D_INSERT,		V_DENORM },
1197 		{ O_SHUFFLE,		V_DENORM },
1198 		{ O_COMPOSITE,		V_DENORM },
1199 		{ O_COMPOSITE_INS,	V_DENORM },
1200 		{ O_COPY,			V_DENORM },
1201 		{ O_TRANSPOSE,		V_DENORM },
1202 		{ O_NEGATE,			V_DENORM },
1203 		{ O_ABS,			V_DENORM },
1204 		{ O_SIGN,			V_ONE },
1205 		{ O_RADIANS,		V_DENORM },
1206 		{ O_DEGREES,		V_DEGREES_DENORM },
1207 	};
1208 
1209 	binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1210 					   binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1211 	unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1212 					  unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1213 	binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1214 								  binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1215 	unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1216 								 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1217 }
1218 
1219 template<>
TypeTestResults()1220 TypeTestResults<double>::TypeTestResults()
1221 {
1222 	m_floatType = FP64;
1223 
1224 	// fp64 is supported by fewer operations then fp16 and fp32
1225 	// e.g. Radians and Degrees functions are not supported
1226 	const BinaryCase binaryOpFTZArr[] = {
1227 		//operation		den op one		den op den		den op inf		den op nan
1228 		{ O_ADD,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1229 		{ O_SUB,		V_MINUS_ONE,	V_ZERO,			V_MINUS_INF,	V_UNUSED },
1230 		{ O_MUL,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1231 		{ O_DIV,		V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
1232 		{ O_REM,		V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
1233 		{ O_MOD,		V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
1234 		{ O_VEC_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1235 		{ O_VEC_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1236 		{ O_MAT_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1237 		{ O_MAT_MUL_V,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1238 		{ O_MAT_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1239 		{ O_OUT_PROD,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1240 		{ O_DOT,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1241 		{ O_MIX,		V_HALF,			V_ZERO,			V_INF,			V_UNUSED },
1242 		{ O_MIN,		V_ZERO,			V_ZERO,			V_ZERO,			V_UNUSED },
1243 		{ O_MAX,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1244 		{ O_CLAMP,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1245 		{ O_STEP,		V_ONE,			V_ONE,			V_ONE,			V_UNUSED },
1246 		{ O_SSTEP,		V_HALF,			V_ONE,			V_ZERO,			V_UNUSED },
1247 		{ O_FMA,		V_HALF,			V_HALF,			V_UNUSED,		V_UNUSED },
1248 		{ O_FACE_FWD,	V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE },
1249 		{ O_NMIN,		V_ZERO,			V_ZERO,			V_ZERO,			V_ZERO },
1250 		{ O_NMAX,		V_ONE,			V_ZERO,			V_INF,			V_ZERO },
1251 		{ O_NCLAMP,		V_ONE,			V_ZERO,			V_INF,			V_ZERO },
1252 		{ O_DIST,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1253 		{ O_CROSS,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1254 	};
1255 
1256 	const UnaryCase unaryOpFTZArr[] = {
1257 		//operation			op den
1258 		{ O_NEGATE,			V_MINUS_ZERO },
1259 		{ O_ROUND,			V_ZERO },
1260 		{ O_ROUND_EV,		V_ZERO },
1261 		{ O_TRUNC,			V_ZERO },
1262 		{ O_ABS,			V_ZERO },
1263 		{ O_FLOOR,			V_ZERO },
1264 		{ O_CEIL,			V_ZERO_OR_ONE },
1265 		{ O_FRACT,			V_ZERO },
1266 		{ O_SQRT,			V_ZERO_OR_SQRT_DENORM },
1267 		{ O_INV_SQRT,		V_INF_OR_INV_SQRT_DENORM },
1268 		{ O_MAT_DET,		V_ZERO },
1269 		{ O_MAT_INV,		V_ZERO_OR_MINUS_ZERO },
1270 		{ O_MODF,			V_ZERO },
1271 		{ O_MODF_ST,		V_ZERO },
1272 		{ O_NORMALIZE,		V_ZERO },
1273 		{ O_REFLECT,		V_ZERO },
1274 		{ O_LENGHT,			V_ZERO },
1275 	};
1276 
1277 	const BinaryCase binaryOpDenormPreserveArr[] = {
1278 		//operation			den op one			den op den				den op inf		den op nan
1279 		{ O_PHI,			V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1280 		{ O_SELECT,			V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1281 		{ O_ADD,			V_ONE,				V_DENORM_TIMES_TWO,		V_INF,			V_NAN },
1282 		{ O_SUB,			V_MINUS_ONE,		V_ZERO,					V_MINUS_INF,	V_NAN },
1283 		{ O_MUL,			V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1284 		{ O_VEC_MUL_S,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1285 		{ O_VEC_MUL_M,		V_DENORM_TIMES_TWO,	V_ZERO,					V_INF,			V_NAN },
1286 		{ O_MAT_MUL_S,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1287 		{ O_MAT_MUL_V,		V_DENORM_TIMES_TWO,	V_ZERO,					V_INF,			V_NAN },
1288 		{ O_MAT_MUL_M,		V_DENORM_TIMES_TWO,	V_ZERO,					V_INF,			V_NAN },
1289 		{ O_OUT_PROD,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1290 		{ O_DOT,			V_DENORM_TIMES_TWO,	V_ZERO,					V_INF,			V_NAN },
1291 		{ O_MIX,			V_HALF,				V_DENORM,				V_INF,			V_NAN },
1292 		{ O_FMA,			V_HALF,				V_HALF,					V_INF,			V_NAN },
1293 		{ O_MIN,			V_DENORM,			V_DENORM,				V_DENORM,		V_UNUSED },
1294 		{ O_MAX,			V_ONE,				V_DENORM,				V_INF,			V_UNUSED },
1295 		{ O_CLAMP,			V_ONE,				V_DENORM,				V_INF,			V_UNUSED },
1296 		{ O_NMIN,			V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1297 		{ O_NMAX,			V_ONE,				V_DENORM,				V_INF,			V_DENORM },
1298 		{ O_NCLAMP,			V_ONE,				V_DENORM,				V_INF,			V_DENORM },
1299 	};
1300 
1301 	const UnaryCase unaryOpDenormPreserveArr[] = {
1302 		//operation			op den
1303 		{ O_RETURN_VAL,		V_DENORM },
1304 		{ O_D_EXTRACT,		V_DENORM },
1305 		{ O_D_INSERT,		V_DENORM },
1306 		{ O_SHUFFLE,		V_DENORM },
1307 		{ O_COMPOSITE,		V_DENORM },
1308 		{ O_COMPOSITE_INS,	V_DENORM },
1309 		{ O_COPY,			V_DENORM },
1310 		{ O_TRANSPOSE,		V_DENORM },
1311 		{ O_NEGATE,			V_DENORM },
1312 		{ O_ABS,			V_DENORM },
1313 		{ O_SIGN,			V_ONE },
1314 	};
1315 
1316 	binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1317 					   binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1318 	unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1319 					  unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1320 	binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1321 								  binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1322 	unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1323 								 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1324 }
1325 
1326 // Operation structure holds data needed to test specified SPIR-V operation. This class contains
1327 // additional annotations, additional types and aditional constants that should be properly included
1328 // in SPIR-V code. Commands attribute in this structure contains code that performs tested operation
1329 // on given arguments, in some cases verification is also performed there.
1330 // All snipets stroed in this structure are generic and can be specialized for fp16, fp32 or fp64,
1331 // thanks to that this data can be shared by many OperationTestCase instances (testing diferent
1332 // float behaviours on diferent float widths).
1333 struct Operation
1334 {
1335 	// operation name is included in test case name
1336 	const char*	name;
1337 
1338 	// How extensively is the floating point type used?
1339 	FloatUsage floatUsage;
1340 
1341 	// operation specific spir-v snippets that will be
1342 	// placed in proper places in final test shader
1343 	const char*	annotations;
1344 	const char*	types;
1345 	const char*	constants;
1346 	const char*	variables;
1347 	const char*	functions;
1348 	const char*	commands;
1349 
1350 	// conversion operations operate on one float type and produce float
1351 	// type with different bit width; restrictedInputType is used only when
1352 	// isInputTypeRestricted is set to true and it restricts usage of this
1353 	// operation to specified input type
1354 	bool		isInputTypeRestricted;
1355 	FloatType	restrictedInputType;
1356 
1357 	// arguments for OpSpecConstant need to be specified also as constant
1358 	bool		isSpecConstant;
1359 
1360 	// set if c_float* constant is used in operation
1361 	FloatStatementUsageFlags	statementUsageFlags;
1362 
Operationvkt::SpirVAssembly::__anonaf1dd5180111::Operation1363 	Operation()		{}
1364 
1365 	// Minimal constructor - used by most of operations
Operationvkt::SpirVAssembly::__anonaf1dd5180111::Operation1366 	Operation(const char* _name, FloatUsage _floatUsage, const char* _commands, const FloatStatementUsageFlags _statementUsageFlags = 0)
1367 		: name(_name)
1368 		, floatUsage(_floatUsage)
1369 		, annotations("")
1370 		, types("")
1371 		, constants("")
1372 		, variables("")
1373 		, functions("")
1374 		, commands(_commands)
1375 		, isInputTypeRestricted(false)
1376 		, restrictedInputType(FP16)		// not used as isInputTypeRestricted is false
1377 		, isSpecConstant(false)
1378 		, statementUsageFlags(_statementUsageFlags)
1379 	{}
1380 
1381 	// Conversion operations constructor (used also by conversions done in SpecConstantOp)
Operationvkt::SpirVAssembly::__anonaf1dd5180111::Operation1382 	Operation(const char* _name,
1383 			  FloatUsage _floatUsage,
1384 			  bool specConstant,
1385 			  FloatType _inputType,
1386 			  const char* _constants,
1387 			  const char* _commands,
1388 			  const FloatStatementUsageFlags _statementUsageFlags = 0)
1389 		: name(_name)
1390 		, floatUsage(_floatUsage)
1391 		, annotations("")
1392 		, types("")
1393 		, constants(_constants)
1394 		, variables("")
1395 		, functions("")
1396 		, commands(_commands)
1397 		, isInputTypeRestricted(true)
1398 		, restrictedInputType(_inputType)
1399 		, isSpecConstant(specConstant)
1400 		, statementUsageFlags(_statementUsageFlags)
1401 	{}
1402 
1403 	// Full constructor - used by few operations, that are more complex to test
Operationvkt::SpirVAssembly::__anonaf1dd5180111::Operation1404 	Operation(const char* _name,
1405 			  FloatUsage _floatUsage,
1406 			  const char* _annotations,
1407 			  const char* _types,
1408 			  const char* _constants,
1409 			  const char* _variables,
1410 			  const char* _functions,
1411 			  const char* _commands,
1412 			  const FloatStatementUsageFlags _statementUsageFlags = 0)
1413 		: name(_name)
1414 		, floatUsage(_floatUsage)
1415 		, annotations(_annotations)
1416 		, types(_types)
1417 		, constants(_constants)
1418 		, variables(_variables)
1419 		, functions(_functions)
1420 		, commands(_commands)
1421 		, isInputTypeRestricted(false)
1422 		, restrictedInputType(FP16)		// not used as isInputTypeRestricted is false
1423 		, isSpecConstant(false)
1424 		, statementUsageFlags(_statementUsageFlags)
1425 	{}
1426 
1427 	// Full constructor - used by rounding override cases
Operationvkt::SpirVAssembly::__anonaf1dd5180111::Operation1428 	Operation(const char* _name,
1429 			  FloatUsage _floatUsage,
1430 			  FloatType _inputType,
1431 			  const char* _annotations,
1432 			  const char* _types,
1433 			  const char* _constants,
1434 			  const char* _commands,
1435 			  const FloatStatementUsageFlags _statementUsageFlags = 0)
1436 		: name(_name)
1437 		, floatUsage(_floatUsage)
1438 		, annotations(_annotations)
1439 		, types(_types)
1440 		, constants(_constants)
1441 		, variables("")
1442 		, functions("")
1443 		, commands(_commands)
1444 		, isInputTypeRestricted(true)
1445 		, restrictedInputType(_inputType)
1446 		, isSpecConstant(false)
1447 		, statementUsageFlags(_statementUsageFlags)
1448 	{}
1449 };
1450 
1451 // Class storing input that will be passed to operation and expected
1452 // output that should be generated for specified behaviour.
1453 class OperationTestCase
1454 {
1455 public:
1456 
OperationTestCase()1457 	OperationTestCase()		{}
1458 
OperationTestCase(const char * _baseName,BehaviorFlags _behaviorFlags,OperationId _operatinId,ValueId _input1,ValueId _input2,ValueId _expectedOutput)1459 	OperationTestCase(const char*	_baseName,
1460 					  BehaviorFlags	_behaviorFlags,
1461 					  OperationId	_operatinId,
1462 					  ValueId		_input1,
1463 					  ValueId		_input2,
1464 					  ValueId		_expectedOutput)
1465 		: baseName(_baseName)
1466 		, behaviorFlags(_behaviorFlags)
1467 		, operationId(_operatinId)
1468 		, expectedOutput(_expectedOutput)
1469 	{
1470 		input[0] = _input1;
1471 		input[1] = _input2;
1472 	}
1473 
1474 public:
1475 
1476 	string					baseName;
1477 	BehaviorFlags			behaviorFlags;
1478 	OperationId				operationId;
1479 	ValueId					input[2];
1480 	ValueId					expectedOutput;
1481 };
1482 
1483 // Helper structure used to store specialized operation
1484 // data. This data is ready to be used during shader assembly.
1485 struct SpecializedOperation
1486 {
1487 	string constants;
1488 	string annotations;
1489 	string types;
1490 	string arguments;
1491 	string variables;
1492 	string functions;
1493 	string commands;
1494 
1495 	FloatType					inFloatType;
1496 	TypeSnippetsSP				inTypeSnippets;
1497 	TypeSnippetsSP				outTypeSnippets;
1498 	FloatStatementUsageFlags	argumentsUsesFloatConstant;
1499 };
1500 
1501 // Class responsible for constructing list of test cases for specified
1502 // float type and specified way of preparation of arguments.
1503 // Arguments can be either read from input SSBO or generated via math
1504 // operations in spir-v code.
1505 class TestCasesBuilder
1506 {
1507 public:
1508 
1509 	void init();
1510 	void build(vector<OperationTestCase>& testCases, TypeTestResultsSP typeTestResults, bool argumentsFromInput);
1511 	const Operation& getOperation(OperationId id) const;
1512 
1513 private:
1514 
1515 	void createUnaryTestCases(vector<OperationTestCase>& testCases,
1516 							  OperationId operationId,
1517 							  ValueId denormPreserveResult,
1518 							  ValueId denormFTZResult) const;
1519 
1520 private:
1521 
1522 	// Operations are shared betwean test cases so they are
1523 	// passed to them as pointers to data stored in TestCasesBuilder.
1524 	typedef OperationTestCase OTC;
1525 	typedef Operation Op;
1526 	map<int, Op> m_operations;
1527 };
1528 
init()1529 void TestCasesBuilder::init()
1530 {
1531 	map<int, Op>& mo = m_operations;
1532 
1533 	// predefine operations repeatedly used in tests; note that "_float"
1534 	// in every operation command will be replaced with either "_f16",
1535 	// "_f32" or "_f64" - StringTemplate is not used here because it
1536 	// would make code less readable
1537 	// m_operations contains generic operation definitions that can be
1538 	// used for all float types
1539 
1540 	mo[O_NEGATE]		= Op("negate",		FLOAT_ARITHMETIC,
1541 											"%result             = OpFNegate %type_float %arg1\n",
1542 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1543 	mo[O_COMPOSITE]		= Op("composite",	FLOAT_ARITHMETIC,
1544 											"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1545 											"%result             = OpCompositeExtract %type_float %vec1 0\n",
1546 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1547 	mo[O_COMPOSITE_INS]	= Op("comp_ins",	FLOAT_ARITHMETIC,
1548 											"%vec1               = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_0\n"
1549 											"%vec2               = OpCompositeInsert %type_float_vec2 %arg1 %vec1 0\n"
1550 											"%result             = OpCompositeExtract %type_float %vec2 0\n",
1551 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1552 	mo[O_COPY]			= Op("copy",		FLOAT_STORAGE_ONLY,
1553 											"%result             = OpCopyObject %type_float %arg1\n",
1554 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1555 	mo[O_D_EXTRACT]		= Op("extract",		FLOAT_ARITHMETIC,
1556 											"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1557 											"%result             = OpVectorExtractDynamic %type_float %vec1 %c_i32_0\n",
1558 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1559 	mo[O_D_INSERT]		= Op("insert",		FLOAT_ARITHMETIC,
1560 											"%tmpVec             = OpCompositeConstruct %type_float_vec2 %c_float_2 %c_float_2\n"
1561 											"%vec1               = OpVectorInsertDynamic %type_float_vec2 %tmpVec %arg1 %c_i32_0\n"
1562 											"%result             = OpCompositeExtract %type_float %vec1 0\n",
1563 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1564 	mo[O_SHUFFLE]		= Op("shuffle",		FLOAT_ARITHMETIC,
1565 											"%tmpVec1            = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1566 											"%tmpVec2            = OpCompositeConstruct %type_float_vec2 %c_float_2 %c_float_2\n"	// NOTE: its impossible to test shuffle with denorms flushed
1567 											"%vec1               = OpVectorShuffle %type_float_vec2 %tmpVec1 %tmpVec2 0 2\n"		//       to zero as this will be done by earlier operation
1568 											"%result             = OpCompositeExtract %type_float %vec1 0\n",						//       (this also applies to few other operations)
1569 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1570 	mo[O_TRANSPOSE]		= Op("transpose",	FLOAT_ARITHMETIC,
1571 											"%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1572 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1573 											"%tmat               = OpTranspose %type_float_mat2x2 %mat\n"
1574 											"%tcol               = OpCompositeExtract %type_float_vec2 %tmat 0\n"
1575 											"%result             = OpCompositeExtract %type_float %tcol 0\n",
1576 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1577 	mo[O_RETURN_VAL]	= Op("ret_val",		FLOAT_ARITHMETIC,
1578 											"",
1579 											"%type_test_fun      = OpTypeFunction %type_float %type_float\n",
1580 											"",
1581 											"",
1582 											"%test_fun = OpFunction %type_float None %type_test_fun\n"
1583 											"%param = OpFunctionParameter %type_float\n"
1584 											"%entry = OpLabel\n"
1585 											"OpReturnValue %param\n"
1586 											"OpFunctionEnd\n",
1587 											"%result             = OpFunctionCall %type_float %test_fun %arg1\n",
1588 											B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1589 
1590 	// conversion operations that are meant to be used only for single output type (defined by the second number in name)
1591 	const char* convertSource =				"%result             = OpFConvert %type_float %arg1\n";
1592 	mo[O_CONV_FROM_FP16]	= Op("conv_from_fp16", FLOAT_STORAGE_ONLY, false, FP16, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1593 	mo[O_CONV_FROM_FP32]	= Op("conv_from_fp32", FLOAT_STORAGE_ONLY, false, FP32, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1594 	mo[O_CONV_FROM_FP64]	= Op("conv_from_fp64", FLOAT_STORAGE_ONLY, false, FP64, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1595 
1596 	// from all operands supported by OpSpecConstantOp we can only test FConvert opcode with literals as everything
1597 	// else requires Kernel capability (OpenCL); values of literals used in SPIR-V code must be equivalent to
1598 	// V_CONV_FROM_FP32_ARG and V_CONV_FROM_FP64_ARG so we can use same expected rounded values as for regular OpFConvert
1599 	mo[O_SCONST_CONV_FROM_FP32_TO_FP16]
1600 						= Op("sconst_conv_from_fp32", FLOAT_ARITHMETIC, true, FP32,
1601 											"%c_arg              = OpConstant %type_f32 1.22334445\n"
1602 											"%result             = OpSpecConstantOp %type_f16 FConvert %c_arg\n",
1603 											"",
1604 											B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP32);
1605 	mo[O_SCONST_CONV_FROM_FP64_TO_FP32]
1606 						= Op("sconst_conv_from_fp64", FLOAT_ARITHMETIC, true, FP64,
1607 											"%c_arg              = OpConstant %type_f64 1.22334455\n"
1608 											"%result             = OpSpecConstantOp %type_f32 FConvert %c_arg\n",
1609 											"",
1610 											B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
1611 	mo[O_SCONST_CONV_FROM_FP64_TO_FP16]
1612 						= Op("sconst_conv_from_fp64", FLOAT_ARITHMETIC, true, FP64,
1613 											"%c_arg              = OpConstant %type_f64 1.22334445\n"
1614 											"%result             = OpSpecConstantOp %type_f16 FConvert %c_arg\n",
1615 											"",
1616 											B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
1617 
1618 	mo[O_ADD]			= Op("add",			FLOAT_ARITHMETIC, "%result             = OpFAdd %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1619 	mo[O_SUB]			= Op("sub",			FLOAT_ARITHMETIC, "%result             = OpFSub %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1620 	mo[O_MUL]			= Op("mul",			FLOAT_ARITHMETIC, "%result             = OpFMul %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1621 	mo[O_DIV]			= Op("div",			FLOAT_ARITHMETIC, "%result             = OpFDiv %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1622 	mo[O_REM]			= Op("rem",			FLOAT_ARITHMETIC, "%result             = OpFRem %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1623 	mo[O_MOD]			= Op("mod",			FLOAT_ARITHMETIC, "%result             = OpFMod %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1624 	mo[O_PHI]			= Op("phi",			FLOAT_ARITHMETIC,
1625 											"%comp               = OpFOrdGreaterThan %type_bool %arg1 %arg2\n"
1626 											"                      OpSelectionMerge %comp_merge None\n"
1627 											"                      OpBranchConditional %comp %true_branch %false_branch\n"
1628 											"%true_branch        = OpLabel\n"
1629 											"                      OpBranch %comp_merge\n"
1630 											"%false_branch       = OpLabel\n"
1631 											"                      OpBranch %comp_merge\n"
1632 											"%comp_merge         = OpLabel\n"
1633 											"%result             = OpPhi %type_float %arg2 %true_branch %arg1 %false_branch\n",
1634 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1635 	mo[O_SELECT]		= Op("select",		FLOAT_ARITHMETIC,
1636 											"%always_true        = OpFOrdGreaterThan %type_bool %c_float_1 %c_float_0\n"
1637 											"%result             = OpSelect %type_float %always_true %arg1 %arg2\n",
1638 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1639 	mo[O_DOT]			= Op("dot",			FLOAT_ARITHMETIC,
1640 											"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1641 											"%vec2               = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1642 											"%result             = OpDot %type_float %vec1 %vec2\n",
1643 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1644 	mo[O_VEC_MUL_S]		= Op("vmuls",		FLOAT_ARITHMETIC,
1645 											"%vec                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1646 											"%tmpVec             = OpVectorTimesScalar %type_float_vec2 %vec %arg2\n"
1647 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n",
1648 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1649 	mo[O_VEC_MUL_M]		= Op("vmulm",		FLOAT_ARITHMETIC,
1650 											"%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1651 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1652 											"%vec                = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1653 											"%tmpVec             = OpVectorTimesMatrix %type_float_vec2 %vec %mat\n"
1654 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n",
1655 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1656 	mo[O_MAT_MUL_S]		= Op("mmuls",		FLOAT_ARITHMETIC,
1657 											"%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1658 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1659 											"%mulMat             = OpMatrixTimesScalar %type_float_mat2x2 %mat %arg2\n"
1660 											"%extCol             = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1661 											"%result             = OpCompositeExtract %type_float %extCol 0\n",
1662 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1663 	mo[O_MAT_MUL_V]		= Op("mmulv",		FLOAT_ARITHMETIC,
1664 											"%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1665 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1666 											"%vec                = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1667 											"%mulVec             = OpMatrixTimesVector %type_float_vec2 %mat %vec\n"
1668 											"%result             = OpCompositeExtract %type_float %mulVec 0\n",
1669 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1670 	mo[O_MAT_MUL_M]		= Op("mmulm",		FLOAT_ARITHMETIC,
1671 											"%col1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1672 											"%mat1               = OpCompositeConstruct %type_float_mat2x2 %col1 %col1\n"
1673 											"%col2               = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1674 											"%mat2               = OpCompositeConstruct %type_float_mat2x2 %col2 %col2\n"
1675 											"%mulMat             = OpMatrixTimesMatrix %type_float_mat2x2 %mat1 %mat2\n"
1676 											"%extCol             = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1677 											"%result             = OpCompositeExtract %type_float %extCol 0\n",
1678 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1679 	mo[O_OUT_PROD]		= Op("out_prod",	FLOAT_ARITHMETIC,
1680 											"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1681 											"%vec2               = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1682 											"%mulMat             = OpOuterProduct %type_float_mat2x2 %vec1 %vec2\n"
1683 											"%extCol             = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1684 											"%result             = OpCompositeExtract %type_float %extCol 0\n",
1685 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1686 
1687 	// comparison operations
1688 	mo[O_ORD_EQ]		= Op("ord_eq",		FLOAT_ARITHMETIC,
1689 											"%boolVal           = OpFOrdEqual %type_bool %arg1 %arg2\n"
1690 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1691 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1692 	mo[O_UORD_EQ]		= Op("uord_eq",		FLOAT_ARITHMETIC,
1693 											"%boolVal           = OpFUnordEqual %type_bool %arg1 %arg2\n"
1694 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1695 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1696 	mo[O_ORD_NEQ]		= Op("ord_neq",		FLOAT_ARITHMETIC,
1697 											"%boolVal           = OpFOrdNotEqual %type_bool %arg1 %arg2\n"
1698 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1699 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1700 	mo[O_UORD_NEQ]		= Op("uord_neq",	FLOAT_ARITHMETIC,
1701 											"%boolVal           = OpFUnordNotEqual %type_bool %arg1 %arg2\n"
1702 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1703 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1704 	mo[O_ORD_LS]		= Op("ord_ls",		FLOAT_ARITHMETIC,
1705 											"%boolVal           = OpFOrdLessThan %type_bool %arg1 %arg2\n"
1706 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1707 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1708 	mo[O_UORD_LS]		= Op("uord_ls",		FLOAT_ARITHMETIC,
1709 											"%boolVal           = OpFUnordLessThan %type_bool %arg1 %arg2\n"
1710 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1711 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1712 	mo[O_ORD_GT]		= Op("ord_gt",		FLOAT_ARITHMETIC,
1713 											"%boolVal           = OpFOrdGreaterThan %type_bool %arg1 %arg2\n"
1714 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1715 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1716 	mo[O_UORD_GT]		= Op("uord_gt",		FLOAT_ARITHMETIC,
1717 											"%boolVal           = OpFUnordGreaterThan %type_bool %arg1 %arg2\n"
1718 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1719 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1720 	mo[O_ORD_LE]		= Op("ord_le",		FLOAT_ARITHMETIC,
1721 											"%boolVal           = OpFOrdLessThanEqual %type_bool %arg1 %arg2\n"
1722 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1723 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1724 	mo[O_UORD_LE]		= Op("uord_le",		FLOAT_ARITHMETIC,
1725 											"%boolVal           = OpFUnordLessThanEqual %type_bool %arg1 %arg2\n"
1726 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1727 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1728 	mo[O_ORD_GE]		= Op("ord_ge",		FLOAT_ARITHMETIC,
1729 											"%boolVal           = OpFOrdGreaterThanEqual %type_bool %arg1 %arg2\n"
1730 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1731 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1732 	mo[O_UORD_GE]		= Op("uord_ge",		FLOAT_ARITHMETIC,
1733 											"%boolVal           = OpFUnordGreaterThanEqual %type_bool %arg1 %arg2\n"
1734 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1735 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1736 
1737 	mo[O_ATAN2]			= Op("atan2",		FLOAT_ARITHMETIC,
1738 											"%result             = OpExtInst %type_float %std450 Atan2 %arg1 %arg2\n",
1739 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1740 	mo[O_POW]			= Op("pow",			FLOAT_ARITHMETIC,
1741 											"%result             = OpExtInst %type_float %std450 Pow %arg1 %arg2\n",
1742 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1743 	mo[O_MIX]			= Op("mix",			FLOAT_ARITHMETIC,
1744 											"%result             = OpExtInst %type_float %std450 FMix %arg1 %arg2 %c_float_0_5\n",
1745 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1746 	mo[O_FMA]			= Op("fma",			FLOAT_ARITHMETIC,
1747 											"%result             = OpExtInst %type_float %std450 Fma %arg1 %arg2 %c_float_0_5\n",
1748 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1749 	mo[O_MIN]			= Op("min",			FLOAT_ARITHMETIC,
1750 											"%result             = OpExtInst %type_float %std450 FMin %arg1 %arg2\n",
1751 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1752 	mo[O_MAX]			= Op("max",			FLOAT_ARITHMETIC,
1753 											"%result             = OpExtInst %type_float %std450 FMax %arg1 %arg2\n",
1754 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1755 	mo[O_CLAMP]			= Op("clamp",		FLOAT_ARITHMETIC,
1756 											"%result             = OpExtInst %type_float %std450 FClamp %arg1 %arg2 %arg2\n",
1757 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1758 	mo[O_STEP]			= Op("step",		FLOAT_ARITHMETIC,
1759 											"%result             = OpExtInst %type_float %std450 Step %arg1 %arg2\n",
1760 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1761 	mo[O_SSTEP]			= Op("sstep",		FLOAT_ARITHMETIC,
1762 											"%result             = OpExtInst %type_float %std450 SmoothStep %arg1 %arg2 %c_float_0_5\n",
1763 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1764 	mo[O_DIST]			= Op("distance",	FLOAT_ARITHMETIC,
1765 											"%result             = OpExtInst %type_float %std450 Distance %arg1 %arg2\n",
1766 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1767 	mo[O_CROSS]			= Op("cross",		FLOAT_ARITHMETIC,
1768 											"%vec1               = OpCompositeConstruct %type_float_vec3 %arg1 %arg1 %arg1\n"
1769 											"%vec2               = OpCompositeConstruct %type_float_vec3 %arg2 %arg2 %arg2\n"
1770 											"%tmpVec             = OpExtInst %type_float_vec3 %std450 Cross %vec1 %vec2\n"
1771 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n",
1772 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1773 	mo[O_FACE_FWD]		= Op("face_fwd",	FLOAT_ARITHMETIC,
1774 											"%result             = OpExtInst %type_float %std450 FaceForward %c_float_1 %arg1 %arg2\n",
1775 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1776 	mo[O_NMIN]			= Op("nmin",		FLOAT_ARITHMETIC,
1777 											"%result             = OpExtInst %type_float %std450 NMin %arg1 %arg2\n",
1778 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1779 	mo[O_NMAX]			= Op("nmax",		FLOAT_ARITHMETIC,
1780 											"%result             = OpExtInst %type_float %std450 NMax %arg1 %arg2\n",
1781 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1782 	mo[O_NCLAMP]		= Op("nclamp",		FLOAT_ARITHMETIC,
1783 											"%result             = OpExtInst %type_float %std450 NClamp %arg2 %arg1 %arg2\n",
1784 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1785 
1786 	mo[O_ROUND]			= Op("round",		FLOAT_ARITHMETIC,
1787 											"%result             = OpExtInst %type_float %std450 Round %arg1\n",
1788 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1789 	mo[O_ROUND_EV]		= Op("round_ev",	FLOAT_ARITHMETIC,
1790 											"%result             = OpExtInst %type_float %std450 RoundEven %arg1\n",
1791 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1792 	mo[O_TRUNC]			= Op("trunc",		FLOAT_ARITHMETIC,
1793 											"%result             = OpExtInst %type_float %std450 Trunc %arg1\n",
1794 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1795 	mo[O_ABS]			= Op("abs",			FLOAT_ARITHMETIC,
1796 											"%result             = OpExtInst %type_float %std450 FAbs %arg1\n",
1797 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1798 	mo[O_SIGN]			= Op("sign",		FLOAT_ARITHMETIC,
1799 											"%result             = OpExtInst %type_float %std450 FSign %arg1\n",
1800 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1801 	mo[O_FLOOR]			= Op("floor",		FLOAT_ARITHMETIC,
1802 											"%result             = OpExtInst %type_float %std450 Floor %arg1\n",
1803 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1804 	mo[O_CEIL]			= Op("ceil",		FLOAT_ARITHMETIC,
1805 											"%result             = OpExtInst %type_float %std450 Ceil %arg1\n",
1806 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1807 	mo[O_FRACT]			= Op("fract",		FLOAT_ARITHMETIC,
1808 											"%result             = OpExtInst %type_float %std450 Fract %arg1\n",
1809 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1810 	mo[O_RADIANS]		= Op("radians",		FLOAT_ARITHMETIC,
1811 											"%result             = OpExtInst %type_float %std450 Radians %arg1\n",
1812 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1813 	mo[O_DEGREES]		= Op("degrees",		FLOAT_ARITHMETIC,
1814 											"%result             = OpExtInst %type_float %std450 Degrees %arg1\n",
1815 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1816 	mo[O_SIN]			= Op("sin",			FLOAT_ARITHMETIC,
1817 											"%result             = OpExtInst %type_float %std450 Sin %arg1\n",
1818 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1819 	mo[O_COS]			= Op("cos",			FLOAT_ARITHMETIC,
1820 											"%result             = OpExtInst %type_float %std450 Cos %arg1\n",
1821 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1822 	mo[O_TAN]			= Op("tan",			FLOAT_ARITHMETIC,
1823 											"%result             = OpExtInst %type_float %std450 Tan %arg1\n",
1824 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1825 	mo[O_ASIN]			= Op("asin",		FLOAT_ARITHMETIC,
1826 											"%result             = OpExtInst %type_float %std450 Asin %arg1\n",
1827 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1828 	mo[O_ACOS]			= Op("acos",		FLOAT_ARITHMETIC,
1829 											"%result             = OpExtInst %type_float %std450 Acos %arg1\n",
1830 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1831 	mo[O_ATAN]			= Op("atan",		FLOAT_ARITHMETIC,
1832 											"%result             = OpExtInst %type_float %std450 Atan %arg1\n",
1833 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1834 	mo[O_SINH]			= Op("sinh",		FLOAT_ARITHMETIC,
1835 											"%result             = OpExtInst %type_float %std450 Sinh %arg1\n",
1836 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1837 	mo[O_COSH]			= Op("cosh",		FLOAT_ARITHMETIC,
1838 											"%result             = OpExtInst %type_float %std450 Cosh %arg1\n",
1839 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1840 	mo[O_TANH]			= Op("tanh",		FLOAT_ARITHMETIC,
1841 											"%result             = OpExtInst %type_float %std450 Tanh %arg1\n",
1842 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1843 	mo[O_ASINH]			= Op("asinh",		FLOAT_ARITHMETIC,
1844 											"%result             = OpExtInst %type_float %std450 Asinh %arg1\n",
1845 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1846 	mo[O_ACOSH]			= Op("acosh",		FLOAT_ARITHMETIC,
1847 											"%result             = OpExtInst %type_float %std450 Acosh %arg1\n",
1848 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1849 	mo[O_ATANH]			= Op("atanh",		FLOAT_ARITHMETIC,
1850 											"%result             = OpExtInst %type_float %std450 Atanh %arg1\n",
1851 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1852 	mo[O_EXP]			= Op("exp",			FLOAT_ARITHMETIC,
1853 											"%result             = OpExtInst %type_float %std450 Exp %arg1\n",
1854 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1855 	mo[O_LOG]			= Op("log",			FLOAT_ARITHMETIC,
1856 											"%result             = OpExtInst %type_float %std450 Log %arg1\n",
1857 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1858 	mo[O_EXP2]			= Op("exp2",		FLOAT_ARITHMETIC,
1859 											"%result             = OpExtInst %type_float %std450 Exp2 %arg1\n",
1860 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1861 	mo[O_LOG2]			= Op("log2",		FLOAT_ARITHMETIC,
1862 											"%result             = OpExtInst %type_float %std450 Log2 %arg1\n",
1863 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1864 	mo[O_SQRT]			= Op("sqrt",		FLOAT_ARITHMETIC,
1865 											"%result             = OpExtInst %type_float %std450 Sqrt %arg1\n",
1866 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1867 	mo[O_INV_SQRT]		= Op("inv_sqrt",	FLOAT_ARITHMETIC,
1868 											"%result             = OpExtInst %type_float %std450 InverseSqrt %arg1\n",
1869 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1870 	mo[O_MODF]			= Op("modf",		FLOAT_ARITHMETIC,
1871 											"",
1872 											"",
1873 											"",
1874 											"%tmpVarPtr          = OpVariable %type_float_fptr Function\n",
1875 											"",
1876 											"%result             = OpExtInst %type_float %std450 Modf %arg1 %tmpVarPtr\n",
1877 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1878 	mo[O_MODF_ST]		= Op("modf_st",		FLOAT_ARITHMETIC,
1879 											"OpMemberDecorate %struct_ff 0 Offset ${float_width}\n"
1880 											"OpMemberDecorate %struct_ff 1 Offset ${float_width}\n",
1881 											"%struct_ff          = OpTypeStruct %type_float %type_float\n"
1882 											"%struct_ff_fptr     = OpTypePointer Function %struct_ff\n",
1883 											"",
1884 											"%tmpStructPtr       = OpVariable %struct_ff_fptr Function\n",
1885 											"",
1886 											"%tmpStruct          = OpExtInst %struct_ff %std450 ModfStruct %arg1\n"
1887 											"                      OpStore %tmpStructPtr %tmpStruct\n"
1888 											"%tmpLoc             = OpAccessChain %type_float_fptr %tmpStructPtr %c_i32_0\n"
1889 											"%result             = OpLoad %type_float %tmpLoc\n",
1890 											B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1891 	mo[O_FREXP]			= Op("frexp",		FLOAT_ARITHMETIC,
1892 											"",
1893 											"",
1894 											"",
1895 											"%tmpVarPtr          = OpVariable %type_i32_fptr Function\n",
1896 											"",
1897 											"%result             = OpExtInst %type_float %std450 Frexp %arg1 %tmpVarPtr\n",
1898 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1899 	mo[O_FREXP_ST]		= Op("frexp_st",	FLOAT_ARITHMETIC,
1900 											"OpMemberDecorate %struct_fi 0 Offset ${float_width}\n"
1901 											"OpMemberDecorate %struct_fi 1 Offset 32\n",
1902 											"%struct_fi          = OpTypeStruct %type_float %type_i32\n"
1903 											"%struct_fi_fptr     = OpTypePointer Function %struct_fi\n",
1904 											"",
1905 											"%tmpStructPtr       = OpVariable %struct_fi_fptr Function\n",
1906 											"",
1907 											"%tmpStruct          = OpExtInst %struct_fi %std450 FrexpStruct %arg1\n"
1908 											"                      OpStore %tmpStructPtr %tmpStruct\n"
1909 											"%tmpLoc             = OpAccessChain %type_float_fptr %tmpStructPtr %c_i32_0\n"
1910 											"%result             = OpLoad %type_float %tmpLoc\n",
1911 											B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1912 	mo[O_LENGHT]		= Op("length",		FLOAT_ARITHMETIC,
1913 											"%result             = OpExtInst %type_float %std450 Length %arg1\n",
1914 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1915 	mo[O_NORMALIZE]		= Op("normalize",	FLOAT_ARITHMETIC,
1916 											"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %c_float_2\n"
1917 											"%tmpVec             = OpExtInst %type_float_vec2 %std450 Normalize %vec1\n"
1918 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n",
1919 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1920 	mo[O_REFLECT]		= Op("reflect",		FLOAT_ARITHMETIC,
1921 											"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1922 											"%vecN               = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_n1\n"
1923 											"%tmpVec             = OpExtInst %type_float_vec2 %std450 Reflect %vec1 %vecN\n"
1924 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n",
1925 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1926 	mo[O_REFRACT]		= Op("refract",		FLOAT_ARITHMETIC,
1927 											"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1928 											"%vecN               = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_n1\n"
1929 											"%tmpVec             = OpExtInst %type_float_vec2 %std450 Refract %vec1 %vecN %c_float_0_5\n"
1930 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n",
1931 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1932 	mo[O_MAT_DET]		= Op("mat_det",		FLOAT_ARITHMETIC,
1933 											"%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1934 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1935 											"%result             = OpExtInst %type_float %std450 Determinant %mat\n",
1936 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1937 	mo[O_MAT_INV]		= Op("mat_inv",		FLOAT_ARITHMETIC,
1938 											"%col1               = OpCompositeConstruct %type_float_vec2 %arg1 %c_float_1\n"
1939 											"%col2               = OpCompositeConstruct %type_float_vec2 %c_float_1 %c_float_1\n"
1940 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col1 %col2\n"
1941 											"%invMat             = OpExtInst %type_float_mat2x2 %std450 MatrixInverse %mat\n"
1942 											"%extCol             = OpCompositeExtract %type_float_vec2 %invMat 1\n"
1943 											"%result             = OpCompositeExtract %type_float %extCol 1\n",
1944 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1945 
1946 	// PackHalf2x16 is a special case as it operates on fp32 vec2 and returns unsigned int,
1947 	// the verification is done in SPIR-V code (if result is correct 1.0 will be written to SSBO)
1948 	mo[O_PH_DENORM]		= Op("ph_denorm",	FLOAT_STORAGE_ONLY,
1949 											"",
1950 											"",
1951 											"%c_fp32_denorm_fp16 = OpConstant %type_f32 6.01e-5\n"		// fp32 representation of fp16 denorm value
1952 											"%c_ref              = OpConstant %type_u32 66061296\n",
1953 											"",
1954 											"",
1955 											"%srcVec             = OpCompositeConstruct %type_f32_vec2 %c_fp32_denorm_fp16 %c_fp32_denorm_fp16\n"
1956 											"%packedInt          = OpExtInst %type_u32 %std450 PackHalf2x16 %srcVec\n"
1957 											"%boolVal            = OpIEqual %type_bool %c_ref %packedInt\n"
1958 											"%result             = OpSelect %type_f32 %boolVal %c_f32_1 %c_f32_0\n",
1959 											B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_COMMANDS_CONST_FP32 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP32);
1960 
1961 	// UnpackHalf2x16 is a special case that operates on uint32 and returns two 32-bit floats,
1962 	// this function is tested using constants
1963 	mo[O_UPH_DENORM]	= Op("uph_denorm",	FLOAT_STORAGE_ONLY,
1964 											"",
1965 											"",
1966 											"%c_u32_2_16_pack    = OpConstant %type_u32 66061296\n", // == packHalf2x16(vec2(denorm))
1967 											"",
1968 											"",
1969 											"%tmpVec             = OpExtInst %type_f32_vec2 %std450 UnpackHalf2x16 %c_u32_2_16_pack\n"
1970 											"%result             = OpCompositeExtract %type_f32 %tmpVec 0\n",
1971 											B_STATEMENT_USAGE_COMMANDS_TYPE_FP32);
1972 
1973 	// PackDouble2x32 is a special case that operates on two uint32 and returns
1974 	// double, this function is tested using constants
1975 	mo[O_PD_DENORM]		= Op("pd_denorm",	FLOAT_STORAGE_ONLY,
1976 											"",
1977 											"",
1978 											"%c_p1               = OpConstant %type_u32 0\n"
1979 											"%c_p2               = OpConstant %type_u32 262144\n",		// == UnpackDouble2x32(denorm)
1980 											"",
1981 											"",
1982 											"%srcVec             = OpCompositeConstruct %type_u32_vec2 %c_p1 %c_p2\n"
1983 											"%result             = OpExtInst %type_f64 %std450 PackDouble2x32 %srcVec\n",
1984 											B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
1985 
1986 	// UnpackDouble2x32 is a special case as it operates only on FP64 and returns two ints,
1987 	// the verification is done in SPIR-V code (if result is correct 1.0 will be written to SSBO)
1988 	const char* unpackDouble2x32Types	=	"%type_bool_vec2     = OpTypeVector %type_bool 2\n";
1989 	const char* unpackDouble2x32Source	=	"%refVec2            = OpCompositeConstruct %type_u32_vec2 %c_p1 %c_p2\n"
1990 											"%resVec2            = OpExtInst %type_u32_vec2 %std450 UnpackDouble2x32 %arg1\n"
1991 											"%boolVec2           = OpIEqual %type_bool_vec2 %refVec2 %resVec2\n"
1992 											"%boolVal            = OpAll %type_bool %boolVec2\n"
1993 											"%result             = OpSelect %type_f64 %boolVal %c_f64_1 %c_f64_0\n";
1994 	mo[O_UPD_DENORM_FLUSH]		= Op("upd_denorm",	FLOAT_STORAGE_ONLY, "",
1995 											unpackDouble2x32Types,
1996 											"%c_p1               = OpConstant %type_u32 0\n"
1997 											"%c_p2               = OpConstant %type_u32 0\n",
1998 											"",
1999 											"",
2000 											unpackDouble2x32Source,
2001 											B_STATEMENT_USAGE_COMMANDS_CONST_FP64 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2002 	mo[O_UPD_DENORM_PRESERVE]	= Op("upd_denorm",	FLOAT_STORAGE_ONLY, "",
2003 											unpackDouble2x32Types,
2004 											"%c_p1               = OpConstant %type_u32 1008\n"
2005 											"%c_p2               = OpConstant %type_u32 0\n",
2006 											"",
2007 											"",
2008 											unpackDouble2x32Source,
2009 											B_STATEMENT_USAGE_COMMANDS_CONST_FP64 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2010 
2011 	mo[O_ORTE_ROUND]	= Op("orte_round",	FLOAT_STORAGE_ONLY, FP32,
2012 											"OpDecorate %result FPRoundingMode RTE\n",
2013 											"",
2014 											"",
2015 											"%result             = OpFConvert %type_f16 %arg1\n",
2016 											B_STATEMENT_USAGE_COMMANDS_TYPE_FP16);
2017 	mo[O_ORTZ_ROUND]	= Op("ortz_round",	FLOAT_STORAGE_ONLY, FP32,
2018 											"OpDecorate %result FPRoundingMode RTZ\n",
2019 											"",
2020 											"",
2021 											"%result             = OpFConvert %type_f16 %arg1\n",
2022 											B_STATEMENT_USAGE_COMMANDS_TYPE_FP16);
2023 }
2024 
build(vector<OperationTestCase> & testCases,TypeTestResultsSP typeTestResults,bool argumentsFromInput)2025 void TestCasesBuilder::build(vector<OperationTestCase>& testCases, TypeTestResultsSP typeTestResults, bool argumentsFromInput)
2026 {
2027 	// this method constructs a list of test cases; this list is a bit different
2028 	// for every combination of float type, arguments preparation method and tested float control
2029 
2030 	testCases.reserve(750);
2031 
2032 	// Denorm - FlushToZero - binary operations
2033 	for (size_t i = 0 ; i < typeTestResults->binaryOpFTZ.size() ; ++i)
2034 	{
2035 		const BinaryCase&	binaryCase	= typeTestResults->binaryOpFTZ[i];
2036 		OperationId			operation	= binaryCase.operationId;
2037 		testCases.push_back(OTC("denorm_op_var_flush_to_zero",		B_DENORM_FLUSH,					 operation, V_DENORM, V_ONE,		binaryCase.opVarResult));
2038 		testCases.push_back(OTC("denorm_op_denorm_flush_to_zero",	B_DENORM_FLUSH,					 operation, V_DENORM, V_DENORM,		binaryCase.opDenormResult));
2039 		testCases.push_back(OTC("denorm_op_inf_flush_to_zero",		B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_INF,		binaryCase.opInfResult));
2040 		testCases.push_back(OTC("denorm_op_nan_flush_to_zero",		B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_NAN,		binaryCase.opNanResult));
2041 	}
2042 
2043 	// Denorm - FlushToZero - unary operations
2044 	for (size_t i = 0 ; i < typeTestResults->unaryOpFTZ.size() ; ++i)
2045 	{
2046 		const UnaryCase&	unaryCase = typeTestResults->unaryOpFTZ[i];
2047 		OperationId			operation = unaryCase.operationId;
2048 		testCases.push_back(OTC("op_denorm_flush_to_zero", B_DENORM_FLUSH, operation, V_DENORM, V_UNUSED, unaryCase.result));
2049 	}
2050 
2051 	// Denom - Preserve - binary operations
2052 	for (size_t i = 0 ; i < typeTestResults->binaryOpDenormPreserve.size() ; ++i)
2053 	{
2054 		const BinaryCase&	binaryCase	= typeTestResults->binaryOpDenormPreserve[i];
2055 		OperationId			operation	= binaryCase.operationId;
2056 		testCases.push_back(OTC("denorm_op_var_preserve",			B_DENORM_PRESERVE,					operation, V_DENORM,	V_ONE,		binaryCase.opVarResult));
2057 		testCases.push_back(OTC("denorm_op_denorm_preserve",		B_DENORM_PRESERVE,					operation, V_DENORM,	V_DENORM,	binaryCase.opDenormResult));
2058 		testCases.push_back(OTC("denorm_op_inf_preserve",			B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM,	V_INF,		binaryCase.opInfResult));
2059 		testCases.push_back(OTC("denorm_op_nan_preserve",			B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM,	V_NAN,		binaryCase.opNanResult));
2060 	}
2061 
2062 	// Denom - Preserve - unary operations
2063 	for (size_t i = 0 ; i < typeTestResults->unaryOpDenormPreserve.size() ; ++i)
2064 	{
2065 		const UnaryCase&	unaryCase	= typeTestResults->unaryOpDenormPreserve[i];
2066 		OperationId			operation	= unaryCase.operationId;
2067 		testCases.push_back(OTC("op_denorm_preserve", B_DENORM_PRESERVE, operation, V_DENORM, V_UNUSED, unaryCase.result));
2068 	}
2069 
2070 	struct ZINCase
2071 	{
2072 		OperationId	operationId;
2073 		bool		supportedByFP64;
2074 		ValueId		secondArgument;
2075 		ValueId		preserveZeroResult;
2076 		ValueId		preserveSZeroResult;
2077 		ValueId		preserveInfResult;
2078 		ValueId		preserveSInfResult;
2079 		ValueId		preserveNanResult;
2080 	};
2081 
2082 	const ZINCase binaryOpZINPreserve[] = {
2083 		// operation		fp64	second arg		preserve zero	preserve szero		preserve inf	preserve sinf		preserve nan
2084 		{ O_PHI,			true,	V_INF,			V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2085 		{ O_SELECT,			true,	V_ONE,			V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2086 		{ O_ADD,			true,	V_ZERO,			V_ZERO,			V_ZERO,				V_INF,			V_MINUS_INF,		V_NAN },
2087 		{ O_SUB,			true,	V_ZERO,			V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2088 		{ O_MUL,			true,	V_ONE,			V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2089 	};
2090 
2091 	const ZINCase unaryOpZINPreserve[] = {
2092 		// operation				fp64	second arg		preserve zero	preserve szero		preserve inf	preserve sinf		preserve nan
2093 		{ O_RETURN_VAL,				true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2094 		{ O_D_EXTRACT,				true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2095 		{ O_D_INSERT,				true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2096 		{ O_SHUFFLE,				true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2097 		{ O_COMPOSITE,				true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2098 		{ O_COMPOSITE_INS,			true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2099 		{ O_COPY,					true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2100 		{ O_TRANSPOSE,				true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2101 		{ O_NEGATE,					true,	V_UNUSED,		V_MINUS_ZERO,	V_ZERO,				V_MINUS_INF,	V_INF,				V_NAN },
2102 	};
2103 
2104 	bool isFP64 = typeTestResults->floatType() == FP64;
2105 
2106 	// Signed Zero Inf Nan - Preserve - binary operations
2107 	for (size_t i = 0 ; i < DE_LENGTH_OF_ARRAY(binaryOpZINPreserve) ; ++i)
2108 	{
2109 		const ZINCase& zc = binaryOpZINPreserve[i];
2110 		if (isFP64 && !zc.supportedByFP64)
2111 			continue;
2112 
2113 		testCases.push_back(OTC("zero_op_var_preserve",				B_ZIN_PRESERVE, zc.operationId, V_ZERO,			zc.secondArgument,	zc.preserveZeroResult));
2114 		testCases.push_back(OTC("signed_zero_op_var_preserve",		B_ZIN_PRESERVE, zc.operationId, V_MINUS_ZERO,	zc.secondArgument,	zc.preserveSZeroResult));
2115 		testCases.push_back(OTC("inf_op_var_preserve",				B_ZIN_PRESERVE, zc.operationId, V_INF,			zc.secondArgument,	zc.preserveInfResult));
2116 		testCases.push_back(OTC("signed_inf_op_var_preserve",		B_ZIN_PRESERVE, zc.operationId, V_MINUS_INF,	zc.secondArgument,	zc.preserveSInfResult));
2117 		testCases.push_back(OTC("nan_op_var_preserve",				B_ZIN_PRESERVE, zc.operationId, V_NAN,			zc.secondArgument,	zc.preserveNanResult));
2118 	}
2119 
2120 	// Signed Zero Inf Nan - Preserve - unary operations
2121 	for (size_t i = 0 ; i < DE_LENGTH_OF_ARRAY(unaryOpZINPreserve) ; ++i)
2122 	{
2123 		const ZINCase& zc = unaryOpZINPreserve[i];
2124 		if (isFP64 && !zc.supportedByFP64)
2125 			continue;
2126 
2127 		testCases.push_back(OTC("op_zero_preserve",			B_ZIN_PRESERVE,zc.operationId, V_ZERO,			V_UNUSED,	zc.preserveZeroResult));
2128 		testCases.push_back(OTC("op_signed_zero_preserve",	B_ZIN_PRESERVE,zc.operationId, V_MINUS_ZERO,	V_UNUSED,	zc.preserveSZeroResult));
2129 		testCases.push_back(OTC("op_inf_preserve",			B_ZIN_PRESERVE,zc.operationId, V_INF,			V_UNUSED,	zc.preserveInfResult));
2130 		testCases.push_back(OTC("op_signed_inf_preserve",	B_ZIN_PRESERVE,zc.operationId, V_MINUS_INF,		V_UNUSED,	zc.preserveSInfResult));
2131 		testCases.push_back(OTC("op_nan_preserve",			B_ZIN_PRESERVE,zc.operationId, V_NAN,			V_UNUSED,	zc.preserveNanResult));
2132 	}
2133 
2134 	// comparison operations - tested differently because they return true/false
2135 	struct ComparisonCase
2136 	{
2137 		OperationId	operationId;
2138 		ValueId		denormPreserveResult;
2139 	};
2140 	const ComparisonCase comparisonCases[] =
2141 	{
2142 		// operation	denorm
2143 		{ O_ORD_EQ,		V_ZERO },
2144 		{ O_UORD_EQ,	V_ZERO },
2145 		{ O_ORD_NEQ,	V_ONE  },
2146 		{ O_UORD_NEQ,	V_ONE  },
2147 		{ O_ORD_LS,		V_ONE  },
2148 		{ O_UORD_LS,	V_ONE  },
2149 		{ O_ORD_GT,		V_ZERO },
2150 		{ O_UORD_GT,	V_ZERO },
2151 		{ O_ORD_LE,		V_ONE  },
2152 		{ O_UORD_LE,	V_ONE  },
2153 		{ O_ORD_GE,		V_ZERO },
2154 		{ O_UORD_GE,	V_ZERO }
2155 	};
2156 	for (int op = 0 ; op < DE_LENGTH_OF_ARRAY(comparisonCases) ; ++op)
2157 	{
2158 		const ComparisonCase& cc = comparisonCases[op];
2159 		testCases.push_back(OTC("denorm_op_var_preserve", B_DENORM_PRESERVE, cc.operationId, V_DENORM, V_ONE, cc.denormPreserveResult));
2160 	}
2161 
2162 	if (argumentsFromInput)
2163 	{
2164 		struct RoundingModeCase
2165 		{
2166 			OperationId	operationId;
2167 			ValueId		arg1;
2168 			ValueId		arg2;
2169 			ValueId		expectedRTEResult;
2170 			ValueId		expectedRTZResult;
2171 		};
2172 
2173 		const RoundingModeCase roundingCases[] =
2174 		{
2175 			{ O_ADD,			V_ADD_ARG_A,	V_ADD_ARG_B,	V_ADD_RTE_RESULT,	V_ADD_RTZ_RESULT },
2176 			{ O_SUB,			V_SUB_ARG_A,	V_SUB_ARG_B,	V_SUB_RTE_RESULT,	V_SUB_RTZ_RESULT },
2177 			{ O_MUL,			V_MUL_ARG_A,	V_MUL_ARG_B,	V_MUL_RTE_RESULT,	V_MUL_RTZ_RESULT },
2178 			{ O_DOT,			V_DOT_ARG_A,	V_DOT_ARG_B,	V_DOT_RTE_RESULT,	V_DOT_RTZ_RESULT },
2179 
2180 			// in vect/mat multiplication by scalar operations only first element of result is checked
2181 			// so argument and result values prepared for multiplication can be reused for those cases
2182 			{ O_VEC_MUL_S,		V_MUL_ARG_A,	V_MUL_ARG_B,	V_MUL_RTE_RESULT,	V_MUL_RTZ_RESULT },
2183 			{ O_MAT_MUL_S,		V_MUL_ARG_A,	V_MUL_ARG_B,	V_MUL_RTE_RESULT,	V_MUL_RTZ_RESULT },
2184 			{ O_OUT_PROD,		V_MUL_ARG_A,	V_MUL_ARG_B,	V_MUL_RTE_RESULT,	V_MUL_RTZ_RESULT },
2185 
2186 			// in SPIR-V code we return first element of operation result so for following
2187 			// cases argument and result values prepared for dot product can be reused
2188 			{ O_VEC_MUL_M,		V_DOT_ARG_A,	V_DOT_ARG_B,	V_DOT_RTE_RESULT,	V_DOT_RTZ_RESULT },
2189 			{ O_MAT_MUL_V,		V_DOT_ARG_A,	V_DOT_ARG_B,	V_DOT_RTE_RESULT,	V_DOT_RTZ_RESULT },
2190 			{ O_MAT_MUL_M,		V_DOT_ARG_A,	V_DOT_ARG_B,	V_DOT_RTE_RESULT,	V_DOT_RTZ_RESULT },
2191 
2192 			// conversion operations are added separately - depending on float type width
2193 		};
2194 
2195 		for (int c = 0 ; c < DE_LENGTH_OF_ARRAY(roundingCases) ; ++c)
2196 		{
2197 			const RoundingModeCase& rmc = roundingCases[c];
2198 			testCases.push_back(OTC("rounding_rte_op", B_RTE_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTEResult));
2199 			testCases.push_back(OTC("rounding_rtz_op", B_RTZ_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTZResult));
2200 		}
2201 	}
2202 
2203 	// special cases
2204 	if (typeTestResults->floatType() == FP16)
2205 	{
2206 		if (argumentsFromInput)
2207 		{
2208 			testCases.push_back(OTC("rounding_rte_conv_from_fp32", B_RTE_ROUNDING, O_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2209 			testCases.push_back(OTC("rounding_rtz_conv_from_fp32", B_RTZ_ROUNDING, O_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2210 			testCases.push_back(OTC("rounding_rte_conv_from_fp64", B_RTE_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2211 			testCases.push_back(OTC("rounding_rtz_conv_from_fp64", B_RTZ_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2212 
2213 			testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32", B_RTE_ROUNDING, O_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2214 			testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32", B_RTZ_ROUNDING, O_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2215 			testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64", B_RTE_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2216 			testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64", B_RTZ_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2217 
2218 			// verify that VkShaderFloatingPointRoundingModeKHR can be overridden for a given instruction by the FPRoundingMode decoration
2219 			testCases.push_back(OTC("rounding_rte_override", B_RTE_ROUNDING, O_ORTZ_ROUND, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2220 			testCases.push_back(OTC("rounding_rtz_override", B_RTZ_ROUNDING, O_ORTE_ROUND, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2221 		}
2222 
2223 		createUnaryTestCases(testCases, O_CONV_FROM_FP32, V_CONV_DENORM_SMALLER, V_ZERO);
2224 		createUnaryTestCases(testCases, O_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO);
2225 	}
2226 	else if (typeTestResults->floatType() == FP32)
2227 	{
2228 		if (argumentsFromInput)
2229 		{
2230 			// convert from fp64 to fp32
2231 			testCases.push_back(OTC("rounding_rte_conv_from_fp64", B_RTE_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP32_RTE_RESULT));
2232 			testCases.push_back(OTC("rounding_rtz_conv_from_fp64", B_RTZ_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP32_RTZ_RESULT));
2233 
2234 			testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64", B_RTE_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP32, V_UNUSED, V_UNUSED, V_CONV_TO_FP32_RTE_RESULT));
2235 			testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64", B_RTZ_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP32, V_UNUSED, V_UNUSED, V_CONV_TO_FP32_RTZ_RESULT));
2236 		}
2237 		else
2238 		{
2239 			// PackHalf2x16 - verification done in SPIR-V
2240 			testCases.push_back(OTC("pack_half_denorm_preserve",		B_DENORM_PRESERVE,	O_PH_DENORM,	V_UNUSED, V_UNUSED, V_ONE));
2241 
2242 			// UnpackHalf2x16 - custom arguments defined as constants
2243 			testCases.push_back(OTC("upack_half_denorm_flush_to_zero",	B_DENORM_FLUSH,		O_UPH_DENORM,	V_UNUSED, V_UNUSED, V_ZERO));
2244 			testCases.push_back(OTC("upack_half_denorm_preserve",		B_DENORM_PRESERVE,	O_UPH_DENORM,	V_UNUSED, V_UNUSED, V_CONV_DENORM_SMALLER));
2245 		}
2246 
2247 		createUnaryTestCases(testCases, O_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP32);
2248 		createUnaryTestCases(testCases, O_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO);
2249 	}
2250 	else // FP64
2251 	{
2252 		if (!argumentsFromInput)
2253 		{
2254 			// PackDouble2x32 - custom arguments defined as constants
2255 			testCases.push_back(OTC("pack_double_denorm_preserve",			B_DENORM_PRESERVE,	O_PD_DENORM,			V_UNUSED, V_UNUSED, V_DENORM));
2256 
2257 			// UnpackDouble2x32 - verification done in SPIR-V
2258 			testCases.push_back(OTC("upack_double_denorm_flush_to_zero",	B_DENORM_FLUSH,		O_UPD_DENORM_FLUSH,		V_DENORM, V_UNUSED, V_ONE));
2259 			testCases.push_back(OTC("upack_double_denorm_preserve",			B_DENORM_PRESERVE,	O_UPD_DENORM_PRESERVE,	V_DENORM, V_UNUSED, V_ONE));
2260 		}
2261 
2262 		createUnaryTestCases(testCases, O_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP64);
2263 		createUnaryTestCases(testCases, O_CONV_FROM_FP32, V_CONV_DENORM_BIGGER, V_ZERO_OR_FP32_DENORM_TO_FP64);
2264 	}
2265 }
2266 
getOperation(OperationId id) const2267 const Operation& TestCasesBuilder::getOperation(OperationId id) const
2268 {
2269 	return m_operations.at(id);
2270 }
2271 
createUnaryTestCases(vector<OperationTestCase> & testCases,OperationId operationId,ValueId denormPreserveResult,ValueId denormFTZResult) const2272 void TestCasesBuilder::createUnaryTestCases(vector<OperationTestCase>& testCases, OperationId operationId, ValueId denormPreserveResult, ValueId denormFTZResult) const
2273 {
2274 	// Denom - Preserve
2275 	testCases.push_back(OTC("op_denorm_preserve",		B_DENORM_PRESERVE,	operationId, V_DENORM,	V_UNUSED, denormPreserveResult));
2276 
2277 	// Denorm - FlushToZero
2278 	testCases.push_back(OTC("op_denorm_flush_to_zero",	B_DENORM_FLUSH,		operationId, V_DENORM,	V_UNUSED, denormFTZResult));
2279 
2280 	// Signed Zero Inf Nan - Preserve
2281 	testCases.push_back(OTC("op_zero_preserve",			B_ZIN_PRESERVE,		operationId, V_ZERO,		V_UNUSED, V_ZERO));
2282 	testCases.push_back(OTC("op_signed_zero_preserve",	B_ZIN_PRESERVE,		operationId, V_MINUS_ZERO,	V_UNUSED, V_MINUS_ZERO));
2283 	testCases.push_back(OTC("op_inf_preserve",			B_ZIN_PRESERVE,		operationId, V_INF,			V_UNUSED, V_INF));
2284 	testCases.push_back(OTC("op_nan_preserve",			B_ZIN_PRESERVE,		operationId, V_NAN,			V_UNUSED, V_NAN));
2285 }
2286 
2287 template <typename TYPE, typename FLOAT_TYPE>
isZeroOrOtherValue(const TYPE & returnedFloat,ValueId secondAcceptableResult,TestLog & log)2288 bool isZeroOrOtherValue(const TYPE& returnedFloat, ValueId secondAcceptableResult, TestLog& log)
2289 {
2290 	if (returnedFloat.isZero() && !returnedFloat.signBit())
2291 		return true;
2292 
2293 	TypeValues<FLOAT_TYPE> typeValues;
2294 	typedef typename TYPE::StorageType SType;
2295 	typename RawConvert<FLOAT_TYPE, SType>::Value value;
2296 	value.fp = typeValues.getValue(secondAcceptableResult);
2297 
2298 	if (returnedFloat.bits() == value.ui)
2299 		return true;
2300 
2301 	log << TestLog::Message << "Expected 0 or " << toHex(value.ui)
2302 		<< " (" << value.fp << ")" << TestLog::EndMessage;
2303 	return false;
2304 }
2305 
2306 template <typename TYPE>
isAcosResultCorrect(const TYPE & returnedFloat,TestLog & log)2307 bool isAcosResultCorrect(const TYPE& returnedFloat, TestLog& log)
2308 {
2309 	// pi/2 is result of acos(0) which in the specs is defined as equivalent to
2310 	// atan2(sqrt(1.0 - x^2), x), where atan2 has 4096 ULP, sqrt is equivalent to
2311 	// 1.0 /inversesqrt(), inversesqrt() is 2 ULP and rcp is another 2.5 ULP
2312 
2313 	double precision = 0;
2314 	const double piDiv2 = 3.14159265358979323846 / 2;
2315 	if (returnedFloat.MANTISSA_BITS == 23)
2316 	{
2317 		FloatFormat fp32Format(-126, 127, 23, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
2318 		precision = fp32Format.ulp(piDiv2, 4096.0);
2319 	}
2320 	else
2321 	{
2322 		FloatFormat fp16Format(-14, 15, 10, true, tcu::MAYBE);
2323 		precision = fp16Format.ulp(piDiv2, 5.0);
2324 	}
2325 
2326 	if (deAbs(returnedFloat.asDouble() - piDiv2) < precision)
2327 		return true;
2328 
2329 	log << TestLog::Message << "Expected result to be in range"
2330 		<< " (" << piDiv2 - precision << ", " << piDiv2 + precision << "), got "
2331 		<< returnedFloat.asDouble() << TestLog::EndMessage;
2332 	return false;
2333 }
2334 
2335 template <typename TYPE>
isCosResultCorrect(const TYPE & returnedFloat,TestLog & log)2336 bool isCosResultCorrect(const TYPE& returnedFloat, TestLog& log)
2337 {
2338 	// for cos(x) with x between -pi and pi, the precision error is 2^-11 for fp32 and 2^-7 for fp16.
2339 	double precision = returnedFloat.MANTISSA_BITS == 23 ? dePow(2, -11) : dePow(2, -7);
2340 	const double expected = 1.0;
2341 
2342 	if (deAbs(returnedFloat.asDouble() - expected) < precision)
2343 		return true;
2344 
2345 	log << TestLog::Message << "Expected result to be in range"
2346 		<< " (" << expected - precision << ", " << expected + precision << "), got "
2347 		<< returnedFloat.asDouble() << TestLog::EndMessage;
2348 	return false;
2349 }
2350 
2351 template <typename FLOAT_TYPE>
getFloatTypeAsDouble(FLOAT_TYPE param)2352 double getFloatTypeAsDouble(FLOAT_TYPE param)
2353 {
2354 	return param;
2355 }
getFloatTypeAsDouble(deFloat16 param)2356 template<> double getFloatTypeAsDouble(deFloat16 param)
2357 {
2358 	return deFloat16To64(param);
2359 }
2360 
2361 
getPrecisionAt(double value,float ulp,int mantissaBits)2362 double getPrecisionAt(double value, float ulp, int mantissaBits)
2363 {
2364 	if (mantissaBits == 23)
2365 	{
2366 		FloatFormat fp32Format(-126, 127, 23, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
2367 		return fp32Format.ulp(value, ulp);
2368 	}
2369 	else if (mantissaBits == 52)
2370 	{
2371 		FloatFormat fp32Format(-1022, 1023, 52, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
2372 		return fp32Format.ulp(value, ulp);
2373 	}
2374 	else
2375 	{
2376 		DE_ASSERT(mantissaBits == 10);
2377 		FloatFormat fp16Format(-14, 15, 10, true, tcu::MAYBE);
2378 		return fp16Format.ulp(value, ulp);
2379 	}
2380 }
2381 
2382 template <typename TYPE, typename FLOAT_TYPE, typename REF_FUNCTION>
isLogResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,REF_FUNCTION refFunction,TestLog & log)2383 bool isLogResultCorrect(const TYPE& returnedFloat, FLOAT_TYPE param, REF_FUNCTION refFunction, TestLog& log)
2384 {
2385 	if (returnedFloat.isInf() && returnedFloat.signBit())
2386 		return true;
2387 
2388 	const double expected	= refFunction(getFloatTypeAsDouble(param));
2389 	const double precision	= getPrecisionAt(expected, 3.0, returnedFloat.MANTISSA_BITS);
2390 
2391 	if (deAbs(returnedFloat.asDouble() - expected) < precision)
2392 		return true;
2393 
2394 	log << TestLog::Message << "Expected result to be -INF or in range"
2395 		<< " (" << expected - precision << ", " << expected + precision << "), got "
2396 		<< returnedFloat.asDouble() << TestLog::EndMessage;
2397 	return false;
2398 }
2399 
2400 template <typename TYPE, typename FLOAT_TYPE>
isInverseSqrtResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,TestLog & log)2401 bool isInverseSqrtResultCorrect(const TYPE& returnedFloat, FLOAT_TYPE param, TestLog& log)
2402 {
2403 	if (returnedFloat.isInf() && !returnedFloat.signBit())
2404 		return true;
2405 
2406 	const double expected	= 1.0/ deSqrt(getFloatTypeAsDouble(param));
2407 	const double precision	= getPrecisionAt(expected, 2.0, returnedFloat.MANTISSA_BITS);
2408 
2409 	if (deAbs(returnedFloat.asDouble() - expected) < precision)
2410 		return true;
2411 
2412 	log << TestLog::Message << "Expected result to be INF or in range"
2413 		<< " (" << expected - precision << ", " << expected + precision << "), got "
2414 		<< returnedFloat.asDouble() << TestLog::EndMessage;
2415 	return false;
2416 }
2417 
2418 template <typename TYPE, typename FLOAT_TYPE>
isSqrtResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,TestLog & log)2419 bool isSqrtResultCorrect(const TYPE& returnedFloat, FLOAT_TYPE param, TestLog& log)
2420 {
2421 	if (returnedFloat.isZero() && !returnedFloat.signBit())
2422 		return true;
2423 
2424 
2425 	const double expected				= deSqrt(getFloatTypeAsDouble(param));
2426 	const double expectedInverseSqrt	= 1.0 / expected;
2427 	const double inverseSqrtPrecision	= getPrecisionAt(expectedInverseSqrt, 2.0, returnedFloat.MANTISSA_BITS);
2428 
2429 	double expectedMin = deMin(1.0 / (expectedInverseSqrt - inverseSqrtPrecision), 1.0 / (expectedInverseSqrt + inverseSqrtPrecision));
2430 	double expectedMax = deMax(1.0 / (expectedInverseSqrt - inverseSqrtPrecision), 1.0 / (expectedInverseSqrt + inverseSqrtPrecision));
2431 
2432 	expectedMin -= getPrecisionAt(expectedMin, 2.5, returnedFloat.MANTISSA_BITS);
2433 	expectedMax += getPrecisionAt(expectedMax, 2.5, returnedFloat.MANTISSA_BITS);
2434 
2435 	if (returnedFloat.asDouble() >= expectedMin  && returnedFloat.asDouble() <= expectedMax)
2436 		return true;
2437 
2438 	log << TestLog::Message << "Expected result to be +0 or in range"
2439 		<< " (" << expectedMin << ", " << expectedMax << "), got "
2440 		<< returnedFloat.asDouble() << TestLog::EndMessage;
2441 	return false;
2442 }
2443 
2444 // Function used to compare test result with expected output.
2445 // TYPE can be Float16, Float32 or Float64.
2446 // FLOAT_TYPE can be deFloat16, float, double.
2447 template <typename TYPE, typename FLOAT_TYPE>
compareBytes(vector<deUint8> & expectedBytes,AllocationSp outputAlloc,TestLog & log)2448 bool compareBytes(vector<deUint8>& expectedBytes, AllocationSp outputAlloc, TestLog& log)
2449 {
2450 	const TYPE* returned	= static_cast<const TYPE*>(outputAlloc->getHostPtr());
2451 	const TYPE* fValueId	= reinterpret_cast<const TYPE*>(&expectedBytes.front());
2452 
2453 	// all test return single value
2454 	DE_ASSERT((expectedBytes.size() / sizeof(TYPE)) == 1);
2455 
2456 	// during test setup we do not store expected value but id that can be used to
2457 	// retrieve actual value - this is done to handle special cases like multiple
2458 	// allowed results or epsilon checks for some cases
2459 	// note that this is workaround - this should be done by changing
2460 	// ComputerShaderCase and GraphicsShaderCase so that additional arguments can
2461 	// be passed to this verification callback
2462 	typedef typename TYPE::StorageType SType;
2463 	SType		expectedInt		= fValueId[0].bits();
2464 	ValueId		expectedValueId	= static_cast<ValueId>(expectedInt);
2465 
2466 	// something went wrong, expected value cant be V_UNUSED,
2467 	// if this is the case then test shouldn't be created at all
2468 	DE_ASSERT(expectedValueId != V_UNUSED);
2469 
2470 	TYPE returnedFloat = returned[0];
2471 
2472 	log << TestLog::Message << "Calculated result: " << toHex(returnedFloat.bits())
2473 		<< " (" << returnedFloat.asFloat() << ")" << TestLog::EndMessage;
2474 
2475 	if (expectedValueId == V_NAN)
2476 	{
2477 		if (returnedFloat.isNaN())
2478 			return true;
2479 
2480 		log << TestLog::Message << "Expected NaN" << TestLog::EndMessage;
2481 		return false;
2482 	}
2483 
2484 	if (expectedValueId == V_DENORM)
2485 	{
2486 		if (returnedFloat.isDenorm())
2487 			return true;
2488 
2489 		log << TestLog::Message << "Expected Denorm" << TestLog::EndMessage;
2490 		return false;
2491 	}
2492 
2493 	// handle multiple acceptable results cases
2494 	if (expectedValueId == V_ZERO_OR_MINUS_ZERO)
2495 	{
2496 		if (returnedFloat.isZero())
2497 			return true;
2498 
2499 		log << TestLog::Message << "Expected 0 or -0" << TestLog::EndMessage;
2500 		return false;
2501 	}
2502 	if (expectedValueId == V_ZERO_OR_ONE)
2503 		return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_ONE, log);
2504 	if ((expectedValueId == V_ZERO_OR_FP16_DENORM_TO_FP32) || (expectedValueId == V_ZERO_OR_FP16_DENORM_TO_FP64))
2505 		return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_CONV_DENORM_SMALLER, log);
2506 	if (expectedValueId == V_ZERO_OR_FP32_DENORM_TO_FP64)
2507 		return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_CONV_DENORM_BIGGER, log);
2508 	if (expectedValueId == V_MINUS_ONE_OR_CLOSE)
2509 	{
2510 		// this expected value is only needed for fp16
2511 		DE_ASSERT(returnedFloat.EXPONENT_BIAS == 15);
2512 		typename TYPE::StorageType returnedValue = returnedFloat.bits();
2513 		return (returnedValue == 0xbc00) || (returnedValue == 0xbbff);
2514 	}
2515 
2516 	// handle trigonometric operations precision errors
2517 	if (expectedValueId == V_TRIG_ONE)
2518 		return isCosResultCorrect<TYPE>(returnedFloat, log);
2519 
2520 	// handle acos(0) case
2521 	if (expectedValueId == V_PI_DIV_2)
2522 		return isAcosResultCorrect<TYPE>(returnedFloat, log);
2523 
2524 	TypeValues<FLOAT_TYPE> typeValues;
2525 
2526 	if (expectedValueId == V_MINUS_INF_OR_LOG_DENORM)
2527 		return isLogResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), deLog, log);
2528 
2529 	if (expectedValueId == V_MINUS_INF_OR_LOG2_DENORM)
2530 		return isLogResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), deLog2, log);
2531 
2532 	if (expectedValueId == V_ZERO_OR_SQRT_DENORM)
2533 		return isSqrtResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), log);
2534 
2535 	if (expectedValueId == V_INF_OR_INV_SQRT_DENORM)
2536 		return isInverseSqrtResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), log);
2537 
2538 
2539 	typename RawConvert<FLOAT_TYPE, SType>::Value value;
2540 	value.fp = typeValues.getValue(expectedValueId);
2541 
2542 	if (returnedFloat.bits() == value.ui)
2543 		return true;
2544 
2545 	log << TestLog::Message << "Expected " << toHex(value.ui)
2546 		<< " (" << value.fp << ")" << TestLog::EndMessage;
2547 	return false;
2548 }
2549 
2550 template <typename TYPE, typename FLOAT_TYPE>
checkFloats(const vector<Resource> &,const vector<AllocationSp> & outputAllocs,const vector<Resource> & expectedOutputs,TestLog & log)2551 bool checkFloats (const vector<Resource>&		,
2552 				  const vector<AllocationSp>&	outputAllocs,
2553 				  const vector<Resource>&		expectedOutputs,
2554 				  TestLog&						log)
2555 {
2556 	if (outputAllocs.size() != expectedOutputs.size())
2557 		return false;
2558 
2559 	for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
2560 	{
2561 		vector<deUint8> expectedBytes;
2562 		expectedOutputs[outputNdx].getBytes(expectedBytes);
2563 
2564 		if (!compareBytes<TYPE, FLOAT_TYPE>(expectedBytes, outputAllocs[outputNdx], log))
2565 			return false;
2566 	}
2567 
2568 	return true;
2569 }
2570 
checkMixedFloats(const vector<Resource> &,const vector<AllocationSp> & outputAllocs,const vector<Resource> & expectedOutputs,TestLog & log)2571 bool checkMixedFloats (const vector<Resource>&		,
2572 					   const vector<AllocationSp>&	outputAllocs,
2573 					   const vector<Resource>&		expectedOutputs,
2574 					   TestLog&						log)
2575 {
2576 	// this function validates buffers containing floats of diferent widths, order is not important
2577 
2578 	if (outputAllocs.size() != expectedOutputs.size())
2579 		return false;
2580 
2581 	// create map storing functions that should be used for comparision
2582 	// depending on float width in bytes; this lets us later to avoid switch in while
2583 	typedef bool (*compareFun)(vector<deUint8>& expectedBytes, AllocationSp outputAlloc, TestLog& log);
2584 	const map<size_t, compareFun> compareMap =
2585 	{
2586 		{ 2, compareBytes<Float16, deFloat16> },
2587 		{ 4, compareBytes<Float32, float> },
2588 		{ 8, compareBytes<Float64, double>},
2589 	};
2590 
2591 	vector<deUint8> expectedBytes;
2592 	bool			allResultsAreCorrect	= true;
2593 	int				resultIndex				= static_cast<int>(outputAllocs.size());
2594 
2595 	while (resultIndex--)
2596 	{
2597 		expectedOutputs[resultIndex].getBytes(expectedBytes);
2598 		size_t byteWidth		 = expectedOutputs[resultIndex].getByteSize();
2599 		allResultsAreCorrect	&= compareMap.at(byteWidth)(expectedBytes, outputAllocs[resultIndex], log);
2600 	}
2601 
2602 	return allResultsAreCorrect;
2603 }
2604 
2605 // Base class for ComputeTestGroupBuilder and GrephicstestGroupBuilder classes.
2606 // It contains all functionalities that are used by both child classes.
2607 class TestGroupBuilderBase
2608 {
2609 public:
2610 
2611 	TestGroupBuilderBase();
2612 	virtual ~TestGroupBuilderBase() = default;
2613 
2614 	virtual void createOperationTests(TestCaseGroup* parentGroup,
2615 									  const char* groupName,
2616 									  FloatType floatType,
2617 									  bool argumentsFromInput) = 0;
2618 
2619 	virtual void createSettingsTests(TestCaseGroup* parentGroup) = 0;
2620 
2621 protected:
2622 
2623 	typedef vector<OperationTestCase> TestCaseVect;
2624 
2625 	// Structure containing all data required to create single operation test.
2626 	struct OperationTestCaseInfo
2627 	{
2628 		FloatType					outFloatType;
2629 		bool						argumentsFromInput;
2630 		VkShaderStageFlagBits		testedStage;
2631 		const Operation&			operation;
2632 		const OperationTestCase&	testCase;
2633 	};
2634 
2635 	// Mode used by SettingsTestCaseInfo to specify what settings do we want to test.
2636 	enum SettingsMode
2637 	{
2638 		SM_ROUNDING			= 0,
2639 		SM_DENORMS
2640 	};
2641 
2642 	// Enum containing available options. When rounding is tested only SO_RTE and SO_RTZ
2643 	// should be used. SO_FLUSH and SO_PRESERVE should be used only for denorm tests.
2644 	enum SettingsOption
2645 	{
2646 		SO_UNUSED			= 0,
2647 		SO_RTE,
2648 		SO_RTZ,
2649 		SO_FLUSH,
2650 		SO_PRESERVE
2651 	};
2652 
2653 	// Structure containing all data required to create single settings test.
2654 	struct SettingsTestCaseInfo
2655 	{
2656 		const char*								name;
2657 		SettingsMode							testedMode;
2658 		VkShaderFloatControlsIndependence		independenceSetting;
2659 
2660 		SettingsOption							fp16Option;
2661 		SettingsOption							fp32Option;
2662 		SettingsOption							fp64Option;
2663 	};
2664 
2665 	void specializeOperation(const OperationTestCaseInfo&	testCaseInfo,
2666 							 SpecializedOperation&			specializedOperation) const;
2667 
2668 	void getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,
2669 											   const string inBitWidth,
2670 											   const string outBitWidth,
2671 											   string& capability,
2672 											   string& executionMode) const;
2673 
2674 	void setupVulkanFeatures(FloatType			inFloatType,
2675 							 FloatType			outFloatType,
2676 							 BehaviorFlags		behaviorFlags,
2677 							 bool				float64FeatureRequired,
2678 							 VulkanFeatures&	features) const;
2679 
2680 protected:
2681 
2682 	struct TypeData
2683 	{
2684 		TypeValuesSP		values;
2685 		TypeSnippetsSP		snippets;
2686 		TypeTestResultsSP	testResults;
2687 	};
2688 
2689 	// Type specific parameters are stored in this map.
2690 	map<FloatType, TypeData> m_typeData;
2691 
2692 	// Map converting behaviuor id to OpCapability instruction
2693 	typedef map<BehaviorFlagBits, string> BehaviorNameMap;
2694 	BehaviorNameMap m_behaviorToName;
2695 };
2696 
TestGroupBuilderBase()2697 TestGroupBuilderBase::TestGroupBuilderBase()
2698 {
2699 	m_typeData[FP16] = TypeData();
2700 	m_typeData[FP16].values			= TypeValuesSP(new TypeValues<deFloat16>);
2701 	m_typeData[FP16].snippets		= TypeSnippetsSP(new TypeSnippets<deFloat16>);
2702 	m_typeData[FP16].testResults	= TypeTestResultsSP(new TypeTestResults<deFloat16>);
2703 	m_typeData[FP32] = TypeData();
2704 	m_typeData[FP32].values			= TypeValuesSP(new TypeValues<float>);
2705 	m_typeData[FP32].snippets		= TypeSnippetsSP(new TypeSnippets<float>);
2706 	m_typeData[FP32].testResults	= TypeTestResultsSP(new TypeTestResults<float>);
2707 	m_typeData[FP64] = TypeData();
2708 	m_typeData[FP64].values			= TypeValuesSP(new TypeValues<double>);
2709 	m_typeData[FP64].snippets		= TypeSnippetsSP(new TypeSnippets<double>);
2710 	m_typeData[FP64].testResults	= TypeTestResultsSP(new TypeTestResults<double>);
2711 
2712 	m_behaviorToName[B_DENORM_PRESERVE]	= "DenormPreserve";
2713 	m_behaviorToName[B_DENORM_FLUSH]	= "DenormFlushToZero";
2714 	m_behaviorToName[B_ZIN_PRESERVE]	= "SignedZeroInfNanPreserve";
2715 	m_behaviorToName[B_RTE_ROUNDING]	= "RoundingModeRTE";
2716 	m_behaviorToName[B_RTZ_ROUNDING]	= "RoundingModeRTZ";
2717 }
2718 
specializeOperation(const OperationTestCaseInfo & testCaseInfo,SpecializedOperation & specializedOperation) const2719 void TestGroupBuilderBase::specializeOperation (const OperationTestCaseInfo&	testCaseInfo,
2720 												SpecializedOperation&			specializedOperation) const
2721 {
2722 	const string		typeToken		= "_float";
2723 	const string		widthToken		= "${float_width}";
2724 
2725 	FloatType				outFloatType	= testCaseInfo.outFloatType;
2726 	const Operation&		operation		= testCaseInfo.operation;
2727 	const TypeSnippetsSP	outTypeSnippets	= m_typeData.at(outFloatType).snippets;
2728 	const bool				inputRestricted	= operation.isInputTypeRestricted;
2729 	FloatType				inFloatType		= operation.restrictedInputType;
2730 
2731 	// usually input type is same as output but this is not the case for conversion
2732 	// operations; in those cases operation definitions have restricted input type
2733 	inFloatType = inputRestricted ? inFloatType : outFloatType;
2734 
2735 	TypeSnippetsSP inTypeSnippets = m_typeData.at(inFloatType).snippets;
2736 
2737 	const string inTypePrefix	= string("_f") + inTypeSnippets->bitWidth;
2738 	const string outTypePrefix	= string("_f") + outTypeSnippets->bitWidth;
2739 
2740 	specializedOperation.constants		= replace(operation.constants, typeToken, inTypePrefix);
2741 	specializedOperation.annotations	= replace(operation.annotations, widthToken, outTypeSnippets->bitWidth);
2742 	specializedOperation.types			= replace(operation.types, typeToken, outTypePrefix);
2743 	specializedOperation.variables		= replace(operation.variables, typeToken, outTypePrefix);
2744 	specializedOperation.functions		= replace(operation.functions, typeToken, outTypePrefix);
2745 	specializedOperation.commands		= replace(operation.commands, typeToken, outTypePrefix);
2746 
2747 	specializedOperation.inFloatType				= inFloatType;
2748 	specializedOperation.inTypeSnippets				= inTypeSnippets;
2749 	specializedOperation.outTypeSnippets			= outTypeSnippets;
2750 	specializedOperation.argumentsUsesFloatConstant	= 0;
2751 
2752 	if (operation.isSpecConstant)
2753 		return;
2754 
2755 	// select way arguments are prepared
2756 	if (testCaseInfo.argumentsFromInput)
2757 	{
2758 		// read arguments from input SSBO in main function
2759 		specializedOperation.arguments = inTypeSnippets->argumentsFromInputSnippet;
2760 	}
2761 	else
2762 	{
2763 		// generate proper values in main function
2764 		const string arg1 = "%arg1                 = ";
2765 		const string arg2 = "%arg2                 = ";
2766 
2767 		const ValueId* inputArguments = testCaseInfo.testCase.input;
2768 		if (inputArguments[0] != V_UNUSED)
2769 		{
2770 			specializedOperation.arguments					= arg1 + inTypeSnippets->valueIdToSnippetArgMap.at(inputArguments[0]);
2771 			specializedOperation.argumentsUsesFloatConstant	|= B_STATEMENT_USAGE_ARGS_CONST_FLOAT;
2772 		}
2773 		if (inputArguments[1] != V_UNUSED)
2774 		{
2775 			specializedOperation.arguments					+= arg2 + inTypeSnippets->valueIdToSnippetArgMap.at(inputArguments[1]);
2776 			specializedOperation.argumentsUsesFloatConstant	|= B_STATEMENT_USAGE_ARGS_CONST_FLOAT;
2777 		}
2778 	}
2779 }
2780 
2781 
getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,const string inBitWidth,const string outBitWidth,string & capability,string & executionMode) const2782 void TestGroupBuilderBase::getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,
2783 																 const string inBitWidth,
2784 																 const string outBitWidth,
2785 																 string& capability,
2786 																 string& executionMode) const
2787 {
2788 	// iterate over all behaviours and request those that are needed
2789 	BehaviorNameMap::const_iterator it = m_behaviorToName.begin();
2790 	while (it != m_behaviorToName.end())
2791 	{
2792 		BehaviorFlagBits	behaviorId		= it->first;
2793 		string				behaviorName	= it->second;
2794 
2795 		if (behaviorFlags & behaviorId)
2796 		{
2797 			capability += "OpCapability " + behaviorName + "\n";
2798 
2799 			// rounding mode should be obeyed for destination type
2800 			bool rounding = (behaviorId == B_RTE_ROUNDING) || (behaviorId == B_RTZ_ROUNDING);
2801 			executionMode += "OpExecutionMode %main " + behaviorName + " " +
2802 							 (rounding ? outBitWidth : inBitWidth) + "\n";
2803 		}
2804 
2805 		++it;
2806 	}
2807 
2808 	DE_ASSERT(!capability.empty() && !executionMode.empty());
2809 }
2810 
setupVulkanFeatures(FloatType inFloatType,FloatType outFloatType,BehaviorFlags behaviorFlags,bool float64FeatureRequired,VulkanFeatures & features) const2811 void TestGroupBuilderBase::setupVulkanFeatures(FloatType		inFloatType,
2812 											   FloatType		outFloatType,
2813 											   BehaviorFlags	behaviorFlags,
2814 											   bool				float64FeatureRequired,
2815 											   VulkanFeatures&	features) const
2816 {
2817 	features.coreFeatures.shaderFloat64 = float64FeatureRequired;
2818 
2819 	// request proper float controls features
2820 	ExtensionFloatControlsFeatures& floatControls = features.floatControlsProperties;
2821 
2822 	// rounding mode should obey the destination type
2823 	bool rteRounding = (behaviorFlags & B_RTE_ROUNDING) != 0;
2824 	bool rtzRounding = (behaviorFlags & B_RTZ_ROUNDING) != 0;
2825 	if (rteRounding || rtzRounding)
2826 	{
2827 		switch(outFloatType)
2828 		{
2829 		case FP16:
2830 			floatControls.shaderRoundingModeRTEFloat16 = rteRounding;
2831 			floatControls.shaderRoundingModeRTZFloat16 = rtzRounding;
2832 			return;
2833 		case FP32:
2834 			floatControls.shaderRoundingModeRTEFloat32 = rteRounding;
2835 			floatControls.shaderRoundingModeRTZFloat32 = rtzRounding;
2836 			return;
2837 		case FP64:
2838 			floatControls.shaderRoundingModeRTEFloat64 = rteRounding;
2839 			floatControls.shaderRoundingModeRTZFloat64 = rtzRounding;
2840 			return;
2841 		}
2842 	}
2843 
2844 	switch(inFloatType)
2845 	{
2846 	case FP16:
2847 		floatControls.shaderDenormPreserveFloat16			= behaviorFlags & B_DENORM_PRESERVE;
2848 		floatControls.shaderDenormFlushToZeroFloat16		= behaviorFlags & B_DENORM_FLUSH;
2849 		floatControls.shaderSignedZeroInfNanPreserveFloat16	= behaviorFlags & B_ZIN_PRESERVE;
2850 		return;
2851 	case FP32:
2852 		floatControls.shaderDenormPreserveFloat32			= behaviorFlags & B_DENORM_PRESERVE;
2853 		floatControls.shaderDenormFlushToZeroFloat32		= behaviorFlags & B_DENORM_FLUSH;
2854 		floatControls.shaderSignedZeroInfNanPreserveFloat32	= behaviorFlags & B_ZIN_PRESERVE;
2855 		return;
2856 	case FP64:
2857 		floatControls.shaderDenormPreserveFloat64			= behaviorFlags & B_DENORM_PRESERVE;
2858 		floatControls.shaderDenormFlushToZeroFloat64		= behaviorFlags & B_DENORM_FLUSH;
2859 		floatControls.shaderSignedZeroInfNanPreserveFloat64	= behaviorFlags & B_ZIN_PRESERVE;
2860 		return;
2861 	}
2862 }
2863 
2864 // Test case not related to SPIR-V but executed with compute tests. It checks if specified
2865 // features are set to the same value when specific independence settings are used.
verifyIndependenceSettings(Context & context)2866 tcu::TestStatus verifyIndependenceSettings(Context& context)
2867 {
2868 	if (!context.isDeviceFunctionalitySupported("VK_KHR_shader_float_controls"))
2869 		TCU_THROW(NotSupportedError, "VK_KHR_shader_float_controls not supported");
2870 
2871 	vk::VkPhysicalDeviceFloatControlsPropertiesKHR	fcProperties;
2872 	fcProperties.sType	= VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR;
2873 	fcProperties.pNext	= DE_NULL;
2874 
2875 	vk::VkPhysicalDeviceProperties2 deviceProperties;
2876 	deviceProperties.sType	= VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
2877 	deviceProperties.pNext	= &fcProperties;
2878 
2879 	auto fail = [](const string& featureGroup)
2880 	{
2881 		return tcu::TestStatus::fail(featureGroup + " features should be set to the same value");
2882 	};
2883 
2884 	const VkPhysicalDevice			physicalDevice		= context.getPhysicalDevice();
2885 	const vk::InstanceInterface&	instanceInterface	= context.getInstanceInterface();
2886 	instanceInterface.getPhysicalDeviceProperties2(physicalDevice, &deviceProperties);
2887 
2888 	if (fcProperties.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR)
2889 	{
2890 		vk::VkBool32 fp16rte = fcProperties.shaderRoundingModeRTEFloat16;
2891 		vk::VkBool32 fp32rte = fcProperties.shaderRoundingModeRTEFloat32;
2892 		vk::VkBool32 fp64rte = fcProperties.shaderRoundingModeRTEFloat64;
2893 		if ((fp16rte != fp32rte) || (fp32rte != fp64rte))
2894 			return fail("shaderRoundingModeRTEFloat*");
2895 
2896 		vk::VkBool32 fp16rtz = fcProperties.shaderRoundingModeRTZFloat16;
2897 		vk::VkBool32 fp32rtz = fcProperties.shaderRoundingModeRTZFloat32;
2898 		vk::VkBool32 fp64rtz = fcProperties.shaderRoundingModeRTZFloat64;
2899 		if ((fp16rtz != fp32rtz) || (fp32rtz != fp64rtz))
2900 			return fail("shaderRoundingModeRTZFloat*");
2901 	}
2902 	else if (fcProperties.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR)
2903 	{
2904 		vk::VkBool32 fp16rte = fcProperties.shaderRoundingModeRTEFloat16;
2905 		vk::VkBool32 fp64rte = fcProperties.shaderRoundingModeRTEFloat64;
2906 		if ((fp16rte != fp64rte))
2907 			return fail("shaderRoundingModeRTEFloat16 and 64");
2908 
2909 		vk::VkBool32 fp16rtz = fcProperties.shaderRoundingModeRTZFloat16;
2910 		vk::VkBool32 fp64rtz = fcProperties.shaderRoundingModeRTZFloat64;
2911 		if ((fp16rtz != fp64rtz))
2912 			return fail("shaderRoundingModeRTZFloat16 and 64");
2913 	}
2914 
2915 	if (fcProperties.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR)
2916 	{
2917 		vk::VkBool32 fp16flush = fcProperties.shaderDenormFlushToZeroFloat16;
2918 		vk::VkBool32 fp32flush = fcProperties.shaderDenormFlushToZeroFloat32;
2919 		vk::VkBool32 fp64flush = fcProperties.shaderDenormFlushToZeroFloat64;
2920 		if ((fp16flush != fp32flush) || (fp32flush != fp64flush))
2921 			return fail("shaderDenormFlushToZeroFloat*");
2922 
2923 		vk::VkBool32 fp16preserve = fcProperties.shaderDenormPreserveFloat16;
2924 		vk::VkBool32 fp32preserve = fcProperties.shaderDenormPreserveFloat32;
2925 		vk::VkBool32 fp64preserve = fcProperties.shaderDenormPreserveFloat64;
2926 		if ((fp16preserve != fp32preserve) || (fp32preserve != fp64preserve))
2927 			return fail("shaderDenormPreserveFloat*");
2928 	}
2929 	else if (fcProperties.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR)
2930 	{
2931 		vk::VkBool32 fp16flush = fcProperties.shaderDenormFlushToZeroFloat16;
2932 		vk::VkBool32 fp64flush = fcProperties.shaderDenormFlushToZeroFloat64;
2933 		if ((fp16flush != fp64flush))
2934 			return fail("shaderDenormFlushToZeroFloat16 and 64");
2935 
2936 		vk::VkBool32 fp16preserve = fcProperties.shaderDenormPreserveFloat16;
2937 		vk::VkBool32 fp64preserve = fcProperties.shaderDenormPreserveFloat64;
2938 		if ((fp16preserve != fp64preserve))
2939 			return fail("shaderDenormPreserveFloat16 and 64");
2940 	}
2941 
2942 	return tcu::TestStatus::pass("Pass");
2943 }
2944 
2945 // ComputeTestGroupBuilder contains logic that creates compute shaders
2946 // for all test cases. As most tests in spirv-assembly it uses functionality
2947 // implemented in vktSpvAsmComputeShaderTestUtil.cpp.
2948 class ComputeTestGroupBuilder: public TestGroupBuilderBase
2949 {
2950 public:
2951 
2952 	void init();
2953 
2954 	void createOperationTests(TestCaseGroup* parentGroup,
2955 							  const char* groupName,
2956 							  FloatType floatType,
2957 							  bool argumentsFromInput) override;
2958 
2959 	void createSettingsTests(TestCaseGroup* parentGroup) override;
2960 
2961 protected:
2962 
2963 	void fillShaderSpec(const OperationTestCaseInfo&	testCaseInfo,
2964 						ComputeShaderSpec&				csSpec) const;
2965 	void fillShaderSpec(const SettingsTestCaseInfo&		testCaseInfo,
2966 						ComputeShaderSpec&				csSpec) const;
2967 
2968 private:
2969 
2970 
2971 	StringTemplate		m_operationShaderTemplate;
2972 	StringTemplate		m_settingsShaderTemplate;
2973 	TestCasesBuilder	m_operationTestCaseBuilder;
2974 };
2975 
init()2976 void ComputeTestGroupBuilder::init()
2977 {
2978 	m_operationTestCaseBuilder.init();
2979 
2980 	// generic compute shader template with common code for all
2981 	// float types and all possible operations listed in OperationId enum
2982 	m_operationShaderTemplate.setString(
2983 		"OpCapability Shader\n"
2984 		"${capabilities}"
2985 
2986 		"OpExtension \"SPV_KHR_float_controls\"\n"
2987 		"${extensions}"
2988 
2989 		"%std450            = OpExtInstImport \"GLSL.std.450\"\n"
2990 		"OpMemoryModel Logical GLSL450\n"
2991 		"OpEntryPoint GLCompute %main \"main\" %id\n"
2992 		"OpExecutionMode %main LocalSize 1 1 1\n"
2993 		"${execution_mode}"
2994 
2995 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2996 
2997 		// some tests require additional annotations
2998 		"${annotations}"
2999 
3000 		"%type_void            = OpTypeVoid\n"
3001 		"%type_voidf           = OpTypeFunction %type_void\n"
3002 		"%type_bool            = OpTypeBool\n"
3003 		"%type_u32             = OpTypeInt 32 0\n"
3004 		"%type_i32             = OpTypeInt 32 1\n"
3005 		"%type_i32_fptr        = OpTypePointer Function %type_i32\n"
3006 		"%type_u32_vec2        = OpTypeVector %type_u32 2\n"
3007 		"%type_u32_vec3        = OpTypeVector %type_u32 3\n"
3008 		"%type_u32_vec3_ptr    = OpTypePointer Input %type_u32_vec3\n"
3009 
3010 		"%c_i32_0              = OpConstant %type_i32 0\n"
3011 		"%c_i32_1              = OpConstant %type_i32 1\n"
3012 		"%c_i32_2              = OpConstant %type_i32 2\n"
3013 		"%c_u32_1              = OpConstant %type_u32 1\n"
3014 
3015 		// if input float type has different width then output then
3016 		// both types are defined here along with all types derived from
3017 		// them that are commonly used by tests; some tests also define
3018 		// their own types (those that are needed just by this single test)
3019 		"${types}"
3020 
3021 		// SSBO definitions
3022 		"${io_definitions}"
3023 
3024 		"%id                   = OpVariable %type_u32_vec3_ptr Input\n"
3025 
3026 		// set of default constants per float type is placed here,
3027 		// operation tests can also define additional constants.
3028 		"${constants}"
3029 
3030 		// O_RETURN_VAL defines function here and becouse
3031 		// of that this token needs to be directly before main function
3032 		"${functions}"
3033 
3034 		"%main                 = OpFunction %type_void None %type_voidf\n"
3035 		"%label                = OpLabel\n"
3036 
3037 		"${variables}"
3038 
3039 		// depending on test case arguments are either read from input ssbo
3040 		// or generated in spir-v code - in later case shader input is not used
3041 		"${arguments}"
3042 
3043 		// perform test commands
3044 		"${commands}"
3045 
3046 		// save result to SSBO
3047 		"${save_result}"
3048 
3049 		"OpReturn\n"
3050 		"OpFunctionEnd\n");
3051 
3052 	m_settingsShaderTemplate.setString(
3053 		"OpCapability Shader\n"
3054 		"${capabilities}"
3055 
3056 		"OpExtension \"SPV_KHR_float_controls\"\n"
3057 		"${extensions}"
3058 
3059 		"%std450 = OpExtInstImport \"GLSL.std.450\"\n"
3060 		"OpMemoryModel Logical GLSL450\n"
3061 		"OpEntryPoint GLCompute %main \"main\" %id\n"
3062 		"OpExecutionMode %main LocalSize 1 1 1\n"
3063 		"${execution_modes}"
3064 
3065 		// annotations
3066 		"OpDecorate %SSBO_in BufferBlock\n"
3067 		"OpDecorate %ssbo_in DescriptorSet 0\n"
3068 		"OpDecorate %ssbo_in Binding 0\n"
3069 		"OpDecorate %ssbo_in NonWritable\n"
3070 		"${io_annotations}"
3071 
3072 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3073 
3074 		// types
3075 		"%type_void            = OpTypeVoid\n"
3076 		"%type_voidf           = OpTypeFunction %type_void\n"
3077 		"%type_u32             = OpTypeInt 32 0\n"
3078 		"%type_i32             = OpTypeInt 32 1\n"
3079 		"%type_i32_fptr        = OpTypePointer Function %type_i32\n"
3080 		"%type_u32_vec3        = OpTypeVector %type_u32 3\n"
3081 		"%type_u32_vec3_ptr    = OpTypePointer Input %type_u32_vec3\n"
3082 
3083 		"%c_i32_0              = OpConstant %type_i32 0\n"
3084 		"%c_i32_1              = OpConstant %type_i32 1\n"
3085 		"%c_i32_2              = OpConstant %type_i32 2\n"
3086 
3087 		"${types}"
3088 
3089 		// in SSBO definition
3090 		"%SSBO_in              = OpTypeStruct ${in_struct}\n"
3091 		"%up_SSBO_in           = OpTypePointer Uniform %SSBO_in\n"
3092 		"%ssbo_in              = OpVariable %up_SSBO_in Uniform\n"
3093 
3094 		// out SSBO definitions
3095 		"${out_definitions}"
3096 
3097 		"%id                   = OpVariable %type_u32_vec3_ptr Input\n"
3098 		"%main                 = OpFunction %type_void None %type_voidf\n"
3099 		"%label                = OpLabel\n"
3100 
3101 		"${commands}"
3102 
3103 		"${save_result}"
3104 
3105 		"OpReturn\n"
3106 		"OpFunctionEnd\n");
3107 }
3108 
createOperationTests(TestCaseGroup * parentGroup,const char * groupName,FloatType floatType,bool argumentsFromInput)3109 void ComputeTestGroupBuilder::createOperationTests(TestCaseGroup* parentGroup, const char* groupName, FloatType floatType, bool argumentsFromInput)
3110 {
3111 	TestContext&	testCtx	= parentGroup->getTestContext();
3112 	TestCaseGroup*	group	= new TestCaseGroup(testCtx, groupName, "");
3113 	parentGroup->addChild(group);
3114 
3115 	TestCaseVect testCases;
3116 	m_operationTestCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
3117 
3118 	TestCaseVect::const_iterator currTestCase = testCases.begin();
3119 	TestCaseVect::const_iterator lastTestCase = testCases.end();
3120 	while(currTestCase != lastTestCase)
3121 	{
3122 		const OperationTestCase& testCase = *currTestCase;
3123 		++currTestCase;
3124 
3125 		// skip cases with undefined output
3126 		if (testCase.expectedOutput == V_UNUSED)
3127 			continue;
3128 
3129 		OperationTestCaseInfo testCaseInfo =
3130 		{
3131 			floatType,
3132 			argumentsFromInput,
3133 			VK_SHADER_STAGE_COMPUTE_BIT,
3134 			m_operationTestCaseBuilder.getOperation(testCase.operationId),
3135 			testCase
3136 		};
3137 
3138 		ComputeShaderSpec	csSpec;
3139 
3140 		fillShaderSpec(testCaseInfo, csSpec);
3141 
3142 		string testName = replace(testCase.baseName, "op", testCaseInfo.operation.name);
3143 		group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", csSpec));
3144 	}
3145 }
3146 
createSettingsTests(TestCaseGroup * parentGroup)3147 void ComputeTestGroupBuilder::createSettingsTests(TestCaseGroup* parentGroup)
3148 {
3149 	TestContext&	testCtx	= parentGroup->getTestContext();
3150 	TestCaseGroup*	group	= new TestCaseGroup(testCtx, "independence_settings", "");
3151 	parentGroup->addChild(group);
3152 
3153 	using SFCI = VkShaderFloatControlsIndependence;
3154 	const SFCI independence32	= VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
3155 	const SFCI independenceAll	= VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
3156 
3157 	vector<SettingsTestCaseInfo> testCases =
3158 	{
3159 		// name															mode			independenceSetting		fp16Option		fp32Option		fp64Option
3160 
3161 		// test rounding modes when only two float widths are available
3162 		{ "rounding_ind_all_fp16_rte_fp32_rtz",							SM_ROUNDING,	independenceAll,		SO_RTE,			SO_RTZ,			SO_UNUSED },
3163 		{ "rounding_ind_all_fp16_rtz_fp32_rte",							SM_ROUNDING,	independenceAll,		SO_RTZ,			SO_RTE,			SO_UNUSED },
3164 		{ "rounding_ind_32_fp16_rte_fp32_rtz",							SM_ROUNDING,	independence32,			SO_RTE,			SO_RTZ,			SO_UNUSED },
3165 		{ "rounding_ind_32_fp16_rtz_fp32_rte",							SM_ROUNDING,	independence32,			SO_RTZ,			SO_RTE,			SO_UNUSED },
3166 		{ "rounding_ind_all_fp16_rte_fp64_rtz",							SM_ROUNDING,	independenceAll,		SO_RTE,			SO_UNUSED,		SO_RTZ },
3167 		{ "rounding_ind_all_fp16_rtz_fp64_rte",							SM_ROUNDING,	independenceAll,		SO_RTZ,			SO_UNUSED,		SO_RTE },
3168 		{ "rounding_ind_all_fp32_rte_fp64_rtz",							SM_ROUNDING,	independenceAll,		SO_UNUSED,		SO_RTE,			SO_RTZ },
3169 		{ "rounding_ind_all_fp32_rtz_fp64_rte",							SM_ROUNDING,	independenceAll,		SO_UNUSED,		SO_RTZ,			SO_RTE },
3170 		{ "rounding_ind_32_fp32_rte_fp64_rtz",							SM_ROUNDING,	independence32,			SO_UNUSED,		SO_RTE,			SO_RTZ },
3171 		{ "rounding_ind_32_fp32_rtz_fp64_rte",							SM_ROUNDING,	independence32,			SO_UNUSED,		SO_RTZ,			SO_RTE },
3172 
3173 		// test rounding modes when three widths are available
3174 		{ "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rtz",				SM_ROUNDING,	independenceAll,		SO_RTZ,			SO_RTE,			SO_RTZ },
3175 		{ "rounding_ind_32_fp16_rtz_fp32_rte_fp64_rtz",					SM_ROUNDING,	independence32,			SO_RTZ,			SO_RTE,			SO_RTZ },
3176 		{ "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rte",				SM_ROUNDING,	independenceAll,		SO_RTE,			SO_RTZ,			SO_RTE },
3177 		{ "rounding_ind_32_fp16_rte_fp32_rtz_fp64_rte",					SM_ROUNDING,	independence32,			SO_RTE,			SO_RTZ,			SO_RTE },
3178 		{ "rounding_ind_all_fp16_rtz_fp32_rtz_fp64_rte",				SM_ROUNDING,	independenceAll,		SO_RTZ,			SO_RTZ,			SO_RTE },
3179 		{ "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rte",				SM_ROUNDING,	independenceAll,		SO_RTZ,			SO_RTE,			SO_RTE },
3180 		{ "rounding_ind_all_fp16_rte_fp32_rte_fp64_rtz",				SM_ROUNDING,	independenceAll,		SO_RTE,			SO_RTE,			SO_RTZ },
3181 		{ "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rtz",				SM_ROUNDING,	independenceAll,		SO_RTE,			SO_RTZ,			SO_RTZ },
3182 
3183 		// test denorm settings when only two float widths are available
3184 		{ "denorm_ind_all_fp16_flush_fp32_preserve",					SM_DENORMS,		independenceAll,		SO_FLUSH,		SO_PRESERVE,	SO_UNUSED },
3185 		{ "denorm_ind_all_fp16_preserve_fp32_flush",					SM_DENORMS,		independenceAll,		SO_PRESERVE,	SO_FLUSH,		SO_UNUSED },
3186 		{ "denorm_ind_32_fp16_flush_fp32_preserve",						SM_DENORMS,		independence32,			SO_FLUSH,		SO_PRESERVE,	SO_UNUSED },
3187 		{ "denorm_ind_32_fp16_preserve_fp32_flush",						SM_DENORMS,		independence32,			SO_PRESERVE,	SO_FLUSH,		SO_UNUSED },
3188 		{ "denorm_ind_all_fp16_flush_fp64_preserve",					SM_DENORMS,		independenceAll,		SO_FLUSH,		SO_UNUSED,		SO_PRESERVE },
3189 		{ "denorm_ind_all_fp16_preserve_fp64_flush",					SM_DENORMS,		independenceAll,		SO_PRESERVE,	SO_UNUSED,		SO_FLUSH },
3190 		{ "denorm_ind_all_fp32_flush_fp64_preserve",					SM_DENORMS,		independenceAll,		SO_UNUSED,		SO_FLUSH,		SO_PRESERVE },
3191 		{ "denorm_ind_all_fp32_preserve_fp64_flush",					SM_DENORMS,		independenceAll,		SO_UNUSED,		SO_PRESERVE,	SO_FLUSH },
3192 		{ "denorm_ind_32_fp32_flush_fp64_preserve",						SM_DENORMS,		independence32,			SO_UNUSED,		SO_FLUSH,		SO_PRESERVE },
3193 		{ "denorm_ind_32_fp32_preserve_fp64_flush",						SM_DENORMS,		independence32,			SO_UNUSED,		SO_PRESERVE,	SO_FLUSH },
3194 
3195 		// test denorm settings when three widths are available
3196 		{ "denorm_ind_all_fp16_preserve_fp32_flush_fp64_preserve",		SM_DENORMS,		independenceAll,		SO_PRESERVE,	SO_FLUSH,		SO_PRESERVE },
3197 		{ "denorm_ind_32_fp16_preserve_fp32_flush_fp64_preserve",		SM_DENORMS,		independence32,			SO_PRESERVE,	SO_FLUSH,		SO_PRESERVE },
3198 		{ "denorm_ind_all_fp16_flush_fp32_preserve_fp64_flush",			SM_DENORMS,		independenceAll,		SO_FLUSH,		SO_PRESERVE,	SO_FLUSH },
3199 		{ "denorm_ind_32_fp16_flush_fp32_preserve_fp64_flush",			SM_DENORMS,		independence32,			SO_FLUSH,		SO_PRESERVE,	SO_FLUSH },
3200 		{ "denorm_ind_all_fp16_preserve_fp32_preserve_fp64_flush",		SM_DENORMS,		independenceAll,		SO_PRESERVE,	SO_PRESERVE,	SO_FLUSH },
3201 		{ "denorm_ind_all_fp16_preserve_fp32_flush_fp64_flush",			SM_DENORMS,		independenceAll,		SO_PRESERVE,	SO_FLUSH,		SO_FLUSH },
3202 		{ "denorm_ind_all_fp16_flush_fp32_flush_fp64_preserve",			SM_DENORMS,		independenceAll,		SO_FLUSH,		SO_FLUSH,		SO_PRESERVE },
3203 		{ "denorm_ind_all_fp16_flush_fp32_preserve_fp64_preserve",		SM_DENORMS,		independenceAll,		SO_FLUSH,		SO_PRESERVE,	SO_PRESERVE }
3204 	};
3205 
3206 	for(const auto& testCase : testCases)
3207 	{
3208 		ComputeShaderSpec	csSpec;
3209 		fillShaderSpec(testCase, csSpec);
3210 		group->addChild(new SpvAsmComputeShaderCase(testCtx, testCase.name, "", csSpec));
3211 	}
3212 
3213 	addFunctionCase(group, "independence_settings", "", verifyIndependenceSettings);
3214 }
3215 
fillShaderSpec(const OperationTestCaseInfo & testCaseInfo,ComputeShaderSpec & csSpec) const3216 void ComputeTestGroupBuilder::fillShaderSpec(const OperationTestCaseInfo&	testCaseInfo,
3217 											 ComputeShaderSpec&				csSpec) const
3218 {
3219 	// LUT storing functions used to verify test results
3220 	const VerifyIOFunc checkFloatsLUT[] =
3221 	{
3222 		checkFloats<Float16, deFloat16>,
3223 		checkFloats<Float32, float>,
3224 		checkFloats<Float64, double>
3225 	};
3226 
3227 	const Operation&			testOperation	= testCaseInfo.operation;
3228 	const OperationTestCase&	testCase		= testCaseInfo.testCase;
3229 	FloatType					outFloatType	= testCaseInfo.outFloatType;
3230 
3231 	SpecializedOperation specOpData;
3232 	specializeOperation(testCaseInfo, specOpData);
3233 
3234 	TypeSnippetsSP	inTypeSnippets		= specOpData.inTypeSnippets;
3235 	TypeSnippetsSP	outTypeSnippets		= specOpData.outTypeSnippets;
3236 	FloatType		inFloatType			= specOpData.inFloatType;
3237 
3238 	// UnpackHalf2x16 is a corner case - it returns two 32-bit floats but
3239 	// internaly operates on fp16 and this type should be used by float controls
3240 	FloatType		inFloatTypeForCaps		= inFloatType;
3241 	string			inFloatWidthForCaps		= inTypeSnippets->bitWidth;
3242 	if (testCase.operationId == O_UPH_DENORM)
3243 	{
3244 		inFloatTypeForCaps	= FP16;
3245 		inFloatWidthForCaps	= "16";
3246 	}
3247 
3248 	string behaviorCapability;
3249 	string behaviorExecutionMode;
3250 	getBehaviorCapabilityAndExecutionMode(testCase.behaviorFlags,
3251 										  inFloatWidthForCaps,
3252 										  outTypeSnippets->bitWidth,
3253 										  behaviorCapability,
3254 										  behaviorExecutionMode);
3255 
3256 	string capabilities		= behaviorCapability + outTypeSnippets->capabilities;
3257 	string extensions		= outTypeSnippets->extensions;
3258 	string annotations		= inTypeSnippets->inputAnnotationsSnippet + outTypeSnippets->outputAnnotationsSnippet +
3259 							  outTypeSnippets->typeAnnotationsSnippet;
3260 	string types			= outTypeSnippets->typeDefinitionsSnippet;
3261 	string constants		= outTypeSnippets->constantsDefinitionsSnippet;
3262 	string ioDefinitions	= inTypeSnippets->inputDefinitionsSnippet + outTypeSnippets->outputDefinitionsSnippet;
3263 
3264 	bool outFp16TypeUsage	= outTypeSnippets->loadStoreRequiresShaderFloat16;
3265 	bool inFp16TypeUsage	= false;
3266 
3267 	if (testOperation.isInputTypeRestricted)
3268 	{
3269 		annotations		+= inTypeSnippets->typeAnnotationsSnippet;
3270 		capabilities	+= inTypeSnippets->capabilities;
3271 		extensions		+= inTypeSnippets->extensions;
3272 		types			+= inTypeSnippets->typeDefinitionsSnippet;
3273 		constants		+= inTypeSnippets->constantsDefinitionsSnippet;
3274 
3275 		inFp16TypeUsage	= inTypeSnippets->loadStoreRequiresShaderFloat16;
3276 	}
3277 
3278 	map<string, string> specializations;
3279 	specializations["extensions"]		= extensions;
3280 	specializations["execution_mode"]	= behaviorExecutionMode;
3281 	specializations["annotations"]		= annotations + specOpData.annotations;
3282 	specializations["types"]			= types + specOpData.types;
3283 	specializations["io_definitions"]	= ioDefinitions;
3284 	specializations["variables"]		= specOpData.variables;
3285 	specializations["functions"]		= specOpData.functions;
3286 	specializations["save_result"]		= outTypeSnippets->storeResultsSnippet;
3287 	specializations["arguments"]		= specOpData.arguments;
3288 	specializations["commands"]			= specOpData.commands;
3289 
3290 	// Build constants. They are only needed sometimes.
3291 	const FloatStatementUsageFlags	argsAnyFloatConstMask				= B_STATEMENT_USAGE_ARGS_CONST_FLOAT | B_STATEMENT_USAGE_ARGS_CONST_FP16 | B_STATEMENT_USAGE_ARGS_CONST_FP32 | B_STATEMENT_USAGE_ARGS_CONST_FP64;
3292 	const bool						argsUseFPConstants					= (specOpData.argumentsUsesFloatConstant & argsAnyFloatConstMask) != 0;
3293 	const FloatStatementUsageFlags	commandsAnyFloatConstMask			= B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_CONST_FP16 | B_STATEMENT_USAGE_COMMANDS_CONST_FP32 | B_STATEMENT_USAGE_COMMANDS_CONST_FP64;
3294 	const bool						commandsUseFPConstants				= (testCaseInfo.operation.statementUsageFlags & commandsAnyFloatConstMask) != 0;
3295 	const bool						needConstants						= argsUseFPConstants || commandsUseFPConstants;
3296 	const FloatStatementUsageFlags	constsFloatTypeMask					= B_STATEMENT_USAGE_CONSTS_TYPE_FLOAT | B_STATEMENT_USAGE_CONSTS_TYPE_FP16;
3297 	const bool						constsUsesFP16Type					= (testCaseInfo.operation.statementUsageFlags & constsFloatTypeMask) != 0;
3298 	const bool						loadStoreRequiresShaderFloat16		= inFp16TypeUsage || outFp16TypeUsage;
3299 	const bool						usesFP16Constants					= constsUsesFP16Type || (needConstants && loadStoreRequiresShaderFloat16);
3300 
3301 	specializations["constants"]		= "";
3302 	if (needConstants)
3303 	{
3304 		specializations["constants"]	= constants;
3305 	}
3306 	specializations["constants"]		+= specOpData.constants;
3307 
3308 	// check which format features are needed
3309 	bool float16FeatureRequired = (outFloatType == FP16) || (inFloatType == FP16);
3310 	bool float64FeatureRequired = (outFloatType == FP64) || (inFloatType == FP64);
3311 
3312 	// Determine required capabilities.
3313 	if ((testOperation.floatUsage == FLOAT_ARITHMETIC && float16FeatureRequired) || usesFP16Constants)
3314 	{
3315 		capabilities += "OpCapability Float16\n";
3316 	}
3317 	specializations["capabilities"]		= capabilities;
3318 
3319 	// specialize shader
3320 	const string shaderCode = m_operationShaderTemplate.specialize(specializations);
3321 
3322 	// construct input and output buffers of proper types
3323 	TypeValuesSP inTypeValues	= m_typeData.at(inFloatType).values;
3324 	TypeValuesSP outTypeValues	= m_typeData.at(outFloatType).values;
3325 	BufferSp inBufferSp			= inTypeValues->constructInputBuffer(testCase.input);
3326 	BufferSp outBufferSp		= outTypeValues->constructOutputBuffer(testCase.expectedOutput);
3327 	csSpec.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3328 	csSpec.outputs.push_back(Resource(outBufferSp));
3329 
3330 	// check which format features are needed
3331 	setupVulkanFeatures(inFloatTypeForCaps,		// usualy same as inFloatType - different only for UnpackHalf2x16
3332 						outFloatType,
3333 						testCase.behaviorFlags,
3334 						float64FeatureRequired,
3335 						csSpec.requestedVulkanFeatures);
3336 
3337 	csSpec.assembly			= shaderCode;
3338 	csSpec.numWorkGroups	= IVec3(1, 1, 1);
3339 	csSpec.verifyIO			= checkFloatsLUT[outFloatType];
3340 
3341 	csSpec.extensions.push_back("VK_KHR_shader_float_controls");
3342 	bool needShaderFloat16 = false;
3343 	if (float16FeatureRequired)
3344 	{
3345 		csSpec.extensions.push_back("VK_KHR_16bit_storage");
3346 		csSpec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
3347 		needShaderFloat16 |= testOperation.floatUsage == FLOAT_ARITHMETIC;
3348 	}
3349 	needShaderFloat16 |= usesFP16Constants;
3350 	if (needShaderFloat16)
3351 	{
3352 		csSpec.extensions.push_back("VK_KHR_shader_float16_int8");
3353 		csSpec.requestedVulkanFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
3354 	}
3355 	if (float64FeatureRequired)
3356 		csSpec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
3357 }
3358 
fillShaderSpec(const SettingsTestCaseInfo & testCaseInfo,ComputeShaderSpec & csSpec) const3359 void ComputeTestGroupBuilder::fillShaderSpec(const SettingsTestCaseInfo&	testCaseInfo,
3360 											 ComputeShaderSpec&				csSpec) const
3361 {
3362 	string		capabilities;
3363 	string		fp16behaviorName;
3364 	string		fp32behaviorName;
3365 	string		fp64behaviorName;
3366 
3367 	ValueId		addArgs[2];
3368 	ValueId		fp16resultValue;
3369 	ValueId		fp32resultValue;
3370 	ValueId		fp64resultValue;
3371 
3372 	ExtensionFloatControlsFeatures& floatControls = csSpec.requestedVulkanFeatures.floatControlsProperties;
3373 	bool fp16Required	= testCaseInfo.fp16Option != SO_UNUSED;
3374 	bool fp32Required	= testCaseInfo.fp32Option != SO_UNUSED;
3375 	bool fp64Required	= testCaseInfo.fp64Option != SO_UNUSED;
3376 
3377 	if (testCaseInfo.testedMode == SM_ROUNDING)
3378 	{
3379 		// make sure that only rounding options are used
3380 		DE_ASSERT((testCaseInfo.fp16Option != SO_FLUSH) ||
3381 				  (testCaseInfo.fp16Option != SO_PRESERVE) ||
3382 				  (testCaseInfo.fp32Option != SO_FLUSH) ||
3383 				  (testCaseInfo.fp32Option != SO_PRESERVE) ||
3384 				  (testCaseInfo.fp64Option != SO_FLUSH) ||
3385 				  (testCaseInfo.fp64Option != SO_PRESERVE));
3386 
3387 		bool fp16RteRounding	= testCaseInfo.fp16Option == SO_RTE;
3388 		bool fp32RteRounding	= testCaseInfo.fp32Option == SO_RTE;
3389 		bool fp64RteRounding	= testCaseInfo.fp64Option == SO_RTE;
3390 
3391 		const string& rte		= m_behaviorToName.at(B_RTE_ROUNDING);
3392 		const string& rtz		= m_behaviorToName.at(B_RTZ_ROUNDING);
3393 
3394 		fp16behaviorName		= fp16RteRounding ? rte : rtz;
3395 		fp32behaviorName		= fp32RteRounding ? rte : rtz;
3396 		fp64behaviorName		= fp64RteRounding ? rte : rtz;
3397 
3398 		addArgs[0]				= V_ADD_ARG_A;
3399 		addArgs[1]				= V_ADD_ARG_B;
3400 		fp16resultValue			= fp16RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
3401 		fp32resultValue			= fp32RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
3402 		fp64resultValue			= fp64RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
3403 
3404 		capabilities			= "OpCapability " + rte + "\n"
3405 								  "OpCapability " + rtz + "\n";
3406 
3407 		floatControls.roundingModeIndependence		= testCaseInfo.independenceSetting;
3408 		floatControls.denormBehaviorIndependence	= VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR;
3409 		floatControls.shaderRoundingModeRTEFloat16	= fp16RteRounding;
3410 		floatControls.shaderRoundingModeRTZFloat16	= fp16Required && !fp16RteRounding;
3411 		floatControls.shaderRoundingModeRTEFloat32	= fp32RteRounding;
3412 		floatControls.shaderRoundingModeRTZFloat32	= fp32Required && !fp32RteRounding;
3413 		floatControls.shaderRoundingModeRTEFloat64	= fp64RteRounding;
3414 		floatControls.shaderRoundingModeRTZFloat64	= fp64Required && !fp64RteRounding;
3415 	}
3416 	else // SM_DENORMS
3417 	{
3418 		// make sure that only denorm options are used
3419 		DE_ASSERT((testCaseInfo.fp16Option != SO_RTE) ||
3420 				  (testCaseInfo.fp16Option != SO_RTZ) ||
3421 				  (testCaseInfo.fp32Option != SO_RTE) ||
3422 				  (testCaseInfo.fp32Option != SO_RTZ) ||
3423 				  (testCaseInfo.fp64Option != SO_RTE) ||
3424 				  (testCaseInfo.fp64Option != SO_RTZ));
3425 
3426 		bool fp16DenormPreserve		= testCaseInfo.fp16Option == SO_PRESERVE;
3427 		bool fp32DenormPreserve		= testCaseInfo.fp32Option == SO_PRESERVE;
3428 		bool fp64DenormPreserve		= testCaseInfo.fp64Option == SO_PRESERVE;
3429 
3430 		const string& preserve		= m_behaviorToName.at(B_DENORM_PRESERVE);
3431 		const string& flush			= m_behaviorToName.at(B_DENORM_FLUSH);
3432 
3433 		fp16behaviorName			= fp16DenormPreserve ? preserve : flush;
3434 		fp32behaviorName			= fp32DenormPreserve ? preserve : flush;
3435 		fp64behaviorName			= fp64DenormPreserve ? preserve : flush;
3436 
3437 		addArgs[0]					= V_DENORM;
3438 		addArgs[1]					= V_DENORM;
3439 		fp16resultValue				= fp16DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO;
3440 		fp32resultValue				= fp32DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO;
3441 		fp64resultValue				= fp64DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO;
3442 
3443 		capabilities				= "OpCapability " + preserve + "\n"
3444 									  "OpCapability " + flush + "\n";
3445 
3446 		floatControls.denormBehaviorIndependence		= testCaseInfo.independenceSetting;
3447 		floatControls.roundingModeIndependence			= VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR;
3448 		floatControls.shaderDenormPreserveFloat16		= fp16DenormPreserve;
3449 		floatControls.shaderDenormFlushToZeroFloat16	= fp16Required && !fp16DenormPreserve;
3450 		floatControls.shaderDenormPreserveFloat32		= fp32DenormPreserve;
3451 		floatControls.shaderDenormFlushToZeroFloat32	= fp32Required && !fp32DenormPreserve;
3452 		floatControls.shaderDenormPreserveFloat64		= fp64DenormPreserve;
3453 		floatControls.shaderDenormFlushToZeroFloat64	= fp64Required && !fp64DenormPreserve;
3454 	}
3455 
3456 	const auto&	fp64Data			= m_typeData.at(FP64);
3457 	const auto&	fp32Data			= m_typeData.at(FP32);
3458 	const auto&	fp16Data			= m_typeData.at(FP16);
3459 
3460 	deUint32	attributeIndex		= 0;
3461 	deUint32	attributeOffset		= 0;
3462 	string		attribute;
3463 	string		extensions			= "";
3464 	string		executionModes		= "";
3465 	string		ioAnnotations		= "";
3466 	string		types				= "";
3467 	string		inStruct			= "";
3468 	string		outDefinitions		= "";
3469 	string		commands			= "";
3470 	string		saveResult			= "";
3471 
3472 	// construct single input buffer containing arguments for all float widths
3473 	// (maxPerStageDescriptorStorageBuffers can be min 4 and we need 3 for outputs)
3474 	deUint32				inputOffset	= 0;
3475 	std::vector<deUint8>	inputData	((fp64Required * sizeof(double) + sizeof(float) + fp16Required * sizeof(deFloat16)) * 2);
3476 
3477 	// to follow storage buffer layout rules we store data in ssbo in order 64 -> 16
3478 	if (fp64Required)
3479 	{
3480 		capabilities	+= fp64Data.snippets->capabilities;
3481 		executionModes	+= "OpExecutionMode %main " + fp64behaviorName + " 64\n";
3482 		attribute		 = to_string(attributeIndex);
3483 		ioAnnotations	+= "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3484 						   fp64Data.snippets->multiOutputAnnotationsSnippet +
3485 						   "OpDecorate %ssbo_f64_out Binding " + to_string(attributeIndex+1) + "\n";
3486 		types			+= fp64Data.snippets->minTypeDefinitionsSnippet;
3487 		inStruct		+= " %type_f64_arr_2";
3488 		outDefinitions	+= fp64Data.snippets->multiOutputDefinitionsSnippet;
3489 		commands		+= replace(fp64Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
3490 						   "%result64             = OpFAdd %type_f64 %arg1_f64 %arg2_f64\n";
3491 		saveResult		+= fp64Data.snippets->multiStoreResultsSnippet;
3492 		attributeOffset += 2 * static_cast<deUint32>(sizeof(double));
3493 		attributeIndex++;
3494 
3495 		fp64Data.values->fillInputData(addArgs, inputData, inputOffset);
3496 
3497 		// construct separate buffers for outputs to make validation easier
3498 		BufferSp fp64OutBufferSp = fp64Data.values->constructOutputBuffer(fp64resultValue);
3499 		csSpec.outputs.push_back(Resource(fp64OutBufferSp));
3500 
3501 		csSpec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
3502 	}
3503 	if (fp32Required)
3504 	{
3505 		executionModes		+= "OpExecutionMode %main " + fp32behaviorName + " 32\n";
3506 		attribute			 = to_string(attributeIndex);
3507 		ioAnnotations		+= "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3508 							   fp32Data.snippets->multiOutputAnnotationsSnippet +
3509 							   "OpDecorate %ssbo_f32_out Binding " + to_string(attributeIndex+1) + "\n";
3510 		types				+= fp32Data.snippets->minTypeDefinitionsSnippet;
3511 		inStruct			+= " %type_f32_arr_2";
3512 		outDefinitions		+= fp32Data.snippets->multiOutputDefinitionsSnippet;
3513 		commands			+= replace(fp32Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
3514 							   "%result32             = OpFAdd %type_f32 %arg1_f32 %arg2_f32\n";
3515 		saveResult			+= fp32Data.snippets->multiStoreResultsSnippet;
3516 		attributeOffset		+= 2 * static_cast<deUint32>(sizeof(float));
3517 		attributeIndex++;
3518 
3519 		fp32Data.values->fillInputData(addArgs, inputData, inputOffset);
3520 
3521 		BufferSp fp32OutBufferSp = fp32Data.values->constructOutputBuffer(fp32resultValue);
3522 		csSpec.outputs.push_back(Resource(fp32OutBufferSp));
3523 	}
3524 	if (fp16Required)
3525 	{
3526 		capabilities	+= fp16Data.snippets->capabilities +
3527 						   "OpCapability Float16\n";
3528 		extensions		+= fp16Data.snippets->extensions;
3529 		executionModes	+= "OpExecutionMode %main " + fp16behaviorName + " 16\n";
3530 		attribute		 = to_string(attributeIndex);
3531 		ioAnnotations	+= "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3532 						   fp16Data.snippets->multiOutputAnnotationsSnippet +
3533 						   "OpDecorate %ssbo_f16_out Binding " + to_string(attributeIndex+1) + "\n";
3534 		types			+= fp16Data.snippets->minTypeDefinitionsSnippet;
3535 		inStruct		+= " %type_f16_arr_2";
3536 		outDefinitions	+= fp16Data.snippets->multiOutputDefinitionsSnippet;
3537 		commands		+= replace(fp16Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
3538 						   "%result16             = OpFAdd %type_f16 %arg1_f16 %arg2_f16\n";
3539 		saveResult		+= fp16Data.snippets->multiStoreResultsSnippet;
3540 
3541 		fp16Data.values->fillInputData(addArgs, inputData, inputOffset);
3542 
3543 		BufferSp fp16OutBufferSp = fp16Data.values->constructOutputBuffer(fp16resultValue);
3544 		csSpec.outputs.push_back(Resource(fp16OutBufferSp));
3545 
3546 		csSpec.extensions.push_back("VK_KHR_16bit_storage");
3547 		csSpec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
3548 	}
3549 
3550 	BufferSp inBufferSp(new Buffer<deUint8>(inputData));
3551 	csSpec.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3552 
3553 	map<string, string> specializations =
3554 	{
3555 		{ "capabilities",		capabilities },
3556 		{ "extensions",			extensions },
3557 		{ "execution_modes",	executionModes },
3558 		{ "io_annotations",		ioAnnotations },
3559 		{ "types",				types },
3560 		{ "in_struct",			inStruct },
3561 		{ "out_definitions",	outDefinitions },
3562 		{ "commands",			commands },
3563 		{ "save_result",		saveResult }
3564 	};
3565 
3566 	// specialize shader
3567 	const string shaderCode = m_settingsShaderTemplate.specialize(specializations);
3568 
3569 	csSpec.assembly			= shaderCode;
3570 	csSpec.numWorkGroups	= IVec3(1, 1, 1);
3571 	csSpec.verifyIO			= checkMixedFloats;
3572 	csSpec.extensions.push_back("VK_KHR_shader_float_controls");
3573 }
3574 
getGraphicsShaderCode(vk::SourceCollections & dst,InstanceContext context)3575 void getGraphicsShaderCode (vk::SourceCollections& dst, InstanceContext context)
3576 {
3577 	// this function is used only by GraphicsTestGroupBuilder but it couldn't
3578 	// be implemented as a method because of how addFunctionCaseWithPrograms
3579 	// was implemented
3580 
3581 	SpirvVersion	targetSpirvVersion	= context.resources.spirvVersion;
3582 	const deUint32	vulkanVersion		= dst.usedVulkanVersion;
3583 
3584 	static const string vertexTemplate =
3585 		"OpCapability Shader\n"
3586 		"${vert_capabilities}"
3587 
3588 		"OpExtension \"SPV_KHR_float_controls\"\n"
3589 		"${vert_extensions}"
3590 
3591 		"%std450            = OpExtInstImport \"GLSL.std.450\"\n"
3592 		"OpMemoryModel Logical GLSL450\n"
3593 		"OpEntryPoint Vertex %main \"main\" %BP_stream %BP_position %BP_color %BP_gl_VertexIndex %BP_gl_InstanceIndex %BP_vertex_color %BP_vertex_result \n"
3594 		"${vert_execution_mode}"
3595 
3596 		"OpMemberDecorate %BP_gl_PerVertex 0 BuiltIn Position\n"
3597 		"OpMemberDecorate %BP_gl_PerVertex 1 BuiltIn PointSize\n"
3598 		"OpMemberDecorate %BP_gl_PerVertex 2 BuiltIn ClipDistance\n"
3599 		"OpMemberDecorate %BP_gl_PerVertex 3 BuiltIn CullDistance\n"
3600 		"OpDecorate %BP_gl_PerVertex Block\n"
3601 		"OpDecorate %BP_position Location 0\n"
3602 		"OpDecorate %BP_color Location 1\n"
3603 		"OpDecorate %BP_vertex_color Location 1\n"
3604 		"OpDecorate %BP_vertex_result Location 2\n"
3605 		"OpDecorate %BP_vertex_result Flat\n"
3606 		"OpDecorate %BP_gl_VertexIndex BuiltIn VertexIndex\n"
3607 		"OpDecorate %BP_gl_InstanceIndex BuiltIn InstanceIndex\n"
3608 
3609 		// some tests require additional annotations
3610 		"${vert_annotations}"
3611 
3612 		// types required by most of tests
3613 		"%type_void            = OpTypeVoid\n"
3614 		"%type_voidf           = OpTypeFunction %type_void\n"
3615 		"%type_bool            = OpTypeBool\n"
3616 		"%type_i32             = OpTypeInt 32 1\n"
3617 		"%type_u32             = OpTypeInt 32 0\n"
3618 		"%type_u32_vec2        = OpTypeVector %type_u32 2\n"
3619 		"%type_i32_iptr        = OpTypePointer Input %type_i32\n"
3620 		"%type_i32_optr        = OpTypePointer Output %type_i32\n"
3621 		"%type_i32_fptr        = OpTypePointer Function %type_i32\n"
3622 
3623 		// constants required by most of tests
3624 		"%c_i32_0              = OpConstant %type_i32 0\n"
3625 		"%c_i32_1              = OpConstant %type_i32 1\n"
3626 		"%c_i32_2              = OpConstant %type_i32 2\n"
3627 		"%c_u32_1              = OpConstant %type_u32 1\n"
3628 
3629 		// if input float type has different width then output then
3630 		// both types are defined here along with all types derived from
3631 		// them that are commonly used by tests; some tests also define
3632 		// their own types (those that are needed just by this single test)
3633 		"${vert_types}"
3634 
3635 		// SSBO is not universally supported for storing
3636 		// data in vertex stages - it is onle read here
3637 		"${vert_io_definitions}"
3638 
3639 		"%BP_gl_PerVertex      = OpTypeStruct %type_f32_vec4 %type_f32 %type_f32_arr_1 %type_f32_arr_1\n"
3640 		"%BP_gl_PerVertex_optr = OpTypePointer Output %BP_gl_PerVertex\n"
3641 		"%BP_stream            = OpVariable %BP_gl_PerVertex_optr Output\n"
3642 		"%BP_position          = OpVariable %type_f32_vec4_iptr Input\n"
3643 		"%BP_color             = OpVariable %type_f32_vec4_iptr Input\n"
3644 		"%BP_gl_VertexIndex    = OpVariable %type_i32_iptr Input\n"
3645 		"%BP_gl_InstanceIndex  = OpVariable %type_i32_iptr Input\n"
3646 		"%BP_vertex_color      = OpVariable %type_f32_vec4_optr Output\n"
3647 
3648 		// set of default constants per float type is placed here,
3649 		// operation tests can also define additional constants.
3650 		"${vert_constants}"
3651 
3652 		// O_RETURN_VAL defines function here and because
3653 		// of that this token needs to be directly before main function.
3654 		"${vert_functions}"
3655 
3656 		"%main                 = OpFunction %type_void None %type_voidf\n"
3657 		"%label                = OpLabel\n"
3658 
3659 		"${vert_variables}"
3660 
3661 		"%position             = OpLoad %type_f32_vec4 %BP_position\n"
3662 		"%gl_pos               = OpAccessChain %type_f32_vec4_optr %BP_stream %c_i32_0\n"
3663 		"OpStore %gl_pos %position\n"
3664 		"%color                = OpLoad %type_f32_vec4 %BP_color\n"
3665 		"OpStore %BP_vertex_color %color\n"
3666 
3667 		// this token is filled only when vertex stage is tested;
3668 		// depending on test case arguments are either read from input ssbo
3669 		// or generated in spir-v code - in later case ssbo is not used
3670 		"${vert_arguments}"
3671 
3672 		// when vertex shader is tested then test operations are performed
3673 		// here and passed to fragment stage; if fragment stage ts tested
3674 		// then ${comands} and ${vert_process_result} are rplaced with nop
3675 		"${vert_commands}"
3676 
3677 		"${vert_process_result}"
3678 
3679 		"OpReturn\n"
3680 		"OpFunctionEnd\n";
3681 
3682 
3683 	static const string fragmentTemplate =
3684 		"OpCapability Shader\n"
3685 		"${frag_capabilities}"
3686 
3687 		"OpExtension \"SPV_KHR_float_controls\"\n"
3688 		"${frag_extensions}"
3689 
3690 		"%std450            = OpExtInstImport \"GLSL.std.450\"\n"
3691 		"OpMemoryModel Logical GLSL450\n"
3692 		"OpEntryPoint Fragment %main \"main\" %BP_vertex_color %BP_vertex_result %BP_fragColor %BP_gl_FragCoord \n"
3693 		"OpExecutionMode %main OriginUpperLeft\n"
3694 		"${frag_execution_mode}"
3695 
3696 		"OpDecorate %BP_fragColor Location 0\n"
3697 		"OpDecorate %BP_vertex_color Location 1\n"
3698 		"OpDecorate %BP_vertex_result Location 2\n"
3699 		"OpDecorate %BP_vertex_result Flat\n"
3700 		"OpDecorate %BP_gl_FragCoord BuiltIn FragCoord\n"
3701 
3702 		// some tests require additional annotations
3703 		"${frag_annotations}"
3704 
3705 		// types required by most of tests
3706 		"%type_void            = OpTypeVoid\n"
3707 		"%type_voidf           = OpTypeFunction %type_void\n"
3708 		"%type_bool            = OpTypeBool\n"
3709 		"%type_i32             = OpTypeInt 32 1\n"
3710 		"%type_u32             = OpTypeInt 32 0\n"
3711 		"%type_u32_vec2        = OpTypeVector %type_u32 2\n"
3712 		"%type_i32_iptr        = OpTypePointer Input %type_i32\n"
3713 		"%type_i32_optr        = OpTypePointer Output %type_i32\n"
3714 		"%type_i32_fptr        = OpTypePointer Function %type_i32\n"
3715 
3716 		// constants required by most of tests
3717 		"%c_i32_0              = OpConstant %type_i32 0\n"
3718 		"%c_i32_1              = OpConstant %type_i32 1\n"
3719 		"%c_i32_2              = OpConstant %type_i32 2\n"
3720 		"%c_u32_1              = OpConstant %type_u32 1\n"
3721 
3722 		// if input float type has different width then output then
3723 		// both types are defined here along with all types derived from
3724 		// them that are commonly used by tests; some tests also define
3725 		// their own types (those that are needed just by this single test)
3726 		"${frag_types}"
3727 
3728 		"%BP_gl_FragCoord      = OpVariable %type_f32_vec4_iptr Input\n"
3729 		"%BP_vertex_color      = OpVariable %type_f32_vec4_iptr Input\n"
3730 		"%BP_fragColor         = OpVariable %type_f32_vec4_optr Output\n"
3731 
3732 		// SSBO definitions
3733 		"${frag_io_definitions}"
3734 
3735 		// set of default constants per float type is placed here,
3736 		// operation tests can also define additional constants.
3737 		"${frag_constants}"
3738 
3739 		// O_RETURN_VAL defines function here and because
3740 		// of that this token needs to be directly before main function.
3741 		"${frag_functions}"
3742 
3743 		"%main                 = OpFunction %type_void None %type_voidf\n"
3744 		"%label                = OpLabel\n"
3745 
3746 		"${frag_variables}"
3747 
3748 		// just pass vertex color - rendered image is not important in our case
3749 		"%vertex_color         = OpLoad %type_f32_vec4 %BP_vertex_color\n"
3750 		"OpStore %BP_fragColor %vertex_color\n"
3751 
3752 		// this token is filled only when fragment stage is tested;
3753 		// depending on test case arguments are either read from input ssbo or
3754 		// generated in spir-v code - in later case ssbo is used only for output
3755 		"${frag_arguments}"
3756 
3757 		// when fragment shader is tested then test operations are performed
3758 		// here and saved to ssbo; if vertex stage was tested then its
3759 		// result is just saved to ssbo here
3760 		"${frag_commands}"
3761 		"${frag_process_result}"
3762 
3763 		"OpReturn\n"
3764 		"OpFunctionEnd\n";
3765 
3766 	dst.spirvAsmSources.add("vert", DE_NULL)
3767 		<< StringTemplate(vertexTemplate).specialize(context.testCodeFragments)
3768 		<< SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
3769 	dst.spirvAsmSources.add("frag", DE_NULL)
3770 		<< StringTemplate(fragmentTemplate).specialize(context.testCodeFragments)
3771 		<< SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
3772 }
3773 
// GraphicsTestGroupBuilder iterates over all test cases and creates tests for both
// vertex and fragment stages. As in most spirv-assembly tests, tests here are also
// executed using functionality defined in vktSpvAsmGraphicsShaderTestUtil.cpp, but
// because one of the requirements during development was that SSBOs won't be used in
// the vertex stage we couldn't use the createTestForStage functions - we need a custom
// version for both vertex and fragment shaders at the same time. This was required
// as we needed to pass the result from the vertex stage to the fragment stage where it
// could be saved to an SSBO. To achieve that, InstanceContext is created manually in
// the createInstanceContext method.
3783 class GraphicsTestGroupBuilder: public TestGroupBuilderBase
3784 {
3785 public:
3786 
3787 	void init();
3788 
3789 	void createOperationTests(TestCaseGroup* parentGroup, const char* groupName, FloatType floatType, bool argumentsFromInput) override;
3790 	void createSettingsTests(TestCaseGroup* parentGroup) override;
3791 
3792 protected:
3793 
3794 	InstanceContext createInstanceContext(const OperationTestCaseInfo& testCaseInfo) const;
3795 
3796 private:
3797 
3798 	TestCasesBuilder	m_testCaseBuilder;
3799 };
3800 
init()3801 void GraphicsTestGroupBuilder::init()
3802 {
3803 	m_testCaseBuilder.init();
3804 }
3805 
createOperationTests(TestCaseGroup * parentGroup,const char * groupName,FloatType floatType,bool argumentsFromInput)3806 void GraphicsTestGroupBuilder::createOperationTests(TestCaseGroup* parentGroup, const char* groupName, FloatType floatType, bool argumentsFromInput)
3807 {
3808 	TestContext&	testCtx	= parentGroup->getTestContext();
3809 	TestCaseGroup*	group	= new TestCaseGroup(testCtx, groupName, "");
3810 	parentGroup->addChild(group);
3811 
3812 	// create test cases for vertex stage
3813 	TestCaseVect testCases;
3814 	m_testCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
3815 
3816 	TestCaseVect::const_iterator currTestCase = testCases.begin();
3817 	TestCaseVect::const_iterator lastTestCase = testCases.end();
3818 	while(currTestCase != lastTestCase)
3819 	{
3820 		const OperationTestCase& testCase = *currTestCase;
3821 		++currTestCase;
3822 
3823 		// skip cases with undefined output
3824 		if (testCase.expectedOutput == V_UNUSED)
3825 			continue;
3826 
3827 		// FPRoundingMode decoration can be applied only to conversion instruction that is used as the object
3828 		// argument of an OpStore storing through a pointer to a 16-bit floating-point object in Uniform, or
3829 		// PushConstant, or Input, or Output Storage Classes. SSBO writes are not commonly supported
3830 		// in VS so this test case needs to be skiped for vertex stage.
3831 		if ((testCase.operationId == O_ORTZ_ROUND) || (testCase.operationId == O_ORTE_ROUND))
3832 			continue;
3833 
3834 		OperationTestCaseInfo testCaseInfo =
3835 		{
3836 			floatType,
3837 			argumentsFromInput,
3838 			VK_SHADER_STAGE_VERTEX_BIT,
3839 			m_testCaseBuilder.getOperation(testCase.operationId),
3840 			testCase
3841 		};
3842 
3843 		InstanceContext ctxVertex	= createInstanceContext(testCaseInfo);
3844 		string			testName	= replace(testCase.baseName, "op", testCaseInfo.operation.name);
3845 
3846 		addFunctionCaseWithPrograms<InstanceContext>(group, testName + "_vert", "", getGraphicsShaderCode, runAndVerifyDefaultPipeline, ctxVertex);
3847 	}
3848 
3849 	// create test cases for fragment stage
3850 	testCases.clear();
3851 	m_testCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
3852 
3853 	currTestCase = testCases.begin();
3854 	lastTestCase = testCases.end();
3855 	while(currTestCase != lastTestCase)
3856 	{
3857 		const OperationTestCase& testCase = *currTestCase;
3858 		++currTestCase;
3859 
3860 		// skip cases with undefined output
3861 		if (testCase.expectedOutput == V_UNUSED)
3862 			continue;
3863 
3864 		OperationTestCaseInfo testCaseInfo =
3865 		{
3866 			floatType,
3867 			argumentsFromInput,
3868 			VK_SHADER_STAGE_FRAGMENT_BIT,
3869 			m_testCaseBuilder.getOperation(testCase.operationId),
3870 			testCase
3871 		};
3872 
3873 		InstanceContext ctxFragment	= createInstanceContext(testCaseInfo);
3874 		string			testName	= replace(testCase.baseName, "op", testCaseInfo.operation.name);
3875 
3876 		addFunctionCaseWithPrograms<InstanceContext>(group, testName + "_frag", "", getGraphicsShaderCode, runAndVerifyDefaultPipeline, ctxFragment);
3877 	}
3878 }
3879 
createSettingsTests(TestCaseGroup * parentGroup)3880 void GraphicsTestGroupBuilder::createSettingsTests(TestCaseGroup* parentGroup)
3881 {
3882 	DE_UNREF(parentGroup);
3883 
3884 	// WG decided that testing settings only for compute stage is sufficient
3885 }
3886 
createInstanceContext(const OperationTestCaseInfo & testCaseInfo) const3887 InstanceContext GraphicsTestGroupBuilder::createInstanceContext(const OperationTestCaseInfo& testCaseInfo) const
3888 {
3889 	// LUT storing functions used to verify test results
3890 	const VerifyIOFunc checkFloatsLUT[] =
3891 	{
3892 		checkFloats<Float16, deFloat16>,
3893 		checkFloats<Float32, float>,
3894 		checkFloats<Float64, double>
3895 	};
3896 
3897 	// 32-bit float types are always needed for standard operations on color
3898 	// if tested operation does not require fp32 for either input or output
3899 	// then this minimal type definitions must be appended to types section
3900 	const string f32TypeMinimalRequired =
3901 		"%type_f32             = OpTypeFloat 32\n"
3902 		"%type_f32_arr_1       = OpTypeArray %type_f32 %c_i32_1\n"
3903 		"%type_f32_iptr        = OpTypePointer Input %type_f32\n"
3904 		"%type_f32_optr        = OpTypePointer Output %type_f32\n"
3905 		"%type_f32_vec4        = OpTypeVector %type_f32 4\n"
3906 		"%type_f32_vec4_iptr   = OpTypePointer Input %type_f32_vec4\n"
3907 		"%type_f32_vec4_optr   = OpTypePointer Output %type_f32_vec4\n";
3908 
3909 	const Operation&			testOperation	= testCaseInfo.operation;
3910 	const OperationTestCase&	testCase		= testCaseInfo.testCase;
3911 	FloatType					outFloatType	= testCaseInfo.outFloatType;
3912 	VkShaderStageFlagBits		testedStage		= testCaseInfo.testedStage;
3913 
3914 	DE_ASSERT((testedStage == VK_SHADER_STAGE_VERTEX_BIT) || (testedStage == VK_SHADER_STAGE_FRAGMENT_BIT));
3915 
3916 	SpecializedOperation specOpData;
3917 	specializeOperation(testCaseInfo, specOpData);
3918 
3919 	TypeSnippetsSP	inTypeSnippets		= specOpData.inTypeSnippets;
3920 	TypeSnippetsSP	outTypeSnippets		= specOpData.outTypeSnippets;
3921 	FloatType		inFloatType			= specOpData.inFloatType;
3922 
3923 	// There may be several reasons why we need the shaderFloat16 Vulkan feature.
3924 	bool needsShaderFloat16 = false;
3925 	// There are some weird cases where we need the constants, but would otherwise drop them.
3926 	bool needsSpecialConstants = false;
3927 
3928 	// UnpackHalf2x16 is a corner case - it returns two 32-bit floats but
3929 	// internaly operates on fp16 and this type should be used by float controls
3930 	FloatType		inFloatTypeForCaps		= inFloatType;
3931 	string			inFloatWidthForCaps		= inTypeSnippets->bitWidth;
3932 	if (testCase.operationId == O_UPH_DENORM)
3933 	{
3934 		inFloatTypeForCaps	= FP16;
3935 		inFloatWidthForCaps	= "16";
3936 	}
3937 
3938 	string behaviorCapability;
3939 	string behaviorExecutionMode;
3940 	getBehaviorCapabilityAndExecutionMode(testCase.behaviorFlags,
3941 										  inFloatWidthForCaps,
3942 										  outTypeSnippets->bitWidth,
3943 										  behaviorCapability,
3944 										  behaviorExecutionMode);
3945 
3946 	// check which format features are needed
3947 	bool float16FeatureRequired = (inFloatType == FP16) || (outFloatType == FP16);
3948 	bool float64FeatureRequired = (inFloatType == FP64) || (outFloatType == FP64);
3949 
3950 	string vertExecutionMode;
3951 	string fragExecutionMode;
3952 	string vertCapabilities;
3953 	string fragCapabilities;
3954 	string vertExtensions;
3955 	string fragExtensions;
3956 	string vertAnnotations;
3957 	string fragAnnotations;
3958 	string vertTypes;
3959 	string fragTypes;
3960 	string vertConstants;
3961 	string fragConstants;
3962 	string vertFunctions;
3963 	string fragFunctions;
3964 	string vertIODefinitions;
3965 	string fragIODefinitions;
3966 	string vertArguments;
3967 	string fragArguments;
3968 	string vertVariables;
3969 	string fragVariables;
3970 	string vertCommands;
3971 	string fragCommands;
3972 	string vertProcessResult;
3973 	string fragProcessResult;
3974 
3975 	// check if operation should be executed in vertex stage
3976 	if (testedStage == VK_SHADER_STAGE_VERTEX_BIT)
3977 	{
3978 		vertAnnotations = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet;
3979 		fragAnnotations = outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
3980 		vertFunctions = specOpData.functions;
3981 
3982 		// check if input type is different from tested type (conversion operations)
3983 		if (testOperation.isInputTypeRestricted)
3984 		{
3985 			vertCapabilities	= behaviorCapability + inTypeSnippets->capabilities + outTypeSnippets->capabilities;
3986 			fragCapabilities	= outTypeSnippets->capabilities;
3987 			vertExtensions		= inTypeSnippets->extensions + outTypeSnippets->extensions;
3988 			fragExtensions		= outTypeSnippets->extensions;
3989 			vertTypes			= inTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
3990 			fragTypes			= outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
3991 			vertConstants		= inTypeSnippets->constantsDefinitionsSnippet + outTypeSnippets->constantsDefinitionsSnippet;
3992 			fragConstants		= outTypeSnippets->constantsDefinitionsSnippet;
3993 		}
3994 		else
3995 		{
3996 			// input and output types are the same (majority of operations)
3997 
3998 			vertCapabilities	= behaviorCapability + outTypeSnippets->capabilities;
3999 			fragCapabilities	= vertCapabilities;
4000 			vertExtensions		= outTypeSnippets->extensions;
4001 			fragExtensions		= vertExtensions;
4002 			vertTypes			= outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
4003 			fragTypes			= vertTypes;
4004 			vertConstants		= outTypeSnippets->constantsDefinitionsSnippet;
4005 			fragConstants		= outTypeSnippets->constantsDefinitionsSnippet;
4006 		}
4007 
4008 		if (outFloatType != FP32)
4009 		{
4010 			fragTypes += f32TypeMinimalRequired;
4011 			if (inFloatType != FP32)
4012 				vertTypes += f32TypeMinimalRequired;
4013 		}
4014 
4015 		vertAnnotations	+= specOpData.annotations;
4016 		vertTypes		+= specOpData.types;
4017 		vertConstants	+= specOpData.constants;
4018 
4019 		vertExecutionMode		= behaviorExecutionMode;
4020 		fragExecutionMode		= "";
4021 		vertIODefinitions		= inTypeSnippets->inputDefinitionsSnippet + outTypeSnippets->outputVaryingsSnippet;
4022 		fragIODefinitions		= outTypeSnippets->outputDefinitionsSnippet + outTypeSnippets->inputVaryingsSnippet;
4023 		vertArguments			= specOpData.arguments;
4024 		fragArguments			= "";
4025 		vertVariables			= specOpData.variables;
4026 		fragVariables			= "";
4027 		vertCommands			= specOpData.commands;
4028 		fragCommands			= "";
4029 		vertProcessResult		= outTypeSnippets->storeVertexResultSnippet;
4030 		fragProcessResult		= outTypeSnippets->loadVertexResultSnippet + outTypeSnippets->storeResultsSnippet;
4031 		needsShaderFloat16		|= outTypeSnippets->loadStoreRequiresShaderFloat16;
4032 	}
4033 	else // perform test in fragment stage - vertex stage is empty
4034 	{
4035 		fragFunctions = specOpData.functions;
4036 		// check if input type is different from tested type
4037 		if (testOperation.isInputTypeRestricted)
4038 		{
4039 			fragAnnotations		= inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet +
4040 								  outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
4041 			fragCapabilities	= behaviorCapability + inTypeSnippets->capabilities + outTypeSnippets->capabilities;
4042 			fragExtensions		= inTypeSnippets->extensions + outTypeSnippets->extensions;
4043 			fragTypes			= inTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->typeDefinitionsSnippet;
4044 			fragConstants		= inTypeSnippets->constantsDefinitionsSnippet + outTypeSnippets->constantsDefinitionsSnippet;
4045 		}
4046 		else
4047 		{
4048 			// input and output types are the same
4049 
4050 			fragAnnotations		= inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet +
4051 								  outTypeSnippets->outputAnnotationsSnippet;
4052 			fragCapabilities	= behaviorCapability + outTypeSnippets->capabilities;
4053 			fragExtensions		= outTypeSnippets->extensions;
4054 			fragTypes			= outTypeSnippets->typeDefinitionsSnippet;
4055 			fragConstants		= outTypeSnippets->constantsDefinitionsSnippet;
4056 		}
4057 
4058 		// varying is not used but it needs to be specified so lets use type_i32 for it
4059 		string dummyVertVarying = "%BP_vertex_result     = OpVariable %type_i32_optr Output\n";
4060 		string dummyFragVarying = "%BP_vertex_result     = OpVariable %type_i32_iptr Input\n";
4061 
4062 		vertCapabilities	= "";
4063 		vertExtensions		= "";
4064 		vertAnnotations		= "OpDecorate %type_f32_arr_1 ArrayStride 4\n";
4065 		vertTypes			= f32TypeMinimalRequired;
4066 		vertConstants		= "";
4067 
4068 		if ((outFloatType != FP32) && (inFloatType != FP32))
4069 			fragTypes += f32TypeMinimalRequired;
4070 
4071 		fragAnnotations += specOpData.annotations;
4072 		fragTypes		+= specOpData.types;
4073 		fragConstants	+= specOpData.constants;
4074 
4075 		vertExecutionMode	= "";
4076 		fragExecutionMode	= behaviorExecutionMode;
4077 		vertIODefinitions	= dummyVertVarying;
4078 		fragIODefinitions	= inTypeSnippets->inputDefinitionsSnippet +
4079 							  outTypeSnippets->outputDefinitionsSnippet + dummyFragVarying;
4080 		vertArguments		= "";
4081 		fragArguments		= specOpData.arguments;
4082 		vertVariables		= "";
4083 		fragVariables		= specOpData.variables;
4084 		vertCommands		= "";
4085 		fragCommands		= specOpData.commands;
4086 		vertProcessResult	= "";
4087 		fragProcessResult	= outTypeSnippets->storeResultsSnippet;
4088 
4089 		if (!testCaseInfo.argumentsFromInput)
4090 		{
4091 			switch(testCaseInfo.testCase.operationId)
4092 			{
4093 				case O_CONV_FROM_FP32:
4094 				case O_CONV_FROM_FP64:
4095 					needsSpecialConstants = true;
4096 					break;
4097 				default:
4098 					break;
4099 			}
4100 		}
4101 	}
4102 
4103 	// Another reason we need shaderFloat16 is the executable instructions uses fp16
4104 	// in a way not supported by the 16bit storage extension.
4105 	needsShaderFloat16 |= float16FeatureRequired && testOperation.floatUsage == FLOAT_ARITHMETIC;
4106 
4107 	// Constants are only needed sometimes.  Drop them in the fp16 case if the code doesn't need
4108 	// them, and if we don't otherwise need shaderFloat16.
4109 	bool needsFP16Constants = needsShaderFloat16 || needsSpecialConstants;
4110 
4111 	if (!needsFP16Constants && float16FeatureRequired)
4112 	{
4113 		// Check various code fragments
4114 		const FloatStatementUsageFlags	commandsFloatConstMask				= B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_CONST_FP16;
4115 		const bool						commandsUsesFloatConstant			= (testCaseInfo.operation.statementUsageFlags & commandsFloatConstMask) != 0;;
4116 		const FloatStatementUsageFlags	argumentsFloatConstMask				= B_STATEMENT_USAGE_ARGS_CONST_FLOAT | B_STATEMENT_USAGE_ARGS_CONST_FP16;
4117 		const bool						argumentsUsesFloatConstant			= (specOpData.argumentsUsesFloatConstant & argumentsFloatConstMask) != 0;
4118 		bool							hasFP16ConstsInCommandsOrArguments	= commandsUsesFloatConstant || argumentsUsesFloatConstant;
4119 
4120 		needsFP16Constants |= hasFP16ConstsInCommandsOrArguments;
4121 
4122 		if (!needsFP16Constants)
4123 		{
4124 			vertConstants = "";
4125 			fragConstants = "";
4126 		}
4127 	}
4128 	needsShaderFloat16 |= needsFP16Constants;
4129 
4130 	if (needsShaderFloat16)
4131 	{
4132 		vertCapabilities += "OpCapability Float16\n";
4133 		fragCapabilities += "OpCapability Float16\n";
4134 	}
4135 
4136 	map<string, string> specializations;
4137 	specializations["vert_capabilities"]	= vertCapabilities;
4138 	specializations["vert_extensions"]		= vertExtensions;
4139 	specializations["vert_execution_mode"]	= vertExecutionMode;
4140 	specializations["vert_annotations"]		= vertAnnotations;
4141 	specializations["vert_types"]			= vertTypes;
4142 	specializations["vert_constants"]		= vertConstants;
4143 	specializations["vert_io_definitions"]	= vertIODefinitions;
4144 	specializations["vert_arguments"]		= vertArguments;
4145 	specializations["vert_variables"]		= vertVariables;
4146 	specializations["vert_functions"]		= vertFunctions;
4147 	specializations["vert_commands"]		= vertCommands;
4148 	specializations["vert_process_result"]	= vertProcessResult;
4149 	specializations["frag_capabilities"]	= fragCapabilities;
4150 	specializations["frag_extensions"]		= fragExtensions;
4151 	specializations["frag_execution_mode"]	= fragExecutionMode;
4152 	specializations["frag_annotations"]		= fragAnnotations;
4153 	specializations["frag_types"]			= fragTypes;
4154 	specializations["frag_constants"]		= fragConstants;
4155 	specializations["frag_functions"]		= fragFunctions;
4156 	specializations["frag_io_definitions"]	= fragIODefinitions;
4157 	specializations["frag_arguments"]		= fragArguments;
4158 	specializations["frag_variables"]		= fragVariables;
4159 	specializations["frag_commands"]		= fragCommands;
4160 	specializations["frag_process_result"]	= fragProcessResult;
4161 
4162 	// colors are not used by the test - input is passed via uniform buffer
4163 	RGBA defaultColors[4] = { RGBA::white(), RGBA::red(), RGBA::green(), RGBA::blue() };
4164 
4165 	// construct input and output buffers of proper types
4166 	TypeValuesSP inTypeValues	= m_typeData.at(inFloatType).values;
4167 	TypeValuesSP outTypeValues	= m_typeData.at(outFloatType).values;
4168 	BufferSp inBufferSp			= inTypeValues->constructInputBuffer(testCase.input);
4169 	BufferSp outBufferSp		= outTypeValues->constructOutputBuffer(testCase.expectedOutput);
4170 
4171 	vkt::SpirVAssembly::GraphicsResources resources;
4172 	resources.inputs.push_back( Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4173 	resources.outputs.push_back(Resource(outBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4174 	resources.verifyIO = checkFloatsLUT[outFloatType];
4175 
4176 	StageToSpecConstantMap	noSpecConstants;
4177 	PushConstants			noPushConstants;
4178 	GraphicsInterfaces		noInterfaces;
4179 
4180 	VulkanFeatures vulkanFeatures;
4181 	setupVulkanFeatures(inFloatTypeForCaps,		// usualy same as inFloatType - different only for UnpackHalf2x16
4182 						outFloatType,
4183 						testCase.behaviorFlags,
4184 						float64FeatureRequired,
4185 						vulkanFeatures);
4186 	vulkanFeatures.coreFeatures.fragmentStoresAndAtomics = true;
4187 
4188 	vector<string> extensions;
4189 	extensions.push_back("VK_KHR_shader_float_controls");
4190 	if (needsShaderFloat16)
4191 	{
4192 		extensions.push_back("VK_KHR_shader_float16_int8");
4193 		vulkanFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
4194 	}
4195 	if (float16FeatureRequired)
4196 	{
4197 		extensions.push_back("VK_KHR_16bit_storage");
4198 		vulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
4199 	}
4200 
4201 	InstanceContext ctx(defaultColors,
4202 						defaultColors,
4203 						specializations,
4204 						noSpecConstants,
4205 						noPushConstants,
4206 						resources,
4207 						noInterfaces,
4208 						extensions,
4209 						vulkanFeatures,
4210 						testedStage);
4211 
4212 	ctx.moduleMap["vert"].push_back(std::make_pair("main", VK_SHADER_STAGE_VERTEX_BIT));
4213 	ctx.moduleMap["frag"].push_back(std::make_pair("main", VK_SHADER_STAGE_FRAGMENT_BIT));
4214 
4215 	ctx.requiredStages			= static_cast<VkShaderStageFlagBits>(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
4216 	ctx.failResult				= QP_TEST_RESULT_FAIL;
4217 	ctx.failMessageTemplate		= "Output doesn't match with expected";
4218 
4219 	return ctx;
4220 }
4221 
4222 } // anonymous
4223 
createFloatControlsTestGroup(TestContext & testCtx,TestGroupBuilderBase * groupBuilder)4224 tcu::TestCaseGroup* createFloatControlsTestGroup (TestContext& testCtx, TestGroupBuilderBase* groupBuilder)
4225 {
4226 	de::MovePtr<TestCaseGroup>	group(new TestCaseGroup(testCtx, "float_controls", "Tests for VK_KHR_shader_float_controls extension"));
4227 
4228 	struct TestGroup
4229 	{
4230 		FloatType		floatType;
4231 		const char*		groupName;
4232 	};
4233 	TestGroup testGroups[] =
4234 	{
4235 		{ FP16, "fp16" },
4236 		{ FP32, "fp32" },
4237 		{ FP64, "fp64" },
4238 	};
4239 
4240 	for (int i = 0 ; i < DE_LENGTH_OF_ARRAY(testGroups) ; ++i)
4241 	{
4242 		const TestGroup& testGroup = testGroups[i];
4243 		TestCaseGroup* typeGroup = new TestCaseGroup(testCtx, testGroup.groupName, "");
4244 		group->addChild(typeGroup);
4245 
4246 		groupBuilder->createOperationTests(typeGroup, "input_args", testGroup.floatType, true);
4247 		groupBuilder->createOperationTests(typeGroup, "generated_args", testGroup.floatType, false);
4248 	}
4249 
4250 	groupBuilder->createSettingsTests(group.get());
4251 
4252 	return group.release();
4253 }
4254 
createFloatControlsComputeGroup(TestContext & testCtx)4255 tcu::TestCaseGroup* createFloatControlsComputeGroup (TestContext& testCtx)
4256 {
4257 	ComputeTestGroupBuilder computeTestGroupBuilder;
4258 	computeTestGroupBuilder.init();
4259 
4260 	return createFloatControlsTestGroup(testCtx, &computeTestGroupBuilder);
4261 }
4262 
createFloatControlsGraphicsGroup(TestContext & testCtx)4263 tcu::TestCaseGroup* createFloatControlsGraphicsGroup (TestContext& testCtx)
4264 {
4265 	GraphicsTestGroupBuilder graphicsTestGroupBuilder;
4266 	graphicsTestGroupBuilder.init();
4267 
4268 	return createFloatControlsTestGroup(testCtx, &graphicsTestGroupBuilder);
4269 }
4270 
4271 } // SpirVAssembly
4272 } // vkt
4273