• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2018 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief VK_KHR_shader_float_controls tests.
22  *//*--------------------------------------------------------------------*/
23 
24 
25 #include "vktSpvAsmFloatControlsTests.hpp"
26 #include "vktSpvAsmComputeShaderCase.hpp"
27 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
28 #include "vktTestGroupUtil.hpp"
29 #include "tcuFloat.hpp"
30 #include "tcuFloatFormat.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "deUniquePtr.hpp"
33 #include "deFloat16.h"
34 #include "vkQueryUtil.hpp"
35 #include "vkRefUtil.hpp"
36 #include <cstring>
37 #include <vector>
38 #include <limits>
39 #include <cstdint>
40 #include <fenv.h>
41 #include <cstdint>
42 
43 namespace vkt
44 {
45 namespace SpirVAssembly
46 {
47 
48 namespace
49 {
50 
51 using namespace std;
52 using namespace tcu;
53 
// Float widths that the tests in this file are written for.
enum FloatType
{
	FP16 = 0,	// 16-bit float
	FP32 = 1,	// 32-bit float
	FP64 = 2	// 64-bit float
};
60 
// Tag naming the float width of data kept in a buffer.
// NOTE(review): the users of this enum are outside the visible part of the file.
enum class BufferDataType
{
	DATA_UNKNOWN = 0,
	DATA_FP16,
	DATA_FP32,
	DATA_FP64
};
68 
// How far a test goes in its use of a float type.
enum FloatUsage
{
	// For a 16-bit float type this level of use is what
	// VK_KHR_16bit_storage supports.
	FLOAT_STORAGE_ONLY	= 0,

	// The float type is used in ways that go beyond VK_KHR_16bit_storage.
	FLOAT_ARITHMETIC	= 1
};
77 
// Single-bit flags, one per (statement group, float kind) combination.
// Every enumerator occupies its own bit (bits 0..19).
// NOTE(review): the meaning of each statement group is defined by the code
// that consumes these flags, which is outside the visible part of the file.
enum FloatStatementUsageBits
{
	// arguments / constants
	B_STATEMENT_USAGE_ARGS_CONST_FLOAT		= 0x00000001,
	B_STATEMENT_USAGE_ARGS_CONST_FP16		= 0x00000002,
	B_STATEMENT_USAGE_ARGS_CONST_FP32		= 0x00000004,
	B_STATEMENT_USAGE_ARGS_CONST_FP64		= 0x00000008,

	// types / types
	B_STATEMENT_USAGE_TYPES_TYPE_FLOAT		= 0x00000010,
	B_STATEMENT_USAGE_TYPES_TYPE_FP16		= 0x00000020,
	B_STATEMENT_USAGE_TYPES_TYPE_FP32		= 0x00000040,
	B_STATEMENT_USAGE_TYPES_TYPE_FP64		= 0x00000080,

	// constants / types
	B_STATEMENT_USAGE_CONSTS_TYPE_FLOAT		= 0x00000100,
	B_STATEMENT_USAGE_CONSTS_TYPE_FP16		= 0x00000200,
	B_STATEMENT_USAGE_CONSTS_TYPE_FP32		= 0x00000400,
	B_STATEMENT_USAGE_CONSTS_TYPE_FP64		= 0x00000800,

	// commands / constants
	B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT	= 0x00001000,
	B_STATEMENT_USAGE_COMMANDS_CONST_FP16	= 0x00002000,
	B_STATEMENT_USAGE_COMMANDS_CONST_FP32	= 0x00004000,
	B_STATEMENT_USAGE_COMMANDS_CONST_FP64	= 0x00008000,

	// commands / types
	B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT	= 0x00010000,
	B_STATEMENT_USAGE_COMMANDS_TYPE_FP16	= 0x00020000,
	B_STATEMENT_USAGE_COMMANDS_TYPE_FP32	= 0x00040000,
	B_STATEMENT_USAGE_COMMANDS_TYPE_FP64	= 0x00080000,
};
101 
102 typedef deUint32 FloatStatementUsageFlags;
103 
// Float behaviors that it is possible to test. Each enumerator is a distinct
// bit; the trailing comment names the corresponding SPIR-V execution mode.
enum BehaviorFlagBits
{
	B_DENORM_PRESERVE	= (1 << 0),		// DenormPreserve
	B_DENORM_FLUSH		= (1 << 1),		// DenormFlushToZero
	B_ZIN_PRESERVE		= (1 << 2),		// SignedZeroInfNanPreserve
	B_RTE_ROUNDING		= (1 << 3),		// RoundingModeRTE
	B_RTZ_ROUNDING		= (1 << 4)		// RoundingModeRTZ
};
113 
114 typedef deUint32 BehaviorFlags;
115 
// Codes for all float values that tests use as operation arguments or as
// operation results. Referring to values by code lets the same test be
// instantiated with different float types, which keeps the test
// implementation simpler.
enum ValueId
{
	//
	// Common values, used both as arguments and as results
	//

	// V_UNUSED marks arguments that are not used in an operation,
	// or results of test cases that should be skipped
	V_UNUSED = 0,
	V_MINUS_INF,					// -infinity
	V_MINUS_ONE,					// -1.0
	V_MINUS_ZERO,					// -0.0
	V_ZERO,							//  0.0
	V_HALF,							//  0.5
	V_ONE,							//  1.0
	V_INF,							// +infinity
	V_DENORM,						// a denormalized value
	V_NAN,							// not-a-number

	//
	// Arguments for rounding mode tests - used only when arguments are
	// passed from input
	//
	V_ADD_ARG_A,
	V_ADD_ARG_B,
	V_SUB_ARG_A,
	V_SUB_ARG_B,
	V_MUL_ARG_A,
	V_MUL_ARG_B,
	V_DOT_ARG_A,
	V_DOT_ARG_B,

	//
	// Arguments of conversion operations - used only when arguments are
	// passed from input
	//
	V_CONV_FROM_FP32_ARG,
	V_CONV_FROM_FP64_ARG,

	//
	// Results of the rounding mode tests, one per operation and
	// rounding mode (RTZ / RTE)
	//
	V_ADD_RTZ_RESULT,
	V_ADD_RTE_RESULT,
	V_SUB_RTZ_RESULT,
	V_SUB_RTE_RESULT,
	V_MUL_RTZ_RESULT,
	V_MUL_RTE_RESULT,
	V_DOT_RTZ_RESULT,
	V_DOT_RTE_RESULT,

	//
	// Non-common results of some operations - corner cases
	//

	// fp16 addition of non-flushed denorm with itself
	// (or equivalent dot-product or vector-matrix multiply)
	V_ZERO_OR_DENORM_TIMES_TWO,
	// value used only for fp16 subtraction result of preserved denorm and one
	V_MINUS_ONE_OR_CLOSE,
	V_PI_DIV_2,
	// both +0 and -0 are accepted
	V_ZERO_OR_MINUS_ZERO,
	// both +0 and 1 are accepted
	V_ZERO_OR_ONE,
	// both 0 and fp32 representation of fp16 denorm are accepted
	V_ZERO_OR_FP16_DENORM_TO_FP32,
	V_ZERO_OR_FP16_DENORM_TO_FP64,
	V_ZERO_OR_FP32_DENORM_TO_FP64,
	V_DENORM_TIMES_TWO,
	V_DEGREES_DENORM,
	// 1.0 trigonometric operations, including precision margin
	V_TRIG_ONE,
	V_MINUS_INF_OR_LOG_DENORM,
	V_MINUS_INF_OR_LOG2_DENORM,
	V_ZERO_OR_SQRT_DENORM,
	V_INF_OR_INV_SQRT_DENORM,

	//
	// Results of conversion operations
	//
	V_CONV_TO_FP16_RTZ_RESULT,
	V_CONV_TO_FP16_RTE_RESULT,
	V_CONV_TO_FP32_RTZ_RESULT,
	V_CONV_TO_FP32_RTE_RESULT,
	// used e.g. when converting fp16 denorm to fp32
	V_CONV_DENORM_SMALLER,
	V_CONV_DENORM_BIGGER,
};
181 
// All tested operations. Operations are identified in a generic way so that
// the same id can be used to generate tests that operate on arguments of any
// of the tested float types. Related operations share a line; the
// declaration order (and therefore every numeric value) is significant only
// in that it must stay stable within one build.
enum OperationId
{
	// spir-v unary operations
	OID_NEGATE = 0,
	OID_COMPOSITE, OID_COMPOSITE_INS,
	OID_COPY,
	OID_D_EXTRACT, OID_D_INSERT,
	OID_SHUFFLE,
	OID_TRANSPOSE,
	OID_CONV_FROM_FP16, OID_CONV_FROM_FP32, OID_CONV_FROM_FP64,
	OID_SCONST_CONV_FROM_FP32_TO_FP16,
	OID_SCONST_CONV_FROM_FP64_TO_FP32,
	OID_SCONST_CONV_FROM_FP64_TO_FP16,
	OID_RETURN_VAL,

	// spir-v binary operations
	OID_ADD, OID_SUB, OID_MUL, OID_DIV, OID_REM, OID_MOD,
	OID_PHI,
	OID_SELECT,
	OID_DOT,
	OID_VEC_MUL_S, OID_VEC_MUL_M,
	OID_MAT_MUL_S, OID_MAT_MUL_V, OID_MAT_MUL_M,
	OID_OUT_PROD,
	OID_ORD_EQ,		OID_UORD_EQ,
	OID_ORD_NEQ,	OID_UORD_NEQ,
	OID_ORD_LS,		OID_UORD_LS,
	OID_ORD_GT,		OID_UORD_GT,
	OID_ORD_LE,		OID_UORD_LE,
	OID_ORD_GE,		OID_UORD_GE,

	// glsl unary operations
	OID_ROUND, OID_ROUND_EV, OID_TRUNC,
	OID_ABS, OID_SIGN,
	OID_FLOOR, OID_CEIL, OID_FRACT,
	OID_RADIANS, OID_DEGREES,
	OID_SIN, OID_COS, OID_TAN,
	OID_ASIN, OID_ACOS, OID_ATAN,
	OID_SINH, OID_COSH, OID_TANH,
	OID_ASINH, OID_ACOSH, OID_ATANH,
	OID_EXP, OID_LOG, OID_EXP2, OID_LOG2,
	OID_SQRT, OID_INV_SQRT,
	OID_MODF, OID_MODF_ST,
	OID_FREXP, OID_FREXP_ST,
	OID_LENGTH, OID_NORMALIZE,
	OID_REFLECT, OID_REFRACT,
	OID_MAT_DET, OID_MAT_INV,
	OID_PH_DENORM,				// PackHalf2x16
	OID_UPH_DENORM,
	OID_PD_DENORM,				// PackDouble2x32
	OID_UPD_DENORM_FLUSH,
	OID_UPD_DENORM_PRESERVE,

	// glsl binary operations
	OID_ATAN2, OID_POW, OID_MIX, OID_FMA,
	OID_MIN, OID_MAX, OID_CLAMP,
	OID_STEP, OID_SSTEP,
	OID_DIST, OID_CROSS, OID_FACE_FWD,
	OID_NMIN, OID_NMAX, OID_NCLAMP,

	OID_ORTE_ROUND,
	OID_ORTZ_ROUND
};
298 
299 // Structures storing data required to test DenormPreserve and DenormFlushToZero modes.
300 // Operations are separated into binary and unary lists because binary operations can be tested with
301 // two attributes and thus denorms can be tested in combination with value, denorm, inf and nan.
302 // Unary operations are only tested with denorms.
303 struct BinaryCase
304 {
305 	OperationId	operationId;
306 	ValueId		opVarResult;
307 	ValueId		opDenormResult;
308 	ValueId		opInfResult;
309 	ValueId		opNanResult;
310 };
311 struct UnaryCase
312 {
313 	OperationId	operationId;
314 	ValueId		result;
315 };
316 
// Returns a copy of str in which every occurrence of `from` has been replaced
// with `to`. Occurrences are found left to right and do not overlap; text
// inserted by a replacement is never rescanned, so `to` may safely contain
// `from`.
//
// An empty `from` leaves the string unchanged. (An empty pattern matches at
// every position, so without this guard the loop below would keep inserting
// `to` and never terminate.)
std::string replace(std::string str, const std::string& from, const std::string& to)
{
	// to keep spir-v code clean and easier to read parts of it are processed
	// with this method instead of StringTemplate; main usage of this method is the
	// replacement of "float_" with "f16_", "f32_" or "f64_" depending on test case

	if (from.empty())
		return str;

	std::string::size_type matchPos = str.find(from);
	while (matchPos != std::string::npos)
	{
		str.replace(matchPos, from.length(), to);
		// continue searching just past the text that was inserted
		matchPos = str.find(from, matchPos + to.length());
	}
	return str;
}
332 
// Helper for converting bits between an integer type and a float type:
// Value overlays one object of each type in the same storage.
template<typename FloatT, typename UintT>
struct RawConvert
{
	union Value
	{
		FloatT	fp;		// storage viewed as the float type
		UintT	ui;		// storage viewed as the integer type
	};
};
343 
344 // Traits used to get int type that can store equivalent float type.
345 template<typename FLOAT_TYPE>
346 struct GetCoresponding
347 {
348 	typedef deUint16 uint_type;
349 };
350 template<>
351 struct GetCoresponding<float>
352 {
353 	typedef deUint32 uint_type;
354 };
355 template<>
356 struct GetCoresponding<double>
357 {
358 	typedef deUint64 uint_type;
359 };
360 
361 // All values used for arguments and operation results are stored in single map.
362 // Each float type (fp16, fp32, fp64) has its own map that is used during
363 // test setup and during verification. TypeValuesBase is interface to that map.
364 class TypeValuesBase
365 {
366 public:
367 	TypeValuesBase();
368 	virtual ~TypeValuesBase() = default;
369 
370 	virtual BufferSp	constructInputBuffer	(const ValueId* twoArguments) const = 0;
371 	virtual BufferSp	constructOutputBuffer	(ValueId result) const = 0;
372 	virtual void		fillInputData			(const ValueId* twoArguments, vector<deUint8>& bufferData, deUint32& offset) const = 0;
373 
374 protected:
375 	const double	pi;
376 };
377 
TypeValuesBase()378 TypeValuesBase::TypeValuesBase()
379 	: pi(3.14159265358979323846)
380 {
381 }
382 
383 typedef de::SharedPtr<TypeValuesBase> TypeValuesSP;
384 
385 template <typename FLOAT_TYPE>
386 class TypeValues: public TypeValuesBase
387 {
388 public:
389 	TypeValues();
390 
391 	BufferSp	constructInputBuffer	(const ValueId* twoArguments) const override;
392 	BufferSp	constructOutputBuffer	(ValueId result) const override;
393 	void		fillInputData			(const ValueId* twoArguments, vector<deUint8>& bufferData, deUint32& offset) const override;
394 
395 	FLOAT_TYPE getValue(ValueId id) const;
396 
397 	template <typename UINT_TYPE>
398 	FLOAT_TYPE exactByteEquivalent(UINT_TYPE byteValue) const;
399 
400 private:
401 	typedef map<ValueId, FLOAT_TYPE> ValueMap;
402 	ValueMap m_valueIdToFloatType;
403 };
404 
405 template <typename FLOAT_TYPE>
constructInputBuffer(const ValueId * twoArguments) const406 BufferSp TypeValues<FLOAT_TYPE>::constructInputBuffer(const ValueId* twoArguments) const
407 {
408 	std::vector<FLOAT_TYPE> inputData(2);
409 	inputData[0] = m_valueIdToFloatType.at(twoArguments[0]);
410 	inputData[1] = m_valueIdToFloatType.at(twoArguments[1]);
411 	return BufferSp(new Buffer<FLOAT_TYPE>(inputData));
412 }
413 
414 template <typename FLOAT_TYPE>
constructOutputBuffer(ValueId result) const415 BufferSp TypeValues<FLOAT_TYPE>::constructOutputBuffer(ValueId result) const
416 {
417 	// note: we are not doing maping here, ValueId is directly saved in
418 	// float type in order to be able to retireve it during verification
419 
420 	typedef typename GetCoresponding<FLOAT_TYPE>::uint_type uint_t;
421 	uint_t value = static_cast<uint_t>(result);
422 
423 	// For FP16 we increase the buffer size to hold an unsigned integer, as
424 	// we can be in the no 16bit_storage case.
425 	const uint_t outputSize = sizeof(FLOAT_TYPE) == 2u ? 2u : 1u;
426 	std::vector<FLOAT_TYPE> outputData(outputSize, exactByteEquivalent<uint_t>(value));
427 	return BufferSp(new Buffer<FLOAT_TYPE>(outputData));
428 }
429 
430 template <typename FLOAT_TYPE>
fillInputData(const ValueId * twoArguments,vector<deUint8> & bufferData,deUint32 & offset) const431 void TypeValues<FLOAT_TYPE>::fillInputData(const ValueId* twoArguments, vector<deUint8>& bufferData, deUint32& offset) const
432 {
433 	deUint32 typeSize = sizeof(FLOAT_TYPE);
434 
435 	FLOAT_TYPE argA = getValue(twoArguments[0]);
436 	deMemcpy(&bufferData[offset], &argA, typeSize);
437 	offset += typeSize;
438 
439 	FLOAT_TYPE argB = getValue(twoArguments[1]);
440 	deMemcpy(&bufferData[offset], &argB, typeSize);
441 	offset += typeSize;
442 }
443 
444 template <typename FLOAT_TYPE>
getValue(ValueId id) const445 FLOAT_TYPE TypeValues<FLOAT_TYPE>::getValue(ValueId id) const
446 {
447 	return m_valueIdToFloatType.at(id);
448 }
449 
450 template <typename FLOAT_TYPE>
451 template <typename UINT_TYPE>
exactByteEquivalent(UINT_TYPE byteValue) const452 FLOAT_TYPE TypeValues<FLOAT_TYPE>::exactByteEquivalent(UINT_TYPE byteValue) const
453 {
454 	typename RawConvert<FLOAT_TYPE, UINT_TYPE>::Value value;
455 	value.ui = byteValue;
456 	return value.fp;
457 }
458 
459 template <>
TypeValues()460 TypeValues<deFloat16>::TypeValues()
461 	: TypeValuesBase()
462 {
463 	// NOTE: when updating entries in m_valueIdToFloatType make sure to
464 	// update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
465 	ValueMap& vm = m_valueIdToFloatType;
466 	vm[V_UNUSED]			= deFloat32To16(0.0f);
467 	vm[V_MINUS_INF]			= 0xfc00;
468 	vm[V_MINUS_ONE]			= deFloat32To16(-1.0f);
469 	vm[V_MINUS_ZERO]		= 0x8000;
470 	vm[V_ZERO]				= 0x0000;
471 	vm[V_HALF]				= deFloat32To16(0.5f);
472 	vm[V_ONE]				= deFloat32To16(1.0f);
473 	vm[V_INF]				= 0x7c00;
474 	vm[V_DENORM]			= 0x03f0; // this value should be the same as the result of denormBase - epsilon
475 	vm[V_NAN]				= 0x7cf0;
476 
477 	vm[V_PI_DIV_2]			= 0x3e48;
478 	vm[V_DENORM_TIMES_TWO]	= 0x07e0;
479 	vm[V_DEGREES_DENORM]	= 0x1b0c;
480 
481 	vm[V_ADD_ARG_A]					= 0x3c03;
482 	vm[V_ADD_ARG_B]					= vm[V_ONE];
483 	vm[V_SUB_ARG_A]					= vm[V_ADD_ARG_A];
484 	vm[V_SUB_ARG_B]					= 0x4203;
485 	vm[V_MUL_ARG_A]					= vm[V_ADD_ARG_A];
486 	vm[V_MUL_ARG_B]					= 0x1900;
487 	vm[V_DOT_ARG_A]					= vm[V_ADD_ARG_A];
488 	vm[V_DOT_ARG_B]					= vm[V_MUL_ARG_B];
489 	vm[V_CONV_FROM_FP32_ARG]		= vm[V_UNUSED];
490 	vm[V_CONV_FROM_FP64_ARG]		= vm[V_UNUSED];
491 
492 	vm[V_ADD_RTZ_RESULT]			= 0x4001;	// deFloat16Add(vm[V_ADD_ARG_A], vm[V_ADD_ARG_B], rtz)
493 	vm[V_SUB_RTZ_RESULT]			= 0xc001;	// deFloat16Sub(vm[V_SUB_ARG_A], vm[V_SUB_ARG_B], rtz)
494 	vm[V_MUL_RTZ_RESULT]			= 0x1903;	// deFloat16Mul(vm[V_MUL_ARG_A], vm[V_MUL_ARG_B], rtz)
495 	vm[V_DOT_RTZ_RESULT]			= 0x1d03;
496 	vm[V_CONV_TO_FP16_RTZ_RESULT]	= deFloat32To16Round(1.22334445f, DE_ROUNDINGMODE_TO_ZERO);
497 	vm[V_CONV_TO_FP32_RTZ_RESULT]	= vm[V_UNUSED];
498 
499 	vm[V_ADD_RTE_RESULT]			= 0x4002;	// deFloat16Add(vm[V_ADD_ARG_A], vm[V_ADD_ARG_B], rte)
500 	vm[V_SUB_RTE_RESULT]			= 0xc002;	// deFloat16Sub(vm[V_SUB_ARG_A], vm[V_SUB_ARG_B], rte)
501 	vm[V_MUL_RTE_RESULT]			= 0x1904;	// deFloat16Mul(vm[V_MUL_ARG_A], vm[V_MUL_ARG_B], rte)
502 	vm[V_DOT_RTE_RESULT]			= 0x1d04;
503 	vm[V_CONV_TO_FP16_RTE_RESULT]	= deFloat32To16Round(1.22334445f, DE_ROUNDINGMODE_TO_NEAREST_EVEN);
504 	vm[V_CONV_TO_FP32_RTE_RESULT]	= vm[V_UNUSED];
505 
506 	// there is no precision to store fp32 denorm nor fp64 denorm
507 	vm[V_CONV_DENORM_SMALLER]		= vm[V_ZERO];
508 	vm[V_CONV_DENORM_BIGGER]		= vm[V_ZERO];
509 }
510 
511 template <>
TypeValues()512 TypeValues<float>::TypeValues()
513 	: TypeValuesBase()
514 {
515 	// NOTE: when updating entries in m_valueIdToFloatType make sure to
516 	// update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
517 	ValueMap& vm = m_valueIdToFloatType;
518 	vm[V_UNUSED]			=  0.0f;
519 	vm[V_MINUS_INF]			= -std::numeric_limits<float>::infinity();
520 	vm[V_MINUS_ONE]			= -1.0f;
521 	vm[V_MINUS_ZERO]		= -0.0f;
522 	vm[V_ZERO]				=  0.0f;
523 	vm[V_HALF]				=  0.5f;
524 	vm[V_ONE]				=  1.0f;
525 	vm[V_INF]				=  std::numeric_limits<float>::infinity();
526 	vm[V_DENORM]			=  static_cast<float>(1.413e-42); // 0x000003f0
527 	vm[V_NAN]				=  std::numeric_limits<float>::quiet_NaN();
528 
529 	vm[V_PI_DIV_2]			=  static_cast<float>(pi / 2);
530 	vm[V_DENORM_TIMES_TWO]	=  vm[V_DENORM] + vm[V_DENORM];
531 	vm[V_DEGREES_DENORM]	=  deFloatDegrees(vm[V_DENORM]);
532 
533 	float e = std::numeric_limits<float>::epsilon();
534 	vm[V_ADD_ARG_A]					= 1.0f + 3 * e;
535 	vm[V_ADD_ARG_B]					= 1.0f;
536 	vm[V_SUB_ARG_A]					= vm[V_ADD_ARG_A];
537 	vm[V_SUB_ARG_B]					= 3.0f + 6 * e;
538 	vm[V_MUL_ARG_A]					= vm[V_ADD_ARG_A];
539 	vm[V_MUL_ARG_B]					= 5 * e;
540 	vm[V_DOT_ARG_A]					= vm[V_ADD_ARG_A];
541 	vm[V_DOT_ARG_B]					= 5 * e;
542 	vm[V_CONV_FROM_FP32_ARG]		= 1.22334445f;
543 	vm[V_CONV_FROM_FP64_ARG]		= vm[V_UNUSED];
544 
545 	int prevRound = fegetround();
546 	fesetround(FE_TOWARDZERO);
547 	vm[V_ADD_RTZ_RESULT]			= vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
548 	vm[V_SUB_RTZ_RESULT]			= vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
549 	vm[V_MUL_RTZ_RESULT]			= vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
550 	vm[V_DOT_RTZ_RESULT]			= vm[V_MUL_RTZ_RESULT] + vm[V_MUL_RTZ_RESULT];
551 	vm[V_CONV_TO_FP16_RTZ_RESULT]	= vm[V_UNUSED];
552 	vm[V_CONV_TO_FP32_RTZ_RESULT]	= exactByteEquivalent<deUint32>(0x3f9c968d); // result of conversion from double(1.22334455)
553 
554 	fesetround(FE_TONEAREST);
555 	vm[V_ADD_RTE_RESULT]			= vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
556 	vm[V_SUB_RTE_RESULT]			= vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
557 	vm[V_MUL_RTE_RESULT]			= vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
558 	vm[V_DOT_RTE_RESULT]			= vm[V_MUL_RTE_RESULT] + vm[V_MUL_RTE_RESULT];
559 	vm[V_CONV_TO_FP16_RTE_RESULT]	= vm[V_UNUSED];
560 	vm[V_CONV_TO_FP32_RTE_RESULT]	= exactByteEquivalent<deUint32>(0x3f9c968e); // result of conversion from double(1.22334455)
561 	fesetround(prevRound);
562 
563 	// there is no precision to store fp64 denorm
564 	vm[V_CONV_DENORM_SMALLER]		= exactByteEquivalent<deUint32>(0x387c0000); // fp16 denorm
565 	vm[V_CONV_DENORM_BIGGER]		= vm[V_ZERO];
566 }
567 
568 template <>
TypeValues()569 TypeValues<double>::TypeValues()
570 	: TypeValuesBase()
571 {
572 	// NOTE: when updating entries in m_valueIdToFloatType make sure to
573 	// update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
574 	ValueMap& vm = m_valueIdToFloatType;
575 	vm[V_UNUSED]			=  0.0;
576 	vm[V_MINUS_INF]			= -std::numeric_limits<double>::infinity();
577 	vm[V_MINUS_ONE]			= -1.0;
578 	vm[V_MINUS_ZERO]		= -0.0;
579 	vm[V_ZERO]				=  0.0;
580 	vm[V_HALF]				=  0.5;
581 	vm[V_ONE]				=  1.0;
582 	vm[V_INF]				=  std::numeric_limits<double>::infinity();
583 	vm[V_DENORM]			=  4.98e-321; // 0x00000000000003F0
584 	vm[V_NAN]				=  std::numeric_limits<double>::quiet_NaN();
585 
586 	vm[V_PI_DIV_2]			=  pi / 2;
587 	vm[V_DENORM_TIMES_TWO]	=  vm[V_DENORM] + vm[V_DENORM];
588 	vm[V_DEGREES_DENORM]	=  vm[V_UNUSED];
589 
590 	double e = std::numeric_limits<double>::epsilon();
591 	vm[V_ADD_ARG_A]				= 1.0 + 3 * e;
592 	vm[V_ADD_ARG_B]				= 1.0;
593 	vm[V_SUB_ARG_A]				= vm[V_ADD_ARG_A];
594 	vm[V_SUB_ARG_B]				= 3.0 + 6 * e;
595 	vm[V_MUL_ARG_A]				= vm[V_ADD_ARG_A];
596 	vm[V_MUL_ARG_B]				= 5 * e;
597 	vm[V_DOT_ARG_A]				= vm[V_ADD_ARG_A];
598 	vm[V_DOT_ARG_B]				= 5 * e;
599 	vm[V_CONV_FROM_FP32_ARG]	= vm[V_UNUSED];
600 	vm[V_CONV_FROM_FP64_ARG]	= 1.22334455;
601 
602 	int prevRound = fegetround();
603 	fesetround(FE_TOWARDZERO);
604 	vm[V_ADD_RTZ_RESULT]			= vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
605 	vm[V_SUB_RTZ_RESULT]			= vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
606 	vm[V_MUL_RTZ_RESULT]			= vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
607 	vm[V_DOT_RTZ_RESULT]			= vm[V_MUL_RTZ_RESULT] + vm[V_MUL_RTZ_RESULT];
608 	vm[V_CONV_TO_FP16_RTZ_RESULT]	= vm[V_UNUSED];
609 	vm[V_CONV_TO_FP32_RTZ_RESULT]	= vm[V_UNUSED];
610 
611 	fesetround(FE_TONEAREST);
612 	vm[V_ADD_RTE_RESULT]			= vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
613 	vm[V_SUB_RTE_RESULT]			= vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
614 	vm[V_MUL_RTE_RESULT]			= vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
615 	vm[V_DOT_RTE_RESULT]			= vm[V_MUL_RTE_RESULT] + vm[V_MUL_RTE_RESULT];
616 	vm[V_CONV_TO_FP16_RTE_RESULT]	= vm[V_UNUSED];
617 	vm[V_CONV_TO_FP32_RTE_RESULT]	= vm[V_UNUSED];
618 	fesetround(prevRound);
619 
620 	vm[V_CONV_DENORM_SMALLER]		= exactByteEquivalent<deUint64>(0x3f0f800000000000); // 0x03f0 is fp16 denorm
621 	vm[V_CONV_DENORM_BIGGER]		= exactByteEquivalent<deUint64>(0x373f800000000000); // 0x000003f0 is fp32 denorm
622 }
623 
624 // Each float type (fp16, fp32, fp64) has specific set of SPIR-V snippets
625 // that was extracted to separate template specialization. Those snippets
626 // are used to compose final test shaders. With this approach
627 // parameterization can be done just once per type and reused for many tests.
628 class TypeSnippetsBase
629 {
630 public:
631 	virtual ~TypeSnippetsBase() = default;
632 
633 protected:
634 	void updateSpirvSnippets();
635 
636 public: // Type specific data:
637 
638 	// Number of bits consumed by float type
639 	string bitWidth;
640 
641 	// Minimum positive normal
642 	string epsilon;
643 
644 	// denormBase is a normal value (found empirically) used to generate denorm value.
645 	// Denorm is generated by substracting epsilon from denormBase.
646 	// denormBase is not a denorm - it is used to create denorm.
647 	// This value is needed when operations are tested with arguments that were
648 	// generated in the code. Generated denorm should be the same as denorm
649 	// used when arguments are passed via input (m_valueIdToFloatType[V_DENORM]).
650 	// This is required as result of some operations depends on actual denorm value
651 	// e.g. OpRadians(0x0001) is 0 but OpRadians(0x03f0) is denorm.
652 	string denormBase;
653 
654 	string capabilities;
655 	string extensions;
656 	string capabilitiesFp16Without16BitStorage;
657 	string extensionsFp16Without16BitStorage;
658 	string arrayStride;
659 
660 	bool loadStoreRequiresShaderFloat16;
661 
662 public: // Type specific spir-v snippets:
663 
664 	// Common annotations
665 	string typeAnnotationsSnippet;
666 
667 	// Definitions of all types commonly used by operation tests
668 	string typeDefinitionsSnippet;
669 
670 	// Definitions of all types commonly used by settings tests
671 	string minTypeDefinitionsSnippet;
672 
673 	// Definitions of all constants commonly used by tests
674 	string constantsDefinitionsSnippet;
675 
676 	// Map that stores instructions that generate arguments of specified value.
677 	// Every test that uses generated inputod will select up to two items from this map
678 	typedef map<ValueId, string> SnippetMap;
679 	SnippetMap valueIdToSnippetArgMap;
680 
681 	// Spir-v snippets that read argument from SSBO
682 	string argumentsFromInputSnippet;
683 	string multiArgumentsFromInputSnippet;
684 
685 	// SSBO with stage input/output definitions
686 	string inputAnnotationsSnippet;
687 	string inputDefinitionsSnippet;
688 	string outputAnnotationsSnippet;
689 	string multiOutputAnnotationsSnippet;
690 	string outputDefinitionsSnippet;
691 	string multiOutputDefinitionsSnippet;
692 
693 	// Varying is required to pass result from vertex stage to fragment stage,
694 	// one of requirements was to not use SSBO writes in vertex stage so we
695 	// need to do that in fragment stage; we also cant pass operation result
696 	// directly because of interpolation, to avoid it we do a bitcast to uint
697 	string varyingsTypesSnippet;
698 	string inputVaryingsSnippet;
699 	string outputVaryingsSnippet;
700 	string storeVertexResultSnippet;
701 	string loadVertexResultSnippet;
702 
703 	string storeResultsSnippet;
704 	string multiStoreResultsSnippet;
705 
706 	string argumentsFromInputFp16Snippet;
707 	string storeResultsFp16Snippet;
708 	string multiArgumentsFromInputFp16Snippet;
709 	string multiOutputAnnotationsFp16Snippet;
710 	string multiStoreResultsFp16Snippet;
711 	string multiOutputDefinitionsFp16Snippet;
712 	string inputDefinitionsFp16Snippet;
713 	string outputDefinitionsFp16Snippet;
714 	string typeAnnotationsFp16Snippet;
715 	string typeDefinitionsFp16Snippet;
716 };
717 
updateSpirvSnippets()718 void TypeSnippetsBase::updateSpirvSnippets()
719 {
720 	// annotations to types that are commonly used by tests
721 	const string typeAnnotationsTemplate =
722 		"OpDecorate %type_float_arr_1 ArrayStride " + arrayStride + "\n"
723 		"OpDecorate %type_float_arr_2 ArrayStride " + arrayStride + "\n";
724 
725 	// definition off all types that are commonly used by tests
726 	const string typeDefinitionsTemplate =
727 		"%type_float             = OpTypeFloat " + bitWidth + "\n"
728 		"%type_float_uptr        = OpTypePointer Uniform %type_float\n"
729 		"%type_float_fptr        = OpTypePointer Function %type_float\n"
730 		"%type_float_vec2        = OpTypeVector %type_float 2\n"
731 		"%type_float_vec3        = OpTypeVector %type_float 3\n"
732 		"%type_float_vec4        = OpTypeVector %type_float 4\n"
733 		"%type_float_vec4_iptr   = OpTypePointer Input %type_float_vec4\n"
734 		"%type_float_vec4_optr   = OpTypePointer Output %type_float_vec4\n"
735 		"%type_float_mat2x2      = OpTypeMatrix %type_float_vec2 2\n"
736 		"%type_float_arr_1       = OpTypeArray %type_float %c_i32_1\n"
737 		"%type_float_arr_2       = OpTypeArray %type_float %c_i32_2\n";
738 
739 	// minimal type definition set that is used by settings tests
740 	const string minTypeDefinitionsTemplate =
741 		"%type_float             = OpTypeFloat " + bitWidth + "\n"
742 		"%type_float_uptr        = OpTypePointer Uniform %type_float\n"
743 		"%type_float_arr_2       = OpTypeArray %type_float %c_i32_2\n";
744 
745 	// definition off all constants that are used by tests
746 	const string constantsDefinitionsTemplate =
747 		"%c_float_n1             = OpConstant %type_float -1\n"
748 		"%c_float_0              = OpConstant %type_float 0.0\n"
749 		"%c_float_0_5            = OpConstant %type_float 0.5\n"
750 		"%c_float_1              = OpConstant %type_float 1\n"
751 		"%c_float_2              = OpConstant %type_float 2\n"
752 		"%c_float_3              = OpConstant %type_float 3\n"
753 		"%c_float_4              = OpConstant %type_float 4\n"
754 		"%c_float_5              = OpConstant %type_float 5\n"
755 		"%c_float_6              = OpConstant %type_float 6\n"
756 		"%c_float_eps            = OpConstant %type_float " + epsilon + "\n"
757 		"%c_float_denorm_base    = OpConstant %type_float " + denormBase + "\n";
758 
759 	// when arguments are read from SSBO this snipped is placed in main function
760 	const string argumentsFromInputTemplate =
761 		"%arg1loc                = OpAccessChain %type_float_uptr %ssbo_in %c_i32_0 %c_i32_0\n"
762 		"%arg1                   = OpLoad %type_float %arg1loc\n"
763 		"%arg2loc                = OpAccessChain %type_float_uptr %ssbo_in %c_i32_0 %c_i32_1\n"
764 		"%arg2                   = OpLoad %type_float %arg2loc\n";
765 
766 	const string multiArgumentsFromInputTemplate =
767 		"%arg1_float_loc         = OpAccessChain %type_float_uptr %ssbo_in %c_i32_${attr} %c_i32_0\n"
768 		"%arg2_float_loc         = OpAccessChain %type_float_uptr %ssbo_in %c_i32_${attr} %c_i32_1\n"
769 		"%arg1_float             = OpLoad %type_float %arg1_float_loc\n"
770 		"%arg2_float             = OpLoad %type_float %arg2_float_loc\n";
771 
772 	// when tested shader stage reads from SSBO it has to have this snippet
773 	inputAnnotationsSnippet =
774 		"OpMemberDecorate %SSBO_in 0 Offset 0\n"
775 		"OpDecorate %SSBO_in BufferBlock\n"
776 		"OpDecorate %ssbo_in DescriptorSet 0\n"
777 		"OpDecorate %ssbo_in Binding 0\n"
778 		"OpDecorate %ssbo_in NonWritable\n";
779 
780 	const string inputDefinitionsTemplate =
781 		"%SSBO_in              = OpTypeStruct %type_float_arr_2\n"
782 		"%up_SSBO_in           = OpTypePointer Uniform %SSBO_in\n"
783 		"%ssbo_in              = OpVariable %up_SSBO_in Uniform\n";
784 
785 	outputAnnotationsSnippet =
786 		"OpMemberDecorate %SSBO_out 0 Offset 0\n"
787 		"OpDecorate %SSBO_out BufferBlock\n"
788 		"OpDecorate %ssbo_out DescriptorSet 0\n"
789 		"OpDecorate %ssbo_out Binding 1\n";
790 
791 	const string multiOutputAnnotationsTemplate =
792 		"OpMemberDecorate %SSBO_float_out 0 Offset 0\n"
793 		"OpDecorate %type_float_arr_2 ArrayStride "+ arrayStride + "\n"
794 		"OpDecorate %SSBO_float_out BufferBlock\n"
795 		"OpDecorate %ssbo_float_out DescriptorSet 0\n";
796 
797 	const string outputDefinitionsTemplate =
798 		"%SSBO_out             = OpTypeStruct %type_float_arr_1\n"
799 		"%up_SSBO_out          = OpTypePointer Uniform %SSBO_out\n"
800 		"%ssbo_out             = OpVariable %up_SSBO_out Uniform\n";
801 
802 	const string multiOutputDefinitionsTemplate =
803 		"%SSBO_float_out         = OpTypeStruct %type_float\n"
804 		"%up_SSBO_float_out      = OpTypePointer Uniform %SSBO_float_out\n"
805 		"%ssbo_float_out         = OpVariable %up_SSBO_float_out Uniform\n";
806 
807 	// this snippet is used by compute and fragment stage but not by vertex stage
808 	const string storeResultsTemplate =
809 		"%outloc               = OpAccessChain %type_float_uptr %ssbo_out %c_i32_0 %c_i32_0\n"
810 		"OpStore %outloc %result\n";
811 
812 	const string multiStoreResultsTemplate =
813 		"%outloc" + bitWidth + "             = OpAccessChain %type_float_uptr %ssbo_float_out %c_i32_0\n"
814 		"                        OpStore %outloc" + bitWidth + " %result" + bitWidth + "\n";
815 
816 	const string typeToken	= "_float";
817 	const string typeName	= "_f" + bitWidth;
818 
819 	typeAnnotationsSnippet			= replace(typeAnnotationsTemplate, typeToken, typeName);
820 	typeDefinitionsSnippet			= replace(typeDefinitionsTemplate, typeToken, typeName);
821 	minTypeDefinitionsSnippet		= replace(minTypeDefinitionsTemplate, typeToken, typeName);
822 	constantsDefinitionsSnippet		= replace(constantsDefinitionsTemplate, typeToken, typeName);
823 	argumentsFromInputSnippet		= replace(argumentsFromInputTemplate, typeToken, typeName);
824 	multiArgumentsFromInputSnippet	= replace(multiArgumentsFromInputTemplate, typeToken, typeName);
825 	inputDefinitionsSnippet			= replace(inputDefinitionsTemplate, typeToken, typeName);
826 	multiOutputAnnotationsSnippet	= replace(multiOutputAnnotationsTemplate, typeToken, typeName);
827 	outputDefinitionsSnippet		= replace(outputDefinitionsTemplate, typeToken, typeName);
828 	multiOutputDefinitionsSnippet	= replace(multiOutputDefinitionsTemplate, typeToken, typeName);
829 	storeResultsSnippet				= replace(storeResultsTemplate, typeToken, typeName);
830 	multiStoreResultsSnippet		= replace(multiStoreResultsTemplate, typeToken, typeName);
831 
832 	argumentsFromInputFp16Snippet		= "";
833 	storeResultsFp16Snippet				= "";
834 	multiArgumentsFromInputFp16Snippet	= "";
835 	multiOutputAnnotationsFp16Snippet	= "";
836 	multiStoreResultsFp16Snippet		= "";
837 	multiOutputDefinitionsFp16Snippet	= "";
838 	inputDefinitionsFp16Snippet			= "";
839 	typeAnnotationsFp16Snippet			= "";
840 	outputDefinitionsFp16Snippet		= "";
841 	typeDefinitionsFp16Snippet			= "";
842 
843 	if (bitWidth.compare("16") == 0)
844 	{
845 		typeDefinitionsFp16Snippet		=
846 			"%type_u32_uptr       = OpTypePointer Uniform %type_u32\n"
847 			"%type_u32_arr_1      = OpTypeArray %type_u32 %c_i32_1\n";
848 
849 		typeAnnotationsFp16Snippet		= "OpDecorate %type_u32_arr_1 ArrayStride 4\n";
850 		const string inputToken			= "_f16_arr_2";
851 		const string inputName			= "_u32_arr_1";
852 		inputDefinitionsFp16Snippet		= replace(inputDefinitionsSnippet, inputToken, inputName);
853 
854 		argumentsFromInputFp16Snippet	=
855 			"%argloc            = OpAccessChain %type_u32_uptr %ssbo_in %c_i32_0 %c_i32_0\n"
856 			"%inval             = OpLoad %type_u32 %argloc\n"
857 			"%arg               = OpBitcast %type_f16_vec2 %inval\n"
858 			"%arg1              = OpCompositeExtract %type_f16 %arg 0\n"
859 			"%arg2              = OpCompositeExtract %type_f16 %arg 1\n";
860 
861 		const string outputToken		= "_f16_arr_1";
862 		const string outputName			= "_u32_arr_1";
863 		outputDefinitionsFp16Snippet	= replace(outputDefinitionsSnippet, outputToken, outputName);
864 
865 		storeResultsFp16Snippet	=
866 			"%result_f16_vec2   = OpCompositeConstruct %type_f16_vec2 %result %c_f16_0\n"
867 			"%result_u32		= OpBitcast %type_u32 %result_f16_vec2\n"
868 			"%outloc            = OpAccessChain %type_u32_uptr %ssbo_out %c_i32_0 %c_i32_0\n"
869 			"OpStore %outloc %result_u32\n";
870 
871 		multiArgumentsFromInputFp16Snippet	=
872 			"%arg_u32_loc         = OpAccessChain %type_u32_uptr %ssbo_in %c_i32_${attr} %c_i32_0\n"
873 			"%arg_u32             = OpLoad %type_u32 %arg_u32_loc\n"
874 			"%arg_f16_vec2        = OpBitcast %type_f16_vec2 %arg_u32\n"
875 			"%arg1_f16            = OpCompositeExtract %type_f16 %arg_f16_vec2 0\n"
876 			"%arg2_f16            = OpCompositeExtract %type_f16 %arg_f16_vec2 1\n";
877 
878 		multiOutputAnnotationsFp16Snippet	=
879 			"OpMemberDecorate %SSBO_u32_out 0 Offset 0\n"
880 			"OpDecorate %type_u32_arr_1 ArrayStride 4\n"
881 			"OpDecorate %SSBO_u32_out BufferBlock\n"
882 			"OpDecorate %ssbo_u32_out DescriptorSet 0\n";
883 
884 		multiStoreResultsFp16Snippet		=
885 			"%outloc_u32            = OpAccessChain %type_u32_uptr %ssbo_u32_out %c_i32_0\n"
886 			"%result16_vec2			= OpCompositeConstruct %type_f16_vec2 %result16 %c_f16_0\n"
887 			"%result_u32            = OpBitcast %type_u32 %result16_vec2\n"
888 			"                        OpStore %outloc_u32 %result_u32\n";
889 
890 		multiOutputDefinitionsFp16Snippet	=
891 			"%c_f16_0              = OpConstant %type_f16 0.0\n"
892 			"%SSBO_u32_out         = OpTypeStruct %type_u32\n"
893 			"%up_SSBO_u32_out      = OpTypePointer Uniform %SSBO_u32_out\n"
894 			"%ssbo_u32_out         = OpVariable %up_SSBO_u32_out Uniform\n";
895 	}
896 
897 	// NOTE: only values used as _generated_ arguments in test operations
898 	// need to be in this map, arguments that are only used by tests,
899 	// that grab arguments from input, do need to be in this map
900 	// NOTE: when updating entries in valueIdToSnippetArgMap make
901 	// sure to update also m_valueIdToFloatType for all float width
902 	SnippetMap& sm = valueIdToSnippetArgMap;
903 	sm[V_UNUSED]		= "OpFSub %type_float %c_float_0 %c_float_0\n";
904 	sm[V_MINUS_INF]		= "OpFDiv %type_float %c_float_n1 %c_float_0\n";
905 	sm[V_MINUS_ONE]		= "OpFAdd %type_float %c_float_n1 %c_float_0\n";
906 	sm[V_MINUS_ZERO]	= "OpFMul %type_float %c_float_n1 %c_float_0\n";
907 	sm[V_ZERO]			= "OpFMul %type_float %c_float_0 %c_float_0\n";
908 	sm[V_HALF]			= "OpFAdd %type_float %c_float_0_5 %c_float_0\n";
909 	sm[V_ONE]			= "OpFAdd %type_float %c_float_1 %c_float_0\n";
910 	sm[V_INF]			= "OpFDiv %type_float %c_float_1 %c_float_0\n";					// x / 0		== Inf
911 	sm[V_DENORM]		= "OpFSub %type_float %c_float_denorm_base %c_float_eps\n";
912 	sm[V_NAN]			= "OpFDiv %type_float %c_float_0 %c_float_0\n";					// 0 / 0		== Nan
913 
914 	map<ValueId, string>::iterator it;
915 	for ( it = sm.begin(); it != sm.end(); it++ )
916 		sm[it->first] = replace(it->second, typeToken, typeName);
917 }
918 
919 typedef de::SharedPtr<TypeSnippetsBase> TypeSnippetsSP;
920 
921 template<typename FLOAT_TYPE>
922 class TypeSnippets: public TypeSnippetsBase
923 {
924 public:
925 	TypeSnippets();
926 };
927 
928 template<>
TypeSnippets()929 TypeSnippets<deFloat16>::TypeSnippets()
930 {
931 	bitWidth		= "16";
932 	epsilon			= "6.104e-5";	// 2^-14 = 0x0400
933 
934 	// 1.2113e-4 is 0x07f0 which after substracting epsilon will give 0x03f0 (same as vm[V_DENORM])
935 	// NOTE: constants in SPIR-V cant be specified as exact fp16 - there is conversion from double to fp16
936 	denormBase		= "1.2113e-4";
937 
938 	capabilities	= "OpCapability StorageUniform16\n";
939 	extensions		= "OpExtension \"SPV_KHR_16bit_storage\"\n";
940 
941 	capabilitiesFp16Without16BitStorage	= "OpCapability Float16\n";
942 	extensionsFp16Without16BitStorage	= "";
943 
944 	arrayStride		= "2";
945 
946 	varyingsTypesSnippet =
947 					"%type_u32_iptr        = OpTypePointer Input %type_u32\n"
948 					"%type_u32_optr        = OpTypePointer Output %type_u32\n";
949 	inputVaryingsSnippet =
950 					"%BP_vertex_result    = OpVariable %type_u32_iptr Input\n";
951 	outputVaryingsSnippet =
952 					"%BP_vertex_result    = OpVariable %type_u32_optr Output\n";
953 	storeVertexResultSnippet =
954 					"%tmp_vec2            = OpCompositeConstruct %type_f16_vec2 %result %c_f16_0\n"
955 					"%packed_result       = OpBitcast %type_u32 %tmp_vec2\n"
956 					"OpStore %BP_vertex_result %packed_result\n";
957 	loadVertexResultSnippet =
958 					"%packed_result       = OpLoad %type_u32 %BP_vertex_result\n"
959 					"%tmp_vec2            = OpBitcast %type_f16_vec2 %packed_result\n"
960 					"%result              = OpCompositeExtract %type_f16 %tmp_vec2 0\n";
961 
962 	loadStoreRequiresShaderFloat16 = true;
963 
964 	updateSpirvSnippets();
965 }
966 
967 template<>
TypeSnippets()968 TypeSnippets<float>::TypeSnippets()
969 {
970 	bitWidth		= "32";
971 	epsilon			= "1.175494351e-38";
972 	denormBase		= "1.1756356e-38";
973 	capabilities	= "";
974 	extensions		= "";
975 	capabilitiesFp16Without16BitStorage	= "";
976 	extensionsFp16Without16BitStorage	= "";
977 	arrayStride		= "4";
978 
979 	varyingsTypesSnippet =
980 					"%type_u32_iptr        = OpTypePointer Input %type_u32\n"
981 					"%type_u32_optr        = OpTypePointer Output %type_u32\n";
982 	inputVaryingsSnippet =
983 					"%BP_vertex_result    = OpVariable %type_u32_iptr Input\n";
984 	outputVaryingsSnippet =
985 					"%BP_vertex_result    = OpVariable %type_u32_optr Output\n";
986 	storeVertexResultSnippet =
987 					"%packed_result       = OpBitcast %type_u32 %result\n"
988 					"OpStore %BP_vertex_result %packed_result\n";
989 	loadVertexResultSnippet =
990 					"%packed_result       = OpLoad %type_u32 %BP_vertex_result\n"
991 					"%result              = OpBitcast %type_f32 %packed_result\n";
992 
993 	loadStoreRequiresShaderFloat16 = false;
994 
995 	updateSpirvSnippets();
996 }
997 
998 template<>
TypeSnippets()999 TypeSnippets<double>::TypeSnippets()
1000 {
1001 	bitWidth		= "64";
1002 	epsilon			= "2.2250738585072014e-308"; // 0x0010000000000000
1003 	denormBase		= "2.2250738585076994e-308"; // 0x00100000000003F0
1004 	capabilities	= "OpCapability Float64\n";
1005 	extensions		= "";
1006 	capabilitiesFp16Without16BitStorage	= "";
1007 	extensionsFp16Without16BitStorage	= "";
1008 	arrayStride		= "8";
1009 
1010 	varyingsTypesSnippet =
1011 					"%type_u32_vec2_iptr   = OpTypePointer Input %type_u32_vec2\n"
1012 					"%type_u32_vec2_optr   = OpTypePointer Output %type_u32_vec2\n";
1013 	inputVaryingsSnippet =
1014 					"%BP_vertex_result     = OpVariable %type_u32_vec2_iptr Input\n";
1015 	outputVaryingsSnippet =
1016 					"%BP_vertex_result     = OpVariable %type_u32_vec2_optr Output\n";
1017 	storeVertexResultSnippet =
1018 					"%packed_result        = OpBitcast %type_u32_vec2 %result\n"
1019 					"OpStore %BP_vertex_result %packed_result\n";
1020 	loadVertexResultSnippet =
1021 					"%packed_result        = OpLoad %type_u32_vec2 %BP_vertex_result\n"
1022 					"%result               = OpBitcast %type_f64 %packed_result\n";
1023 
1024 	loadStoreRequiresShaderFloat16 = false;
1025 
1026 	updateSpirvSnippets();
1027 }
1028 
1029 class TypeTestResultsBase
1030 {
1031 public:
~TypeTestResultsBase()1032 	virtual ~TypeTestResultsBase() {}
1033 	FloatType floatType() const;
1034 
1035 protected:
1036 	FloatType m_floatType;
1037 
1038 public:
1039 	// Vectors containing test data for float controls
1040 	vector<BinaryCase>	binaryOpFTZ;
1041 	vector<UnaryCase>	unaryOpFTZ;
1042 	vector<BinaryCase>	binaryOpDenormPreserve;
1043 	vector<UnaryCase>	unaryOpDenormPreserve;
1044 };
1045 
floatType() const1046 FloatType TypeTestResultsBase::floatType() const
1047 {
1048 	return m_floatType;
1049 }
1050 
1051 typedef de::SharedPtr<TypeTestResultsBase> TypeTestResultsSP;
1052 
1053 template<typename FLOAT_TYPE>
1054 class TypeTestResults: public TypeTestResultsBase
1055 {
1056 public:
1057 	TypeTestResults();
1058 };
1059 
1060 template<>
TypeTestResults()1061 TypeTestResults<deFloat16>::TypeTestResults()
1062 {
1063 	m_floatType = FP16;
1064 
1065 	// note: there are many FTZ test cases that can produce diferent result depending
1066 	// on input denorm being flushed or not; because of that FTZ tests can be limited
1067 	// to those that return denorm as those are the ones affected by tested extension
1068 	const BinaryCase binaryOpFTZArr[] = {
1069 		//operation			den op one		den op den		den op inf		den op nan
1070 		{ OID_ADD,			V_ONE,			V_ZERO_OR_DENORM_TIMES_TWO,
1071 														V_INF,			V_UNUSED },
1072 		{ OID_SUB,			V_MINUS_ONE,	V_ZERO,			V_MINUS_INF,	V_UNUSED },
1073 		{ OID_MUL,			V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1074 		{ OID_DIV,			V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
1075 		{ OID_REM,			V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
1076 		{ OID_MOD,			V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
1077 		{ OID_VEC_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1078 		{ OID_VEC_MUL_M,	V_ZERO_OR_DENORM_TIMES_TWO,
1079 											V_ZERO,			V_UNUSED,		V_UNUSED },
1080 		{ OID_MAT_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1081 		{ OID_MAT_MUL_V,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1082 		{ OID_MAT_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1083 		{ OID_OUT_PROD,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1084 		{ OID_DOT,			V_ZERO_OR_DENORM_TIMES_TWO,
1085 											V_ZERO,			V_UNUSED,		V_UNUSED },
1086 		{ OID_ATAN2,		V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
1087 		{ OID_POW,			V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
1088 		{ OID_MIX,			V_HALF,			V_ZERO,			V_INF,			V_UNUSED },
1089 		{ OID_MIN,			V_ZERO,			V_ZERO,			V_ZERO,			V_UNUSED },
1090 		{ OID_MAX,			V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1091 		{ OID_CLAMP,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1092 		{ OID_STEP,			V_ONE,			V_ONE,			V_ONE,			V_UNUSED },
1093 		{ OID_SSTEP,		V_HALF,			V_ONE,			V_ZERO,			V_UNUSED },
1094 		{ OID_FMA,			V_HALF,			V_HALF,			V_UNUSED,		V_UNUSED },
1095 		{ OID_FACE_FWD,		V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE },
1096 		{ OID_NMIN,			V_ZERO,			V_ZERO,			V_ZERO,			V_ZERO },
1097 		{ OID_NMAX,			V_ONE,			V_ZERO,			V_INF,			V_ZERO },
1098 		{ OID_NCLAMP,		V_ONE,			V_ZERO,			V_INF,			V_ZERO },
1099 		{ OID_DIST,			V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1100 		{ OID_CROSS,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1101 	};
1102 
1103 	const UnaryCase unaryOpFTZArr[] = {
1104 		//operation			op den
1105 		{ OID_NEGATE,		V_MINUS_ZERO },
1106 		{ OID_ROUND,		V_ZERO },
1107 		{ OID_ROUND_EV,		V_ZERO },
1108 		{ OID_TRUNC,		V_ZERO },
1109 		{ OID_ABS,			V_ZERO },
1110 		{ OID_FLOOR,		V_ZERO },
1111 		{ OID_CEIL,			V_ZERO_OR_ONE },
1112 		{ OID_FRACT,		V_ZERO },
1113 		{ OID_RADIANS,		V_ZERO },
1114 		{ OID_DEGREES,		V_ZERO },
1115 		{ OID_SIN,			V_ZERO },
1116 		{ OID_COS,			V_TRIG_ONE },
1117 		{ OID_TAN,			V_ZERO },
1118 		{ OID_ASIN,			V_ZERO },
1119 		{ OID_ACOS,			V_PI_DIV_2 },
1120 		{ OID_ATAN,			V_ZERO },
1121 		{ OID_SINH,			V_ZERO },
1122 		{ OID_COSH,			V_ONE },
1123 		{ OID_TANH,			V_ZERO },
1124 		{ OID_ASINH,		V_ZERO },
1125 		{ OID_ACOSH,		V_UNUSED },
1126 		{ OID_ATANH,		V_ZERO },
1127 		{ OID_EXP,			V_ONE },
1128 		{ OID_LOG,			V_MINUS_INF_OR_LOG_DENORM },
1129 		{ OID_EXP2,			V_ONE },
1130 		{ OID_LOG2,			V_MINUS_INF_OR_LOG2_DENORM },
1131 		{ OID_SQRT,			V_ZERO_OR_SQRT_DENORM },
1132 		{ OID_INV_SQRT,		V_INF_OR_INV_SQRT_DENORM },
1133 		{ OID_MAT_DET,		V_ZERO },
1134 		{ OID_MAT_INV,		V_ZERO_OR_MINUS_ZERO },
1135 		{ OID_MODF,			V_ZERO },
1136 		{ OID_MODF_ST,		V_ZERO },
1137 		{ OID_NORMALIZE,	V_ZERO },
1138 		{ OID_REFLECT,		V_ZERO },
1139 		{ OID_REFRACT,		V_ZERO },
1140 		{ OID_LENGTH,		V_ZERO },
1141 	};
1142 
1143 	const BinaryCase binaryOpDenormPreserveArr[] = {
1144 		//operation			den op one				den op den				den op inf		den op nan
1145 		{ OID_PHI,			V_DENORM,				V_DENORM,				V_DENORM,		V_DENORM },
1146 		{ OID_SELECT,		V_DENORM,				V_DENORM,				V_DENORM,		V_DENORM },
1147 		{ OID_ADD,			V_ONE,					V_DENORM_TIMES_TWO,		V_INF,			V_NAN },
1148 		{ OID_SUB,			V_MINUS_ONE_OR_CLOSE,	V_ZERO,					V_MINUS_INF,	V_NAN },
1149 		{ OID_MUL,			V_DENORM,				V_ZERO,					V_INF,			V_NAN },
1150 		{ OID_VEC_MUL_S,	V_DENORM,				V_ZERO,					V_INF,			V_NAN },
1151 		{ OID_VEC_MUL_M,	V_DENORM_TIMES_TWO,		V_ZERO,					V_INF,			V_NAN },
1152 		{ OID_MAT_MUL_S,	V_DENORM,				V_ZERO,					V_INF,			V_NAN },
1153 		{ OID_MAT_MUL_V,	V_DENORM_TIMES_TWO,		V_ZERO,					V_INF,			V_NAN },
1154 		{ OID_MAT_MUL_M,	V_DENORM_TIMES_TWO,		V_ZERO,					V_INF,			V_NAN },
1155 		{ OID_OUT_PROD,		V_DENORM,				V_ZERO,					V_INF,			V_NAN },
1156 		{ OID_DOT,			V_DENORM_TIMES_TWO,		V_ZERO,					V_INF,			V_NAN },
1157 		{ OID_MIX,			V_HALF,					V_DENORM,				V_INF,			V_NAN },
1158 		{ OID_FMA,			V_HALF,					V_HALF,					V_INF,			V_NAN },
1159 		{ OID_MIN,			V_DENORM,				V_DENORM,				V_DENORM,		V_UNUSED },
1160 		{ OID_MAX,			V_ONE,					V_DENORM,				V_INF,			V_UNUSED },
1161 		{ OID_CLAMP,		V_ONE,					V_DENORM,				V_INF,			V_UNUSED },
1162 		{ OID_NMIN,			V_DENORM,				V_DENORM,				V_DENORM,		V_DENORM },
1163 		{ OID_NMAX,			V_ONE,					V_DENORM,				V_INF,			V_DENORM },
1164 		{ OID_NCLAMP,		V_ONE,					V_DENORM,				V_INF,			V_DENORM },
1165 	};
1166 
1167 	const UnaryCase unaryOpDenormPreserveArr[] = {
1168 		//operation				op den
1169 		{ OID_RETURN_VAL,		V_DENORM },
1170 		{ OID_D_EXTRACT,		V_DENORM },
1171 		{ OID_D_INSERT,			V_DENORM },
1172 		{ OID_SHUFFLE,			V_DENORM },
1173 		{ OID_COMPOSITE,		V_DENORM },
1174 		{ OID_COMPOSITE_INS,	V_DENORM },
1175 		{ OID_COPY,				V_DENORM },
1176 		{ OID_TRANSPOSE,		V_DENORM },
1177 		{ OID_NEGATE,			V_DENORM },
1178 		{ OID_ABS,				V_DENORM },
1179 		{ OID_SIGN,				V_ONE },
1180 		{ OID_RADIANS,			V_DENORM },
1181 		{ OID_DEGREES,			V_DEGREES_DENORM },
1182 	};
1183 
1184 	binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1185 					   binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1186 	unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1187 					  unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1188 	binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1189 								  binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1190 	unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1191 								 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1192 }
1193 
1194 template<>
TypeTestResults()1195 TypeTestResults<float>::TypeTestResults()
1196 {
1197 	m_floatType = FP32;
1198 
1199 	const BinaryCase binaryOpFTZArr[] = {
1200 		//operation			den op one		den op den		den op inf		den op nan
1201 		{ OID_ADD,			V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1202 		{ OID_SUB,			V_MINUS_ONE,	V_ZERO,			V_MINUS_INF,	V_UNUSED },
1203 		{ OID_MUL,			V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1204 		{ OID_DIV,			V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
1205 		{ OID_REM,			V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
1206 		{ OID_MOD,			V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
1207 		{ OID_VEC_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1208 		{ OID_VEC_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1209 		{ OID_MAT_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1210 		{ OID_MAT_MUL_V,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1211 		{ OID_MAT_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1212 		{ OID_OUT_PROD,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1213 		{ OID_DOT,			V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1214 		{ OID_ATAN2,		V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
1215 		{ OID_POW,			V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
1216 		{ OID_MIX,			V_HALF,			V_ZERO,			V_INF,			V_UNUSED },
1217 		{ OID_MIN,			V_ZERO,			V_ZERO,			V_ZERO,			V_UNUSED },
1218 		{ OID_MAX,			V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1219 		{ OID_CLAMP,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1220 		{ OID_STEP,			V_ONE,			V_ONE,			V_ONE,			V_UNUSED },
1221 		{ OID_SSTEP,		V_HALF,			V_ONE,			V_ZERO,			V_UNUSED },
1222 		{ OID_FMA,			V_HALF,			V_HALF,			V_UNUSED,		V_UNUSED },
1223 		{ OID_FACE_FWD,		V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE },
1224 		{ OID_NMIN,			V_ZERO,			V_ZERO,			V_ZERO,			V_ZERO },
1225 		{ OID_NMAX,			V_ONE,			V_ZERO,			V_INF,			V_ZERO },
1226 		{ OID_NCLAMP,		V_ONE,			V_ZERO,			V_INF,			V_ZERO },
1227 		{ OID_DIST,			V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1228 		{ OID_CROSS,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1229 	};
1230 
1231 	const UnaryCase unaryOpFTZArr[] = {
1232 		//operation			op den
1233 		{ OID_NEGATE,		V_MINUS_ZERO },
1234 		{ OID_ROUND,		V_ZERO },
1235 		{ OID_ROUND_EV,		V_ZERO },
1236 		{ OID_TRUNC,		V_ZERO },
1237 		{ OID_ABS,			V_ZERO },
1238 		{ OID_FLOOR,		V_ZERO },
1239 		{ OID_CEIL,			V_ZERO_OR_ONE },
1240 		{ OID_FRACT,		V_ZERO },
1241 		{ OID_RADIANS,		V_ZERO },
1242 		{ OID_DEGREES,		V_ZERO },
1243 		{ OID_SIN,			V_ZERO },
1244 		{ OID_COS,			V_TRIG_ONE },
1245 		{ OID_TAN,			V_ZERO },
1246 		{ OID_ASIN,			V_ZERO },
1247 		{ OID_ACOS,			V_PI_DIV_2 },
1248 		{ OID_ATAN,			V_ZERO },
1249 		{ OID_SINH,			V_ZERO },
1250 		{ OID_COSH,			V_ONE },
1251 		{ OID_TANH,			V_ZERO },
1252 		{ OID_ASINH,		V_ZERO },
1253 		{ OID_ACOSH,		V_UNUSED },
1254 		{ OID_ATANH,		V_ZERO },
1255 		{ OID_EXP,			V_ONE },
1256 		{ OID_LOG,			V_MINUS_INF_OR_LOG_DENORM },
1257 		{ OID_EXP2,			V_ONE },
1258 		{ OID_LOG2,			V_MINUS_INF_OR_LOG2_DENORM },
1259 		{ OID_SQRT,			V_ZERO_OR_SQRT_DENORM },
1260 		{ OID_INV_SQRT,		V_INF_OR_INV_SQRT_DENORM },
1261 		{ OID_MAT_DET,		V_ZERO },
1262 		{ OID_MAT_INV,		V_ZERO_OR_MINUS_ZERO },
1263 		{ OID_MODF,			V_ZERO },
1264 		{ OID_MODF_ST,		V_ZERO },
1265 		{ OID_NORMALIZE,	V_ZERO },
1266 		{ OID_REFLECT,		V_ZERO },
1267 		{ OID_REFRACT,		V_ZERO },
1268 		{ OID_LENGTH,		V_ZERO },
1269 	};
1270 
1271 	const BinaryCase binaryOpDenormPreserveArr[] = {
1272 		//operation			den op one			den op den				den op inf		den op nan
1273 		{ OID_PHI,			V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1274 		{ OID_SELECT,		V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1275 		{ OID_ADD,			V_ONE,				V_DENORM_TIMES_TWO,		V_INF,			V_NAN },
1276 		{ OID_SUB,			V_MINUS_ONE,		V_ZERO,					V_MINUS_INF,	V_NAN },
1277 		{ OID_MUL,			V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1278 		{ OID_VEC_MUL_S,	V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1279 		{ OID_VEC_MUL_M,	V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1280 		{ OID_MAT_MUL_S,	V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1281 		{ OID_MAT_MUL_V,	V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1282 		{ OID_MAT_MUL_M,	V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1283 		{ OID_OUT_PROD,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1284 		{ OID_DOT,			V_DENORM_TIMES_TWO,	V_ZERO,					V_INF,			V_NAN },
1285 		{ OID_MIX,			V_HALF,				V_DENORM,				V_INF,			V_NAN },
1286 		{ OID_FMA,			V_HALF,				V_HALF,					V_INF,			V_NAN },
1287 		{ OID_MIN,			V_DENORM,			V_DENORM,				V_DENORM,		V_UNUSED },
1288 		{ OID_MAX,			V_ONE,				V_DENORM,				V_INF,			V_UNUSED },
1289 		{ OID_CLAMP,		V_ONE,				V_DENORM,				V_INF,			V_UNUSED },
1290 		{ OID_NMIN,			V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1291 		{ OID_NMAX,			V_ONE,				V_DENORM,				V_INF,			V_DENORM },
1292 		{ OID_NCLAMP,		V_ONE,				V_DENORM,				V_INF,			V_DENORM },
1293 	};
1294 
1295 	const UnaryCase unaryOpDenormPreserveArr[] = {
1296 		//operation				op den
1297 		{ OID_RETURN_VAL,		V_DENORM },
1298 		{ OID_D_EXTRACT,		V_DENORM },
1299 		{ OID_D_INSERT,			V_DENORM },
1300 		{ OID_SHUFFLE,			V_DENORM },
1301 		{ OID_COMPOSITE,		V_DENORM },
1302 		{ OID_COMPOSITE_INS,	V_DENORM },
1303 		{ OID_COPY,				V_DENORM },
1304 		{ OID_TRANSPOSE,		V_DENORM },
1305 		{ OID_NEGATE,			V_DENORM },
1306 		{ OID_ABS,				V_DENORM },
1307 		{ OID_SIGN,				V_ONE },
1308 		{ OID_RADIANS,			V_DENORM },
1309 		{ OID_DEGREES,			V_DEGREES_DENORM },
1310 	};
1311 
1312 	binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1313 					   binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1314 	unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1315 					  unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1316 	binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1317 								  binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1318 	unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1319 								 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1320 }
1321 
1322 template<>
TypeTestResults()1323 TypeTestResults<double>::TypeTestResults()
1324 {
1325 	m_floatType = FP64;
1326 
1327 	// fp64 is supported by fewer operations then fp16 and fp32
1328 	// e.g. Radians and Degrees functions are not supported
1329 	const BinaryCase binaryOpFTZArr[] = {
1330 		//operation			den op one		den op den		den op inf		den op nan
1331 		{ OID_ADD,			V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1332 		{ OID_SUB,			V_MINUS_ONE,	V_ZERO,			V_MINUS_INF,	V_UNUSED },
1333 		{ OID_MUL,			V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1334 		{ OID_DIV,			V_ZERO,			V_UNUSED,		V_ZERO,			V_UNUSED },
1335 		{ OID_REM,			V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
1336 		{ OID_MOD,			V_ZERO,			V_UNUSED,		V_UNUSED,		V_UNUSED },
1337 		{ OID_VEC_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1338 		{ OID_VEC_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1339 		{ OID_MAT_MUL_S,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1340 		{ OID_MAT_MUL_V,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1341 		{ OID_MAT_MUL_M,	V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1342 		{ OID_OUT_PROD,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1343 		{ OID_DOT,			V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1344 		{ OID_MIX,			V_HALF,			V_ZERO,			V_INF,			V_UNUSED },
1345 		{ OID_MIN,			V_ZERO,			V_ZERO,			V_ZERO,			V_UNUSED },
1346 		{ OID_MAX,			V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1347 		{ OID_CLAMP,		V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1348 		{ OID_STEP,			V_ONE,			V_ONE,			V_ONE,			V_UNUSED },
1349 		{ OID_SSTEP,		V_HALF,			V_ONE,			V_ZERO,			V_UNUSED },
1350 		{ OID_FMA,			V_HALF,			V_HALF,			V_UNUSED,		V_UNUSED },
1351 		{ OID_FACE_FWD,		V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE,	V_MINUS_ONE },
1352 		{ OID_NMIN,			V_ZERO,			V_ZERO,			V_ZERO,			V_ZERO },
1353 		{ OID_NMAX,			V_ONE,			V_ZERO,			V_INF,			V_ZERO },
1354 		{ OID_NCLAMP,		V_ONE,			V_ZERO,			V_INF,			V_ZERO },
1355 		{ OID_DIST,			V_ONE,			V_ZERO,			V_INF,			V_UNUSED },
1356 		{ OID_CROSS,		V_ZERO,			V_ZERO,			V_UNUSED,		V_UNUSED },
1357 	};
1358 
1359 	const UnaryCase unaryOpFTZArr[] = {
1360 		//operation			op den
1361 		{ OID_NEGATE,		V_MINUS_ZERO },
1362 		{ OID_ROUND,		V_ZERO },
1363 		{ OID_ROUND_EV,		V_ZERO },
1364 		{ OID_TRUNC,		V_ZERO },
1365 		{ OID_ABS,			V_ZERO },
1366 		{ OID_FLOOR,		V_ZERO },
1367 		{ OID_CEIL,			V_ZERO_OR_ONE },
1368 		{ OID_FRACT,		V_ZERO },
1369 		{ OID_SQRT,			V_ZERO_OR_SQRT_DENORM },
1370 		{ OID_INV_SQRT,		V_INF_OR_INV_SQRT_DENORM },
1371 		{ OID_MAT_DET,		V_ZERO },
1372 		{ OID_MAT_INV,		V_ZERO_OR_MINUS_ZERO },
1373 		{ OID_MODF,			V_ZERO },
1374 		{ OID_MODF_ST,		V_ZERO },
1375 		{ OID_NORMALIZE,	V_ZERO },
1376 		{ OID_REFLECT,		V_ZERO },
1377 		{ OID_LENGTH,		V_ZERO },
1378 	};
1379 
1380 	const BinaryCase binaryOpDenormPreserveArr[] = {
1381 		//operation			den op one			den op den				den op inf		den op nan
1382 		{ OID_PHI,			V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1383 		{ OID_SELECT,		V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1384 		{ OID_ADD,			V_ONE,				V_DENORM_TIMES_TWO,		V_INF,			V_NAN },
1385 		{ OID_SUB,			V_MINUS_ONE,		V_ZERO,					V_MINUS_INF,	V_NAN },
1386 		{ OID_MUL,			V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1387 		{ OID_VEC_MUL_S,	V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1388 		{ OID_VEC_MUL_M,	V_DENORM_TIMES_TWO,	V_ZERO,					V_INF,			V_NAN },
1389 		{ OID_MAT_MUL_S,	V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1390 		{ OID_MAT_MUL_V,	V_DENORM_TIMES_TWO,	V_ZERO,					V_INF,			V_NAN },
1391 		{ OID_MAT_MUL_M,	V_DENORM_TIMES_TWO,	V_ZERO,					V_INF,			V_NAN },
1392 		{ OID_OUT_PROD,		V_DENORM,			V_ZERO,					V_INF,			V_NAN },
1393 		{ OID_DOT,			V_DENORM_TIMES_TWO,	V_ZERO,					V_INF,			V_NAN },
1394 		{ OID_MIX,			V_HALF,				V_DENORM,				V_INF,			V_NAN },
1395 		{ OID_FMA,			V_HALF,				V_HALF,					V_INF,			V_NAN },
1396 		{ OID_MIN,			V_DENORM,			V_DENORM,				V_DENORM,		V_UNUSED },
1397 		{ OID_MAX,			V_ONE,				V_DENORM,				V_INF,			V_UNUSED },
1398 		{ OID_CLAMP,		V_ONE,				V_DENORM,				V_INF,			V_UNUSED },
1399 		{ OID_NMIN,			V_DENORM,			V_DENORM,				V_DENORM,		V_DENORM },
1400 		{ OID_NMAX,			V_ONE,				V_DENORM,				V_INF,			V_DENORM },
1401 		{ OID_NCLAMP,		V_ONE,				V_DENORM,				V_INF,			V_DENORM },
1402 	};
1403 
1404 	const UnaryCase unaryOpDenormPreserveArr[] = {
1405 		//operation				op den
1406 		{ OID_RETURN_VAL,		V_DENORM },
1407 		{ OID_D_EXTRACT,		V_DENORM },
1408 		{ OID_D_INSERT,			V_DENORM },
1409 		{ OID_SHUFFLE,			V_DENORM },
1410 		{ OID_COMPOSITE,		V_DENORM },
1411 		{ OID_COMPOSITE_INS,	V_DENORM },
1412 		{ OID_COPY,				V_DENORM },
1413 		{ OID_TRANSPOSE,		V_DENORM },
1414 		{ OID_NEGATE,			V_DENORM },
1415 		{ OID_ABS,				V_DENORM },
1416 		{ OID_SIGN,				V_ONE },
1417 	};
1418 
1419 	binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1420 					   binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1421 	unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1422 					  unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1423 	binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1424 								  binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1425 	unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1426 								 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1427 }
1428 
1429 // Operation structure holds data needed to test specified SPIR-V operation. This class contains
1430 // additional annotations, additional types and aditional constants that should be properly included
1431 // in SPIR-V code. Commands attribute in this structure contains code that performs tested operation
1432 // on given arguments, in some cases verification is also performed there.
1433 // All snipets stroed in this structure are generic and can be specialized for fp16, fp32 or fp64,
1434 // thanks to that this data can be shared by many OperationTestCase instances (testing diferent
1435 // float behaviours on diferent float widths).
1436 struct Operation
1437 {
1438 	// operation name is included in test case name
1439 	const char*	name;
1440 
1441 	// How extensively is the floating point type used?
1442 	FloatUsage floatUsage;
1443 
1444 	// operation specific spir-v snippets that will be
1445 	// placed in proper places in final test shader
1446 	const char*	annotations;
1447 	const char*	types;
1448 	const char*	constants;
1449 	const char*	variables;
1450 	const char*	functions;
1451 	const char*	commands;
1452 
1453 	// conversion operations operate on one float type and produce float
1454 	// type with different bit width; restrictedInputType is used only when
1455 	// isInputTypeRestricted is set to true and it restricts usage of this
1456 	// operation to specified input type
1457 	bool		isInputTypeRestricted;
1458 	FloatType	restrictedInputType;
1459 
1460 	// arguments for OpSpecConstant need to be specified also as constant
1461 	bool		isSpecConstant;
1462 
1463 	// set if c_float* constant is used in operation
1464 	FloatStatementUsageFlags	statementUsageFlags;
1465 
Operationvkt::SpirVAssembly::__anonbe9b2f4c0111::Operation1466 	Operation()		{}
1467 
1468 	// Minimal constructor - used by most of operations
Operationvkt::SpirVAssembly::__anonbe9b2f4c0111::Operation1469 	Operation(const char* _name, FloatUsage _floatUsage, const char* _commands, const FloatStatementUsageFlags _statementUsageFlags = 0)
1470 		: name(_name)
1471 		, floatUsage(_floatUsage)
1472 		, annotations("")
1473 		, types("")
1474 		, constants("")
1475 		, variables("")
1476 		, functions("")
1477 		, commands(_commands)
1478 		, isInputTypeRestricted(false)
1479 		, restrictedInputType(FP16)		// not used as isInputTypeRestricted is false
1480 		, isSpecConstant(false)
1481 		, statementUsageFlags(_statementUsageFlags)
1482 	{}
1483 
1484 	// Conversion operations constructor (used also by conversions done in SpecConstantOp)
Operationvkt::SpirVAssembly::__anonbe9b2f4c0111::Operation1485 	Operation(const char* _name,
1486 			  FloatUsage _floatUsage,
1487 			  bool specConstant,
1488 			  FloatType _inputType,
1489 			  const char* _constants,
1490 			  const char* _commands,
1491 			  const FloatStatementUsageFlags _statementUsageFlags = 0)
1492 		: name(_name)
1493 		, floatUsage(_floatUsage)
1494 		, annotations("")
1495 		, types("")
1496 		, constants(_constants)
1497 		, variables("")
1498 		, functions("")
1499 		, commands(_commands)
1500 		, isInputTypeRestricted(true)
1501 		, restrictedInputType(_inputType)
1502 		, isSpecConstant(specConstant)
1503 		, statementUsageFlags(_statementUsageFlags)
1504 	{}
1505 
1506 	// Full constructor - used by few operations, that are more complex to test
Operationvkt::SpirVAssembly::__anonbe9b2f4c0111::Operation1507 	Operation(const char* _name,
1508 			  FloatUsage _floatUsage,
1509 			  const char* _annotations,
1510 			  const char* _types,
1511 			  const char* _constants,
1512 			  const char* _variables,
1513 			  const char* _functions,
1514 			  const char* _commands,
1515 			  const FloatStatementUsageFlags _statementUsageFlags = 0)
1516 		: name(_name)
1517 		, floatUsage(_floatUsage)
1518 		, annotations(_annotations)
1519 		, types(_types)
1520 		, constants(_constants)
1521 		, variables(_variables)
1522 		, functions(_functions)
1523 		, commands(_commands)
1524 		, isInputTypeRestricted(false)
1525 		, restrictedInputType(FP16)		// not used as isInputTypeRestricted is false
1526 		, isSpecConstant(false)
1527 		, statementUsageFlags(_statementUsageFlags)
1528 	{}
1529 
1530 	// Full constructor - used by rounding override cases
Operationvkt::SpirVAssembly::__anonbe9b2f4c0111::Operation1531 	Operation(const char* _name,
1532 			  FloatUsage _floatUsage,
1533 			  FloatType _inputType,
1534 			  const char* _annotations,
1535 			  const char* _types,
1536 			  const char* _constants,
1537 			  const char* _commands,
1538 			  const FloatStatementUsageFlags _statementUsageFlags = 0)
1539 		: name(_name)
1540 		, floatUsage(_floatUsage)
1541 		, annotations(_annotations)
1542 		, types(_types)
1543 		, constants(_constants)
1544 		, variables("")
1545 		, functions("")
1546 		, commands(_commands)
1547 		, isInputTypeRestricted(true)
1548 		, restrictedInputType(_inputType)
1549 		, isSpecConstant(false)
1550 		, statementUsageFlags(_statementUsageFlags)
1551 	{}
1552 };
1553 
1554 // Class storing input that will be passed to operation and expected
1555 // output that should be generated for specified behaviour.
1556 class OperationTestCase
1557 {
1558 public:
1559 
OperationTestCase()1560 	OperationTestCase()		{}
1561 
OperationTestCase(const char * _baseName,BehaviorFlags _behaviorFlags,OperationId _operatinId,ValueId _input1,ValueId _input2,ValueId _expectedOutput,bool _fp16Without16BitStorage=false)1562 	OperationTestCase(const char*	_baseName,
1563 					  BehaviorFlags	_behaviorFlags,
1564 					  OperationId	_operatinId,
1565 					  ValueId		_input1,
1566 					  ValueId		_input2,
1567 					  ValueId		_expectedOutput,
1568 					  bool			_fp16Without16BitStorage = false)
1569 		: baseName(_baseName)
1570 		, behaviorFlags(_behaviorFlags)
1571 		, operationId(_operatinId)
1572 		, expectedOutput(_expectedOutput)
1573 		, fp16Without16BitStorage(_fp16Without16BitStorage)
1574 	{
1575 		input[0] = _input1;
1576 		input[1] = _input2;
1577 	}
1578 
1579 public:
1580 
1581 	string					baseName;
1582 	BehaviorFlags			behaviorFlags;
1583 	OperationId				operationId;
1584 	ValueId					input[2];
1585 	ValueId					expectedOutput;
1586 	bool					fp16Without16BitStorage;
1587 };
1588 
1589 // Helper structure used to store specialized operation
1590 // data. This data is ready to be used during shader assembly.
1591 struct SpecializedOperation
1592 {
1593 	string constants;
1594 	string annotations;
1595 	string types;
1596 	string arguments;
1597 	string variables;
1598 	string functions;
1599 	string commands;
1600 
1601 	FloatType					inFloatType;
1602 	TypeSnippetsSP				inTypeSnippets;
1603 	TypeSnippetsSP				outTypeSnippets;
1604 	FloatStatementUsageFlags	argumentsUsesFloatConstant;
1605 };
1606 
1607 // Class responsible for constructing list of test cases for specified
1608 // float type and specified way of preparation of arguments.
1609 // Arguments can be either read from input SSBO or generated via math
1610 // operations in spir-v code.
1611 class TestCasesBuilder
1612 {
1613 public:
1614 
1615 	void init();
1616 	void build(vector<OperationTestCase>& testCases, TypeTestResultsSP typeTestResults, bool argumentsFromInput);
1617 	const Operation& getOperation(OperationId id) const;
1618 
1619 private:
1620 
1621 	void createUnaryTestCases(vector<OperationTestCase>& testCases,
1622 							  OperationId operationId,
1623 							  ValueId denormPreserveResult,
1624 							  ValueId denormFTZResult,
1625 							  bool fp16WithoutStorage = false) const;
1626 
1627 private:
1628 
1629 	// Operations are shared betwean test cases so they are
1630 	// passed to them as pointers to data stored in TestCasesBuilder.
1631 	typedef OperationTestCase OTC;
1632 	typedef Operation Op;
1633 	map<int, Op> m_operations;
1634 };
1635 
init()1636 void TestCasesBuilder::init()
1637 {
1638 	map<int, Op>& mo = m_operations;
1639 
1640 	// predefine operations repeatedly used in tests; note that "_float"
1641 	// in every operation command will be replaced with either "_f16",
1642 	// "_f32" or "_f64" - StringTemplate is not used here because it
1643 	// would make code less readable
1644 	// m_operations contains generic operation definitions that can be
1645 	// used for all float types
1646 
1647 	mo[OID_NEGATE]			= Op("negate",		FLOAT_ARITHMETIC,
1648 												"%result             = OpFNegate %type_float %arg1\n",
1649 												B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1650 	mo[OID_COMPOSITE]		= Op("composite",	FLOAT_ARITHMETIC,
1651 												"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1652 												"%result             = OpCompositeExtract %type_float %vec1 0\n",
1653 												B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1654 	mo[OID_COMPOSITE_INS]	= Op("comp_ins",	FLOAT_ARITHMETIC,
1655 												"%vec1               = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_0\n"
1656 												"%vec2               = OpCompositeInsert %type_float_vec2 %arg1 %vec1 0\n"
1657 												"%result             = OpCompositeExtract %type_float %vec2 0\n",
1658 												B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1659 	mo[OID_COPY]			= Op("copy",		FLOAT_STORAGE_ONLY,
1660 												"%result             = OpCopyObject %type_float %arg1\n",
1661 												B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1662 	mo[OID_D_EXTRACT]		= Op("extract",		FLOAT_ARITHMETIC,
1663 												"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1664 												"%result             = OpVectorExtractDynamic %type_float %vec1 %c_i32_0\n",
1665 												B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1666 	mo[OID_D_INSERT]		= Op("insert",		FLOAT_ARITHMETIC,
1667 												"%tmpVec             = OpCompositeConstruct %type_float_vec2 %c_float_2 %c_float_2\n"
1668 												"%vec1               = OpVectorInsertDynamic %type_float_vec2 %tmpVec %arg1 %c_i32_0\n"
1669 												"%result             = OpCompositeExtract %type_float %vec1 0\n",
1670 												B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1671 	mo[OID_SHUFFLE]			= Op("shuffle",		FLOAT_ARITHMETIC,
1672 												"%tmpVec1            = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1673 												"%tmpVec2            = OpCompositeConstruct %type_float_vec2 %c_float_2 %c_float_2\n"	// NOTE: its impossible to test shuffle with denorms flushed
1674 												"%vec1               = OpVectorShuffle %type_float_vec2 %tmpVec1 %tmpVec2 0 2\n"		//       to zero as this will be done by earlier operation
1675 												"%result             = OpCompositeExtract %type_float %vec1 0\n",						//       (this also applies to few other operations)
1676 												B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1677 	mo[OID_TRANSPOSE]		= Op("transpose",	FLOAT_ARITHMETIC,
1678 												"%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1679 												"%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1680 												"%tmat               = OpTranspose %type_float_mat2x2 %mat\n"
1681 												"%tcol               = OpCompositeExtract %type_float_vec2 %tmat 0\n"
1682 												"%result             = OpCompositeExtract %type_float %tcol 0\n",
1683 												B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1684 	mo[OID_RETURN_VAL]		= Op("ret_val",		FLOAT_ARITHMETIC,
1685 												"",
1686 												"%type_test_fun      = OpTypeFunction %type_float %type_float\n",
1687 												"",
1688 												"",
1689 												"%test_fun = OpFunction %type_float None %type_test_fun\n"
1690 												"%param = OpFunctionParameter %type_float\n"
1691 												"%entry = OpLabel\n"
1692 												"OpReturnValue %param\n"
1693 												"OpFunctionEnd\n",
1694 												"%result             = OpFunctionCall %type_float %test_fun %arg1\n",
1695 												B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1696 
1697 	// conversion operations that are meant to be used only for single output type (defined by the second number in name)
1698 	const char* convertSource =					"%result             = OpFConvert %type_float %arg1\n";
1699 	mo[OID_CONV_FROM_FP16]	= Op("conv_from_fp16", FLOAT_STORAGE_ONLY, false, FP16, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1700 	mo[OID_CONV_FROM_FP32]	= Op("conv_from_fp32", FLOAT_STORAGE_ONLY, false, FP32, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1701 	mo[OID_CONV_FROM_FP64]	= Op("conv_from_fp64", FLOAT_STORAGE_ONLY, false, FP64, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1702 
1703 	// from all operands supported by OpSpecConstantOp we can only test FConvert opcode with literals as everything
1704 	// else requires Kernel capability (OpenCL); values of literals used in SPIR-V code must be equivalent to
1705 	// V_CONV_FROM_FP32_ARG and V_CONV_FROM_FP64_ARG so we can use same expected rounded values as for regular OpFConvert
1706 	mo[OID_SCONST_CONV_FROM_FP32_TO_FP16]
1707 						= Op("sconst_conv_from_fp32", FLOAT_ARITHMETIC, true, FP32,
1708 											"%c_arg              = OpConstant %type_f32 1.22334445\n"
1709 											"%result             = OpSpecConstantOp %type_f16 FConvert %c_arg\n",
1710 											"",
1711 											B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP32);
1712 	mo[OID_SCONST_CONV_FROM_FP64_TO_FP32]
1713 						= Op("sconst_conv_from_fp64", FLOAT_ARITHMETIC, true, FP64,
1714 											"%c_arg              = OpConstant %type_f64 1.22334455\n"
1715 											"%result             = OpSpecConstantOp %type_f32 FConvert %c_arg\n",
1716 											"",
1717 											B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
1718 	mo[OID_SCONST_CONV_FROM_FP64_TO_FP16]
1719 						= Op("sconst_conv_from_fp64", FLOAT_ARITHMETIC, true, FP64,
1720 											"%c_arg              = OpConstant %type_f64 1.22334445\n"
1721 											"%result             = OpSpecConstantOp %type_f16 FConvert %c_arg\n",
1722 											"",
1723 											B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
1724 
1725 	mo[OID_ADD]			= Op("add",			FLOAT_ARITHMETIC, "%result             = OpFAdd %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1726 	mo[OID_SUB]			= Op("sub",			FLOAT_ARITHMETIC, "%result             = OpFSub %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1727 	mo[OID_MUL]			= Op("mul",			FLOAT_ARITHMETIC, "%result             = OpFMul %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1728 	mo[OID_DIV]			= Op("div",			FLOAT_ARITHMETIC, "%result             = OpFDiv %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1729 	mo[OID_REM]			= Op("rem",			FLOAT_ARITHMETIC, "%result             = OpFRem %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1730 	mo[OID_MOD]			= Op("mod",			FLOAT_ARITHMETIC, "%result             = OpFMod %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1731 	mo[OID_PHI]			= Op("phi",			FLOAT_ARITHMETIC,
1732 											"%comp               = OpFOrdGreaterThan %type_bool %arg1 %arg2\n"
1733 											"                      OpSelectionMerge %comp_merge None\n"
1734 											"                      OpBranchConditional %comp %true_branch %false_branch\n"
1735 											"%true_branch        = OpLabel\n"
1736 											"                      OpBranch %comp_merge\n"
1737 											"%false_branch       = OpLabel\n"
1738 											"                      OpBranch %comp_merge\n"
1739 											"%comp_merge         = OpLabel\n"
1740 											"%result             = OpPhi %type_float %arg2 %true_branch %arg1 %false_branch\n",
1741 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1742 	mo[OID_SELECT]		= Op("select",		FLOAT_ARITHMETIC,
1743 											"%always_true        = OpFOrdGreaterThan %type_bool %c_float_1 %c_float_0\n"
1744 											"%result             = OpSelect %type_float %always_true %arg1 %arg2\n",
1745 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1746 	mo[OID_DOT]			= Op("dot",			FLOAT_ARITHMETIC,
1747 											"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1748 											"%vec2               = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1749 											"%result             = OpDot %type_float %vec1 %vec2\n",
1750 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1751 	mo[OID_VEC_MUL_S]	= Op("vmuls",		FLOAT_ARITHMETIC,
1752 											"%vec                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1753 											"%tmpVec             = OpVectorTimesScalar %type_float_vec2 %vec %arg2\n"
1754 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n",
1755 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1756 	mo[OID_VEC_MUL_M]	= Op("vmulm",		FLOAT_ARITHMETIC,
1757 											"%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1758 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1759 											"%vec                = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1760 											"%tmpVec             = OpVectorTimesMatrix %type_float_vec2 %vec %mat\n"
1761 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n",
1762 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1763 	mo[OID_MAT_MUL_S]	= Op("mmuls",		FLOAT_ARITHMETIC,
1764 											"%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1765 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1766 											"%mulMat             = OpMatrixTimesScalar %type_float_mat2x2 %mat %arg2\n"
1767 											"%extCol             = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1768 											"%result             = OpCompositeExtract %type_float %extCol 0\n",
1769 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1770 	mo[OID_MAT_MUL_V]	= Op("mmulv",		FLOAT_ARITHMETIC,
1771 											"%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1772 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1773 											"%vec                = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1774 											"%mulVec             = OpMatrixTimesVector %type_float_vec2 %mat %vec\n"
1775 											"%result             = OpCompositeExtract %type_float %mulVec 0\n",
1776 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1777 	mo[OID_MAT_MUL_M]	= Op("mmulm",		FLOAT_ARITHMETIC,
1778 											"%col1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1779 											"%mat1               = OpCompositeConstruct %type_float_mat2x2 %col1 %col1\n"
1780 											"%col2               = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1781 											"%mat2               = OpCompositeConstruct %type_float_mat2x2 %col2 %col2\n"
1782 											"%mulMat             = OpMatrixTimesMatrix %type_float_mat2x2 %mat1 %mat2\n"
1783 											"%extCol             = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1784 											"%result             = OpCompositeExtract %type_float %extCol 0\n",
1785 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1786 	mo[OID_OUT_PROD]	= Op("out_prod",	FLOAT_ARITHMETIC,
1787 											"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1788 											"%vec2               = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1789 											"%mulMat             = OpOuterProduct %type_float_mat2x2 %vec1 %vec2\n"
1790 											"%extCol             = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1791 											"%result             = OpCompositeExtract %type_float %extCol 0\n",
1792 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1793 
1794 	// comparison operations
1795 	mo[OID_ORD_EQ]		= Op("ord_eq",		FLOAT_ARITHMETIC,
1796 											"%boolVal           = OpFOrdEqual %type_bool %arg1 %arg2\n"
1797 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1798 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1799 	mo[OID_UORD_EQ]		= Op("uord_eq",		FLOAT_ARITHMETIC,
1800 											"%boolVal           = OpFUnordEqual %type_bool %arg1 %arg2\n"
1801 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1802 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1803 	mo[OID_ORD_NEQ]		= Op("ord_neq",		FLOAT_ARITHMETIC,
1804 											"%boolVal           = OpFOrdNotEqual %type_bool %arg1 %arg2\n"
1805 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1806 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1807 	mo[OID_UORD_NEQ]	= Op("uord_neq",	FLOAT_ARITHMETIC,
1808 											"%boolVal           = OpFUnordNotEqual %type_bool %arg1 %arg2\n"
1809 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1810 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1811 	mo[OID_ORD_LS]		= Op("ord_ls",		FLOAT_ARITHMETIC,
1812 											"%boolVal           = OpFOrdLessThan %type_bool %arg1 %arg2\n"
1813 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1814 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1815 	mo[OID_UORD_LS]		= Op("uord_ls",		FLOAT_ARITHMETIC,
1816 											"%boolVal           = OpFUnordLessThan %type_bool %arg1 %arg2\n"
1817 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1818 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1819 	mo[OID_ORD_GT]		= Op("ord_gt",		FLOAT_ARITHMETIC,
1820 											"%boolVal           = OpFOrdGreaterThan %type_bool %arg1 %arg2\n"
1821 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1822 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1823 	mo[OID_UORD_GT]		= Op("uord_gt",		FLOAT_ARITHMETIC,
1824 											"%boolVal           = OpFUnordGreaterThan %type_bool %arg1 %arg2\n"
1825 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1826 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1827 	mo[OID_ORD_LE]		= Op("ord_le",		FLOAT_ARITHMETIC,
1828 											"%boolVal           = OpFOrdLessThanEqual %type_bool %arg1 %arg2\n"
1829 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1830 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1831 	mo[OID_UORD_LE]		= Op("uord_le",		FLOAT_ARITHMETIC,
1832 											"%boolVal           = OpFUnordLessThanEqual %type_bool %arg1 %arg2\n"
1833 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1834 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1835 	mo[OID_ORD_GE]		= Op("ord_ge",		FLOAT_ARITHMETIC,
1836 											"%boolVal           = OpFOrdGreaterThanEqual %type_bool %arg1 %arg2\n"
1837 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1838 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1839 	mo[OID_UORD_GE]		= Op("uord_ge",		FLOAT_ARITHMETIC,
1840 											"%boolVal           = OpFUnordGreaterThanEqual %type_bool %arg1 %arg2\n"
1841 											"%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1842 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1843 
1844 	mo[OID_ATAN2]		= Op("atan2",		FLOAT_ARITHMETIC,
1845 											"%result             = OpExtInst %type_float %std450 Atan2 %arg1 %arg2\n",
1846 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1847 	mo[OID_POW]			= Op("pow",			FLOAT_ARITHMETIC,
1848 											"%result             = OpExtInst %type_float %std450 Pow %arg1 %arg2\n",
1849 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1850 	mo[OID_MIX]			= Op("mix",			FLOAT_ARITHMETIC,
1851 											"%result             = OpExtInst %type_float %std450 FMix %arg1 %arg2 %c_float_0_5\n",
1852 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1853 	mo[OID_FMA]			= Op("fma",			FLOAT_ARITHMETIC,
1854 											"%result             = OpExtInst %type_float %std450 Fma %arg1 %arg2 %c_float_0_5\n",
1855 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1856 	mo[OID_MIN]			= Op("min",			FLOAT_ARITHMETIC,
1857 											"%result             = OpExtInst %type_float %std450 FMin %arg1 %arg2\n",
1858 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1859 	mo[OID_MAX]			= Op("max",			FLOAT_ARITHMETIC,
1860 											"%result             = OpExtInst %type_float %std450 FMax %arg1 %arg2\n",
1861 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1862 	mo[OID_CLAMP]		= Op("clamp",		FLOAT_ARITHMETIC,
1863 											"%result             = OpExtInst %type_float %std450 FClamp %arg1 %arg2 %arg2\n",
1864 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1865 	mo[OID_STEP]		= Op("step",		FLOAT_ARITHMETIC,
1866 											"%result             = OpExtInst %type_float %std450 Step %arg1 %arg2\n",
1867 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1868 	mo[OID_SSTEP]		= Op("sstep",		FLOAT_ARITHMETIC,
1869 											"%result             = OpExtInst %type_float %std450 SmoothStep %arg1 %arg2 %c_float_0_5\n",
1870 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1871 	mo[OID_DIST]		= Op("distance",	FLOAT_ARITHMETIC,
1872 											"%result             = OpExtInst %type_float %std450 Distance %arg1 %arg2\n",
1873 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1874 	mo[OID_CROSS]		= Op("cross",		FLOAT_ARITHMETIC,
1875 											"%vec1               = OpCompositeConstruct %type_float_vec3 %arg1 %arg1 %arg1\n"
1876 											"%vec2               = OpCompositeConstruct %type_float_vec3 %arg2 %arg2 %arg2\n"
1877 											"%tmpVec             = OpExtInst %type_float_vec3 %std450 Cross %vec1 %vec2\n"
1878 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n",
1879 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1880 	mo[OID_FACE_FWD]	= Op("face_fwd",	FLOAT_ARITHMETIC,
1881 											"%result             = OpExtInst %type_float %std450 FaceForward %c_float_1 %arg1 %arg2\n",
1882 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1883 	mo[OID_NMIN]		= Op("nmin",		FLOAT_ARITHMETIC,
1884 											"%result             = OpExtInst %type_float %std450 NMin %arg1 %arg2\n",
1885 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1886 	mo[OID_NMAX]		= Op("nmax",		FLOAT_ARITHMETIC,
1887 											"%result             = OpExtInst %type_float %std450 NMax %arg1 %arg2\n",
1888 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1889 	mo[OID_NCLAMP]		= Op("nclamp",		FLOAT_ARITHMETIC,
1890 											"%result             = OpExtInst %type_float %std450 NClamp %arg2 %arg1 %arg2\n",
1891 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1892 
1893 	mo[OID_ROUND]		= Op("round",		FLOAT_ARITHMETIC,
1894 											"%result             = OpExtInst %type_float %std450 Round %arg1\n",
1895 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1896 	mo[OID_ROUND_EV]	= Op("round_ev",	FLOAT_ARITHMETIC,
1897 											"%result             = OpExtInst %type_float %std450 RoundEven %arg1\n",
1898 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1899 	mo[OID_TRUNC]		= Op("trunc",		FLOAT_ARITHMETIC,
1900 											"%result             = OpExtInst %type_float %std450 Trunc %arg1\n",
1901 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1902 	mo[OID_ABS]			= Op("abs",			FLOAT_ARITHMETIC,
1903 											"%result             = OpExtInst %type_float %std450 FAbs %arg1\n",
1904 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1905 	mo[OID_SIGN]		= Op("sign",		FLOAT_ARITHMETIC,
1906 											"%result             = OpExtInst %type_float %std450 FSign %arg1\n",
1907 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1908 	mo[OID_FLOOR]		= Op("floor",		FLOAT_ARITHMETIC,
1909 											"%result             = OpExtInst %type_float %std450 Floor %arg1\n",
1910 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1911 	mo[OID_CEIL]		= Op("ceil",		FLOAT_ARITHMETIC,
1912 											"%result             = OpExtInst %type_float %std450 Ceil %arg1\n",
1913 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1914 	mo[OID_FRACT]		= Op("fract",		FLOAT_ARITHMETIC,
1915 											"%result             = OpExtInst %type_float %std450 Fract %arg1\n",
1916 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1917 	mo[OID_RADIANS]		= Op("radians",		FLOAT_ARITHMETIC,
1918 											"%result             = OpExtInst %type_float %std450 Radians %arg1\n",
1919 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1920 	mo[OID_DEGREES]		= Op("degrees",		FLOAT_ARITHMETIC,
1921 											"%result             = OpExtInst %type_float %std450 Degrees %arg1\n",
1922 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1923 	mo[OID_SIN]			= Op("sin",			FLOAT_ARITHMETIC,
1924 											"%result             = OpExtInst %type_float %std450 Sin %arg1\n",
1925 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1926 	mo[OID_COS]			= Op("cos",			FLOAT_ARITHMETIC,
1927 											"%result             = OpExtInst %type_float %std450 Cos %arg1\n",
1928 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1929 	mo[OID_TAN]			= Op("tan",			FLOAT_ARITHMETIC,
1930 											"%result             = OpExtInst %type_float %std450 Tan %arg1\n",
1931 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1932 	mo[OID_ASIN]		= Op("asin",		FLOAT_ARITHMETIC,
1933 											"%result             = OpExtInst %type_float %std450 Asin %arg1\n",
1934 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1935 	mo[OID_ACOS]		= Op("acos",		FLOAT_ARITHMETIC,
1936 											"%result             = OpExtInst %type_float %std450 Acos %arg1\n",
1937 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1938 	mo[OID_ATAN]		= Op("atan",		FLOAT_ARITHMETIC,
1939 											"%result             = OpExtInst %type_float %std450 Atan %arg1\n",
1940 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1941 	mo[OID_SINH]		= Op("sinh",		FLOAT_ARITHMETIC,
1942 											"%result             = OpExtInst %type_float %std450 Sinh %arg1\n",
1943 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1944 	mo[OID_COSH]		= Op("cosh",		FLOAT_ARITHMETIC,
1945 											"%result             = OpExtInst %type_float %std450 Cosh %arg1\n",
1946 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1947 	mo[OID_TANH]		= Op("tanh",		FLOAT_ARITHMETIC,
1948 											"%result             = OpExtInst %type_float %std450 Tanh %arg1\n",
1949 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1950 	mo[OID_ASINH]		= Op("asinh",		FLOAT_ARITHMETIC,
1951 											"%result             = OpExtInst %type_float %std450 Asinh %arg1\n",
1952 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1953 	mo[OID_ACOSH]		= Op("acosh",		FLOAT_ARITHMETIC,
1954 											"%result             = OpExtInst %type_float %std450 Acosh %arg1\n",
1955 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1956 	mo[OID_ATANH]		= Op("atanh",		FLOAT_ARITHMETIC,
1957 											"%result             = OpExtInst %type_float %std450 Atanh %arg1\n",
1958 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1959 	mo[OID_EXP]			= Op("exp",			FLOAT_ARITHMETIC,
1960 											"%result             = OpExtInst %type_float %std450 Exp %arg1\n",
1961 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1962 	mo[OID_LOG]			= Op("log",			FLOAT_ARITHMETIC,
1963 											"%result             = OpExtInst %type_float %std450 Log %arg1\n",
1964 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1965 	mo[OID_EXP2]		= Op("exp2",		FLOAT_ARITHMETIC,
1966 											"%result             = OpExtInst %type_float %std450 Exp2 %arg1\n",
1967 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1968 	mo[OID_LOG2]		= Op("log2",		FLOAT_ARITHMETIC,
1969 											"%result             = OpExtInst %type_float %std450 Log2 %arg1\n",
1970 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1971 	mo[OID_SQRT]		= Op("sqrt",		FLOAT_ARITHMETIC,
1972 											"%result             = OpExtInst %type_float %std450 Sqrt %arg1\n",
1973 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1974 	mo[OID_INV_SQRT]	= Op("inv_sqrt",	FLOAT_ARITHMETIC,
1975 											"%result             = OpExtInst %type_float %std450 InverseSqrt %arg1\n",
1976 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1977 	mo[OID_MODF]		= Op("modf",		FLOAT_ARITHMETIC,
1978 											"",
1979 											"",
1980 											"",
1981 											"%tmpVarPtr          = OpVariable %type_float_fptr Function\n",
1982 											"",
1983 											"%result             = OpExtInst %type_float %std450 Modf %arg1 %tmpVarPtr\n",
1984 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1985 	mo[OID_MODF_ST]		= Op("modf_st",		FLOAT_ARITHMETIC,
1986 											"OpMemberDecorate %struct_ff 0 Offset ${float_width}\n"
1987 											"OpMemberDecorate %struct_ff 1 Offset ${float_width}\n",
1988 											"%struct_ff          = OpTypeStruct %type_float %type_float\n"
1989 											"%struct_ff_fptr     = OpTypePointer Function %struct_ff\n",
1990 											"",
1991 											"%tmpStructPtr       = OpVariable %struct_ff_fptr Function\n",
1992 											"",
1993 											"%tmpStruct          = OpExtInst %struct_ff %std450 ModfStruct %arg1\n"
1994 											"                      OpStore %tmpStructPtr %tmpStruct\n"
1995 											"%tmpLoc             = OpAccessChain %type_float_fptr %tmpStructPtr %c_i32_0\n"
1996 											"%result             = OpLoad %type_float %tmpLoc\n",
1997 											B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1998 	mo[OID_FREXP]		= Op("frexp",		FLOAT_ARITHMETIC,
1999 											"",
2000 											"",
2001 											"",
2002 											"%tmpVarPtr          = OpVariable %type_i32_fptr Function\n",
2003 											"",
2004 											"%result             = OpExtInst %type_float %std450 Frexp %arg1 %tmpVarPtr\n",
2005 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2006 	mo[OID_FREXP_ST]	= Op("frexp_st",	FLOAT_ARITHMETIC,
2007 											"OpMemberDecorate %struct_fi 0 Offset ${float_width}\n"
2008 											"OpMemberDecorate %struct_fi 1 Offset 32\n",
2009 											"%struct_fi          = OpTypeStruct %type_float %type_i32\n"
2010 											"%struct_fi_fptr     = OpTypePointer Function %struct_fi\n",
2011 											"",
2012 											"%tmpStructPtr       = OpVariable %struct_fi_fptr Function\n",
2013 											"",
2014 											"%tmpStruct          = OpExtInst %struct_fi %std450 FrexpStruct %arg1\n"
2015 											"                      OpStore %tmpStructPtr %tmpStruct\n"
2016 											"%tmpLoc             = OpAccessChain %type_float_fptr %tmpStructPtr %c_i32_0\n"
2017 											"%result             = OpLoad %type_float %tmpLoc\n",
2018 											B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2019 	mo[OID_LENGTH]		= Op("length",		FLOAT_ARITHMETIC,
2020 											"%result             = OpExtInst %type_float %std450 Length %arg1\n",
2021 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2022 	mo[OID_NORMALIZE]	= Op("normalize",	FLOAT_ARITHMETIC,
2023 											"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %c_float_2\n"
2024 											"%tmpVec             = OpExtInst %type_float_vec2 %std450 Normalize %vec1\n"
2025 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n",
2026 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2027 	mo[OID_REFLECT]		= Op("reflect",		FLOAT_ARITHMETIC,
2028 											"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
2029 											"%vecN               = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_n1\n"
2030 											"%tmpVec             = OpExtInst %type_float_vec2 %std450 Reflect %vec1 %vecN\n"
2031 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n",
2032 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2033 	mo[OID_REFRACT]		= Op("refract",		FLOAT_ARITHMETIC,
2034 											"%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
2035 											"%vecN               = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_n1\n"
2036 											"%tmpVec             = OpExtInst %type_float_vec2 %std450 Refract %vec1 %vecN %c_float_0_5\n"
2037 											"%result             = OpCompositeExtract %type_float %tmpVec 0\n",
2038 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2039 	mo[OID_MAT_DET]		= Op("mat_det",		FLOAT_ARITHMETIC,
2040 											"%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
2041 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
2042 											"%result             = OpExtInst %type_float %std450 Determinant %mat\n",
2043 											B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2044 	mo[OID_MAT_INV]		= Op("mat_inv",		FLOAT_ARITHMETIC,
2045 											"%col1               = OpCompositeConstruct %type_float_vec2 %arg1 %c_float_1\n"
2046 											"%col2               = OpCompositeConstruct %type_float_vec2 %c_float_1 %c_float_1\n"
2047 											"%mat                = OpCompositeConstruct %type_float_mat2x2 %col1 %col2\n"
2048 											"%invMat             = OpExtInst %type_float_mat2x2 %std450 MatrixInverse %mat\n"
2049 											"%extCol             = OpCompositeExtract %type_float_vec2 %invMat 1\n"
2050 											"%result             = OpCompositeExtract %type_float %extCol 1\n",
2051 											B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2052 
2053 	// PackHalf2x16 is a special case as it operates on fp32 vec2 and returns unsigned int,
2054 	// the verification is done in SPIR-V code (if result is correct 1.0 will be written to SSBO)
2055 	mo[OID_PH_DENORM]	= Op("ph_denorm",	FLOAT_STORAGE_ONLY,
2056 											"",
2057 											"",
2058 											"%c_fp32_denorm_fp16 = OpConstant %type_f32 6.01e-5\n"		// fp32 representation of fp16 denorm value
2059 											"%c_ref              = OpConstant %type_u32 66061296\n",
2060 											"",
2061 											"",
2062 											"%srcVec             = OpCompositeConstruct %type_f32_vec2 %c_fp32_denorm_fp16 %c_fp32_denorm_fp16\n"
2063 											"%packedInt          = OpExtInst %type_u32 %std450 PackHalf2x16 %srcVec\n"
2064 											"%boolVal            = OpIEqual %type_bool %c_ref %packedInt\n"
2065 											"%result             = OpSelect %type_f32 %boolVal %c_f32_1 %c_f32_0\n",
2066 											B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_COMMANDS_CONST_FP32 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP32);
2067 
2068 	// UnpackHalf2x16 is a special case that operates on uint32 and returns two 32-bit floats,
2069 	// this function is tested using constants
2070 	mo[OID_UPH_DENORM]	= Op("uph_denorm",	FLOAT_STORAGE_ONLY,
2071 											"",
2072 											"",
2073 											"%c_u32_2_16_pack    = OpConstant %type_u32 66061296\n", // == packHalf2x16(vec2(denorm))
2074 											"",
2075 											"",
2076 											"%tmpVec             = OpExtInst %type_f32_vec2 %std450 UnpackHalf2x16 %c_u32_2_16_pack\n"
2077 											"%result             = OpCompositeExtract %type_f32 %tmpVec 0\n",
2078 											B_STATEMENT_USAGE_COMMANDS_TYPE_FP32);
2079 
2080 	// PackDouble2x32 is a special case that operates on two uint32 and returns
2081 	// double, this function is tested using constants
2082 	mo[OID_PD_DENORM]	= Op("pd_denorm",	FLOAT_STORAGE_ONLY,
2083 											"",
2084 											"",
2085 											"%c_p1               = OpConstant %type_u32 0\n"
2086 											"%c_p2               = OpConstant %type_u32 262144\n",		// == UnpackDouble2x32(denorm)
2087 											"",
2088 											"",
2089 											"%srcVec             = OpCompositeConstruct %type_u32_vec2 %c_p1 %c_p2\n"
2090 											"%result             = OpExtInst %type_f64 %std450 PackDouble2x32 %srcVec\n",
2091 											B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2092 
2093 	// UnpackDouble2x32 is a special case as it operates only on FP64 and returns two ints,
2094 	// the verification is done in SPIR-V code (if result is correct 1.0 will be written to SSBO)
2095 	const char* unpackDouble2x32Types	=	"%type_bool_vec2     = OpTypeVector %type_bool 2\n";
2096 	const char* unpackDouble2x32Source	=	"%refVec2            = OpCompositeConstruct %type_u32_vec2 %c_p1 %c_p2\n"
2097 											"%resVec2            = OpExtInst %type_u32_vec2 %std450 UnpackDouble2x32 %arg1\n"
2098 											"%boolVec2           = OpIEqual %type_bool_vec2 %refVec2 %resVec2\n"
2099 											"%boolVal            = OpAll %type_bool %boolVec2\n"
2100 											"%result             = OpSelect %type_f64 %boolVal %c_f64_1 %c_f64_0\n";
2101 	mo[OID_UPD_DENORM_FLUSH]	= Op("upd_denorm",	FLOAT_STORAGE_ONLY, "",
2102 											unpackDouble2x32Types,
2103 											"%c_p1               = OpConstant %type_u32 0\n"
2104 											"%c_p2               = OpConstant %type_u32 0\n",
2105 											"",
2106 											"",
2107 											unpackDouble2x32Source,
2108 											B_STATEMENT_USAGE_COMMANDS_CONST_FP64 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2109 	mo[OID_UPD_DENORM_PRESERVE]	= Op("upd_denorm",	FLOAT_STORAGE_ONLY, "",
2110 											unpackDouble2x32Types,
2111 											"%c_p1               = OpConstant %type_u32 1008\n"
2112 											"%c_p2               = OpConstant %type_u32 0\n",
2113 											"",
2114 											"",
2115 											unpackDouble2x32Source,
2116 											B_STATEMENT_USAGE_COMMANDS_CONST_FP64 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2117 
2118 	mo[OID_ORTE_ROUND]	= Op("orte_round",	FLOAT_STORAGE_ONLY, FP32,
2119 											"OpDecorate %result FPRoundingMode RTE\n",
2120 											"",
2121 											"",
2122 											"%result             = OpFConvert %type_f16 %arg1\n",
2123 											B_STATEMENT_USAGE_COMMANDS_TYPE_FP16);
2124 	mo[OID_ORTZ_ROUND]	= Op("ortz_round",	FLOAT_STORAGE_ONLY, FP32,
2125 											"OpDecorate %result FPRoundingMode RTZ\n",
2126 											"",
2127 											"",
2128 											"%result             = OpFConvert %type_f16 %arg1\n",
2129 											B_STATEMENT_USAGE_COMMANDS_TYPE_FP16);
2130 }
2131 
build(vector<OperationTestCase> & testCases,TypeTestResultsSP typeTestResults,bool argumentsFromInput)2132 void TestCasesBuilder::build(vector<OperationTestCase>& testCases, TypeTestResultsSP typeTestResults, bool argumentsFromInput)
2133 {
2134 	// this method constructs a list of test cases; this list is a bit different
2135 	// for every combination of float type, arguments preparation method and tested float control
2136 
2137 	testCases.reserve(750);
2138 
2139 	bool isFP16 = typeTestResults->floatType() == FP16;
2140 
2141 	// Denorm - FlushToZero - binary operations
2142 	for (size_t i = 0 ; i < typeTestResults->binaryOpFTZ.size() ; ++i)
2143 	{
2144 		const BinaryCase&	binaryCase	= typeTestResults->binaryOpFTZ[i];
2145 		OperationId			operation	= binaryCase.operationId;
2146 		testCases.push_back(OTC("denorm_op_var_flush_to_zero",		B_DENORM_FLUSH,					 operation, V_DENORM, V_ONE,		binaryCase.opVarResult));
2147 		testCases.push_back(OTC("denorm_op_denorm_flush_to_zero",	B_DENORM_FLUSH,					 operation, V_DENORM, V_DENORM,		binaryCase.opDenormResult));
2148 		testCases.push_back(OTC("denorm_op_inf_flush_to_zero",		B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_INF,		binaryCase.opInfResult));
2149 		testCases.push_back(OTC("denorm_op_nan_flush_to_zero",		B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_NAN,		binaryCase.opNanResult));
2150 
2151 		if (isFP16)
2152 		{
2153 			testCases.push_back(OTC("denorm_op_var_flush_to_zero_nostorage",		B_DENORM_FLUSH,					 operation, V_DENORM, V_ONE,		binaryCase.opVarResult, true));
2154 			testCases.push_back(OTC("denorm_op_denorm_flush_to_zero_nostorage",		B_DENORM_FLUSH,					 operation, V_DENORM, V_DENORM,		binaryCase.opDenormResult, true));
2155 			testCases.push_back(OTC("denorm_op_inf_flush_to_zero_nostorage",		B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_INF,		binaryCase.opInfResult, true));
2156 			testCases.push_back(OTC("denorm_op_nan_flush_to_zero_nostorage",		B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_NAN,		binaryCase.opNanResult, true));
2157 		}
2158 	}
2159 
2160 	// Denorm - FlushToZero - unary operations
2161 	for (size_t i = 0 ; i < typeTestResults->unaryOpFTZ.size() ; ++i)
2162 	{
2163 		const UnaryCase&	unaryCase = typeTestResults->unaryOpFTZ[i];
2164 		OperationId			operation = unaryCase.operationId;
2165 		testCases.push_back(OTC("op_denorm_flush_to_zero", B_DENORM_FLUSH, operation, V_DENORM, V_UNUSED, unaryCase.result));
2166 		if (isFP16)
2167 			testCases.push_back(OTC("op_denorm_flush_to_zero_nostorage", B_DENORM_FLUSH, operation, V_DENORM, V_UNUSED, unaryCase.result, true));
2168 
2169 	}
2170 
2171 	// Denom - Preserve - binary operations
2172 	for (size_t i = 0 ; i < typeTestResults->binaryOpDenormPreserve.size() ; ++i)
2173 	{
2174 		const BinaryCase&	binaryCase	= typeTestResults->binaryOpDenormPreserve[i];
2175 		OperationId			operation	= binaryCase.operationId;
2176 		testCases.push_back(OTC("denorm_op_var_preserve",			B_DENORM_PRESERVE,					operation, V_DENORM,	V_ONE,		binaryCase.opVarResult));
2177 		testCases.push_back(OTC("denorm_op_denorm_preserve",		B_DENORM_PRESERVE,					operation, V_DENORM,	V_DENORM,	binaryCase.opDenormResult));
2178 		testCases.push_back(OTC("denorm_op_inf_preserve",			B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM,	V_INF,		binaryCase.opInfResult));
2179 		testCases.push_back(OTC("denorm_op_nan_preserve",			B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM,	V_NAN,		binaryCase.opNanResult));
2180 
2181 		if (isFP16)
2182 		{
2183 			testCases.push_back(OTC("denorm_op_var_preserve_nostorage",			B_DENORM_PRESERVE,					operation, V_DENORM,	V_ONE,		binaryCase.opVarResult, true));
2184 			testCases.push_back(OTC("denorm_op_denorm_preserve_nostorage",		B_DENORM_PRESERVE,					operation, V_DENORM,	V_DENORM,	binaryCase.opDenormResult, true));
2185 			testCases.push_back(OTC("denorm_op_inf_preserve_nostorage",			B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM,	V_INF,		binaryCase.opInfResult, true));
2186 			testCases.push_back(OTC("denorm_op_nan_preserve_nostorage",			B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM,	V_NAN,		binaryCase.opNanResult, true));
2187 		}
2188 	}
2189 
2190 	// Denom - Preserve - unary operations
2191 	for (size_t i = 0 ; i < typeTestResults->unaryOpDenormPreserve.size() ; ++i)
2192 	{
2193 		const UnaryCase&	unaryCase	= typeTestResults->unaryOpDenormPreserve[i];
2194 		OperationId			operation	= unaryCase.operationId;
2195 		testCases.push_back(OTC("op_denorm_preserve", B_DENORM_PRESERVE, operation, V_DENORM, V_UNUSED, unaryCase.result));
2196 		if (isFP16)
2197 			testCases.push_back(OTC("op_denorm_preserve_nostorage", B_DENORM_PRESERVE, operation, V_DENORM, V_UNUSED, unaryCase.result, true));
2198 	}
2199 
2200 	struct ZINCase
2201 	{
2202 		OperationId	operationId;
2203 		bool		supportedByFP64;
2204 		ValueId		secondArgument;
2205 		ValueId		preserveZeroResult;
2206 		ValueId		preserveSZeroResult;
2207 		ValueId		preserveInfResult;
2208 		ValueId		preserveSInfResult;
2209 		ValueId		preserveNanResult;
2210 	};
2211 
2212 	const ZINCase binaryOpZINPreserve[] = {
2213 		// operation		fp64	second arg		preserve zero	preserve szero		preserve inf	preserve sinf		preserve nan
2214 		{ OID_PHI,			true,	V_INF,			V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2215 		{ OID_SELECT,		true,	V_ONE,			V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2216 		{ OID_ADD,			true,	V_ZERO,			V_ZERO,			V_ZERO,				V_INF,			V_MINUS_INF,		V_NAN },
2217 		{ OID_SUB,			true,	V_ZERO,			V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2218 		{ OID_MUL,			true,	V_ONE,			V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2219 	};
2220 
2221 	const ZINCase unaryOpZINPreserve[] = {
2222 		// operation				fp64	second arg		preserve zero	preserve szero		preserve inf	preserve sinf		preserve nan
2223 		{ OID_RETURN_VAL,			true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2224 		{ OID_D_EXTRACT,			true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2225 		{ OID_D_INSERT,				true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2226 		{ OID_SHUFFLE,				true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2227 		{ OID_COMPOSITE,			true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2228 		{ OID_COMPOSITE_INS,		true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2229 		{ OID_COPY,					true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2230 		{ OID_TRANSPOSE,			true,	V_UNUSED,		V_ZERO,			V_MINUS_ZERO,		V_INF,			V_MINUS_INF,		V_NAN },
2231 		{ OID_NEGATE,				true,	V_UNUSED,		V_MINUS_ZERO,	V_ZERO,				V_MINUS_INF,	V_INF,				V_NAN },
2232 	};
2233 
2234 	bool isFP64 = typeTestResults->floatType() == FP64;
2235 
2236 	// Signed Zero Inf Nan - Preserve - binary operations
2237 	for (size_t i = 0 ; i < DE_LENGTH_OF_ARRAY(binaryOpZINPreserve) ; ++i)
2238 	{
2239 		const ZINCase& zc = binaryOpZINPreserve[i];
2240 		if (isFP64 && !zc.supportedByFP64)
2241 			continue;
2242 
2243 		testCases.push_back(OTC("zero_op_var_preserve",				B_ZIN_PRESERVE, zc.operationId, V_ZERO,			zc.secondArgument,	zc.preserveZeroResult));
2244 		testCases.push_back(OTC("signed_zero_op_var_preserve",		B_ZIN_PRESERVE, zc.operationId, V_MINUS_ZERO,	zc.secondArgument,	zc.preserveSZeroResult));
2245 		testCases.push_back(OTC("inf_op_var_preserve",				B_ZIN_PRESERVE, zc.operationId, V_INF,			zc.secondArgument,	zc.preserveInfResult));
2246 		testCases.push_back(OTC("signed_inf_op_var_preserve",		B_ZIN_PRESERVE, zc.operationId, V_MINUS_INF,	zc.secondArgument,	zc.preserveSInfResult));
2247 		testCases.push_back(OTC("nan_op_var_preserve",				B_ZIN_PRESERVE, zc.operationId, V_NAN,			zc.secondArgument,	zc.preserveNanResult));
2248 
2249 		if (isFP16)
2250 		{
2251 			testCases.push_back(OTC("zero_op_var_preserve_nostorage",				B_ZIN_PRESERVE, zc.operationId, V_ZERO,			zc.secondArgument,	zc.preserveZeroResult, true));
2252 			testCases.push_back(OTC("signed_zero_op_var_preserve_nostorage",		B_ZIN_PRESERVE, zc.operationId, V_MINUS_ZERO,	zc.secondArgument,	zc.preserveSZeroResult, true));
2253 			testCases.push_back(OTC("inf_op_var_preserve_nostorage",				B_ZIN_PRESERVE, zc.operationId, V_INF,			zc.secondArgument,	zc.preserveInfResult, true));
2254 			testCases.push_back(OTC("signed_inf_op_var_preserve_nostorage",			B_ZIN_PRESERVE, zc.operationId, V_MINUS_INF,	zc.secondArgument,	zc.preserveSInfResult, true));
2255 			testCases.push_back(OTC("nan_op_var_preserve_nostorage",				B_ZIN_PRESERVE, zc.operationId, V_NAN,			zc.secondArgument,	zc.preserveNanResult, true));
2256 		}
2257 	}
2258 
2259 	// Signed Zero Inf Nan - Preserve - unary operations
2260 	for (size_t i = 0 ; i < DE_LENGTH_OF_ARRAY(unaryOpZINPreserve) ; ++i)
2261 	{
2262 		const ZINCase& zc = unaryOpZINPreserve[i];
2263 		if (isFP64 && !zc.supportedByFP64)
2264 			continue;
2265 
2266 		testCases.push_back(OTC("op_zero_preserve",			B_ZIN_PRESERVE,zc.operationId, V_ZERO,			V_UNUSED,	zc.preserveZeroResult));
2267 		testCases.push_back(OTC("op_signed_zero_preserve",	B_ZIN_PRESERVE,zc.operationId, V_MINUS_ZERO,	V_UNUSED,	zc.preserveSZeroResult));
2268 		testCases.push_back(OTC("op_inf_preserve",			B_ZIN_PRESERVE,zc.operationId, V_INF,			V_UNUSED,	zc.preserveInfResult));
2269 		testCases.push_back(OTC("op_signed_inf_preserve",	B_ZIN_PRESERVE,zc.operationId, V_MINUS_INF,		V_UNUSED,	zc.preserveSInfResult));
2270 		testCases.push_back(OTC("op_nan_preserve",			B_ZIN_PRESERVE,zc.operationId, V_NAN,			V_UNUSED,	zc.preserveNanResult));
2271 
2272 		if (isFP16)
2273 		{
2274 			testCases.push_back(OTC("op_zero_preserve_nostorage",			B_ZIN_PRESERVE,zc.operationId, V_ZERO,			V_UNUSED,	zc.preserveZeroResult, true));
2275 			testCases.push_back(OTC("op_signed_zero_preserve_nostorage",	B_ZIN_PRESERVE,zc.operationId, V_MINUS_ZERO,	V_UNUSED,	zc.preserveSZeroResult, true));
2276 			testCases.push_back(OTC("op_inf_preserve_nostorage",			B_ZIN_PRESERVE,zc.operationId, V_INF,			V_UNUSED,	zc.preserveInfResult, true));
2277 			testCases.push_back(OTC("op_signed_inf_preserve_nostorage",		B_ZIN_PRESERVE,zc.operationId, V_MINUS_INF,		V_UNUSED,	zc.preserveSInfResult, true));
2278 			testCases.push_back(OTC("op_nan_preserve_nostorage",			B_ZIN_PRESERVE,zc.operationId, V_NAN,			V_UNUSED,	zc.preserveNanResult, true));
2279 		}
2280 	}
2281 
2282 	// comparison operations - tested differently because they return true/false
2283 	struct ComparisonCase
2284 	{
2285 		OperationId	operationId;
2286 		ValueId		denormPreserveResult;
2287 	};
2288 	const ComparisonCase comparisonCases[] =
2289 	{
2290 		// operation	denorm
2291 		{ OID_ORD_EQ,	V_ZERO },
2292 		{ OID_UORD_EQ,	V_ZERO },
2293 		{ OID_ORD_NEQ,	V_ONE  },
2294 		{ OID_UORD_NEQ,	V_ONE  },
2295 		{ OID_ORD_LS,	V_ONE  },
2296 		{ OID_UORD_LS,	V_ONE  },
2297 		{ OID_ORD_GT,	V_ZERO },
2298 		{ OID_UORD_GT,	V_ZERO },
2299 		{ OID_ORD_LE,	V_ONE  },
2300 		{ OID_UORD_LE,	V_ONE  },
2301 		{ OID_ORD_GE,	V_ZERO },
2302 		{ OID_UORD_GE,	V_ZERO }
2303 	};
2304 	for (int op = 0 ; op < DE_LENGTH_OF_ARRAY(comparisonCases) ; ++op)
2305 	{
2306 		const ComparisonCase& cc = comparisonCases[op];
2307 		testCases.push_back(OTC("denorm_op_var_preserve", B_DENORM_PRESERVE, cc.operationId, V_DENORM, V_ONE, cc.denormPreserveResult));
2308 		if (isFP16)
2309 			testCases.push_back(OTC("denorm_op_var_preserve_nostorage", B_DENORM_PRESERVE, cc.operationId, V_DENORM, V_ONE, cc.denormPreserveResult, true));
2310 	}
2311 
2312 	if (argumentsFromInput)
2313 	{
2314 		struct RoundingModeCase
2315 		{
2316 			OperationId	operationId;
2317 			ValueId		arg1;
2318 			ValueId		arg2;
2319 			ValueId		expectedRTEResult;
2320 			ValueId		expectedRTZResult;
2321 		};
2322 
2323 		const RoundingModeCase roundingCases[] =
2324 		{
2325 			{ OID_ADD,			V_ADD_ARG_A,	V_ADD_ARG_B,	V_ADD_RTE_RESULT,	V_ADD_RTZ_RESULT },
2326 			{ OID_SUB,			V_SUB_ARG_A,	V_SUB_ARG_B,	V_SUB_RTE_RESULT,	V_SUB_RTZ_RESULT },
2327 			{ OID_MUL,			V_MUL_ARG_A,	V_MUL_ARG_B,	V_MUL_RTE_RESULT,	V_MUL_RTZ_RESULT },
2328 			{ OID_DOT,			V_DOT_ARG_A,	V_DOT_ARG_B,	V_DOT_RTE_RESULT,	V_DOT_RTZ_RESULT },
2329 
2330 			// in vect/mat multiplication by scalar operations only first element of result is checked
2331 			// so argument and result values prepared for multiplication can be reused for those cases
2332 			{ OID_VEC_MUL_S,	V_MUL_ARG_A,	V_MUL_ARG_B,	V_MUL_RTE_RESULT,	V_MUL_RTZ_RESULT },
2333 			{ OID_MAT_MUL_S,	V_MUL_ARG_A,	V_MUL_ARG_B,	V_MUL_RTE_RESULT,	V_MUL_RTZ_RESULT },
2334 			{ OID_OUT_PROD,		V_MUL_ARG_A,	V_MUL_ARG_B,	V_MUL_RTE_RESULT,	V_MUL_RTZ_RESULT },
2335 
2336 			// in SPIR-V code we return first element of operation result so for following
2337 			// cases argument and result values prepared for dot product can be reused
2338 			{ OID_VEC_MUL_M,	V_DOT_ARG_A,	V_DOT_ARG_B,	V_DOT_RTE_RESULT,	V_DOT_RTZ_RESULT },
2339 			{ OID_MAT_MUL_V,	V_DOT_ARG_A,	V_DOT_ARG_B,	V_DOT_RTE_RESULT,	V_DOT_RTZ_RESULT },
2340 			{ OID_MAT_MUL_M,	V_DOT_ARG_A,	V_DOT_ARG_B,	V_DOT_RTE_RESULT,	V_DOT_RTZ_RESULT },
2341 
2342 			// conversion operations are added separately - depending on float type width
2343 		};
2344 
2345 		for (int c = 0 ; c < DE_LENGTH_OF_ARRAY(roundingCases) ; ++c)
2346 		{
2347 			const RoundingModeCase& rmc = roundingCases[c];
2348 			testCases.push_back(OTC("rounding_rte_op", B_RTE_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTEResult));
2349 			testCases.push_back(OTC("rounding_rtz_op", B_RTZ_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTZResult));
2350 			if (isFP16)
2351 			{
2352 				testCases.push_back(OTC("rounding_rte_op_nostorage", B_RTE_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTEResult, true));
2353 				testCases.push_back(OTC("rounding_rtz_op_nostorage", B_RTZ_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTZResult, true));
2354 			}
2355 		}
2356 	}
2357 
2358 	// special cases
2359 	if (typeTestResults->floatType() == FP16)
2360 	{
2361 		if (argumentsFromInput)
2362 		{
2363 			testCases.push_back(OTC("rounding_rte_conv_from_fp32", B_RTE_ROUNDING, OID_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2364 			testCases.push_back(OTC("rounding_rtz_conv_from_fp32", B_RTZ_ROUNDING, OID_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2365 			testCases.push_back(OTC("rounding_rte_conv_from_fp64", B_RTE_ROUNDING, OID_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2366 			testCases.push_back(OTC("rounding_rtz_conv_from_fp64", B_RTZ_ROUNDING, OID_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2367 
2368 			testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32", B_RTE_ROUNDING, OID_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2369 			testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32", B_RTZ_ROUNDING, OID_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2370 			testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64", B_RTE_ROUNDING, OID_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2371 			testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64", B_RTZ_ROUNDING, OID_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2372 
2373 			testCases.push_back(OTC("rounding_rte_conv_from_fp32_nostorage", B_RTE_ROUNDING, OID_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT, true));
2374 			testCases.push_back(OTC("rounding_rtz_conv_from_fp32_nostorage", B_RTZ_ROUNDING, OID_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT, true));
2375 			testCases.push_back(OTC("rounding_rte_conv_from_fp64_nostorage", B_RTE_ROUNDING, OID_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT, true));
2376 			testCases.push_back(OTC("rounding_rtz_conv_from_fp64_nostorage", B_RTZ_ROUNDING, OID_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT, true));
2377 
2378 			testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32_nostorage", B_RTE_ROUNDING, OID_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT, true));
2379 			testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32_nostorage", B_RTZ_ROUNDING, OID_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT, true));
2380 			testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_nostorage", B_RTE_ROUNDING, OID_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT, true));
2381 			testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_nostorage", B_RTZ_ROUNDING, OID_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT, true));
2382 
2383 			// verify that VkShaderFloatingPointRoundingModeKHR can be overridden for a given instruction by the FPRoundingMode decoration.
2384 			// FPRoundingMode decoration requires VK_KHR_16bit_storage.
2385 			testCases.push_back(OTC("rounding_rte_override", B_RTE_ROUNDING, OID_ORTZ_ROUND, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2386 			testCases.push_back(OTC("rounding_rtz_override", B_RTZ_ROUNDING, OID_ORTE_ROUND, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2387 		}
2388 
2389 		createUnaryTestCases(testCases, OID_CONV_FROM_FP32, V_CONV_DENORM_SMALLER, V_ZERO);
2390 		createUnaryTestCases(testCases, OID_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO);
2391 		createUnaryTestCases(testCases, OID_CONV_FROM_FP32, V_CONV_DENORM_SMALLER, V_ZERO, true);
2392 		createUnaryTestCases(testCases, OID_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO, true);
2393 
2394 	}
2395 	else if (typeTestResults->floatType() == FP32)
2396 	{
2397 		if (argumentsFromInput)
2398 		{
2399 			// convert from fp64 to fp32
2400 			testCases.push_back(OTC("rounding_rte_conv_from_fp64", B_RTE_ROUNDING, OID_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP32_RTE_RESULT));
2401 			testCases.push_back(OTC("rounding_rtz_conv_from_fp64", B_RTZ_ROUNDING, OID_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP32_RTZ_RESULT));
2402 
2403 			testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64", B_RTE_ROUNDING, OID_SCONST_CONV_FROM_FP64_TO_FP32, V_UNUSED, V_UNUSED, V_CONV_TO_FP32_RTE_RESULT));
2404 			testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64", B_RTZ_ROUNDING, OID_SCONST_CONV_FROM_FP64_TO_FP32, V_UNUSED, V_UNUSED, V_CONV_TO_FP32_RTZ_RESULT));
2405 		}
2406 		else
2407 		{
2408 			// PackHalf2x16 - verification done in SPIR-V
2409 			testCases.push_back(OTC("pack_half_denorm_preserve",		B_DENORM_PRESERVE,	OID_PH_DENORM,	V_UNUSED, V_UNUSED, V_ONE));
2410 
2411 			// UnpackHalf2x16 - custom arguments defined as constants
2412 			testCases.push_back(OTC("upack_half_denorm_flush_to_zero",	B_DENORM_FLUSH,		OID_UPH_DENORM,	V_UNUSED, V_UNUSED, V_ZERO));
2413 			testCases.push_back(OTC("upack_half_denorm_preserve",		B_DENORM_PRESERVE,	OID_UPH_DENORM,	V_UNUSED, V_UNUSED, V_CONV_DENORM_SMALLER));
2414 		}
2415 
2416 		createUnaryTestCases(testCases, OID_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP32);
2417 		createUnaryTestCases(testCases, OID_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP32, true);
2418 		createUnaryTestCases(testCases, OID_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO);
2419 	}
2420 	else // FP64
2421 	{
2422 		if (!argumentsFromInput)
2423 		{
2424 			// PackDouble2x32 - custom arguments defined as constants
2425 			testCases.push_back(OTC("pack_double_denorm_preserve",			B_DENORM_PRESERVE,	OID_PD_DENORM,			V_UNUSED, V_UNUSED, V_DENORM));
2426 
2427 			// UnpackDouble2x32 - verification done in SPIR-V
2428 			testCases.push_back(OTC("upack_double_denorm_flush_to_zero",	B_DENORM_FLUSH,		OID_UPD_DENORM_FLUSH,		V_DENORM, V_UNUSED, V_ONE));
2429 			testCases.push_back(OTC("upack_double_denorm_preserve",			B_DENORM_PRESERVE,	OID_UPD_DENORM_PRESERVE,	V_DENORM, V_UNUSED, V_ONE));
2430 		}
2431 
2432 		createUnaryTestCases(testCases, OID_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP64);
2433 		createUnaryTestCases(testCases, OID_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP64, true);
2434 		createUnaryTestCases(testCases, OID_CONV_FROM_FP32, V_CONV_DENORM_BIGGER, V_ZERO_OR_FP32_DENORM_TO_FP64);
2435 	}
2436 }
2437 
getOperation(OperationId id) const2438 const Operation& TestCasesBuilder::getOperation(OperationId id) const
2439 {
2440 	return m_operations.at(id);
2441 }
2442 
createUnaryTestCases(vector<OperationTestCase> & testCases,OperationId operationId,ValueId denormPreserveResult,ValueId denormFTZResult,bool fp16WithoutStorage) const2443 void TestCasesBuilder::createUnaryTestCases(vector<OperationTestCase>& testCases, OperationId operationId, ValueId denormPreserveResult, ValueId denormFTZResult, bool fp16WithoutStorage) const
2444 {
2445 	if (fp16WithoutStorage)
2446 	{
2447 		// Denom - Preserve
2448 		testCases.push_back(OTC("op_denorm_preserve_nostorage",		B_DENORM_PRESERVE,	operationId, V_DENORM,	V_UNUSED, denormPreserveResult, true));
2449 
2450 		// Denorm - FlushToZero
2451 		testCases.push_back(OTC("op_denorm_flush_to_zero_nostorage",	B_DENORM_FLUSH,		operationId, V_DENORM,	V_UNUSED, denormFTZResult, true));
2452 
2453 		// Signed Zero Inf Nan - Preserve
2454 		testCases.push_back(OTC("op_zero_preserve_nostorage",			B_ZIN_PRESERVE,		operationId, V_ZERO,		V_UNUSED, V_ZERO, true));
2455 		testCases.push_back(OTC("op_signed_zero_preserve_nostorage",	B_ZIN_PRESERVE,		operationId, V_MINUS_ZERO,	V_UNUSED, V_MINUS_ZERO, true));
2456 		testCases.push_back(OTC("op_inf_preserve_nostorage",			B_ZIN_PRESERVE,		operationId, V_INF,			V_UNUSED, V_INF, true));
2457 		testCases.push_back(OTC("op_nan_preserve_nostorage",			B_ZIN_PRESERVE,		operationId, V_NAN,			V_UNUSED, V_NAN, true));
2458 	}
2459 	else
2460 	{
2461 		// Denom - Preserve
2462 		testCases.push_back(OTC("op_denorm_preserve",		B_DENORM_PRESERVE,	operationId, V_DENORM,	V_UNUSED, denormPreserveResult));
2463 
2464 		// Denorm - FlushToZero
2465 		testCases.push_back(OTC("op_denorm_flush_to_zero",	B_DENORM_FLUSH,		operationId, V_DENORM,	V_UNUSED, denormFTZResult));
2466 
2467 		// Signed Zero Inf Nan - Preserve
2468 		testCases.push_back(OTC("op_zero_preserve",			B_ZIN_PRESERVE,		operationId, V_ZERO,		V_UNUSED, V_ZERO));
2469 		testCases.push_back(OTC("op_signed_zero_preserve",	B_ZIN_PRESERVE,		operationId, V_MINUS_ZERO,	V_UNUSED, V_MINUS_ZERO));
2470 		testCases.push_back(OTC("op_inf_preserve",			B_ZIN_PRESERVE,		operationId, V_INF,			V_UNUSED, V_INF));
2471 		testCases.push_back(OTC("op_nan_preserve",			B_ZIN_PRESERVE,		operationId, V_NAN,			V_UNUSED, V_NAN));
2472 	}
2473 }
2474 
2475 template <typename TYPE, typename FLOAT_TYPE>
isZeroOrOtherValue(const TYPE & returnedFloat,ValueId secondAcceptableResult,TestLog & log)2476 bool isZeroOrOtherValue(const TYPE& returnedFloat, ValueId secondAcceptableResult, TestLog& log)
2477 {
2478 	if (returnedFloat.isZero() && !returnedFloat.signBit())
2479 		return true;
2480 
2481 	TypeValues<FLOAT_TYPE> typeValues;
2482 	typedef typename TYPE::StorageType SType;
2483 	typename RawConvert<FLOAT_TYPE, SType>::Value value;
2484 	value.fp = typeValues.getValue(secondAcceptableResult);
2485 
2486 	if (returnedFloat.bits() == value.ui)
2487 		return true;
2488 
2489 	log << TestLog::Message << "Expected 0 or " << toHex(value.ui)
2490 		<< " (" << value.fp << ")" << TestLog::EndMessage;
2491 	return false;
2492 }
2493 
2494 template <typename TYPE>
isAcosResultCorrect(const TYPE & returnedFloat,TestLog & log)2495 bool isAcosResultCorrect(const TYPE& returnedFloat, TestLog& log)
2496 {
2497 	// pi/2 is result of acos(0) which in the specs is defined as equivalent to
2498 	// atan2(sqrt(1.0 - x^2), x), where atan2 has 4096 ULP, sqrt is equivalent to
2499 	// 1.0 /inversesqrt(), inversesqrt() is 2 ULP and rcp is another 2.5 ULP
2500 
2501 	double precision = 0;
2502 	const double piDiv2 = 3.14159265358979323846 / 2;
2503 	if (returnedFloat.MANTISSA_BITS == 23)
2504 	{
2505 		FloatFormat fp32Format(-126, 127, 23, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
2506 		precision = fp32Format.ulp(piDiv2, 4096.0);
2507 	}
2508 	else
2509 	{
2510 		FloatFormat fp16Format(-14, 15, 10, true, tcu::MAYBE);
2511 		precision = fp16Format.ulp(piDiv2, 5.0);
2512 	}
2513 
2514 	if (deAbs(returnedFloat.asDouble() - piDiv2) < precision)
2515 		return true;
2516 
2517 	log << TestLog::Message << "Expected result to be in range"
2518 		<< " (" << piDiv2 - precision << ", " << piDiv2 + precision << "), got "
2519 		<< returnedFloat.asDouble() << TestLog::EndMessage;
2520 	return false;
2521 }
2522 
2523 template <typename TYPE>
isCosResultCorrect(const TYPE & returnedFloat,TestLog & log)2524 bool isCosResultCorrect(const TYPE& returnedFloat, TestLog& log)
2525 {
2526 	// for cos(x) with x between -pi and pi, the precision error is 2^-11 for fp32 and 2^-7 for fp16.
2527 	double precision = returnedFloat.MANTISSA_BITS == 23 ? dePow(2, -11) : dePow(2, -7);
2528 	const double expected = 1.0;
2529 
2530 	if (deAbs(returnedFloat.asDouble() - expected) < precision)
2531 		return true;
2532 
2533 	log << TestLog::Message << "Expected result to be in range"
2534 		<< " (" << expected - precision << ", " << expected + precision << "), got "
2535 		<< returnedFloat.asDouble() << TestLog::EndMessage;
2536 	return false;
2537 }
2538 
2539 template <typename FLOAT_TYPE>
getFloatTypeAsDouble(FLOAT_TYPE param)2540 double getFloatTypeAsDouble(FLOAT_TYPE param)
2541 {
2542 	return param;
2543 }
getFloatTypeAsDouble(deFloat16 param)2544 template<> double getFloatTypeAsDouble(deFloat16 param)
2545 {
2546 	return deFloat16To64(param);
2547 }
2548 
2549 
getPrecisionAt(double value,float ulp,int mantissaBits)2550 double getPrecisionAt(double value, float ulp, int mantissaBits)
2551 {
2552 	if (mantissaBits == 23)
2553 	{
2554 		FloatFormat fp32Format(-126, 127, 23, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
2555 		return fp32Format.ulp(value, ulp);
2556 	}
2557 	else if (mantissaBits == 52)
2558 	{
2559 		FloatFormat fp32Format(-1022, 1023, 52, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
2560 		return fp32Format.ulp(value, ulp);
2561 	}
2562 	else
2563 	{
2564 		DE_ASSERT(mantissaBits == 10);
2565 		FloatFormat fp16Format(-14, 15, 10, true, tcu::MAYBE);
2566 		return fp16Format.ulp(value, ulp);
2567 	}
2568 }
2569 
2570 template <typename TYPE, typename FLOAT_TYPE, typename REF_FUNCTION>
isLogResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,REF_FUNCTION refFunction,TestLog & log)2571 bool isLogResultCorrect(const TYPE& returnedFloat, FLOAT_TYPE param, REF_FUNCTION refFunction, TestLog& log)
2572 {
2573 	if (returnedFloat.isInf() && returnedFloat.signBit())
2574 		return true;
2575 
2576 	const double expected	= refFunction(getFloatTypeAsDouble(param));
2577 	const double precision	= getPrecisionAt(expected, 3.0, returnedFloat.MANTISSA_BITS);
2578 
2579 	if (deAbs(returnedFloat.asDouble() - expected) < precision)
2580 		return true;
2581 
2582 	log << TestLog::Message << "Expected result to be -INF or in range"
2583 		<< " (" << expected - precision << ", " << expected + precision << "), got "
2584 		<< returnedFloat.asDouble() << TestLog::EndMessage;
2585 	return false;
2586 }
2587 
2588 template <typename TYPE, typename FLOAT_TYPE>
isInverseSqrtResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,TestLog & log)2589 bool isInverseSqrtResultCorrect(const TYPE& returnedFloat, FLOAT_TYPE param, TestLog& log)
2590 {
2591 	if (returnedFloat.isInf() && !returnedFloat.signBit())
2592 		return true;
2593 
2594 	const double expected	= 1.0/ deSqrt(getFloatTypeAsDouble(param));
2595 	const double precision	= getPrecisionAt(expected, 2.0, returnedFloat.MANTISSA_BITS);
2596 
2597 	if (deAbs(returnedFloat.asDouble() - expected) < precision)
2598 		return true;
2599 
2600 	log << TestLog::Message << "Expected result to be INF or in range"
2601 		<< " (" << expected - precision << ", " << expected + precision << "), got "
2602 		<< returnedFloat.asDouble() << TestLog::EndMessage;
2603 	return false;
2604 }
2605 
2606 template <typename TYPE, typename FLOAT_TYPE>
isSqrtResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,TestLog & log)2607 bool isSqrtResultCorrect(const TYPE& returnedFloat, FLOAT_TYPE param, TestLog& log)
2608 {
2609 	if (returnedFloat.isZero() && !returnedFloat.signBit())
2610 		return true;
2611 
2612 
2613 	const double expected				= deSqrt(getFloatTypeAsDouble(param));
2614 	const double expectedInverseSqrt	= 1.0 / expected;
2615 	const double inverseSqrtPrecision	= getPrecisionAt(expectedInverseSqrt, 2.0, returnedFloat.MANTISSA_BITS);
2616 
2617 	double expectedMin = deMin(1.0 / (expectedInverseSqrt - inverseSqrtPrecision), 1.0 / (expectedInverseSqrt + inverseSqrtPrecision));
2618 	double expectedMax = deMax(1.0 / (expectedInverseSqrt - inverseSqrtPrecision), 1.0 / (expectedInverseSqrt + inverseSqrtPrecision));
2619 
2620 	expectedMin -= getPrecisionAt(expectedMin, 2.5, returnedFloat.MANTISSA_BITS);
2621 	expectedMax += getPrecisionAt(expectedMax, 2.5, returnedFloat.MANTISSA_BITS);
2622 
2623 	if (returnedFloat.asDouble() >= expectedMin  && returnedFloat.asDouble() <= expectedMax)
2624 		return true;
2625 
2626 	log << TestLog::Message << "Expected result to be +0 or in range"
2627 		<< " (" << expectedMin << ", " << expectedMax << "), got "
2628 		<< returnedFloat.asDouble() << TestLog::EndMessage;
2629 	return false;
2630 }
2631 
2632 // Function used to compare test result with expected output.
2633 // TYPE can be Float16, Float32 or Float64.
2634 // FLOAT_TYPE can be deFloat16, float, double.
2635 template <typename TYPE, typename FLOAT_TYPE>
compareBytes(vector<deUint8> & expectedBytes,AllocationSp outputAlloc,TestLog & log)2636 bool compareBytes(vector<deUint8>& expectedBytes, AllocationSp outputAlloc, TestLog& log)
2637 {
2638 	const TYPE* returned	= static_cast<const TYPE*>(outputAlloc->getHostPtr());
2639 	const TYPE* fValueId	= reinterpret_cast<const TYPE*>(&expectedBytes.front());
2640 
2641 	// all test return single value
2642 	// Fp16 nostorage tests get their values from a deUint32 value, but we create the
2643 	// buffer with the same size for both cases: 4 bytes.
2644 	if (sizeof(TYPE) == 2u)
2645 		DE_ASSERT((expectedBytes.size() / sizeof(TYPE)) == 2);
2646 	else
2647 		DE_ASSERT((expectedBytes.size() / sizeof(TYPE)) == 1);
2648 
2649 	// during test setup we do not store expected value but id that can be used to
2650 	// retrieve actual value - this is done to handle special cases like multiple
2651 	// allowed results or epsilon checks for some cases
2652 	// note that this is workaround - this should be done by changing
2653 	// ComputerShaderCase and GraphicsShaderCase so that additional arguments can
2654 	// be passed to this verification callback
2655 	typedef typename TYPE::StorageType SType;
2656 	SType		expectedInt		= fValueId[0].bits();
2657 	ValueId		expectedValueId	= static_cast<ValueId>(expectedInt);
2658 
2659 	// something went wrong, expected value cant be V_UNUSED,
2660 	// if this is the case then test shouldn't be created at all
2661 	DE_ASSERT(expectedValueId != V_UNUSED);
2662 
2663 	TYPE returnedFloat = returned[0];
2664 
2665 	log << TestLog::Message << "Calculated result: " << toHex(returnedFloat.bits())
2666 		<< " (" << returnedFloat.asFloat() << ")" << TestLog::EndMessage;
2667 
2668 	if (expectedValueId == V_NAN)
2669 	{
2670 		if (returnedFloat.isNaN())
2671 			return true;
2672 
2673 		log << TestLog::Message << "Expected NaN" << TestLog::EndMessage;
2674 		return false;
2675 	}
2676 
2677 	if (expectedValueId == V_DENORM)
2678 	{
2679 		if (returnedFloat.isDenorm())
2680 			return true;
2681 
2682 		log << TestLog::Message << "Expected Denorm" << TestLog::EndMessage;
2683 		return false;
2684 	}
2685 
2686 	// handle multiple acceptable results cases
2687 	if (expectedValueId == V_ZERO_OR_MINUS_ZERO)
2688 	{
2689 		if (returnedFloat.isZero())
2690 			return true;
2691 
2692 		log << TestLog::Message << "Expected 0 or -0" << TestLog::EndMessage;
2693 		return false;
2694 	}
2695 	if (expectedValueId == V_ZERO_OR_ONE)
2696 		return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_ONE, log);
2697 	if ((expectedValueId == V_ZERO_OR_FP16_DENORM_TO_FP32) || (expectedValueId == V_ZERO_OR_FP16_DENORM_TO_FP64))
2698 		return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_CONV_DENORM_SMALLER, log);
2699 	if (expectedValueId == V_ZERO_OR_FP32_DENORM_TO_FP64)
2700 		return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_CONV_DENORM_BIGGER, log);
2701 	if (expectedValueId == V_ZERO_OR_DENORM_TIMES_TWO)
2702 	{
2703 		// this expected value is only needed for fp16
2704 		DE_ASSERT(returnedFloat.EXPONENT_BIAS == 15);
2705 		return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_DENORM_TIMES_TWO, log);
2706 	}
2707 	if (expectedValueId == V_MINUS_ONE_OR_CLOSE)
2708 	{
2709 		// this expected value is only needed for fp16
2710 		DE_ASSERT(returnedFloat.EXPONENT_BIAS == 15);
2711 		typename TYPE::StorageType returnedValue = returnedFloat.bits();
2712 		return (returnedValue == 0xbc00) || (returnedValue == 0xbbff);
2713 	}
2714 
2715 	// handle trigonometric operations precision errors
2716 	if (expectedValueId == V_TRIG_ONE)
2717 		return isCosResultCorrect<TYPE>(returnedFloat, log);
2718 
2719 	// handle acos(0) case
2720 	if (expectedValueId == V_PI_DIV_2)
2721 		return isAcosResultCorrect<TYPE>(returnedFloat, log);
2722 
2723 	TypeValues<FLOAT_TYPE> typeValues;
2724 
2725 	if (expectedValueId == V_MINUS_INF_OR_LOG_DENORM)
2726 		return isLogResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), deLog, log);
2727 
2728 	if (expectedValueId == V_MINUS_INF_OR_LOG2_DENORM)
2729 		return isLogResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), deLog2, log);
2730 
2731 	if (expectedValueId == V_ZERO_OR_SQRT_DENORM)
2732 		return isSqrtResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), log);
2733 
2734 	if (expectedValueId == V_INF_OR_INV_SQRT_DENORM)
2735 		return isInverseSqrtResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), log);
2736 
2737 
2738 	typename RawConvert<FLOAT_TYPE, SType>::Value value;
2739 	value.fp = typeValues.getValue(expectedValueId);
2740 
2741 	if (returnedFloat.bits() == value.ui)
2742 		return true;
2743 
2744 	log << TestLog::Message << "Expected " << toHex(value.ui)
2745 		<< " (" << value.fp << ")" << TestLog::EndMessage;
2746 	return false;
2747 }
2748 
2749 template <typename TYPE, typename FLOAT_TYPE>
checkFloats(const vector<Resource> &,const vector<AllocationSp> & outputAllocs,const vector<Resource> & expectedOutputs,TestLog & log)2750 bool checkFloats (const vector<Resource>&		,
2751 				  const vector<AllocationSp>&	outputAllocs,
2752 				  const vector<Resource>&		expectedOutputs,
2753 				  TestLog&						log)
2754 {
2755 	if (outputAllocs.size() != expectedOutputs.size())
2756 		return false;
2757 
2758 	for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
2759 	{
2760 		vector<deUint8> expectedBytes;
2761 		expectedOutputs[outputNdx].getBytes(expectedBytes);
2762 
2763 		if (!compareBytes<TYPE, FLOAT_TYPE>(expectedBytes, outputAllocs[outputNdx], log))
2764 			return false;
2765 	}
2766 
2767 	return true;
2768 }
2769 
checkMixedFloats(const vector<Resource> &,const vector<AllocationSp> & outputAllocs,const vector<Resource> & expectedOutputs,TestLog & log)2770 bool checkMixedFloats (const vector<Resource>&		,
2771 					   const vector<AllocationSp>&	outputAllocs,
2772 					   const vector<Resource>&		expectedOutputs,
2773 					   TestLog&						log)
2774 {
2775 	// this function validates buffers containing floats of diferent widths, order is not important
2776 
2777 	if (outputAllocs.size() != expectedOutputs.size())
2778 		return false;
2779 
2780 	// The comparison function depends on the data type stored in the resource.
2781 	using compareFun = bool (*)(vector<deUint8>& expectedBytes, AllocationSp outputAlloc, TestLog& log);
2782 	const map<BufferDataType, compareFun> compareMap =
2783 	{
2784 		{ BufferDataType::DATA_FP16, compareBytes<Float16, deFloat16> },
2785 		{ BufferDataType::DATA_FP32, compareBytes<Float32, float> },
2786 		{ BufferDataType::DATA_FP64, compareBytes<Float64, double>},
2787 	};
2788 
2789 	vector<deUint8> expectedBytes;
2790 	bool			allResultsAreCorrect	= true;
2791 	int				resultIndex				= static_cast<int>(outputAllocs.size());
2792 
2793 	while (resultIndex--)
2794 	{
2795 		expectedOutputs[resultIndex].getBytes(expectedBytes);
2796 		BufferDataType type		 = static_cast<BufferDataType>(reinterpret_cast<std::uintptr_t>(expectedOutputs[resultIndex].getUserData()));
2797 		allResultsAreCorrect	&= compareMap.at(type)(expectedBytes, outputAllocs[resultIndex], log);
2798 	}
2799 
2800 	return allResultsAreCorrect;
2801 }
2802 
2803 // Base class for ComputeTestGroupBuilder and GrephicstestGroupBuilder classes.
2804 // It contains all functionalities that are used by both child classes.
2805 class TestGroupBuilderBase
2806 {
2807 public:
2808 
2809 	TestGroupBuilderBase();
2810 	virtual ~TestGroupBuilderBase() = default;
2811 
2812 	virtual void createOperationTests(TestCaseGroup* parentGroup,
2813 									  const char* groupName,
2814 									  FloatType floatType,
2815 									  bool argumentsFromInput) = 0;
2816 
2817 	virtual void createSettingsTests(TestCaseGroup* parentGroup) = 0;
2818 
2819 protected:
2820 
2821 	typedef vector<OperationTestCase> TestCaseVect;
2822 
2823 	// Structure containing all data required to create single operation test.
2824 	struct OperationTestCaseInfo
2825 	{
2826 		FloatType					outFloatType;
2827 		bool						argumentsFromInput;
2828 		VkShaderStageFlagBits		testedStage;
2829 		const Operation&			operation;
2830 		const OperationTestCase&	testCase;
2831 	};
2832 
2833 	// Mode used by SettingsTestCaseInfo to specify what settings do we want to test.
2834 	enum SettingsMode
2835 	{
2836 		SM_ROUNDING			= 0,
2837 		SM_DENORMS
2838 	};
2839 
2840 	// Enum containing available options. When rounding is tested only SO_RTE and SO_RTZ
2841 	// should be used. SO_FLUSH and SO_PRESERVE should be used only for denorm tests.
2842 	enum SettingsOption
2843 	{
2844 		SO_UNUSED			= 0,
2845 		SO_RTE,
2846 		SO_RTZ,
2847 		SO_FLUSH,
2848 		SO_PRESERVE
2849 	};
2850 
2851 	// Structure containing all data required to create single settings test.
2852 	struct SettingsTestCaseInfo
2853 	{
2854 		const char*								name;
2855 		SettingsMode							testedMode;
2856 		VkShaderFloatControlsIndependence		independenceSetting;
2857 
2858 		SettingsOption							fp16Option;
2859 		SettingsOption							fp32Option;
2860 		SettingsOption							fp64Option;
2861 		bool									fp16Without16BitStorage;
2862 	};
2863 
2864 	void specializeOperation(const OperationTestCaseInfo&	testCaseInfo,
2865 							 SpecializedOperation&			specializedOperation) const;
2866 
2867 	void getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,
2868 											   const string inBitWidth,
2869 											   const string outBitWidth,
2870 											   string& capability,
2871 											   string& executionMode) const;
2872 
2873 	void setupVulkanFeatures(FloatType			inFloatType,
2874 							 FloatType			outFloatType,
2875 							 BehaviorFlags		behaviorFlags,
2876 							 bool				float64FeatureRequired,
2877 							 VulkanFeatures&	features) const;
2878 
2879 protected:
2880 
2881 	struct TypeData
2882 	{
2883 		TypeValuesSP		values;
2884 		TypeSnippetsSP		snippets;
2885 		TypeTestResultsSP	testResults;
2886 	};
2887 
2888 	// Type specific parameters are stored in this map.
2889 	map<FloatType, TypeData> m_typeData;
2890 
2891 	// Map converting behaviuor id to OpCapability instruction
2892 	typedef map<BehaviorFlagBits, string> BehaviorNameMap;
2893 	BehaviorNameMap m_behaviorToName;
2894 };
2895 
TestGroupBuilderBase()2896 TestGroupBuilderBase::TestGroupBuilderBase()
2897 {
2898 	m_typeData[FP16] = TypeData();
2899 	m_typeData[FP16].values			= TypeValuesSP(new TypeValues<deFloat16>);
2900 	m_typeData[FP16].snippets		= TypeSnippetsSP(new TypeSnippets<deFloat16>);
2901 	m_typeData[FP16].testResults	= TypeTestResultsSP(new TypeTestResults<deFloat16>);
2902 	m_typeData[FP32] = TypeData();
2903 	m_typeData[FP32].values			= TypeValuesSP(new TypeValues<float>);
2904 	m_typeData[FP32].snippets		= TypeSnippetsSP(new TypeSnippets<float>);
2905 	m_typeData[FP32].testResults	= TypeTestResultsSP(new TypeTestResults<float>);
2906 	m_typeData[FP64] = TypeData();
2907 	m_typeData[FP64].values			= TypeValuesSP(new TypeValues<double>);
2908 	m_typeData[FP64].snippets		= TypeSnippetsSP(new TypeSnippets<double>);
2909 	m_typeData[FP64].testResults	= TypeTestResultsSP(new TypeTestResults<double>);
2910 
2911 	m_behaviorToName[B_DENORM_PRESERVE]	= "DenormPreserve";
2912 	m_behaviorToName[B_DENORM_FLUSH]	= "DenormFlushToZero";
2913 	m_behaviorToName[B_ZIN_PRESERVE]	= "SignedZeroInfNanPreserve";
2914 	m_behaviorToName[B_RTE_ROUNDING]	= "RoundingModeRTE";
2915 	m_behaviorToName[B_RTZ_ROUNDING]	= "RoundingModeRTZ";
2916 }
2917 
specializeOperation(const OperationTestCaseInfo & testCaseInfo,SpecializedOperation & specializedOperation) const2918 void TestGroupBuilderBase::specializeOperation (const OperationTestCaseInfo&	testCaseInfo,
2919 												SpecializedOperation&			specializedOperation) const
2920 {
2921 	const string		typeToken		= "_float";
2922 	const string		widthToken		= "${float_width}";
2923 
2924 	FloatType				outFloatType	= testCaseInfo.outFloatType;
2925 	const Operation&		operation		= testCaseInfo.operation;
2926 	const TypeSnippetsSP	outTypeSnippets	= m_typeData.at(outFloatType).snippets;
2927 	const bool				inputRestricted	= operation.isInputTypeRestricted;
2928 	FloatType				inFloatType		= operation.restrictedInputType;
2929 
2930 	// usually input type is same as output but this is not the case for conversion
2931 	// operations; in those cases operation definitions have restricted input type
2932 	inFloatType = inputRestricted ? inFloatType : outFloatType;
2933 
2934 	TypeSnippetsSP inTypeSnippets = m_typeData.at(inFloatType).snippets;
2935 
2936 	const string inTypePrefix	= string("_f") + inTypeSnippets->bitWidth;
2937 	const string outTypePrefix	= string("_f") + outTypeSnippets->bitWidth;
2938 
2939 	specializedOperation.constants		= replace(operation.constants, typeToken, inTypePrefix);
2940 	specializedOperation.annotations	= replace(operation.annotations, widthToken, outTypeSnippets->bitWidth);
2941 	specializedOperation.types			= replace(operation.types, typeToken, outTypePrefix);
2942 	specializedOperation.variables		= replace(operation.variables, typeToken, outTypePrefix);
2943 	specializedOperation.functions		= replace(operation.functions, typeToken, outTypePrefix);
2944 	specializedOperation.commands		= replace(operation.commands, typeToken, outTypePrefix);
2945 
2946 	specializedOperation.inFloatType				= inFloatType;
2947 	specializedOperation.inTypeSnippets				= inTypeSnippets;
2948 	specializedOperation.outTypeSnippets			= outTypeSnippets;
2949 	specializedOperation.argumentsUsesFloatConstant	= 0;
2950 
2951 	if (operation.isSpecConstant)
2952 		return;
2953 
2954 	// select way arguments are prepared
2955 	if (testCaseInfo.argumentsFromInput)
2956 	{
2957 		// read arguments from input SSBO in main function
2958 		specializedOperation.arguments = inTypeSnippets->argumentsFromInputSnippet;
2959 
2960 		if (inFloatType == FP16 && testCaseInfo.testCase.fp16Without16BitStorage)
2961 			specializedOperation.arguments = inTypeSnippets->argumentsFromInputFp16Snippet;
2962 	}
2963 	else
2964 	{
2965 		// generate proper values in main function
2966 		const string arg1 = "%arg1                 = ";
2967 		const string arg2 = "%arg2                 = ";
2968 
2969 		const ValueId* inputArguments = testCaseInfo.testCase.input;
2970 		if (inputArguments[0] != V_UNUSED)
2971 		{
2972 			specializedOperation.arguments					= arg1 + inTypeSnippets->valueIdToSnippetArgMap.at(inputArguments[0]);
2973 			specializedOperation.argumentsUsesFloatConstant	|= B_STATEMENT_USAGE_ARGS_CONST_FLOAT;
2974 		}
2975 		if (inputArguments[1] != V_UNUSED)
2976 		{
2977 			specializedOperation.arguments					+= arg2 + inTypeSnippets->valueIdToSnippetArgMap.at(inputArguments[1]);
2978 			specializedOperation.argumentsUsesFloatConstant	|= B_STATEMENT_USAGE_ARGS_CONST_FLOAT;
2979 		}
2980 	}
2981 }
2982 
2983 
getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,const string inBitWidth,const string outBitWidth,string & capability,string & executionMode) const2984 void TestGroupBuilderBase::getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,
2985 																 const string inBitWidth,
2986 																 const string outBitWidth,
2987 																 string& capability,
2988 																 string& executionMode) const
2989 {
2990 	// iterate over all behaviours and request those that are needed
2991 	BehaviorNameMap::const_iterator it = m_behaviorToName.begin();
2992 	while (it != m_behaviorToName.end())
2993 	{
2994 		BehaviorFlagBits	behaviorId		= it->first;
2995 		string				behaviorName	= it->second;
2996 
2997 		if (behaviorFlags & behaviorId)
2998 		{
2999 			capability += "OpCapability " + behaviorName + "\n";
3000 
3001 			// rounding mode should be obeyed for destination type
3002 			bool rounding = (behaviorId == B_RTE_ROUNDING) || (behaviorId == B_RTZ_ROUNDING);
3003 			executionMode += "OpExecutionMode %main " + behaviorName + " " +
3004 							 (rounding ? outBitWidth : inBitWidth) + "\n";
3005 		}
3006 
3007 		++it;
3008 	}
3009 
3010 	DE_ASSERT(!capability.empty() && !executionMode.empty());
3011 }
3012 
setupVulkanFeatures(FloatType inFloatType,FloatType outFloatType,BehaviorFlags behaviorFlags,bool float64FeatureRequired,VulkanFeatures & features) const3013 void TestGroupBuilderBase::setupVulkanFeatures(FloatType		inFloatType,
3014 											   FloatType		outFloatType,
3015 											   BehaviorFlags	behaviorFlags,
3016 											   bool				float64FeatureRequired,
3017 											   VulkanFeatures&	features) const
3018 {
3019 	features.coreFeatures.shaderFloat64 = float64FeatureRequired;
3020 
3021 	// request proper float controls features
3022 	vk::VkPhysicalDeviceFloatControlsProperties& floatControls = features.floatControlsProperties;
3023 
3024 	// rounding mode should obey the destination type
3025 	bool rteRounding = (behaviorFlags & B_RTE_ROUNDING) != 0;
3026 	bool rtzRounding = (behaviorFlags & B_RTZ_ROUNDING) != 0;
3027 	if (rteRounding || rtzRounding)
3028 	{
3029 		switch(outFloatType)
3030 		{
3031 		case FP16:
3032 			floatControls.shaderRoundingModeRTEFloat16 = rteRounding;
3033 			floatControls.shaderRoundingModeRTZFloat16 = rtzRounding;
3034 			return;
3035 		case FP32:
3036 			floatControls.shaderRoundingModeRTEFloat32 = rteRounding;
3037 			floatControls.shaderRoundingModeRTZFloat32 = rtzRounding;
3038 			return;
3039 		case FP64:
3040 			floatControls.shaderRoundingModeRTEFloat64 = rteRounding;
3041 			floatControls.shaderRoundingModeRTZFloat64 = rtzRounding;
3042 			return;
3043 		}
3044 	}
3045 
3046 	switch(inFloatType)
3047 	{
3048 	case FP16:
3049 		floatControls.shaderDenormPreserveFloat16			= behaviorFlags & B_DENORM_PRESERVE;
3050 		floatControls.shaderDenormFlushToZeroFloat16		= behaviorFlags & B_DENORM_FLUSH;
3051 		floatControls.shaderSignedZeroInfNanPreserveFloat16	= behaviorFlags & B_ZIN_PRESERVE;
3052 		return;
3053 	case FP32:
3054 		floatControls.shaderDenormPreserveFloat32			= behaviorFlags & B_DENORM_PRESERVE;
3055 		floatControls.shaderDenormFlushToZeroFloat32		= behaviorFlags & B_DENORM_FLUSH;
3056 		floatControls.shaderSignedZeroInfNanPreserveFloat32	= behaviorFlags & B_ZIN_PRESERVE;
3057 		return;
3058 	case FP64:
3059 		floatControls.shaderDenormPreserveFloat64			= behaviorFlags & B_DENORM_PRESERVE;
3060 		floatControls.shaderDenormFlushToZeroFloat64		= behaviorFlags & B_DENORM_FLUSH;
3061 		floatControls.shaderSignedZeroInfNanPreserveFloat64	= behaviorFlags & B_ZIN_PRESERVE;
3062 		return;
3063 	}
3064 }
3065 
3066 // Test case not related to SPIR-V but executed with compute tests. It checks if specified
3067 // features are set to the same value when specific independence settings are used.
verifyIndependenceSettings(Context & context)3068 tcu::TestStatus verifyIndependenceSettings(Context& context)
3069 {
3070 	if (!context.isDeviceFunctionalitySupported("VK_KHR_shader_float_controls"))
3071 		TCU_THROW(NotSupportedError, "VK_KHR_shader_float_controls not supported");
3072 
3073 	vk::VkPhysicalDeviceFloatControlsProperties	fcProperties;
3074 	fcProperties.sType	= VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES;
3075 	fcProperties.pNext	= DE_NULL;
3076 
3077 	vk::VkPhysicalDeviceProperties2 deviceProperties;
3078 	deviceProperties.sType	= VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
3079 	deviceProperties.pNext	= &fcProperties;
3080 
3081 	auto fail = [](const string& featureGroup)
3082 	{
3083 		return tcu::TestStatus::fail(featureGroup + " features should be set to the same value");
3084 	};
3085 
3086 	const VkPhysicalDevice			physicalDevice		= context.getPhysicalDevice();
3087 	const vk::InstanceInterface&	instanceInterface	= context.getInstanceInterface();
3088 	instanceInterface.getPhysicalDeviceProperties2(physicalDevice, &deviceProperties);
3089 
3090 	if (fcProperties.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE)
3091 	{
3092 		vk::VkBool32 fp16rte = fcProperties.shaderRoundingModeRTEFloat16;
3093 		vk::VkBool32 fp32rte = fcProperties.shaderRoundingModeRTEFloat32;
3094 		vk::VkBool32 fp64rte = fcProperties.shaderRoundingModeRTEFloat64;
3095 		if ((fp16rte != fp32rte) || (fp32rte != fp64rte))
3096 			return fail("shaderRoundingModeRTEFloat*");
3097 
3098 		vk::VkBool32 fp16rtz = fcProperties.shaderRoundingModeRTZFloat16;
3099 		vk::VkBool32 fp32rtz = fcProperties.shaderRoundingModeRTZFloat32;
3100 		vk::VkBool32 fp64rtz = fcProperties.shaderRoundingModeRTZFloat64;
3101 		if ((fp16rtz != fp32rtz) || (fp32rtz != fp64rtz))
3102 			return fail("shaderRoundingModeRTZFloat*");
3103 	}
3104 	else if (fcProperties.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY)
3105 	{
3106 		vk::VkBool32 fp16rte = fcProperties.shaderRoundingModeRTEFloat16;
3107 		vk::VkBool32 fp64rte = fcProperties.shaderRoundingModeRTEFloat64;
3108 		if ((fp16rte != fp64rte))
3109 			return fail("shaderRoundingModeRTEFloat16 and 64");
3110 
3111 		vk::VkBool32 fp16rtz = fcProperties.shaderRoundingModeRTZFloat16;
3112 		vk::VkBool32 fp64rtz = fcProperties.shaderRoundingModeRTZFloat64;
3113 		if ((fp16rtz != fp64rtz))
3114 			return fail("shaderRoundingModeRTZFloat16 and 64");
3115 	}
3116 
3117 	if (fcProperties.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE)
3118 	{
3119 		vk::VkBool32 fp16flush = fcProperties.shaderDenormFlushToZeroFloat16;
3120 		vk::VkBool32 fp32flush = fcProperties.shaderDenormFlushToZeroFloat32;
3121 		vk::VkBool32 fp64flush = fcProperties.shaderDenormFlushToZeroFloat64;
3122 		if ((fp16flush != fp32flush) || (fp32flush != fp64flush))
3123 			return fail("shaderDenormFlushToZeroFloat*");
3124 
3125 		vk::VkBool32 fp16preserve = fcProperties.shaderDenormPreserveFloat16;
3126 		vk::VkBool32 fp32preserve = fcProperties.shaderDenormPreserveFloat32;
3127 		vk::VkBool32 fp64preserve = fcProperties.shaderDenormPreserveFloat64;
3128 		if ((fp16preserve != fp32preserve) || (fp32preserve != fp64preserve))
3129 			return fail("shaderDenormPreserveFloat*");
3130 	}
3131 	else if (fcProperties.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY)
3132 	{
3133 		vk::VkBool32 fp16flush = fcProperties.shaderDenormFlushToZeroFloat16;
3134 		vk::VkBool32 fp64flush = fcProperties.shaderDenormFlushToZeroFloat64;
3135 		if ((fp16flush != fp64flush))
3136 			return fail("shaderDenormFlushToZeroFloat16 and 64");
3137 
3138 		vk::VkBool32 fp16preserve = fcProperties.shaderDenormPreserveFloat16;
3139 		vk::VkBool32 fp64preserve = fcProperties.shaderDenormPreserveFloat64;
3140 		if ((fp16preserve != fp64preserve))
3141 			return fail("shaderDenormPreserveFloat16 and 64");
3142 	}
3143 
3144 	return tcu::TestStatus::pass("Pass");
3145 }
3146 
3147 // ComputeTestGroupBuilder contains logic that creates compute shaders
3148 // for all test cases. As most tests in spirv-assembly it uses functionality
3149 // implemented in vktSpvAsmComputeShaderTestUtil.cpp.
3150 class ComputeTestGroupBuilder: public TestGroupBuilderBase
3151 {
3152 public:
3153 
3154 	void init();
3155 
3156 	void createOperationTests(TestCaseGroup* parentGroup,
3157 							  const char* groupName,
3158 							  FloatType floatType,
3159 							  bool argumentsFromInput) override;
3160 
3161 	void createSettingsTests(TestCaseGroup* parentGroup) override;
3162 
3163 protected:
3164 
3165 	void fillShaderSpec(const OperationTestCaseInfo&	testCaseInfo,
3166 						ComputeShaderSpec&				csSpec) const;
3167 	void fillShaderSpec(const SettingsTestCaseInfo&		testCaseInfo,
3168 						ComputeShaderSpec&				csSpec) const;
3169 
3170 private:
3171 
3172 
3173 	StringTemplate		m_operationShaderTemplate;
3174 	StringTemplate		m_settingsShaderTemplate;
3175 	TestCasesBuilder	m_operationTestCaseBuilder;
3176 };
3177 
init()3178 void ComputeTestGroupBuilder::init()
3179 {
3180 	m_operationTestCaseBuilder.init();
3181 
3182 	// generic compute shader template with common code for all
3183 	// float types and all possible operations listed in OperationId enum
3184 	m_operationShaderTemplate.setString(
3185 		"OpCapability Shader\n"
3186 		"${capabilities}"
3187 
3188 		"OpExtension \"SPV_KHR_float_controls\"\n"
3189 		"${extensions}"
3190 
3191 		"%std450            = OpExtInstImport \"GLSL.std.450\"\n"
3192 		"OpMemoryModel Logical GLSL450\n"
3193 		"OpEntryPoint GLCompute %main \"main\" %id\n"
3194 		"OpExecutionMode %main LocalSize 1 1 1\n"
3195 		"${execution_mode}"
3196 
3197 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3198 
3199 		// some tests require additional annotations
3200 		"${annotations}"
3201 
3202 		"%type_void            = OpTypeVoid\n"
3203 		"%type_voidf           = OpTypeFunction %type_void\n"
3204 		"%type_bool            = OpTypeBool\n"
3205 		"%type_u32             = OpTypeInt 32 0\n"
3206 		"%type_i32             = OpTypeInt 32 1\n"
3207 		"%type_i32_fptr        = OpTypePointer Function %type_i32\n"
3208 		"%type_u32_vec2        = OpTypeVector %type_u32 2\n"
3209 		"%type_u32_vec3        = OpTypeVector %type_u32 3\n"
3210 		"%type_u32_vec3_ptr    = OpTypePointer Input %type_u32_vec3\n"
3211 
3212 		"%c_i32_0              = OpConstant %type_i32 0\n"
3213 		"%c_i32_1              = OpConstant %type_i32 1\n"
3214 		"%c_i32_2              = OpConstant %type_i32 2\n"
3215 		"%c_u32_1              = OpConstant %type_u32 1\n"
3216 
3217 		// if input float type has different width then output then
3218 		// both types are defined here along with all types derived from
3219 		// them that are commonly used by tests; some tests also define
3220 		// their own types (those that are needed just by this single test)
3221 		"${types}"
3222 
3223 		// SSBO definitions
3224 		"${io_definitions}"
3225 
3226 		"%id                   = OpVariable %type_u32_vec3_ptr Input\n"
3227 
3228 		// set of default constants per float type is placed here,
3229 		// operation tests can also define additional constants.
3230 		"${constants}"
3231 
3232 		// O_RETURN_VAL defines function here and becouse
3233 		// of that this token needs to be directly before main function
3234 		"${functions}"
3235 
3236 		"%main                 = OpFunction %type_void None %type_voidf\n"
3237 		"%label                = OpLabel\n"
3238 
3239 		"${variables}"
3240 
3241 		// depending on test case arguments are either read from input ssbo
3242 		// or generated in spir-v code - in later case shader input is not used
3243 		"${arguments}"
3244 
3245 		// perform test commands
3246 		"${commands}"
3247 
3248 		// save result to SSBO
3249 		"${save_result}"
3250 
3251 		"OpReturn\n"
3252 		"OpFunctionEnd\n");
3253 
3254 	m_settingsShaderTemplate.setString(
3255 		"OpCapability Shader\n"
3256 		"${capabilities}"
3257 
3258 		"OpExtension \"SPV_KHR_float_controls\"\n"
3259 		"${extensions}"
3260 
3261 		"%std450 = OpExtInstImport \"GLSL.std.450\"\n"
3262 		"OpMemoryModel Logical GLSL450\n"
3263 		"OpEntryPoint GLCompute %main \"main\" %id\n"
3264 		"OpExecutionMode %main LocalSize 1 1 1\n"
3265 		"${execution_modes}"
3266 
3267 		// annotations
3268 		"OpDecorate %SSBO_in BufferBlock\n"
3269 		"OpDecorate %ssbo_in DescriptorSet 0\n"
3270 		"OpDecorate %ssbo_in Binding 0\n"
3271 		"OpDecorate %ssbo_in NonWritable\n"
3272 		"${io_annotations}"
3273 
3274 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3275 
3276 		// types
3277 		"%type_void            = OpTypeVoid\n"
3278 		"%type_voidf           = OpTypeFunction %type_void\n"
3279 		"%type_u32             = OpTypeInt 32 0\n"
3280 		"%type_i32             = OpTypeInt 32 1\n"
3281 		"%type_i32_fptr        = OpTypePointer Function %type_i32\n"
3282 		"%type_u32_vec3        = OpTypeVector %type_u32 3\n"
3283 		"%type_u32_vec3_ptr    = OpTypePointer Input %type_u32_vec3\n"
3284 
3285 		"%c_i32_0              = OpConstant %type_i32 0\n"
3286 		"%c_i32_1              = OpConstant %type_i32 1\n"
3287 		"%c_i32_2              = OpConstant %type_i32 2\n"
3288 
3289 		"${types}"
3290 
3291 		// in SSBO definition
3292 		"%SSBO_in              = OpTypeStruct ${in_struct}\n"
3293 		"%up_SSBO_in           = OpTypePointer Uniform %SSBO_in\n"
3294 		"%ssbo_in              = OpVariable %up_SSBO_in Uniform\n"
3295 
3296 		// out SSBO definitions
3297 		"${out_definitions}"
3298 
3299 		"%id                   = OpVariable %type_u32_vec3_ptr Input\n"
3300 		"%main                 = OpFunction %type_void None %type_voidf\n"
3301 		"%label                = OpLabel\n"
3302 
3303 		"${commands}"
3304 
3305 		"${save_result}"
3306 
3307 		"OpReturn\n"
3308 		"OpFunctionEnd\n");
3309 }
3310 
createOperationTests(TestCaseGroup * parentGroup,const char * groupName,FloatType floatType,bool argumentsFromInput)3311 void ComputeTestGroupBuilder::createOperationTests(TestCaseGroup* parentGroup, const char* groupName, FloatType floatType, bool argumentsFromInput)
3312 {
3313 	TestContext&	testCtx	= parentGroup->getTestContext();
3314 	TestCaseGroup*	group	= new TestCaseGroup(testCtx, groupName);
3315 	parentGroup->addChild(group);
3316 
3317 	TestCaseVect testCases;
3318 	m_operationTestCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
3319 
3320 	TestCaseVect::const_iterator currTestCase = testCases.begin();
3321 	TestCaseVect::const_iterator lastTestCase = testCases.end();
3322 	while(currTestCase != lastTestCase)
3323 	{
3324 		const OperationTestCase& testCase = *currTestCase;
3325 		++currTestCase;
3326 
3327 		// skip cases with undefined output
3328 		if (testCase.expectedOutput == V_UNUSED)
3329 			continue;
3330 
3331 		OperationTestCaseInfo testCaseInfo =
3332 		{
3333 			floatType,
3334 			argumentsFromInput,
3335 			VK_SHADER_STAGE_COMPUTE_BIT,
3336 			m_operationTestCaseBuilder.getOperation(testCase.operationId),
3337 			testCase
3338 		};
3339 
3340 		ComputeShaderSpec	csSpec;
3341 
3342 		fillShaderSpec(testCaseInfo, csSpec);
3343 
3344 		string testName = replace(testCase.baseName, "op", testCaseInfo.operation.name);
3345 		group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), csSpec));
3346 	}
3347 }
3348 
createSettingsTests(TestCaseGroup * parentGroup)3349 void ComputeTestGroupBuilder::createSettingsTests(TestCaseGroup* parentGroup)
3350 {
3351 	TestContext&	testCtx	= parentGroup->getTestContext();
3352 	TestCaseGroup*	group	= new TestCaseGroup(testCtx, "independence_settings");
3353 	parentGroup->addChild(group);
3354 
3355 	using SFCI = VkShaderFloatControlsIndependence;
3356 	const SFCI independence32	= VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
3357 	const SFCI independenceAll	= VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
3358 
3359 	vector<SettingsTestCaseInfo> testCases =
3360 	{
3361 		// name															mode			independenceSetting		fp16Option		fp32Option		fp64Option		fp16Without16bitstorage
3362 
3363 		// test rounding modes when only two float widths are available
3364 		{ "rounding_ind_all_fp16_rte_fp32_rtz",							SM_ROUNDING,	independenceAll,		SO_RTE,			SO_RTZ,			SO_UNUSED,		false },
3365 		{ "rounding_ind_all_fp16_rtz_fp32_rte",							SM_ROUNDING,	independenceAll,		SO_RTZ,			SO_RTE,			SO_UNUSED,		false },
3366 		{ "rounding_ind_32_fp16_rte_fp32_rtz",							SM_ROUNDING,	independence32,			SO_RTE,			SO_RTZ,			SO_UNUSED,		false },
3367 		{ "rounding_ind_32_fp16_rtz_fp32_rte",							SM_ROUNDING,	independence32,			SO_RTZ,			SO_RTE,			SO_UNUSED,		false },
3368 		{ "rounding_ind_all_fp16_rte_fp64_rtz",							SM_ROUNDING,	independenceAll,		SO_RTE,			SO_UNUSED,		SO_RTZ,			false },
3369 		{ "rounding_ind_all_fp16_rtz_fp64_rte",							SM_ROUNDING,	independenceAll,		SO_RTZ,			SO_UNUSED,		SO_RTE,			false },
3370 		{ "rounding_ind_all_fp32_rte_fp64_rtz",							SM_ROUNDING,	independenceAll,		SO_UNUSED,		SO_RTE,			SO_RTZ,			false },
3371 		{ "rounding_ind_all_fp32_rtz_fp64_rte",							SM_ROUNDING,	independenceAll,		SO_UNUSED,		SO_RTZ,			SO_RTE,			false },
3372 		{ "rounding_ind_32_fp32_rte_fp64_rtz",							SM_ROUNDING,	independence32,			SO_UNUSED,		SO_RTE,			SO_RTZ,			false },
3373 		{ "rounding_ind_32_fp32_rtz_fp64_rte",							SM_ROUNDING,	independence32,			SO_UNUSED,		SO_RTZ,			SO_RTE,			false },
3374 
3375 		// test rounding modes when three widths are available
3376 		{ "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rtz",				SM_ROUNDING,	independenceAll,		SO_RTZ,			SO_RTE,			SO_RTZ,			false },
3377 		{ "rounding_ind_32_fp16_rtz_fp32_rte_fp64_rtz",					SM_ROUNDING,	independence32,			SO_RTZ,			SO_RTE,			SO_RTZ,			false },
3378 		{ "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rte",				SM_ROUNDING,	independenceAll,		SO_RTE,			SO_RTZ,			SO_RTE,			false },
3379 		{ "rounding_ind_32_fp16_rte_fp32_rtz_fp64_rte",					SM_ROUNDING,	independence32,			SO_RTE,			SO_RTZ,			SO_RTE,			false },
3380 		{ "rounding_ind_all_fp16_rtz_fp32_rtz_fp64_rte",				SM_ROUNDING,	independenceAll,		SO_RTZ,			SO_RTZ,			SO_RTE,			false },
3381 		{ "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rte",				SM_ROUNDING,	independenceAll,		SO_RTZ,			SO_RTE,			SO_RTE,			false },
3382 		{ "rounding_ind_all_fp16_rte_fp32_rte_fp64_rtz",				SM_ROUNDING,	independenceAll,		SO_RTE,			SO_RTE,			SO_RTZ,			false },
3383 		{ "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rtz",				SM_ROUNDING,	independenceAll,		SO_RTE,			SO_RTZ,			SO_RTZ,			false },
3384 
3385 		// test denorm settings when only two float widths are available
3386 		{ "denorm_ind_all_fp16_flush_fp32_preserve",					SM_DENORMS,		independenceAll,		SO_FLUSH,		SO_PRESERVE,	SO_UNUSED,		false },
3387 		{ "denorm_ind_all_fp16_preserve_fp32_flush",					SM_DENORMS,		independenceAll,		SO_PRESERVE,	SO_FLUSH,		SO_UNUSED,		false },
3388 		{ "denorm_ind_32_fp16_flush_fp32_preserve",						SM_DENORMS,		independence32,			SO_FLUSH,		SO_PRESERVE,	SO_UNUSED,		false },
3389 		{ "denorm_ind_32_fp16_preserve_fp32_flush",						SM_DENORMS,		independence32,			SO_PRESERVE,	SO_FLUSH,		SO_UNUSED,		false },
3390 		{ "denorm_ind_all_fp16_flush_fp64_preserve",					SM_DENORMS,		independenceAll,		SO_FLUSH,		SO_UNUSED,		SO_PRESERVE,	false },
3391 		{ "denorm_ind_all_fp16_preserve_fp64_flush",					SM_DENORMS,		independenceAll,		SO_PRESERVE,	SO_UNUSED,		SO_FLUSH,		false },
3392 		{ "denorm_ind_all_fp32_flush_fp64_preserve",					SM_DENORMS,		independenceAll,		SO_UNUSED,		SO_FLUSH,		SO_PRESERVE,	false },
3393 		{ "denorm_ind_all_fp32_preserve_fp64_flush",					SM_DENORMS,		independenceAll,		SO_UNUSED,		SO_PRESERVE,	SO_FLUSH,		false },
3394 		{ "denorm_ind_32_fp32_flush_fp64_preserve",						SM_DENORMS,		independence32,			SO_UNUSED,		SO_FLUSH,		SO_PRESERVE,	false },
3395 		{ "denorm_ind_32_fp32_preserve_fp64_flush",						SM_DENORMS,		independence32,			SO_UNUSED,		SO_PRESERVE,	SO_FLUSH,		false },
3396 
3397 		// test denorm settings when three widths are available
3398 		{ "denorm_ind_all_fp16_preserve_fp32_flush_fp64_preserve",		SM_DENORMS,		independenceAll,		SO_PRESERVE,	SO_FLUSH,		SO_PRESERVE,	false },
3399 		{ "denorm_ind_32_fp16_preserve_fp32_flush_fp64_preserve",		SM_DENORMS,		independence32,			SO_PRESERVE,	SO_FLUSH,		SO_PRESERVE,	false },
3400 		{ "denorm_ind_all_fp16_flush_fp32_preserve_fp64_flush",			SM_DENORMS,		independenceAll,		SO_FLUSH,		SO_PRESERVE,	SO_FLUSH,		false },
3401 		{ "denorm_ind_32_fp16_flush_fp32_preserve_fp64_flush",			SM_DENORMS,		independence32,			SO_FLUSH,		SO_PRESERVE,	SO_FLUSH,		false },
3402 		{ "denorm_ind_all_fp16_preserve_fp32_preserve_fp64_flush",		SM_DENORMS,		independenceAll,		SO_PRESERVE,	SO_PRESERVE,	SO_FLUSH,		false },
3403 		{ "denorm_ind_all_fp16_preserve_fp32_flush_fp64_flush",			SM_DENORMS,		independenceAll,		SO_PRESERVE,	SO_FLUSH,		SO_FLUSH,		false },
3404 		{ "denorm_ind_all_fp16_flush_fp32_flush_fp64_preserve",			SM_DENORMS,		independenceAll,		SO_FLUSH,		SO_FLUSH,		SO_PRESERVE,	false },
3405 		{ "denorm_ind_all_fp16_flush_fp32_preserve_fp64_preserve",		SM_DENORMS,		independenceAll,		SO_FLUSH,		SO_PRESERVE,	SO_PRESERVE,	false },
3406 
3407 		// Same fp16 tests but without requiring VK_KHR_16bit_storage
3408 		// test rounding modes when only two float widths are available
3409 		{ "rounding_ind_all_fp16_rte_fp32_rtz_nostorage",				SM_ROUNDING,	independenceAll,		SO_RTE,			SO_RTZ,			SO_UNUSED,		true },
3410 		{ "rounding_ind_all_fp16_rtz_fp32_rte_nostorage",				SM_ROUNDING,	independenceAll,		SO_RTZ,			SO_RTE,			SO_UNUSED,		true },
3411 		{ "rounding_ind_32_fp16_rte_fp32_rtz_nostorage",				SM_ROUNDING,	independence32,			SO_RTE,			SO_RTZ,			SO_UNUSED,		true },
3412 		{ "rounding_ind_32_fp16_rtz_fp32_rte_nostorage",				SM_ROUNDING,	independence32,			SO_RTZ,			SO_RTE,			SO_UNUSED,		true },
3413 		{ "rounding_ind_all_fp16_rte_fp64_rtz_nostorage",				SM_ROUNDING,	independenceAll,		SO_RTE,			SO_UNUSED,		SO_RTZ,			true },
3414 		{ "rounding_ind_all_fp16_rtz_fp64_rte_nostorage",				SM_ROUNDING,	independenceAll,		SO_RTZ,			SO_UNUSED,		SO_RTE,			true },
3415 
3416 		// test rounding modes when three widths are available
3417 		{ "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rtz_nostorage",		SM_ROUNDING,	independenceAll,		SO_RTZ,			SO_RTE,			SO_RTZ,			true },
3418 		{ "rounding_ind_32_fp16_rtz_fp32_rte_fp64_rtz_nostorage",		SM_ROUNDING,	independence32,			SO_RTZ,			SO_RTE,			SO_RTZ,			true },
3419 		{ "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rte_nostorage",		SM_ROUNDING,	independenceAll,		SO_RTE,			SO_RTZ,			SO_RTE,			true },
3420 		{ "rounding_ind_32_fp16_rte_fp32_rtz_fp64_rte_nostorage",		SM_ROUNDING,	independence32,			SO_RTE,			SO_RTZ,			SO_RTE,			true },
3421 		{ "rounding_ind_all_fp16_rtz_fp32_rtz_fp64_rte_nostorage",		SM_ROUNDING,	independenceAll,		SO_RTZ,			SO_RTZ,			SO_RTE,			true },
3422 		{ "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rte_nostorage",		SM_ROUNDING,	independenceAll,		SO_RTZ,			SO_RTE,			SO_RTE,			true },
3423 		{ "rounding_ind_all_fp16_rte_fp32_rte_fp64_rtz_nostorage",		SM_ROUNDING,	independenceAll,		SO_RTE,			SO_RTE,			SO_RTZ,			true },
3424 		{ "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rtz_nostorage",		SM_ROUNDING,	independenceAll,		SO_RTE,			SO_RTZ,			SO_RTZ,			true },
3425 
3426 		// test denorm settings when only two float widths are available
3427 		{ "denorm_ind_all_fp16_flush_fp32_preserve_nostorage",			SM_DENORMS,		independenceAll,		SO_FLUSH,		SO_PRESERVE,	SO_UNUSED,		true },
3428 		{ "denorm_ind_all_fp16_preserve_fp32_flush_nostorage",			SM_DENORMS,		independenceAll,		SO_PRESERVE,	SO_FLUSH,		SO_UNUSED,		true },
3429 		{ "denorm_ind_32_fp16_flush_fp32_preserve_nostorage",			SM_DENORMS,		independence32,			SO_FLUSH,		SO_PRESERVE,	SO_UNUSED,		true },
3430 		{ "denorm_ind_32_fp16_preserve_fp32_flush_nostorage",			SM_DENORMS,		independence32,			SO_PRESERVE,	SO_FLUSH,		SO_UNUSED,		true },
3431 		{ "denorm_ind_all_fp16_flush_fp64_preserve_nostorage",			SM_DENORMS,		independenceAll,		SO_FLUSH,		SO_UNUSED,		SO_PRESERVE,	true },
3432 		{ "denorm_ind_all_fp16_preserve_fp64_flush_nostorage",			SM_DENORMS,		independenceAll,		SO_PRESERVE,	SO_UNUSED,		SO_FLUSH,		true },
3433 
3434 		// test denorm settings when three widths are available
3435 		{ "denorm_ind_all_fp16_preserve_fp32_flush_fp64_preserve_nostorage",	SM_DENORMS,		independenceAll,		SO_PRESERVE,	SO_FLUSH,		SO_PRESERVE,	true },
3436 		{ "denorm_ind_32_fp16_preserve_fp32_flush_fp64_preserve_nostorage",		SM_DENORMS,		independence32,			SO_PRESERVE,	SO_FLUSH,		SO_PRESERVE,	true },
3437 		{ "denorm_ind_all_fp16_flush_fp32_preserve_fp64_flush_nostorage",		SM_DENORMS,		independenceAll,		SO_FLUSH,		SO_PRESERVE,	SO_FLUSH,		true },
3438 		{ "denorm_ind_32_fp16_flush_fp32_preserve_fp64_flush_nostorage",		SM_DENORMS,		independence32,			SO_FLUSH,		SO_PRESERVE,	SO_FLUSH,		true },
3439 		{ "denorm_ind_all_fp16_preserve_fp32_preserve_fp64_flush_nostorage",	SM_DENORMS,		independenceAll,		SO_PRESERVE,	SO_PRESERVE,	SO_FLUSH,		true },
3440 		{ "denorm_ind_all_fp16_preserve_fp32_flush_fp64_flush_nostorage",		SM_DENORMS,		independenceAll,		SO_PRESERVE,	SO_FLUSH,		SO_FLUSH,		true },
3441 		{ "denorm_ind_all_fp16_flush_fp32_flush_fp64_preserve_nostorage",		SM_DENORMS,		independenceAll,		SO_FLUSH,		SO_FLUSH,		SO_PRESERVE,	true },
3442 		{ "denorm_ind_all_fp16_flush_fp32_preserve_fp64_preserve_nostorage",	SM_DENORMS,		independenceAll,		SO_FLUSH,		SO_PRESERVE,	SO_PRESERVE,	true },
3443 	};
3444 
3445 	for(const auto& testCase : testCases)
3446 	{
3447 		ComputeShaderSpec	csSpec;
3448 		fillShaderSpec(testCase, csSpec);
3449 		group->addChild(new SpvAsmComputeShaderCase(testCtx, testCase.name, csSpec));
3450 	}
3451 
3452 	addFunctionCase(group, "independence_settings", verifyIndependenceSettings);
3453 }
3454 
fillShaderSpec(const OperationTestCaseInfo & testCaseInfo,ComputeShaderSpec & csSpec) const3455 void ComputeTestGroupBuilder::fillShaderSpec(const OperationTestCaseInfo&	testCaseInfo,
3456 											 ComputeShaderSpec&				csSpec) const
3457 {
3458 	// LUT storing functions used to verify test results
3459 	const VerifyIOFunc checkFloatsLUT[] =
3460 	{
3461 		checkFloats<Float16, deFloat16>,
3462 		checkFloats<Float32, float>,
3463 		checkFloats<Float64, double>
3464 	};
3465 
3466 	const Operation&			testOperation	= testCaseInfo.operation;
3467 	const OperationTestCase&	testCase		= testCaseInfo.testCase;
3468 	FloatType					outFloatType	= testCaseInfo.outFloatType;
3469 
3470 	SpecializedOperation specOpData;
3471 	specializeOperation(testCaseInfo, specOpData);
3472 
3473 	TypeSnippetsSP	inTypeSnippets		= specOpData.inTypeSnippets;
3474 	TypeSnippetsSP	outTypeSnippets		= specOpData.outTypeSnippets;
3475 	FloatType		inFloatType			= specOpData.inFloatType;
3476 
3477 	bool			outFp16WithoutStorage	= (outFloatType == FP16) && testCase.fp16Without16BitStorage;
3478 	bool			inFp16WithoutStorage	= (inFloatType == FP16) && testCase.fp16Without16BitStorage;
3479 
3480 	// UnpackHalf2x16 is a corner case - it returns two 32-bit floats but
3481 	// internaly operates on fp16 and this type should be used by float controls
3482 	FloatType		inFloatTypeForCaps		= inFloatType;
3483 	string			inFloatWidthForCaps		= inTypeSnippets->bitWidth;
3484 	if (testCase.operationId == OID_UPH_DENORM)
3485 	{
3486 		inFloatTypeForCaps	= FP16;
3487 		inFloatWidthForCaps	= "16";
3488 	}
3489 
3490 	string behaviorCapability;
3491 	string behaviorExecutionMode;
3492 	getBehaviorCapabilityAndExecutionMode(testCase.behaviorFlags,
3493 										  inFloatWidthForCaps,
3494 										  outTypeSnippets->bitWidth,
3495 										  behaviorCapability,
3496 										  behaviorExecutionMode);
3497 
3498 	string capabilities		= behaviorCapability + outTypeSnippets->capabilities;
3499 	string extensions		= outTypeSnippets->extensions;
3500 	string annotations		= inTypeSnippets->inputAnnotationsSnippet + outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
3501 	string types			= outTypeSnippets->typeDefinitionsSnippet;
3502 	string constants		= outTypeSnippets->constantsDefinitionsSnippet;
3503 	string ioDefinitions	= "";
3504 
3505 	// Getting rid of 16bit_storage dependency imply replacing lots of snippets.
3506 	{
3507 		if (inFp16WithoutStorage)
3508 		{
3509 			ioDefinitions	= inTypeSnippets->inputDefinitionsFp16Snippet;
3510 		}
3511 		else
3512 		{
3513 			ioDefinitions	= inTypeSnippets->inputDefinitionsSnippet;
3514 		}
3515 
3516 		if (outFp16WithoutStorage)
3517 		{
3518 			extensions		= outTypeSnippets->extensionsFp16Without16BitStorage;
3519 			capabilities	= behaviorCapability + outTypeSnippets->capabilitiesFp16Without16BitStorage;
3520 			types			+= outTypeSnippets->typeDefinitionsFp16Snippet;
3521 			annotations	+= outTypeSnippets->typeAnnotationsFp16Snippet;
3522 			ioDefinitions	+= outTypeSnippets->outputDefinitionsFp16Snippet;
3523 		}
3524 		else
3525 		{
3526 			ioDefinitions	+= outTypeSnippets->outputDefinitionsSnippet;
3527 		}
3528 	}
3529 
3530 	bool outFp16TypeUsage	= outTypeSnippets->loadStoreRequiresShaderFloat16;
3531 	bool inFp16TypeUsage	= false;
3532 
3533 	if (testOperation.isInputTypeRestricted)
3534 	{
3535 		annotations		+= inTypeSnippets->typeAnnotationsSnippet;
3536 		types			+= inTypeSnippets->typeDefinitionsSnippet;
3537 		constants		+= inTypeSnippets->constantsDefinitionsSnippet;
3538 
3539 		if (inFp16WithoutStorage)
3540 		{
3541 			annotations		+= inTypeSnippets->typeAnnotationsFp16Snippet;
3542 			types			+= inTypeSnippets->typeDefinitionsFp16Snippet;
3543 			capabilities	+= inTypeSnippets->capabilitiesFp16Without16BitStorage;
3544 			extensions		+= inTypeSnippets->extensionsFp16Without16BitStorage;
3545 		}
3546 		else
3547 		{
3548 			capabilities	+= inTypeSnippets->capabilities;
3549 			extensions		+= inTypeSnippets->extensions;
3550 		}
3551 
3552 		inFp16TypeUsage	= inTypeSnippets->loadStoreRequiresShaderFloat16;
3553 	}
3554 
3555 	map<string, string> specializations;
3556 	specializations["extensions"]		= extensions;
3557 	specializations["execution_mode"]	= behaviorExecutionMode;
3558 	specializations["annotations"]		= annotations + specOpData.annotations;
3559 	specializations["types"]			= types + specOpData.types;
3560 	specializations["io_definitions"]	= ioDefinitions;
3561 	specializations["variables"]		= specOpData.variables;
3562 	specializations["functions"]		= specOpData.functions;
3563 	specializations["save_result"]		= (outFp16WithoutStorage ? outTypeSnippets->storeResultsFp16Snippet : outTypeSnippets->storeResultsSnippet);
3564 	specializations["arguments"]		= specOpData.arguments;
3565 	specializations["commands"]			= specOpData.commands;
3566 
3567 	// Build constants. They are only needed sometimes.
3568 	const FloatStatementUsageFlags	argsAnyFloatConstMask				= B_STATEMENT_USAGE_ARGS_CONST_FLOAT | B_STATEMENT_USAGE_ARGS_CONST_FP16 | B_STATEMENT_USAGE_ARGS_CONST_FP32 | B_STATEMENT_USAGE_ARGS_CONST_FP64;
3569 	const bool						argsUseFPConstants					= (specOpData.argumentsUsesFloatConstant & argsAnyFloatConstMask) != 0;
3570 	const FloatStatementUsageFlags	commandsAnyFloatConstMask			= B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_CONST_FP16 | B_STATEMENT_USAGE_COMMANDS_CONST_FP32 | B_STATEMENT_USAGE_COMMANDS_CONST_FP64;
3571 	const bool						commandsUseFPConstants				= (testCaseInfo.operation.statementUsageFlags & commandsAnyFloatConstMask) != 0;
3572 	const bool						needConstants						= argsUseFPConstants || commandsUseFPConstants;
3573 	const FloatStatementUsageFlags	constsFloatTypeMask					= B_STATEMENT_USAGE_CONSTS_TYPE_FLOAT | B_STATEMENT_USAGE_CONSTS_TYPE_FP16;
3574 	const bool						constsUsesFP16Type					= (testCaseInfo.operation.statementUsageFlags & constsFloatTypeMask) != 0;
3575 	const bool						loadStoreRequiresShaderFloat16		= inFp16TypeUsage || outFp16TypeUsage;
3576 	const bool						usesFP16Constants					= constsUsesFP16Type || (needConstants && loadStoreRequiresShaderFloat16);
3577 
3578 	specializations["constants"]		= "";
3579 	if (needConstants || outFp16WithoutStorage)
3580 	{
3581 		specializations["constants"]	= constants;
3582 	}
3583 	specializations["constants"]		+= specOpData.constants;
3584 
3585 	// check which format features are needed
3586 	bool float16FeatureRequired = (outFloatType == FP16) || (inFloatType == FP16);
3587 	bool float64FeatureRequired = (outFloatType == FP64) || (inFloatType == FP64);
3588 
3589 	// Determine required capabilities.
3590 	bool float16CapabilityAlreadyAdded = inFp16WithoutStorage || outFp16WithoutStorage;
3591 	if ((testOperation.floatUsage == FLOAT_ARITHMETIC && float16FeatureRequired && !float16CapabilityAlreadyAdded) || usesFP16Constants)
3592 	{
3593 		capabilities += "OpCapability Float16\n";
3594 	}
3595 	specializations["capabilities"]		= capabilities;
3596 
3597 	// specialize shader
3598 	const string shaderCode = m_operationShaderTemplate.specialize(specializations);
3599 
3600 	// construct input and output buffers of proper types
3601 	TypeValuesSP inTypeValues	= m_typeData.at(inFloatType).values;
3602 	TypeValuesSP outTypeValues	= m_typeData.at(outFloatType).values;
3603 	BufferSp inBufferSp			= inTypeValues->constructInputBuffer(testCase.input);
3604 	BufferSp outBufferSp		= outTypeValues->constructOutputBuffer(testCase.expectedOutput);
3605 	csSpec.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3606 	csSpec.outputs.push_back(Resource(outBufferSp));
3607 
3608 	// check which format features are needed
3609 	setupVulkanFeatures(inFloatTypeForCaps,		// usualy same as inFloatType - different only for UnpackHalf2x16
3610 						outFloatType,
3611 						testCase.behaviorFlags,
3612 						float64FeatureRequired,
3613 						csSpec.requestedVulkanFeatures);
3614 
3615 	csSpec.assembly			= shaderCode;
3616 	csSpec.numWorkGroups	= IVec3(1, 1, 1);
3617 	csSpec.verifyIO			= checkFloatsLUT[outFloatType];
3618 
3619 	csSpec.extensions.push_back("VK_KHR_shader_float_controls");
3620 	bool needShaderFloat16 = float16CapabilityAlreadyAdded;
3621 
3622 	if (float16FeatureRequired && !testCase.fp16Without16BitStorage)
3623 	{
3624 		csSpec.extensions.push_back("VK_KHR_16bit_storage");
3625 		csSpec.requestedVulkanFeatures.ext16BitStorage.storageBuffer16BitAccess = true;
3626 		needShaderFloat16 |= testOperation.floatUsage == FLOAT_ARITHMETIC;
3627 	}
3628 	needShaderFloat16 |= usesFP16Constants;
3629 	if (needShaderFloat16)
3630 	{
3631 		csSpec.extensions.push_back("VK_KHR_shader_float16_int8");
3632 		csSpec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
3633 	}
3634 	if (float64FeatureRequired)
3635 		csSpec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
3636 }
3637 
fillShaderSpec(const SettingsTestCaseInfo & testCaseInfo,ComputeShaderSpec & csSpec) const3638 void ComputeTestGroupBuilder::fillShaderSpec(const SettingsTestCaseInfo&	testCaseInfo,
3639 											 ComputeShaderSpec&				csSpec) const
3640 {
3641 	string		capabilities;
3642 	string		fp16behaviorName;
3643 	string		fp32behaviorName;
3644 	string		fp64behaviorName;
3645 
3646 	ValueId		addArgs[2];
3647 	ValueId		fp16resultValue;
3648 	ValueId		fp32resultValue;
3649 	ValueId		fp64resultValue;
3650 
3651 	vk::VkPhysicalDeviceFloatControlsProperties& floatControls = csSpec.requestedVulkanFeatures.floatControlsProperties;
3652 	bool fp16Required	= testCaseInfo.fp16Option != SO_UNUSED;
3653 	bool fp32Required	= testCaseInfo.fp32Option != SO_UNUSED;
3654 	bool fp64Required	= testCaseInfo.fp64Option != SO_UNUSED;
3655 
3656 	if (testCaseInfo.testedMode == SM_ROUNDING)
3657 	{
3658 		// make sure that only rounding options are used
3659 		DE_ASSERT((testCaseInfo.fp16Option != SO_FLUSH) &&
3660 				  (testCaseInfo.fp16Option != SO_PRESERVE) &&
3661 				  (testCaseInfo.fp32Option != SO_FLUSH) &&
3662 				  (testCaseInfo.fp32Option != SO_PRESERVE) &&
3663 				  (testCaseInfo.fp64Option != SO_FLUSH) &&
3664 				  (testCaseInfo.fp64Option != SO_PRESERVE));
3665 
3666 		bool fp16RteRounding	= testCaseInfo.fp16Option == SO_RTE;
3667 		bool fp32RteRounding	= testCaseInfo.fp32Option == SO_RTE;
3668 		bool fp64RteRounding	= testCaseInfo.fp64Option == SO_RTE;
3669 
3670 		const string& rte		= m_behaviorToName.at(B_RTE_ROUNDING);
3671 		const string& rtz		= m_behaviorToName.at(B_RTZ_ROUNDING);
3672 
3673 		fp16behaviorName		= fp16RteRounding ? rte : rtz;
3674 		fp32behaviorName		= fp32RteRounding ? rte : rtz;
3675 		fp64behaviorName		= fp64RteRounding ? rte : rtz;
3676 
3677 		addArgs[0]				= V_ADD_ARG_A;
3678 		addArgs[1]				= V_ADD_ARG_B;
3679 		fp16resultValue			= fp16RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
3680 		fp32resultValue			= fp32RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
3681 		fp64resultValue			= fp64RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
3682 
3683 		capabilities			= "OpCapability " + rte + "\n"
3684 								  "OpCapability " + rtz + "\n";
3685 
3686 		floatControls.roundingModeIndependence		= testCaseInfo.independenceSetting;
3687 		floatControls.denormBehaviorIndependence	= VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;
3688 		floatControls.shaderRoundingModeRTEFloat16	= fp16RteRounding;
3689 		floatControls.shaderRoundingModeRTZFloat16	= fp16Required && !fp16RteRounding;
3690 		floatControls.shaderRoundingModeRTEFloat32	= fp32RteRounding;
3691 		floatControls.shaderRoundingModeRTZFloat32	= fp32Required && !fp32RteRounding;
3692 		floatControls.shaderRoundingModeRTEFloat64	= fp64RteRounding;
3693 		floatControls.shaderRoundingModeRTZFloat64	= fp64Required && !fp64RteRounding;
3694 	}
3695 	else // SM_DENORMS
3696 	{
3697 		// make sure that only denorm options are used
3698 		DE_ASSERT((testCaseInfo.fp16Option != SO_RTE) &&
3699 				  (testCaseInfo.fp16Option != SO_RTZ) &&
3700 				  (testCaseInfo.fp32Option != SO_RTE) &&
3701 				  (testCaseInfo.fp32Option != SO_RTZ) &&
3702 				  (testCaseInfo.fp64Option != SO_RTE) &&
3703 				  (testCaseInfo.fp64Option != SO_RTZ));
3704 
3705 		bool fp16DenormPreserve		= testCaseInfo.fp16Option == SO_PRESERVE;
3706 		bool fp32DenormPreserve		= testCaseInfo.fp32Option == SO_PRESERVE;
3707 		bool fp64DenormPreserve		= testCaseInfo.fp64Option == SO_PRESERVE;
3708 
3709 		const string& preserve		= m_behaviorToName.at(B_DENORM_PRESERVE);
3710 		const string& flush			= m_behaviorToName.at(B_DENORM_FLUSH);
3711 
3712 		fp16behaviorName			= fp16DenormPreserve ? preserve : flush;
3713 		fp32behaviorName			= fp32DenormPreserve ? preserve : flush;
3714 		fp64behaviorName			= fp64DenormPreserve ? preserve : flush;
3715 
3716 		addArgs[0]					= V_DENORM;
3717 		addArgs[1]					= V_DENORM;
3718 		fp16resultValue				= fp16DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO_OR_DENORM_TIMES_TWO;
3719 		fp32resultValue				= fp32DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO;
3720 		fp64resultValue				= fp64DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO;
3721 
3722 		capabilities				= "OpCapability " + preserve + "\n"
3723 									  "OpCapability " + flush + "\n";
3724 
3725 		floatControls.denormBehaviorIndependence		= testCaseInfo.independenceSetting;
3726 		floatControls.roundingModeIndependence			= VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;
3727 		floatControls.shaderDenormPreserveFloat16		= fp16DenormPreserve;
3728 		floatControls.shaderDenormFlushToZeroFloat16	= fp16Required && !fp16DenormPreserve;
3729 		floatControls.shaderDenormPreserveFloat32		= fp32DenormPreserve;
3730 		floatControls.shaderDenormFlushToZeroFloat32	= fp32Required && !fp32DenormPreserve;
3731 		floatControls.shaderDenormPreserveFloat64		= fp64DenormPreserve;
3732 		floatControls.shaderDenormFlushToZeroFloat64	= fp64Required && !fp64DenormPreserve;
3733 	}
3734 
3735 	const auto&	fp64Data			= m_typeData.at(FP64);
3736 	const auto&	fp32Data			= m_typeData.at(FP32);
3737 	const auto&	fp16Data			= m_typeData.at(FP16);
3738 
3739 	deUint32	attributeIndex		= 0;
3740 	deUint32	attributeOffset		= 0;
3741 	string		attribute;
3742 	string		extensions			= "";
3743 	string		executionModes		= "";
3744 	string		ioAnnotations		= "";
3745 	string		types				= "";
3746 	string		inStruct			= "";
3747 	string		outDefinitions		= "";
3748 	string		commands			= "";
3749 	string		saveResult			= "";
3750 
3751 	// construct single input buffer containing arguments for all float widths
3752 	// (maxPerStageDescriptorStorageBuffers can be min 4 and we need 3 for outputs)
3753 	deUint32				inputOffset	= 0;
3754 	std::vector<deUint8>	inputData	((fp64Required * sizeof(double) + sizeof(float) + fp16Required * sizeof(deFloat16)) * 2);
3755 
3756 	// to follow storage buffer layout rules we store data in ssbo in order 64 -> 16
3757 	if (fp64Required)
3758 	{
3759 		capabilities	+= fp64Data.snippets->capabilities;
3760 		executionModes	+= "OpExecutionMode %main " + fp64behaviorName + " 64\n";
3761 		attribute		 = to_string(attributeIndex);
3762 		ioAnnotations	+= "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3763 						   fp64Data.snippets->multiOutputAnnotationsSnippet +
3764 						   "OpDecorate %ssbo_f64_out Binding " + to_string(attributeIndex+1) + "\n";
3765 		types			+= fp64Data.snippets->minTypeDefinitionsSnippet;
3766 		inStruct		+= " %type_f64_arr_2";
3767 		outDefinitions	+= fp64Data.snippets->multiOutputDefinitionsSnippet;
3768 		commands		+= replace(fp64Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
3769 						   "%result64             = OpFAdd %type_f64 %arg1_f64 %arg2_f64\n";
3770 		saveResult		+= fp64Data.snippets->multiStoreResultsSnippet;
3771 		attributeOffset += 2 * static_cast<deUint32>(sizeof(double));
3772 		attributeIndex++;
3773 
3774 		fp64Data.values->fillInputData(addArgs, inputData, inputOffset);
3775 
3776 		// construct separate buffers for outputs to make validation easier
3777 		BufferSp fp64OutBufferSp = fp64Data.values->constructOutputBuffer(fp64resultValue);
3778 		csSpec.outputs.push_back(Resource(fp64OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, reinterpret_cast<void*>(BufferDataType::DATA_FP64)));
3779 
3780 		csSpec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
3781 	}
3782 	if (fp32Required)
3783 	{
3784 		executionModes		+= "OpExecutionMode %main " + fp32behaviorName + " 32\n";
3785 		attribute			 = to_string(attributeIndex);
3786 		ioAnnotations		+= "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3787 							   fp32Data.snippets->multiOutputAnnotationsSnippet +
3788 							   "OpDecorate %ssbo_f32_out Binding " + to_string(attributeIndex+1) + "\n";
3789 		types				+= fp32Data.snippets->minTypeDefinitionsSnippet;
3790 		inStruct			+= " %type_f32_arr_2";
3791 		outDefinitions		+= fp32Data.snippets->multiOutputDefinitionsSnippet;
3792 		commands			+= replace(fp32Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
3793 							   "%result32             = OpFAdd %type_f32 %arg1_f32 %arg2_f32\n";
3794 		saveResult			+= fp32Data.snippets->multiStoreResultsSnippet;
3795 		attributeOffset		+= 2 * static_cast<deUint32>(sizeof(float));
3796 		attributeIndex++;
3797 
3798 		fp32Data.values->fillInputData(addArgs, inputData, inputOffset);
3799 
3800 		BufferSp fp32OutBufferSp = fp32Data.values->constructOutputBuffer(fp32resultValue);
3801 		csSpec.outputs.push_back(Resource(fp32OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, reinterpret_cast<void*>(BufferDataType::DATA_FP32)));
3802 	}
3803 	if (fp16Required)
3804 	{
3805 		if (testCaseInfo.fp16Without16BitStorage)
3806 		{
3807 			capabilities	+= fp16Data.snippets->capabilitiesFp16Without16BitStorage;
3808 			extensions		+= fp16Data.snippets->extensionsFp16Without16BitStorage;
3809 			executionModes	+= "OpExecutionMode %main " + fp16behaviorName + " 16\n";
3810 			attribute		 = to_string(attributeIndex);
3811 			ioAnnotations	+= "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3812 							   fp16Data.snippets->multiOutputAnnotationsFp16Snippet +
3813 							   "OpDecorate %ssbo_u32_out Binding " + to_string(attributeIndex+1) + "\n";
3814 			types			+= fp16Data.snippets->minTypeDefinitionsSnippet + fp16Data.snippets->typeDefinitionsFp16Snippet + "%type_f16_vec2        = OpTypeVector %type_f16 2\n";
3815 			inStruct		+= " %type_u32_arr_1";
3816 			outDefinitions	+= fp16Data.snippets->multiOutputDefinitionsFp16Snippet;
3817 			commands		+= replace(fp16Data.snippets->multiArgumentsFromInputFp16Snippet, "${attr}", attribute) +
3818 							   "%result16             = OpFAdd %type_f16 %arg1_f16 %arg2_f16\n";
3819 			saveResult		+= fp16Data.snippets->multiStoreResultsFp16Snippet;
3820 
3821 			csSpec.extensions.push_back("VK_KHR_shader_float16_int8");
3822 			csSpec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
3823 		}
3824 		else
3825 		{
3826 			capabilities	+= fp16Data.snippets->capabilities +
3827 							   "OpCapability Float16\n";
3828 			extensions		+= fp16Data.snippets->extensions;
3829 			executionModes	+= "OpExecutionMode %main " + fp16behaviorName + " 16\n";
3830 			attribute		= to_string(attributeIndex);
3831 			ioAnnotations	+= "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3832 							   fp16Data.snippets->multiOutputAnnotationsSnippet +
3833 							   "OpDecorate %ssbo_f16_out Binding " + to_string(attributeIndex+1) + "\n";
3834 			types			+= fp16Data.snippets->minTypeDefinitionsSnippet;
3835 			inStruct		+= " %type_f16_arr_2";
3836 			outDefinitions	+= fp16Data.snippets->multiOutputDefinitionsSnippet;
3837 			commands		+= replace(fp16Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
3838 							   "%result16             = OpFAdd %type_f16 %arg1_f16 %arg2_f16\n";
3839 			saveResult		+= fp16Data.snippets->multiStoreResultsSnippet;
3840 
3841 			csSpec.extensions.push_back("VK_KHR_16bit_storage");
3842 			csSpec.requestedVulkanFeatures.ext16BitStorage.storageBuffer16BitAccess = true;
3843 		}
3844 
3845 		fp16Data.values->fillInputData(addArgs, inputData, inputOffset);
3846 
3847 		BufferSp fp16OutBufferSp = fp16Data.values->constructOutputBuffer(fp16resultValue);
3848 		csSpec.outputs.push_back(Resource(fp16OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, reinterpret_cast<void*>(BufferDataType::DATA_FP16)));
3849 	}
3850 
3851 	BufferSp inBufferSp(new Buffer<deUint8>(inputData));
3852 	csSpec.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3853 
3854 	map<string, string> specializations =
3855 	{
3856 		{ "capabilities",		capabilities },
3857 		{ "extensions",			extensions },
3858 		{ "execution_modes",	executionModes },
3859 		{ "io_annotations",		ioAnnotations },
3860 		{ "types",				types },
3861 		{ "in_struct",			inStruct },
3862 		{ "out_definitions",	outDefinitions },
3863 		{ "commands",			commands },
3864 		{ "save_result",		saveResult }
3865 	};
3866 
3867 	// specialize shader
3868 	const string shaderCode = m_settingsShaderTemplate.specialize(specializations);
3869 
3870 	csSpec.assembly			= shaderCode;
3871 	csSpec.numWorkGroups	= IVec3(1, 1, 1);
3872 	csSpec.verifyIO			= checkMixedFloats;
3873 	csSpec.extensions.push_back("VK_KHR_shader_float_controls");
3874 }
3875 
getGraphicsShaderCode(vk::SourceCollections & dst,InstanceContext context)3876 void getGraphicsShaderCode (vk::SourceCollections& dst, InstanceContext context)
3877 {
3878 	// this function is used only by GraphicsTestGroupBuilder but it couldn't
3879 	// be implemented as a method because of how addFunctionCaseWithPrograms
3880 	// was implemented
3881 
3882 	SpirvVersion	targetSpirvVersion	= context.resources.spirvVersion;
3883 	const deUint32	vulkanVersion		= dst.usedVulkanVersion;
3884 
3885 	static const string vertexTemplate =
3886 		"OpCapability Shader\n"
3887 		"${vert_capabilities}"
3888 
3889 		"OpExtension \"SPV_KHR_float_controls\"\n"
3890 		"${vert_extensions}"
3891 
3892 		"%std450            = OpExtInstImport \"GLSL.std.450\"\n"
3893 		"OpMemoryModel Logical GLSL450\n"
3894 		"OpEntryPoint Vertex %main \"main\" %BP_stream %BP_position %BP_color %BP_gl_VertexIndex %BP_gl_InstanceIndex %BP_vertex_color %BP_vertex_result \n"
3895 		"${vert_execution_mode}"
3896 
3897 		"OpMemberDecorate %BP_gl_PerVertex 0 BuiltIn Position\n"
3898 		"OpMemberDecorate %BP_gl_PerVertex 1 BuiltIn PointSize\n"
3899 		"OpMemberDecorate %BP_gl_PerVertex 2 BuiltIn ClipDistance\n"
3900 		"OpMemberDecorate %BP_gl_PerVertex 3 BuiltIn CullDistance\n"
3901 		"OpDecorate %BP_gl_PerVertex Block\n"
3902 		"OpDecorate %BP_position Location 0\n"
3903 		"OpDecorate %BP_color Location 1\n"
3904 		"OpDecorate %BP_vertex_color Location 1\n"
3905 		"OpDecorate %BP_vertex_result Location 2\n"
3906 		"OpDecorate %BP_vertex_result Flat\n"
3907 		"OpDecorate %BP_gl_VertexIndex BuiltIn VertexIndex\n"
3908 		"OpDecorate %BP_gl_InstanceIndex BuiltIn InstanceIndex\n"
3909 
3910 		// some tests require additional annotations
3911 		"${vert_annotations}"
3912 
3913 		// types required by most of tests
3914 		"%type_void            = OpTypeVoid\n"
3915 		"%type_voidf           = OpTypeFunction %type_void\n"
3916 		"%type_bool            = OpTypeBool\n"
3917 		"%type_i32             = OpTypeInt 32 1\n"
3918 		"%type_u32             = OpTypeInt 32 0\n"
3919 		"%type_u32_vec2        = OpTypeVector %type_u32 2\n"
3920 		"%type_i32_iptr        = OpTypePointer Input %type_i32\n"
3921 		"%type_i32_optr        = OpTypePointer Output %type_i32\n"
3922 		"%type_i32_fptr        = OpTypePointer Function %type_i32\n"
3923 
3924 		// constants required by most of tests
3925 		"%c_i32_0              = OpConstant %type_i32 0\n"
3926 		"%c_i32_1              = OpConstant %type_i32 1\n"
3927 		"%c_i32_2              = OpConstant %type_i32 2\n"
3928 		"%c_u32_1              = OpConstant %type_u32 1\n"
3929 
3930 		// if input float type has different width then output then
3931 		// both types are defined here along with all types derived from
3932 		// them that are commonly used by tests; some tests also define
3933 		// their own types (those that are needed just by this single test)
3934 		"${vert_types}"
3935 
3936 		// SSBO is not universally supported for storing
3937 		// data in vertex stages - it is onle read here
3938 		"${vert_io_definitions}"
3939 
3940 		"%BP_gl_PerVertex      = OpTypeStruct %type_f32_vec4 %type_f32 %type_f32_arr_1 %type_f32_arr_1\n"
3941 		"%BP_gl_PerVertex_optr = OpTypePointer Output %BP_gl_PerVertex\n"
3942 		"%BP_stream            = OpVariable %BP_gl_PerVertex_optr Output\n"
3943 		"%BP_position          = OpVariable %type_f32_vec4_iptr Input\n"
3944 		"%BP_color             = OpVariable %type_f32_vec4_iptr Input\n"
3945 		"%BP_gl_VertexIndex    = OpVariable %type_i32_iptr Input\n"
3946 		"%BP_gl_InstanceIndex  = OpVariable %type_i32_iptr Input\n"
3947 		"%BP_vertex_color      = OpVariable %type_f32_vec4_optr Output\n"
3948 
3949 		// set of default constants per float type is placed here,
3950 		// operation tests can also define additional constants.
3951 		"${vert_constants}"
3952 
3953 		// O_RETURN_VAL defines function here and because
3954 		// of that this token needs to be directly before main function.
3955 		"${vert_functions}"
3956 
3957 		"%main                 = OpFunction %type_void None %type_voidf\n"
3958 		"%label                = OpLabel\n"
3959 
3960 		"${vert_variables}"
3961 
3962 		"%position             = OpLoad %type_f32_vec4 %BP_position\n"
3963 		"%gl_pos               = OpAccessChain %type_f32_vec4_optr %BP_stream %c_i32_0\n"
3964 		"OpStore %gl_pos %position\n"
3965 		"%color                = OpLoad %type_f32_vec4 %BP_color\n"
3966 		"OpStore %BP_vertex_color %color\n"
3967 
3968 		// this token is filled only when vertex stage is tested;
3969 		// depending on test case arguments are either read from input ssbo
3970 		// or generated in spir-v code - in later case ssbo is not used
3971 		"${vert_arguments}"
3972 
3973 		// when vertex shader is tested then test operations are performed
3974 		// here and passed to fragment stage; if fragment stage ts tested
3975 		// then ${comands} and ${vert_process_result} are rplaced with nop
3976 		"${vert_commands}"
3977 
3978 		"${vert_process_result}"
3979 
3980 		"OpReturn\n"
3981 		"OpFunctionEnd\n";
3982 
3983 
3984 	static const string fragmentTemplate =
3985 		"OpCapability Shader\n"
3986 		"${frag_capabilities}"
3987 
3988 		"OpExtension \"SPV_KHR_float_controls\"\n"
3989 		"${frag_extensions}"
3990 
3991 		"%std450            = OpExtInstImport \"GLSL.std.450\"\n"
3992 		"OpMemoryModel Logical GLSL450\n"
3993 		"OpEntryPoint Fragment %main \"main\" %BP_vertex_color %BP_vertex_result %BP_fragColor %BP_gl_FragCoord \n"
3994 		"OpExecutionMode %main OriginUpperLeft\n"
3995 		"${frag_execution_mode}"
3996 
3997 		"OpDecorate %BP_fragColor Location 0\n"
3998 		"OpDecorate %BP_vertex_color Location 1\n"
3999 		"OpDecorate %BP_vertex_result Location 2\n"
4000 		"OpDecorate %BP_vertex_result Flat\n"
4001 		"OpDecorate %BP_gl_FragCoord BuiltIn FragCoord\n"
4002 
4003 		// some tests require additional annotations
4004 		"${frag_annotations}"
4005 
4006 		// types required by most of tests
4007 		"%type_void            = OpTypeVoid\n"
4008 		"%type_voidf           = OpTypeFunction %type_void\n"
4009 		"%type_bool            = OpTypeBool\n"
4010 		"%type_i32             = OpTypeInt 32 1\n"
4011 		"%type_u32             = OpTypeInt 32 0\n"
4012 		"%type_u32_vec2        = OpTypeVector %type_u32 2\n"
4013 		"%type_i32_iptr        = OpTypePointer Input %type_i32\n"
4014 		"%type_i32_optr        = OpTypePointer Output %type_i32\n"
4015 		"%type_i32_fptr        = OpTypePointer Function %type_i32\n"
4016 
4017 		// constants required by most of tests
4018 		"%c_i32_0              = OpConstant %type_i32 0\n"
4019 		"%c_i32_1              = OpConstant %type_i32 1\n"
4020 		"%c_i32_2              = OpConstant %type_i32 2\n"
4021 		"%c_u32_1              = OpConstant %type_u32 1\n"
4022 
4023 		// if input float type has different width then output then
4024 		// both types are defined here along with all types derived from
4025 		// them that are commonly used by tests; some tests also define
4026 		// their own types (those that are needed just by this single test)
4027 		"${frag_types}"
4028 
4029 		"%BP_gl_FragCoord      = OpVariable %type_f32_vec4_iptr Input\n"
4030 		"%BP_vertex_color      = OpVariable %type_f32_vec4_iptr Input\n"
4031 		"%BP_fragColor         = OpVariable %type_f32_vec4_optr Output\n"
4032 
4033 		// SSBO definitions
4034 		"${frag_io_definitions}"
4035 
4036 		// set of default constants per float type is placed here,
4037 		// operation tests can also define additional constants.
4038 		"${frag_constants}"
4039 
4040 		// O_RETURN_VAL defines function here and because
4041 		// of that this token needs to be directly before main function.
4042 		"${frag_functions}"
4043 
4044 		"%main                 = OpFunction %type_void None %type_voidf\n"
4045 		"%label                = OpLabel\n"
4046 
4047 		"${frag_variables}"
4048 
4049 		// just pass vertex color - rendered image is not important in our case
4050 		"%vertex_color         = OpLoad %type_f32_vec4 %BP_vertex_color\n"
4051 		"OpStore %BP_fragColor %vertex_color\n"
4052 
4053 		// this token is filled only when fragment stage is tested;
4054 		// depending on test case arguments are either read from input ssbo or
4055 		// generated in spir-v code - in later case ssbo is used only for output
4056 		"${frag_arguments}"
4057 
4058 		// when fragment shader is tested then test operations are performed
4059 		// here and saved to ssbo; if vertex stage was tested then its
4060 		// result is just saved to ssbo here
4061 		"${frag_commands}"
4062 		"${frag_process_result}"
4063 
4064 		"OpReturn\n"
4065 		"OpFunctionEnd\n";
4066 
4067 	dst.spirvAsmSources.add("vert", DE_NULL)
4068 		<< StringTemplate(vertexTemplate).specialize(context.testCodeFragments)
4069 		<< SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
4070 	dst.spirvAsmSources.add("frag", DE_NULL)
4071 		<< StringTemplate(fragmentTemplate).specialize(context.testCodeFragments)
4072 		<< SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
4073 }
4074 
// GraphicsTestGroupBuilder iterates over all test cases and creates tests for both
// vertex and fragment stages. As in most spirv-assembly tests, tests here are also
// executed using functionality defined in vktSpvAsmGraphicsShaderTestUtil.cpp, but
// because one of the requirements during development was that SSBOs won't be used
// in the vertex stage, we couldn't use the createTestForStage functions - we need a
// custom version that covers both vertex and fragment shaders at the same time.
// This was required as we needed to pass the result from the vertex stage to the
// fragment stage, where it could be saved to an SSBO. To achieve that, the
// InstanceContext is created manually in the createInstanceContext method.
4084 class GraphicsTestGroupBuilder: public TestGroupBuilderBase
4085 {
4086 public:
4087 
4088 	void init();
4089 
4090 	void createOperationTests(TestCaseGroup* parentGroup, const char* groupName, FloatType floatType, bool argumentsFromInput) override;
4091 	void createSettingsTests(TestCaseGroup* parentGroup) override;
4092 
4093 protected:
4094 
4095 	InstanceContext createInstanceContext(const OperationTestCaseInfo& testCaseInfo) const;
4096 
4097 private:
4098 
4099 	TestCasesBuilder	m_testCaseBuilder;
4100 };
4101 
init()4102 void GraphicsTestGroupBuilder::init()
4103 {
4104 	m_testCaseBuilder.init();
4105 }
4106 
createOperationTests(TestCaseGroup * parentGroup,const char * groupName,FloatType floatType,bool argumentsFromInput)4107 void GraphicsTestGroupBuilder::createOperationTests(TestCaseGroup* parentGroup, const char* groupName, FloatType floatType, bool argumentsFromInput)
4108 {
4109 	TestContext&	testCtx	= parentGroup->getTestContext();
4110 	TestCaseGroup*	group	= new TestCaseGroup(testCtx, groupName);
4111 	parentGroup->addChild(group);
4112 
4113 	// create test cases for vertex stage
4114 	TestCaseVect testCases;
4115 	m_testCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
4116 
4117 	TestCaseVect::const_iterator currTestCase = testCases.begin();
4118 	TestCaseVect::const_iterator lastTestCase = testCases.end();
4119 	while(currTestCase != lastTestCase)
4120 	{
4121 		const OperationTestCase& testCase = *currTestCase;
4122 		++currTestCase;
4123 
4124 		// skip cases with undefined output
4125 		if (testCase.expectedOutput == V_UNUSED)
4126 			continue;
4127 
4128 		// FPRoundingMode decoration can be applied only to conversion instruction that is used as the object
4129 		// argument of an OpStore storing through a pointer to a 16-bit floating-point object in Uniform, or
4130 		// PushConstant, or Input, or Output Storage Classes. SSBO writes are not commonly supported
4131 		// in VS so this test case needs to be skiped for vertex stage.
4132 		if ((testCase.operationId == OID_ORTZ_ROUND) || (testCase.operationId == OID_ORTE_ROUND))
4133 			continue;
4134 
4135 		OperationTestCaseInfo testCaseInfo =
4136 		{
4137 			floatType,
4138 			argumentsFromInput,
4139 			VK_SHADER_STAGE_VERTEX_BIT,
4140 			m_testCaseBuilder.getOperation(testCase.operationId),
4141 			testCase
4142 		};
4143 
4144 		InstanceContext ctxVertex	= createInstanceContext(testCaseInfo);
4145 		string			testName	= replace(testCase.baseName, "op", testCaseInfo.operation.name);
4146 
4147 		addFunctionCaseWithPrograms<InstanceContext>(group, testName + "_vert", getGraphicsShaderCode, runAndVerifyDefaultPipeline, ctxVertex);
4148 	}
4149 
4150 	// create test cases for fragment stage
4151 	testCases.clear();
4152 	m_testCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
4153 
4154 	currTestCase = testCases.begin();
4155 	lastTestCase = testCases.end();
4156 	while(currTestCase != lastTestCase)
4157 	{
4158 		const OperationTestCase& testCase = *currTestCase;
4159 		++currTestCase;
4160 
4161 		// skip cases with undefined output
4162 		if (testCase.expectedOutput == V_UNUSED)
4163 			continue;
4164 
4165 		OperationTestCaseInfo testCaseInfo =
4166 		{
4167 			floatType,
4168 			argumentsFromInput,
4169 			VK_SHADER_STAGE_FRAGMENT_BIT,
4170 			m_testCaseBuilder.getOperation(testCase.operationId),
4171 			testCase
4172 		};
4173 
4174 		InstanceContext ctxFragment	= createInstanceContext(testCaseInfo);
4175 		string			testName	= replace(testCase.baseName, "op", testCaseInfo.operation.name);
4176 
4177 		addFunctionCaseWithPrograms<InstanceContext>(group, testName + "_frag", getGraphicsShaderCode, runAndVerifyDefaultPipeline, ctxFragment);
4178 	}
4179 }
4180 
createSettingsTests(TestCaseGroup * parentGroup)4181 void GraphicsTestGroupBuilder::createSettingsTests(TestCaseGroup* parentGroup)
4182 {
4183 	DE_UNREF(parentGroup);
4184 
4185 	// WG decided that testing settings only for compute stage is sufficient
4186 }
4187 
createInstanceContext(const OperationTestCaseInfo & testCaseInfo) const4188 InstanceContext GraphicsTestGroupBuilder::createInstanceContext(const OperationTestCaseInfo& testCaseInfo) const
4189 {
4190 	// LUT storing functions used to verify test results
4191 	const VerifyIOFunc checkFloatsLUT[] =
4192 	{
4193 		checkFloats<Float16, deFloat16>,
4194 		checkFloats<Float32, float>,
4195 		checkFloats<Float64, double>
4196 	};
4197 
4198 	// 32-bit float types are always needed for standard operations on color
4199 	// if tested operation does not require fp32 for either input or output
4200 	// then this minimal type definitions must be appended to types section
4201 	const string f32TypeMinimalRequired =
4202 		"%type_f32             = OpTypeFloat 32\n"
4203 		"%type_f32_arr_1       = OpTypeArray %type_f32 %c_i32_1\n"
4204 		"%type_f32_iptr        = OpTypePointer Input %type_f32\n"
4205 		"%type_f32_optr        = OpTypePointer Output %type_f32\n"
4206 		"%type_f32_vec4        = OpTypeVector %type_f32 4\n"
4207 		"%type_f32_vec4_iptr   = OpTypePointer Input %type_f32_vec4\n"
4208 		"%type_f32_vec4_optr   = OpTypePointer Output %type_f32_vec4\n";
4209 
4210 	const Operation&			testOperation	= testCaseInfo.operation;
4211 	const OperationTestCase&	testCase		= testCaseInfo.testCase;
4212 	FloatType					outFloatType	= testCaseInfo.outFloatType;
4213 	VkShaderStageFlagBits		testedStage		= testCaseInfo.testedStage;
4214 
4215 	DE_ASSERT((testedStage == VK_SHADER_STAGE_VERTEX_BIT) || (testedStage == VK_SHADER_STAGE_FRAGMENT_BIT));
4216 
4217 	SpecializedOperation specOpData;
4218 	specializeOperation(testCaseInfo, specOpData);
4219 
4220 	TypeSnippetsSP	inTypeSnippets		= specOpData.inTypeSnippets;
4221 	TypeSnippetsSP	outTypeSnippets		= specOpData.outTypeSnippets;
4222 	FloatType		inFloatType			= specOpData.inFloatType;
4223 
4224 	bool			outFp16WithoutStorage	= (outFloatType == FP16) && testCase.fp16Without16BitStorage;
4225 	bool			inFp16WithoutStorage	= (inFloatType == FP16) && testCase.fp16Without16BitStorage;
4226 
4227 	// There may be several reasons why we need the shaderFloat16 Vulkan feature.
4228 	bool needsShaderFloat16 = inFp16WithoutStorage || outFp16WithoutStorage;
4229 	// There are some weird cases where we need the constants, but would otherwise drop them.
4230 	bool needsSpecialConstants = false;
4231 
4232 	// UnpackHalf2x16 is a corner case - it returns two 32-bit floats but
4233 	// internaly operates on fp16 and this type should be used by float controls
4234 	FloatType		inFloatTypeForCaps		= inFloatType;
4235 	string			inFloatWidthForCaps		= inTypeSnippets->bitWidth;
4236 	if (testCase.operationId == OID_UPH_DENORM)
4237 	{
4238 		inFloatTypeForCaps	= FP16;
4239 		inFloatWidthForCaps	= "16";
4240 	}
4241 
4242 	string behaviorCapability;
4243 	string behaviorExecutionMode;
4244 	getBehaviorCapabilityAndExecutionMode(testCase.behaviorFlags,
4245 										  inFloatWidthForCaps,
4246 										  outTypeSnippets->bitWidth,
4247 										  behaviorCapability,
4248 										  behaviorExecutionMode);
4249 
4250 	// check which format features are needed
4251 	bool float16FeatureRequired = (inFloatType == FP16) || (outFloatType == FP16);
4252 	bool float64FeatureRequired = (inFloatType == FP64) || (outFloatType == FP64);
4253 
4254 	string vertExecutionMode;
4255 	string fragExecutionMode;
4256 	string vertCapabilities;
4257 	string fragCapabilities;
4258 	string vertExtensions;
4259 	string fragExtensions;
4260 	string vertAnnotations;
4261 	string fragAnnotations;
4262 	string vertTypes;
4263 	string fragTypes;
4264 	string vertConstants;
4265 	string fragConstants;
4266 	string vertFunctions;
4267 	string fragFunctions;
4268 	string vertIODefinitions;
4269 	string fragIODefinitions;
4270 	string vertArguments;
4271 	string fragArguments;
4272 	string vertVariables;
4273 	string fragVariables;
4274 	string vertCommands;
4275 	string fragCommands;
4276 	string vertProcessResult;
4277 	string fragProcessResult;
4278 
4279 	// check if operation should be executed in vertex stage
4280 	if (testedStage == VK_SHADER_STAGE_VERTEX_BIT)
4281 	{
4282 		vertAnnotations = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet;
4283 		fragAnnotations = outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
4284 		vertFunctions = specOpData.functions;
4285 
4286 		// check if input type is different from tested type (conversion operations)
4287 		if (testOperation.isInputTypeRestricted)
4288 		{
4289 			vertCapabilities	= behaviorCapability + inTypeSnippets->capabilities + outTypeSnippets->capabilities;
4290 			fragCapabilities	= outTypeSnippets->capabilities;
4291 			vertExtensions		= inTypeSnippets->extensions + outTypeSnippets->extensions;
4292 			fragExtensions		= outTypeSnippets->extensions;
4293 			vertTypes			= inTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
4294 			if (inFp16WithoutStorage)
4295 				vertTypes			+= inTypeSnippets->typeDefinitionsFp16Snippet;
4296 
4297 			fragTypes			= outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
4298 			vertConstants		= inTypeSnippets->constantsDefinitionsSnippet + outTypeSnippets->constantsDefinitionsSnippet;
4299 			fragConstants		= outTypeSnippets->constantsDefinitionsSnippet;
4300 		}
4301 		else
4302 		{
4303 			// input and output types are the same (majority of operations)
4304 
4305 			vertCapabilities	= behaviorCapability + outTypeSnippets->capabilities;
4306 			fragCapabilities	= vertCapabilities;
4307 			vertExtensions		= outTypeSnippets->extensions;
4308 			fragExtensions		= vertExtensions;
4309 			vertTypes			= outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
4310 			fragTypes			= vertTypes;
4311 			vertConstants		= outTypeSnippets->constantsDefinitionsSnippet;
4312 			fragConstants		= outTypeSnippets->constantsDefinitionsSnippet;
4313 		}
4314 
4315 		if (outFloatType != FP32)
4316 		{
4317 			fragTypes += f32TypeMinimalRequired;
4318 			if (inFloatType != FP32)
4319 				vertTypes += f32TypeMinimalRequired;
4320 		}
4321 
4322 		vertAnnotations	+= specOpData.annotations;
4323 		vertTypes		+= specOpData.types;
4324 		vertConstants	+= specOpData.constants;
4325 
4326 		vertExecutionMode		= behaviorExecutionMode;
4327 		fragExecutionMode		= "";
4328 		vertIODefinitions		= inTypeSnippets->inputDefinitionsSnippet + outTypeSnippets->outputVaryingsSnippet;
4329 		fragIODefinitions		= outTypeSnippets->inputVaryingsSnippet + outTypeSnippets->outputDefinitionsSnippet;
4330 		vertArguments			= specOpData.arguments;
4331 		fragArguments			= "";
4332 		vertVariables			= specOpData.variables;
4333 		fragVariables			= "";
4334 		vertCommands			= specOpData.commands;
4335 		fragCommands			= "";
4336 		vertProcessResult		= outTypeSnippets->storeVertexResultSnippet;
4337 		fragProcessResult		= outTypeSnippets->loadVertexResultSnippet + outTypeSnippets->storeResultsSnippet;
4338 
4339 		if (inFp16WithoutStorage)
4340 		{
4341 			vertAnnotations		+= inTypeSnippets->typeAnnotationsFp16Snippet;
4342 			vertIODefinitions	= inTypeSnippets->inputDefinitionsFp16Snippet + outTypeSnippets->outputVaryingsSnippet;
4343 		}
4344 
4345 		if (outFp16WithoutStorage)
4346 		{
4347 			vertTypes			+= outTypeSnippets->typeDefinitionsFp16Snippet;
4348 			fragTypes			+= outTypeSnippets->typeDefinitionsFp16Snippet;
4349 			fragAnnotations		+= outTypeSnippets->typeAnnotationsFp16Snippet;
4350 			fragIODefinitions	= outTypeSnippets->inputVaryingsSnippet + outTypeSnippets->outputDefinitionsFp16Snippet;
4351 			fragProcessResult	= outTypeSnippets->loadVertexResultSnippet + outTypeSnippets->storeResultsFp16Snippet;
4352 
4353 		}
4354 
4355 		needsShaderFloat16		|= outTypeSnippets->loadStoreRequiresShaderFloat16;
4356 	}
4357 	else // perform test in fragment stage - vertex stage is empty
4358 	{
4359 		fragFunctions = specOpData.functions;
4360 		// check if input type is different from tested type
4361 		if (testOperation.isInputTypeRestricted)
4362 		{
4363 			fragAnnotations		= inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet +
4364 								  outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
4365 			fragCapabilities	= behaviorCapability +
4366 				(inFp16WithoutStorage ? inTypeSnippets->capabilitiesFp16Without16BitStorage : inTypeSnippets->capabilities) +
4367 				(outFp16WithoutStorage ? outTypeSnippets->capabilitiesFp16Without16BitStorage : outTypeSnippets->capabilities);
4368 			fragExtensions		=
4369 				(inFp16WithoutStorage ? inTypeSnippets->extensionsFp16Without16BitStorage : inTypeSnippets->extensions) +
4370 				(outFp16WithoutStorage ? outTypeSnippets->extensionsFp16Without16BitStorage : outTypeSnippets->extensions);
4371 			fragTypes			= inTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->typeDefinitionsSnippet;
4372 			fragConstants		= inTypeSnippets->constantsDefinitionsSnippet + outTypeSnippets->constantsDefinitionsSnippet;
4373 		}
4374 		else
4375 		{
4376 			// input and output types are the same
4377 
4378 			fragAnnotations		= inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet +
4379 								  outTypeSnippets->outputAnnotationsSnippet;
4380 			fragCapabilities	= behaviorCapability +
4381 				(outFp16WithoutStorage ? outTypeSnippets->capabilitiesFp16Without16BitStorage : outTypeSnippets->capabilities);
4382 			fragExtensions		= (outFp16WithoutStorage ? outTypeSnippets->extensionsFp16Without16BitStorage : outTypeSnippets->extensions);
4383 			fragTypes			= outTypeSnippets->typeDefinitionsSnippet;
4384 			fragConstants		= outTypeSnippets->constantsDefinitionsSnippet;
4385 		}
4386 
4387 		// varying is not used but it needs to be specified so lets use type_i32 for it
4388 		string unusedVertVarying = "%BP_vertex_result     = OpVariable %type_i32_optr Output\n";
4389 		string unusedFragVarying = "%BP_vertex_result     = OpVariable %type_i32_iptr Input\n";
4390 
4391 		vertCapabilities	= "";
4392 		vertExtensions		= "";
4393 		vertAnnotations		= "OpDecorate %type_f32_arr_1 ArrayStride 4\n";
4394 		vertTypes			= f32TypeMinimalRequired;
4395 		vertConstants		= "";
4396 
4397 		if ((outFloatType != FP32) && (inFloatType != FP32))
4398 			fragTypes += f32TypeMinimalRequired;
4399 
4400 		fragAnnotations += specOpData.annotations;
4401 		fragTypes		+= specOpData.types;
4402 		fragConstants	+= specOpData.constants;
4403 
4404 		vertExecutionMode	= "";
4405 		fragExecutionMode	= behaviorExecutionMode;
4406 		vertIODefinitions	= unusedVertVarying;
4407 		fragIODefinitions	= unusedFragVarying;
4408 
4409 		vertArguments		= "";
4410 		fragArguments		= specOpData.arguments;
4411 		vertVariables		= "";
4412 		fragVariables		= specOpData.variables;
4413 		vertCommands		= "";
4414 		fragCommands		= specOpData.commands;
4415 		vertProcessResult	= "";
4416 		fragProcessResult	= outTypeSnippets->storeResultsSnippet;
4417 
4418 		if (inFp16WithoutStorage)
4419 		{
4420 			fragAnnotations		+= inTypeSnippets->typeAnnotationsFp16Snippet;
4421 			if (testOperation.isInputTypeRestricted)
4422 			{
4423 				fragTypes			+= inTypeSnippets->typeDefinitionsFp16Snippet;
4424 			}
4425 			fragIODefinitions	+= inTypeSnippets->inputDefinitionsFp16Snippet;
4426 		}
4427 		else
4428 		{
4429 			fragIODefinitions	+= inTypeSnippets->inputDefinitionsSnippet;
4430 		}
4431 
4432 		if (outFp16WithoutStorage)
4433 		{
4434 			if (testOperation.isInputTypeRestricted)
4435 			{
4436 				fragAnnotations		+= outTypeSnippets->typeAnnotationsFp16Snippet;
4437 			}
4438 			fragTypes			+= outTypeSnippets->typeDefinitionsFp16Snippet;
4439 			fragIODefinitions	+= outTypeSnippets->outputDefinitionsFp16Snippet;
4440 			fragProcessResult	= outTypeSnippets->storeResultsFp16Snippet;
4441 		}
4442 		else
4443 		{
4444 			fragIODefinitions	+= outTypeSnippets->outputDefinitionsSnippet;
4445 		}
4446 
4447 		if (!testCaseInfo.argumentsFromInput)
4448 		{
4449 			switch(testCaseInfo.testCase.operationId)
4450 			{
4451 				case OID_CONV_FROM_FP32:
4452 				case OID_CONV_FROM_FP64:
4453 					needsSpecialConstants = true;
4454 					break;
4455 				default:
4456 					break;
4457 			}
4458 		}
4459 	}
4460 
4461 	// Another reason we need shaderFloat16 is the executable instructions uses fp16
4462 	// in a way not supported by the 16bit storage extension.
4463 	needsShaderFloat16 |= float16FeatureRequired && testOperation.floatUsage == FLOAT_ARITHMETIC;
4464 
4465 	// Constants are only needed sometimes.  Drop them in the fp16 case if the code doesn't need
4466 	// them, and if we don't otherwise need shaderFloat16.
4467 	bool needsFP16Constants = needsShaderFloat16 || needsSpecialConstants || outFp16WithoutStorage;
4468 
4469 	if (!needsFP16Constants && float16FeatureRequired)
4470 	{
4471 		// Check various code fragments
4472 		const FloatStatementUsageFlags	commandsFloatConstMask				= B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_CONST_FP16;
4473 		const bool						commandsUsesFloatConstant			= (testCaseInfo.operation.statementUsageFlags & commandsFloatConstMask) != 0;
4474 		const FloatStatementUsageFlags	argumentsFloatConstMask				= B_STATEMENT_USAGE_ARGS_CONST_FLOAT | B_STATEMENT_USAGE_ARGS_CONST_FP16;
4475 		const bool						argumentsUsesFloatConstant			= (specOpData.argumentsUsesFloatConstant & argumentsFloatConstMask) != 0;
4476 		bool							hasFP16ConstsInCommandsOrArguments	= commandsUsesFloatConstant || argumentsUsesFloatConstant;
4477 
4478 		needsFP16Constants |= hasFP16ConstsInCommandsOrArguments;
4479 
4480 		if (!needsFP16Constants)
4481 		{
4482 			vertConstants = "";
4483 			fragConstants = "";
4484 		}
4485 	}
4486 	needsShaderFloat16 |= needsFP16Constants;
4487 
4488 	if (needsShaderFloat16)
4489 	{
4490 		vertCapabilities += "OpCapability Float16\n";
4491 		fragCapabilities += "OpCapability Float16\n";
4492 	}
4493 
4494 	map<string, string> specializations;
4495 	specializations["vert_capabilities"]	= vertCapabilities;
4496 	specializations["vert_extensions"]		= vertExtensions;
4497 	specializations["vert_execution_mode"]	= vertExecutionMode;
4498 	specializations["vert_annotations"]		= vertAnnotations;
4499 	specializations["vert_types"]			= vertTypes;
4500 	specializations["vert_constants"]		= vertConstants;
4501 	specializations["vert_io_definitions"]	= vertIODefinitions;
4502 	specializations["vert_arguments"]		= vertArguments;
4503 	specializations["vert_variables"]		= vertVariables;
4504 	specializations["vert_functions"]		= vertFunctions;
4505 	specializations["vert_commands"]		= vertCommands;
4506 	specializations["vert_process_result"]	= vertProcessResult;
4507 	specializations["frag_capabilities"]	= fragCapabilities;
4508 	specializations["frag_extensions"]		= fragExtensions;
4509 	specializations["frag_execution_mode"]	= fragExecutionMode;
4510 	specializations["frag_annotations"]		= fragAnnotations;
4511 	specializations["frag_types"]			= fragTypes;
4512 	specializations["frag_constants"]		= fragConstants;
4513 	specializations["frag_functions"]		= fragFunctions;
4514 	specializations["frag_io_definitions"]	= fragIODefinitions;
4515 	specializations["frag_arguments"]		= fragArguments;
4516 	specializations["frag_variables"]		= fragVariables;
4517 	specializations["frag_commands"]		= fragCommands;
4518 	specializations["frag_process_result"]	= fragProcessResult;
4519 
4520 	// colors are not used by the test - input is passed via uniform buffer
4521 	RGBA defaultColors[4] = { RGBA::white(), RGBA::red(), RGBA::green(), RGBA::blue() };
4522 
4523 	// construct input and output buffers of proper types
4524 	TypeValuesSP inTypeValues	= m_typeData.at(inFloatType).values;
4525 	TypeValuesSP outTypeValues	= m_typeData.at(outFloatType).values;
4526 	BufferSp inBufferSp			= inTypeValues->constructInputBuffer(testCase.input);
4527 	BufferSp outBufferSp		= outTypeValues->constructOutputBuffer(testCase.expectedOutput);
4528 
4529 	vkt::SpirVAssembly::GraphicsResources resources;
4530 	resources.inputs.push_back( Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4531 	resources.outputs.push_back(Resource(outBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4532 	resources.verifyIO = checkFloatsLUT[outFloatType];
4533 
4534 	StageToSpecConstantMap	noSpecConstants;
4535 	PushConstants			noPushConstants;
4536 	GraphicsInterfaces		noInterfaces;
4537 
4538 	VulkanFeatures vulkanFeatures;
4539 	setupVulkanFeatures(inFloatTypeForCaps,		// usualy same as inFloatType - different only for UnpackHalf2x16
4540 						outFloatType,
4541 						testCase.behaviorFlags,
4542 						float64FeatureRequired,
4543 						vulkanFeatures);
4544 	vulkanFeatures.coreFeatures.fragmentStoresAndAtomics = true;
4545 
4546 	vector<string> extensions;
4547 	extensions.push_back("VK_KHR_shader_float_controls");
4548 	if (needsShaderFloat16)
4549 	{
4550 		extensions.push_back("VK_KHR_shader_float16_int8");
4551 		vulkanFeatures.extFloat16Int8.shaderFloat16 = true;
4552 	}
4553 	if (float16FeatureRequired && !testCase.fp16Without16BitStorage)
4554 	{
4555 		extensions.push_back("VK_KHR_16bit_storage");
4556 		vulkanFeatures.ext16BitStorage.storageBuffer16BitAccess = true;
4557 	}
4558 
4559 	InstanceContext ctx(defaultColors,
4560 						defaultColors,
4561 						specializations,
4562 						noSpecConstants,
4563 						noPushConstants,
4564 						resources,
4565 						noInterfaces,
4566 						extensions,
4567 						vulkanFeatures,
4568 						testedStage);
4569 
4570 	ctx.moduleMap["vert"].push_back(std::make_pair("main", VK_SHADER_STAGE_VERTEX_BIT));
4571 	ctx.moduleMap["frag"].push_back(std::make_pair("main", VK_SHADER_STAGE_FRAGMENT_BIT));
4572 
4573 	ctx.requiredStages			= static_cast<VkShaderStageFlagBits>(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
4574 	ctx.failResult				= QP_TEST_RESULT_FAIL;
4575 	ctx.failMessageTemplate		= "Output doesn't match with expected";
4576 
4577 	return ctx;
4578 }
4579 
4580 } // anonymous
4581 
createFloatControlsTestGroup(TestContext & testCtx,TestGroupBuilderBase * groupBuilder)4582 tcu::TestCaseGroup* createFloatControlsTestGroup (TestContext& testCtx, TestGroupBuilderBase* groupBuilder)
4583 {
4584 	de::MovePtr<TestCaseGroup>	group(new TestCaseGroup(testCtx, "float_controls", "Tests for VK_KHR_shader_float_controls extension"));
4585 
4586 	struct TestGroup
4587 	{
4588 		FloatType		floatType;
4589 		const char*		groupName;
4590 	};
4591 	TestGroup testGroups[] =
4592 	{
4593 		{ FP16, "fp16" },
4594 		{ FP32, "fp32" },
4595 		{ FP64, "fp64" },
4596 	};
4597 
4598 	for (int i = 0 ; i < DE_LENGTH_OF_ARRAY(testGroups) ; ++i)
4599 	{
4600 		const TestGroup& testGroup = testGroups[i];
4601 		TestCaseGroup* typeGroup = new TestCaseGroup(testCtx, testGroup.groupName);
4602 		group->addChild(typeGroup);
4603 
4604 		groupBuilder->createOperationTests(typeGroup, "input_args", testGroup.floatType, true);
4605 		groupBuilder->createOperationTests(typeGroup, "generated_args", testGroup.floatType, false);
4606 	}
4607 
4608 	groupBuilder->createSettingsTests(group.get());
4609 
4610 	return group.release();
4611 }
4612 
createFloatControlsComputeGroup(TestContext & testCtx)4613 tcu::TestCaseGroup* createFloatControlsComputeGroup (TestContext& testCtx)
4614 {
4615 	ComputeTestGroupBuilder computeTestGroupBuilder;
4616 	computeTestGroupBuilder.init();
4617 
4618 	return createFloatControlsTestGroup(testCtx, &computeTestGroupBuilder);
4619 }
4620 
createFloatControlsGraphicsGroup(TestContext & testCtx)4621 tcu::TestCaseGroup* createFloatControlsGraphicsGroup (TestContext& testCtx)
4622 {
4623 	GraphicsTestGroupBuilder graphicsTestGroupBuilder;
4624 	graphicsTestGroupBuilder.init();
4625 
4626 	return createFloatControlsTestGroup(testCtx, &graphicsTestGroupBuilder);
4627 }
4628 
4629 } // SpirVAssembly
4630 } // vkt
4631