1 /*-------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2018 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief VK_KHR_shader_float_controls tests.
22 *//*--------------------------------------------------------------------*/
23
24
25 #include "vktSpvAsmFloatControlsTests.hpp"
26 #include "vktSpvAsmComputeShaderCase.hpp"
27 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
28 #include "vktTestGroupUtil.hpp"
29 #include "tcuFloat.hpp"
30 #include "tcuFloatFormat.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "deUniquePtr.hpp"
33 #include "deFloat16.h"
34 #include "vkQueryUtil.hpp"
35 #include "vkRefUtil.hpp"
36 #include <cstring>
37 #include <vector>
38 #include <limits>
39 #include <fenv.h>
40
41 namespace vkt
42 {
43 namespace SpirVAssembly
44 {
45
46 namespace
47 {
48
49 using namespace std;
50 using namespace tcu;
51
// Float widths (fp16, fp32, fp64) that the tests are generated for.
enum FloatType
{
    FP16 = 0,
    FP32 = 1,
    FP64 = 2
};
58
// Tag naming the float width of the data held in a buffer.
// NOTE(review): the users of this enum are outside this view - confirm there.
enum class BufferDataType
{
    DATA_UNKNOWN = 0,
    DATA_FP16,
    DATA_FP32,
    DATA_FP64
};
66
// Describes how far the use of a float type goes in a test.
enum FloatUsage
{
    // For a 16bit float type: only uses that VK_KHR_16bit_storage supports.
    FLOAT_STORAGE_ONLY = 0,

    // The float type is used in ways that go beyond VK_KHR_16bit_storage.
    FLOAT_ARITHMETIC = 1
};
75
// Bit flags, one distinct bit per enumerator (bits 0..19). Each name combines
// a category (ARGS_CONST, TYPES_TYPE, CONSTS_TYPE, COMMANDS_CONST,
// COMMANDS_TYPE) with a float kind (FLOAT, FP16, FP32, FP64).
// NOTE(review): the code that sets and tests these bits is outside this view;
// confirm the exact meaning of each category there.
enum FloatStatementUsageBits
{
    B_STATEMENT_USAGE_ARGS_CONST_FLOAT = (1<<0 ),
    B_STATEMENT_USAGE_ARGS_CONST_FP16 = (1<<1 ),
    B_STATEMENT_USAGE_ARGS_CONST_FP32 = (1<<2 ),
    B_STATEMENT_USAGE_ARGS_CONST_FP64 = (1<<3 ),
    B_STATEMENT_USAGE_TYPES_TYPE_FLOAT = (1<<4 ),
    B_STATEMENT_USAGE_TYPES_TYPE_FP16 = (1<<5 ),
    B_STATEMENT_USAGE_TYPES_TYPE_FP32 = (1<<6 ),
    B_STATEMENT_USAGE_TYPES_TYPE_FP64 = (1<<7 ),
    B_STATEMENT_USAGE_CONSTS_TYPE_FLOAT = (1<<8 ),
    B_STATEMENT_USAGE_CONSTS_TYPE_FP16 = (1<<9 ),
    B_STATEMENT_USAGE_CONSTS_TYPE_FP32 = (1<<10),
    B_STATEMENT_USAGE_CONSTS_TYPE_FP64 = (1<<11),
    B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT = (1<<12),
    B_STATEMENT_USAGE_COMMANDS_CONST_FP16 = (1<<13),
    B_STATEMENT_USAGE_COMMANDS_CONST_FP32 = (1<<14),
    B_STATEMENT_USAGE_COMMANDS_CONST_FP64 = (1<<15),
    B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT = (1<<16),
    B_STATEMENT_USAGE_COMMANDS_TYPE_FP16 = (1<<17),
    B_STATEMENT_USAGE_COMMANDS_TYPE_FP32 = (1<<18),
    B_STATEMENT_USAGE_COMMANDS_TYPE_FP64 = (1<<19),
};

// Integer type for FloatStatementUsageBits values
// (presumably an OR-ed mask of them - confirm at the use sites).
typedef deUint32 FloatStatementUsageFlags;
101
// Enum containing the float behaviors that it is possible to test.
// Every enumerator is a single distinct bit; the trailing comment on each
// line names the corresponding SPIR-V execution mode.
enum BehaviorFlagBits
{
    B_DENORM_PRESERVE = 0x00000001, // DenormPreserve
    B_DENORM_FLUSH = 0x00000002, // DenormFlushToZero
    B_ZIN_PRESERVE = 0x00000004, // SignedZeroInfNanPreserve
    B_RTE_ROUNDING = 0x00000008, // RoundingModeRTE
    B_RTZ_ROUNDING = 0x00000010 // RoundingModeRTZ
};

// Integer type for BehaviorFlagBits values
// (presumably an OR-ed mask of them - confirm at the use sites).
typedef deUint32 BehaviorFlags;
113
// Codes for all float values used in tests as arguments and operation results.
// Referring to values by code allows the same test description to be reused
// with different float types, which reduces the complexity of the tests
// implementation. The per-type value for each code is filled in by the
// TypeValues<T> constructors.
enum ValueId
{
    // common values used as both arguments and results
    V_UNUSED = 0, // used to mark arguments that are not used in operation
                  // or results of tests cases that should be skipped
    V_MINUS_INF,
    V_MINUS_ONE, // -1.0
    V_MINUS_ZERO, // -0.0
    V_ZERO, // 0.0
    V_HALF, // 0.5
    V_ONE, // 1.0
    V_INF,
    V_DENORM,
    V_NAN,

    // arguments for rounding mode tests - used only when arguments are passed from input
    V_ADD_ARG_A,
    V_ADD_ARG_B,
    V_SUB_ARG_A,
    V_SUB_ARG_B,
    V_MUL_ARG_A,
    V_MUL_ARG_B,
    V_DOT_ARG_A,
    V_DOT_ARG_B,

    // arguments of conversion operations - used only when arguments are passed from input
    V_CONV_FROM_FP32_ARG,
    V_CONV_FROM_FP64_ARG,

    // results of rounding mode tests (RTZ = round towards zero, RTE = round to nearest even)
    V_ADD_RTZ_RESULT,
    V_ADD_RTE_RESULT,
    V_SUB_RTZ_RESULT,
    V_SUB_RTE_RESULT,
    V_MUL_RTZ_RESULT,
    V_MUL_RTE_RESULT,
    V_DOT_RTZ_RESULT,
    V_DOT_RTE_RESULT,

    // non-common results of some operations - corner cases
    V_ZERO_OR_DENORM_TIMES_TWO, // fp16 addition of non-flushed denorm with itself (or equivalent dot-product or vector-matrix multiply)
    V_MINUS_ONE_OR_CLOSE, // value used only for fp16 subtraction result of preserved denorm and one
    V_PI_DIV_2,
    V_ZERO_OR_MINUS_ZERO, // both +0 and -0 are accepted
    V_ZERO_OR_ONE, // both +0 and 1 are accepted
    V_ZERO_OR_FP16_DENORM_TO_FP32, // both 0 and fp32 representation of fp16 denorm are accepted
    V_ZERO_OR_FP16_DENORM_TO_FP64,
    V_ZERO_OR_FP32_DENORM_TO_FP64,
    V_DENORM_TIMES_TWO,
    V_DEGREES_DENORM,
    V_TRIG_ONE, // 1.0 trigonometric operations, including precision margin
    V_MINUS_INF_OR_LOG_DENORM,
    V_MINUS_INF_OR_LOG2_DENORM,
    V_ZERO_OR_SQRT_DENORM,
    V_INF_OR_INV_SQRT_DENORM,

    // results of conversion operations
    V_CONV_TO_FP16_RTZ_RESULT,
    V_CONV_TO_FP16_RTE_RESULT,
    V_CONV_TO_FP32_RTZ_RESULT,
    V_CONV_TO_FP32_RTE_RESULT,
    V_CONV_DENORM_SMALLER, // used e.g. when converting fp16 denorm to fp32
    V_CONV_DENORM_BIGGER,
};
179
// Enum containing all tested operations. Operations are defined in a generic
// way so that they can be used to generate tests operating on arguments with
// different values of the specified float type.
enum OperationId
{
    // spir-v unary operations
    O_NEGATE = 0,
    O_COMPOSITE,
    O_COMPOSITE_INS,
    O_COPY,
    O_D_EXTRACT,
    O_D_INSERT,
    O_SHUFFLE,
    O_TRANSPOSE,
    O_CONV_FROM_FP16,
    O_CONV_FROM_FP32,
    O_CONV_FROM_FP64,
    O_SCONST_CONV_FROM_FP32_TO_FP16,
    O_SCONST_CONV_FROM_FP64_TO_FP32,
    O_SCONST_CONV_FROM_FP64_TO_FP16,
    O_RETURN_VAL,

    // spir-v binary operations
    O_ADD,
    O_SUB,
    O_MUL,
    O_DIV,
    O_REM,
    O_MOD,
    O_PHI,
    O_SELECT,
    O_DOT,
    O_VEC_MUL_S,
    O_VEC_MUL_M,
    O_MAT_MUL_S,
    O_MAT_MUL_V,
    O_MAT_MUL_M,
    O_OUT_PROD,
    O_ORD_EQ,
    O_UORD_EQ,
    O_ORD_NEQ,
    O_UORD_NEQ,
    O_ORD_LS,
    O_UORD_LS,
    O_ORD_GT,
    O_UORD_GT,
    O_ORD_LE,
    O_UORD_LE,
    O_ORD_GE,
    O_UORD_GE,

    // glsl unary operations
    O_ROUND,
    O_ROUND_EV,
    O_TRUNC,
    O_ABS,
    O_SIGN,
    O_FLOOR,
    O_CEIL,
    O_FRACT,
    O_RADIANS,
    O_DEGREES,
    O_SIN,
    O_COS,
    O_TAN,
    O_ASIN,
    O_ACOS,
    O_ATAN,
    O_SINH,
    O_COSH,
    O_TANH,
    O_ASINH,
    O_ACOSH,
    O_ATANH,
    O_EXP,
    O_LOG,
    O_EXP2,
    O_LOG2,
    O_SQRT,
    O_INV_SQRT,
    O_MODF,
    O_MODF_ST,
    O_FREXP,
    O_FREXP_ST,
    O_LENGHT, // NOTE(review): misspelling of "LENGTH"; renaming would require updating every user of this id
    O_NORMALIZE,
    O_REFLECT,
    O_REFRACT,
    O_MAT_DET,
    O_MAT_INV,
    O_PH_DENORM, // PackHalf2x16
    O_UPH_DENORM,
    O_PD_DENORM, // PackDouble2x32
    O_UPD_DENORM_FLUSH,
    O_UPD_DENORM_PRESERVE,

    // glsl binary operations
    O_ATAN2,
    O_POW,
    O_MIX,
    O_FMA,
    O_MIN,
    O_MAX,
    O_CLAMP,
    O_STEP,
    O_SSTEP,
    O_DIST,
    O_CROSS,
    O_FACE_FWD,
    O_NMIN,
    O_NMAX,
    O_NCLAMP,

    O_ORTE_ROUND,
    O_ORTZ_ROUND
};
296
// Structures storing data required to test DenormPreserve and DenormFlushToZero modes.
// Operations are separated into binary and unary lists because binary operations can be tested with
// two attributes and thus denorms can be tested in combination with value, denorm, inf and nan.
// Unary operations are only tested with denorms.

// One binary operation together with the result expected for each kind of
// partner argument.
// NOTE(review): the mapping of the four result fields to "value / denorm /
// inf / nan" follows the field names and the comment above; confirm against
// the tables that instantiate this struct.
struct BinaryCase
{
    OperationId operationId;
    ValueId opVarResult;
    ValueId opDenormResult;
    ValueId opInfResult;
    ValueId opNanResult;
};
// One unary operation together with the id of its expected result.
struct UnaryCase
{
    OperationId operationId;
    ValueId result;
};
314
// Returns a copy of str in which every occurrence of `from` is replaced with
// `to`. Occurrences are located left to right and the text that was just
// inserted is never rescanned, so `to` may itself contain `from`.
//
// To keep the spir-v code clean and easier to read, parts of it are processed
// with this function instead of StringTemplate; its main use is the
// replacement of "float_" with "f16_", "f32_" or "f64_" depending on the test
// case.
//
// An empty `from` leaves the string unchanged.
std::string replace(std::string str, const std::string& from, const std::string& to)
{
    // An empty pattern is found at every position, so without this guard the
    // loop below would keep inserting `to` and never terminate.
    if (from.empty())
        return str;

    std::string::size_type start_pos = 0;
    while ((start_pos = str.find(from, start_pos)) != std::string::npos)
    {
        str.replace(start_pos, from.length(), to);
        // continue the search right after the inserted text
        start_pos += to.length();
    }
    return str;
}
330
// Structure used to perform bits conversion int type <-> float type.
// Value overlays a FLOAT_TYPE and a UINT_TYPE in the same storage; the union
// itself does not check that both types have the same size.
template<typename FLOAT_TYPE, typename UINT_TYPE>
struct RawConvert
{
    union Value
    {
        FLOAT_TYPE fp; // float view of the storage
        UINT_TYPE ui;  // unsigned integer view of the storage
    };
};
341
// Traits used to get the int type that can store the equivalent float type.
// The primary template yields deUint16; it is what a float type without a
// dedicated specialization below resolves to.
template<typename FLOAT_TYPE>
struct GetCoresponding
{
    typedef deUint16 uint_type;
};
// float -> deUint32
template<>
struct GetCoresponding<float>
{
    typedef deUint32 uint_type;
};
// double -> deUint64
template<>
struct GetCoresponding<double>
{
    typedef deUint64 uint_type;
};
358
// All values used for arguments and operation results are stored in a single map.
// Each float type (fp16, fp32, fp64) has its own map that is used during
// test setup and during verification. TypeValuesBase is the interface to that map.
class TypeValuesBase
{
public:
    TypeValuesBase();
    virtual ~TypeValuesBase() = default;

    // Builds an input buffer from the two values identified by twoArguments[0] and twoArguments[1].
    virtual BufferSp constructInputBuffer (const ValueId* twoArguments) const = 0;
    // Builds an output buffer for the given result id.
    virtual BufferSp constructOutputBuffer (ValueId result) const = 0;
    // Copies the two argument values into bufferData starting at offset and advances offset past them.
    virtual void fillInputData (const ValueId* twoArguments, vector<deUint8>& bufferData, deUint32& offset) const = 0;

protected:
    // Set to 3.14159265358979323846 by the constructor.
    const double pi;
};
375
TypeValuesBase()376 TypeValuesBase::TypeValuesBase()
377 : pi(3.14159265358979323846)
378 {
379 }
380
// Shared pointer to a value table, held through its type-erased base class.
typedef de::SharedPtr<TypeValuesBase> TypeValuesSP;
382
// Value table for one float type. The constructor (specialized per
// FLOAT_TYPE) fills m_valueIdToFloatType with the representation of every
// ValueId that is meaningful for that type.
template <typename FLOAT_TYPE>
class TypeValues: public TypeValuesBase
{
public:
    TypeValues();

    BufferSp constructInputBuffer (const ValueId* twoArguments) const override;
    BufferSp constructOutputBuffer (ValueId result) const override;
    void fillInputData (const ValueId* twoArguments, vector<deUint8>& bufferData, deUint32& offset) const override;

    // Returns the value stored for id.
    FLOAT_TYPE getValue(ValueId id) const;

    // Returns the FLOAT_TYPE that has the same bits as byteValue.
    template <typename UINT_TYPE>
    FLOAT_TYPE exactByteEquivalent(UINT_TYPE byteValue) const;

private:
    typedef map<ValueId, FLOAT_TYPE> ValueMap;
    ValueMap m_valueIdToFloatType;
};
402
403 template <typename FLOAT_TYPE>
constructInputBuffer(const ValueId * twoArguments) const404 BufferSp TypeValues<FLOAT_TYPE>::constructInputBuffer(const ValueId* twoArguments) const
405 {
406 std::vector<FLOAT_TYPE> inputData(2);
407 inputData[0] = m_valueIdToFloatType.at(twoArguments[0]);
408 inputData[1] = m_valueIdToFloatType.at(twoArguments[1]);
409 return BufferSp(new Buffer<FLOAT_TYPE>(inputData));
410 }
411
412 template <typename FLOAT_TYPE>
constructOutputBuffer(ValueId result) const413 BufferSp TypeValues<FLOAT_TYPE>::constructOutputBuffer(ValueId result) const
414 {
415 // note: we are not doing maping here, ValueId is directly saved in
416 // float type in order to be able to retireve it during verification
417
418 typedef typename GetCoresponding<FLOAT_TYPE>::uint_type uint_t;
419 uint_t value = static_cast<uint_t>(result);
420
421 // For FP16 we increase the buffer size to hold an unsigned integer, as
422 // we can be in the no 16bit_storage case.
423 const uint_t outputSize = sizeof(FLOAT_TYPE) == 2u ? 2u : 1u;
424 std::vector<FLOAT_TYPE> outputData(outputSize, exactByteEquivalent<uint_t>(value));
425 return BufferSp(new Buffer<FLOAT_TYPE>(outputData));
426 }
427
428 template <typename FLOAT_TYPE>
fillInputData(const ValueId * twoArguments,vector<deUint8> & bufferData,deUint32 & offset) const429 void TypeValues<FLOAT_TYPE>::fillInputData(const ValueId* twoArguments, vector<deUint8>& bufferData, deUint32& offset) const
430 {
431 deUint32 typeSize = sizeof(FLOAT_TYPE);
432
433 FLOAT_TYPE argA = getValue(twoArguments[0]);
434 deMemcpy(&bufferData[offset], &argA, typeSize);
435 offset += typeSize;
436
437 FLOAT_TYPE argB = getValue(twoArguments[1]);
438 deMemcpy(&bufferData[offset], &argB, typeSize);
439 offset += typeSize;
440 }
441
442 template <typename FLOAT_TYPE>
getValue(ValueId id) const443 FLOAT_TYPE TypeValues<FLOAT_TYPE>::getValue(ValueId id) const
444 {
445 return m_valueIdToFloatType.at(id);
446 }
447
448 template <typename FLOAT_TYPE>
449 template <typename UINT_TYPE>
exactByteEquivalent(UINT_TYPE byteValue) const450 FLOAT_TYPE TypeValues<FLOAT_TYPE>::exactByteEquivalent(UINT_TYPE byteValue) const
451 {
452 typename RawConvert<FLOAT_TYPE, UINT_TYPE>::Value value;
453 value.ui = byteValue;
454 return value.fp;
455 }
456
// Fills the value table for fp16. Values are stored as raw 16-bit patterns,
// either written directly as hex or produced by the deFloat32To16 helpers.
// Statement order matters: several entries are copied from entries assigned
// earlier.
template <>
TypeValues<deFloat16>::TypeValues()
    : TypeValuesBase()
{
    // NOTE: when updating entries in m_valueIdToFloatType make sure to
    // update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
    ValueMap& vm = m_valueIdToFloatType;
    vm[V_UNUSED] = deFloat32To16(0.0f);
    vm[V_MINUS_INF] = 0xfc00; // sign bit set, exponent all ones, zero mantissa
    vm[V_MINUS_ONE] = deFloat32To16(-1.0f);
    vm[V_MINUS_ZERO] = 0x8000; // only the sign bit set
    vm[V_ZERO] = 0x0000;
    vm[V_HALF] = deFloat32To16(0.5f);
    vm[V_ONE] = deFloat32To16(1.0f);
    vm[V_INF] = 0x7c00; // exponent all ones, zero mantissa
    vm[V_DENORM] = 0x03f0; // this value should be the same as the result of denormBase - epsilon
    vm[V_NAN] = 0x7cf0; // exponent all ones, non-zero mantissa

    vm[V_PI_DIV_2] = 0x3e48;
    vm[V_DENORM_TIMES_TWO] = 0x07e0; // twice the 0x03f0 pattern of V_DENORM
    vm[V_DEGREES_DENORM] = 0x1b0c;

    // arguments of the rounding mode tests
    vm[V_ADD_ARG_A] = 0x3c03;
    vm[V_ADD_ARG_B] = vm[V_ONE];
    vm[V_SUB_ARG_A] = vm[V_ADD_ARG_A];
    vm[V_SUB_ARG_B] = 0x4203;
    vm[V_MUL_ARG_A] = vm[V_ADD_ARG_A];
    vm[V_MUL_ARG_B] = 0x1900;
    vm[V_DOT_ARG_A] = vm[V_ADD_ARG_A];
    vm[V_DOT_ARG_B] = vm[V_MUL_ARG_B];
    // fp16 is never the source of these conversions, so they get the V_UNUSED value
    vm[V_CONV_FROM_FP32_ARG] = vm[V_UNUSED];
    vm[V_CONV_FROM_FP64_ARG] = vm[V_UNUSED];

    // expected results when rounding towards zero
    vm[V_ADD_RTZ_RESULT] = 0x4001; // deFloat16Add(vm[V_ADD_ARG_A], vm[V_ADD_ARG_B], rtz)
    vm[V_SUB_RTZ_RESULT] = 0xc001; // deFloat16Sub(vm[V_SUB_ARG_A], vm[V_SUB_ARG_B], rtz)
    vm[V_MUL_RTZ_RESULT] = 0x1903; // deFloat16Mul(vm[V_MUL_ARG_A], vm[V_MUL_ARG_B], rtz)
    vm[V_DOT_RTZ_RESULT] = 0x1d03;
    vm[V_CONV_TO_FP16_RTZ_RESULT] = deFloat32To16Round(1.22334445f, DE_ROUNDINGMODE_TO_ZERO);
    vm[V_CONV_TO_FP32_RTZ_RESULT] = vm[V_UNUSED];

    // expected results when rounding to nearest even
    vm[V_ADD_RTE_RESULT] = 0x4002; // deFloat16Add(vm[V_ADD_ARG_A], vm[V_ADD_ARG_B], rte)
    vm[V_SUB_RTE_RESULT] = 0xc002; // deFloat16Sub(vm[V_SUB_ARG_A], vm[V_SUB_ARG_B], rte)
    vm[V_MUL_RTE_RESULT] = 0x1904; // deFloat16Mul(vm[V_MUL_ARG_A], vm[V_MUL_ARG_B], rte)
    vm[V_DOT_RTE_RESULT] = 0x1d04;
    vm[V_CONV_TO_FP16_RTE_RESULT] = deFloat32To16Round(1.22334445f, DE_ROUNDINGMODE_TO_NEAREST_EVEN);
    vm[V_CONV_TO_FP32_RTE_RESULT] = vm[V_UNUSED];

    // there is no precision to store fp32 denorm nor fp64 denorm
    vm[V_CONV_DENORM_SMALLER] = vm[V_ZERO];
    vm[V_CONV_DENORM_BIGGER] = vm[V_ZERO];
}
508
509 template <>
TypeValues()510 TypeValues<float>::TypeValues()
511 : TypeValuesBase()
512 {
513 // NOTE: when updating entries in m_valueIdToFloatType make sure to
514 // update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
515 ValueMap& vm = m_valueIdToFloatType;
516 vm[V_UNUSED] = 0.0f;
517 vm[V_MINUS_INF] = -std::numeric_limits<float>::infinity();
518 vm[V_MINUS_ONE] = -1.0f;
519 vm[V_MINUS_ZERO] = -0.0f;
520 vm[V_ZERO] = 0.0f;
521 vm[V_HALF] = 0.5f;
522 vm[V_ONE] = 1.0f;
523 vm[V_INF] = std::numeric_limits<float>::infinity();
524 vm[V_DENORM] = static_cast<float>(1.413e-42); // 0x000003f0
525 vm[V_NAN] = std::numeric_limits<float>::quiet_NaN();
526
527 vm[V_PI_DIV_2] = static_cast<float>(pi / 2);
528 vm[V_DENORM_TIMES_TWO] = vm[V_DENORM] + vm[V_DENORM];
529 vm[V_DEGREES_DENORM] = deFloatDegrees(vm[V_DENORM]);
530
531 float e = std::numeric_limits<float>::epsilon();
532 vm[V_ADD_ARG_A] = 1.0f + 3 * e;
533 vm[V_ADD_ARG_B] = 1.0f;
534 vm[V_SUB_ARG_A] = vm[V_ADD_ARG_A];
535 vm[V_SUB_ARG_B] = 3.0f + 6 * e;
536 vm[V_MUL_ARG_A] = vm[V_ADD_ARG_A];
537 vm[V_MUL_ARG_B] = 5 * e;
538 vm[V_DOT_ARG_A] = vm[V_ADD_ARG_A];
539 vm[V_DOT_ARG_B] = 5 * e;
540 vm[V_CONV_FROM_FP32_ARG] = 1.22334445f;
541 vm[V_CONV_FROM_FP64_ARG] = vm[V_UNUSED];
542
543 int prevRound = fegetround();
544 fesetround(FE_TOWARDZERO);
545 vm[V_ADD_RTZ_RESULT] = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
546 vm[V_SUB_RTZ_RESULT] = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
547 vm[V_MUL_RTZ_RESULT] = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
548 vm[V_DOT_RTZ_RESULT] = vm[V_MUL_RTZ_RESULT] + vm[V_MUL_RTZ_RESULT];
549 vm[V_CONV_TO_FP16_RTZ_RESULT] = vm[V_UNUSED];
550 vm[V_CONV_TO_FP32_RTZ_RESULT] = exactByteEquivalent<deUint32>(0x3f9c968d); // result of conversion from double(1.22334455)
551
552 fesetround(FE_TONEAREST);
553 vm[V_ADD_RTE_RESULT] = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
554 vm[V_SUB_RTE_RESULT] = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
555 vm[V_MUL_RTE_RESULT] = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
556 vm[V_DOT_RTE_RESULT] = vm[V_MUL_RTE_RESULT] + vm[V_MUL_RTE_RESULT];
557 vm[V_CONV_TO_FP16_RTE_RESULT] = vm[V_UNUSED];
558 vm[V_CONV_TO_FP32_RTE_RESULT] = exactByteEquivalent<deUint32>(0x3f9c968e); // result of conversion from double(1.22334455)
559 fesetround(prevRound);
560
561 // there is no precision to store fp64 denorm
562 vm[V_CONV_DENORM_SMALLER] = exactByteEquivalent<deUint32>(0x387c0000); // fp16 denorm
563 vm[V_CONV_DENORM_BIGGER] = vm[V_ZERO];
564 }
565
// Fills the value table for fp64. The expected results of the rounding mode
// tests are computed on the host: the floating-point rounding mode is
// switched with fesetround() around the arithmetic and restored to its
// previous setting afterwards. Statement order matters: several entries are
// derived from entries assigned earlier.
// NOTE(review): no FENV_ACCESS pragma is visible in this part of the file;
// confirm that the compiler evaluates the arithmetic below at run time under
// the selected rounding mode.
template <>
TypeValues<double>::TypeValues()
    : TypeValuesBase()
{
    // NOTE: when updating entries in m_valueIdToFloatType make sure to
    // update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
    ValueMap& vm = m_valueIdToFloatType;
    vm[V_UNUSED] = 0.0;
    vm[V_MINUS_INF] = -std::numeric_limits<double>::infinity();
    vm[V_MINUS_ONE] = -1.0;
    vm[V_MINUS_ZERO] = -0.0;
    vm[V_ZERO] = 0.0;
    vm[V_HALF] = 0.5;
    vm[V_ONE] = 1.0;
    vm[V_INF] = std::numeric_limits<double>::infinity();
    vm[V_DENORM] = 4.98e-321; // 0x00000000000003F0
    vm[V_NAN] = std::numeric_limits<double>::quiet_NaN();

    vm[V_PI_DIV_2] = pi / 2;
    vm[V_DENORM_TIMES_TWO] = vm[V_DENORM] + vm[V_DENORM];
    vm[V_DEGREES_DENORM] = vm[V_UNUSED];

    // arguments of the rounding mode tests, expressed in multiples of the type's epsilon
    double e = std::numeric_limits<double>::epsilon();
    vm[V_ADD_ARG_A] = 1.0 + 3 * e;
    vm[V_ADD_ARG_B] = 1.0;
    vm[V_SUB_ARG_A] = vm[V_ADD_ARG_A];
    vm[V_SUB_ARG_B] = 3.0 + 6 * e;
    vm[V_MUL_ARG_A] = vm[V_ADD_ARG_A];
    vm[V_MUL_ARG_B] = 5 * e;
    vm[V_DOT_ARG_A] = vm[V_ADD_ARG_A];
    vm[V_DOT_ARG_B] = 5 * e;
    vm[V_CONV_FROM_FP32_ARG] = vm[V_UNUSED];
    vm[V_CONV_FROM_FP64_ARG] = 1.22334455;

    // remember the current rounding mode, then compute the round-towards-zero results
    int prevRound = fegetround();
    fesetround(FE_TOWARDZERO);
    vm[V_ADD_RTZ_RESULT] = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
    vm[V_SUB_RTZ_RESULT] = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
    vm[V_MUL_RTZ_RESULT] = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
    vm[V_DOT_RTZ_RESULT] = vm[V_MUL_RTZ_RESULT] + vm[V_MUL_RTZ_RESULT];
    vm[V_CONV_TO_FP16_RTZ_RESULT] = vm[V_UNUSED];
    vm[V_CONV_TO_FP32_RTZ_RESULT] = vm[V_UNUSED];

    // compute the round-to-nearest results, then restore the saved rounding mode
    fesetround(FE_TONEAREST);
    vm[V_ADD_RTE_RESULT] = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
    vm[V_SUB_RTE_RESULT] = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
    vm[V_MUL_RTE_RESULT] = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
    vm[V_DOT_RTE_RESULT] = vm[V_MUL_RTE_RESULT] + vm[V_MUL_RTE_RESULT];
    vm[V_CONV_TO_FP16_RTE_RESULT] = vm[V_UNUSED];
    vm[V_CONV_TO_FP32_RTE_RESULT] = vm[V_UNUSED];
    fesetround(prevRound);

    // fp64 bit patterns of the denorms of the two narrower types
    vm[V_CONV_DENORM_SMALLER] = exactByteEquivalent<deUint64>(0x3f0f800000000000); // 0x03f0 is fp16 denorm
    vm[V_CONV_DENORM_BIGGER] = exactByteEquivalent<deUint64>(0x373f800000000000); // 0x000003f0 is fp32 denorm
}
621
// Each float type (fp16, fp32, fp64) has a specific set of SPIR-V snippets
// that was extracted to a separate template specialization. Those snippets
// are used to compose the final test shaders. With this approach
// parameterization can be done just once per type and reused for many tests.
//
// Derived classes set the type specific data members; updateSpirvSnippets()
// then builds the snippet strings from them.
class TypeSnippetsBase
{
public:
    virtual ~TypeSnippetsBase() = default;

protected:
    // Rebuilds every snippet member from bitWidth, epsilon, denormBase and arrayStride.
    void updateSpirvSnippets();

public: // Type specific data:

    // Number of bits consumed by float type
    string bitWidth;

    // Minimum positive normal
    string epsilon;

    // denormBase is a normal value (found empirically) used to generate denorm value.
    // Denorm is generated by subtracting epsilon from denormBase.
    // denormBase is not a denorm - it is used to create denorm.
    // This value is needed when operations are tested with arguments that were
    // generated in the code. Generated denorm should be the same as denorm
    // used when arguments are passed via input (m_valueIdToFloatType[V_DENORM]).
    // This is required as result of some operations depends on actual denorm value
    // e.g. OpRadians(0x0001) is 0 but OpRadians(0x03f0) is denorm.
    string denormBase;

    string capabilities;
    string extensions;
    string capabilitiesFp16Without16BitStorage;
    string extensionsFp16Without16BitStorage;
    string arrayStride;

    bool loadStoreRequiresShaderFloat16;

public: // Type specific spir-v snippets:

    // Common annotations
    string typeAnnotationsSnippet;

    // Definitions of all types commonly used by operation tests
    string typeDefinitionsSnippet;

    // Definitions of all types commonly used by settings tests
    string minTypeDefinitionsSnippet;

    // Definitions of all constants commonly used by tests
    string constantsDefinitionsSnippet;

    // Map that stores instructions that generate arguments of specified value.
    // Every test that uses generated input will select up to two items from this map
    typedef map<ValueId, string> SnippetMap;
    SnippetMap valueIdToSnippetArgMap;

    // Spir-v snippets that read argument from SSBO
    string argumentsFromInputSnippet;
    string multiArgumentsFromInputSnippet;

    // SSBO with stage input/output definitions
    string inputAnnotationsSnippet;
    string inputDefinitionsSnippet;
    string outputAnnotationsSnippet;
    string multiOutputAnnotationsSnippet;
    string outputDefinitionsSnippet;
    string multiOutputDefinitionsSnippet;

    // Varying is required to pass result from vertex stage to fragment stage,
    // one of requirements was to not use SSBO writes in vertex stage so we
    // need to do that in fragment stage; we also can't pass operation result
    // directly because of interpolation, to avoid it we do a bitcast to uint
    string varyingsTypesSnippet;
    string inputVaryingsSnippet;
    string outputVaryingsSnippet;
    string storeVertexResultSnippet;
    string loadVertexResultSnippet;

    string storeResultsSnippet;
    string multiStoreResultsSnippet;

    // Alternative snippets that updateSpirvSnippets() fills only when bitWidth
    // is "16" (left empty otherwise); they move fp16 data through 32-bit
    // unsigned integers instead of accessing fp16 in the buffers directly
    string argumentsFromInputFp16Snippet;
    string storeResultsFp16Snippet;
    string multiArgumentsFromInputFp16Snippet;
    string multiOutputAnnotationsFp16Snippet;
    string multiStoreResultsFp16Snippet;
    string multiOutputDefinitionsFp16Snippet;
    string inputDefinitionsFp16Snippet;
    string outputDefinitionsFp16Snippet;
    string typeAnnotationsFp16Snippet;
    string typeDefinitionsFp16Snippet;
};
715
updateSpirvSnippets()716 void TypeSnippetsBase::updateSpirvSnippets()
717 {
718 // annotations to types that are commonly used by tests
719 const string typeAnnotationsTemplate =
720 "OpDecorate %type_float_arr_1 ArrayStride " + arrayStride + "\n"
721 "OpDecorate %type_float_arr_2 ArrayStride " + arrayStride + "\n";
722
723 // definition off all types that are commonly used by tests
724 const string typeDefinitionsTemplate =
725 "%type_float = OpTypeFloat " + bitWidth + "\n"
726 "%type_float_uptr = OpTypePointer Uniform %type_float\n"
727 "%type_float_fptr = OpTypePointer Function %type_float\n"
728 "%type_float_vec2 = OpTypeVector %type_float 2\n"
729 "%type_float_vec3 = OpTypeVector %type_float 3\n"
730 "%type_float_vec4 = OpTypeVector %type_float 4\n"
731 "%type_float_vec4_iptr = OpTypePointer Input %type_float_vec4\n"
732 "%type_float_vec4_optr = OpTypePointer Output %type_float_vec4\n"
733 "%type_float_mat2x2 = OpTypeMatrix %type_float_vec2 2\n"
734 "%type_float_arr_1 = OpTypeArray %type_float %c_i32_1\n"
735 "%type_float_arr_2 = OpTypeArray %type_float %c_i32_2\n";
736
737 // minimal type definition set that is used by settings tests
738 const string minTypeDefinitionsTemplate =
739 "%type_float = OpTypeFloat " + bitWidth + "\n"
740 "%type_float_uptr = OpTypePointer Uniform %type_float\n"
741 "%type_float_arr_2 = OpTypeArray %type_float %c_i32_2\n";
742
743 // definition off all constants that are used by tests
744 const string constantsDefinitionsTemplate =
745 "%c_float_n1 = OpConstant %type_float -1\n"
746 "%c_float_0 = OpConstant %type_float 0.0\n"
747 "%c_float_0_5 = OpConstant %type_float 0.5\n"
748 "%c_float_1 = OpConstant %type_float 1\n"
749 "%c_float_2 = OpConstant %type_float 2\n"
750 "%c_float_3 = OpConstant %type_float 3\n"
751 "%c_float_4 = OpConstant %type_float 4\n"
752 "%c_float_5 = OpConstant %type_float 5\n"
753 "%c_float_6 = OpConstant %type_float 6\n"
754 "%c_float_eps = OpConstant %type_float " + epsilon + "\n"
755 "%c_float_denorm_base = OpConstant %type_float " + denormBase + "\n";
756
757 // when arguments are read from SSBO this snipped is placed in main function
758 const string argumentsFromInputTemplate =
759 "%arg1loc = OpAccessChain %type_float_uptr %ssbo_in %c_i32_0 %c_i32_0\n"
760 "%arg1 = OpLoad %type_float %arg1loc\n"
761 "%arg2loc = OpAccessChain %type_float_uptr %ssbo_in %c_i32_0 %c_i32_1\n"
762 "%arg2 = OpLoad %type_float %arg2loc\n";
763
764 const string multiArgumentsFromInputTemplate =
765 "%arg1_float_loc = OpAccessChain %type_float_uptr %ssbo_in %c_i32_${attr} %c_i32_0\n"
766 "%arg2_float_loc = OpAccessChain %type_float_uptr %ssbo_in %c_i32_${attr} %c_i32_1\n"
767 "%arg1_float = OpLoad %type_float %arg1_float_loc\n"
768 "%arg2_float = OpLoad %type_float %arg2_float_loc\n";
769
770 // when tested shader stage reads from SSBO it has to have this snippet
771 inputAnnotationsSnippet =
772 "OpMemberDecorate %SSBO_in 0 Offset 0\n"
773 "OpDecorate %SSBO_in BufferBlock\n"
774 "OpDecorate %ssbo_in DescriptorSet 0\n"
775 "OpDecorate %ssbo_in Binding 0\n"
776 "OpDecorate %ssbo_in NonWritable\n";
777
778 const string inputDefinitionsTemplate =
779 "%SSBO_in = OpTypeStruct %type_float_arr_2\n"
780 "%up_SSBO_in = OpTypePointer Uniform %SSBO_in\n"
781 "%ssbo_in = OpVariable %up_SSBO_in Uniform\n";
782
783 outputAnnotationsSnippet =
784 "OpMemberDecorate %SSBO_out 0 Offset 0\n"
785 "OpDecorate %SSBO_out BufferBlock\n"
786 "OpDecorate %ssbo_out DescriptorSet 0\n"
787 "OpDecorate %ssbo_out Binding 1\n";
788
789 const string multiOutputAnnotationsTemplate =
790 "OpMemberDecorate %SSBO_float_out 0 Offset 0\n"
791 "OpDecorate %type_float_arr_2 ArrayStride "+ arrayStride + "\n"
792 "OpDecorate %SSBO_float_out BufferBlock\n"
793 "OpDecorate %ssbo_float_out DescriptorSet 0\n";
794
795 const string outputDefinitionsTemplate =
796 "%SSBO_out = OpTypeStruct %type_float_arr_1\n"
797 "%up_SSBO_out = OpTypePointer Uniform %SSBO_out\n"
798 "%ssbo_out = OpVariable %up_SSBO_out Uniform\n";
799
800 const string multiOutputDefinitionsTemplate =
801 "%SSBO_float_out = OpTypeStruct %type_float\n"
802 "%up_SSBO_float_out = OpTypePointer Uniform %SSBO_float_out\n"
803 "%ssbo_float_out = OpVariable %up_SSBO_float_out Uniform\n";
804
805 // this snippet is used by compute and fragment stage but not by vertex stage
806 const string storeResultsTemplate =
807 "%outloc = OpAccessChain %type_float_uptr %ssbo_out %c_i32_0 %c_i32_0\n"
808 "OpStore %outloc %result\n";
809
810 const string multiStoreResultsTemplate =
811 "%outloc" + bitWidth + " = OpAccessChain %type_float_uptr %ssbo_float_out %c_i32_0\n"
812 " OpStore %outloc" + bitWidth + " %result" + bitWidth + "\n";
813
814 const string typeToken = "_float";
815 const string typeName = "_f" + bitWidth;
816
817 typeAnnotationsSnippet = replace(typeAnnotationsTemplate, typeToken, typeName);
818 typeDefinitionsSnippet = replace(typeDefinitionsTemplate, typeToken, typeName);
819 minTypeDefinitionsSnippet = replace(minTypeDefinitionsTemplate, typeToken, typeName);
820 constantsDefinitionsSnippet = replace(constantsDefinitionsTemplate, typeToken, typeName);
821 argumentsFromInputSnippet = replace(argumentsFromInputTemplate, typeToken, typeName);
822 multiArgumentsFromInputSnippet = replace(multiArgumentsFromInputTemplate, typeToken, typeName);
823 inputDefinitionsSnippet = replace(inputDefinitionsTemplate, typeToken, typeName);
824 multiOutputAnnotationsSnippet = replace(multiOutputAnnotationsTemplate, typeToken, typeName);
825 outputDefinitionsSnippet = replace(outputDefinitionsTemplate, typeToken, typeName);
826 multiOutputDefinitionsSnippet = replace(multiOutputDefinitionsTemplate, typeToken, typeName);
827 storeResultsSnippet = replace(storeResultsTemplate, typeToken, typeName);
828 multiStoreResultsSnippet = replace(multiStoreResultsTemplate, typeToken, typeName);
829
830 argumentsFromInputFp16Snippet = "";
831 storeResultsFp16Snippet = "";
832 multiArgumentsFromInputFp16Snippet = "";
833 multiOutputAnnotationsFp16Snippet = "";
834 multiStoreResultsFp16Snippet = "";
835 multiOutputDefinitionsFp16Snippet = "";
836 inputDefinitionsFp16Snippet = "";
837 typeAnnotationsFp16Snippet = "";
838 outputDefinitionsFp16Snippet = "";
839 typeDefinitionsFp16Snippet = "";
840
841 if (bitWidth.compare("16") == 0)
842 {
843 typeDefinitionsFp16Snippet =
844 "%type_u32_uptr = OpTypePointer Uniform %type_u32\n"
845 "%type_u32_arr_1 = OpTypeArray %type_u32 %c_i32_1\n";
846
847 typeAnnotationsFp16Snippet = "OpDecorate %type_u32_arr_1 ArrayStride 4\n";
848 const string inputToken = "_f16_arr_2";
849 const string inputName = "_u32_arr_1";
850 inputDefinitionsFp16Snippet = replace(inputDefinitionsSnippet, inputToken, inputName);
851
852 argumentsFromInputFp16Snippet =
853 "%argloc = OpAccessChain %type_u32_uptr %ssbo_in %c_i32_0 %c_i32_0\n"
854 "%inval = OpLoad %type_u32 %argloc\n"
855 "%arg = OpBitcast %type_f16_vec2 %inval\n"
856 "%arg1 = OpCompositeExtract %type_f16 %arg 0\n"
857 "%arg2 = OpCompositeExtract %type_f16 %arg 1\n";
858
859 const string outputToken = "_f16_arr_1";
860 const string outputName = "_u32_arr_1";
861 outputDefinitionsFp16Snippet = replace(outputDefinitionsSnippet, outputToken, outputName);
862
863 storeResultsFp16Snippet =
864 "%result_f16_vec2 = OpCompositeConstruct %type_f16_vec2 %result %c_f16_0\n"
865 "%result_u32 = OpBitcast %type_u32 %result_f16_vec2\n"
866 "%outloc = OpAccessChain %type_u32_uptr %ssbo_out %c_i32_0 %c_i32_0\n"
867 "OpStore %outloc %result_u32\n";
868
869 multiArgumentsFromInputFp16Snippet =
870 "%arg_u32_loc = OpAccessChain %type_u32_uptr %ssbo_in %c_i32_${attr} %c_i32_0\n"
871 "%arg_u32 = OpLoad %type_u32 %arg_u32_loc\n"
872 "%arg_f16_vec2 = OpBitcast %type_f16_vec2 %arg_u32\n"
873 "%arg1_f16 = OpCompositeExtract %type_f16 %arg_f16_vec2 0\n"
874 "%arg2_f16 = OpCompositeExtract %type_f16 %arg_f16_vec2 1\n";
875
876 multiOutputAnnotationsFp16Snippet =
877 "OpMemberDecorate %SSBO_u32_out 0 Offset 0\n"
878 "OpDecorate %type_u32_arr_1 ArrayStride 4\n"
879 "OpDecorate %SSBO_u32_out BufferBlock\n"
880 "OpDecorate %ssbo_u32_out DescriptorSet 0\n";
881
882 multiStoreResultsFp16Snippet =
883 "%outloc_u32 = OpAccessChain %type_u32_uptr %ssbo_u32_out %c_i32_0\n"
884 "%result16_vec2 = OpCompositeConstruct %type_f16_vec2 %result16 %c_f16_0\n"
885 "%result_u32 = OpBitcast %type_u32 %result16_vec2\n"
886 " OpStore %outloc_u32 %result_u32\n";
887
888 multiOutputDefinitionsFp16Snippet =
889 "%c_f16_0 = OpConstant %type_f16 0.0\n"
890 "%SSBO_u32_out = OpTypeStruct %type_u32\n"
891 "%up_SSBO_u32_out = OpTypePointer Uniform %SSBO_u32_out\n"
892 "%ssbo_u32_out = OpVariable %up_SSBO_u32_out Uniform\n";
893 }
894
	// NOTE: only values used as _generated_ arguments in test operations
	//       need to be in this map; arguments that are only used by tests
	//       that grab arguments from input do not need to be in this map
	// NOTE: when updating entries in valueIdToSnippetArgMap make
	//       sure to also update m_valueIdToFloatType for all float widths
900 SnippetMap& sm = valueIdToSnippetArgMap;
901 sm[V_UNUSED] = "OpFSub %type_float %c_float_0 %c_float_0\n";
902 sm[V_MINUS_INF] = "OpFDiv %type_float %c_float_n1 %c_float_0\n";
903 sm[V_MINUS_ONE] = "OpFAdd %type_float %c_float_n1 %c_float_0\n";
904 sm[V_MINUS_ZERO] = "OpFMul %type_float %c_float_n1 %c_float_0\n";
905 sm[V_ZERO] = "OpFMul %type_float %c_float_0 %c_float_0\n";
906 sm[V_HALF] = "OpFAdd %type_float %c_float_0_5 %c_float_0\n";
907 sm[V_ONE] = "OpFAdd %type_float %c_float_1 %c_float_0\n";
908 sm[V_INF] = "OpFDiv %type_float %c_float_1 %c_float_0\n"; // x / 0 == Inf
909 sm[V_DENORM] = "OpFSub %type_float %c_float_denorm_base %c_float_eps\n";
910 sm[V_NAN] = "OpFDiv %type_float %c_float_0 %c_float_0\n"; // 0 / 0 == Nan
911
912 map<ValueId, string>::iterator it;
913 for ( it = sm.begin(); it != sm.end(); it++ )
914 sm[it->first] = replace(it->second, typeToken, typeName);
915 }
916
917 typedef de::SharedPtr<TypeSnippetsBase> TypeSnippetsSP;
918
919 template<typename FLOAT_TYPE>
920 class TypeSnippets: public TypeSnippetsBase
921 {
922 public:
923 TypeSnippets();
924 };
925
926 template<>
TypeSnippets()927 TypeSnippets<deFloat16>::TypeSnippets()
928 {
929 bitWidth = "16";
930 epsilon = "6.104e-5"; // 2^-14 = 0x0400
931
932 // 1.2113e-4 is 0x07f0 which after substracting epsilon will give 0x03f0 (same as vm[V_DENORM])
933 // NOTE: constants in SPIR-V cant be specified as exact fp16 - there is conversion from double to fp16
934 denormBase = "1.2113e-4";
935
936 capabilities = "OpCapability StorageUniform16\n";
937 extensions = "OpExtension \"SPV_KHR_16bit_storage\"\n";
938
939 capabilitiesFp16Without16BitStorage = "OpCapability Float16\n";
940 extensionsFp16Without16BitStorage = "";
941
942 arrayStride = "2";
943
944 varyingsTypesSnippet =
945 "%type_u32_iptr = OpTypePointer Input %type_u32\n"
946 "%type_u32_optr = OpTypePointer Output %type_u32\n";
947 inputVaryingsSnippet =
948 "%BP_vertex_result = OpVariable %type_u32_iptr Input\n";
949 outputVaryingsSnippet =
950 "%BP_vertex_result = OpVariable %type_u32_optr Output\n";
951 storeVertexResultSnippet =
952 "%tmp_vec2 = OpCompositeConstruct %type_f16_vec2 %result %c_f16_0\n"
953 "%packed_result = OpBitcast %type_u32 %tmp_vec2\n"
954 "OpStore %BP_vertex_result %packed_result\n";
955 loadVertexResultSnippet =
956 "%packed_result = OpLoad %type_u32 %BP_vertex_result\n"
957 "%tmp_vec2 = OpBitcast %type_f16_vec2 %packed_result\n"
958 "%result = OpCompositeExtract %type_f16 %tmp_vec2 0\n";
959
960 loadStoreRequiresShaderFloat16 = true;
961
962 updateSpirvSnippets();
963 }
964
965 template<>
TypeSnippets()966 TypeSnippets<float>::TypeSnippets()
967 {
968 bitWidth = "32";
969 epsilon = "1.175494351e-38";
970 denormBase = "1.1756356e-38";
971 capabilities = "";
972 extensions = "";
973 capabilitiesFp16Without16BitStorage = "";
974 extensionsFp16Without16BitStorage = "";
975 arrayStride = "4";
976
977 varyingsTypesSnippet =
978 "%type_u32_iptr = OpTypePointer Input %type_u32\n"
979 "%type_u32_optr = OpTypePointer Output %type_u32\n";
980 inputVaryingsSnippet =
981 "%BP_vertex_result = OpVariable %type_u32_iptr Input\n";
982 outputVaryingsSnippet =
983 "%BP_vertex_result = OpVariable %type_u32_optr Output\n";
984 storeVertexResultSnippet =
985 "%packed_result = OpBitcast %type_u32 %result\n"
986 "OpStore %BP_vertex_result %packed_result\n";
987 loadVertexResultSnippet =
988 "%packed_result = OpLoad %type_u32 %BP_vertex_result\n"
989 "%result = OpBitcast %type_f32 %packed_result\n";
990
991 loadStoreRequiresShaderFloat16 = false;
992
993 updateSpirvSnippets();
994 }
995
996 template<>
TypeSnippets()997 TypeSnippets<double>::TypeSnippets()
998 {
999 bitWidth = "64";
1000 epsilon = "2.2250738585072014e-308"; // 0x0010000000000000
1001 denormBase = "2.2250738585076994e-308"; // 0x00100000000003F0
1002 capabilities = "OpCapability Float64\n";
1003 extensions = "";
1004 capabilitiesFp16Without16BitStorage = "";
1005 extensionsFp16Without16BitStorage = "";
1006 arrayStride = "8";
1007
1008 varyingsTypesSnippet =
1009 "%type_u32_vec2_iptr = OpTypePointer Input %type_u32_vec2\n"
1010 "%type_u32_vec2_optr = OpTypePointer Output %type_u32_vec2\n";
1011 inputVaryingsSnippet =
1012 "%BP_vertex_result = OpVariable %type_u32_vec2_iptr Input\n";
1013 outputVaryingsSnippet =
1014 "%BP_vertex_result = OpVariable %type_u32_vec2_optr Output\n";
1015 storeVertexResultSnippet =
1016 "%packed_result = OpBitcast %type_u32_vec2 %result\n"
1017 "OpStore %BP_vertex_result %packed_result\n";
1018 loadVertexResultSnippet =
1019 "%packed_result = OpLoad %type_u32_vec2 %BP_vertex_result\n"
1020 "%result = OpBitcast %type_f64 %packed_result\n";
1021
1022 loadStoreRequiresShaderFloat16 = false;
1023
1024 updateSpirvSnippets();
1025 }
1026
1027 class TypeTestResultsBase
1028 {
1029 public:
~TypeTestResultsBase()1030 virtual ~TypeTestResultsBase() {}
1031 FloatType floatType() const;
1032
1033 protected:
1034 FloatType m_floatType;
1035
1036 public:
1037 // Vectors containing test data for float controls
1038 vector<BinaryCase> binaryOpFTZ;
1039 vector<UnaryCase> unaryOpFTZ;
1040 vector<BinaryCase> binaryOpDenormPreserve;
1041 vector<UnaryCase> unaryOpDenormPreserve;
1042 };
1043
floatType() const1044 FloatType TypeTestResultsBase::floatType() const
1045 {
1046 return m_floatType;
1047 }
1048
1049 typedef de::SharedPtr<TypeTestResultsBase> TypeTestResultsSP;
1050
1051 template<typename FLOAT_TYPE>
1052 class TypeTestResults: public TypeTestResultsBase
1053 {
1054 public:
1055 TypeTestResults();
1056 };
1057
1058 template<>
TypeTestResults()1059 TypeTestResults<deFloat16>::TypeTestResults()
1060 {
1061 m_floatType = FP16;
1062
1063 // note: there are many FTZ test cases that can produce diferent result depending
1064 // on input denorm being flushed or not; because of that FTZ tests can be limited
1065 // to those that return denorm as those are the ones affected by tested extension
1066 const BinaryCase binaryOpFTZArr[] = {
1067 //operation den op one den op den den op inf den op nan
1068 { O_ADD, V_ONE, V_ZERO_OR_DENORM_TIMES_TWO,
1069 V_INF, V_UNUSED },
1070 { O_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_UNUSED },
1071 { O_MUL, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1072 { O_DIV, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED },
1073 { O_REM, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED },
1074 { O_MOD, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED },
1075 { O_VEC_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1076 { O_VEC_MUL_M, V_ZERO_OR_DENORM_TIMES_TWO,
1077 V_ZERO, V_UNUSED, V_UNUSED },
1078 { O_MAT_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1079 { O_MAT_MUL_V, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1080 { O_MAT_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1081 { O_OUT_PROD, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1082 { O_DOT, V_ZERO_OR_DENORM_TIMES_TWO,
1083 V_ZERO, V_UNUSED, V_UNUSED },
1084 { O_ATAN2, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED },
1085 { O_POW, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED },
1086 { O_MIX, V_HALF, V_ZERO, V_INF, V_UNUSED },
1087 { O_MIN, V_ZERO, V_ZERO, V_ZERO, V_UNUSED },
1088 { O_MAX, V_ONE, V_ZERO, V_INF, V_UNUSED },
1089 { O_CLAMP, V_ONE, V_ZERO, V_INF, V_UNUSED },
1090 { O_STEP, V_ONE, V_ONE, V_ONE, V_UNUSED },
1091 { O_SSTEP, V_HALF, V_ONE, V_ZERO, V_UNUSED },
1092 { O_FMA, V_HALF, V_HALF, V_UNUSED, V_UNUSED },
1093 { O_FACE_FWD, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE },
1094 { O_NMIN, V_ZERO, V_ZERO, V_ZERO, V_ZERO },
1095 { O_NMAX, V_ONE, V_ZERO, V_INF, V_ZERO },
1096 { O_NCLAMP, V_ONE, V_ZERO, V_INF, V_ZERO },
1097 { O_DIST, V_ONE, V_ZERO, V_INF, V_UNUSED },
1098 { O_CROSS, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1099 };
1100
1101 const UnaryCase unaryOpFTZArr[] = {
1102 //operation op den
1103 { O_NEGATE, V_MINUS_ZERO },
1104 { O_ROUND, V_ZERO },
1105 { O_ROUND_EV, V_ZERO },
1106 { O_TRUNC, V_ZERO },
1107 { O_ABS, V_ZERO },
1108 { O_FLOOR, V_ZERO },
1109 { O_CEIL, V_ZERO_OR_ONE },
1110 { O_FRACT, V_ZERO },
1111 { O_RADIANS, V_ZERO },
1112 { O_DEGREES, V_ZERO },
1113 { O_SIN, V_ZERO },
1114 { O_COS, V_TRIG_ONE },
1115 { O_TAN, V_ZERO },
1116 { O_ASIN, V_ZERO },
1117 { O_ACOS, V_PI_DIV_2 },
1118 { O_ATAN, V_ZERO },
1119 { O_SINH, V_ZERO },
1120 { O_COSH, V_ONE },
1121 { O_TANH, V_ZERO },
1122 { O_ASINH, V_ZERO },
1123 { O_ACOSH, V_UNUSED },
1124 { O_ATANH, V_ZERO },
1125 { O_EXP, V_ONE },
1126 { O_LOG, V_MINUS_INF_OR_LOG_DENORM },
1127 { O_EXP2, V_ONE },
1128 { O_LOG2, V_MINUS_INF_OR_LOG2_DENORM },
1129 { O_SQRT, V_ZERO_OR_SQRT_DENORM },
1130 { O_INV_SQRT, V_INF_OR_INV_SQRT_DENORM },
1131 { O_MAT_DET, V_ZERO },
1132 { O_MAT_INV, V_ZERO_OR_MINUS_ZERO },
1133 { O_MODF, V_ZERO },
1134 { O_MODF_ST, V_ZERO },
1135 { O_NORMALIZE, V_ZERO },
1136 { O_REFLECT, V_ZERO },
1137 { O_REFRACT, V_ZERO },
1138 { O_LENGHT, V_ZERO },
1139 };
1140
1141 const BinaryCase binaryOpDenormPreserveArr[] = {
1142 //operation den op one den op den den op inf den op nan
1143 { O_PHI, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1144 { O_SELECT, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1145 { O_ADD, V_ONE, V_DENORM_TIMES_TWO, V_INF, V_NAN },
1146 { O_SUB, V_MINUS_ONE_OR_CLOSE, V_ZERO, V_MINUS_INF, V_NAN },
1147 { O_MUL, V_DENORM, V_ZERO, V_INF, V_NAN },
1148 { O_VEC_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN },
1149 { O_VEC_MUL_M, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1150 { O_MAT_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN },
1151 { O_MAT_MUL_V, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1152 { O_MAT_MUL_M, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1153 { O_OUT_PROD, V_DENORM, V_ZERO, V_INF, V_NAN },
1154 { O_DOT, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1155 { O_MIX, V_HALF, V_DENORM, V_INF, V_NAN },
1156 { O_FMA, V_HALF, V_HALF, V_INF, V_NAN },
1157 { O_MIN, V_DENORM, V_DENORM, V_DENORM, V_UNUSED },
1158 { O_MAX, V_ONE, V_DENORM, V_INF, V_UNUSED },
1159 { O_CLAMP, V_ONE, V_DENORM, V_INF, V_UNUSED },
1160 { O_NMIN, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1161 { O_NMAX, V_ONE, V_DENORM, V_INF, V_DENORM },
1162 { O_NCLAMP, V_ONE, V_DENORM, V_INF, V_DENORM },
1163 };
1164
1165 const UnaryCase unaryOpDenormPreserveArr[] = {
1166 //operation op den
1167 { O_RETURN_VAL, V_DENORM },
1168 { O_D_EXTRACT, V_DENORM },
1169 { O_D_INSERT, V_DENORM },
1170 { O_SHUFFLE, V_DENORM },
1171 { O_COMPOSITE, V_DENORM },
1172 { O_COMPOSITE_INS, V_DENORM },
1173 { O_COPY, V_DENORM },
1174 { O_TRANSPOSE, V_DENORM },
1175 { O_NEGATE, V_DENORM },
1176 { O_ABS, V_DENORM },
1177 { O_SIGN, V_ONE },
1178 { O_RADIANS, V_DENORM },
1179 { O_DEGREES, V_DEGREES_DENORM },
1180 };
1181
1182 binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1183 binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1184 unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1185 unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1186 binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1187 binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1188 unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1189 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1190 }
1191
1192 template<>
TypeTestResults()1193 TypeTestResults<float>::TypeTestResults()
1194 {
1195 m_floatType = FP32;
1196
1197 const BinaryCase binaryOpFTZArr[] = {
1198 //operation den op one den op den den op inf den op nan
1199 { O_ADD, V_ONE, V_ZERO, V_INF, V_UNUSED },
1200 { O_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_UNUSED },
1201 { O_MUL, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1202 { O_DIV, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED },
1203 { O_REM, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED },
1204 { O_MOD, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED },
1205 { O_VEC_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1206 { O_VEC_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1207 { O_MAT_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1208 { O_MAT_MUL_V, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1209 { O_MAT_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1210 { O_OUT_PROD, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1211 { O_DOT, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1212 { O_ATAN2, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED },
1213 { O_POW, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED },
1214 { O_MIX, V_HALF, V_ZERO, V_INF, V_UNUSED },
1215 { O_MIN, V_ZERO, V_ZERO, V_ZERO, V_UNUSED },
1216 { O_MAX, V_ONE, V_ZERO, V_INF, V_UNUSED },
1217 { O_CLAMP, V_ONE, V_ZERO, V_INF, V_UNUSED },
1218 { O_STEP, V_ONE, V_ONE, V_ONE, V_UNUSED },
1219 { O_SSTEP, V_HALF, V_ONE, V_ZERO, V_UNUSED },
1220 { O_FMA, V_HALF, V_HALF, V_UNUSED, V_UNUSED },
1221 { O_FACE_FWD, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE },
1222 { O_NMIN, V_ZERO, V_ZERO, V_ZERO, V_ZERO },
1223 { O_NMAX, V_ONE, V_ZERO, V_INF, V_ZERO },
1224 { O_NCLAMP, V_ONE, V_ZERO, V_INF, V_ZERO },
1225 { O_DIST, V_ONE, V_ZERO, V_INF, V_UNUSED },
1226 { O_CROSS, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1227 };
1228
1229 const UnaryCase unaryOpFTZArr[] = {
1230 //operation op den
1231 { O_NEGATE, V_MINUS_ZERO },
1232 { O_ROUND, V_ZERO },
1233 { O_ROUND_EV, V_ZERO },
1234 { O_TRUNC, V_ZERO },
1235 { O_ABS, V_ZERO },
1236 { O_FLOOR, V_ZERO },
1237 { O_CEIL, V_ZERO_OR_ONE },
1238 { O_FRACT, V_ZERO },
1239 { O_RADIANS, V_ZERO },
1240 { O_DEGREES, V_ZERO },
1241 { O_SIN, V_ZERO },
1242 { O_COS, V_TRIG_ONE },
1243 { O_TAN, V_ZERO },
1244 { O_ASIN, V_ZERO },
1245 { O_ACOS, V_PI_DIV_2 },
1246 { O_ATAN, V_ZERO },
1247 { O_SINH, V_ZERO },
1248 { O_COSH, V_ONE },
1249 { O_TANH, V_ZERO },
1250 { O_ASINH, V_ZERO },
1251 { O_ACOSH, V_UNUSED },
1252 { O_ATANH, V_ZERO },
1253 { O_EXP, V_ONE },
1254 { O_LOG, V_MINUS_INF_OR_LOG_DENORM },
1255 { O_EXP2, V_ONE },
1256 { O_LOG2, V_MINUS_INF_OR_LOG2_DENORM },
1257 { O_SQRT, V_ZERO_OR_SQRT_DENORM },
1258 { O_INV_SQRT, V_INF_OR_INV_SQRT_DENORM },
1259 { O_MAT_DET, V_ZERO },
1260 { O_MAT_INV, V_ZERO_OR_MINUS_ZERO },
1261 { O_MODF, V_ZERO },
1262 { O_MODF_ST, V_ZERO },
1263 { O_NORMALIZE, V_ZERO },
1264 { O_REFLECT, V_ZERO },
1265 { O_REFRACT, V_ZERO },
1266 { O_LENGHT, V_ZERO },
1267 };
1268
1269 const BinaryCase binaryOpDenormPreserveArr[] = {
1270 //operation den op one den op den den op inf den op nan
1271 { O_PHI, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1272 { O_SELECT, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1273 { O_ADD, V_ONE, V_DENORM_TIMES_TWO, V_INF, V_NAN },
1274 { O_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_NAN },
1275 { O_MUL, V_DENORM, V_ZERO, V_INF, V_NAN },
1276 { O_VEC_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN },
1277 { O_VEC_MUL_M, V_DENORM, V_ZERO, V_INF, V_NAN },
1278 { O_MAT_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN },
1279 { O_MAT_MUL_V, V_DENORM, V_ZERO, V_INF, V_NAN },
1280 { O_MAT_MUL_M, V_DENORM, V_ZERO, V_INF, V_NAN },
1281 { O_OUT_PROD, V_DENORM, V_ZERO, V_INF, V_NAN },
1282 { O_DOT, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1283 { O_MIX, V_HALF, V_DENORM, V_INF, V_NAN },
1284 { O_FMA, V_HALF, V_HALF, V_INF, V_NAN },
1285 { O_MIN, V_DENORM, V_DENORM, V_DENORM, V_UNUSED },
1286 { O_MAX, V_ONE, V_DENORM, V_INF, V_UNUSED },
1287 { O_CLAMP, V_ONE, V_DENORM, V_INF, V_UNUSED },
1288 { O_NMIN, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1289 { O_NMAX, V_ONE, V_DENORM, V_INF, V_DENORM },
1290 { O_NCLAMP, V_ONE, V_DENORM, V_INF, V_DENORM },
1291 };
1292
1293 const UnaryCase unaryOpDenormPreserveArr[] = {
1294 //operation op den
1295 { O_RETURN_VAL, V_DENORM },
1296 { O_D_EXTRACT, V_DENORM },
1297 { O_D_INSERT, V_DENORM },
1298 { O_SHUFFLE, V_DENORM },
1299 { O_COMPOSITE, V_DENORM },
1300 { O_COMPOSITE_INS, V_DENORM },
1301 { O_COPY, V_DENORM },
1302 { O_TRANSPOSE, V_DENORM },
1303 { O_NEGATE, V_DENORM },
1304 { O_ABS, V_DENORM },
1305 { O_SIGN, V_ONE },
1306 { O_RADIANS, V_DENORM },
1307 { O_DEGREES, V_DEGREES_DENORM },
1308 };
1309
1310 binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1311 binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1312 unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1313 unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1314 binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1315 binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1316 unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1317 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1318 }
1319
1320 template<>
TypeTestResults()1321 TypeTestResults<double>::TypeTestResults()
1322 {
1323 m_floatType = FP64;
1324
1325 // fp64 is supported by fewer operations then fp16 and fp32
1326 // e.g. Radians and Degrees functions are not supported
1327 const BinaryCase binaryOpFTZArr[] = {
1328 //operation den op one den op den den op inf den op nan
1329 { O_ADD, V_ONE, V_ZERO, V_INF, V_UNUSED },
1330 { O_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_UNUSED },
1331 { O_MUL, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1332 { O_DIV, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED },
1333 { O_REM, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED },
1334 { O_MOD, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED },
1335 { O_VEC_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1336 { O_VEC_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1337 { O_MAT_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1338 { O_MAT_MUL_V, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1339 { O_MAT_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1340 { O_OUT_PROD, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1341 { O_DOT, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1342 { O_MIX, V_HALF, V_ZERO, V_INF, V_UNUSED },
1343 { O_MIN, V_ZERO, V_ZERO, V_ZERO, V_UNUSED },
1344 { O_MAX, V_ONE, V_ZERO, V_INF, V_UNUSED },
1345 { O_CLAMP, V_ONE, V_ZERO, V_INF, V_UNUSED },
1346 { O_STEP, V_ONE, V_ONE, V_ONE, V_UNUSED },
1347 { O_SSTEP, V_HALF, V_ONE, V_ZERO, V_UNUSED },
1348 { O_FMA, V_HALF, V_HALF, V_UNUSED, V_UNUSED },
1349 { O_FACE_FWD, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE },
1350 { O_NMIN, V_ZERO, V_ZERO, V_ZERO, V_ZERO },
1351 { O_NMAX, V_ONE, V_ZERO, V_INF, V_ZERO },
1352 { O_NCLAMP, V_ONE, V_ZERO, V_INF, V_ZERO },
1353 { O_DIST, V_ONE, V_ZERO, V_INF, V_UNUSED },
1354 { O_CROSS, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1355 };
1356
1357 const UnaryCase unaryOpFTZArr[] = {
1358 //operation op den
1359 { O_NEGATE, V_MINUS_ZERO },
1360 { O_ROUND, V_ZERO },
1361 { O_ROUND_EV, V_ZERO },
1362 { O_TRUNC, V_ZERO },
1363 { O_ABS, V_ZERO },
1364 { O_FLOOR, V_ZERO },
1365 { O_CEIL, V_ZERO_OR_ONE },
1366 { O_FRACT, V_ZERO },
1367 { O_SQRT, V_ZERO_OR_SQRT_DENORM },
1368 { O_INV_SQRT, V_INF_OR_INV_SQRT_DENORM },
1369 { O_MAT_DET, V_ZERO },
1370 { O_MAT_INV, V_ZERO_OR_MINUS_ZERO },
1371 { O_MODF, V_ZERO },
1372 { O_MODF_ST, V_ZERO },
1373 { O_NORMALIZE, V_ZERO },
1374 { O_REFLECT, V_ZERO },
1375 { O_LENGHT, V_ZERO },
1376 };
1377
1378 const BinaryCase binaryOpDenormPreserveArr[] = {
1379 //operation den op one den op den den op inf den op nan
1380 { O_PHI, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1381 { O_SELECT, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1382 { O_ADD, V_ONE, V_DENORM_TIMES_TWO, V_INF, V_NAN },
1383 { O_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_NAN },
1384 { O_MUL, V_DENORM, V_ZERO, V_INF, V_NAN },
1385 { O_VEC_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN },
1386 { O_VEC_MUL_M, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1387 { O_MAT_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN },
1388 { O_MAT_MUL_V, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1389 { O_MAT_MUL_M, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1390 { O_OUT_PROD, V_DENORM, V_ZERO, V_INF, V_NAN },
1391 { O_DOT, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1392 { O_MIX, V_HALF, V_DENORM, V_INF, V_NAN },
1393 { O_FMA, V_HALF, V_HALF, V_INF, V_NAN },
1394 { O_MIN, V_DENORM, V_DENORM, V_DENORM, V_UNUSED },
1395 { O_MAX, V_ONE, V_DENORM, V_INF, V_UNUSED },
1396 { O_CLAMP, V_ONE, V_DENORM, V_INF, V_UNUSED },
1397 { O_NMIN, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1398 { O_NMAX, V_ONE, V_DENORM, V_INF, V_DENORM },
1399 { O_NCLAMP, V_ONE, V_DENORM, V_INF, V_DENORM },
1400 };
1401
1402 const UnaryCase unaryOpDenormPreserveArr[] = {
1403 //operation op den
1404 { O_RETURN_VAL, V_DENORM },
1405 { O_D_EXTRACT, V_DENORM },
1406 { O_D_INSERT, V_DENORM },
1407 { O_SHUFFLE, V_DENORM },
1408 { O_COMPOSITE, V_DENORM },
1409 { O_COMPOSITE_INS, V_DENORM },
1410 { O_COPY, V_DENORM },
1411 { O_TRANSPOSE, V_DENORM },
1412 { O_NEGATE, V_DENORM },
1413 { O_ABS, V_DENORM },
1414 { O_SIGN, V_ONE },
1415 };
1416
1417 binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1418 binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1419 unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1420 unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1421 binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1422 binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1423 unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1424 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1425 }
1426
1427 // Operation structure holds data needed to test specified SPIR-V operation. This class contains
1428 // additional annotations, additional types and aditional constants that should be properly included
1429 // in SPIR-V code. Commands attribute in this structure contains code that performs tested operation
1430 // on given arguments, in some cases verification is also performed there.
1431 // All snipets stroed in this structure are generic and can be specialized for fp16, fp32 or fp64,
1432 // thanks to that this data can be shared by many OperationTestCase instances (testing diferent
1433 // float behaviours on diferent float widths).
1434 struct Operation
1435 {
1436 // operation name is included in test case name
1437 const char* name;
1438
1439 // How extensively is the floating point type used?
1440 FloatUsage floatUsage;
1441
1442 // operation specific spir-v snippets that will be
1443 // placed in proper places in final test shader
1444 const char* annotations;
1445 const char* types;
1446 const char* constants;
1447 const char* variables;
1448 const char* functions;
1449 const char* commands;
1450
1451 // conversion operations operate on one float type and produce float
1452 // type with different bit width; restrictedInputType is used only when
1453 // isInputTypeRestricted is set to true and it restricts usage of this
1454 // operation to specified input type
1455 bool isInputTypeRestricted;
1456 FloatType restrictedInputType;
1457
1458 // arguments for OpSpecConstant need to be specified also as constant
1459 bool isSpecConstant;
1460
1461 // set if c_float* constant is used in operation
1462 FloatStatementUsageFlags statementUsageFlags;
1463
Operationvkt::SpirVAssembly::__anonedfcea740111::Operation1464 Operation() {}
1465
1466 // Minimal constructor - used by most of operations
Operationvkt::SpirVAssembly::__anonedfcea740111::Operation1467 Operation(const char* _name, FloatUsage _floatUsage, const char* _commands, const FloatStatementUsageFlags _statementUsageFlags = 0)
1468 : name(_name)
1469 , floatUsage(_floatUsage)
1470 , annotations("")
1471 , types("")
1472 , constants("")
1473 , variables("")
1474 , functions("")
1475 , commands(_commands)
1476 , isInputTypeRestricted(false)
1477 , restrictedInputType(FP16) // not used as isInputTypeRestricted is false
1478 , isSpecConstant(false)
1479 , statementUsageFlags(_statementUsageFlags)
1480 {}
1481
1482 // Conversion operations constructor (used also by conversions done in SpecConstantOp)
Operationvkt::SpirVAssembly::__anonedfcea740111::Operation1483 Operation(const char* _name,
1484 FloatUsage _floatUsage,
1485 bool specConstant,
1486 FloatType _inputType,
1487 const char* _constants,
1488 const char* _commands,
1489 const FloatStatementUsageFlags _statementUsageFlags = 0)
1490 : name(_name)
1491 , floatUsage(_floatUsage)
1492 , annotations("")
1493 , types("")
1494 , constants(_constants)
1495 , variables("")
1496 , functions("")
1497 , commands(_commands)
1498 , isInputTypeRestricted(true)
1499 , restrictedInputType(_inputType)
1500 , isSpecConstant(specConstant)
1501 , statementUsageFlags(_statementUsageFlags)
1502 {}
1503
1504 // Full constructor - used by few operations, that are more complex to test
Operationvkt::SpirVAssembly::__anonedfcea740111::Operation1505 Operation(const char* _name,
1506 FloatUsage _floatUsage,
1507 const char* _annotations,
1508 const char* _types,
1509 const char* _constants,
1510 const char* _variables,
1511 const char* _functions,
1512 const char* _commands,
1513 const FloatStatementUsageFlags _statementUsageFlags = 0)
1514 : name(_name)
1515 , floatUsage(_floatUsage)
1516 , annotations(_annotations)
1517 , types(_types)
1518 , constants(_constants)
1519 , variables(_variables)
1520 , functions(_functions)
1521 , commands(_commands)
1522 , isInputTypeRestricted(false)
1523 , restrictedInputType(FP16) // not used as isInputTypeRestricted is false
1524 , isSpecConstant(false)
1525 , statementUsageFlags(_statementUsageFlags)
1526 {}
1527
1528 // Full constructor - used by rounding override cases
Operationvkt::SpirVAssembly::__anonedfcea740111::Operation1529 Operation(const char* _name,
1530 FloatUsage _floatUsage,
1531 FloatType _inputType,
1532 const char* _annotations,
1533 const char* _types,
1534 const char* _constants,
1535 const char* _commands,
1536 const FloatStatementUsageFlags _statementUsageFlags = 0)
1537 : name(_name)
1538 , floatUsage(_floatUsage)
1539 , annotations(_annotations)
1540 , types(_types)
1541 , constants(_constants)
1542 , variables("")
1543 , functions("")
1544 , commands(_commands)
1545 , isInputTypeRestricted(true)
1546 , restrictedInputType(_inputType)
1547 , isSpecConstant(false)
1548 , statementUsageFlags(_statementUsageFlags)
1549 {}
1550 };
1551
1552 // Class storing input that will be passed to operation and expected
1553 // output that should be generated for specified behaviour.
1554 class OperationTestCase
1555 {
1556 public:
1557
OperationTestCase()1558 OperationTestCase() {}
1559
OperationTestCase(const char * _baseName,BehaviorFlags _behaviorFlags,OperationId _operatinId,ValueId _input1,ValueId _input2,ValueId _expectedOutput,deBool _fp16Without16BitStorage=DE_FALSE)1560 OperationTestCase(const char* _baseName,
1561 BehaviorFlags _behaviorFlags,
1562 OperationId _operatinId,
1563 ValueId _input1,
1564 ValueId _input2,
1565 ValueId _expectedOutput,
1566 deBool _fp16Without16BitStorage = DE_FALSE)
1567 : baseName(_baseName)
1568 , behaviorFlags(_behaviorFlags)
1569 , operationId(_operatinId)
1570 , expectedOutput(_expectedOutput)
1571 , fp16Without16BitStorage(_fp16Without16BitStorage)
1572 {
1573 input[0] = _input1;
1574 input[1] = _input2;
1575 }
1576
1577 public:
1578
1579 string baseName;
1580 BehaviorFlags behaviorFlags;
1581 OperationId operationId;
1582 ValueId input[2];
1583 ValueId expectedOutput;
1584 deBool fp16Without16BitStorage;
1585 };
1586
1587 // Helper structure used to store specialized operation
1588 // data. This data is ready to be used during shader assembly.
1589 struct SpecializedOperation
1590 {
1591 string constants;
1592 string annotations;
1593 string types;
1594 string arguments;
1595 string variables;
1596 string functions;
1597 string commands;
1598
1599 FloatType inFloatType;
1600 TypeSnippetsSP inTypeSnippets;
1601 TypeSnippetsSP outTypeSnippets;
1602 FloatStatementUsageFlags argumentsUsesFloatConstant;
1603 };
1604
1605 // Class responsible for constructing list of test cases for specified
1606 // float type and specified way of preparation of arguments.
1607 // Arguments can be either read from input SSBO or generated via math
1608 // operations in spir-v code.
1609 class TestCasesBuilder
1610 {
1611 public:
1612
1613 void init();
1614 void build(vector<OperationTestCase>& testCases, TypeTestResultsSP typeTestResults, bool argumentsFromInput);
1615 const Operation& getOperation(OperationId id) const;
1616
1617 private:
1618
1619 void createUnaryTestCases(vector<OperationTestCase>& testCases,
1620 OperationId operationId,
1621 ValueId denormPreserveResult,
1622 ValueId denormFTZResult,
1623 deBool fp16WithoutStorage = DE_FALSE) const;
1624
1625 private:
1626
1627 // Operations are shared betwean test cases so they are
1628 // passed to them as pointers to data stored in TestCasesBuilder.
1629 typedef OperationTestCase OTC;
1630 typedef Operation Op;
1631 map<int, Op> m_operations;
1632 };
1633
init()1634 void TestCasesBuilder::init()
1635 {
1636 map<int, Op>& mo = m_operations;
1637
1638 // predefine operations repeatedly used in tests; note that "_float"
1639 // in every operation command will be replaced with either "_f16",
1640 // "_f32" or "_f64" - StringTemplate is not used here because it
1641 // would make code less readable
1642 // m_operations contains generic operation definitions that can be
1643 // used for all float types
1644
1645 mo[O_NEGATE] = Op("negate", FLOAT_ARITHMETIC,
1646 "%result = OpFNegate %type_float %arg1\n",
1647 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1648 mo[O_COMPOSITE] = Op("composite", FLOAT_ARITHMETIC,
1649 "%vec1 = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1650 "%result = OpCompositeExtract %type_float %vec1 0\n",
1651 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1652 mo[O_COMPOSITE_INS] = Op("comp_ins", FLOAT_ARITHMETIC,
1653 "%vec1 = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_0\n"
1654 "%vec2 = OpCompositeInsert %type_float_vec2 %arg1 %vec1 0\n"
1655 "%result = OpCompositeExtract %type_float %vec2 0\n",
1656 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1657 mo[O_COPY] = Op("copy", FLOAT_STORAGE_ONLY,
1658 "%result = OpCopyObject %type_float %arg1\n",
1659 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1660 mo[O_D_EXTRACT] = Op("extract", FLOAT_ARITHMETIC,
1661 "%vec1 = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1662 "%result = OpVectorExtractDynamic %type_float %vec1 %c_i32_0\n",
1663 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1664 mo[O_D_INSERT] = Op("insert", FLOAT_ARITHMETIC,
1665 "%tmpVec = OpCompositeConstruct %type_float_vec2 %c_float_2 %c_float_2\n"
1666 "%vec1 = OpVectorInsertDynamic %type_float_vec2 %tmpVec %arg1 %c_i32_0\n"
1667 "%result = OpCompositeExtract %type_float %vec1 0\n",
1668 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1669 mo[O_SHUFFLE] = Op("shuffle", FLOAT_ARITHMETIC,
1670 "%tmpVec1 = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1671 "%tmpVec2 = OpCompositeConstruct %type_float_vec2 %c_float_2 %c_float_2\n" // NOTE: its impossible to test shuffle with denorms flushed
1672 "%vec1 = OpVectorShuffle %type_float_vec2 %tmpVec1 %tmpVec2 0 2\n" // to zero as this will be done by earlier operation
1673 "%result = OpCompositeExtract %type_float %vec1 0\n", // (this also applies to few other operations)
1674 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1675 mo[O_TRANSPOSE] = Op("transpose", FLOAT_ARITHMETIC,
1676 "%col = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1677 "%mat = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1678 "%tmat = OpTranspose %type_float_mat2x2 %mat\n"
1679 "%tcol = OpCompositeExtract %type_float_vec2 %tmat 0\n"
1680 "%result = OpCompositeExtract %type_float %tcol 0\n",
1681 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1682 mo[O_RETURN_VAL] = Op("ret_val", FLOAT_ARITHMETIC,
1683 "",
1684 "%type_test_fun = OpTypeFunction %type_float %type_float\n",
1685 "",
1686 "",
1687 "%test_fun = OpFunction %type_float None %type_test_fun\n"
1688 "%param = OpFunctionParameter %type_float\n"
1689 "%entry = OpLabel\n"
1690 "OpReturnValue %param\n"
1691 "OpFunctionEnd\n",
1692 "%result = OpFunctionCall %type_float %test_fun %arg1\n",
1693 B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1694
1695 // conversion operations that are meant to be used only for single output type (defined by the second number in name)
1696 const char* convertSource = "%result = OpFConvert %type_float %arg1\n";
1697 mo[O_CONV_FROM_FP16] = Op("conv_from_fp16", FLOAT_STORAGE_ONLY, false, FP16, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1698 mo[O_CONV_FROM_FP32] = Op("conv_from_fp32", FLOAT_STORAGE_ONLY, false, FP32, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1699 mo[O_CONV_FROM_FP64] = Op("conv_from_fp64", FLOAT_STORAGE_ONLY, false, FP64, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1700
1701 // from all operands supported by OpSpecConstantOp we can only test FConvert opcode with literals as everything
1702 // else requires Kernel capability (OpenCL); values of literals used in SPIR-V code must be equivalent to
1703 // V_CONV_FROM_FP32_ARG and V_CONV_FROM_FP64_ARG so we can use the same expected rounded values as for regular OpFConvert
1704 mo[O_SCONST_CONV_FROM_FP32_TO_FP16]
1705 = Op("sconst_conv_from_fp32", FLOAT_ARITHMETIC, true, FP32,
1706 "%c_arg = OpConstant %type_f32 1.22334445\n"
1707 "%result = OpSpecConstantOp %type_f16 FConvert %c_arg\n",
1708 "",
1709 B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP32);
1710 mo[O_SCONST_CONV_FROM_FP64_TO_FP32]
1711 = Op("sconst_conv_from_fp64", FLOAT_ARITHMETIC, true, FP64,
1712 "%c_arg = OpConstant %type_f64 1.22334455\n"
1713 "%result = OpSpecConstantOp %type_f32 FConvert %c_arg\n",
1714 "",
1715 B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
1716 mo[O_SCONST_CONV_FROM_FP64_TO_FP16]
1717 = Op("sconst_conv_from_fp64", FLOAT_ARITHMETIC, true, FP64,
1718 "%c_arg = OpConstant %type_f64 1.22334445\n"
1719 "%result = OpSpecConstantOp %type_f16 FConvert %c_arg\n",
1720 "",
1721 B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
1722
1723 mo[O_ADD] = Op("add", FLOAT_ARITHMETIC, "%result = OpFAdd %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1724 mo[O_SUB] = Op("sub", FLOAT_ARITHMETIC, "%result = OpFSub %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1725 mo[O_MUL] = Op("mul", FLOAT_ARITHMETIC, "%result = OpFMul %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1726 mo[O_DIV] = Op("div", FLOAT_ARITHMETIC, "%result = OpFDiv %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1727 mo[O_REM] = Op("rem", FLOAT_ARITHMETIC, "%result = OpFRem %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1728 mo[O_MOD] = Op("mod", FLOAT_ARITHMETIC, "%result = OpFMod %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1729 mo[O_PHI] = Op("phi", FLOAT_ARITHMETIC,
1730 "%comp = OpFOrdGreaterThan %type_bool %arg1 %arg2\n"
1731 " OpSelectionMerge %comp_merge None\n"
1732 " OpBranchConditional %comp %true_branch %false_branch\n"
1733 "%true_branch = OpLabel\n"
1734 " OpBranch %comp_merge\n"
1735 "%false_branch = OpLabel\n"
1736 " OpBranch %comp_merge\n"
1737 "%comp_merge = OpLabel\n"
1738 "%result = OpPhi %type_float %arg2 %true_branch %arg1 %false_branch\n",
1739 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1740 mo[O_SELECT] = Op("select", FLOAT_ARITHMETIC,
1741 "%always_true = OpFOrdGreaterThan %type_bool %c_float_1 %c_float_0\n"
1742 "%result = OpSelect %type_float %always_true %arg1 %arg2\n",
1743 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1744 mo[O_DOT] = Op("dot", FLOAT_ARITHMETIC,
1745 "%vec1 = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1746 "%vec2 = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1747 "%result = OpDot %type_float %vec1 %vec2\n",
1748 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1749 mo[O_VEC_MUL_S] = Op("vmuls", FLOAT_ARITHMETIC,
1750 "%vec = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1751 "%tmpVec = OpVectorTimesScalar %type_float_vec2 %vec %arg2\n"
1752 "%result = OpCompositeExtract %type_float %tmpVec 0\n",
1753 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1754 mo[O_VEC_MUL_M] = Op("vmulm", FLOAT_ARITHMETIC,
1755 "%col = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1756 "%mat = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1757 "%vec = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1758 "%tmpVec = OpVectorTimesMatrix %type_float_vec2 %vec %mat\n"
1759 "%result = OpCompositeExtract %type_float %tmpVec 0\n",
1760 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1761 mo[O_MAT_MUL_S] = Op("mmuls", FLOAT_ARITHMETIC,
1762 "%col = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1763 "%mat = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1764 "%mulMat = OpMatrixTimesScalar %type_float_mat2x2 %mat %arg2\n"
1765 "%extCol = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1766 "%result = OpCompositeExtract %type_float %extCol 0\n",
1767 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1768 mo[O_MAT_MUL_V] = Op("mmulv", FLOAT_ARITHMETIC,
1769 "%col = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1770 "%mat = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1771 "%vec = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1772 "%mulVec = OpMatrixTimesVector %type_float_vec2 %mat %vec\n"
1773 "%result = OpCompositeExtract %type_float %mulVec 0\n",
1774 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1775 mo[O_MAT_MUL_M] = Op("mmulm", FLOAT_ARITHMETIC,
1776 "%col1 = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1777 "%mat1 = OpCompositeConstruct %type_float_mat2x2 %col1 %col1\n"
1778 "%col2 = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1779 "%mat2 = OpCompositeConstruct %type_float_mat2x2 %col2 %col2\n"
1780 "%mulMat = OpMatrixTimesMatrix %type_float_mat2x2 %mat1 %mat2\n"
1781 "%extCol = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1782 "%result = OpCompositeExtract %type_float %extCol 0\n",
1783 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1784 mo[O_OUT_PROD] = Op("out_prod", FLOAT_ARITHMETIC,
1785 "%vec1 = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1786 "%vec2 = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1787 "%mulMat = OpOuterProduct %type_float_mat2x2 %vec1 %vec2\n"
1788 "%extCol = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1789 "%result = OpCompositeExtract %type_float %extCol 0\n",
1790 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1791
1792 // comparison operations
1793 mo[O_ORD_EQ] = Op("ord_eq", FLOAT_ARITHMETIC,
1794 "%boolVal = OpFOrdEqual %type_bool %arg1 %arg2\n"
1795 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1796 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1797 mo[O_UORD_EQ] = Op("uord_eq", FLOAT_ARITHMETIC,
1798 "%boolVal = OpFUnordEqual %type_bool %arg1 %arg2\n"
1799 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1800 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1801 mo[O_ORD_NEQ] = Op("ord_neq", FLOAT_ARITHMETIC,
1802 "%boolVal = OpFOrdNotEqual %type_bool %arg1 %arg2\n"
1803 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1804 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1805 mo[O_UORD_NEQ] = Op("uord_neq", FLOAT_ARITHMETIC,
1806 "%boolVal = OpFUnordNotEqual %type_bool %arg1 %arg2\n"
1807 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1808 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1809 mo[O_ORD_LS] = Op("ord_ls", FLOAT_ARITHMETIC,
1810 "%boolVal = OpFOrdLessThan %type_bool %arg1 %arg2\n"
1811 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1812 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1813 mo[O_UORD_LS] = Op("uord_ls", FLOAT_ARITHMETIC,
1814 "%boolVal = OpFUnordLessThan %type_bool %arg1 %arg2\n"
1815 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1816 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1817 mo[O_ORD_GT] = Op("ord_gt", FLOAT_ARITHMETIC,
1818 "%boolVal = OpFOrdGreaterThan %type_bool %arg1 %arg2\n"
1819 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1820 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1821 mo[O_UORD_GT] = Op("uord_gt", FLOAT_ARITHMETIC,
1822 "%boolVal = OpFUnordGreaterThan %type_bool %arg1 %arg2\n"
1823 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1824 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1825 mo[O_ORD_LE] = Op("ord_le", FLOAT_ARITHMETIC,
1826 "%boolVal = OpFOrdLessThanEqual %type_bool %arg1 %arg2\n"
1827 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1828 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1829 mo[O_UORD_LE] = Op("uord_le", FLOAT_ARITHMETIC,
1830 "%boolVal = OpFUnordLessThanEqual %type_bool %arg1 %arg2\n"
1831 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1832 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1833 mo[O_ORD_GE] = Op("ord_ge", FLOAT_ARITHMETIC,
1834 "%boolVal = OpFOrdGreaterThanEqual %type_bool %arg1 %arg2\n"
1835 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1836 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1837 mo[O_UORD_GE] = Op("uord_ge", FLOAT_ARITHMETIC,
1838 "%boolVal = OpFUnordGreaterThanEqual %type_bool %arg1 %arg2\n"
1839 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1840 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1841
1842 mo[O_ATAN2] = Op("atan2", FLOAT_ARITHMETIC,
1843 "%result = OpExtInst %type_float %std450 Atan2 %arg1 %arg2\n",
1844 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1845 mo[O_POW] = Op("pow", FLOAT_ARITHMETIC,
1846 "%result = OpExtInst %type_float %std450 Pow %arg1 %arg2\n",
1847 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1848 mo[O_MIX] = Op("mix", FLOAT_ARITHMETIC,
1849 "%result = OpExtInst %type_float %std450 FMix %arg1 %arg2 %c_float_0_5\n",
1850 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1851 mo[O_FMA] = Op("fma", FLOAT_ARITHMETIC,
1852 "%result = OpExtInst %type_float %std450 Fma %arg1 %arg2 %c_float_0_5\n",
1853 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1854 mo[O_MIN] = Op("min", FLOAT_ARITHMETIC,
1855 "%result = OpExtInst %type_float %std450 FMin %arg1 %arg2\n",
1856 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1857 mo[O_MAX] = Op("max", FLOAT_ARITHMETIC,
1858 "%result = OpExtInst %type_float %std450 FMax %arg1 %arg2\n",
1859 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1860 mo[O_CLAMP] = Op("clamp", FLOAT_ARITHMETIC,
1861 "%result = OpExtInst %type_float %std450 FClamp %arg1 %arg2 %arg2\n",
1862 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1863 mo[O_STEP] = Op("step", FLOAT_ARITHMETIC,
1864 "%result = OpExtInst %type_float %std450 Step %arg1 %arg2\n",
1865 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1866 mo[O_SSTEP] = Op("sstep", FLOAT_ARITHMETIC,
1867 "%result = OpExtInst %type_float %std450 SmoothStep %arg1 %arg2 %c_float_0_5\n",
1868 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1869 mo[O_DIST] = Op("distance", FLOAT_ARITHMETIC,
1870 "%result = OpExtInst %type_float %std450 Distance %arg1 %arg2\n",
1871 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1872 mo[O_CROSS] = Op("cross", FLOAT_ARITHMETIC,
1873 "%vec1 = OpCompositeConstruct %type_float_vec3 %arg1 %arg1 %arg1\n"
1874 "%vec2 = OpCompositeConstruct %type_float_vec3 %arg2 %arg2 %arg2\n"
1875 "%tmpVec = OpExtInst %type_float_vec3 %std450 Cross %vec1 %vec2\n"
1876 "%result = OpCompositeExtract %type_float %tmpVec 0\n",
1877 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1878 mo[O_FACE_FWD] = Op("face_fwd", FLOAT_ARITHMETIC,
1879 "%result = OpExtInst %type_float %std450 FaceForward %c_float_1 %arg1 %arg2\n",
1880 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1881 mo[O_NMIN] = Op("nmin", FLOAT_ARITHMETIC,
1882 "%result = OpExtInst %type_float %std450 NMin %arg1 %arg2\n",
1883 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1884 mo[O_NMAX] = Op("nmax", FLOAT_ARITHMETIC,
1885 "%result = OpExtInst %type_float %std450 NMax %arg1 %arg2\n",
1886 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1887 mo[O_NCLAMP] = Op("nclamp", FLOAT_ARITHMETIC,
1888 "%result = OpExtInst %type_float %std450 NClamp %arg2 %arg1 %arg2\n",
1889 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1890
1891 mo[O_ROUND] = Op("round", FLOAT_ARITHMETIC,
1892 "%result = OpExtInst %type_float %std450 Round %arg1\n",
1893 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1894 mo[O_ROUND_EV] = Op("round_ev", FLOAT_ARITHMETIC,
1895 "%result = OpExtInst %type_float %std450 RoundEven %arg1\n",
1896 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1897 mo[O_TRUNC] = Op("trunc", FLOAT_ARITHMETIC,
1898 "%result = OpExtInst %type_float %std450 Trunc %arg1\n",
1899 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1900 mo[O_ABS] = Op("abs", FLOAT_ARITHMETIC,
1901 "%result = OpExtInst %type_float %std450 FAbs %arg1\n",
1902 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1903 mo[O_SIGN] = Op("sign", FLOAT_ARITHMETIC,
1904 "%result = OpExtInst %type_float %std450 FSign %arg1\n",
1905 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1906 mo[O_FLOOR] = Op("floor", FLOAT_ARITHMETIC,
1907 "%result = OpExtInst %type_float %std450 Floor %arg1\n",
1908 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1909 mo[O_CEIL] = Op("ceil", FLOAT_ARITHMETIC,
1910 "%result = OpExtInst %type_float %std450 Ceil %arg1\n",
1911 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1912 mo[O_FRACT] = Op("fract", FLOAT_ARITHMETIC,
1913 "%result = OpExtInst %type_float %std450 Fract %arg1\n",
1914 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1915 mo[O_RADIANS] = Op("radians", FLOAT_ARITHMETIC,
1916 "%result = OpExtInst %type_float %std450 Radians %arg1\n",
1917 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1918 mo[O_DEGREES] = Op("degrees", FLOAT_ARITHMETIC,
1919 "%result = OpExtInst %type_float %std450 Degrees %arg1\n",
1920 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1921 mo[O_SIN] = Op("sin", FLOAT_ARITHMETIC,
1922 "%result = OpExtInst %type_float %std450 Sin %arg1\n",
1923 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1924 mo[O_COS] = Op("cos", FLOAT_ARITHMETIC,
1925 "%result = OpExtInst %type_float %std450 Cos %arg1\n",
1926 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1927 mo[O_TAN] = Op("tan", FLOAT_ARITHMETIC,
1928 "%result = OpExtInst %type_float %std450 Tan %arg1\n",
1929 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1930 mo[O_ASIN] = Op("asin", FLOAT_ARITHMETIC,
1931 "%result = OpExtInst %type_float %std450 Asin %arg1\n",
1932 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1933 mo[O_ACOS] = Op("acos", FLOAT_ARITHMETIC,
1934 "%result = OpExtInst %type_float %std450 Acos %arg1\n",
1935 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1936 mo[O_ATAN] = Op("atan", FLOAT_ARITHMETIC,
1937 "%result = OpExtInst %type_float %std450 Atan %arg1\n",
1938 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1939 mo[O_SINH] = Op("sinh", FLOAT_ARITHMETIC,
1940 "%result = OpExtInst %type_float %std450 Sinh %arg1\n",
1941 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1942 mo[O_COSH] = Op("cosh", FLOAT_ARITHMETIC,
1943 "%result = OpExtInst %type_float %std450 Cosh %arg1\n",
1944 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1945 mo[O_TANH] = Op("tanh", FLOAT_ARITHMETIC,
1946 "%result = OpExtInst %type_float %std450 Tanh %arg1\n",
1947 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1948 mo[O_ASINH] = Op("asinh", FLOAT_ARITHMETIC,
1949 "%result = OpExtInst %type_float %std450 Asinh %arg1\n",
1950 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1951 mo[O_ACOSH] = Op("acosh", FLOAT_ARITHMETIC,
1952 "%result = OpExtInst %type_float %std450 Acosh %arg1\n",
1953 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1954 mo[O_ATANH] = Op("atanh", FLOAT_ARITHMETIC,
1955 "%result = OpExtInst %type_float %std450 Atanh %arg1\n",
1956 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1957 mo[O_EXP] = Op("exp", FLOAT_ARITHMETIC,
1958 "%result = OpExtInst %type_float %std450 Exp %arg1\n",
1959 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1960 mo[O_LOG] = Op("log", FLOAT_ARITHMETIC,
1961 "%result = OpExtInst %type_float %std450 Log %arg1\n",
1962 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1963 mo[O_EXP2] = Op("exp2", FLOAT_ARITHMETIC,
1964 "%result = OpExtInst %type_float %std450 Exp2 %arg1\n",
1965 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1966 mo[O_LOG2] = Op("log2", FLOAT_ARITHMETIC,
1967 "%result = OpExtInst %type_float %std450 Log2 %arg1\n",
1968 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1969 mo[O_SQRT] = Op("sqrt", FLOAT_ARITHMETIC,
1970 "%result = OpExtInst %type_float %std450 Sqrt %arg1\n",
1971 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1972 mo[O_INV_SQRT] = Op("inv_sqrt", FLOAT_ARITHMETIC,
1973 "%result = OpExtInst %type_float %std450 InverseSqrt %arg1\n",
1974 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1975 mo[O_MODF] = Op("modf", FLOAT_ARITHMETIC,
1976 "",
1977 "",
1978 "",
1979 "%tmpVarPtr = OpVariable %type_float_fptr Function\n",
1980 "",
1981 "%result = OpExtInst %type_float %std450 Modf %arg1 %tmpVarPtr\n",
1982 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1983 mo[O_MODF_ST] = Op("modf_st", FLOAT_ARITHMETIC,
1984 "OpMemberDecorate %struct_ff 0 Offset ${float_width}\n"
1985 "OpMemberDecorate %struct_ff 1 Offset ${float_width}\n",
1986 "%struct_ff = OpTypeStruct %type_float %type_float\n"
1987 "%struct_ff_fptr = OpTypePointer Function %struct_ff\n",
1988 "",
1989 "%tmpStructPtr = OpVariable %struct_ff_fptr Function\n",
1990 "",
1991 "%tmpStruct = OpExtInst %struct_ff %std450 ModfStruct %arg1\n"
1992 " OpStore %tmpStructPtr %tmpStruct\n"
1993 "%tmpLoc = OpAccessChain %type_float_fptr %tmpStructPtr %c_i32_0\n"
1994 "%result = OpLoad %type_float %tmpLoc\n",
1995 B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1996 mo[O_FREXP] = Op("frexp", FLOAT_ARITHMETIC,
1997 "",
1998 "",
1999 "",
2000 "%tmpVarPtr = OpVariable %type_i32_fptr Function\n",
2001 "",
2002 "%result = OpExtInst %type_float %std450 Frexp %arg1 %tmpVarPtr\n",
2003 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2004 mo[O_FREXP_ST] = Op("frexp_st", FLOAT_ARITHMETIC,
2005 "OpMemberDecorate %struct_fi 0 Offset ${float_width}\n"
2006 "OpMemberDecorate %struct_fi 1 Offset 32\n",
2007 "%struct_fi = OpTypeStruct %type_float %type_i32\n"
2008 "%struct_fi_fptr = OpTypePointer Function %struct_fi\n",
2009 "",
2010 "%tmpStructPtr = OpVariable %struct_fi_fptr Function\n",
2011 "",
2012 "%tmpStruct = OpExtInst %struct_fi %std450 FrexpStruct %arg1\n"
2013 " OpStore %tmpStructPtr %tmpStruct\n"
2014 "%tmpLoc = OpAccessChain %type_float_fptr %tmpStructPtr %c_i32_0\n"
2015 "%result = OpLoad %type_float %tmpLoc\n",
2016 B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2017 mo[O_LENGHT] = Op("length", FLOAT_ARITHMETIC,
2018 "%result = OpExtInst %type_float %std450 Length %arg1\n",
2019 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2020 mo[O_NORMALIZE] = Op("normalize", FLOAT_ARITHMETIC,
2021 "%vec1 = OpCompositeConstruct %type_float_vec2 %arg1 %c_float_2\n"
2022 "%tmpVec = OpExtInst %type_float_vec2 %std450 Normalize %vec1\n"
2023 "%result = OpCompositeExtract %type_float %tmpVec 0\n",
2024 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2025 mo[O_REFLECT] = Op("reflect", FLOAT_ARITHMETIC,
2026 "%vec1 = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
2027 "%vecN = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_n1\n"
2028 "%tmpVec = OpExtInst %type_float_vec2 %std450 Reflect %vec1 %vecN\n"
2029 "%result = OpCompositeExtract %type_float %tmpVec 0\n",
2030 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2031 mo[O_REFRACT] = Op("refract", FLOAT_ARITHMETIC,
2032 "%vec1 = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
2033 "%vecN = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_n1\n"
2034 "%tmpVec = OpExtInst %type_float_vec2 %std450 Refract %vec1 %vecN %c_float_0_5\n"
2035 "%result = OpCompositeExtract %type_float %tmpVec 0\n",
2036 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2037 mo[O_MAT_DET] = Op("mat_det", FLOAT_ARITHMETIC,
2038 "%col = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
2039 "%mat = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
2040 "%result = OpExtInst %type_float %std450 Determinant %mat\n",
2041 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2042 mo[O_MAT_INV] = Op("mat_inv", FLOAT_ARITHMETIC,
2043 "%col1 = OpCompositeConstruct %type_float_vec2 %arg1 %c_float_1\n"
2044 "%col2 = OpCompositeConstruct %type_float_vec2 %c_float_1 %c_float_1\n"
2045 "%mat = OpCompositeConstruct %type_float_mat2x2 %col1 %col2\n"
2046 "%invMat = OpExtInst %type_float_mat2x2 %std450 MatrixInverse %mat\n"
2047 "%extCol = OpCompositeExtract %type_float_vec2 %invMat 1\n"
2048 "%result = OpCompositeExtract %type_float %extCol 1\n",
2049 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2050
2051 // PackHalf2x16 is a special case as it operates on fp32 vec2 and returns unsigned int,
2052 // the verification is done in SPIR-V code (if result is correct 1.0 will be written to SSBO)
2053 mo[O_PH_DENORM] = Op("ph_denorm", FLOAT_STORAGE_ONLY,
2054 "",
2055 "",
2056 "%c_fp32_denorm_fp16 = OpConstant %type_f32 6.01e-5\n" // fp32 representation of fp16 denorm value
2057 "%c_ref = OpConstant %type_u32 66061296\n",
2058 "",
2059 "",
2060 "%srcVec = OpCompositeConstruct %type_f32_vec2 %c_fp32_denorm_fp16 %c_fp32_denorm_fp16\n"
2061 "%packedInt = OpExtInst %type_u32 %std450 PackHalf2x16 %srcVec\n"
2062 "%boolVal = OpIEqual %type_bool %c_ref %packedInt\n"
2063 "%result = OpSelect %type_f32 %boolVal %c_f32_1 %c_f32_0\n",
2064 B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_COMMANDS_CONST_FP32 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP32);
2065
2066 // UnpackHalf2x16 is a special case that operates on uint32 and returns two 32-bit floats,
2067 // this function is tested using constants
2068 mo[O_UPH_DENORM] = Op("uph_denorm", FLOAT_STORAGE_ONLY,
2069 "",
2070 "",
2071 "%c_u32_2_16_pack = OpConstant %type_u32 66061296\n", // == packHalf2x16(vec2(denorm))
2072 "",
2073 "",
2074 "%tmpVec = OpExtInst %type_f32_vec2 %std450 UnpackHalf2x16 %c_u32_2_16_pack\n"
2075 "%result = OpCompositeExtract %type_f32 %tmpVec 0\n",
2076 B_STATEMENT_USAGE_COMMANDS_TYPE_FP32);
2077
2078 // PackDouble2x32 is a special case that operates on two uint32 and returns
2079 // double, this function is tested using constants
2080 mo[O_PD_DENORM] = Op("pd_denorm", FLOAT_STORAGE_ONLY,
2081 "",
2082 "",
2083 "%c_p1 = OpConstant %type_u32 0\n"
2084 "%c_p2 = OpConstant %type_u32 262144\n", // == UnpackDouble2x32(denorm)
2085 "",
2086 "",
2087 "%srcVec = OpCompositeConstruct %type_u32_vec2 %c_p1 %c_p2\n"
2088 "%result = OpExtInst %type_f64 %std450 PackDouble2x32 %srcVec\n",
2089 B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2090
2091 // UnpackDouble2x32 is a special case as it operates only on FP64 and returns two ints,
2092 // the verification is done in SPIR-V code (if result is correct 1.0 will be written to SSBO)
2093 const char* unpackDouble2x32Types = "%type_bool_vec2 = OpTypeVector %type_bool 2\n";
2094 const char* unpackDouble2x32Source = "%refVec2 = OpCompositeConstruct %type_u32_vec2 %c_p1 %c_p2\n"
2095 "%resVec2 = OpExtInst %type_u32_vec2 %std450 UnpackDouble2x32 %arg1\n"
2096 "%boolVec2 = OpIEqual %type_bool_vec2 %refVec2 %resVec2\n"
2097 "%boolVal = OpAll %type_bool %boolVec2\n"
2098 "%result = OpSelect %type_f64 %boolVal %c_f64_1 %c_f64_0\n";
2099 mo[O_UPD_DENORM_FLUSH] = Op("upd_denorm", FLOAT_STORAGE_ONLY, "",
2100 unpackDouble2x32Types,
2101 "%c_p1 = OpConstant %type_u32 0\n"
2102 "%c_p2 = OpConstant %type_u32 0\n",
2103 "",
2104 "",
2105 unpackDouble2x32Source,
2106 B_STATEMENT_USAGE_COMMANDS_CONST_FP64 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2107 mo[O_UPD_DENORM_PRESERVE] = Op("upd_denorm", FLOAT_STORAGE_ONLY, "",
2108 unpackDouble2x32Types,
2109 "%c_p1 = OpConstant %type_u32 1008\n"
2110 "%c_p2 = OpConstant %type_u32 0\n",
2111 "",
2112 "",
2113 unpackDouble2x32Source,
2114 B_STATEMENT_USAGE_COMMANDS_CONST_FP64 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2115
2116 mo[O_ORTE_ROUND] = Op("orte_round", FLOAT_STORAGE_ONLY, FP32,
2117 "OpDecorate %result FPRoundingMode RTE\n",
2118 "",
2119 "",
2120 "%result = OpFConvert %type_f16 %arg1\n",
2121 B_STATEMENT_USAGE_COMMANDS_TYPE_FP16);
2122 mo[O_ORTZ_ROUND] = Op("ortz_round", FLOAT_STORAGE_ONLY, FP32,
2123 "OpDecorate %result FPRoundingMode RTZ\n",
2124 "",
2125 "",
2126 "%result = OpFConvert %type_f16 %arg1\n",
2127 B_STATEMENT_USAGE_COMMANDS_TYPE_FP16);
2128 }
2129
build(vector<OperationTestCase> & testCases,TypeTestResultsSP typeTestResults,bool argumentsFromInput)2130 void TestCasesBuilder::build(vector<OperationTestCase>& testCases, TypeTestResultsSP typeTestResults, bool argumentsFromInput)
2131 {
2132 // this method constructs a list of test cases; this list is a bit different
2133 // for every combination of float type, arguments preparation method and tested float control
2134
2135 testCases.reserve(750);
2136
2137 bool isFP16 = typeTestResults->floatType() == FP16;
2138
2139 // Denorm - FlushToZero - binary operations
2140 for (size_t i = 0 ; i < typeTestResults->binaryOpFTZ.size() ; ++i)
2141 {
2142 const BinaryCase& binaryCase = typeTestResults->binaryOpFTZ[i];
2143 OperationId operation = binaryCase.operationId;
2144 testCases.push_back(OTC("denorm_op_var_flush_to_zero", B_DENORM_FLUSH, operation, V_DENORM, V_ONE, binaryCase.opVarResult));
2145 testCases.push_back(OTC("denorm_op_denorm_flush_to_zero", B_DENORM_FLUSH, operation, V_DENORM, V_DENORM, binaryCase.opDenormResult));
2146 testCases.push_back(OTC("denorm_op_inf_flush_to_zero", B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_INF, binaryCase.opInfResult));
2147 testCases.push_back(OTC("denorm_op_nan_flush_to_zero", B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_NAN, binaryCase.opNanResult));
2148
2149 if (isFP16)
2150 {
2151 testCases.push_back(OTC("denorm_op_var_flush_to_zero_nostorage", B_DENORM_FLUSH, operation, V_DENORM, V_ONE, binaryCase.opVarResult, DE_TRUE));
2152 testCases.push_back(OTC("denorm_op_denorm_flush_to_zero_nostorage", B_DENORM_FLUSH, operation, V_DENORM, V_DENORM, binaryCase.opDenormResult, DE_TRUE));
2153 testCases.push_back(OTC("denorm_op_inf_flush_to_zero_nostorage", B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_INF, binaryCase.opInfResult, DE_TRUE));
2154 testCases.push_back(OTC("denorm_op_nan_flush_to_zero_nostorage", B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_NAN, binaryCase.opNanResult, DE_TRUE));
2155 }
2156 }
2157
2158 // Denorm - FlushToZero - unary operations
2159 for (size_t i = 0 ; i < typeTestResults->unaryOpFTZ.size() ; ++i)
2160 {
2161 const UnaryCase& unaryCase = typeTestResults->unaryOpFTZ[i];
2162 OperationId operation = unaryCase.operationId;
2163 testCases.push_back(OTC("op_denorm_flush_to_zero", B_DENORM_FLUSH, operation, V_DENORM, V_UNUSED, unaryCase.result));
2164 if (isFP16)
2165 testCases.push_back(OTC("op_denorm_flush_to_zero_nostorage", B_DENORM_FLUSH, operation, V_DENORM, V_UNUSED, unaryCase.result, DE_TRUE));
2166
2167 }
2168
2169 // Denorm - Preserve - binary operations
2170 for (size_t i = 0 ; i < typeTestResults->binaryOpDenormPreserve.size() ; ++i)
2171 {
2172 const BinaryCase& binaryCase = typeTestResults->binaryOpDenormPreserve[i];
2173 OperationId operation = binaryCase.operationId;
2174 testCases.push_back(OTC("denorm_op_var_preserve", B_DENORM_PRESERVE, operation, V_DENORM, V_ONE, binaryCase.opVarResult));
2175 testCases.push_back(OTC("denorm_op_denorm_preserve", B_DENORM_PRESERVE, operation, V_DENORM, V_DENORM, binaryCase.opDenormResult));
2176 testCases.push_back(OTC("denorm_op_inf_preserve", B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM, V_INF, binaryCase.opInfResult));
2177 testCases.push_back(OTC("denorm_op_nan_preserve", B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM, V_NAN, binaryCase.opNanResult));
2178
2179 if (isFP16)
2180 {
2181 testCases.push_back(OTC("denorm_op_var_preserve_nostorage", B_DENORM_PRESERVE, operation, V_DENORM, V_ONE, binaryCase.opVarResult, DE_TRUE));
2182 testCases.push_back(OTC("denorm_op_denorm_preserve_nostorage", B_DENORM_PRESERVE, operation, V_DENORM, V_DENORM, binaryCase.opDenormResult, DE_TRUE));
2183 testCases.push_back(OTC("denorm_op_inf_preserve_nostorage", B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM, V_INF, binaryCase.opInfResult, DE_TRUE));
2184 testCases.push_back(OTC("denorm_op_nan_preserve_nostorage", B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM, V_NAN, binaryCase.opNanResult, DE_TRUE));
2185 }
2186 }
2187
2188 // Denorm - Preserve - unary operations
2189 for (size_t i = 0 ; i < typeTestResults->unaryOpDenormPreserve.size() ; ++i)
2190 {
2191 const UnaryCase& unaryCase = typeTestResults->unaryOpDenormPreserve[i];
2192 OperationId operation = unaryCase.operationId;
2193 testCases.push_back(OTC("op_denorm_preserve", B_DENORM_PRESERVE, operation, V_DENORM, V_UNUSED, unaryCase.result));
2194 if (isFP16)
2195 testCases.push_back(OTC("op_denorm_preserve_nostorage", B_DENORM_PRESERVE, operation, V_DENORM, V_UNUSED, unaryCase.result, DE_TRUE));
2196 }
2197
2198 struct ZINCase
2199 {
2200 OperationId operationId;
2201 bool supportedByFP64;
2202 ValueId secondArgument;
2203 ValueId preserveZeroResult;
2204 ValueId preserveSZeroResult;
2205 ValueId preserveInfResult;
2206 ValueId preserveSInfResult;
2207 ValueId preserveNanResult;
2208 };
2209
2210 const ZINCase binaryOpZINPreserve[] = {
2211 // operation fp64 second arg preserve zero preserve szero preserve inf preserve sinf preserve nan
2212 { O_PHI, true, V_INF, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2213 { O_SELECT, true, V_ONE, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2214 { O_ADD, true, V_ZERO, V_ZERO, V_ZERO, V_INF, V_MINUS_INF, V_NAN },
2215 { O_SUB, true, V_ZERO, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2216 { O_MUL, true, V_ONE, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2217 };
2218
2219 const ZINCase unaryOpZINPreserve[] = {
2220 // operation fp64 second arg preserve zero preserve szero preserve inf preserve sinf preserve nan
2221 { O_RETURN_VAL, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2222 { O_D_EXTRACT, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2223 { O_D_INSERT, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2224 { O_SHUFFLE, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2225 { O_COMPOSITE, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2226 { O_COMPOSITE_INS, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2227 { O_COPY, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2228 { O_TRANSPOSE, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2229 { O_NEGATE, true, V_UNUSED, V_MINUS_ZERO, V_ZERO, V_MINUS_INF, V_INF, V_NAN },
2230 };
2231
2232 bool isFP64 = typeTestResults->floatType() == FP64;
2233
2234 // Signed Zero Inf Nan - Preserve - binary operations
2235 for (size_t i = 0 ; i < DE_LENGTH_OF_ARRAY(binaryOpZINPreserve) ; ++i)
2236 {
2237 const ZINCase& zc = binaryOpZINPreserve[i];
2238 if (isFP64 && !zc.supportedByFP64)
2239 continue;
2240
2241 testCases.push_back(OTC("zero_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_ZERO, zc.secondArgument, zc.preserveZeroResult));
2242 testCases.push_back(OTC("signed_zero_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_MINUS_ZERO, zc.secondArgument, zc.preserveSZeroResult));
2243 testCases.push_back(OTC("inf_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_INF, zc.secondArgument, zc.preserveInfResult));
2244 testCases.push_back(OTC("signed_inf_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_MINUS_INF, zc.secondArgument, zc.preserveSInfResult));
2245 testCases.push_back(OTC("nan_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_NAN, zc.secondArgument, zc.preserveNanResult));
2246
2247 if (isFP16)
2248 {
2249 testCases.push_back(OTC("zero_op_var_preserve_nostorage", B_ZIN_PRESERVE, zc.operationId, V_ZERO, zc.secondArgument, zc.preserveZeroResult, DE_TRUE));
2250 testCases.push_back(OTC("signed_zero_op_var_preserve_nostorage", B_ZIN_PRESERVE, zc.operationId, V_MINUS_ZERO, zc.secondArgument, zc.preserveSZeroResult, DE_TRUE));
2251 testCases.push_back(OTC("inf_op_var_preserve_nostorage", B_ZIN_PRESERVE, zc.operationId, V_INF, zc.secondArgument, zc.preserveInfResult, DE_TRUE));
2252 testCases.push_back(OTC("signed_inf_op_var_preserve_nostorage", B_ZIN_PRESERVE, zc.operationId, V_MINUS_INF, zc.secondArgument, zc.preserveSInfResult, DE_TRUE));
2253 testCases.push_back(OTC("nan_op_var_preserve_nostorage", B_ZIN_PRESERVE, zc.operationId, V_NAN, zc.secondArgument, zc.preserveNanResult, DE_TRUE));
2254 }
2255 }
2256
2257 // Signed Zero Inf Nan - Preserve - unary operations
2258 for (size_t i = 0 ; i < DE_LENGTH_OF_ARRAY(unaryOpZINPreserve) ; ++i)
2259 {
2260 const ZINCase& zc = unaryOpZINPreserve[i];
2261 if (isFP64 && !zc.supportedByFP64)
2262 continue;
2263
2264 testCases.push_back(OTC("op_zero_preserve", B_ZIN_PRESERVE,zc.operationId, V_ZERO, V_UNUSED, zc.preserveZeroResult));
2265 testCases.push_back(OTC("op_signed_zero_preserve", B_ZIN_PRESERVE,zc.operationId, V_MINUS_ZERO, V_UNUSED, zc.preserveSZeroResult));
2266 testCases.push_back(OTC("op_inf_preserve", B_ZIN_PRESERVE,zc.operationId, V_INF, V_UNUSED, zc.preserveInfResult));
2267 testCases.push_back(OTC("op_signed_inf_preserve", B_ZIN_PRESERVE,zc.operationId, V_MINUS_INF, V_UNUSED, zc.preserveSInfResult));
2268 testCases.push_back(OTC("op_nan_preserve", B_ZIN_PRESERVE,zc.operationId, V_NAN, V_UNUSED, zc.preserveNanResult));
2269
2270 if (isFP16)
2271 {
2272 testCases.push_back(OTC("op_zero_preserve_nostorage", B_ZIN_PRESERVE,zc.operationId, V_ZERO, V_UNUSED, zc.preserveZeroResult, DE_TRUE));
2273 testCases.push_back(OTC("op_signed_zero_preserve_nostorage", B_ZIN_PRESERVE,zc.operationId, V_MINUS_ZERO, V_UNUSED, zc.preserveSZeroResult, DE_TRUE));
2274 testCases.push_back(OTC("op_inf_preserve_nostorage", B_ZIN_PRESERVE,zc.operationId, V_INF, V_UNUSED, zc.preserveInfResult, DE_TRUE));
2275 testCases.push_back(OTC("op_signed_inf_preserve_nostorage", B_ZIN_PRESERVE,zc.operationId, V_MINUS_INF, V_UNUSED, zc.preserveSInfResult, DE_TRUE));
2276 testCases.push_back(OTC("op_nan_preserve_nostorage", B_ZIN_PRESERVE,zc.operationId, V_NAN, V_UNUSED, zc.preserveNanResult, DE_TRUE));
2277 }
2278 }
2279
2280 // comparison operations - tested differently because they return true/false
2281 struct ComparisonCase
2282 {
2283 OperationId operationId;
2284 ValueId denormPreserveResult;
2285 };
2286 const ComparisonCase comparisonCases[] =
2287 {
2288 // operation denorm
2289 { O_ORD_EQ, V_ZERO },
2290 { O_UORD_EQ, V_ZERO },
2291 { O_ORD_NEQ, V_ONE },
2292 { O_UORD_NEQ, V_ONE },
2293 { O_ORD_LS, V_ONE },
2294 { O_UORD_LS, V_ONE },
2295 { O_ORD_GT, V_ZERO },
2296 { O_UORD_GT, V_ZERO },
2297 { O_ORD_LE, V_ONE },
2298 { O_UORD_LE, V_ONE },
2299 { O_ORD_GE, V_ZERO },
2300 { O_UORD_GE, V_ZERO }
2301 };
2302 for (int op = 0 ; op < DE_LENGTH_OF_ARRAY(comparisonCases) ; ++op)
2303 {
2304 const ComparisonCase& cc = comparisonCases[op];
2305 testCases.push_back(OTC("denorm_op_var_preserve", B_DENORM_PRESERVE, cc.operationId, V_DENORM, V_ONE, cc.denormPreserveResult));
2306 if (isFP16)
2307 testCases.push_back(OTC("denorm_op_var_preserve_nostorage", B_DENORM_PRESERVE, cc.operationId, V_DENORM, V_ONE, cc.denormPreserveResult, DE_TRUE));
2308 }
2309
2310 if (argumentsFromInput)
2311 {
2312 struct RoundingModeCase
2313 {
2314 OperationId operationId;
2315 ValueId arg1;
2316 ValueId arg2;
2317 ValueId expectedRTEResult;
2318 ValueId expectedRTZResult;
2319 };
2320
2321 const RoundingModeCase roundingCases[] =
2322 {
2323 { O_ADD, V_ADD_ARG_A, V_ADD_ARG_B, V_ADD_RTE_RESULT, V_ADD_RTZ_RESULT },
2324 { O_SUB, V_SUB_ARG_A, V_SUB_ARG_B, V_SUB_RTE_RESULT, V_SUB_RTZ_RESULT },
2325 { O_MUL, V_MUL_ARG_A, V_MUL_ARG_B, V_MUL_RTE_RESULT, V_MUL_RTZ_RESULT },
2326 { O_DOT, V_DOT_ARG_A, V_DOT_ARG_B, V_DOT_RTE_RESULT, V_DOT_RTZ_RESULT },
2327
2328 // in vect/mat multiplication by scalar operations only first element of result is checked
2329 // so argument and result values prepared for multiplication can be reused for those cases
2330 { O_VEC_MUL_S, V_MUL_ARG_A, V_MUL_ARG_B, V_MUL_RTE_RESULT, V_MUL_RTZ_RESULT },
2331 { O_MAT_MUL_S, V_MUL_ARG_A, V_MUL_ARG_B, V_MUL_RTE_RESULT, V_MUL_RTZ_RESULT },
2332 { O_OUT_PROD, V_MUL_ARG_A, V_MUL_ARG_B, V_MUL_RTE_RESULT, V_MUL_RTZ_RESULT },
2333
2334 // in SPIR-V code we return first element of operation result so for following
2335 // cases argument and result values prepared for dot product can be reused
2336 { O_VEC_MUL_M, V_DOT_ARG_A, V_DOT_ARG_B, V_DOT_RTE_RESULT, V_DOT_RTZ_RESULT },
2337 { O_MAT_MUL_V, V_DOT_ARG_A, V_DOT_ARG_B, V_DOT_RTE_RESULT, V_DOT_RTZ_RESULT },
2338 { O_MAT_MUL_M, V_DOT_ARG_A, V_DOT_ARG_B, V_DOT_RTE_RESULT, V_DOT_RTZ_RESULT },
2339
2340 // conversion operations are added separately - depending on float type width
2341 };
2342
2343 for (int c = 0 ; c < DE_LENGTH_OF_ARRAY(roundingCases) ; ++c)
2344 {
2345 const RoundingModeCase& rmc = roundingCases[c];
2346 testCases.push_back(OTC("rounding_rte_op", B_RTE_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTEResult));
2347 testCases.push_back(OTC("rounding_rtz_op", B_RTZ_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTZResult));
2348 if (isFP16)
2349 {
2350 testCases.push_back(OTC("rounding_rte_op_nostorage", B_RTE_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTEResult, DE_TRUE));
2351 testCases.push_back(OTC("rounding_rtz_op_nostorage", B_RTZ_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTZResult, DE_TRUE));
2352 }
2353 }
2354 }
2355
2356 // special cases
2357 if (typeTestResults->floatType() == FP16)
2358 {
2359 if (argumentsFromInput)
2360 {
2361 testCases.push_back(OTC("rounding_rte_conv_from_fp32", B_RTE_ROUNDING, O_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2362 testCases.push_back(OTC("rounding_rtz_conv_from_fp32", B_RTZ_ROUNDING, O_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2363 testCases.push_back(OTC("rounding_rte_conv_from_fp64", B_RTE_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2364 testCases.push_back(OTC("rounding_rtz_conv_from_fp64", B_RTZ_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2365
2366 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32", B_RTE_ROUNDING, O_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2367 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32", B_RTZ_ROUNDING, O_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2368 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64", B_RTE_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2369 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64", B_RTZ_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2370
2371 testCases.push_back(OTC("rounding_rte_conv_from_fp32_nostorage", B_RTE_ROUNDING, O_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT, DE_TRUE));
2372 testCases.push_back(OTC("rounding_rtz_conv_from_fp32_nostorage", B_RTZ_ROUNDING, O_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT, DE_TRUE));
2373 testCases.push_back(OTC("rounding_rte_conv_from_fp64_nostorage", B_RTE_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT, DE_TRUE));
2374 testCases.push_back(OTC("rounding_rtz_conv_from_fp64_nostorage", B_RTZ_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT, DE_TRUE));
2375
2376 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32_nostorage", B_RTE_ROUNDING, O_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT, DE_TRUE));
2377 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32_nostorage", B_RTZ_ROUNDING, O_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT, DE_TRUE));
2378 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_nostorage", B_RTE_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT, DE_TRUE));
2379 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_nostorage", B_RTZ_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT, DE_TRUE));
2380
2381 // verify that VkShaderFloatingPointRoundingModeKHR can be overridden for a given instruction by the FPRoundingMode decoration.
2382 // FPRoundingMode decoration requires VK_KHR_16bit_storage.
2383 testCases.push_back(OTC("rounding_rte_override", B_RTE_ROUNDING, O_ORTZ_ROUND, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2384 testCases.push_back(OTC("rounding_rtz_override", B_RTZ_ROUNDING, O_ORTE_ROUND, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2385 }
2386
2387 createUnaryTestCases(testCases, O_CONV_FROM_FP32, V_CONV_DENORM_SMALLER, V_ZERO);
2388 createUnaryTestCases(testCases, O_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO);
2389 createUnaryTestCases(testCases, O_CONV_FROM_FP32, V_CONV_DENORM_SMALLER, V_ZERO, DE_TRUE);
2390 createUnaryTestCases(testCases, O_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO, DE_TRUE);
2391
2392 }
2393 else if (typeTestResults->floatType() == FP32)
2394 {
2395 if (argumentsFromInput)
2396 {
2397 // convert from fp64 to fp32
2398 testCases.push_back(OTC("rounding_rte_conv_from_fp64", B_RTE_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP32_RTE_RESULT));
2399 testCases.push_back(OTC("rounding_rtz_conv_from_fp64", B_RTZ_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP32_RTZ_RESULT));
2400
2401 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64", B_RTE_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP32, V_UNUSED, V_UNUSED, V_CONV_TO_FP32_RTE_RESULT));
2402 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64", B_RTZ_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP32, V_UNUSED, V_UNUSED, V_CONV_TO_FP32_RTZ_RESULT));
2403 }
2404 else
2405 {
2406 // PackHalf2x16 - verification done in SPIR-V
2407 testCases.push_back(OTC("pack_half_denorm_preserve", B_DENORM_PRESERVE, O_PH_DENORM, V_UNUSED, V_UNUSED, V_ONE));
2408
2409 // UnpackHalf2x16 - custom arguments defined as constants
2410 testCases.push_back(OTC("upack_half_denorm_flush_to_zero", B_DENORM_FLUSH, O_UPH_DENORM, V_UNUSED, V_UNUSED, V_ZERO));
2411 testCases.push_back(OTC("upack_half_denorm_preserve", B_DENORM_PRESERVE, O_UPH_DENORM, V_UNUSED, V_UNUSED, V_CONV_DENORM_SMALLER));
2412 }
2413
2414 createUnaryTestCases(testCases, O_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP32);
2415 createUnaryTestCases(testCases, O_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP32, DE_TRUE);
2416 createUnaryTestCases(testCases, O_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO);
2417 }
2418 else // FP64
2419 {
2420 if (!argumentsFromInput)
2421 {
2422 // PackDouble2x32 - custom arguments defined as constants
2423 testCases.push_back(OTC("pack_double_denorm_preserve", B_DENORM_PRESERVE, O_PD_DENORM, V_UNUSED, V_UNUSED, V_DENORM));
2424
2425 // UnpackDouble2x32 - verification done in SPIR-V
2426 testCases.push_back(OTC("upack_double_denorm_flush_to_zero", B_DENORM_FLUSH, O_UPD_DENORM_FLUSH, V_DENORM, V_UNUSED, V_ONE));
2427 testCases.push_back(OTC("upack_double_denorm_preserve", B_DENORM_PRESERVE, O_UPD_DENORM_PRESERVE, V_DENORM, V_UNUSED, V_ONE));
2428 }
2429
2430 createUnaryTestCases(testCases, O_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP64);
2431 createUnaryTestCases(testCases, O_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP64, DE_TRUE);
2432 createUnaryTestCases(testCases, O_CONV_FROM_FP32, V_CONV_DENORM_BIGGER, V_ZERO_OR_FP32_DENORM_TO_FP64);
2433 }
2434 }
2435
getOperation(OperationId id) const2436 const Operation& TestCasesBuilder::getOperation(OperationId id) const
2437 {
2438 return m_operations.at(id);
2439 }
2440
createUnaryTestCases(vector<OperationTestCase> & testCases,OperationId operationId,ValueId denormPreserveResult,ValueId denormFTZResult,deBool fp16WithoutStorage) const2441 void TestCasesBuilder::createUnaryTestCases(vector<OperationTestCase>& testCases, OperationId operationId, ValueId denormPreserveResult, ValueId denormFTZResult, deBool fp16WithoutStorage) const
2442 {
2443 if (fp16WithoutStorage)
2444 {
2445 // Denom - Preserve
2446 testCases.push_back(OTC("op_denorm_preserve_nostorage", B_DENORM_PRESERVE, operationId, V_DENORM, V_UNUSED, denormPreserveResult, DE_TRUE));
2447
2448 // Denorm - FlushToZero
2449 testCases.push_back(OTC("op_denorm_flush_to_zero_nostorage", B_DENORM_FLUSH, operationId, V_DENORM, V_UNUSED, denormFTZResult, DE_TRUE));
2450
2451 // Signed Zero Inf Nan - Preserve
2452 testCases.push_back(OTC("op_zero_preserve_nostorage", B_ZIN_PRESERVE, operationId, V_ZERO, V_UNUSED, V_ZERO, DE_TRUE));
2453 testCases.push_back(OTC("op_signed_zero_preserve_nostorage", B_ZIN_PRESERVE, operationId, V_MINUS_ZERO, V_UNUSED, V_MINUS_ZERO, DE_TRUE));
2454 testCases.push_back(OTC("op_inf_preserve_nostorage", B_ZIN_PRESERVE, operationId, V_INF, V_UNUSED, V_INF, DE_TRUE));
2455 testCases.push_back(OTC("op_nan_preserve_nostorage", B_ZIN_PRESERVE, operationId, V_NAN, V_UNUSED, V_NAN, DE_TRUE));
2456 }
2457 else
2458 {
2459 // Denom - Preserve
2460 testCases.push_back(OTC("op_denorm_preserve", B_DENORM_PRESERVE, operationId, V_DENORM, V_UNUSED, denormPreserveResult));
2461
2462 // Denorm - FlushToZero
2463 testCases.push_back(OTC("op_denorm_flush_to_zero", B_DENORM_FLUSH, operationId, V_DENORM, V_UNUSED, denormFTZResult));
2464
2465 // Signed Zero Inf Nan - Preserve
2466 testCases.push_back(OTC("op_zero_preserve", B_ZIN_PRESERVE, operationId, V_ZERO, V_UNUSED, V_ZERO));
2467 testCases.push_back(OTC("op_signed_zero_preserve", B_ZIN_PRESERVE, operationId, V_MINUS_ZERO, V_UNUSED, V_MINUS_ZERO));
2468 testCases.push_back(OTC("op_inf_preserve", B_ZIN_PRESERVE, operationId, V_INF, V_UNUSED, V_INF));
2469 testCases.push_back(OTC("op_nan_preserve", B_ZIN_PRESERVE, operationId, V_NAN, V_UNUSED, V_NAN));
2470 }
2471 }
2472
2473 template <typename TYPE, typename FLOAT_TYPE>
isZeroOrOtherValue(const TYPE & returnedFloat,ValueId secondAcceptableResult,TestLog & log)2474 bool isZeroOrOtherValue(const TYPE& returnedFloat, ValueId secondAcceptableResult, TestLog& log)
2475 {
2476 if (returnedFloat.isZero() && !returnedFloat.signBit())
2477 return true;
2478
2479 TypeValues<FLOAT_TYPE> typeValues;
2480 typedef typename TYPE::StorageType SType;
2481 typename RawConvert<FLOAT_TYPE, SType>::Value value;
2482 value.fp = typeValues.getValue(secondAcceptableResult);
2483
2484 if (returnedFloat.bits() == value.ui)
2485 return true;
2486
2487 log << TestLog::Message << "Expected 0 or " << toHex(value.ui)
2488 << " (" << value.fp << ")" << TestLog::EndMessage;
2489 return false;
2490 }
2491
2492 template <typename TYPE>
isAcosResultCorrect(const TYPE & returnedFloat,TestLog & log)2493 bool isAcosResultCorrect(const TYPE& returnedFloat, TestLog& log)
2494 {
2495 // pi/2 is result of acos(0) which in the specs is defined as equivalent to
2496 // atan2(sqrt(1.0 - x^2), x), where atan2 has 4096 ULP, sqrt is equivalent to
2497 // 1.0 /inversesqrt(), inversesqrt() is 2 ULP and rcp is another 2.5 ULP
2498
2499 double precision = 0;
2500 const double piDiv2 = 3.14159265358979323846 / 2;
2501 if (returnedFloat.MANTISSA_BITS == 23)
2502 {
2503 FloatFormat fp32Format(-126, 127, 23, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
2504 precision = fp32Format.ulp(piDiv2, 4096.0);
2505 }
2506 else
2507 {
2508 FloatFormat fp16Format(-14, 15, 10, true, tcu::MAYBE);
2509 precision = fp16Format.ulp(piDiv2, 5.0);
2510 }
2511
2512 if (deAbs(returnedFloat.asDouble() - piDiv2) < precision)
2513 return true;
2514
2515 log << TestLog::Message << "Expected result to be in range"
2516 << " (" << piDiv2 - precision << ", " << piDiv2 + precision << "), got "
2517 << returnedFloat.asDouble() << TestLog::EndMessage;
2518 return false;
2519 }
2520
2521 template <typename TYPE>
isCosResultCorrect(const TYPE & returnedFloat,TestLog & log)2522 bool isCosResultCorrect(const TYPE& returnedFloat, TestLog& log)
2523 {
2524 // for cos(x) with x between -pi and pi, the precision error is 2^-11 for fp32 and 2^-7 for fp16.
2525 double precision = returnedFloat.MANTISSA_BITS == 23 ? dePow(2, -11) : dePow(2, -7);
2526 const double expected = 1.0;
2527
2528 if (deAbs(returnedFloat.asDouble() - expected) < precision)
2529 return true;
2530
2531 log << TestLog::Message << "Expected result to be in range"
2532 << " (" << expected - precision << ", " << expected + precision << "), got "
2533 << returnedFloat.asDouble() << TestLog::EndMessage;
2534 return false;
2535 }
2536
// Widens a floating point parameter to double.
// This generic version relies on the built-in conversion of FLOAT_TYPE to double.
template <typename FLOAT_TYPE>
double getFloatTypeAsDouble(FLOAT_TYPE param)
{
	return static_cast<double>(param);
}
getFloatTypeAsDouble(deFloat16 param)2542 template<> double getFloatTypeAsDouble(deFloat16 param)
2543 {
2544 return deFloat16To64(param);
2545 }
2546
2547
getPrecisionAt(double value,float ulp,int mantissaBits)2548 double getPrecisionAt(double value, float ulp, int mantissaBits)
2549 {
2550 if (mantissaBits == 23)
2551 {
2552 FloatFormat fp32Format(-126, 127, 23, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
2553 return fp32Format.ulp(value, ulp);
2554 }
2555 else if (mantissaBits == 52)
2556 {
2557 FloatFormat fp32Format(-1022, 1023, 52, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
2558 return fp32Format.ulp(value, ulp);
2559 }
2560 else
2561 {
2562 DE_ASSERT(mantissaBits == 10);
2563 FloatFormat fp16Format(-14, 15, 10, true, tcu::MAYBE);
2564 return fp16Format.ulp(value, ulp);
2565 }
2566 }
2567
2568 template <typename TYPE, typename FLOAT_TYPE, typename REF_FUNCTION>
isLogResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,REF_FUNCTION refFunction,TestLog & log)2569 bool isLogResultCorrect(const TYPE& returnedFloat, FLOAT_TYPE param, REF_FUNCTION refFunction, TestLog& log)
2570 {
2571 if (returnedFloat.isInf() && returnedFloat.signBit())
2572 return true;
2573
2574 const double expected = refFunction(getFloatTypeAsDouble(param));
2575 const double precision = getPrecisionAt(expected, 3.0, returnedFloat.MANTISSA_BITS);
2576
2577 if (deAbs(returnedFloat.asDouble() - expected) < precision)
2578 return true;
2579
2580 log << TestLog::Message << "Expected result to be -INF or in range"
2581 << " (" << expected - precision << ", " << expected + precision << "), got "
2582 << returnedFloat.asDouble() << TestLog::EndMessage;
2583 return false;
2584 }
2585
2586 template <typename TYPE, typename FLOAT_TYPE>
isInverseSqrtResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,TestLog & log)2587 bool isInverseSqrtResultCorrect(const TYPE& returnedFloat, FLOAT_TYPE param, TestLog& log)
2588 {
2589 if (returnedFloat.isInf() && !returnedFloat.signBit())
2590 return true;
2591
2592 const double expected = 1.0/ deSqrt(getFloatTypeAsDouble(param));
2593 const double precision = getPrecisionAt(expected, 2.0, returnedFloat.MANTISSA_BITS);
2594
2595 if (deAbs(returnedFloat.asDouble() - expected) < precision)
2596 return true;
2597
2598 log << TestLog::Message << "Expected result to be INF or in range"
2599 << " (" << expected - precision << ", " << expected + precision << "), got "
2600 << returnedFloat.asDouble() << TestLog::EndMessage;
2601 return false;
2602 }
2603
2604 template <typename TYPE, typename FLOAT_TYPE>
isSqrtResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,TestLog & log)2605 bool isSqrtResultCorrect(const TYPE& returnedFloat, FLOAT_TYPE param, TestLog& log)
2606 {
2607 if (returnedFloat.isZero() && !returnedFloat.signBit())
2608 return true;
2609
2610
2611 const double expected = deSqrt(getFloatTypeAsDouble(param));
2612 const double expectedInverseSqrt = 1.0 / expected;
2613 const double inverseSqrtPrecision = getPrecisionAt(expectedInverseSqrt, 2.0, returnedFloat.MANTISSA_BITS);
2614
2615 double expectedMin = deMin(1.0 / (expectedInverseSqrt - inverseSqrtPrecision), 1.0 / (expectedInverseSqrt + inverseSqrtPrecision));
2616 double expectedMax = deMax(1.0 / (expectedInverseSqrt - inverseSqrtPrecision), 1.0 / (expectedInverseSqrt + inverseSqrtPrecision));
2617
2618 expectedMin -= getPrecisionAt(expectedMin, 2.5, returnedFloat.MANTISSA_BITS);
2619 expectedMax += getPrecisionAt(expectedMax, 2.5, returnedFloat.MANTISSA_BITS);
2620
2621 if (returnedFloat.asDouble() >= expectedMin && returnedFloat.asDouble() <= expectedMax)
2622 return true;
2623
2624 log << TestLog::Message << "Expected result to be +0 or in range"
2625 << " (" << expectedMin << ", " << expectedMax << "), got "
2626 << returnedFloat.asDouble() << TestLog::EndMessage;
2627 return false;
2628 }
2629
2630 // Function used to compare test result with expected output.
2631 // TYPE can be Float16, Float32 or Float64.
2632 // FLOAT_TYPE can be deFloat16, float, double.
2633 template <typename TYPE, typename FLOAT_TYPE>
compareBytes(vector<deUint8> & expectedBytes,AllocationSp outputAlloc,TestLog & log)2634 bool compareBytes(vector<deUint8>& expectedBytes, AllocationSp outputAlloc, TestLog& log)
2635 {
2636 const TYPE* returned = static_cast<const TYPE*>(outputAlloc->getHostPtr());
2637 const TYPE* fValueId = reinterpret_cast<const TYPE*>(&expectedBytes.front());
2638
2639 // all test return single value
2640 // Fp16 nostorage tests get their values from a deUint32 value, but we create the
2641 // buffer with the same size for both cases: 4 bytes.
2642 if (sizeof(TYPE) == 2u)
2643 DE_ASSERT((expectedBytes.size() / sizeof(TYPE)) == 2);
2644 else
2645 DE_ASSERT((expectedBytes.size() / sizeof(TYPE)) == 1);
2646
2647 // during test setup we do not store expected value but id that can be used to
2648 // retrieve actual value - this is done to handle special cases like multiple
2649 // allowed results or epsilon checks for some cases
2650 // note that this is workaround - this should be done by changing
2651 // ComputerShaderCase and GraphicsShaderCase so that additional arguments can
2652 // be passed to this verification callback
2653 typedef typename TYPE::StorageType SType;
2654 SType expectedInt = fValueId[0].bits();
2655 ValueId expectedValueId = static_cast<ValueId>(expectedInt);
2656
2657 // something went wrong, expected value cant be V_UNUSED,
2658 // if this is the case then test shouldn't be created at all
2659 DE_ASSERT(expectedValueId != V_UNUSED);
2660
2661 TYPE returnedFloat = returned[0];
2662
2663 log << TestLog::Message << "Calculated result: " << toHex(returnedFloat.bits())
2664 << " (" << returnedFloat.asFloat() << ")" << TestLog::EndMessage;
2665
2666 if (expectedValueId == V_NAN)
2667 {
2668 if (returnedFloat.isNaN())
2669 return true;
2670
2671 log << TestLog::Message << "Expected NaN" << TestLog::EndMessage;
2672 return false;
2673 }
2674
2675 if (expectedValueId == V_DENORM)
2676 {
2677 if (returnedFloat.isDenorm())
2678 return true;
2679
2680 log << TestLog::Message << "Expected Denorm" << TestLog::EndMessage;
2681 return false;
2682 }
2683
2684 // handle multiple acceptable results cases
2685 if (expectedValueId == V_ZERO_OR_MINUS_ZERO)
2686 {
2687 if (returnedFloat.isZero())
2688 return true;
2689
2690 log << TestLog::Message << "Expected 0 or -0" << TestLog::EndMessage;
2691 return false;
2692 }
2693 if (expectedValueId == V_ZERO_OR_ONE)
2694 return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_ONE, log);
2695 if ((expectedValueId == V_ZERO_OR_FP16_DENORM_TO_FP32) || (expectedValueId == V_ZERO_OR_FP16_DENORM_TO_FP64))
2696 return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_CONV_DENORM_SMALLER, log);
2697 if (expectedValueId == V_ZERO_OR_FP32_DENORM_TO_FP64)
2698 return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_CONV_DENORM_BIGGER, log);
2699 if (expectedValueId == V_ZERO_OR_DENORM_TIMES_TWO)
2700 {
2701 // this expected value is only needed for fp16
2702 DE_ASSERT(returnedFloat.EXPONENT_BIAS == 15);
2703 return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_DENORM_TIMES_TWO, log);
2704 }
2705 if (expectedValueId == V_MINUS_ONE_OR_CLOSE)
2706 {
2707 // this expected value is only needed for fp16
2708 DE_ASSERT(returnedFloat.EXPONENT_BIAS == 15);
2709 typename TYPE::StorageType returnedValue = returnedFloat.bits();
2710 return (returnedValue == 0xbc00) || (returnedValue == 0xbbff);
2711 }
2712
2713 // handle trigonometric operations precision errors
2714 if (expectedValueId == V_TRIG_ONE)
2715 return isCosResultCorrect<TYPE>(returnedFloat, log);
2716
2717 // handle acos(0) case
2718 if (expectedValueId == V_PI_DIV_2)
2719 return isAcosResultCorrect<TYPE>(returnedFloat, log);
2720
2721 TypeValues<FLOAT_TYPE> typeValues;
2722
2723 if (expectedValueId == V_MINUS_INF_OR_LOG_DENORM)
2724 return isLogResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), deLog, log);
2725
2726 if (expectedValueId == V_MINUS_INF_OR_LOG2_DENORM)
2727 return isLogResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), deLog2, log);
2728
2729 if (expectedValueId == V_ZERO_OR_SQRT_DENORM)
2730 return isSqrtResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), log);
2731
2732 if (expectedValueId == V_INF_OR_INV_SQRT_DENORM)
2733 return isInverseSqrtResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), log);
2734
2735
2736 typename RawConvert<FLOAT_TYPE, SType>::Value value;
2737 value.fp = typeValues.getValue(expectedValueId);
2738
2739 if (returnedFloat.bits() == value.ui)
2740 return true;
2741
2742 log << TestLog::Message << "Expected " << toHex(value.ui)
2743 << " (" << value.fp << ")" << TestLog::EndMessage;
2744 return false;
2745 }
2746
2747 template <typename TYPE, typename FLOAT_TYPE>
checkFloats(const vector<Resource> &,const vector<AllocationSp> & outputAllocs,const vector<Resource> & expectedOutputs,TestLog & log)2748 bool checkFloats (const vector<Resource>& ,
2749 const vector<AllocationSp>& outputAllocs,
2750 const vector<Resource>& expectedOutputs,
2751 TestLog& log)
2752 {
2753 if (outputAllocs.size() != expectedOutputs.size())
2754 return false;
2755
2756 for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
2757 {
2758 vector<deUint8> expectedBytes;
2759 expectedOutputs[outputNdx].getBytes(expectedBytes);
2760
2761 if (!compareBytes<TYPE, FLOAT_TYPE>(expectedBytes, outputAllocs[outputNdx], log))
2762 return false;
2763 }
2764
2765 return true;
2766 }
2767
checkMixedFloats(const vector<Resource> &,const vector<AllocationSp> & outputAllocs,const vector<Resource> & expectedOutputs,TestLog & log)2768 bool checkMixedFloats (const vector<Resource>& ,
2769 const vector<AllocationSp>& outputAllocs,
2770 const vector<Resource>& expectedOutputs,
2771 TestLog& log)
2772 {
2773 // this function validates buffers containing floats of diferent widths, order is not important
2774
2775 if (outputAllocs.size() != expectedOutputs.size())
2776 return false;
2777
2778 // The comparison function depends on the data type stored in the resource.
2779 using compareFun = bool (*)(vector<deUint8>& expectedBytes, AllocationSp outputAlloc, TestLog& log);
2780 const map<BufferDataType, compareFun> compareMap =
2781 {
2782 { BufferDataType::DATA_FP16, compareBytes<Float16, deFloat16> },
2783 { BufferDataType::DATA_FP32, compareBytes<Float32, float> },
2784 { BufferDataType::DATA_FP64, compareBytes<Float64, double>},
2785 };
2786
2787 vector<deUint8> expectedBytes;
2788 bool allResultsAreCorrect = true;
2789 int resultIndex = static_cast<int>(outputAllocs.size());
2790
2791 while (resultIndex--)
2792 {
2793 expectedOutputs[resultIndex].getBytes(expectedBytes);
2794 BufferDataType type = static_cast<BufferDataType>(reinterpret_cast<std::uintptr_t>(expectedOutputs[resultIndex].getUserData()));
2795 allResultsAreCorrect &= compareMap.at(type)(expectedBytes, outputAllocs[resultIndex], log);
2796 }
2797
2798 return allResultsAreCorrect;
2799 }
2800
2801 // Base class for ComputeTestGroupBuilder and GrephicstestGroupBuilder classes.
2802 // It contains all functionalities that are used by both child classes.
2803 class TestGroupBuilderBase
2804 {
2805 public:
2806
2807 TestGroupBuilderBase();
2808 virtual ~TestGroupBuilderBase() = default;
2809
2810 virtual void createOperationTests(TestCaseGroup* parentGroup,
2811 const char* groupName,
2812 FloatType floatType,
2813 bool argumentsFromInput) = 0;
2814
2815 virtual void createSettingsTests(TestCaseGroup* parentGroup) = 0;
2816
2817 protected:
2818
2819 typedef vector<OperationTestCase> TestCaseVect;
2820
2821 // Structure containing all data required to create single operation test.
2822 struct OperationTestCaseInfo
2823 {
2824 FloatType outFloatType;
2825 bool argumentsFromInput;
2826 VkShaderStageFlagBits testedStage;
2827 const Operation& operation;
2828 const OperationTestCase& testCase;
2829 };
2830
2831 // Mode used by SettingsTestCaseInfo to specify what settings do we want to test.
2832 enum SettingsMode
2833 {
2834 SM_ROUNDING = 0,
2835 SM_DENORMS
2836 };
2837
2838 // Enum containing available options. When rounding is tested only SO_RTE and SO_RTZ
2839 // should be used. SO_FLUSH and SO_PRESERVE should be used only for denorm tests.
2840 enum SettingsOption
2841 {
2842 SO_UNUSED = 0,
2843 SO_RTE,
2844 SO_RTZ,
2845 SO_FLUSH,
2846 SO_PRESERVE
2847 };
2848
2849 // Structure containing all data required to create single settings test.
2850 struct SettingsTestCaseInfo
2851 {
2852 const char* name;
2853 SettingsMode testedMode;
2854 VkShaderFloatControlsIndependence independenceSetting;
2855
2856 SettingsOption fp16Option;
2857 SettingsOption fp32Option;
2858 SettingsOption fp64Option;
2859 deBool fp16Without16BitStorage;
2860 };
2861
2862 void specializeOperation(const OperationTestCaseInfo& testCaseInfo,
2863 SpecializedOperation& specializedOperation) const;
2864
2865 void getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,
2866 const string inBitWidth,
2867 const string outBitWidth,
2868 string& capability,
2869 string& executionMode) const;
2870
2871 void setupVulkanFeatures(FloatType inFloatType,
2872 FloatType outFloatType,
2873 BehaviorFlags behaviorFlags,
2874 bool float64FeatureRequired,
2875 VulkanFeatures& features) const;
2876
2877 protected:
2878
2879 struct TypeData
2880 {
2881 TypeValuesSP values;
2882 TypeSnippetsSP snippets;
2883 TypeTestResultsSP testResults;
2884 };
2885
2886 // Type specific parameters are stored in this map.
2887 map<FloatType, TypeData> m_typeData;
2888
2889 // Map converting behaviuor id to OpCapability instruction
2890 typedef map<BehaviorFlagBits, string> BehaviorNameMap;
2891 BehaviorNameMap m_behaviorToName;
2892 };
2893
TestGroupBuilderBase()2894 TestGroupBuilderBase::TestGroupBuilderBase()
2895 {
2896 m_typeData[FP16] = TypeData();
2897 m_typeData[FP16].values = TypeValuesSP(new TypeValues<deFloat16>);
2898 m_typeData[FP16].snippets = TypeSnippetsSP(new TypeSnippets<deFloat16>);
2899 m_typeData[FP16].testResults = TypeTestResultsSP(new TypeTestResults<deFloat16>);
2900 m_typeData[FP32] = TypeData();
2901 m_typeData[FP32].values = TypeValuesSP(new TypeValues<float>);
2902 m_typeData[FP32].snippets = TypeSnippetsSP(new TypeSnippets<float>);
2903 m_typeData[FP32].testResults = TypeTestResultsSP(new TypeTestResults<float>);
2904 m_typeData[FP64] = TypeData();
2905 m_typeData[FP64].values = TypeValuesSP(new TypeValues<double>);
2906 m_typeData[FP64].snippets = TypeSnippetsSP(new TypeSnippets<double>);
2907 m_typeData[FP64].testResults = TypeTestResultsSP(new TypeTestResults<double>);
2908
2909 m_behaviorToName[B_DENORM_PRESERVE] = "DenormPreserve";
2910 m_behaviorToName[B_DENORM_FLUSH] = "DenormFlushToZero";
2911 m_behaviorToName[B_ZIN_PRESERVE] = "SignedZeroInfNanPreserve";
2912 m_behaviorToName[B_RTE_ROUNDING] = "RoundingModeRTE";
2913 m_behaviorToName[B_RTZ_ROUNDING] = "RoundingModeRTZ";
2914 }
2915
specializeOperation(const OperationTestCaseInfo & testCaseInfo,SpecializedOperation & specializedOperation) const2916 void TestGroupBuilderBase::specializeOperation (const OperationTestCaseInfo& testCaseInfo,
2917 SpecializedOperation& specializedOperation) const
2918 {
2919 const string typeToken = "_float";
2920 const string widthToken = "${float_width}";
2921
2922 FloatType outFloatType = testCaseInfo.outFloatType;
2923 const Operation& operation = testCaseInfo.operation;
2924 const TypeSnippetsSP outTypeSnippets = m_typeData.at(outFloatType).snippets;
2925 const bool inputRestricted = operation.isInputTypeRestricted;
2926 FloatType inFloatType = operation.restrictedInputType;
2927
2928 // usually input type is same as output but this is not the case for conversion
2929 // operations; in those cases operation definitions have restricted input type
2930 inFloatType = inputRestricted ? inFloatType : outFloatType;
2931
2932 TypeSnippetsSP inTypeSnippets = m_typeData.at(inFloatType).snippets;
2933
2934 const string inTypePrefix = string("_f") + inTypeSnippets->bitWidth;
2935 const string outTypePrefix = string("_f") + outTypeSnippets->bitWidth;
2936
2937 specializedOperation.constants = replace(operation.constants, typeToken, inTypePrefix);
2938 specializedOperation.annotations = replace(operation.annotations, widthToken, outTypeSnippets->bitWidth);
2939 specializedOperation.types = replace(operation.types, typeToken, outTypePrefix);
2940 specializedOperation.variables = replace(operation.variables, typeToken, outTypePrefix);
2941 specializedOperation.functions = replace(operation.functions, typeToken, outTypePrefix);
2942 specializedOperation.commands = replace(operation.commands, typeToken, outTypePrefix);
2943
2944 specializedOperation.inFloatType = inFloatType;
2945 specializedOperation.inTypeSnippets = inTypeSnippets;
2946 specializedOperation.outTypeSnippets = outTypeSnippets;
2947 specializedOperation.argumentsUsesFloatConstant = 0;
2948
2949 if (operation.isSpecConstant)
2950 return;
2951
2952 // select way arguments are prepared
2953 if (testCaseInfo.argumentsFromInput)
2954 {
2955 // read arguments from input SSBO in main function
2956 specializedOperation.arguments = inTypeSnippets->argumentsFromInputSnippet;
2957
2958 if (inFloatType == FP16 && testCaseInfo.testCase.fp16Without16BitStorage)
2959 specializedOperation.arguments = inTypeSnippets->argumentsFromInputFp16Snippet;
2960 }
2961 else
2962 {
2963 // generate proper values in main function
2964 const string arg1 = "%arg1 = ";
2965 const string arg2 = "%arg2 = ";
2966
2967 const ValueId* inputArguments = testCaseInfo.testCase.input;
2968 if (inputArguments[0] != V_UNUSED)
2969 {
2970 specializedOperation.arguments = arg1 + inTypeSnippets->valueIdToSnippetArgMap.at(inputArguments[0]);
2971 specializedOperation.argumentsUsesFloatConstant |= B_STATEMENT_USAGE_ARGS_CONST_FLOAT;
2972 }
2973 if (inputArguments[1] != V_UNUSED)
2974 {
2975 specializedOperation.arguments += arg2 + inTypeSnippets->valueIdToSnippetArgMap.at(inputArguments[1]);
2976 specializedOperation.argumentsUsesFloatConstant |= B_STATEMENT_USAGE_ARGS_CONST_FLOAT;
2977 }
2978 }
2979 }
2980
2981
getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,const string inBitWidth,const string outBitWidth,string & capability,string & executionMode) const2982 void TestGroupBuilderBase::getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,
2983 const string inBitWidth,
2984 const string outBitWidth,
2985 string& capability,
2986 string& executionMode) const
2987 {
2988 // iterate over all behaviours and request those that are needed
2989 BehaviorNameMap::const_iterator it = m_behaviorToName.begin();
2990 while (it != m_behaviorToName.end())
2991 {
2992 BehaviorFlagBits behaviorId = it->first;
2993 string behaviorName = it->second;
2994
2995 if (behaviorFlags & behaviorId)
2996 {
2997 capability += "OpCapability " + behaviorName + "\n";
2998
2999 // rounding mode should be obeyed for destination type
3000 bool rounding = (behaviorId == B_RTE_ROUNDING) || (behaviorId == B_RTZ_ROUNDING);
3001 executionMode += "OpExecutionMode %main " + behaviorName + " " +
3002 (rounding ? outBitWidth : inBitWidth) + "\n";
3003 }
3004
3005 ++it;
3006 }
3007
3008 DE_ASSERT(!capability.empty() && !executionMode.empty());
3009 }
3010
setupVulkanFeatures(FloatType inFloatType,FloatType outFloatType,BehaviorFlags behaviorFlags,bool float64FeatureRequired,VulkanFeatures & features) const3011 void TestGroupBuilderBase::setupVulkanFeatures(FloatType inFloatType,
3012 FloatType outFloatType,
3013 BehaviorFlags behaviorFlags,
3014 bool float64FeatureRequired,
3015 VulkanFeatures& features) const
3016 {
3017 features.coreFeatures.shaderFloat64 = float64FeatureRequired;
3018
3019 // request proper float controls features
3020 ExtensionFloatControlsFeatures& floatControls = features.floatControlsProperties;
3021
3022 // rounding mode should obey the destination type
3023 bool rteRounding = (behaviorFlags & B_RTE_ROUNDING) != 0;
3024 bool rtzRounding = (behaviorFlags & B_RTZ_ROUNDING) != 0;
3025 if (rteRounding || rtzRounding)
3026 {
3027 switch(outFloatType)
3028 {
3029 case FP16:
3030 floatControls.shaderRoundingModeRTEFloat16 = rteRounding;
3031 floatControls.shaderRoundingModeRTZFloat16 = rtzRounding;
3032 return;
3033 case FP32:
3034 floatControls.shaderRoundingModeRTEFloat32 = rteRounding;
3035 floatControls.shaderRoundingModeRTZFloat32 = rtzRounding;
3036 return;
3037 case FP64:
3038 floatControls.shaderRoundingModeRTEFloat64 = rteRounding;
3039 floatControls.shaderRoundingModeRTZFloat64 = rtzRounding;
3040 return;
3041 }
3042 }
3043
3044 switch(inFloatType)
3045 {
3046 case FP16:
3047 floatControls.shaderDenormPreserveFloat16 = behaviorFlags & B_DENORM_PRESERVE;
3048 floatControls.shaderDenormFlushToZeroFloat16 = behaviorFlags & B_DENORM_FLUSH;
3049 floatControls.shaderSignedZeroInfNanPreserveFloat16 = behaviorFlags & B_ZIN_PRESERVE;
3050 return;
3051 case FP32:
3052 floatControls.shaderDenormPreserveFloat32 = behaviorFlags & B_DENORM_PRESERVE;
3053 floatControls.shaderDenormFlushToZeroFloat32 = behaviorFlags & B_DENORM_FLUSH;
3054 floatControls.shaderSignedZeroInfNanPreserveFloat32 = behaviorFlags & B_ZIN_PRESERVE;
3055 return;
3056 case FP64:
3057 floatControls.shaderDenormPreserveFloat64 = behaviorFlags & B_DENORM_PRESERVE;
3058 floatControls.shaderDenormFlushToZeroFloat64 = behaviorFlags & B_DENORM_FLUSH;
3059 floatControls.shaderSignedZeroInfNanPreserveFloat64 = behaviorFlags & B_ZIN_PRESERVE;
3060 return;
3061 }
3062 }
3063
3064 // Test case not related to SPIR-V but executed with compute tests. It checks if specified
3065 // features are set to the same value when specific independence settings are used.
verifyIndependenceSettings(Context & context)3066 tcu::TestStatus verifyIndependenceSettings(Context& context)
3067 {
3068 if (!context.isDeviceFunctionalitySupported("VK_KHR_shader_float_controls"))
3069 TCU_THROW(NotSupportedError, "VK_KHR_shader_float_controls not supported");
3070
3071 vk::VkPhysicalDeviceFloatControlsPropertiesKHR fcProperties;
3072 fcProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR;
3073 fcProperties.pNext = DE_NULL;
3074
3075 vk::VkPhysicalDeviceProperties2 deviceProperties;
3076 deviceProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
3077 deviceProperties.pNext = &fcProperties;
3078
3079 auto fail = [](const string& featureGroup)
3080 {
3081 return tcu::TestStatus::fail(featureGroup + " features should be set to the same value");
3082 };
3083
3084 const VkPhysicalDevice physicalDevice = context.getPhysicalDevice();
3085 const vk::InstanceInterface& instanceInterface = context.getInstanceInterface();
3086 instanceInterface.getPhysicalDeviceProperties2(physicalDevice, &deviceProperties);
3087
3088 if (fcProperties.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR)
3089 {
3090 vk::VkBool32 fp16rte = fcProperties.shaderRoundingModeRTEFloat16;
3091 vk::VkBool32 fp32rte = fcProperties.shaderRoundingModeRTEFloat32;
3092 vk::VkBool32 fp64rte = fcProperties.shaderRoundingModeRTEFloat64;
3093 if ((fp16rte != fp32rte) || (fp32rte != fp64rte))
3094 return fail("shaderRoundingModeRTEFloat*");
3095
3096 vk::VkBool32 fp16rtz = fcProperties.shaderRoundingModeRTZFloat16;
3097 vk::VkBool32 fp32rtz = fcProperties.shaderRoundingModeRTZFloat32;
3098 vk::VkBool32 fp64rtz = fcProperties.shaderRoundingModeRTZFloat64;
3099 if ((fp16rtz != fp32rtz) || (fp32rtz != fp64rtz))
3100 return fail("shaderRoundingModeRTZFloat*");
3101 }
3102 else if (fcProperties.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR)
3103 {
3104 vk::VkBool32 fp16rte = fcProperties.shaderRoundingModeRTEFloat16;
3105 vk::VkBool32 fp64rte = fcProperties.shaderRoundingModeRTEFloat64;
3106 if ((fp16rte != fp64rte))
3107 return fail("shaderRoundingModeRTEFloat16 and 64");
3108
3109 vk::VkBool32 fp16rtz = fcProperties.shaderRoundingModeRTZFloat16;
3110 vk::VkBool32 fp64rtz = fcProperties.shaderRoundingModeRTZFloat64;
3111 if ((fp16rtz != fp64rtz))
3112 return fail("shaderRoundingModeRTZFloat16 and 64");
3113 }
3114
3115 if (fcProperties.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR)
3116 {
3117 vk::VkBool32 fp16flush = fcProperties.shaderDenormFlushToZeroFloat16;
3118 vk::VkBool32 fp32flush = fcProperties.shaderDenormFlushToZeroFloat32;
3119 vk::VkBool32 fp64flush = fcProperties.shaderDenormFlushToZeroFloat64;
3120 if ((fp16flush != fp32flush) || (fp32flush != fp64flush))
3121 return fail("shaderDenormFlushToZeroFloat*");
3122
3123 vk::VkBool32 fp16preserve = fcProperties.shaderDenormPreserveFloat16;
3124 vk::VkBool32 fp32preserve = fcProperties.shaderDenormPreserveFloat32;
3125 vk::VkBool32 fp64preserve = fcProperties.shaderDenormPreserveFloat64;
3126 if ((fp16preserve != fp32preserve) || (fp32preserve != fp64preserve))
3127 return fail("shaderDenormPreserveFloat*");
3128 }
3129 else if (fcProperties.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR)
3130 {
3131 vk::VkBool32 fp16flush = fcProperties.shaderDenormFlushToZeroFloat16;
3132 vk::VkBool32 fp64flush = fcProperties.shaderDenormFlushToZeroFloat64;
3133 if ((fp16flush != fp64flush))
3134 return fail("shaderDenormFlushToZeroFloat16 and 64");
3135
3136 vk::VkBool32 fp16preserve = fcProperties.shaderDenormPreserveFloat16;
3137 vk::VkBool32 fp64preserve = fcProperties.shaderDenormPreserveFloat64;
3138 if ((fp16preserve != fp64preserve))
3139 return fail("shaderDenormPreserveFloat16 and 64");
3140 }
3141
3142 return tcu::TestStatus::pass("Pass");
3143 }
3144
// ComputeTestGroupBuilder contains logic that creates compute shaders
// for all test cases. As most tests in spirv-assembly it uses functionality
// implemented in vktSpvAsmComputeShaderTestUtil.cpp.
class ComputeTestGroupBuilder: public TestGroupBuilderBase
{
public:

	// Initializes the operation test case builder and sets up both shader
	// templates; the templates are empty until this has been called.
	void init();

	// Adds a child group named groupName to parentGroup and fills it with one
	// compute shader case per operation test case built for floatType.
	// argumentsFromInput is forwarded to the test case builder and to each
	// test case description.
	void createOperationTests(TestCaseGroup* parentGroup,
							  const char* groupName,
							  FloatType floatType,
							  bool argumentsFromInput) override;

	// Adds the "independence_settings" child group to parentGroup.
	void createSettingsTests(TestCaseGroup* parentGroup) override;

protected:

	// Fill csSpec with everything needed to run the given operation test case
	// or settings test case as a compute shader case.
	void fillShaderSpec(const OperationTestCaseInfo& testCaseInfo,
						ComputeShaderSpec& csSpec) const;
	void fillShaderSpec(const SettingsTestCaseInfo& testCaseInfo,
						ComputeShaderSpec& csSpec) const;

private:


	// SPIR-V assembly templates, set up in init()
	StringTemplate m_operationShaderTemplate;
	StringTemplate m_settingsShaderTemplate;
	// Source of operation test cases and operation definitions
	TestCasesBuilder m_operationTestCaseBuilder;
};
3175
init()3176 void ComputeTestGroupBuilder::init()
3177 {
3178 m_operationTestCaseBuilder.init();
3179
3180 // generic compute shader template with common code for all
3181 // float types and all possible operations listed in OperationId enum
3182 m_operationShaderTemplate.setString(
3183 "OpCapability Shader\n"
3184 "${capabilities}"
3185
3186 "OpExtension \"SPV_KHR_float_controls\"\n"
3187 "${extensions}"
3188
3189 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
3190 "OpMemoryModel Logical GLSL450\n"
3191 "OpEntryPoint GLCompute %main \"main\" %id\n"
3192 "OpExecutionMode %main LocalSize 1 1 1\n"
3193 "${execution_mode}"
3194
3195 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3196
3197 // some tests require additional annotations
3198 "${annotations}"
3199
3200 "%type_void = OpTypeVoid\n"
3201 "%type_voidf = OpTypeFunction %type_void\n"
3202 "%type_bool = OpTypeBool\n"
3203 "%type_u32 = OpTypeInt 32 0\n"
3204 "%type_i32 = OpTypeInt 32 1\n"
3205 "%type_i32_fptr = OpTypePointer Function %type_i32\n"
3206 "%type_u32_vec2 = OpTypeVector %type_u32 2\n"
3207 "%type_u32_vec3 = OpTypeVector %type_u32 3\n"
3208 "%type_u32_vec3_ptr = OpTypePointer Input %type_u32_vec3\n"
3209
3210 "%c_i32_0 = OpConstant %type_i32 0\n"
3211 "%c_i32_1 = OpConstant %type_i32 1\n"
3212 "%c_i32_2 = OpConstant %type_i32 2\n"
3213 "%c_u32_1 = OpConstant %type_u32 1\n"
3214
3215 // if input float type has different width then output then
3216 // both types are defined here along with all types derived from
3217 // them that are commonly used by tests; some tests also define
3218 // their own types (those that are needed just by this single test)
3219 "${types}"
3220
3221 // SSBO definitions
3222 "${io_definitions}"
3223
3224 "%id = OpVariable %type_u32_vec3_ptr Input\n"
3225
3226 // set of default constants per float type is placed here,
3227 // operation tests can also define additional constants.
3228 "${constants}"
3229
3230 // O_RETURN_VAL defines function here and becouse
3231 // of that this token needs to be directly before main function
3232 "${functions}"
3233
3234 "%main = OpFunction %type_void None %type_voidf\n"
3235 "%label = OpLabel\n"
3236
3237 "${variables}"
3238
3239 // depending on test case arguments are either read from input ssbo
3240 // or generated in spir-v code - in later case shader input is not used
3241 "${arguments}"
3242
3243 // perform test commands
3244 "${commands}"
3245
3246 // save result to SSBO
3247 "${save_result}"
3248
3249 "OpReturn\n"
3250 "OpFunctionEnd\n");
3251
3252 m_settingsShaderTemplate.setString(
3253 "OpCapability Shader\n"
3254 "${capabilities}"
3255
3256 "OpExtension \"SPV_KHR_float_controls\"\n"
3257 "${extensions}"
3258
3259 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
3260 "OpMemoryModel Logical GLSL450\n"
3261 "OpEntryPoint GLCompute %main \"main\" %id\n"
3262 "OpExecutionMode %main LocalSize 1 1 1\n"
3263 "${execution_modes}"
3264
3265 // annotations
3266 "OpDecorate %SSBO_in BufferBlock\n"
3267 "OpDecorate %ssbo_in DescriptorSet 0\n"
3268 "OpDecorate %ssbo_in Binding 0\n"
3269 "OpDecorate %ssbo_in NonWritable\n"
3270 "${io_annotations}"
3271
3272 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3273
3274 // types
3275 "%type_void = OpTypeVoid\n"
3276 "%type_voidf = OpTypeFunction %type_void\n"
3277 "%type_u32 = OpTypeInt 32 0\n"
3278 "%type_i32 = OpTypeInt 32 1\n"
3279 "%type_i32_fptr = OpTypePointer Function %type_i32\n"
3280 "%type_u32_vec3 = OpTypeVector %type_u32 3\n"
3281 "%type_u32_vec3_ptr = OpTypePointer Input %type_u32_vec3\n"
3282
3283 "%c_i32_0 = OpConstant %type_i32 0\n"
3284 "%c_i32_1 = OpConstant %type_i32 1\n"
3285 "%c_i32_2 = OpConstant %type_i32 2\n"
3286
3287 "${types}"
3288
3289 // in SSBO definition
3290 "%SSBO_in = OpTypeStruct ${in_struct}\n"
3291 "%up_SSBO_in = OpTypePointer Uniform %SSBO_in\n"
3292 "%ssbo_in = OpVariable %up_SSBO_in Uniform\n"
3293
3294 // out SSBO definitions
3295 "${out_definitions}"
3296
3297 "%id = OpVariable %type_u32_vec3_ptr Input\n"
3298 "%main = OpFunction %type_void None %type_voidf\n"
3299 "%label = OpLabel\n"
3300
3301 "${commands}"
3302
3303 "${save_result}"
3304
3305 "OpReturn\n"
3306 "OpFunctionEnd\n");
3307 }
3308
createOperationTests(TestCaseGroup * parentGroup,const char * groupName,FloatType floatType,bool argumentsFromInput)3309 void ComputeTestGroupBuilder::createOperationTests(TestCaseGroup* parentGroup, const char* groupName, FloatType floatType, bool argumentsFromInput)
3310 {
3311 TestContext& testCtx = parentGroup->getTestContext();
3312 TestCaseGroup* group = new TestCaseGroup(testCtx, groupName, "");
3313 parentGroup->addChild(group);
3314
3315 TestCaseVect testCases;
3316 m_operationTestCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
3317
3318 TestCaseVect::const_iterator currTestCase = testCases.begin();
3319 TestCaseVect::const_iterator lastTestCase = testCases.end();
3320 while(currTestCase != lastTestCase)
3321 {
3322 const OperationTestCase& testCase = *currTestCase;
3323 ++currTestCase;
3324
3325 // skip cases with undefined output
3326 if (testCase.expectedOutput == V_UNUSED)
3327 continue;
3328
3329 OperationTestCaseInfo testCaseInfo =
3330 {
3331 floatType,
3332 argumentsFromInput,
3333 VK_SHADER_STAGE_COMPUTE_BIT,
3334 m_operationTestCaseBuilder.getOperation(testCase.operationId),
3335 testCase
3336 };
3337
3338 ComputeShaderSpec csSpec;
3339
3340 fillShaderSpec(testCaseInfo, csSpec);
3341
3342 string testName = replace(testCase.baseName, "op", testCaseInfo.operation.name);
3343 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", csSpec));
3344 }
3345 }
3346
createSettingsTests(TestCaseGroup * parentGroup)3347 void ComputeTestGroupBuilder::createSettingsTests(TestCaseGroup* parentGroup)
3348 {
3349 TestContext& testCtx = parentGroup->getTestContext();
3350 TestCaseGroup* group = new TestCaseGroup(testCtx, "independence_settings", "");
3351 parentGroup->addChild(group);
3352
3353 using SFCI = VkShaderFloatControlsIndependence;
3354 const SFCI independence32 = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
3355 const SFCI independenceAll = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
3356
3357 vector<SettingsTestCaseInfo> testCases =
3358 {
3359 // name mode independenceSetting fp16Option fp32Option fp64Option fp16Without16bitstorage
3360
3361 // test rounding modes when only two float widths are available
3362 { "rounding_ind_all_fp16_rte_fp32_rtz", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_UNUSED, DE_FALSE },
3363 { "rounding_ind_all_fp16_rtz_fp32_rte", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_UNUSED, DE_FALSE },
3364 { "rounding_ind_32_fp16_rte_fp32_rtz", SM_ROUNDING, independence32, SO_RTE, SO_RTZ, SO_UNUSED, DE_FALSE },
3365 { "rounding_ind_32_fp16_rtz_fp32_rte", SM_ROUNDING, independence32, SO_RTZ, SO_RTE, SO_UNUSED, DE_FALSE },
3366 { "rounding_ind_all_fp16_rte_fp64_rtz", SM_ROUNDING, independenceAll, SO_RTE, SO_UNUSED, SO_RTZ, DE_FALSE },
3367 { "rounding_ind_all_fp16_rtz_fp64_rte", SM_ROUNDING, independenceAll, SO_RTZ, SO_UNUSED, SO_RTE, DE_FALSE },
3368 { "rounding_ind_all_fp32_rte_fp64_rtz", SM_ROUNDING, independenceAll, SO_UNUSED, SO_RTE, SO_RTZ, DE_FALSE },
3369 { "rounding_ind_all_fp32_rtz_fp64_rte", SM_ROUNDING, independenceAll, SO_UNUSED, SO_RTZ, SO_RTE, DE_FALSE },
3370 { "rounding_ind_32_fp32_rte_fp64_rtz", SM_ROUNDING, independence32, SO_UNUSED, SO_RTE, SO_RTZ, DE_FALSE },
3371 { "rounding_ind_32_fp32_rtz_fp64_rte", SM_ROUNDING, independence32, SO_UNUSED, SO_RTZ, SO_RTE, DE_FALSE },
3372
3373 // test rounding modes when three widths are available
3374 { "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rtz", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_RTZ, DE_FALSE },
3375 { "rounding_ind_32_fp16_rtz_fp32_rte_fp64_rtz", SM_ROUNDING, independence32, SO_RTZ, SO_RTE, SO_RTZ, DE_FALSE },
3376 { "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rte", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_RTE, DE_FALSE },
3377 { "rounding_ind_32_fp16_rte_fp32_rtz_fp64_rte", SM_ROUNDING, independence32, SO_RTE, SO_RTZ, SO_RTE, DE_FALSE },
3378 { "rounding_ind_all_fp16_rtz_fp32_rtz_fp64_rte", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTZ, SO_RTE, DE_FALSE },
3379 { "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rte", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_RTE, DE_FALSE },
3380 { "rounding_ind_all_fp16_rte_fp32_rte_fp64_rtz", SM_ROUNDING, independenceAll, SO_RTE, SO_RTE, SO_RTZ, DE_FALSE },
3381 { "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rtz", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_RTZ, DE_FALSE },
3382
3383 // test denorm settings when only two float widths are available
3384 { "denorm_ind_all_fp16_flush_fp32_preserve", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE, SO_UNUSED, DE_FALSE },
3385 { "denorm_ind_all_fp16_preserve_fp32_flush", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH, SO_UNUSED, DE_FALSE },
3386 { "denorm_ind_32_fp16_flush_fp32_preserve", SM_DENORMS, independence32, SO_FLUSH, SO_PRESERVE, SO_UNUSED, DE_FALSE },
3387 { "denorm_ind_32_fp16_preserve_fp32_flush", SM_DENORMS, independence32, SO_PRESERVE, SO_FLUSH, SO_UNUSED, DE_FALSE },
3388 { "denorm_ind_all_fp16_flush_fp64_preserve", SM_DENORMS, independenceAll, SO_FLUSH, SO_UNUSED, SO_PRESERVE, DE_FALSE },
3389 { "denorm_ind_all_fp16_preserve_fp64_flush", SM_DENORMS, independenceAll, SO_PRESERVE, SO_UNUSED, SO_FLUSH, DE_FALSE },
3390 { "denorm_ind_all_fp32_flush_fp64_preserve", SM_DENORMS, independenceAll, SO_UNUSED, SO_FLUSH, SO_PRESERVE, DE_FALSE },
3391 { "denorm_ind_all_fp32_preserve_fp64_flush", SM_DENORMS, independenceAll, SO_UNUSED, SO_PRESERVE, SO_FLUSH, DE_FALSE },
3392 { "denorm_ind_32_fp32_flush_fp64_preserve", SM_DENORMS, independence32, SO_UNUSED, SO_FLUSH, SO_PRESERVE, DE_FALSE },
3393 { "denorm_ind_32_fp32_preserve_fp64_flush", SM_DENORMS, independence32, SO_UNUSED, SO_PRESERVE, SO_FLUSH, DE_FALSE },
3394
3395 // test denorm settings when three widths are available
3396 { "denorm_ind_all_fp16_preserve_fp32_flush_fp64_preserve", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH, SO_PRESERVE, DE_FALSE },
3397 { "denorm_ind_32_fp16_preserve_fp32_flush_fp64_preserve", SM_DENORMS, independence32, SO_PRESERVE, SO_FLUSH, SO_PRESERVE, DE_FALSE },
3398 { "denorm_ind_all_fp16_flush_fp32_preserve_fp64_flush", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE, SO_FLUSH, DE_FALSE },
3399 { "denorm_ind_32_fp16_flush_fp32_preserve_fp64_flush", SM_DENORMS, independence32, SO_FLUSH, SO_PRESERVE, SO_FLUSH, DE_FALSE },
3400 { "denorm_ind_all_fp16_preserve_fp32_preserve_fp64_flush", SM_DENORMS, independenceAll, SO_PRESERVE, SO_PRESERVE, SO_FLUSH, DE_FALSE },
3401 { "denorm_ind_all_fp16_preserve_fp32_flush_fp64_flush", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH, SO_FLUSH, DE_FALSE },
3402 { "denorm_ind_all_fp16_flush_fp32_flush_fp64_preserve", SM_DENORMS, independenceAll, SO_FLUSH, SO_FLUSH, SO_PRESERVE, DE_FALSE },
3403 { "denorm_ind_all_fp16_flush_fp32_preserve_fp64_preserve", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE, SO_PRESERVE, DE_FALSE },
3404
3405 // Same fp16 tests but without requiring VK_KHR_16bit_storage
3406 // test rounding modes when only two float widths are available
3407 { "rounding_ind_all_fp16_rte_fp32_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_UNUSED, DE_TRUE },
3408 { "rounding_ind_all_fp16_rtz_fp32_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_UNUSED, DE_TRUE },
3409 { "rounding_ind_32_fp16_rte_fp32_rtz_nostorage", SM_ROUNDING, independence32, SO_RTE, SO_RTZ, SO_UNUSED, DE_TRUE },
3410 { "rounding_ind_32_fp16_rtz_fp32_rte_nostorage", SM_ROUNDING, independence32, SO_RTZ, SO_RTE, SO_UNUSED, DE_TRUE },
3411 { "rounding_ind_all_fp16_rte_fp64_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_UNUSED, SO_RTZ, DE_TRUE },
3412 { "rounding_ind_all_fp16_rtz_fp64_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_UNUSED, SO_RTE, DE_TRUE },
3413
3414 // test rounding modes when three widths are available
3415 { "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_RTZ, DE_TRUE },
3416 { "rounding_ind_32_fp16_rtz_fp32_rte_fp64_rtz_nostorage", SM_ROUNDING, independence32, SO_RTZ, SO_RTE, SO_RTZ, DE_TRUE },
3417 { "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_RTE, DE_TRUE },
3418 { "rounding_ind_32_fp16_rte_fp32_rtz_fp64_rte_nostorage", SM_ROUNDING, independence32, SO_RTE, SO_RTZ, SO_RTE, DE_TRUE },
3419 { "rounding_ind_all_fp16_rtz_fp32_rtz_fp64_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTZ, SO_RTE, DE_TRUE },
3420 { "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_RTE, DE_TRUE },
3421 { "rounding_ind_all_fp16_rte_fp32_rte_fp64_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_RTE, SO_RTZ, DE_TRUE },
3422 { "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_RTZ, DE_TRUE },
3423
3424 // test denorm settings when only two float widths are available
3425 { "denorm_ind_all_fp16_flush_fp32_preserve_nostorage", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE, SO_UNUSED, DE_TRUE },
3426 { "denorm_ind_all_fp16_preserve_fp32_flush_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH, SO_UNUSED, DE_TRUE },
3427 { "denorm_ind_32_fp16_flush_fp32_preserve_nostorage", SM_DENORMS, independence32, SO_FLUSH, SO_PRESERVE, SO_UNUSED, DE_TRUE },
3428 { "denorm_ind_32_fp16_preserve_fp32_flush_nostorage", SM_DENORMS, independence32, SO_PRESERVE, SO_FLUSH, SO_UNUSED, DE_TRUE },
3429 { "denorm_ind_all_fp16_flush_fp64_preserve_nostorage", SM_DENORMS, independenceAll, SO_FLUSH, SO_UNUSED, SO_PRESERVE, DE_TRUE },
3430 { "denorm_ind_all_fp16_preserve_fp64_flush_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE, SO_UNUSED, SO_FLUSH, DE_TRUE },
3431
3432 // test denorm settings when three widths are available
3433 { "denorm_ind_all_fp16_preserve_fp32_flush_fp64_preserve_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH, SO_PRESERVE, DE_TRUE },
3434 { "denorm_ind_32_fp16_preserve_fp32_flush_fp64_preserve_nostorage", SM_DENORMS, independence32, SO_PRESERVE, SO_FLUSH, SO_PRESERVE, DE_TRUE },
3435 { "denorm_ind_all_fp16_flush_fp32_preserve_fp64_flush_nostorage", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE, SO_FLUSH, DE_TRUE },
3436 { "denorm_ind_32_fp16_flush_fp32_preserve_fp64_flush_nostorage", SM_DENORMS, independence32, SO_FLUSH, SO_PRESERVE, SO_FLUSH, DE_TRUE },
3437 { "denorm_ind_all_fp16_preserve_fp32_preserve_fp64_flush_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE, SO_PRESERVE, SO_FLUSH, DE_TRUE },
3438 { "denorm_ind_all_fp16_preserve_fp32_flush_fp64_flush_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH, SO_FLUSH, DE_TRUE },
3439 { "denorm_ind_all_fp16_flush_fp32_flush_fp64_preserve_nostorage", SM_DENORMS, independenceAll, SO_FLUSH, SO_FLUSH, SO_PRESERVE, DE_TRUE },
3440 { "denorm_ind_all_fp16_flush_fp32_preserve_fp64_preserve_nostorage", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE, SO_PRESERVE, DE_TRUE },
3441 };
3442
3443 for(const auto& testCase : testCases)
3444 {
3445 ComputeShaderSpec csSpec;
3446 fillShaderSpec(testCase, csSpec);
3447 group->addChild(new SpvAsmComputeShaderCase(testCtx, testCase.name, "", csSpec));
3448 }
3449
3450 addFunctionCase(group, "independence_settings", "", verifyIndependenceSettings);
3451 }
3452
fillShaderSpec(const OperationTestCaseInfo & testCaseInfo,ComputeShaderSpec & csSpec) const3453 void ComputeTestGroupBuilder::fillShaderSpec(const OperationTestCaseInfo& testCaseInfo,
3454 ComputeShaderSpec& csSpec) const
3455 {
3456 // LUT storing functions used to verify test results
3457 const VerifyIOFunc checkFloatsLUT[] =
3458 {
3459 checkFloats<Float16, deFloat16>,
3460 checkFloats<Float32, float>,
3461 checkFloats<Float64, double>
3462 };
3463
3464 const Operation& testOperation = testCaseInfo.operation;
3465 const OperationTestCase& testCase = testCaseInfo.testCase;
3466 FloatType outFloatType = testCaseInfo.outFloatType;
3467
3468 SpecializedOperation specOpData;
3469 specializeOperation(testCaseInfo, specOpData);
3470
3471 TypeSnippetsSP inTypeSnippets = specOpData.inTypeSnippets;
3472 TypeSnippetsSP outTypeSnippets = specOpData.outTypeSnippets;
3473 FloatType inFloatType = specOpData.inFloatType;
3474
3475 deBool outFp16WithoutStorage = (outFloatType == FP16) && testCase.fp16Without16BitStorage;
3476 deBool inFp16WithoutStorage = (inFloatType == FP16) && testCase.fp16Without16BitStorage;
3477
3478 // UnpackHalf2x16 is a corner case - it returns two 32-bit floats but
3479 // internaly operates on fp16 and this type should be used by float controls
3480 FloatType inFloatTypeForCaps = inFloatType;
3481 string inFloatWidthForCaps = inTypeSnippets->bitWidth;
3482 if (testCase.operationId == O_UPH_DENORM)
3483 {
3484 inFloatTypeForCaps = FP16;
3485 inFloatWidthForCaps = "16";
3486 }
3487
3488 string behaviorCapability;
3489 string behaviorExecutionMode;
3490 getBehaviorCapabilityAndExecutionMode(testCase.behaviorFlags,
3491 inFloatWidthForCaps,
3492 outTypeSnippets->bitWidth,
3493 behaviorCapability,
3494 behaviorExecutionMode);
3495
3496 string capabilities = behaviorCapability + outTypeSnippets->capabilities;
3497 string extensions = outTypeSnippets->extensions;
3498 string annotations = inTypeSnippets->inputAnnotationsSnippet + outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
3499 string types = outTypeSnippets->typeDefinitionsSnippet;
3500 string constants = outTypeSnippets->constantsDefinitionsSnippet;
3501 string ioDefinitions = "";
3502
3503 // Getting rid of 16bit_storage dependency imply replacing lots of snippets.
3504 {
3505 if (inFp16WithoutStorage)
3506 {
3507 ioDefinitions = inTypeSnippets->inputDefinitionsFp16Snippet;
3508 }
3509 else
3510 {
3511 ioDefinitions = inTypeSnippets->inputDefinitionsSnippet;
3512 }
3513
3514 if (outFp16WithoutStorage)
3515 {
3516 extensions = outTypeSnippets->extensionsFp16Without16BitStorage;
3517 capabilities = behaviorCapability + outTypeSnippets->capabilitiesFp16Without16BitStorage;
3518 types += outTypeSnippets->typeDefinitionsFp16Snippet;
3519 annotations += outTypeSnippets->typeAnnotationsFp16Snippet;
3520 ioDefinitions += outTypeSnippets->outputDefinitionsFp16Snippet;
3521 }
3522 else
3523 {
3524 ioDefinitions += outTypeSnippets->outputDefinitionsSnippet;
3525 }
3526 }
3527
3528 bool outFp16TypeUsage = outTypeSnippets->loadStoreRequiresShaderFloat16;
3529 bool inFp16TypeUsage = false;
3530
3531 if (testOperation.isInputTypeRestricted)
3532 {
3533 annotations += inTypeSnippets->typeAnnotationsSnippet;
3534 types += inTypeSnippets->typeDefinitionsSnippet;
3535 constants += inTypeSnippets->constantsDefinitionsSnippet;
3536
3537 if (inFp16WithoutStorage)
3538 {
3539 annotations += inTypeSnippets->typeAnnotationsFp16Snippet;
3540 types += inTypeSnippets->typeDefinitionsFp16Snippet;
3541 capabilities += inTypeSnippets->capabilitiesFp16Without16BitStorage;
3542 extensions += inTypeSnippets->extensionsFp16Without16BitStorage;
3543 }
3544 else
3545 {
3546 capabilities += inTypeSnippets->capabilities;
3547 extensions += inTypeSnippets->extensions;
3548 }
3549
3550 inFp16TypeUsage = inTypeSnippets->loadStoreRequiresShaderFloat16;
3551 }
3552
3553 map<string, string> specializations;
3554 specializations["extensions"] = extensions;
3555 specializations["execution_mode"] = behaviorExecutionMode;
3556 specializations["annotations"] = annotations + specOpData.annotations;
3557 specializations["types"] = types + specOpData.types;
3558 specializations["io_definitions"] = ioDefinitions;
3559 specializations["variables"] = specOpData.variables;
3560 specializations["functions"] = specOpData.functions;
3561 specializations["save_result"] = (outFp16WithoutStorage ? outTypeSnippets->storeResultsFp16Snippet : outTypeSnippets->storeResultsSnippet);
3562 specializations["arguments"] = specOpData.arguments;
3563 specializations["commands"] = specOpData.commands;
3564
3565 // Build constants. They are only needed sometimes.
3566 const FloatStatementUsageFlags argsAnyFloatConstMask = B_STATEMENT_USAGE_ARGS_CONST_FLOAT | B_STATEMENT_USAGE_ARGS_CONST_FP16 | B_STATEMENT_USAGE_ARGS_CONST_FP32 | B_STATEMENT_USAGE_ARGS_CONST_FP64;
3567 const bool argsUseFPConstants = (specOpData.argumentsUsesFloatConstant & argsAnyFloatConstMask) != 0;
3568 const FloatStatementUsageFlags commandsAnyFloatConstMask = B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_CONST_FP16 | B_STATEMENT_USAGE_COMMANDS_CONST_FP32 | B_STATEMENT_USAGE_COMMANDS_CONST_FP64;
3569 const bool commandsUseFPConstants = (testCaseInfo.operation.statementUsageFlags & commandsAnyFloatConstMask) != 0;
3570 const bool needConstants = argsUseFPConstants || commandsUseFPConstants;
3571 const FloatStatementUsageFlags constsFloatTypeMask = B_STATEMENT_USAGE_CONSTS_TYPE_FLOAT | B_STATEMENT_USAGE_CONSTS_TYPE_FP16;
3572 const bool constsUsesFP16Type = (testCaseInfo.operation.statementUsageFlags & constsFloatTypeMask) != 0;
3573 const bool loadStoreRequiresShaderFloat16 = inFp16TypeUsage || outFp16TypeUsage;
3574 const bool usesFP16Constants = constsUsesFP16Type || (needConstants && loadStoreRequiresShaderFloat16);
3575
3576 specializations["constants"] = "";
3577 if (needConstants || outFp16WithoutStorage)
3578 {
3579 specializations["constants"] = constants;
3580 }
3581 specializations["constants"] += specOpData.constants;
3582
3583 // check which format features are needed
3584 bool float16FeatureRequired = (outFloatType == FP16) || (inFloatType == FP16);
3585 bool float64FeatureRequired = (outFloatType == FP64) || (inFloatType == FP64);
3586
3587 // Determine required capabilities.
3588 bool float16CapabilityAlreadyAdded = inFp16WithoutStorage || outFp16WithoutStorage;
3589 if ((testOperation.floatUsage == FLOAT_ARITHMETIC && float16FeatureRequired && !float16CapabilityAlreadyAdded) || usesFP16Constants)
3590 {
3591 capabilities += "OpCapability Float16\n";
3592 }
3593 specializations["capabilities"] = capabilities;
3594
3595 // specialize shader
3596 const string shaderCode = m_operationShaderTemplate.specialize(specializations);
3597
3598 // construct input and output buffers of proper types
3599 TypeValuesSP inTypeValues = m_typeData.at(inFloatType).values;
3600 TypeValuesSP outTypeValues = m_typeData.at(outFloatType).values;
3601 BufferSp inBufferSp = inTypeValues->constructInputBuffer(testCase.input);
3602 BufferSp outBufferSp = outTypeValues->constructOutputBuffer(testCase.expectedOutput);
3603 csSpec.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3604 csSpec.outputs.push_back(Resource(outBufferSp));
3605
3606 // check which format features are needed
3607 setupVulkanFeatures(inFloatTypeForCaps, // usualy same as inFloatType - different only for UnpackHalf2x16
3608 outFloatType,
3609 testCase.behaviorFlags,
3610 float64FeatureRequired,
3611 csSpec.requestedVulkanFeatures);
3612
3613 csSpec.assembly = shaderCode;
3614 csSpec.numWorkGroups = IVec3(1, 1, 1);
3615 csSpec.verifyIO = checkFloatsLUT[outFloatType];
3616
3617 csSpec.extensions.push_back("VK_KHR_shader_float_controls");
3618 bool needShaderFloat16 = float16CapabilityAlreadyAdded;
3619
3620 if (float16FeatureRequired && !testCase.fp16Without16BitStorage)
3621 {
3622 csSpec.extensions.push_back("VK_KHR_16bit_storage");
3623 csSpec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
3624 needShaderFloat16 |= testOperation.floatUsage == FLOAT_ARITHMETIC;
3625 }
3626 needShaderFloat16 |= usesFP16Constants;
3627 if (needShaderFloat16)
3628 {
3629 csSpec.extensions.push_back("VK_KHR_shader_float16_int8");
3630 csSpec.requestedVulkanFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
3631 }
3632 if (float64FeatureRequired)
3633 csSpec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
3634 }
3635
fillShaderSpec(const SettingsTestCaseInfo & testCaseInfo,ComputeShaderSpec & csSpec) const3636 void ComputeTestGroupBuilder::fillShaderSpec(const SettingsTestCaseInfo& testCaseInfo,
3637 ComputeShaderSpec& csSpec) const
3638 {
3639 string capabilities;
3640 string fp16behaviorName;
3641 string fp32behaviorName;
3642 string fp64behaviorName;
3643
3644 ValueId addArgs[2];
3645 ValueId fp16resultValue;
3646 ValueId fp32resultValue;
3647 ValueId fp64resultValue;
3648
3649 ExtensionFloatControlsFeatures& floatControls = csSpec.requestedVulkanFeatures.floatControlsProperties;
3650 bool fp16Required = testCaseInfo.fp16Option != SO_UNUSED;
3651 bool fp32Required = testCaseInfo.fp32Option != SO_UNUSED;
3652 bool fp64Required = testCaseInfo.fp64Option != SO_UNUSED;
3653
3654 if (testCaseInfo.testedMode == SM_ROUNDING)
3655 {
3656 // make sure that only rounding options are used
3657 DE_ASSERT((testCaseInfo.fp16Option != SO_FLUSH) ||
3658 (testCaseInfo.fp16Option != SO_PRESERVE) ||
3659 (testCaseInfo.fp32Option != SO_FLUSH) ||
3660 (testCaseInfo.fp32Option != SO_PRESERVE) ||
3661 (testCaseInfo.fp64Option != SO_FLUSH) ||
3662 (testCaseInfo.fp64Option != SO_PRESERVE));
3663
3664 bool fp16RteRounding = testCaseInfo.fp16Option == SO_RTE;
3665 bool fp32RteRounding = testCaseInfo.fp32Option == SO_RTE;
3666 bool fp64RteRounding = testCaseInfo.fp64Option == SO_RTE;
3667
3668 const string& rte = m_behaviorToName.at(B_RTE_ROUNDING);
3669 const string& rtz = m_behaviorToName.at(B_RTZ_ROUNDING);
3670
3671 fp16behaviorName = fp16RteRounding ? rte : rtz;
3672 fp32behaviorName = fp32RteRounding ? rte : rtz;
3673 fp64behaviorName = fp64RteRounding ? rte : rtz;
3674
3675 addArgs[0] = V_ADD_ARG_A;
3676 addArgs[1] = V_ADD_ARG_B;
3677 fp16resultValue = fp16RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
3678 fp32resultValue = fp32RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
3679 fp64resultValue = fp64RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
3680
3681 capabilities = "OpCapability " + rte + "\n"
3682 "OpCapability " + rtz + "\n";
3683
3684 floatControls.roundingModeIndependence = testCaseInfo.independenceSetting;
3685 floatControls.denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR;
3686 floatControls.shaderRoundingModeRTEFloat16 = fp16RteRounding;
3687 floatControls.shaderRoundingModeRTZFloat16 = fp16Required && !fp16RteRounding;
3688 floatControls.shaderRoundingModeRTEFloat32 = fp32RteRounding;
3689 floatControls.shaderRoundingModeRTZFloat32 = fp32Required && !fp32RteRounding;
3690 floatControls.shaderRoundingModeRTEFloat64 = fp64RteRounding;
3691 floatControls.shaderRoundingModeRTZFloat64 = fp64Required && !fp64RteRounding;
3692 }
3693 else // SM_DENORMS
3694 {
3695 // make sure that only denorm options are used
3696 DE_ASSERT((testCaseInfo.fp16Option != SO_RTE) ||
3697 (testCaseInfo.fp16Option != SO_RTZ) ||
3698 (testCaseInfo.fp32Option != SO_RTE) ||
3699 (testCaseInfo.fp32Option != SO_RTZ) ||
3700 (testCaseInfo.fp64Option != SO_RTE) ||
3701 (testCaseInfo.fp64Option != SO_RTZ));
3702
3703 bool fp16DenormPreserve = testCaseInfo.fp16Option == SO_PRESERVE;
3704 bool fp32DenormPreserve = testCaseInfo.fp32Option == SO_PRESERVE;
3705 bool fp64DenormPreserve = testCaseInfo.fp64Option == SO_PRESERVE;
3706
3707 const string& preserve = m_behaviorToName.at(B_DENORM_PRESERVE);
3708 const string& flush = m_behaviorToName.at(B_DENORM_FLUSH);
3709
3710 fp16behaviorName = fp16DenormPreserve ? preserve : flush;
3711 fp32behaviorName = fp32DenormPreserve ? preserve : flush;
3712 fp64behaviorName = fp64DenormPreserve ? preserve : flush;
3713
3714 addArgs[0] = V_DENORM;
3715 addArgs[1] = V_DENORM;
3716 fp16resultValue = fp16DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO_OR_DENORM_TIMES_TWO;
3717 fp32resultValue = fp32DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO;
3718 fp64resultValue = fp64DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO;
3719
3720 capabilities = "OpCapability " + preserve + "\n"
3721 "OpCapability " + flush + "\n";
3722
3723 floatControls.denormBehaviorIndependence = testCaseInfo.independenceSetting;
3724 floatControls.roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR;
3725 floatControls.shaderDenormPreserveFloat16 = fp16DenormPreserve;
3726 floatControls.shaderDenormFlushToZeroFloat16 = fp16Required && !fp16DenormPreserve;
3727 floatControls.shaderDenormPreserveFloat32 = fp32DenormPreserve;
3728 floatControls.shaderDenormFlushToZeroFloat32 = fp32Required && !fp32DenormPreserve;
3729 floatControls.shaderDenormPreserveFloat64 = fp64DenormPreserve;
3730 floatControls.shaderDenormFlushToZeroFloat64 = fp64Required && !fp64DenormPreserve;
3731 }
3732
3733 const auto& fp64Data = m_typeData.at(FP64);
3734 const auto& fp32Data = m_typeData.at(FP32);
3735 const auto& fp16Data = m_typeData.at(FP16);
3736
3737 deUint32 attributeIndex = 0;
3738 deUint32 attributeOffset = 0;
3739 string attribute;
3740 string extensions = "";
3741 string executionModes = "";
3742 string ioAnnotations = "";
3743 string types = "";
3744 string inStruct = "";
3745 string outDefinitions = "";
3746 string commands = "";
3747 string saveResult = "";
3748
3749 // construct single input buffer containing arguments for all float widths
3750 // (maxPerStageDescriptorStorageBuffers can be min 4 and we need 3 for outputs)
3751 deUint32 inputOffset = 0;
3752 std::vector<deUint8> inputData ((fp64Required * sizeof(double) + sizeof(float) + fp16Required * sizeof(deFloat16)) * 2);
3753
3754 // to follow storage buffer layout rules we store data in ssbo in order 64 -> 16
3755 if (fp64Required)
3756 {
3757 capabilities += fp64Data.snippets->capabilities;
3758 executionModes += "OpExecutionMode %main " + fp64behaviorName + " 64\n";
3759 attribute = to_string(attributeIndex);
3760 ioAnnotations += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3761 fp64Data.snippets->multiOutputAnnotationsSnippet +
3762 "OpDecorate %ssbo_f64_out Binding " + to_string(attributeIndex+1) + "\n";
3763 types += fp64Data.snippets->minTypeDefinitionsSnippet;
3764 inStruct += " %type_f64_arr_2";
3765 outDefinitions += fp64Data.snippets->multiOutputDefinitionsSnippet;
3766 commands += replace(fp64Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
3767 "%result64 = OpFAdd %type_f64 %arg1_f64 %arg2_f64\n";
3768 saveResult += fp64Data.snippets->multiStoreResultsSnippet;
3769 attributeOffset += 2 * static_cast<deUint32>(sizeof(double));
3770 attributeIndex++;
3771
3772 fp64Data.values->fillInputData(addArgs, inputData, inputOffset);
3773
3774 // construct separate buffers for outputs to make validation easier
3775 BufferSp fp64OutBufferSp = fp64Data.values->constructOutputBuffer(fp64resultValue);
3776 csSpec.outputs.push_back(Resource(fp64OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, reinterpret_cast<void*>(BufferDataType::DATA_FP64)));
3777
3778 csSpec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
3779 }
3780 if (fp32Required)
3781 {
3782 executionModes += "OpExecutionMode %main " + fp32behaviorName + " 32\n";
3783 attribute = to_string(attributeIndex);
3784 ioAnnotations += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3785 fp32Data.snippets->multiOutputAnnotationsSnippet +
3786 "OpDecorate %ssbo_f32_out Binding " + to_string(attributeIndex+1) + "\n";
3787 types += fp32Data.snippets->minTypeDefinitionsSnippet;
3788 inStruct += " %type_f32_arr_2";
3789 outDefinitions += fp32Data.snippets->multiOutputDefinitionsSnippet;
3790 commands += replace(fp32Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
3791 "%result32 = OpFAdd %type_f32 %arg1_f32 %arg2_f32\n";
3792 saveResult += fp32Data.snippets->multiStoreResultsSnippet;
3793 attributeOffset += 2 * static_cast<deUint32>(sizeof(float));
3794 attributeIndex++;
3795
3796 fp32Data.values->fillInputData(addArgs, inputData, inputOffset);
3797
3798 BufferSp fp32OutBufferSp = fp32Data.values->constructOutputBuffer(fp32resultValue);
3799 csSpec.outputs.push_back(Resource(fp32OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, reinterpret_cast<void*>(BufferDataType::DATA_FP32)));
3800 }
3801 if (fp16Required)
3802 {
3803 if (testCaseInfo.fp16Without16BitStorage)
3804 {
3805 capabilities += fp16Data.snippets->capabilitiesFp16Without16BitStorage;
3806 extensions += fp16Data.snippets->extensionsFp16Without16BitStorage;
3807 executionModes += "OpExecutionMode %main " + fp16behaviorName + " 16\n";
3808 attribute = to_string(attributeIndex);
3809 ioAnnotations += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3810 fp16Data.snippets->multiOutputAnnotationsFp16Snippet +
3811 "OpDecorate %ssbo_u32_out Binding " + to_string(attributeIndex+1) + "\n";
3812 types += fp16Data.snippets->minTypeDefinitionsSnippet + fp16Data.snippets->typeDefinitionsFp16Snippet + "%type_f16_vec2 = OpTypeVector %type_f16 2\n";
3813 inStruct += " %type_u32_arr_1";
3814 outDefinitions += fp16Data.snippets->multiOutputDefinitionsFp16Snippet;
3815 commands += replace(fp16Data.snippets->multiArgumentsFromInputFp16Snippet, "${attr}", attribute) +
3816 "%result16 = OpFAdd %type_f16 %arg1_f16 %arg2_f16\n";
3817 saveResult += fp16Data.snippets->multiStoreResultsFp16Snippet;
3818
3819 csSpec.extensions.push_back("VK_KHR_shader_float16_int8");
3820 csSpec.requestedVulkanFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
3821 }
3822 else
3823 {
3824 capabilities += fp16Data.snippets->capabilities +
3825 "OpCapability Float16\n";
3826 extensions += fp16Data.snippets->extensions;
3827 executionModes += "OpExecutionMode %main " + fp16behaviorName + " 16\n";
3828 attribute = to_string(attributeIndex);
3829 ioAnnotations += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3830 fp16Data.snippets->multiOutputAnnotationsSnippet +
3831 "OpDecorate %ssbo_f16_out Binding " + to_string(attributeIndex+1) + "\n";
3832 types += fp16Data.snippets->minTypeDefinitionsSnippet;
3833 inStruct += " %type_f16_arr_2";
3834 outDefinitions += fp16Data.snippets->multiOutputDefinitionsSnippet;
3835 commands += replace(fp16Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
3836 "%result16 = OpFAdd %type_f16 %arg1_f16 %arg2_f16\n";
3837 saveResult += fp16Data.snippets->multiStoreResultsSnippet;
3838
3839 csSpec.extensions.push_back("VK_KHR_16bit_storage");
3840 csSpec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
3841 }
3842
3843 fp16Data.values->fillInputData(addArgs, inputData, inputOffset);
3844
3845 BufferSp fp16OutBufferSp = fp16Data.values->constructOutputBuffer(fp16resultValue);
3846 csSpec.outputs.push_back(Resource(fp16OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, reinterpret_cast<void*>(BufferDataType::DATA_FP16)));
3847 }
3848
3849 BufferSp inBufferSp(new Buffer<deUint8>(inputData));
3850 csSpec.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3851
3852 map<string, string> specializations =
3853 {
3854 { "capabilities", capabilities },
3855 { "extensions", extensions },
3856 { "execution_modes", executionModes },
3857 { "io_annotations", ioAnnotations },
3858 { "types", types },
3859 { "in_struct", inStruct },
3860 { "out_definitions", outDefinitions },
3861 { "commands", commands },
3862 { "save_result", saveResult }
3863 };
3864
3865 // specialize shader
3866 const string shaderCode = m_settingsShaderTemplate.specialize(specializations);
3867
3868 csSpec.assembly = shaderCode;
3869 csSpec.numWorkGroups = IVec3(1, 1, 1);
3870 csSpec.verifyIO = checkMixedFloats;
3871 csSpec.extensions.push_back("VK_KHR_shader_float_controls");
3872 }
3873
getGraphicsShaderCode(vk::SourceCollections & dst,InstanceContext context)3874 void getGraphicsShaderCode (vk::SourceCollections& dst, InstanceContext context)
3875 {
3876 // this function is used only by GraphicsTestGroupBuilder but it couldn't
3877 // be implemented as a method because of how addFunctionCaseWithPrograms
3878 // was implemented
3879
3880 SpirvVersion targetSpirvVersion = context.resources.spirvVersion;
3881 const deUint32 vulkanVersion = dst.usedVulkanVersion;
3882
3883 static const string vertexTemplate =
3884 "OpCapability Shader\n"
3885 "${vert_capabilities}"
3886
3887 "OpExtension \"SPV_KHR_float_controls\"\n"
3888 "${vert_extensions}"
3889
3890 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
3891 "OpMemoryModel Logical GLSL450\n"
3892 "OpEntryPoint Vertex %main \"main\" %BP_stream %BP_position %BP_color %BP_gl_VertexIndex %BP_gl_InstanceIndex %BP_vertex_color %BP_vertex_result \n"
3893 "${vert_execution_mode}"
3894
3895 "OpMemberDecorate %BP_gl_PerVertex 0 BuiltIn Position\n"
3896 "OpMemberDecorate %BP_gl_PerVertex 1 BuiltIn PointSize\n"
3897 "OpMemberDecorate %BP_gl_PerVertex 2 BuiltIn ClipDistance\n"
3898 "OpMemberDecorate %BP_gl_PerVertex 3 BuiltIn CullDistance\n"
3899 "OpDecorate %BP_gl_PerVertex Block\n"
3900 "OpDecorate %BP_position Location 0\n"
3901 "OpDecorate %BP_color Location 1\n"
3902 "OpDecorate %BP_vertex_color Location 1\n"
3903 "OpDecorate %BP_vertex_result Location 2\n"
3904 "OpDecorate %BP_vertex_result Flat\n"
3905 "OpDecorate %BP_gl_VertexIndex BuiltIn VertexIndex\n"
3906 "OpDecorate %BP_gl_InstanceIndex BuiltIn InstanceIndex\n"
3907
3908 // some tests require additional annotations
3909 "${vert_annotations}"
3910
3911 // types required by most of tests
3912 "%type_void = OpTypeVoid\n"
3913 "%type_voidf = OpTypeFunction %type_void\n"
3914 "%type_bool = OpTypeBool\n"
3915 "%type_i32 = OpTypeInt 32 1\n"
3916 "%type_u32 = OpTypeInt 32 0\n"
3917 "%type_u32_vec2 = OpTypeVector %type_u32 2\n"
3918 "%type_i32_iptr = OpTypePointer Input %type_i32\n"
3919 "%type_i32_optr = OpTypePointer Output %type_i32\n"
3920 "%type_i32_fptr = OpTypePointer Function %type_i32\n"
3921
3922 // constants required by most of tests
3923 "%c_i32_0 = OpConstant %type_i32 0\n"
3924 "%c_i32_1 = OpConstant %type_i32 1\n"
3925 "%c_i32_2 = OpConstant %type_i32 2\n"
3926 "%c_u32_1 = OpConstant %type_u32 1\n"
3927
3928 // if input float type has different width then output then
3929 // both types are defined here along with all types derived from
3930 // them that are commonly used by tests; some tests also define
3931 // their own types (those that are needed just by this single test)
3932 "${vert_types}"
3933
3934 // SSBO is not universally supported for storing
3935 // data in vertex stages - it is onle read here
3936 "${vert_io_definitions}"
3937
3938 "%BP_gl_PerVertex = OpTypeStruct %type_f32_vec4 %type_f32 %type_f32_arr_1 %type_f32_arr_1\n"
3939 "%BP_gl_PerVertex_optr = OpTypePointer Output %BP_gl_PerVertex\n"
3940 "%BP_stream = OpVariable %BP_gl_PerVertex_optr Output\n"
3941 "%BP_position = OpVariable %type_f32_vec4_iptr Input\n"
3942 "%BP_color = OpVariable %type_f32_vec4_iptr Input\n"
3943 "%BP_gl_VertexIndex = OpVariable %type_i32_iptr Input\n"
3944 "%BP_gl_InstanceIndex = OpVariable %type_i32_iptr Input\n"
3945 "%BP_vertex_color = OpVariable %type_f32_vec4_optr Output\n"
3946
3947 // set of default constants per float type is placed here,
3948 // operation tests can also define additional constants.
3949 "${vert_constants}"
3950
3951 // O_RETURN_VAL defines function here and because
3952 // of that this token needs to be directly before main function.
3953 "${vert_functions}"
3954
3955 "%main = OpFunction %type_void None %type_voidf\n"
3956 "%label = OpLabel\n"
3957
3958 "${vert_variables}"
3959
3960 "%position = OpLoad %type_f32_vec4 %BP_position\n"
3961 "%gl_pos = OpAccessChain %type_f32_vec4_optr %BP_stream %c_i32_0\n"
3962 "OpStore %gl_pos %position\n"
3963 "%color = OpLoad %type_f32_vec4 %BP_color\n"
3964 "OpStore %BP_vertex_color %color\n"
3965
3966 // this token is filled only when vertex stage is tested;
3967 // depending on test case arguments are either read from input ssbo
3968 // or generated in spir-v code - in later case ssbo is not used
3969 "${vert_arguments}"
3970
3971 // when vertex shader is tested then test operations are performed
3972 // here and passed to fragment stage; if fragment stage ts tested
3973 // then ${comands} and ${vert_process_result} are rplaced with nop
3974 "${vert_commands}"
3975
3976 "${vert_process_result}"
3977
3978 "OpReturn\n"
3979 "OpFunctionEnd\n";
3980
3981
3982 static const string fragmentTemplate =
3983 "OpCapability Shader\n"
3984 "${frag_capabilities}"
3985
3986 "OpExtension \"SPV_KHR_float_controls\"\n"
3987 "${frag_extensions}"
3988
3989 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
3990 "OpMemoryModel Logical GLSL450\n"
3991 "OpEntryPoint Fragment %main \"main\" %BP_vertex_color %BP_vertex_result %BP_fragColor %BP_gl_FragCoord \n"
3992 "OpExecutionMode %main OriginUpperLeft\n"
3993 "${frag_execution_mode}"
3994
3995 "OpDecorate %BP_fragColor Location 0\n"
3996 "OpDecorate %BP_vertex_color Location 1\n"
3997 "OpDecorate %BP_vertex_result Location 2\n"
3998 "OpDecorate %BP_vertex_result Flat\n"
3999 "OpDecorate %BP_gl_FragCoord BuiltIn FragCoord\n"
4000
4001 // some tests require additional annotations
4002 "${frag_annotations}"
4003
4004 // types required by most of tests
4005 "%type_void = OpTypeVoid\n"
4006 "%type_voidf = OpTypeFunction %type_void\n"
4007 "%type_bool = OpTypeBool\n"
4008 "%type_i32 = OpTypeInt 32 1\n"
4009 "%type_u32 = OpTypeInt 32 0\n"
4010 "%type_u32_vec2 = OpTypeVector %type_u32 2\n"
4011 "%type_i32_iptr = OpTypePointer Input %type_i32\n"
4012 "%type_i32_optr = OpTypePointer Output %type_i32\n"
4013 "%type_i32_fptr = OpTypePointer Function %type_i32\n"
4014
4015 // constants required by most of tests
4016 "%c_i32_0 = OpConstant %type_i32 0\n"
4017 "%c_i32_1 = OpConstant %type_i32 1\n"
4018 "%c_i32_2 = OpConstant %type_i32 2\n"
4019 "%c_u32_1 = OpConstant %type_u32 1\n"
4020
4021 // if input float type has different width then output then
4022 // both types are defined here along with all types derived from
4023 // them that are commonly used by tests; some tests also define
4024 // their own types (those that are needed just by this single test)
4025 "${frag_types}"
4026
4027 "%BP_gl_FragCoord = OpVariable %type_f32_vec4_iptr Input\n"
4028 "%BP_vertex_color = OpVariable %type_f32_vec4_iptr Input\n"
4029 "%BP_fragColor = OpVariable %type_f32_vec4_optr Output\n"
4030
4031 // SSBO definitions
4032 "${frag_io_definitions}"
4033
4034 // set of default constants per float type is placed here,
4035 // operation tests can also define additional constants.
4036 "${frag_constants}"
4037
4038 // O_RETURN_VAL defines function here and because
4039 // of that this token needs to be directly before main function.
4040 "${frag_functions}"
4041
4042 "%main = OpFunction %type_void None %type_voidf\n"
4043 "%label = OpLabel\n"
4044
4045 "${frag_variables}"
4046
4047 // just pass vertex color - rendered image is not important in our case
4048 "%vertex_color = OpLoad %type_f32_vec4 %BP_vertex_color\n"
4049 "OpStore %BP_fragColor %vertex_color\n"
4050
4051 // this token is filled only when fragment stage is tested;
4052 // depending on test case arguments are either read from input ssbo or
4053 // generated in spir-v code - in later case ssbo is used only for output
4054 "${frag_arguments}"
4055
4056 // when fragment shader is tested then test operations are performed
4057 // here and saved to ssbo; if vertex stage was tested then its
4058 // result is just saved to ssbo here
4059 "${frag_commands}"
4060 "${frag_process_result}"
4061
4062 "OpReturn\n"
4063 "OpFunctionEnd\n";
4064
4065 dst.spirvAsmSources.add("vert", DE_NULL)
4066 << StringTemplate(vertexTemplate).specialize(context.testCodeFragments)
4067 << SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
4068 dst.spirvAsmSources.add("frag", DE_NULL)
4069 << StringTemplate(fragmentTemplate).specialize(context.testCodeFragments)
4070 << SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
4071 }
4072
// GraphicsTestGroupBuilder iterates over all test cases and creates tests for both
// vertex and fragment stages. As in most spirv-assembly tests, tests here are also
// executed using functionality defined in vktSpvAsmGraphicsShaderTestUtil.cpp, but
// because one of the requirements during development was that SSBO won't be used in
// the vertex stage, we couldn't use the createTestForStage functions - we need a custom
// version for both vertex and fragment shaders at the same time. This was required
// because we needed to pass the result from the vertex stage to the fragment stage,
// where it could be saved to the ssbo. To achieve that, InstanceContext is created
// manually in the createInstanceContext method.
// Test group builder for the graphics pipeline (vertex and fragment stages).
class GraphicsTestGroupBuilder: public TestGroupBuilderBase
{
public:

    // Initializes the internal test case builder; the definition simply
    // forwards to m_testCaseBuilder.init().
    void init();

    // Adds a child group called groupName to parentGroup and registers one
    // "_vert" and one "_frag" case per applicable operation test case.
    void createOperationTests(TestCaseGroup* parentGroup, const char* groupName, FloatType floatType, bool argumentsFromInput) override;
    // Intentionally creates nothing: settings are tested only in the compute stage.
    void createSettingsTests(TestCaseGroup* parentGroup) override;

protected:

    // Builds the InstanceContext describing a single operation test case for
    // the stage selected in testCaseInfo.
    InstanceContext createInstanceContext(const OperationTestCaseInfo& testCaseInfo) const;

private:

    // Produces the list of operation test cases and the per-operation data.
    TestCasesBuilder m_testCaseBuilder;
};
4099
// Initializes the builder by initializing the owned test case builder.
void GraphicsTestGroupBuilder::init()
{
    m_testCaseBuilder.init();
}
4104
createOperationTests(TestCaseGroup * parentGroup,const char * groupName,FloatType floatType,bool argumentsFromInput)4105 void GraphicsTestGroupBuilder::createOperationTests(TestCaseGroup* parentGroup, const char* groupName, FloatType floatType, bool argumentsFromInput)
4106 {
4107 TestContext& testCtx = parentGroup->getTestContext();
4108 TestCaseGroup* group = new TestCaseGroup(testCtx, groupName, "");
4109 parentGroup->addChild(group);
4110
4111 // create test cases for vertex stage
4112 TestCaseVect testCases;
4113 m_testCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
4114
4115 TestCaseVect::const_iterator currTestCase = testCases.begin();
4116 TestCaseVect::const_iterator lastTestCase = testCases.end();
4117 while(currTestCase != lastTestCase)
4118 {
4119 const OperationTestCase& testCase = *currTestCase;
4120 ++currTestCase;
4121
4122 // skip cases with undefined output
4123 if (testCase.expectedOutput == V_UNUSED)
4124 continue;
4125
4126 // FPRoundingMode decoration can be applied only to conversion instruction that is used as the object
4127 // argument of an OpStore storing through a pointer to a 16-bit floating-point object in Uniform, or
4128 // PushConstant, or Input, or Output Storage Classes. SSBO writes are not commonly supported
4129 // in VS so this test case needs to be skiped for vertex stage.
4130 if ((testCase.operationId == O_ORTZ_ROUND) || (testCase.operationId == O_ORTE_ROUND))
4131 continue;
4132
4133 OperationTestCaseInfo testCaseInfo =
4134 {
4135 floatType,
4136 argumentsFromInput,
4137 VK_SHADER_STAGE_VERTEX_BIT,
4138 m_testCaseBuilder.getOperation(testCase.operationId),
4139 testCase
4140 };
4141
4142 InstanceContext ctxVertex = createInstanceContext(testCaseInfo);
4143 string testName = replace(testCase.baseName, "op", testCaseInfo.operation.name);
4144
4145 addFunctionCaseWithPrograms<InstanceContext>(group, testName + "_vert", "", getGraphicsShaderCode, runAndVerifyDefaultPipeline, ctxVertex);
4146 }
4147
4148 // create test cases for fragment stage
4149 testCases.clear();
4150 m_testCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
4151
4152 currTestCase = testCases.begin();
4153 lastTestCase = testCases.end();
4154 while(currTestCase != lastTestCase)
4155 {
4156 const OperationTestCase& testCase = *currTestCase;
4157 ++currTestCase;
4158
4159 // skip cases with undefined output
4160 if (testCase.expectedOutput == V_UNUSED)
4161 continue;
4162
4163 OperationTestCaseInfo testCaseInfo =
4164 {
4165 floatType,
4166 argumentsFromInput,
4167 VK_SHADER_STAGE_FRAGMENT_BIT,
4168 m_testCaseBuilder.getOperation(testCase.operationId),
4169 testCase
4170 };
4171
4172 InstanceContext ctxFragment = createInstanceContext(testCaseInfo);
4173 string testName = replace(testCase.baseName, "op", testCaseInfo.operation.name);
4174
4175 addFunctionCaseWithPrograms<InstanceContext>(group, testName + "_frag", "", getGraphicsShaderCode, runAndVerifyDefaultPipeline, ctxFragment);
4176 }
4177 }
4178
// Deliberately a no-op: no settings tests are created for the graphics
// stages, so parentGroup is left untouched.
void GraphicsTestGroupBuilder::createSettingsTests(TestCaseGroup* parentGroup)
{
    // parentGroup is unused on purpose; silence the unused-parameter warning
    DE_UNREF(parentGroup);

    // WG decided that testing settings only for compute stage is sufficient
}
4185
createInstanceContext(const OperationTestCaseInfo & testCaseInfo) const4186 InstanceContext GraphicsTestGroupBuilder::createInstanceContext(const OperationTestCaseInfo& testCaseInfo) const
4187 {
4188 // LUT storing functions used to verify test results
4189 const VerifyIOFunc checkFloatsLUT[] =
4190 {
4191 checkFloats<Float16, deFloat16>,
4192 checkFloats<Float32, float>,
4193 checkFloats<Float64, double>
4194 };
4195
4196 // 32-bit float types are always needed for standard operations on color
4197 // if tested operation does not require fp32 for either input or output
4198 // then this minimal type definitions must be appended to types section
4199 const string f32TypeMinimalRequired =
4200 "%type_f32 = OpTypeFloat 32\n"
4201 "%type_f32_arr_1 = OpTypeArray %type_f32 %c_i32_1\n"
4202 "%type_f32_iptr = OpTypePointer Input %type_f32\n"
4203 "%type_f32_optr = OpTypePointer Output %type_f32\n"
4204 "%type_f32_vec4 = OpTypeVector %type_f32 4\n"
4205 "%type_f32_vec4_iptr = OpTypePointer Input %type_f32_vec4\n"
4206 "%type_f32_vec4_optr = OpTypePointer Output %type_f32_vec4\n";
4207
4208 const Operation& testOperation = testCaseInfo.operation;
4209 const OperationTestCase& testCase = testCaseInfo.testCase;
4210 FloatType outFloatType = testCaseInfo.outFloatType;
4211 VkShaderStageFlagBits testedStage = testCaseInfo.testedStage;
4212
4213 DE_ASSERT((testedStage == VK_SHADER_STAGE_VERTEX_BIT) || (testedStage == VK_SHADER_STAGE_FRAGMENT_BIT));
4214
4215 SpecializedOperation specOpData;
4216 specializeOperation(testCaseInfo, specOpData);
4217
4218 TypeSnippetsSP inTypeSnippets = specOpData.inTypeSnippets;
4219 TypeSnippetsSP outTypeSnippets = specOpData.outTypeSnippets;
4220 FloatType inFloatType = specOpData.inFloatType;
4221
4222 deBool outFp16WithoutStorage = (outFloatType == FP16) && testCase.fp16Without16BitStorage;
4223 deBool inFp16WithoutStorage = (inFloatType == FP16) && testCase.fp16Without16BitStorage;
4224
4225 // There may be several reasons why we need the shaderFloat16 Vulkan feature.
4226 bool needsShaderFloat16 = inFp16WithoutStorage || outFp16WithoutStorage;
4227 // There are some weird cases where we need the constants, but would otherwise drop them.
4228 bool needsSpecialConstants = false;
4229
4230 // UnpackHalf2x16 is a corner case - it returns two 32-bit floats but
4231 // internaly operates on fp16 and this type should be used by float controls
4232 FloatType inFloatTypeForCaps = inFloatType;
4233 string inFloatWidthForCaps = inTypeSnippets->bitWidth;
4234 if (testCase.operationId == O_UPH_DENORM)
4235 {
4236 inFloatTypeForCaps = FP16;
4237 inFloatWidthForCaps = "16";
4238 }
4239
4240 string behaviorCapability;
4241 string behaviorExecutionMode;
4242 getBehaviorCapabilityAndExecutionMode(testCase.behaviorFlags,
4243 inFloatWidthForCaps,
4244 outTypeSnippets->bitWidth,
4245 behaviorCapability,
4246 behaviorExecutionMode);
4247
4248 // check which format features are needed
4249 bool float16FeatureRequired = (inFloatType == FP16) || (outFloatType == FP16);
4250 bool float64FeatureRequired = (inFloatType == FP64) || (outFloatType == FP64);
4251
4252 string vertExecutionMode;
4253 string fragExecutionMode;
4254 string vertCapabilities;
4255 string fragCapabilities;
4256 string vertExtensions;
4257 string fragExtensions;
4258 string vertAnnotations;
4259 string fragAnnotations;
4260 string vertTypes;
4261 string fragTypes;
4262 string vertConstants;
4263 string fragConstants;
4264 string vertFunctions;
4265 string fragFunctions;
4266 string vertIODefinitions;
4267 string fragIODefinitions;
4268 string vertArguments;
4269 string fragArguments;
4270 string vertVariables;
4271 string fragVariables;
4272 string vertCommands;
4273 string fragCommands;
4274 string vertProcessResult;
4275 string fragProcessResult;
4276
4277 // check if operation should be executed in vertex stage
4278 if (testedStage == VK_SHADER_STAGE_VERTEX_BIT)
4279 {
4280 vertAnnotations = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet;
4281 fragAnnotations = outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
4282 vertFunctions = specOpData.functions;
4283
4284 // check if input type is different from tested type (conversion operations)
4285 if (testOperation.isInputTypeRestricted)
4286 {
4287 vertCapabilities = behaviorCapability + inTypeSnippets->capabilities + outTypeSnippets->capabilities;
4288 fragCapabilities = outTypeSnippets->capabilities;
4289 vertExtensions = inTypeSnippets->extensions + outTypeSnippets->extensions;
4290 fragExtensions = outTypeSnippets->extensions;
4291 vertTypes = inTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
4292 if (inFp16WithoutStorage)
4293 vertTypes += inTypeSnippets->typeDefinitionsFp16Snippet;
4294
4295 fragTypes = outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
4296 vertConstants = inTypeSnippets->constantsDefinitionsSnippet + outTypeSnippets->constantsDefinitionsSnippet;
4297 fragConstants = outTypeSnippets->constantsDefinitionsSnippet;
4298 }
4299 else
4300 {
4301 // input and output types are the same (majority of operations)
4302
4303 vertCapabilities = behaviorCapability + outTypeSnippets->capabilities;
4304 fragCapabilities = vertCapabilities;
4305 vertExtensions = outTypeSnippets->extensions;
4306 fragExtensions = vertExtensions;
4307 vertTypes = outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
4308 fragTypes = vertTypes;
4309 vertConstants = outTypeSnippets->constantsDefinitionsSnippet;
4310 fragConstants = outTypeSnippets->constantsDefinitionsSnippet;
4311 }
4312
4313 if (outFloatType != FP32)
4314 {
4315 fragTypes += f32TypeMinimalRequired;
4316 if (inFloatType != FP32)
4317 vertTypes += f32TypeMinimalRequired;
4318 }
4319
4320 vertAnnotations += specOpData.annotations;
4321 vertTypes += specOpData.types;
4322 vertConstants += specOpData.constants;
4323
4324 vertExecutionMode = behaviorExecutionMode;
4325 fragExecutionMode = "";
4326 vertIODefinitions = inTypeSnippets->inputDefinitionsSnippet + outTypeSnippets->outputVaryingsSnippet;
4327 fragIODefinitions = outTypeSnippets->inputVaryingsSnippet + outTypeSnippets->outputDefinitionsSnippet;
4328 vertArguments = specOpData.arguments;
4329 fragArguments = "";
4330 vertVariables = specOpData.variables;
4331 fragVariables = "";
4332 vertCommands = specOpData.commands;
4333 fragCommands = "";
4334 vertProcessResult = outTypeSnippets->storeVertexResultSnippet;
4335 fragProcessResult = outTypeSnippets->loadVertexResultSnippet + outTypeSnippets->storeResultsSnippet;
4336
4337 if (inFp16WithoutStorage)
4338 {
4339 vertAnnotations += inTypeSnippets->typeAnnotationsFp16Snippet;
4340 vertIODefinitions = inTypeSnippets->inputDefinitionsFp16Snippet + outTypeSnippets->outputVaryingsSnippet;
4341 }
4342
4343 if (outFp16WithoutStorage)
4344 {
4345 vertTypes += outTypeSnippets->typeDefinitionsFp16Snippet;
4346 fragTypes += outTypeSnippets->typeDefinitionsFp16Snippet;
4347 fragAnnotations += outTypeSnippets->typeAnnotationsFp16Snippet;
4348 fragIODefinitions = outTypeSnippets->inputVaryingsSnippet + outTypeSnippets->outputDefinitionsFp16Snippet;
4349 fragProcessResult = outTypeSnippets->loadVertexResultSnippet + outTypeSnippets->storeResultsFp16Snippet;
4350
4351 }
4352
4353 needsShaderFloat16 |= outTypeSnippets->loadStoreRequiresShaderFloat16;
4354 }
4355 else // perform test in fragment stage - vertex stage is empty
4356 {
4357 fragFunctions = specOpData.functions;
4358 // check if input type is different from tested type
4359 if (testOperation.isInputTypeRestricted)
4360 {
4361 fragAnnotations = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet +
4362 outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
4363 fragCapabilities = behaviorCapability +
4364 (inFp16WithoutStorage ? inTypeSnippets->capabilitiesFp16Without16BitStorage : inTypeSnippets->capabilities) +
4365 (outFp16WithoutStorage ? outTypeSnippets->capabilitiesFp16Without16BitStorage : outTypeSnippets->capabilities);
4366 fragExtensions =
4367 (inFp16WithoutStorage ? inTypeSnippets->extensionsFp16Without16BitStorage : inTypeSnippets->extensions) +
4368 (outFp16WithoutStorage ? outTypeSnippets->extensionsFp16Without16BitStorage : outTypeSnippets->extensions);
4369 fragTypes = inTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->typeDefinitionsSnippet;
4370 fragConstants = inTypeSnippets->constantsDefinitionsSnippet + outTypeSnippets->constantsDefinitionsSnippet;
4371 }
4372 else
4373 {
4374 // input and output types are the same
4375
4376 fragAnnotations = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet +
4377 outTypeSnippets->outputAnnotationsSnippet;
4378 fragCapabilities = behaviorCapability +
4379 (outFp16WithoutStorage ? outTypeSnippets->capabilitiesFp16Without16BitStorage : outTypeSnippets->capabilities);
4380 fragExtensions = (outFp16WithoutStorage ? outTypeSnippets->extensionsFp16Without16BitStorage : outTypeSnippets->extensions);
4381 fragTypes = outTypeSnippets->typeDefinitionsSnippet;
4382 fragConstants = outTypeSnippets->constantsDefinitionsSnippet;
4383 }
4384
4385 // varying is not used but it needs to be specified so lets use type_i32 for it
4386 string dummyVertVarying = "%BP_vertex_result = OpVariable %type_i32_optr Output\n";
4387 string dummyFragVarying = "%BP_vertex_result = OpVariable %type_i32_iptr Input\n";
4388
4389 vertCapabilities = "";
4390 vertExtensions = "";
4391 vertAnnotations = "OpDecorate %type_f32_arr_1 ArrayStride 4\n";
4392 vertTypes = f32TypeMinimalRequired;
4393 vertConstants = "";
4394
4395 if ((outFloatType != FP32) && (inFloatType != FP32))
4396 fragTypes += f32TypeMinimalRequired;
4397
4398 fragAnnotations += specOpData.annotations;
4399 fragTypes += specOpData.types;
4400 fragConstants += specOpData.constants;
4401
4402 vertExecutionMode = "";
4403 fragExecutionMode = behaviorExecutionMode;
4404 vertIODefinitions = dummyVertVarying;
4405 fragIODefinitions = dummyFragVarying;
4406
4407 vertArguments = "";
4408 fragArguments = specOpData.arguments;
4409 vertVariables = "";
4410 fragVariables = specOpData.variables;
4411 vertCommands = "";
4412 fragCommands = specOpData.commands;
4413 vertProcessResult = "";
4414 fragProcessResult = outTypeSnippets->storeResultsSnippet;
4415
4416 if (inFp16WithoutStorage)
4417 {
4418 fragAnnotations += inTypeSnippets->typeAnnotationsFp16Snippet;
4419 if (testOperation.isInputTypeRestricted)
4420 {
4421 fragTypes += inTypeSnippets->typeDefinitionsFp16Snippet;
4422 }
4423 fragIODefinitions += inTypeSnippets->inputDefinitionsFp16Snippet;
4424 }
4425 else
4426 {
4427 fragIODefinitions += inTypeSnippets->inputDefinitionsSnippet;
4428 }
4429
4430 if (outFp16WithoutStorage)
4431 {
4432 if (testOperation.isInputTypeRestricted)
4433 {
4434 fragAnnotations += outTypeSnippets->typeAnnotationsFp16Snippet;
4435 }
4436 fragTypes += outTypeSnippets->typeDefinitionsFp16Snippet;
4437 fragIODefinitions += outTypeSnippets->outputDefinitionsFp16Snippet;
4438 fragProcessResult = outTypeSnippets->storeResultsFp16Snippet;
4439 }
4440 else
4441 {
4442 fragIODefinitions += outTypeSnippets->outputDefinitionsSnippet;
4443 }
4444
4445 if (!testCaseInfo.argumentsFromInput)
4446 {
4447 switch(testCaseInfo.testCase.operationId)
4448 {
4449 case O_CONV_FROM_FP32:
4450 case O_CONV_FROM_FP64:
4451 needsSpecialConstants = true;
4452 break;
4453 default:
4454 break;
4455 }
4456 }
4457 }
4458
	// Another reason we need shaderFloat16 is that the executable instructions use fp16
	// in a way not supported by the 16bit storage extension.
4461 needsShaderFloat16 |= float16FeatureRequired && testOperation.floatUsage == FLOAT_ARITHMETIC;
4462
4463 // Constants are only needed sometimes. Drop them in the fp16 case if the code doesn't need
4464 // them, and if we don't otherwise need shaderFloat16.
4465 bool needsFP16Constants = needsShaderFloat16 || needsSpecialConstants || outFp16WithoutStorage;
4466
4467 if (!needsFP16Constants && float16FeatureRequired)
4468 {
4469 // Check various code fragments
4470 const FloatStatementUsageFlags commandsFloatConstMask = B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_CONST_FP16;
4471 const bool commandsUsesFloatConstant = (testCaseInfo.operation.statementUsageFlags & commandsFloatConstMask) != 0;;
4472 const FloatStatementUsageFlags argumentsFloatConstMask = B_STATEMENT_USAGE_ARGS_CONST_FLOAT | B_STATEMENT_USAGE_ARGS_CONST_FP16;
4473 const bool argumentsUsesFloatConstant = (specOpData.argumentsUsesFloatConstant & argumentsFloatConstMask) != 0;
4474 bool hasFP16ConstsInCommandsOrArguments = commandsUsesFloatConstant || argumentsUsesFloatConstant;
4475
4476 needsFP16Constants |= hasFP16ConstsInCommandsOrArguments;
4477
4478 if (!needsFP16Constants)
4479 {
4480 vertConstants = "";
4481 fragConstants = "";
4482 }
4483 }
4484 needsShaderFloat16 |= needsFP16Constants;
4485
4486 if (needsShaderFloat16)
4487 {
4488 vertCapabilities += "OpCapability Float16\n";
4489 fragCapabilities += "OpCapability Float16\n";
4490 }
4491
4492 map<string, string> specializations;
4493 specializations["vert_capabilities"] = vertCapabilities;
4494 specializations["vert_extensions"] = vertExtensions;
4495 specializations["vert_execution_mode"] = vertExecutionMode;
4496 specializations["vert_annotations"] = vertAnnotations;
4497 specializations["vert_types"] = vertTypes;
4498 specializations["vert_constants"] = vertConstants;
4499 specializations["vert_io_definitions"] = vertIODefinitions;
4500 specializations["vert_arguments"] = vertArguments;
4501 specializations["vert_variables"] = vertVariables;
4502 specializations["vert_functions"] = vertFunctions;
4503 specializations["vert_commands"] = vertCommands;
4504 specializations["vert_process_result"] = vertProcessResult;
4505 specializations["frag_capabilities"] = fragCapabilities;
4506 specializations["frag_extensions"] = fragExtensions;
4507 specializations["frag_execution_mode"] = fragExecutionMode;
4508 specializations["frag_annotations"] = fragAnnotations;
4509 specializations["frag_types"] = fragTypes;
4510 specializations["frag_constants"] = fragConstants;
4511 specializations["frag_functions"] = fragFunctions;
4512 specializations["frag_io_definitions"] = fragIODefinitions;
4513 specializations["frag_arguments"] = fragArguments;
4514 specializations["frag_variables"] = fragVariables;
4515 specializations["frag_commands"] = fragCommands;
4516 specializations["frag_process_result"] = fragProcessResult;
4517
4518 // colors are not used by the test - input is passed via uniform buffer
4519 RGBA defaultColors[4] = { RGBA::white(), RGBA::red(), RGBA::green(), RGBA::blue() };
4520
4521 // construct input and output buffers of proper types
4522 TypeValuesSP inTypeValues = m_typeData.at(inFloatType).values;
4523 TypeValuesSP outTypeValues = m_typeData.at(outFloatType).values;
4524 BufferSp inBufferSp = inTypeValues->constructInputBuffer(testCase.input);
4525 BufferSp outBufferSp = outTypeValues->constructOutputBuffer(testCase.expectedOutput);
4526
4527 vkt::SpirVAssembly::GraphicsResources resources;
4528 resources.inputs.push_back( Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4529 resources.outputs.push_back(Resource(outBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4530 resources.verifyIO = checkFloatsLUT[outFloatType];
4531
4532 StageToSpecConstantMap noSpecConstants;
4533 PushConstants noPushConstants;
4534 GraphicsInterfaces noInterfaces;
4535
4536 VulkanFeatures vulkanFeatures;
4537 setupVulkanFeatures(inFloatTypeForCaps, // usualy same as inFloatType - different only for UnpackHalf2x16
4538 outFloatType,
4539 testCase.behaviorFlags,
4540 float64FeatureRequired,
4541 vulkanFeatures);
4542 vulkanFeatures.coreFeatures.fragmentStoresAndAtomics = true;
4543
4544 vector<string> extensions;
4545 extensions.push_back("VK_KHR_shader_float_controls");
4546 if (needsShaderFloat16)
4547 {
4548 extensions.push_back("VK_KHR_shader_float16_int8");
4549 vulkanFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
4550 }
4551 if (float16FeatureRequired && !testCase.fp16Without16BitStorage)
4552 {
4553 extensions.push_back("VK_KHR_16bit_storage");
4554 vulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
4555 }
4556
4557 InstanceContext ctx(defaultColors,
4558 defaultColors,
4559 specializations,
4560 noSpecConstants,
4561 noPushConstants,
4562 resources,
4563 noInterfaces,
4564 extensions,
4565 vulkanFeatures,
4566 testedStage);
4567
4568 ctx.moduleMap["vert"].push_back(std::make_pair("main", VK_SHADER_STAGE_VERTEX_BIT));
4569 ctx.moduleMap["frag"].push_back(std::make_pair("main", VK_SHADER_STAGE_FRAGMENT_BIT));
4570
4571 ctx.requiredStages = static_cast<VkShaderStageFlagBits>(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
4572 ctx.failResult = QP_TEST_RESULT_FAIL;
4573 ctx.failMessageTemplate = "Output doesn't match with expected";
4574
4575 return ctx;
4576 }
4577
4578 } // anonymous
4579
createFloatControlsTestGroup(TestContext & testCtx,TestGroupBuilderBase * groupBuilder)4580 tcu::TestCaseGroup* createFloatControlsTestGroup (TestContext& testCtx, TestGroupBuilderBase* groupBuilder)
4581 {
4582 de::MovePtr<TestCaseGroup> group(new TestCaseGroup(testCtx, "float_controls", "Tests for VK_KHR_shader_float_controls extension"));
4583
4584 struct TestGroup
4585 {
4586 FloatType floatType;
4587 const char* groupName;
4588 };
4589 TestGroup testGroups[] =
4590 {
4591 { FP16, "fp16" },
4592 { FP32, "fp32" },
4593 { FP64, "fp64" },
4594 };
4595
4596 for (int i = 0 ; i < DE_LENGTH_OF_ARRAY(testGroups) ; ++i)
4597 {
4598 const TestGroup& testGroup = testGroups[i];
4599 TestCaseGroup* typeGroup = new TestCaseGroup(testCtx, testGroup.groupName, "");
4600 group->addChild(typeGroup);
4601
4602 groupBuilder->createOperationTests(typeGroup, "input_args", testGroup.floatType, true);
4603 groupBuilder->createOperationTests(typeGroup, "generated_args", testGroup.floatType, false);
4604 }
4605
4606 groupBuilder->createSettingsTests(group.get());
4607
4608 return group.release();
4609 }
4610
createFloatControlsComputeGroup(TestContext & testCtx)4611 tcu::TestCaseGroup* createFloatControlsComputeGroup (TestContext& testCtx)
4612 {
4613 ComputeTestGroupBuilder computeTestGroupBuilder;
4614 computeTestGroupBuilder.init();
4615
4616 return createFloatControlsTestGroup(testCtx, &computeTestGroupBuilder);
4617 }
4618
createFloatControlsGraphicsGroup(TestContext & testCtx)4619 tcu::TestCaseGroup* createFloatControlsGraphicsGroup (TestContext& testCtx)
4620 {
4621 GraphicsTestGroupBuilder graphicsTestGroupBuilder;
4622 graphicsTestGroupBuilder.init();
4623
4624 return createFloatControlsTestGroup(testCtx, &graphicsTestGroupBuilder);
4625 }
4626
4627 } // SpirVAssembly
4628 } // vkt
4629