1 /*-------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2018 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief VK_KHR_shader_float_controls tests.
22 *//*--------------------------------------------------------------------*/
23
24
25 #include "vktSpvAsmFloatControlsTests.hpp"
26 #include "vktSpvAsmComputeShaderCase.hpp"
27 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
28 #include "vktTestGroupUtil.hpp"
29 #include "tcuFloat.hpp"
30 #include "tcuFloatFormat.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "deUniquePtr.hpp"
33 #include "deFloat16.h"
34 #include "vkQueryUtil.hpp"
35 #include "vkRefUtil.hpp"
36 #include <cstring>
37 #include <vector>
38 #include <limits>
39 #include <cstdint>
40 #include <fenv.h>
41 #include <cstdint>
42
43 namespace vkt
44 {
45 namespace SpirVAssembly
46 {
47
48 namespace
49 {
50
51 using namespace std;
52 using namespace tcu;
53
// Floating point widths covered by the tests (fp16, fp32 and fp64).
enum FloatType
{
	FP16 = 0,
	FP32 = 1,
	FP64 = 2
};
60
// Element type tag for buffer contents; DATA_UNKNOWN is the zero value.
enum class BufferDataType
{
	DATA_UNKNOWN = 0,
	DATA_FP16,
	DATA_FP32,
	DATA_FP64,
};
68
// How a float type is used by a test.
enum FloatUsage
{
	// For a 16bit float type this usage is covered by VK_KHR_16bit_storage.
	FLOAT_STORAGE_ONLY = 0,

	// The float type is used in ways that go beyond VK_KHR_16bit_storage.
	FLOAT_ARITHMETIC = 1
};
77
// Single-bit flags (bits 0..19) that can be combined in FloatStatementUsageFlags.
// The enumerators come in groups of four: a generic FLOAT entry followed by
// FP16, FP32 and FP64 entries.
enum FloatStatementUsageBits
{
	// ARGS_CONST group: bits 0..3
	B_STATEMENT_USAGE_ARGS_CONST_FLOAT		= 0x00000001,
	B_STATEMENT_USAGE_ARGS_CONST_FP16		= 0x00000002,
	B_STATEMENT_USAGE_ARGS_CONST_FP32		= 0x00000004,
	B_STATEMENT_USAGE_ARGS_CONST_FP64		= 0x00000008,

	// TYPES_TYPE group: bits 4..7
	B_STATEMENT_USAGE_TYPES_TYPE_FLOAT		= 0x00000010,
	B_STATEMENT_USAGE_TYPES_TYPE_FP16		= 0x00000020,
	B_STATEMENT_USAGE_TYPES_TYPE_FP32		= 0x00000040,
	B_STATEMENT_USAGE_TYPES_TYPE_FP64		= 0x00000080,

	// CONSTS_TYPE group: bits 8..11
	B_STATEMENT_USAGE_CONSTS_TYPE_FLOAT		= 0x00000100,
	B_STATEMENT_USAGE_CONSTS_TYPE_FP16		= 0x00000200,
	B_STATEMENT_USAGE_CONSTS_TYPE_FP32		= 0x00000400,
	B_STATEMENT_USAGE_CONSTS_TYPE_FP64		= 0x00000800,

	// COMMANDS_CONST group: bits 12..15
	B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT	= 0x00001000,
	B_STATEMENT_USAGE_COMMANDS_CONST_FP16	= 0x00002000,
	B_STATEMENT_USAGE_COMMANDS_CONST_FP32	= 0x00004000,
	B_STATEMENT_USAGE_COMMANDS_CONST_FP64	= 0x00008000,

	// COMMANDS_TYPE group: bits 16..19
	B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT	= 0x00010000,
	B_STATEMENT_USAGE_COMMANDS_TYPE_FP16	= 0x00020000,
	B_STATEMENT_USAGE_COMMANDS_TYPE_FP32	= 0x00040000,
	B_STATEMENT_USAGE_COMMANDS_TYPE_FP64	= 0x00080000,
};
101
102 typedef deUint32 FloatStatementUsageFlags;
103
// Float behaviors that can be tested; each enumerator is a single bit so that
// several behaviors can be combined in a BehaviorFlags mask.
enum BehaviorFlagBits
{
	B_DENORM_PRESERVE	= (1 << 0),		// DenormPreserve
	B_DENORM_FLUSH		= (1 << 1),		// DenormFlushToZero
	B_ZIN_PRESERVE		= (1 << 2),		// SignedZeroInfNanPreserve
	B_RTE_ROUNDING		= (1 << 3),		// RoundingModeRTE
	B_RTZ_ROUNDING		= (1 << 4)		// RoundingModeRTZ
};
113
114 typedef deUint32 BehaviorFlags;
115
// Codes for all float values used in tests as arguments and operation results.
// Working with codes instead of concrete values allows the same test definition
// to be reused with values of different float types, which reduces the
// complexity of the test implementation. The concrete value for each code is
// provided per float type by the TypeValues specializations.
enum ValueId
{
	// common values used as both arguments and results
	V_UNUSED = 0,					// used to mark arguments that are not used in operation
									// or results of test cases that should be skipped
	V_MINUS_INF,					// -inf
	V_MINUS_ONE,					// -1.0
	V_MINUS_ZERO,					// -0.0
	V_ZERO,							// 0.0
	V_HALF,							// 0.5
	V_ONE,							// 1.0
	V_INF,							// +inf
	V_DENORM,						// denormalized value
	V_NAN,							// not a number

	// arguments for rounding mode tests - used only when arguments are passed from input
	V_ADD_ARG_A,
	V_ADD_ARG_B,
	V_SUB_ARG_A,
	V_SUB_ARG_B,
	V_MUL_ARG_A,
	V_MUL_ARG_B,
	V_DOT_ARG_A,
	V_DOT_ARG_B,

	// arguments of conversion operations - used only when arguments are passed from input
	V_CONV_FROM_FP32_ARG,
	V_CONV_FROM_FP64_ARG,

	// results of rounding mode tests (RTZ = round towards zero, RTE = round to nearest even)
	V_ADD_RTZ_RESULT,
	V_ADD_RTE_RESULT,
	V_SUB_RTZ_RESULT,
	V_SUB_RTE_RESULT,
	V_MUL_RTZ_RESULT,
	V_MUL_RTE_RESULT,
	V_DOT_RTZ_RESULT,
	V_DOT_RTE_RESULT,

	// non-common results of some operations - corner cases
	V_ZERO_OR_DENORM_TIMES_TWO,		// fp16 addition of non-flushed denorm with itself (or equivalent dot-product or vector-matrix multiply)
	V_MINUS_ONE_OR_CLOSE,			// value used only for fp16 subtraction result of preserved denorm and one
	V_PI_DIV_2,
	V_ZERO_OR_MINUS_ZERO,			// both +0 and -0 are accepted
	V_ZERO_OR_ONE,					// both +0 and 1 are accepted
	V_ZERO_OR_FP16_DENORM_TO_FP32,	// both 0 and fp32 representation of fp16 denorm are accepted
	V_ZERO_OR_FP16_DENORM_TO_FP64,
	V_ZERO_OR_FP32_DENORM_TO_FP64,
	V_DENORM_TIMES_TWO,
	V_DEGREES_DENORM,
	V_TRIG_ONE,						// 1.0 trigonometric operations, including precision margin
	V_MINUS_INF_OR_LOG_DENORM,
	V_MINUS_INF_OR_LOG2_DENORM,
	V_ZERO_OR_SQRT_DENORM,
	V_INF_OR_INV_SQRT_DENORM,

	// results of conversion operations
	V_CONV_TO_FP16_RTZ_RESULT,
	V_CONV_TO_FP16_RTE_RESULT,
	V_CONV_TO_FP32_RTZ_RESULT,
	V_CONV_TO_FP32_RTE_RESULT,
	V_CONV_DENORM_SMALLER,			// used e.g. when converting fp16 denorm to fp32
	V_CONV_DENORM_BIGGER,
};
181
// Enum containing all tested operations. Operations are defined in a generic way so that
// they can be used to generate tests operating on arguments with different values of
// the specified float type.
enum OperationId
{
	// spir-v unary operations
	OID_NEGATE = 0,
	OID_COMPOSITE,
	OID_COMPOSITE_INS,
	OID_COPY,
	OID_D_EXTRACT,
	OID_D_INSERT,
	OID_SHUFFLE,
	OID_TRANSPOSE,
	OID_CONV_FROM_FP16,
	OID_CONV_FROM_FP32,
	OID_CONV_FROM_FP64,
	OID_SCONST_CONV_FROM_FP32_TO_FP16,
	OID_SCONST_CONV_FROM_FP64_TO_FP32,
	OID_SCONST_CONV_FROM_FP64_TO_FP16,
	OID_RETURN_VAL,

	// spir-v binary operations
	OID_ADD,
	OID_SUB,
	OID_MUL,
	OID_DIV,
	OID_REM,
	OID_MOD,
	OID_PHI,
	OID_SELECT,
	OID_DOT,
	OID_VEC_MUL_S,
	OID_VEC_MUL_M,
	OID_MAT_MUL_S,
	OID_MAT_MUL_V,
	OID_MAT_MUL_M,
	OID_OUT_PROD,
	OID_ORD_EQ,
	OID_UORD_EQ,
	OID_ORD_NEQ,
	OID_UORD_NEQ,
	OID_ORD_LS,
	OID_UORD_LS,
	OID_ORD_GT,
	OID_UORD_GT,
	OID_ORD_LE,
	OID_UORD_LE,
	OID_ORD_GE,
	OID_UORD_GE,

	// glsl unary operations
	OID_ROUND,
	OID_ROUND_EV,
	OID_TRUNC,
	OID_ABS,
	OID_SIGN,
	OID_FLOOR,
	OID_CEIL,
	OID_FRACT,
	OID_RADIANS,
	OID_DEGREES,
	OID_SIN,
	OID_COS,
	OID_TAN,
	OID_ASIN,
	OID_ACOS,
	OID_ATAN,
	OID_SINH,
	OID_COSH,
	OID_TANH,
	OID_ASINH,
	OID_ACOSH,
	OID_ATANH,
	OID_EXP,
	OID_LOG,
	OID_EXP2,
	OID_LOG2,
	OID_SQRT,
	OID_INV_SQRT,
	OID_MODF,
	OID_MODF_ST,
	OID_FREXP,
	OID_FREXP_ST,
	OID_LENGTH,
	OID_NORMALIZE,
	OID_REFLECT,
	OID_REFRACT,
	OID_MAT_DET,
	OID_MAT_INV,
	OID_PH_DENORM,	// PackHalf2x16
	OID_UPH_DENORM,
	OID_PD_DENORM,	// PackDouble2x32
	OID_UPD_DENORM_FLUSH,
	OID_UPD_DENORM_PRESERVE,

	// glsl binary operations
	OID_ATAN2,
	OID_POW,
	OID_MIX,
	OID_FMA,
	OID_MIN,
	OID_MAX,
	OID_CLAMP,
	OID_STEP,
	OID_SSTEP,
	OID_DIST,
	OID_CROSS,
	OID_FACE_FWD,
	OID_NMIN,
	OID_NMAX,
	OID_NCLAMP,

	// NOTE(review): presumably operations used by the RTE/RTZ rounding mode tests - confirm against their users
	OID_ORTE_ROUND,
	OID_ORTZ_ROUND
};
298
// Structures storing data required to test DenormPreserve and DenormFlushToZero modes.
// Operations are separated into binary and unary lists because binary operations can be
// tested with two arguments and thus denorms can be tested in combination with a value,
// a denorm, an inf and a nan. Unary operations are only tested with denorms.

// Expected results of a single binary operation.
struct BinaryCase
{
	OperationId operationId;	// operation under test
	// NOTE(review): presumably the expected results when the denorm is combined with
	// an ordinary value, a denorm, an inf and a nan respectively - confirm against
	// the tables that fill this structure.
	ValueId opVarResult;
	ValueId opDenormResult;
	ValueId opInfResult;
	ValueId opNanResult;
};
// Pairs a single unary operation with the code of a result value.
struct UnaryCase
{
	OperationId operationId;	// operation under test
	ValueId result;				// NOTE(review): presumably the expected result for a denorm argument - confirm
};
316
// Returns a copy of str in which every non-overlapping occurrence of 'from',
// found scanning left to right, is replaced with 'to'.
//
// To keep spir-v code clean and easier to read, parts of it are processed with
// this function instead of StringTemplate; its main usage is the replacement of
// "float_" with "f16_", "f32_" or "f64_" depending on the test case.
//
// str  - text to process (taken by value, the caller's string is not modified)
// from - substring to search for; when empty, str is returned unchanged
// to   - replacement text; may be empty and may itself contain 'from'
std::string replace(std::string str, const std::string& from, const std::string& to)
{
	// An empty pattern matches at every position, so the search loop below would
	// never terminate (and would grow the string without bound when 'to' is
	// non-empty). There is nothing meaningful to replace in that case.
	if (from.empty())
		return str;

	std::string::size_type searchPos = 0;
	while ((searchPos = str.find(from, searchPos)) != std::string::npos)
	{
		str.replace(searchPos, from.length(), to);

		// Resume after the inserted text so that a replacement which contains
		// 'from' is not matched again.
		searchPos += to.length();
	}
	return str;
}
332
// Structure used to perform bits conversion int type <-> float type.
// The nested union overlays a FLOAT_TYPE member and a UINT_TYPE member on the
// same storage, so a bit pattern written through one member can be read back
// through the other.
template<typename FLOAT_TYPE, typename UINT_TYPE>
struct RawConvert
{
	union Value
	{
		FLOAT_TYPE fp;	// floating point view of the storage
		UINT_TYPE ui;	// unsigned integer view of the storage
	};
};
343
344 // Traits used to get int type that can store equivalent float type.
345 template<typename FLOAT_TYPE>
346 struct GetCoresponding
347 {
348 typedef deUint16 uint_type;
349 };
350 template<>
351 struct GetCoresponding<float>
352 {
353 typedef deUint32 uint_type;
354 };
355 template<>
356 struct GetCoresponding<double>
357 {
358 typedef deUint64 uint_type;
359 };
360
// All values used for arguments and operation results are stored in a single map.
// Each float type (fp16, fp32, fp64) has its own map that is used during
// test setup and during verification. TypeValuesBase is the interface to that map.
class TypeValuesBase
{
public:
	TypeValuesBase();
	virtual ~TypeValuesBase() = default;

	// Builds a buffer holding the values that correspond to twoArguments[0] and twoArguments[1].
	virtual BufferSp constructInputBuffer (const ValueId* twoArguments) const = 0;
	// Builds the output buffer for the given result code.
	virtual BufferSp constructOutputBuffer (ValueId result) const = 0;
	// Writes the values for twoArguments[0] and twoArguments[1] into bufferData
	// starting at offset and advances offset past the written data.
	virtual void fillInputData (const ValueId* twoArguments, vector<deUint8>& bufferData, deUint32& offset) const = 0;

protected:
	// Value of pi available to the derived value tables.
	const double pi;
};
377
// Initializes the pi constant shared by all derived value tables.
TypeValuesBase::TypeValuesBase()
	: pi(3.14159265358979323846)
{
}
382
383 typedef de::SharedPtr<TypeValuesBase> TypeValuesSP;
384
385 template <typename FLOAT_TYPE>
386 class TypeValues: public TypeValuesBase
387 {
388 public:
389 TypeValues();
390
391 BufferSp constructInputBuffer (const ValueId* twoArguments) const override;
392 BufferSp constructOutputBuffer (ValueId result) const override;
393 void fillInputData (const ValueId* twoArguments, vector<deUint8>& bufferData, deUint32& offset) const override;
394
395 FLOAT_TYPE getValue(ValueId id) const;
396
397 template <typename UINT_TYPE>
398 FLOAT_TYPE exactByteEquivalent(UINT_TYPE byteValue) const;
399
400 private:
401 typedef map<ValueId, FLOAT_TYPE> ValueMap;
402 ValueMap m_valueIdToFloatType;
403 };
404
405 template <typename FLOAT_TYPE>
constructInputBuffer(const ValueId * twoArguments) const406 BufferSp TypeValues<FLOAT_TYPE>::constructInputBuffer(const ValueId* twoArguments) const
407 {
408 std::vector<FLOAT_TYPE> inputData(2);
409 inputData[0] = m_valueIdToFloatType.at(twoArguments[0]);
410 inputData[1] = m_valueIdToFloatType.at(twoArguments[1]);
411 return BufferSp(new Buffer<FLOAT_TYPE>(inputData));
412 }
413
414 template <typename FLOAT_TYPE>
constructOutputBuffer(ValueId result) const415 BufferSp TypeValues<FLOAT_TYPE>::constructOutputBuffer(ValueId result) const
416 {
417 // note: we are not doing maping here, ValueId is directly saved in
418 // float type in order to be able to retireve it during verification
419
420 typedef typename GetCoresponding<FLOAT_TYPE>::uint_type uint_t;
421 uint_t value = static_cast<uint_t>(result);
422
423 // For FP16 we increase the buffer size to hold an unsigned integer, as
424 // we can be in the no 16bit_storage case.
425 const uint_t outputSize = sizeof(FLOAT_TYPE) == 2u ? 2u : 1u;
426 std::vector<FLOAT_TYPE> outputData(outputSize, exactByteEquivalent<uint_t>(value));
427 return BufferSp(new Buffer<FLOAT_TYPE>(outputData));
428 }
429
430 template <typename FLOAT_TYPE>
fillInputData(const ValueId * twoArguments,vector<deUint8> & bufferData,deUint32 & offset) const431 void TypeValues<FLOAT_TYPE>::fillInputData(const ValueId* twoArguments, vector<deUint8>& bufferData, deUint32& offset) const
432 {
433 deUint32 typeSize = sizeof(FLOAT_TYPE);
434
435 FLOAT_TYPE argA = getValue(twoArguments[0]);
436 deMemcpy(&bufferData[offset], &argA, typeSize);
437 offset += typeSize;
438
439 FLOAT_TYPE argB = getValue(twoArguments[1]);
440 deMemcpy(&bufferData[offset], &argB, typeSize);
441 offset += typeSize;
442 }
443
444 template <typename FLOAT_TYPE>
getValue(ValueId id) const445 FLOAT_TYPE TypeValues<FLOAT_TYPE>::getValue(ValueId id) const
446 {
447 return m_valueIdToFloatType.at(id);
448 }
449
450 template <typename FLOAT_TYPE>
451 template <typename UINT_TYPE>
exactByteEquivalent(UINT_TYPE byteValue) const452 FLOAT_TYPE TypeValues<FLOAT_TYPE>::exactByteEquivalent(UINT_TYPE byteValue) const
453 {
454 typename RawConvert<FLOAT_TYPE, UINT_TYPE>::Value value;
455 value.ui = byteValue;
456 return value.fp;
457 }
458
459 template <>
TypeValues()460 TypeValues<deFloat16>::TypeValues()
461 : TypeValuesBase()
462 {
463 // NOTE: when updating entries in m_valueIdToFloatType make sure to
464 // update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
465 ValueMap& vm = m_valueIdToFloatType;
466 vm[V_UNUSED] = deFloat32To16(0.0f);
467 vm[V_MINUS_INF] = 0xfc00;
468 vm[V_MINUS_ONE] = deFloat32To16(-1.0f);
469 vm[V_MINUS_ZERO] = 0x8000;
470 vm[V_ZERO] = 0x0000;
471 vm[V_HALF] = deFloat32To16(0.5f);
472 vm[V_ONE] = deFloat32To16(1.0f);
473 vm[V_INF] = 0x7c00;
474 vm[V_DENORM] = 0x03f0; // this value should be the same as the result of denormBase - epsilon
475 vm[V_NAN] = 0x7cf0;
476
477 vm[V_PI_DIV_2] = 0x3e48;
478 vm[V_DENORM_TIMES_TWO] = 0x07e0;
479 vm[V_DEGREES_DENORM] = 0x1b0c;
480
481 vm[V_ADD_ARG_A] = 0x3c03;
482 vm[V_ADD_ARG_B] = vm[V_ONE];
483 vm[V_SUB_ARG_A] = vm[V_ADD_ARG_A];
484 vm[V_SUB_ARG_B] = 0x4203;
485 vm[V_MUL_ARG_A] = vm[V_ADD_ARG_A];
486 vm[V_MUL_ARG_B] = 0x1900;
487 vm[V_DOT_ARG_A] = vm[V_ADD_ARG_A];
488 vm[V_DOT_ARG_B] = vm[V_MUL_ARG_B];
489 vm[V_CONV_FROM_FP32_ARG] = vm[V_UNUSED];
490 vm[V_CONV_FROM_FP64_ARG] = vm[V_UNUSED];
491
492 vm[V_ADD_RTZ_RESULT] = 0x4001; // deFloat16Add(vm[V_ADD_ARG_A], vm[V_ADD_ARG_B], rtz)
493 vm[V_SUB_RTZ_RESULT] = 0xc001; // deFloat16Sub(vm[V_SUB_ARG_A], vm[V_SUB_ARG_B], rtz)
494 vm[V_MUL_RTZ_RESULT] = 0x1903; // deFloat16Mul(vm[V_MUL_ARG_A], vm[V_MUL_ARG_B], rtz)
495 vm[V_DOT_RTZ_RESULT] = 0x1d03;
496 vm[V_CONV_TO_FP16_RTZ_RESULT] = deFloat32To16Round(1.22334445f, DE_ROUNDINGMODE_TO_ZERO);
497 vm[V_CONV_TO_FP32_RTZ_RESULT] = vm[V_UNUSED];
498
499 vm[V_ADD_RTE_RESULT] = 0x4002; // deFloat16Add(vm[V_ADD_ARG_A], vm[V_ADD_ARG_B], rte)
500 vm[V_SUB_RTE_RESULT] = 0xc002; // deFloat16Sub(vm[V_SUB_ARG_A], vm[V_SUB_ARG_B], rte)
501 vm[V_MUL_RTE_RESULT] = 0x1904; // deFloat16Mul(vm[V_MUL_ARG_A], vm[V_MUL_ARG_B], rte)
502 vm[V_DOT_RTE_RESULT] = 0x1d04;
503 vm[V_CONV_TO_FP16_RTE_RESULT] = deFloat32To16Round(1.22334445f, DE_ROUNDINGMODE_TO_NEAREST_EVEN);
504 vm[V_CONV_TO_FP32_RTE_RESULT] = vm[V_UNUSED];
505
506 // there is no precision to store fp32 denorm nor fp64 denorm
507 vm[V_CONV_DENORM_SMALLER] = vm[V_ZERO];
508 vm[V_CONV_DENORM_BIGGER] = vm[V_ZERO];
509 }
510
511 template <>
TypeValues()512 TypeValues<float>::TypeValues()
513 : TypeValuesBase()
514 {
515 // NOTE: when updating entries in m_valueIdToFloatType make sure to
516 // update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
517 ValueMap& vm = m_valueIdToFloatType;
518 vm[V_UNUSED] = 0.0f;
519 vm[V_MINUS_INF] = -std::numeric_limits<float>::infinity();
520 vm[V_MINUS_ONE] = -1.0f;
521 vm[V_MINUS_ZERO] = -0.0f;
522 vm[V_ZERO] = 0.0f;
523 vm[V_HALF] = 0.5f;
524 vm[V_ONE] = 1.0f;
525 vm[V_INF] = std::numeric_limits<float>::infinity();
526 vm[V_DENORM] = static_cast<float>(1.413e-42); // 0x000003f0
527 vm[V_NAN] = std::numeric_limits<float>::quiet_NaN();
528
529 vm[V_PI_DIV_2] = static_cast<float>(pi / 2);
530 vm[V_DENORM_TIMES_TWO] = vm[V_DENORM] + vm[V_DENORM];
531 vm[V_DEGREES_DENORM] = deFloatDegrees(vm[V_DENORM]);
532
533 float e = std::numeric_limits<float>::epsilon();
534 vm[V_ADD_ARG_A] = 1.0f + 3 * e;
535 vm[V_ADD_ARG_B] = 1.0f;
536 vm[V_SUB_ARG_A] = vm[V_ADD_ARG_A];
537 vm[V_SUB_ARG_B] = 3.0f + 6 * e;
538 vm[V_MUL_ARG_A] = vm[V_ADD_ARG_A];
539 vm[V_MUL_ARG_B] = 5 * e;
540 vm[V_DOT_ARG_A] = vm[V_ADD_ARG_A];
541 vm[V_DOT_ARG_B] = 5 * e;
542 vm[V_CONV_FROM_FP32_ARG] = 1.22334445f;
543 vm[V_CONV_FROM_FP64_ARG] = vm[V_UNUSED];
544
545 int prevRound = fegetround();
546 fesetround(FE_TOWARDZERO);
547 vm[V_ADD_RTZ_RESULT] = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
548 vm[V_SUB_RTZ_RESULT] = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
549 vm[V_MUL_RTZ_RESULT] = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
550 vm[V_DOT_RTZ_RESULT] = vm[V_MUL_RTZ_RESULT] + vm[V_MUL_RTZ_RESULT];
551 vm[V_CONV_TO_FP16_RTZ_RESULT] = vm[V_UNUSED];
552 vm[V_CONV_TO_FP32_RTZ_RESULT] = exactByteEquivalent<deUint32>(0x3f9c968d); // result of conversion from double(1.22334455)
553
554 fesetround(FE_TONEAREST);
555 vm[V_ADD_RTE_RESULT] = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
556 vm[V_SUB_RTE_RESULT] = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
557 vm[V_MUL_RTE_RESULT] = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
558 vm[V_DOT_RTE_RESULT] = vm[V_MUL_RTE_RESULT] + vm[V_MUL_RTE_RESULT];
559 vm[V_CONV_TO_FP16_RTE_RESULT] = vm[V_UNUSED];
560 vm[V_CONV_TO_FP32_RTE_RESULT] = exactByteEquivalent<deUint32>(0x3f9c968e); // result of conversion from double(1.22334455)
561 fesetround(prevRound);
562
563 // there is no precision to store fp64 denorm
564 vm[V_CONV_DENORM_SMALLER] = exactByteEquivalent<deUint32>(0x387c0000); // fp16 denorm
565 vm[V_CONV_DENORM_BIGGER] = vm[V_ZERO];
566 }
567
568 template <>
TypeValues()569 TypeValues<double>::TypeValues()
570 : TypeValuesBase()
571 {
572 // NOTE: when updating entries in m_valueIdToFloatType make sure to
573 // update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
574 ValueMap& vm = m_valueIdToFloatType;
575 vm[V_UNUSED] = 0.0;
576 vm[V_MINUS_INF] = -std::numeric_limits<double>::infinity();
577 vm[V_MINUS_ONE] = -1.0;
578 vm[V_MINUS_ZERO] = -0.0;
579 vm[V_ZERO] = 0.0;
580 vm[V_HALF] = 0.5;
581 vm[V_ONE] = 1.0;
582 vm[V_INF] = std::numeric_limits<double>::infinity();
583 vm[V_DENORM] = 4.98e-321; // 0x00000000000003F0
584 vm[V_NAN] = std::numeric_limits<double>::quiet_NaN();
585
586 vm[V_PI_DIV_2] = pi / 2;
587 vm[V_DENORM_TIMES_TWO] = vm[V_DENORM] + vm[V_DENORM];
588 vm[V_DEGREES_DENORM] = vm[V_UNUSED];
589
590 double e = std::numeric_limits<double>::epsilon();
591 vm[V_ADD_ARG_A] = 1.0 + 3 * e;
592 vm[V_ADD_ARG_B] = 1.0;
593 vm[V_SUB_ARG_A] = vm[V_ADD_ARG_A];
594 vm[V_SUB_ARG_B] = 3.0 + 6 * e;
595 vm[V_MUL_ARG_A] = vm[V_ADD_ARG_A];
596 vm[V_MUL_ARG_B] = 5 * e;
597 vm[V_DOT_ARG_A] = vm[V_ADD_ARG_A];
598 vm[V_DOT_ARG_B] = 5 * e;
599 vm[V_CONV_FROM_FP32_ARG] = vm[V_UNUSED];
600 vm[V_CONV_FROM_FP64_ARG] = 1.22334455;
601
602 int prevRound = fegetround();
603 fesetround(FE_TOWARDZERO);
604 vm[V_ADD_RTZ_RESULT] = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
605 vm[V_SUB_RTZ_RESULT] = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
606 vm[V_MUL_RTZ_RESULT] = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
607 vm[V_DOT_RTZ_RESULT] = vm[V_MUL_RTZ_RESULT] + vm[V_MUL_RTZ_RESULT];
608 vm[V_CONV_TO_FP16_RTZ_RESULT] = vm[V_UNUSED];
609 vm[V_CONV_TO_FP32_RTZ_RESULT] = vm[V_UNUSED];
610
611 fesetround(FE_TONEAREST);
612 vm[V_ADD_RTE_RESULT] = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
613 vm[V_SUB_RTE_RESULT] = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
614 vm[V_MUL_RTE_RESULT] = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
615 vm[V_DOT_RTE_RESULT] = vm[V_MUL_RTE_RESULT] + vm[V_MUL_RTE_RESULT];
616 vm[V_CONV_TO_FP16_RTE_RESULT] = vm[V_UNUSED];
617 vm[V_CONV_TO_FP32_RTE_RESULT] = vm[V_UNUSED];
618 fesetround(prevRound);
619
620 vm[V_CONV_DENORM_SMALLER] = exactByteEquivalent<deUint64>(0x3f0f800000000000); // 0x03f0 is fp16 denorm
621 vm[V_CONV_DENORM_BIGGER] = exactByteEquivalent<deUint64>(0x373f800000000000); // 0x000003f0 is fp32 denorm
622 }
623
// Each float type (fp16, fp32, fp64) has a specific set of SPIR-V snippets
// that was extracted to a separate template specialization. Those snippets
// are used to compose final test shaders. With this approach
// parameterization can be done just once per type and reused for many tests.
//
// The type specific data members are the inputs; updateSpirvSnippets() reads
// them and (re)generates the snippet members.
class TypeSnippetsBase
{
public:
	virtual ~TypeSnippetsBase() = default;

protected:
	// Regenerates all snippet members from the type specific data below.
	void updateSpirvSnippets();

public: // Type specific data:

	// Number of bits consumed by float type
	string bitWidth;

	// Minimum positive normal
	string epsilon;

	// denormBase is a normal value (found empirically) used to generate denorm value.
	// Denorm is generated by subtracting epsilon from denormBase.
	// denormBase is not a denorm - it is used to create denorm.
	// This value is needed when operations are tested with arguments that were
	// generated in the code. Generated denorm should be the same as denorm
	// used when arguments are passed via input (m_valueIdToFloatType[V_DENORM]).
	// This is required as result of some operations depends on actual denorm value
	// e.g. OpRadians(0x0001) is 0 but OpRadians(0x03f0) is denorm.
	string denormBase;

	string capabilities;
	string extensions;
	string capabilitiesFp16Without16BitStorage;
	string extensionsFp16Without16BitStorage;
	// Value emitted for the ArrayStride decoration of the float arrays
	string arrayStride;

	bool loadStoreRequiresShaderFloat16;

public: // Type specific spir-v snippets:

	// Common annotations
	string typeAnnotationsSnippet;

	// Definitions of all types commonly used by operation tests
	string typeDefinitionsSnippet;

	// Definitions of all types commonly used by settings tests
	string minTypeDefinitionsSnippet;

	// Definitions of all constants commonly used by tests
	string constantsDefinitionsSnippet;

	// Map that stores instructions that generate arguments of specified value.
	// Every test that uses generated inputs will select up to two items from this map
	typedef map<ValueId, string> SnippetMap;
	SnippetMap valueIdToSnippetArgMap;

	// Spir-v snippets that read argument from SSBO
	string argumentsFromInputSnippet;
	string multiArgumentsFromInputSnippet;

	// SSBO with stage input/output definitions
	string inputAnnotationsSnippet;
	string inputDefinitionsSnippet;
	string outputAnnotationsSnippet;
	string multiOutputAnnotationsSnippet;
	string outputDefinitionsSnippet;
	string multiOutputDefinitionsSnippet;

	// Varying is required to pass result from vertex stage to fragment stage,
	// one of requirements was to not use SSBO writes in vertex stage so we
	// need to do that in fragment stage; we also can't pass operation result
	// directly because of interpolation, to avoid it we do a bitcast to uint
	string varyingsTypesSnippet;
	string inputVaryingsSnippet;
	string outputVaryingsSnippet;
	string storeVertexResultSnippet;
	string loadVertexResultSnippet;

	// Spir-v snippets that store the result to the output SSBO
	string storeResultsSnippet;
	string multiStoreResultsSnippet;

	// Variants that access the SSBOs through u32 instead of f16; they are left
	// empty by updateSpirvSnippets() unless bitWidth is "16"
	string argumentsFromInputFp16Snippet;
	string storeResultsFp16Snippet;
	string multiArgumentsFromInputFp16Snippet;
	string multiOutputAnnotationsFp16Snippet;
	string multiStoreResultsFp16Snippet;
	string multiOutputDefinitionsFp16Snippet;
	string inputDefinitionsFp16Snippet;
	string outputDefinitionsFp16Snippet;
	string typeAnnotationsFp16Snippet;
	string typeDefinitionsFp16Snippet;
};
717
updateSpirvSnippets()718 void TypeSnippetsBase::updateSpirvSnippets()
719 {
720 // annotations to types that are commonly used by tests
721 const string typeAnnotationsTemplate =
722 "OpDecorate %type_float_arr_1 ArrayStride " + arrayStride + "\n"
723 "OpDecorate %type_float_arr_2 ArrayStride " + arrayStride + "\n";
724
725 // definition off all types that are commonly used by tests
726 const string typeDefinitionsTemplate =
727 "%type_float = OpTypeFloat " + bitWidth + "\n"
728 "%type_float_uptr = OpTypePointer Uniform %type_float\n"
729 "%type_float_fptr = OpTypePointer Function %type_float\n"
730 "%type_float_vec2 = OpTypeVector %type_float 2\n"
731 "%type_float_vec3 = OpTypeVector %type_float 3\n"
732 "%type_float_vec4 = OpTypeVector %type_float 4\n"
733 "%type_float_vec4_iptr = OpTypePointer Input %type_float_vec4\n"
734 "%type_float_vec4_optr = OpTypePointer Output %type_float_vec4\n"
735 "%type_float_mat2x2 = OpTypeMatrix %type_float_vec2 2\n"
736 "%type_float_arr_1 = OpTypeArray %type_float %c_i32_1\n"
737 "%type_float_arr_2 = OpTypeArray %type_float %c_i32_2\n";
738
739 // minimal type definition set that is used by settings tests
740 const string minTypeDefinitionsTemplate =
741 "%type_float = OpTypeFloat " + bitWidth + "\n"
742 "%type_float_uptr = OpTypePointer Uniform %type_float\n"
743 "%type_float_arr_2 = OpTypeArray %type_float %c_i32_2\n";
744
745 // definition off all constants that are used by tests
746 const string constantsDefinitionsTemplate =
747 "%c_float_n1 = OpConstant %type_float -1\n"
748 "%c_float_0 = OpConstant %type_float 0.0\n"
749 "%c_float_0_5 = OpConstant %type_float 0.5\n"
750 "%c_float_1 = OpConstant %type_float 1\n"
751 "%c_float_2 = OpConstant %type_float 2\n"
752 "%c_float_3 = OpConstant %type_float 3\n"
753 "%c_float_4 = OpConstant %type_float 4\n"
754 "%c_float_5 = OpConstant %type_float 5\n"
755 "%c_float_6 = OpConstant %type_float 6\n"
756 "%c_float_eps = OpConstant %type_float " + epsilon + "\n"
757 "%c_float_denorm_base = OpConstant %type_float " + denormBase + "\n";
758
759 // when arguments are read from SSBO this snipped is placed in main function
760 const string argumentsFromInputTemplate =
761 "%arg1loc = OpAccessChain %type_float_uptr %ssbo_in %c_i32_0 %c_i32_0\n"
762 "%arg1 = OpLoad %type_float %arg1loc\n"
763 "%arg2loc = OpAccessChain %type_float_uptr %ssbo_in %c_i32_0 %c_i32_1\n"
764 "%arg2 = OpLoad %type_float %arg2loc\n";
765
766 const string multiArgumentsFromInputTemplate =
767 "%arg1_float_loc = OpAccessChain %type_float_uptr %ssbo_in %c_i32_${attr} %c_i32_0\n"
768 "%arg2_float_loc = OpAccessChain %type_float_uptr %ssbo_in %c_i32_${attr} %c_i32_1\n"
769 "%arg1_float = OpLoad %type_float %arg1_float_loc\n"
770 "%arg2_float = OpLoad %type_float %arg2_float_loc\n";
771
772 // when tested shader stage reads from SSBO it has to have this snippet
773 inputAnnotationsSnippet =
774 "OpMemberDecorate %SSBO_in 0 Offset 0\n"
775 "OpDecorate %SSBO_in BufferBlock\n"
776 "OpDecorate %ssbo_in DescriptorSet 0\n"
777 "OpDecorate %ssbo_in Binding 0\n"
778 "OpDecorate %ssbo_in NonWritable\n";
779
780 const string inputDefinitionsTemplate =
781 "%SSBO_in = OpTypeStruct %type_float_arr_2\n"
782 "%up_SSBO_in = OpTypePointer Uniform %SSBO_in\n"
783 "%ssbo_in = OpVariable %up_SSBO_in Uniform\n";
784
785 outputAnnotationsSnippet =
786 "OpMemberDecorate %SSBO_out 0 Offset 0\n"
787 "OpDecorate %SSBO_out BufferBlock\n"
788 "OpDecorate %ssbo_out DescriptorSet 0\n"
789 "OpDecorate %ssbo_out Binding 1\n";
790
791 const string multiOutputAnnotationsTemplate =
792 "OpMemberDecorate %SSBO_float_out 0 Offset 0\n"
793 "OpDecorate %type_float_arr_2 ArrayStride "+ arrayStride + "\n"
794 "OpDecorate %SSBO_float_out BufferBlock\n"
795 "OpDecorate %ssbo_float_out DescriptorSet 0\n";
796
797 const string outputDefinitionsTemplate =
798 "%SSBO_out = OpTypeStruct %type_float_arr_1\n"
799 "%up_SSBO_out = OpTypePointer Uniform %SSBO_out\n"
800 "%ssbo_out = OpVariable %up_SSBO_out Uniform\n";
801
802 const string multiOutputDefinitionsTemplate =
803 "%SSBO_float_out = OpTypeStruct %type_float\n"
804 "%up_SSBO_float_out = OpTypePointer Uniform %SSBO_float_out\n"
805 "%ssbo_float_out = OpVariable %up_SSBO_float_out Uniform\n";
806
807 // this snippet is used by compute and fragment stage but not by vertex stage
808 const string storeResultsTemplate =
809 "%outloc = OpAccessChain %type_float_uptr %ssbo_out %c_i32_0 %c_i32_0\n"
810 "OpStore %outloc %result\n";
811
812 const string multiStoreResultsTemplate =
813 "%outloc" + bitWidth + " = OpAccessChain %type_float_uptr %ssbo_float_out %c_i32_0\n"
814 " OpStore %outloc" + bitWidth + " %result" + bitWidth + "\n";
815
816 const string typeToken = "_float";
817 const string typeName = "_f" + bitWidth;
818
819 typeAnnotationsSnippet = replace(typeAnnotationsTemplate, typeToken, typeName);
820 typeDefinitionsSnippet = replace(typeDefinitionsTemplate, typeToken, typeName);
821 minTypeDefinitionsSnippet = replace(minTypeDefinitionsTemplate, typeToken, typeName);
822 constantsDefinitionsSnippet = replace(constantsDefinitionsTemplate, typeToken, typeName);
823 argumentsFromInputSnippet = replace(argumentsFromInputTemplate, typeToken, typeName);
824 multiArgumentsFromInputSnippet = replace(multiArgumentsFromInputTemplate, typeToken, typeName);
825 inputDefinitionsSnippet = replace(inputDefinitionsTemplate, typeToken, typeName);
826 multiOutputAnnotationsSnippet = replace(multiOutputAnnotationsTemplate, typeToken, typeName);
827 outputDefinitionsSnippet = replace(outputDefinitionsTemplate, typeToken, typeName);
828 multiOutputDefinitionsSnippet = replace(multiOutputDefinitionsTemplate, typeToken, typeName);
829 storeResultsSnippet = replace(storeResultsTemplate, typeToken, typeName);
830 multiStoreResultsSnippet = replace(multiStoreResultsTemplate, typeToken, typeName);
831
832 argumentsFromInputFp16Snippet = "";
833 storeResultsFp16Snippet = "";
834 multiArgumentsFromInputFp16Snippet = "";
835 multiOutputAnnotationsFp16Snippet = "";
836 multiStoreResultsFp16Snippet = "";
837 multiOutputDefinitionsFp16Snippet = "";
838 inputDefinitionsFp16Snippet = "";
839 typeAnnotationsFp16Snippet = "";
840 outputDefinitionsFp16Snippet = "";
841 typeDefinitionsFp16Snippet = "";
842
843 if (bitWidth.compare("16") == 0)
844 {
845 typeDefinitionsFp16Snippet =
846 "%type_u32_uptr = OpTypePointer Uniform %type_u32\n"
847 "%type_u32_arr_1 = OpTypeArray %type_u32 %c_i32_1\n";
848
849 typeAnnotationsFp16Snippet = "OpDecorate %type_u32_arr_1 ArrayStride 4\n";
850 const string inputToken = "_f16_arr_2";
851 const string inputName = "_u32_arr_1";
852 inputDefinitionsFp16Snippet = replace(inputDefinitionsSnippet, inputToken, inputName);
853
854 argumentsFromInputFp16Snippet =
855 "%argloc = OpAccessChain %type_u32_uptr %ssbo_in %c_i32_0 %c_i32_0\n"
856 "%inval = OpLoad %type_u32 %argloc\n"
857 "%arg = OpBitcast %type_f16_vec2 %inval\n"
858 "%arg1 = OpCompositeExtract %type_f16 %arg 0\n"
859 "%arg2 = OpCompositeExtract %type_f16 %arg 1\n";
860
861 const string outputToken = "_f16_arr_1";
862 const string outputName = "_u32_arr_1";
863 outputDefinitionsFp16Snippet = replace(outputDefinitionsSnippet, outputToken, outputName);
864
865 storeResultsFp16Snippet =
866 "%result_f16_vec2 = OpCompositeConstruct %type_f16_vec2 %result %c_f16_0\n"
867 "%result_u32 = OpBitcast %type_u32 %result_f16_vec2\n"
868 "%outloc = OpAccessChain %type_u32_uptr %ssbo_out %c_i32_0 %c_i32_0\n"
869 "OpStore %outloc %result_u32\n";
870
871 multiArgumentsFromInputFp16Snippet =
872 "%arg_u32_loc = OpAccessChain %type_u32_uptr %ssbo_in %c_i32_${attr} %c_i32_0\n"
873 "%arg_u32 = OpLoad %type_u32 %arg_u32_loc\n"
874 "%arg_f16_vec2 = OpBitcast %type_f16_vec2 %arg_u32\n"
875 "%arg1_f16 = OpCompositeExtract %type_f16 %arg_f16_vec2 0\n"
876 "%arg2_f16 = OpCompositeExtract %type_f16 %arg_f16_vec2 1\n";
877
878 multiOutputAnnotationsFp16Snippet =
879 "OpMemberDecorate %SSBO_u32_out 0 Offset 0\n"
880 "OpDecorate %type_u32_arr_1 ArrayStride 4\n"
881 "OpDecorate %SSBO_u32_out BufferBlock\n"
882 "OpDecorate %ssbo_u32_out DescriptorSet 0\n";
883
884 multiStoreResultsFp16Snippet =
885 "%outloc_u32 = OpAccessChain %type_u32_uptr %ssbo_u32_out %c_i32_0\n"
886 "%result16_vec2 = OpCompositeConstruct %type_f16_vec2 %result16 %c_f16_0\n"
887 "%result_u32 = OpBitcast %type_u32 %result16_vec2\n"
888 " OpStore %outloc_u32 %result_u32\n";
889
890 multiOutputDefinitionsFp16Snippet =
891 "%c_f16_0 = OpConstant %type_f16 0.0\n"
892 "%SSBO_u32_out = OpTypeStruct %type_u32\n"
893 "%up_SSBO_u32_out = OpTypePointer Uniform %SSBO_u32_out\n"
894 "%ssbo_u32_out = OpVariable %up_SSBO_u32_out Uniform\n";
895 }
896
	// NOTE: only values used as _generated_ arguments in test operations
	// need to be in this map; arguments that are only used by tests
	// that grab arguments from input do not need to be in this map
	// NOTE: when updating entries in valueIdToSnippetArgMap make
	// sure to also update m_valueIdToFloatType for all float widths
902 SnippetMap& sm = valueIdToSnippetArgMap;
903 sm[V_UNUSED] = "OpFSub %type_float %c_float_0 %c_float_0\n";
904 sm[V_MINUS_INF] = "OpFDiv %type_float %c_float_n1 %c_float_0\n";
905 sm[V_MINUS_ONE] = "OpFAdd %type_float %c_float_n1 %c_float_0\n";
906 sm[V_MINUS_ZERO] = "OpFMul %type_float %c_float_n1 %c_float_0\n";
907 sm[V_ZERO] = "OpFMul %type_float %c_float_0 %c_float_0\n";
908 sm[V_HALF] = "OpFAdd %type_float %c_float_0_5 %c_float_0\n";
909 sm[V_ONE] = "OpFAdd %type_float %c_float_1 %c_float_0\n";
910 sm[V_INF] = "OpFDiv %type_float %c_float_1 %c_float_0\n"; // x / 0 == Inf
911 sm[V_DENORM] = "OpFSub %type_float %c_float_denorm_base %c_float_eps\n";
912 sm[V_NAN] = "OpFDiv %type_float %c_float_0 %c_float_0\n"; // 0 / 0 == Nan
913
914 map<ValueId, string>::iterator it;
915 for ( it = sm.begin(); it != sm.end(); it++ )
916 sm[it->first] = replace(it->second, typeToken, typeName);
917 }
918
919 typedef de::SharedPtr<TypeSnippetsBase> TypeSnippetsSP;
920
921 template<typename FLOAT_TYPE>
922 class TypeSnippets: public TypeSnippetsBase
923 {
924 public:
925 TypeSnippets();
926 };
927
928 template<>
TypeSnippets()929 TypeSnippets<deFloat16>::TypeSnippets()
930 {
931 bitWidth = "16";
932 epsilon = "6.104e-5"; // 2^-14 = 0x0400
933
934 // 1.2113e-4 is 0x07f0 which after substracting epsilon will give 0x03f0 (same as vm[V_DENORM])
935 // NOTE: constants in SPIR-V cant be specified as exact fp16 - there is conversion from double to fp16
936 denormBase = "1.2113e-4";
937
938 capabilities = "OpCapability StorageUniform16\n";
939 extensions = "OpExtension \"SPV_KHR_16bit_storage\"\n";
940
941 capabilitiesFp16Without16BitStorage = "OpCapability Float16\n";
942 extensionsFp16Without16BitStorage = "";
943
944 arrayStride = "2";
945
946 varyingsTypesSnippet =
947 "%type_u32_iptr = OpTypePointer Input %type_u32\n"
948 "%type_u32_optr = OpTypePointer Output %type_u32\n";
949 inputVaryingsSnippet =
950 "%BP_vertex_result = OpVariable %type_u32_iptr Input\n";
951 outputVaryingsSnippet =
952 "%BP_vertex_result = OpVariable %type_u32_optr Output\n";
953 storeVertexResultSnippet =
954 "%tmp_vec2 = OpCompositeConstruct %type_f16_vec2 %result %c_f16_0\n"
955 "%packed_result = OpBitcast %type_u32 %tmp_vec2\n"
956 "OpStore %BP_vertex_result %packed_result\n";
957 loadVertexResultSnippet =
958 "%packed_result = OpLoad %type_u32 %BP_vertex_result\n"
959 "%tmp_vec2 = OpBitcast %type_f16_vec2 %packed_result\n"
960 "%result = OpCompositeExtract %type_f16 %tmp_vec2 0\n";
961
962 loadStoreRequiresShaderFloat16 = true;
963
964 updateSpirvSnippets();
965 }
966
967 template<>
TypeSnippets()968 TypeSnippets<float>::TypeSnippets()
969 {
970 bitWidth = "32";
971 epsilon = "1.175494351e-38";
972 denormBase = "1.1756356e-38";
973 capabilities = "";
974 extensions = "";
975 capabilitiesFp16Without16BitStorage = "";
976 extensionsFp16Without16BitStorage = "";
977 arrayStride = "4";
978
979 varyingsTypesSnippet =
980 "%type_u32_iptr = OpTypePointer Input %type_u32\n"
981 "%type_u32_optr = OpTypePointer Output %type_u32\n";
982 inputVaryingsSnippet =
983 "%BP_vertex_result = OpVariable %type_u32_iptr Input\n";
984 outputVaryingsSnippet =
985 "%BP_vertex_result = OpVariable %type_u32_optr Output\n";
986 storeVertexResultSnippet =
987 "%packed_result = OpBitcast %type_u32 %result\n"
988 "OpStore %BP_vertex_result %packed_result\n";
989 loadVertexResultSnippet =
990 "%packed_result = OpLoad %type_u32 %BP_vertex_result\n"
991 "%result = OpBitcast %type_f32 %packed_result\n";
992
993 loadStoreRequiresShaderFloat16 = false;
994
995 updateSpirvSnippets();
996 }
997
998 template<>
TypeSnippets()999 TypeSnippets<double>::TypeSnippets()
1000 {
1001 bitWidth = "64";
1002 epsilon = "2.2250738585072014e-308"; // 0x0010000000000000
1003 denormBase = "2.2250738585076994e-308"; // 0x00100000000003F0
1004 capabilities = "OpCapability Float64\n";
1005 extensions = "";
1006 capabilitiesFp16Without16BitStorage = "";
1007 extensionsFp16Without16BitStorage = "";
1008 arrayStride = "8";
1009
1010 varyingsTypesSnippet =
1011 "%type_u32_vec2_iptr = OpTypePointer Input %type_u32_vec2\n"
1012 "%type_u32_vec2_optr = OpTypePointer Output %type_u32_vec2\n";
1013 inputVaryingsSnippet =
1014 "%BP_vertex_result = OpVariable %type_u32_vec2_iptr Input\n";
1015 outputVaryingsSnippet =
1016 "%BP_vertex_result = OpVariable %type_u32_vec2_optr Output\n";
1017 storeVertexResultSnippet =
1018 "%packed_result = OpBitcast %type_u32_vec2 %result\n"
1019 "OpStore %BP_vertex_result %packed_result\n";
1020 loadVertexResultSnippet =
1021 "%packed_result = OpLoad %type_u32_vec2 %BP_vertex_result\n"
1022 "%result = OpBitcast %type_f64 %packed_result\n";
1023
1024 loadStoreRequiresShaderFloat16 = false;
1025
1026 updateSpirvSnippets();
1027 }
1028
1029 class TypeTestResultsBase
1030 {
1031 public:
~TypeTestResultsBase()1032 virtual ~TypeTestResultsBase() {}
1033 FloatType floatType() const;
1034
1035 protected:
1036 FloatType m_floatType;
1037
1038 public:
1039 // Vectors containing test data for float controls
1040 vector<BinaryCase> binaryOpFTZ;
1041 vector<UnaryCase> unaryOpFTZ;
1042 vector<BinaryCase> binaryOpDenormPreserve;
1043 vector<UnaryCase> unaryOpDenormPreserve;
1044 };
1045
floatType() const1046 FloatType TypeTestResultsBase::floatType() const
1047 {
1048 return m_floatType;
1049 }
1050
1051 typedef de::SharedPtr<TypeTestResultsBase> TypeTestResultsSP;
1052
1053 template<typename FLOAT_TYPE>
1054 class TypeTestResults: public TypeTestResultsBase
1055 {
1056 public:
1057 TypeTestResults();
1058 };
1059
1060 template<>
TypeTestResults()1061 TypeTestResults<deFloat16>::TypeTestResults()
1062 {
1063 m_floatType = FP16;
1064
1065 // note: there are many FTZ test cases that can produce diferent result depending
1066 // on input denorm being flushed or not; because of that FTZ tests can be limited
1067 // to those that return denorm as those are the ones affected by tested extension
1068 const BinaryCase binaryOpFTZArr[] = {
1069 //operation den op one den op den den op inf den op nan
1070 { OID_ADD, V_ONE, V_ZERO_OR_DENORM_TIMES_TWO,
1071 V_INF, V_UNUSED },
1072 { OID_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_UNUSED },
1073 { OID_MUL, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1074 { OID_DIV, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED },
1075 { OID_REM, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED },
1076 { OID_MOD, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED },
1077 { OID_VEC_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1078 { OID_VEC_MUL_M, V_ZERO_OR_DENORM_TIMES_TWO,
1079 V_ZERO, V_UNUSED, V_UNUSED },
1080 { OID_MAT_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1081 { OID_MAT_MUL_V, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1082 { OID_MAT_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1083 { OID_OUT_PROD, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1084 { OID_DOT, V_ZERO_OR_DENORM_TIMES_TWO,
1085 V_ZERO, V_UNUSED, V_UNUSED },
1086 { OID_ATAN2, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED },
1087 { OID_POW, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED },
1088 { OID_MIX, V_HALF, V_ZERO, V_INF, V_UNUSED },
1089 { OID_MIN, V_ZERO, V_ZERO, V_ZERO, V_UNUSED },
1090 { OID_MAX, V_ONE, V_ZERO, V_INF, V_UNUSED },
1091 { OID_CLAMP, V_ONE, V_ZERO, V_INF, V_UNUSED },
1092 { OID_STEP, V_ONE, V_ONE, V_ONE, V_UNUSED },
1093 { OID_SSTEP, V_HALF, V_ONE, V_ZERO, V_UNUSED },
1094 { OID_FMA, V_HALF, V_HALF, V_UNUSED, V_UNUSED },
1095 { OID_FACE_FWD, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE },
1096 { OID_NMIN, V_ZERO, V_ZERO, V_ZERO, V_ZERO },
1097 { OID_NMAX, V_ONE, V_ZERO, V_INF, V_ZERO },
1098 { OID_NCLAMP, V_ONE, V_ZERO, V_INF, V_ZERO },
1099 { OID_DIST, V_ONE, V_ZERO, V_INF, V_UNUSED },
1100 { OID_CROSS, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1101 };
1102
1103 const UnaryCase unaryOpFTZArr[] = {
1104 //operation op den
1105 { OID_NEGATE, V_MINUS_ZERO },
1106 { OID_ROUND, V_ZERO },
1107 { OID_ROUND_EV, V_ZERO },
1108 { OID_TRUNC, V_ZERO },
1109 { OID_ABS, V_ZERO },
1110 { OID_FLOOR, V_ZERO },
1111 { OID_CEIL, V_ZERO_OR_ONE },
1112 { OID_FRACT, V_ZERO },
1113 { OID_RADIANS, V_ZERO },
1114 { OID_DEGREES, V_ZERO },
1115 { OID_SIN, V_ZERO },
1116 { OID_COS, V_TRIG_ONE },
1117 { OID_TAN, V_ZERO },
1118 { OID_ASIN, V_ZERO },
1119 { OID_ACOS, V_PI_DIV_2 },
1120 { OID_ATAN, V_ZERO },
1121 { OID_SINH, V_ZERO },
1122 { OID_COSH, V_ONE },
1123 { OID_TANH, V_ZERO },
1124 { OID_ASINH, V_ZERO },
1125 { OID_ACOSH, V_UNUSED },
1126 { OID_ATANH, V_ZERO },
1127 { OID_EXP, V_ONE },
1128 { OID_LOG, V_MINUS_INF_OR_LOG_DENORM },
1129 { OID_EXP2, V_ONE },
1130 { OID_LOG2, V_MINUS_INF_OR_LOG2_DENORM },
1131 { OID_SQRT, V_ZERO_OR_SQRT_DENORM },
1132 { OID_INV_SQRT, V_INF_OR_INV_SQRT_DENORM },
1133 { OID_MAT_DET, V_ZERO },
1134 { OID_MAT_INV, V_ZERO_OR_MINUS_ZERO },
1135 { OID_MODF, V_ZERO },
1136 { OID_MODF_ST, V_ZERO },
1137 { OID_NORMALIZE, V_ZERO },
1138 { OID_REFLECT, V_ZERO },
1139 { OID_REFRACT, V_ZERO },
1140 { OID_LENGTH, V_ZERO },
1141 };
1142
1143 const BinaryCase binaryOpDenormPreserveArr[] = {
1144 //operation den op one den op den den op inf den op nan
1145 { OID_PHI, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1146 { OID_SELECT, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1147 { OID_ADD, V_ONE, V_DENORM_TIMES_TWO, V_INF, V_NAN },
1148 { OID_SUB, V_MINUS_ONE_OR_CLOSE, V_ZERO, V_MINUS_INF, V_NAN },
1149 { OID_MUL, V_DENORM, V_ZERO, V_INF, V_NAN },
1150 { OID_VEC_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN },
1151 { OID_VEC_MUL_M, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1152 { OID_MAT_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN },
1153 { OID_MAT_MUL_V, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1154 { OID_MAT_MUL_M, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1155 { OID_OUT_PROD, V_DENORM, V_ZERO, V_INF, V_NAN },
1156 { OID_DOT, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1157 { OID_MIX, V_HALF, V_DENORM, V_INF, V_NAN },
1158 { OID_FMA, V_HALF, V_HALF, V_INF, V_NAN },
1159 { OID_MIN, V_DENORM, V_DENORM, V_DENORM, V_UNUSED },
1160 { OID_MAX, V_ONE, V_DENORM, V_INF, V_UNUSED },
1161 { OID_CLAMP, V_ONE, V_DENORM, V_INF, V_UNUSED },
1162 { OID_NMIN, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1163 { OID_NMAX, V_ONE, V_DENORM, V_INF, V_DENORM },
1164 { OID_NCLAMP, V_ONE, V_DENORM, V_INF, V_DENORM },
1165 };
1166
1167 const UnaryCase unaryOpDenormPreserveArr[] = {
1168 //operation op den
1169 { OID_RETURN_VAL, V_DENORM },
1170 { OID_D_EXTRACT, V_DENORM },
1171 { OID_D_INSERT, V_DENORM },
1172 { OID_SHUFFLE, V_DENORM },
1173 { OID_COMPOSITE, V_DENORM },
1174 { OID_COMPOSITE_INS, V_DENORM },
1175 { OID_COPY, V_DENORM },
1176 { OID_TRANSPOSE, V_DENORM },
1177 { OID_NEGATE, V_DENORM },
1178 { OID_ABS, V_DENORM },
1179 { OID_SIGN, V_ONE },
1180 { OID_RADIANS, V_DENORM },
1181 { OID_DEGREES, V_DEGREES_DENORM },
1182 };
1183
1184 binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1185 binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1186 unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1187 unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1188 binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1189 binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1190 unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1191 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1192 }
1193
1194 template<>
TypeTestResults()1195 TypeTestResults<float>::TypeTestResults()
1196 {
1197 m_floatType = FP32;
1198
1199 const BinaryCase binaryOpFTZArr[] = {
1200 //operation den op one den op den den op inf den op nan
1201 { OID_ADD, V_ONE, V_ZERO, V_INF, V_UNUSED },
1202 { OID_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_UNUSED },
1203 { OID_MUL, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1204 { OID_DIV, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED },
1205 { OID_REM, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED },
1206 { OID_MOD, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED },
1207 { OID_VEC_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1208 { OID_VEC_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1209 { OID_MAT_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1210 { OID_MAT_MUL_V, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1211 { OID_MAT_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1212 { OID_OUT_PROD, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1213 { OID_DOT, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1214 { OID_ATAN2, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED },
1215 { OID_POW, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED },
1216 { OID_MIX, V_HALF, V_ZERO, V_INF, V_UNUSED },
1217 { OID_MIN, V_ZERO, V_ZERO, V_ZERO, V_UNUSED },
1218 { OID_MAX, V_ONE, V_ZERO, V_INF, V_UNUSED },
1219 { OID_CLAMP, V_ONE, V_ZERO, V_INF, V_UNUSED },
1220 { OID_STEP, V_ONE, V_ONE, V_ONE, V_UNUSED },
1221 { OID_SSTEP, V_HALF, V_ONE, V_ZERO, V_UNUSED },
1222 { OID_FMA, V_HALF, V_HALF, V_UNUSED, V_UNUSED },
1223 { OID_FACE_FWD, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE },
1224 { OID_NMIN, V_ZERO, V_ZERO, V_ZERO, V_ZERO },
1225 { OID_NMAX, V_ONE, V_ZERO, V_INF, V_ZERO },
1226 { OID_NCLAMP, V_ONE, V_ZERO, V_INF, V_ZERO },
1227 { OID_DIST, V_ONE, V_ZERO, V_INF, V_UNUSED },
1228 { OID_CROSS, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1229 };
1230
1231 const UnaryCase unaryOpFTZArr[] = {
1232 //operation op den
1233 { OID_NEGATE, V_MINUS_ZERO },
1234 { OID_ROUND, V_ZERO },
1235 { OID_ROUND_EV, V_ZERO },
1236 { OID_TRUNC, V_ZERO },
1237 { OID_ABS, V_ZERO },
1238 { OID_FLOOR, V_ZERO },
1239 { OID_CEIL, V_ZERO_OR_ONE },
1240 { OID_FRACT, V_ZERO },
1241 { OID_RADIANS, V_ZERO },
1242 { OID_DEGREES, V_ZERO },
1243 { OID_SIN, V_ZERO },
1244 { OID_COS, V_TRIG_ONE },
1245 { OID_TAN, V_ZERO },
1246 { OID_ASIN, V_ZERO },
1247 { OID_ACOS, V_PI_DIV_2 },
1248 { OID_ATAN, V_ZERO },
1249 { OID_SINH, V_ZERO },
1250 { OID_COSH, V_ONE },
1251 { OID_TANH, V_ZERO },
1252 { OID_ASINH, V_ZERO },
1253 { OID_ACOSH, V_UNUSED },
1254 { OID_ATANH, V_ZERO },
1255 { OID_EXP, V_ONE },
1256 { OID_LOG, V_MINUS_INF_OR_LOG_DENORM },
1257 { OID_EXP2, V_ONE },
1258 { OID_LOG2, V_MINUS_INF_OR_LOG2_DENORM },
1259 { OID_SQRT, V_ZERO_OR_SQRT_DENORM },
1260 { OID_INV_SQRT, V_INF_OR_INV_SQRT_DENORM },
1261 { OID_MAT_DET, V_ZERO },
1262 { OID_MAT_INV, V_ZERO_OR_MINUS_ZERO },
1263 { OID_MODF, V_ZERO },
1264 { OID_MODF_ST, V_ZERO },
1265 { OID_NORMALIZE, V_ZERO },
1266 { OID_REFLECT, V_ZERO },
1267 { OID_REFRACT, V_ZERO },
1268 { OID_LENGTH, V_ZERO },
1269 };
1270
1271 const BinaryCase binaryOpDenormPreserveArr[] = {
1272 //operation den op one den op den den op inf den op nan
1273 { OID_PHI, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1274 { OID_SELECT, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1275 { OID_ADD, V_ONE, V_DENORM_TIMES_TWO, V_INF, V_NAN },
1276 { OID_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_NAN },
1277 { OID_MUL, V_DENORM, V_ZERO, V_INF, V_NAN },
1278 { OID_VEC_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN },
1279 { OID_VEC_MUL_M, V_DENORM, V_ZERO, V_INF, V_NAN },
1280 { OID_MAT_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN },
1281 { OID_MAT_MUL_V, V_DENORM, V_ZERO, V_INF, V_NAN },
1282 { OID_MAT_MUL_M, V_DENORM, V_ZERO, V_INF, V_NAN },
1283 { OID_OUT_PROD, V_DENORM, V_ZERO, V_INF, V_NAN },
1284 { OID_DOT, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1285 { OID_MIX, V_HALF, V_DENORM, V_INF, V_NAN },
1286 { OID_FMA, V_HALF, V_HALF, V_INF, V_NAN },
1287 { OID_MIN, V_DENORM, V_DENORM, V_DENORM, V_UNUSED },
1288 { OID_MAX, V_ONE, V_DENORM, V_INF, V_UNUSED },
1289 { OID_CLAMP, V_ONE, V_DENORM, V_INF, V_UNUSED },
1290 { OID_NMIN, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1291 { OID_NMAX, V_ONE, V_DENORM, V_INF, V_DENORM },
1292 { OID_NCLAMP, V_ONE, V_DENORM, V_INF, V_DENORM },
1293 };
1294
1295 const UnaryCase unaryOpDenormPreserveArr[] = {
1296 //operation op den
1297 { OID_RETURN_VAL, V_DENORM },
1298 { OID_D_EXTRACT, V_DENORM },
1299 { OID_D_INSERT, V_DENORM },
1300 { OID_SHUFFLE, V_DENORM },
1301 { OID_COMPOSITE, V_DENORM },
1302 { OID_COMPOSITE_INS, V_DENORM },
1303 { OID_COPY, V_DENORM },
1304 { OID_TRANSPOSE, V_DENORM },
1305 { OID_NEGATE, V_DENORM },
1306 { OID_ABS, V_DENORM },
1307 { OID_SIGN, V_ONE },
1308 { OID_RADIANS, V_DENORM },
1309 { OID_DEGREES, V_DEGREES_DENORM },
1310 };
1311
1312 binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1313 binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1314 unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1315 unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1316 binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1317 binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1318 unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1319 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1320 }
1321
1322 template<>
TypeTestResults()1323 TypeTestResults<double>::TypeTestResults()
1324 {
1325 m_floatType = FP64;
1326
1327 // fp64 is supported by fewer operations then fp16 and fp32
1328 // e.g. Radians and Degrees functions are not supported
1329 const BinaryCase binaryOpFTZArr[] = {
1330 //operation den op one den op den den op inf den op nan
1331 { OID_ADD, V_ONE, V_ZERO, V_INF, V_UNUSED },
1332 { OID_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_UNUSED },
1333 { OID_MUL, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1334 { OID_DIV, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED },
1335 { OID_REM, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED },
1336 { OID_MOD, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED },
1337 { OID_VEC_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1338 { OID_VEC_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1339 { OID_MAT_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1340 { OID_MAT_MUL_V, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1341 { OID_MAT_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1342 { OID_OUT_PROD, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1343 { OID_DOT, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1344 { OID_MIX, V_HALF, V_ZERO, V_INF, V_UNUSED },
1345 { OID_MIN, V_ZERO, V_ZERO, V_ZERO, V_UNUSED },
1346 { OID_MAX, V_ONE, V_ZERO, V_INF, V_UNUSED },
1347 { OID_CLAMP, V_ONE, V_ZERO, V_INF, V_UNUSED },
1348 { OID_STEP, V_ONE, V_ONE, V_ONE, V_UNUSED },
1349 { OID_SSTEP, V_HALF, V_ONE, V_ZERO, V_UNUSED },
1350 { OID_FMA, V_HALF, V_HALF, V_UNUSED, V_UNUSED },
1351 { OID_FACE_FWD, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE },
1352 { OID_NMIN, V_ZERO, V_ZERO, V_ZERO, V_ZERO },
1353 { OID_NMAX, V_ONE, V_ZERO, V_INF, V_ZERO },
1354 { OID_NCLAMP, V_ONE, V_ZERO, V_INF, V_ZERO },
1355 { OID_DIST, V_ONE, V_ZERO, V_INF, V_UNUSED },
1356 { OID_CROSS, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED },
1357 };
1358
1359 const UnaryCase unaryOpFTZArr[] = {
1360 //operation op den
1361 { OID_NEGATE, V_MINUS_ZERO },
1362 { OID_ROUND, V_ZERO },
1363 { OID_ROUND_EV, V_ZERO },
1364 { OID_TRUNC, V_ZERO },
1365 { OID_ABS, V_ZERO },
1366 { OID_FLOOR, V_ZERO },
1367 { OID_CEIL, V_ZERO_OR_ONE },
1368 { OID_FRACT, V_ZERO },
1369 { OID_SQRT, V_ZERO_OR_SQRT_DENORM },
1370 { OID_INV_SQRT, V_INF_OR_INV_SQRT_DENORM },
1371 { OID_MAT_DET, V_ZERO },
1372 { OID_MAT_INV, V_ZERO_OR_MINUS_ZERO },
1373 { OID_MODF, V_ZERO },
1374 { OID_MODF_ST, V_ZERO },
1375 { OID_NORMALIZE, V_ZERO },
1376 { OID_REFLECT, V_ZERO },
1377 { OID_LENGTH, V_ZERO },
1378 };
1379
1380 const BinaryCase binaryOpDenormPreserveArr[] = {
1381 //operation den op one den op den den op inf den op nan
1382 { OID_PHI, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1383 { OID_SELECT, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1384 { OID_ADD, V_ONE, V_DENORM_TIMES_TWO, V_INF, V_NAN },
1385 { OID_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_NAN },
1386 { OID_MUL, V_DENORM, V_ZERO, V_INF, V_NAN },
1387 { OID_VEC_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN },
1388 { OID_VEC_MUL_M, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1389 { OID_MAT_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN },
1390 { OID_MAT_MUL_V, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1391 { OID_MAT_MUL_M, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1392 { OID_OUT_PROD, V_DENORM, V_ZERO, V_INF, V_NAN },
1393 { OID_DOT, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN },
1394 { OID_MIX, V_HALF, V_DENORM, V_INF, V_NAN },
1395 { OID_FMA, V_HALF, V_HALF, V_INF, V_NAN },
1396 { OID_MIN, V_DENORM, V_DENORM, V_DENORM, V_UNUSED },
1397 { OID_MAX, V_ONE, V_DENORM, V_INF, V_UNUSED },
1398 { OID_CLAMP, V_ONE, V_DENORM, V_INF, V_UNUSED },
1399 { OID_NMIN, V_DENORM, V_DENORM, V_DENORM, V_DENORM },
1400 { OID_NMAX, V_ONE, V_DENORM, V_INF, V_DENORM },
1401 { OID_NCLAMP, V_ONE, V_DENORM, V_INF, V_DENORM },
1402 };
1403
1404 const UnaryCase unaryOpDenormPreserveArr[] = {
1405 //operation op den
1406 { OID_RETURN_VAL, V_DENORM },
1407 { OID_D_EXTRACT, V_DENORM },
1408 { OID_D_INSERT, V_DENORM },
1409 { OID_SHUFFLE, V_DENORM },
1410 { OID_COMPOSITE, V_DENORM },
1411 { OID_COMPOSITE_INS, V_DENORM },
1412 { OID_COPY, V_DENORM },
1413 { OID_TRANSPOSE, V_DENORM },
1414 { OID_NEGATE, V_DENORM },
1415 { OID_ABS, V_DENORM },
1416 { OID_SIGN, V_ONE },
1417 };
1418
1419 binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1420 binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1421 unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1422 unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1423 binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1424 binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1425 unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1426 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1427 }
1428
1429 // Operation structure holds data needed to test specified SPIR-V operation. This class contains
1430 // additional annotations, additional types and aditional constants that should be properly included
1431 // in SPIR-V code. Commands attribute in this structure contains code that performs tested operation
1432 // on given arguments, in some cases verification is also performed there.
1433 // All snipets stroed in this structure are generic and can be specialized for fp16, fp32 or fp64,
1434 // thanks to that this data can be shared by many OperationTestCase instances (testing diferent
1435 // float behaviours on diferent float widths).
1436 struct Operation
1437 {
1438 // operation name is included in test case name
1439 const char* name;
1440
1441 // How extensively is the floating point type used?
1442 FloatUsage floatUsage;
1443
1444 // operation specific spir-v snippets that will be
1445 // placed in proper places in final test shader
1446 const char* annotations;
1447 const char* types;
1448 const char* constants;
1449 const char* variables;
1450 const char* functions;
1451 const char* commands;
1452
1453 // conversion operations operate on one float type and produce float
1454 // type with different bit width; restrictedInputType is used only when
1455 // isInputTypeRestricted is set to true and it restricts usage of this
1456 // operation to specified input type
1457 bool isInputTypeRestricted;
1458 FloatType restrictedInputType;
1459
1460 // arguments for OpSpecConstant need to be specified also as constant
1461 bool isSpecConstant;
1462
1463 // set if c_float* constant is used in operation
1464 FloatStatementUsageFlags statementUsageFlags;
1465
Operationvkt::SpirVAssembly::__anonbe9b2f4c0111::Operation1466 Operation() {}
1467
1468 // Minimal constructor - used by most of operations
Operationvkt::SpirVAssembly::__anonbe9b2f4c0111::Operation1469 Operation(const char* _name, FloatUsage _floatUsage, const char* _commands, const FloatStatementUsageFlags _statementUsageFlags = 0)
1470 : name(_name)
1471 , floatUsage(_floatUsage)
1472 , annotations("")
1473 , types("")
1474 , constants("")
1475 , variables("")
1476 , functions("")
1477 , commands(_commands)
1478 , isInputTypeRestricted(false)
1479 , restrictedInputType(FP16) // not used as isInputTypeRestricted is false
1480 , isSpecConstant(false)
1481 , statementUsageFlags(_statementUsageFlags)
1482 {}
1483
1484 // Conversion operations constructor (used also by conversions done in SpecConstantOp)
Operationvkt::SpirVAssembly::__anonbe9b2f4c0111::Operation1485 Operation(const char* _name,
1486 FloatUsage _floatUsage,
1487 bool specConstant,
1488 FloatType _inputType,
1489 const char* _constants,
1490 const char* _commands,
1491 const FloatStatementUsageFlags _statementUsageFlags = 0)
1492 : name(_name)
1493 , floatUsage(_floatUsage)
1494 , annotations("")
1495 , types("")
1496 , constants(_constants)
1497 , variables("")
1498 , functions("")
1499 , commands(_commands)
1500 , isInputTypeRestricted(true)
1501 , restrictedInputType(_inputType)
1502 , isSpecConstant(specConstant)
1503 , statementUsageFlags(_statementUsageFlags)
1504 {}
1505
1506 // Full constructor - used by few operations, that are more complex to test
Operationvkt::SpirVAssembly::__anonbe9b2f4c0111::Operation1507 Operation(const char* _name,
1508 FloatUsage _floatUsage,
1509 const char* _annotations,
1510 const char* _types,
1511 const char* _constants,
1512 const char* _variables,
1513 const char* _functions,
1514 const char* _commands,
1515 const FloatStatementUsageFlags _statementUsageFlags = 0)
1516 : name(_name)
1517 , floatUsage(_floatUsage)
1518 , annotations(_annotations)
1519 , types(_types)
1520 , constants(_constants)
1521 , variables(_variables)
1522 , functions(_functions)
1523 , commands(_commands)
1524 , isInputTypeRestricted(false)
1525 , restrictedInputType(FP16) // not used as isInputTypeRestricted is false
1526 , isSpecConstant(false)
1527 , statementUsageFlags(_statementUsageFlags)
1528 {}
1529
1530 // Full constructor - used by rounding override cases
Operationvkt::SpirVAssembly::__anonbe9b2f4c0111::Operation1531 Operation(const char* _name,
1532 FloatUsage _floatUsage,
1533 FloatType _inputType,
1534 const char* _annotations,
1535 const char* _types,
1536 const char* _constants,
1537 const char* _commands,
1538 const FloatStatementUsageFlags _statementUsageFlags = 0)
1539 : name(_name)
1540 , floatUsage(_floatUsage)
1541 , annotations(_annotations)
1542 , types(_types)
1543 , constants(_constants)
1544 , variables("")
1545 , functions("")
1546 , commands(_commands)
1547 , isInputTypeRestricted(true)
1548 , restrictedInputType(_inputType)
1549 , isSpecConstant(false)
1550 , statementUsageFlags(_statementUsageFlags)
1551 {}
1552 };
1553
1554 // Class storing input that will be passed to operation and expected
1555 // output that should be generated for specified behaviour.
1556 class OperationTestCase
1557 {
1558 public:
1559
OperationTestCase()1560 OperationTestCase() {}
1561
OperationTestCase(const char * _baseName,BehaviorFlags _behaviorFlags,OperationId _operatinId,ValueId _input1,ValueId _input2,ValueId _expectedOutput,bool _fp16Without16BitStorage=false)1562 OperationTestCase(const char* _baseName,
1563 BehaviorFlags _behaviorFlags,
1564 OperationId _operatinId,
1565 ValueId _input1,
1566 ValueId _input2,
1567 ValueId _expectedOutput,
1568 bool _fp16Without16BitStorage = false)
1569 : baseName(_baseName)
1570 , behaviorFlags(_behaviorFlags)
1571 , operationId(_operatinId)
1572 , expectedOutput(_expectedOutput)
1573 , fp16Without16BitStorage(_fp16Without16BitStorage)
1574 {
1575 input[0] = _input1;
1576 input[1] = _input2;
1577 }
1578
1579 public:
1580
1581 string baseName;
1582 BehaviorFlags behaviorFlags;
1583 OperationId operationId;
1584 ValueId input[2];
1585 ValueId expectedOutput;
1586 bool fp16Without16BitStorage;
1587 };
1588
1589 // Helper structure used to store specialized operation
1590 // data. This data is ready to be used during shader assembly.
1591 struct SpecializedOperation
1592 {
1593 string constants;
1594 string annotations;
1595 string types;
1596 string arguments;
1597 string variables;
1598 string functions;
1599 string commands;
1600
1601 FloatType inFloatType;
1602 TypeSnippetsSP inTypeSnippets;
1603 TypeSnippetsSP outTypeSnippets;
1604 FloatStatementUsageFlags argumentsUsesFloatConstant;
1605 };
1606
1607 // Class responsible for constructing list of test cases for specified
1608 // float type and specified way of preparation of arguments.
1609 // Arguments can be either read from input SSBO or generated via math
1610 // operations in spir-v code.
1611 class TestCasesBuilder
1612 {
1613 public:
1614
1615 void init();
1616 void build(vector<OperationTestCase>& testCases, TypeTestResultsSP typeTestResults, bool argumentsFromInput);
1617 const Operation& getOperation(OperationId id) const;
1618
1619 private:
1620
1621 void createUnaryTestCases(vector<OperationTestCase>& testCases,
1622 OperationId operationId,
1623 ValueId denormPreserveResult,
1624 ValueId denormFTZResult,
1625 bool fp16WithoutStorage = false) const;
1626
1627 private:
1628
1629 // Operations are shared betwean test cases so they are
1630 // passed to them as pointers to data stored in TestCasesBuilder.
1631 typedef OperationTestCase OTC;
1632 typedef Operation Op;
1633 map<int, Op> m_operations;
1634 };
1635
init()1636 void TestCasesBuilder::init()
1637 {
1638 map<int, Op>& mo = m_operations;
1639
1640 // predefine operations repeatedly used in tests; note that "_float"
1641 // in every operation command will be replaced with either "_f16",
1642 // "_f32" or "_f64" - StringTemplate is not used here because it
1643 // would make code less readable
1644 // m_operations contains generic operation definitions that can be
1645 // used for all float types
1646
1647 mo[OID_NEGATE] = Op("negate", FLOAT_ARITHMETIC,
1648 "%result = OpFNegate %type_float %arg1\n",
1649 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1650 mo[OID_COMPOSITE] = Op("composite", FLOAT_ARITHMETIC,
1651 "%vec1 = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1652 "%result = OpCompositeExtract %type_float %vec1 0\n",
1653 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1654 mo[OID_COMPOSITE_INS] = Op("comp_ins", FLOAT_ARITHMETIC,
1655 "%vec1 = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_0\n"
1656 "%vec2 = OpCompositeInsert %type_float_vec2 %arg1 %vec1 0\n"
1657 "%result = OpCompositeExtract %type_float %vec2 0\n",
1658 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1659 mo[OID_COPY] = Op("copy", FLOAT_STORAGE_ONLY,
1660 "%result = OpCopyObject %type_float %arg1\n",
1661 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1662 mo[OID_D_EXTRACT] = Op("extract", FLOAT_ARITHMETIC,
1663 "%vec1 = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1664 "%result = OpVectorExtractDynamic %type_float %vec1 %c_i32_0\n",
1665 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1666 mo[OID_D_INSERT] = Op("insert", FLOAT_ARITHMETIC,
1667 "%tmpVec = OpCompositeConstruct %type_float_vec2 %c_float_2 %c_float_2\n"
1668 "%vec1 = OpVectorInsertDynamic %type_float_vec2 %tmpVec %arg1 %c_i32_0\n"
1669 "%result = OpCompositeExtract %type_float %vec1 0\n",
1670 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1671 mo[OID_SHUFFLE] = Op("shuffle", FLOAT_ARITHMETIC,
1672 "%tmpVec1 = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1673 "%tmpVec2 = OpCompositeConstruct %type_float_vec2 %c_float_2 %c_float_2\n" // NOTE: its impossible to test shuffle with denorms flushed
1674 "%vec1 = OpVectorShuffle %type_float_vec2 %tmpVec1 %tmpVec2 0 2\n" // to zero as this will be done by earlier operation
1675 "%result = OpCompositeExtract %type_float %vec1 0\n", // (this also applies to few other operations)
1676 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1677 mo[OID_TRANSPOSE] = Op("transpose", FLOAT_ARITHMETIC,
1678 "%col = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1679 "%mat = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1680 "%tmat = OpTranspose %type_float_mat2x2 %mat\n"
1681 "%tcol = OpCompositeExtract %type_float_vec2 %tmat 0\n"
1682 "%result = OpCompositeExtract %type_float %tcol 0\n",
1683 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1684 mo[OID_RETURN_VAL] = Op("ret_val", FLOAT_ARITHMETIC,
1685 "",
1686 "%type_test_fun = OpTypeFunction %type_float %type_float\n",
1687 "",
1688 "",
1689 "%test_fun = OpFunction %type_float None %type_test_fun\n"
1690 "%param = OpFunctionParameter %type_float\n"
1691 "%entry = OpLabel\n"
1692 "OpReturnValue %param\n"
1693 "OpFunctionEnd\n",
1694 "%result = OpFunctionCall %type_float %test_fun %arg1\n",
1695 B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1696
1697 // conversion operations that are meant to be used only for single output type (defined by the second number in name)
1698 const char* convertSource = "%result = OpFConvert %type_float %arg1\n";
1699 mo[OID_CONV_FROM_FP16] = Op("conv_from_fp16", FLOAT_STORAGE_ONLY, false, FP16, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1700 mo[OID_CONV_FROM_FP32] = Op("conv_from_fp32", FLOAT_STORAGE_ONLY, false, FP32, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1701 mo[OID_CONV_FROM_FP64] = Op("conv_from_fp64", FLOAT_STORAGE_ONLY, false, FP64, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1702
1703 // of all the opcodes supported by OpSpecConstantOp we can only test the FConvert opcode with literals, as everything
1704 // else requires the Kernel capability (OpenCL); values of literals used in SPIR-V code must be equivalent to
1705 // V_CONV_FROM_FP32_ARG and V_CONV_FROM_FP64_ARG so we can use the same expected rounded values as for regular OpFConvert
1706 mo[OID_SCONST_CONV_FROM_FP32_TO_FP16]
1707 = Op("sconst_conv_from_fp32", FLOAT_ARITHMETIC, true, FP32,
1708 "%c_arg = OpConstant %type_f32 1.22334445\n"
1709 "%result = OpSpecConstantOp %type_f16 FConvert %c_arg\n",
1710 "",
1711 B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP32);
1712 mo[OID_SCONST_CONV_FROM_FP64_TO_FP32]
1713 = Op("sconst_conv_from_fp64", FLOAT_ARITHMETIC, true, FP64,
1714 "%c_arg = OpConstant %type_f64 1.22334455\n"
1715 "%result = OpSpecConstantOp %type_f32 FConvert %c_arg\n",
1716 "",
1717 B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
1718 mo[OID_SCONST_CONV_FROM_FP64_TO_FP16]
1719 = Op("sconst_conv_from_fp64", FLOAT_ARITHMETIC, true, FP64,
1720 "%c_arg = OpConstant %type_f64 1.22334445\n"
1721 "%result = OpSpecConstantOp %type_f16 FConvert %c_arg\n",
1722 "",
1723 B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
1724
1725 mo[OID_ADD] = Op("add", FLOAT_ARITHMETIC, "%result = OpFAdd %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1726 mo[OID_SUB] = Op("sub", FLOAT_ARITHMETIC, "%result = OpFSub %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1727 mo[OID_MUL] = Op("mul", FLOAT_ARITHMETIC, "%result = OpFMul %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1728 mo[OID_DIV] = Op("div", FLOAT_ARITHMETIC, "%result = OpFDiv %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1729 mo[OID_REM] = Op("rem", FLOAT_ARITHMETIC, "%result = OpFRem %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1730 mo[OID_MOD] = Op("mod", FLOAT_ARITHMETIC, "%result = OpFMod %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1731 mo[OID_PHI] = Op("phi", FLOAT_ARITHMETIC,
1732 "%comp = OpFOrdGreaterThan %type_bool %arg1 %arg2\n"
1733 " OpSelectionMerge %comp_merge None\n"
1734 " OpBranchConditional %comp %true_branch %false_branch\n"
1735 "%true_branch = OpLabel\n"
1736 " OpBranch %comp_merge\n"
1737 "%false_branch = OpLabel\n"
1738 " OpBranch %comp_merge\n"
1739 "%comp_merge = OpLabel\n"
1740 "%result = OpPhi %type_float %arg2 %true_branch %arg1 %false_branch\n",
1741 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1742 mo[OID_SELECT] = Op("select", FLOAT_ARITHMETIC,
1743 "%always_true = OpFOrdGreaterThan %type_bool %c_float_1 %c_float_0\n"
1744 "%result = OpSelect %type_float %always_true %arg1 %arg2\n",
1745 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1746 mo[OID_DOT] = Op("dot", FLOAT_ARITHMETIC,
1747 "%vec1 = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1748 "%vec2 = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1749 "%result = OpDot %type_float %vec1 %vec2\n",
1750 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1751 mo[OID_VEC_MUL_S] = Op("vmuls", FLOAT_ARITHMETIC,
1752 "%vec = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1753 "%tmpVec = OpVectorTimesScalar %type_float_vec2 %vec %arg2\n"
1754 "%result = OpCompositeExtract %type_float %tmpVec 0\n",
1755 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1756 mo[OID_VEC_MUL_M] = Op("vmulm", FLOAT_ARITHMETIC,
1757 "%col = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1758 "%mat = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1759 "%vec = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1760 "%tmpVec = OpVectorTimesMatrix %type_float_vec2 %vec %mat\n"
1761 "%result = OpCompositeExtract %type_float %tmpVec 0\n",
1762 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1763 mo[OID_MAT_MUL_S] = Op("mmuls", FLOAT_ARITHMETIC,
1764 "%col = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1765 "%mat = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1766 "%mulMat = OpMatrixTimesScalar %type_float_mat2x2 %mat %arg2\n"
1767 "%extCol = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1768 "%result = OpCompositeExtract %type_float %extCol 0\n",
1769 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1770 mo[OID_MAT_MUL_V] = Op("mmulv", FLOAT_ARITHMETIC,
1771 "%col = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1772 "%mat = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1773 "%vec = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1774 "%mulVec = OpMatrixTimesVector %type_float_vec2 %mat %vec\n"
1775 "%result = OpCompositeExtract %type_float %mulVec 0\n",
1776 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1777 mo[OID_MAT_MUL_M] = Op("mmulm", FLOAT_ARITHMETIC,
1778 "%col1 = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1779 "%mat1 = OpCompositeConstruct %type_float_mat2x2 %col1 %col1\n"
1780 "%col2 = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1781 "%mat2 = OpCompositeConstruct %type_float_mat2x2 %col2 %col2\n"
1782 "%mulMat = OpMatrixTimesMatrix %type_float_mat2x2 %mat1 %mat2\n"
1783 "%extCol = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1784 "%result = OpCompositeExtract %type_float %extCol 0\n",
1785 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1786 mo[OID_OUT_PROD] = Op("out_prod", FLOAT_ARITHMETIC,
1787 "%vec1 = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1788 "%vec2 = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1789 "%mulMat = OpOuterProduct %type_float_mat2x2 %vec1 %vec2\n"
1790 "%extCol = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1791 "%result = OpCompositeExtract %type_float %extCol 0\n",
1792 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1793
1794 // comparison operations
1795 mo[OID_ORD_EQ] = Op("ord_eq", FLOAT_ARITHMETIC,
1796 "%boolVal = OpFOrdEqual %type_bool %arg1 %arg2\n"
1797 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1798 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1799 mo[OID_UORD_EQ] = Op("uord_eq", FLOAT_ARITHMETIC,
1800 "%boolVal = OpFUnordEqual %type_bool %arg1 %arg2\n"
1801 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1802 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1803 mo[OID_ORD_NEQ] = Op("ord_neq", FLOAT_ARITHMETIC,
1804 "%boolVal = OpFOrdNotEqual %type_bool %arg1 %arg2\n"
1805 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1806 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1807 mo[OID_UORD_NEQ] = Op("uord_neq", FLOAT_ARITHMETIC,
1808 "%boolVal = OpFUnordNotEqual %type_bool %arg1 %arg2\n"
1809 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1810 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1811 mo[OID_ORD_LS] = Op("ord_ls", FLOAT_ARITHMETIC,
1812 "%boolVal = OpFOrdLessThan %type_bool %arg1 %arg2\n"
1813 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1814 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1815 mo[OID_UORD_LS] = Op("uord_ls", FLOAT_ARITHMETIC,
1816 "%boolVal = OpFUnordLessThan %type_bool %arg1 %arg2\n"
1817 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1818 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1819 mo[OID_ORD_GT] = Op("ord_gt", FLOAT_ARITHMETIC,
1820 "%boolVal = OpFOrdGreaterThan %type_bool %arg1 %arg2\n"
1821 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1822 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1823 mo[OID_UORD_GT] = Op("uord_gt", FLOAT_ARITHMETIC,
1824 "%boolVal = OpFUnordGreaterThan %type_bool %arg1 %arg2\n"
1825 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1826 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1827 mo[OID_ORD_LE] = Op("ord_le", FLOAT_ARITHMETIC,
1828 "%boolVal = OpFOrdLessThanEqual %type_bool %arg1 %arg2\n"
1829 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1830 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1831 mo[OID_UORD_LE] = Op("uord_le", FLOAT_ARITHMETIC,
1832 "%boolVal = OpFUnordLessThanEqual %type_bool %arg1 %arg2\n"
1833 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1834 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1835 mo[OID_ORD_GE] = Op("ord_ge", FLOAT_ARITHMETIC,
1836 "%boolVal = OpFOrdGreaterThanEqual %type_bool %arg1 %arg2\n"
1837 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1838 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1839 mo[OID_UORD_GE] = Op("uord_ge", FLOAT_ARITHMETIC,
1840 "%boolVal = OpFUnordGreaterThanEqual %type_bool %arg1 %arg2\n"
1841 "%result = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1842 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1843
1844 mo[OID_ATAN2] = Op("atan2", FLOAT_ARITHMETIC,
1845 "%result = OpExtInst %type_float %std450 Atan2 %arg1 %arg2\n",
1846 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1847 mo[OID_POW] = Op("pow", FLOAT_ARITHMETIC,
1848 "%result = OpExtInst %type_float %std450 Pow %arg1 %arg2\n",
1849 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1850 mo[OID_MIX] = Op("mix", FLOAT_ARITHMETIC,
1851 "%result = OpExtInst %type_float %std450 FMix %arg1 %arg2 %c_float_0_5\n",
1852 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1853 mo[OID_FMA] = Op("fma", FLOAT_ARITHMETIC,
1854 "%result = OpExtInst %type_float %std450 Fma %arg1 %arg2 %c_float_0_5\n",
1855 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1856 mo[OID_MIN] = Op("min", FLOAT_ARITHMETIC,
1857 "%result = OpExtInst %type_float %std450 FMin %arg1 %arg2\n",
1858 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1859 mo[OID_MAX] = Op("max", FLOAT_ARITHMETIC,
1860 "%result = OpExtInst %type_float %std450 FMax %arg1 %arg2\n",
1861 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1862 mo[OID_CLAMP] = Op("clamp", FLOAT_ARITHMETIC,
1863 "%result = OpExtInst %type_float %std450 FClamp %arg1 %arg2 %arg2\n",
1864 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1865 mo[OID_STEP] = Op("step", FLOAT_ARITHMETIC,
1866 "%result = OpExtInst %type_float %std450 Step %arg1 %arg2\n",
1867 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1868 mo[OID_SSTEP] = Op("sstep", FLOAT_ARITHMETIC,
1869 "%result = OpExtInst %type_float %std450 SmoothStep %arg1 %arg2 %c_float_0_5\n",
1870 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1871 mo[OID_DIST] = Op("distance", FLOAT_ARITHMETIC,
1872 "%result = OpExtInst %type_float %std450 Distance %arg1 %arg2\n",
1873 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1874 mo[OID_CROSS] = Op("cross", FLOAT_ARITHMETIC,
1875 "%vec1 = OpCompositeConstruct %type_float_vec3 %arg1 %arg1 %arg1\n"
1876 "%vec2 = OpCompositeConstruct %type_float_vec3 %arg2 %arg2 %arg2\n"
1877 "%tmpVec = OpExtInst %type_float_vec3 %std450 Cross %vec1 %vec2\n"
1878 "%result = OpCompositeExtract %type_float %tmpVec 0\n",
1879 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1880 mo[OID_FACE_FWD] = Op("face_fwd", FLOAT_ARITHMETIC,
1881 "%result = OpExtInst %type_float %std450 FaceForward %c_float_1 %arg1 %arg2\n",
1882 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1883 mo[OID_NMIN] = Op("nmin", FLOAT_ARITHMETIC,
1884 "%result = OpExtInst %type_float %std450 NMin %arg1 %arg2\n",
1885 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1886 mo[OID_NMAX] = Op("nmax", FLOAT_ARITHMETIC,
1887 "%result = OpExtInst %type_float %std450 NMax %arg1 %arg2\n",
1888 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1889 mo[OID_NCLAMP] = Op("nclamp", FLOAT_ARITHMETIC,
1890 "%result = OpExtInst %type_float %std450 NClamp %arg2 %arg1 %arg2\n",
1891 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1892
1893 mo[OID_ROUND] = Op("round", FLOAT_ARITHMETIC,
1894 "%result = OpExtInst %type_float %std450 Round %arg1\n",
1895 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1896 mo[OID_ROUND_EV] = Op("round_ev", FLOAT_ARITHMETIC,
1897 "%result = OpExtInst %type_float %std450 RoundEven %arg1\n",
1898 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1899 mo[OID_TRUNC] = Op("trunc", FLOAT_ARITHMETIC,
1900 "%result = OpExtInst %type_float %std450 Trunc %arg1\n",
1901 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1902 mo[OID_ABS] = Op("abs", FLOAT_ARITHMETIC,
1903 "%result = OpExtInst %type_float %std450 FAbs %arg1\n",
1904 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1905 mo[OID_SIGN] = Op("sign", FLOAT_ARITHMETIC,
1906 "%result = OpExtInst %type_float %std450 FSign %arg1\n",
1907 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1908 mo[OID_FLOOR] = Op("floor", FLOAT_ARITHMETIC,
1909 "%result = OpExtInst %type_float %std450 Floor %arg1\n",
1910 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1911 mo[OID_CEIL] = Op("ceil", FLOAT_ARITHMETIC,
1912 "%result = OpExtInst %type_float %std450 Ceil %arg1\n",
1913 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1914 mo[OID_FRACT] = Op("fract", FLOAT_ARITHMETIC,
1915 "%result = OpExtInst %type_float %std450 Fract %arg1\n",
1916 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1917 mo[OID_RADIANS] = Op("radians", FLOAT_ARITHMETIC,
1918 "%result = OpExtInst %type_float %std450 Radians %arg1\n",
1919 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1920 mo[OID_DEGREES] = Op("degrees", FLOAT_ARITHMETIC,
1921 "%result = OpExtInst %type_float %std450 Degrees %arg1\n",
1922 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1923 mo[OID_SIN] = Op("sin", FLOAT_ARITHMETIC,
1924 "%result = OpExtInst %type_float %std450 Sin %arg1\n",
1925 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1926 mo[OID_COS] = Op("cos", FLOAT_ARITHMETIC,
1927 "%result = OpExtInst %type_float %std450 Cos %arg1\n",
1928 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1929 mo[OID_TAN] = Op("tan", FLOAT_ARITHMETIC,
1930 "%result = OpExtInst %type_float %std450 Tan %arg1\n",
1931 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1932 mo[OID_ASIN] = Op("asin", FLOAT_ARITHMETIC,
1933 "%result = OpExtInst %type_float %std450 Asin %arg1\n",
1934 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1935 mo[OID_ACOS] = Op("acos", FLOAT_ARITHMETIC,
1936 "%result = OpExtInst %type_float %std450 Acos %arg1\n",
1937 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1938 mo[OID_ATAN] = Op("atan", FLOAT_ARITHMETIC,
1939 "%result = OpExtInst %type_float %std450 Atan %arg1\n",
1940 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1941 mo[OID_SINH] = Op("sinh", FLOAT_ARITHMETIC,
1942 "%result = OpExtInst %type_float %std450 Sinh %arg1\n",
1943 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1944 mo[OID_COSH] = Op("cosh", FLOAT_ARITHMETIC,
1945 "%result = OpExtInst %type_float %std450 Cosh %arg1\n",
1946 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1947 mo[OID_TANH] = Op("tanh", FLOAT_ARITHMETIC,
1948 "%result = OpExtInst %type_float %std450 Tanh %arg1\n",
1949 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1950 mo[OID_ASINH] = Op("asinh", FLOAT_ARITHMETIC,
1951 "%result = OpExtInst %type_float %std450 Asinh %arg1\n",
1952 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1953 mo[OID_ACOSH] = Op("acosh", FLOAT_ARITHMETIC,
1954 "%result = OpExtInst %type_float %std450 Acosh %arg1\n",
1955 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1956 mo[OID_ATANH] = Op("atanh", FLOAT_ARITHMETIC,
1957 "%result = OpExtInst %type_float %std450 Atanh %arg1\n",
1958 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1959 mo[OID_EXP] = Op("exp", FLOAT_ARITHMETIC,
1960 "%result = OpExtInst %type_float %std450 Exp %arg1\n",
1961 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1962 mo[OID_LOG] = Op("log", FLOAT_ARITHMETIC,
1963 "%result = OpExtInst %type_float %std450 Log %arg1\n",
1964 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1965 mo[OID_EXP2] = Op("exp2", FLOAT_ARITHMETIC,
1966 "%result = OpExtInst %type_float %std450 Exp2 %arg1\n",
1967 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1968 mo[OID_LOG2] = Op("log2", FLOAT_ARITHMETIC,
1969 "%result = OpExtInst %type_float %std450 Log2 %arg1\n",
1970 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1971 mo[OID_SQRT] = Op("sqrt", FLOAT_ARITHMETIC,
1972 "%result = OpExtInst %type_float %std450 Sqrt %arg1\n",
1973 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1974 mo[OID_INV_SQRT] = Op("inv_sqrt", FLOAT_ARITHMETIC,
1975 "%result = OpExtInst %type_float %std450 InverseSqrt %arg1\n",
1976 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1977 mo[OID_MODF] = Op("modf", FLOAT_ARITHMETIC,
1978 "",
1979 "",
1980 "",
1981 "%tmpVarPtr = OpVariable %type_float_fptr Function\n",
1982 "",
1983 "%result = OpExtInst %type_float %std450 Modf %arg1 %tmpVarPtr\n",
1984 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1985 mo[OID_MODF_ST] = Op("modf_st", FLOAT_ARITHMETIC,
1986 "OpMemberDecorate %struct_ff 0 Offset ${float_width}\n"
1987 "OpMemberDecorate %struct_ff 1 Offset ${float_width}\n",
1988 "%struct_ff = OpTypeStruct %type_float %type_float\n"
1989 "%struct_ff_fptr = OpTypePointer Function %struct_ff\n",
1990 "",
1991 "%tmpStructPtr = OpVariable %struct_ff_fptr Function\n",
1992 "",
1993 "%tmpStruct = OpExtInst %struct_ff %std450 ModfStruct %arg1\n"
1994 " OpStore %tmpStructPtr %tmpStruct\n"
1995 "%tmpLoc = OpAccessChain %type_float_fptr %tmpStructPtr %c_i32_0\n"
1996 "%result = OpLoad %type_float %tmpLoc\n",
1997 B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1998 mo[OID_FREXP] = Op("frexp", FLOAT_ARITHMETIC,
1999 "",
2000 "",
2001 "",
2002 "%tmpVarPtr = OpVariable %type_i32_fptr Function\n",
2003 "",
2004 "%result = OpExtInst %type_float %std450 Frexp %arg1 %tmpVarPtr\n",
2005 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2006 mo[OID_FREXP_ST] = Op("frexp_st", FLOAT_ARITHMETIC,
2007 "OpMemberDecorate %struct_fi 0 Offset ${float_width}\n"
2008 "OpMemberDecorate %struct_fi 1 Offset 32\n",
2009 "%struct_fi = OpTypeStruct %type_float %type_i32\n"
2010 "%struct_fi_fptr = OpTypePointer Function %struct_fi\n",
2011 "",
2012 "%tmpStructPtr = OpVariable %struct_fi_fptr Function\n",
2013 "",
2014 "%tmpStruct = OpExtInst %struct_fi %std450 FrexpStruct %arg1\n"
2015 " OpStore %tmpStructPtr %tmpStruct\n"
2016 "%tmpLoc = OpAccessChain %type_float_fptr %tmpStructPtr %c_i32_0\n"
2017 "%result = OpLoad %type_float %tmpLoc\n",
2018 B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2019 mo[OID_LENGTH] = Op("length", FLOAT_ARITHMETIC,
2020 "%result = OpExtInst %type_float %std450 Length %arg1\n",
2021 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2022 mo[OID_NORMALIZE] = Op("normalize", FLOAT_ARITHMETIC,
2023 "%vec1 = OpCompositeConstruct %type_float_vec2 %arg1 %c_float_2\n"
2024 "%tmpVec = OpExtInst %type_float_vec2 %std450 Normalize %vec1\n"
2025 "%result = OpCompositeExtract %type_float %tmpVec 0\n",
2026 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2027 mo[OID_REFLECT] = Op("reflect", FLOAT_ARITHMETIC,
2028 "%vec1 = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
2029 "%vecN = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_n1\n"
2030 "%tmpVec = OpExtInst %type_float_vec2 %std450 Reflect %vec1 %vecN\n"
2031 "%result = OpCompositeExtract %type_float %tmpVec 0\n",
2032 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2033 mo[OID_REFRACT] = Op("refract", FLOAT_ARITHMETIC,
2034 "%vec1 = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
2035 "%vecN = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_n1\n"
2036 "%tmpVec = OpExtInst %type_float_vec2 %std450 Refract %vec1 %vecN %c_float_0_5\n"
2037 "%result = OpCompositeExtract %type_float %tmpVec 0\n",
2038 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2039 mo[OID_MAT_DET] = Op("mat_det", FLOAT_ARITHMETIC,
2040 "%col = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
2041 "%mat = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
2042 "%result = OpExtInst %type_float %std450 Determinant %mat\n",
2043 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2044 mo[OID_MAT_INV] = Op("mat_inv", FLOAT_ARITHMETIC,
2045 "%col1 = OpCompositeConstruct %type_float_vec2 %arg1 %c_float_1\n"
2046 "%col2 = OpCompositeConstruct %type_float_vec2 %c_float_1 %c_float_1\n"
2047 "%mat = OpCompositeConstruct %type_float_mat2x2 %col1 %col2\n"
2048 "%invMat = OpExtInst %type_float_mat2x2 %std450 MatrixInverse %mat\n"
2049 "%extCol = OpCompositeExtract %type_float_vec2 %invMat 1\n"
2050 "%result = OpCompositeExtract %type_float %extCol 1\n",
2051 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2052
2053 // PackHalf2x16 is a special case as it operates on fp32 vec2 and returns unsigned int,
2054 // the verification is done in SPIR-V code (if result is correct 1.0 will be written to SSBO)
2055 mo[OID_PH_DENORM] = Op("ph_denorm", FLOAT_STORAGE_ONLY,
2056 "",
2057 "",
2058 "%c_fp32_denorm_fp16 = OpConstant %type_f32 6.01e-5\n" // fp32 representation of fp16 denorm value
2059 "%c_ref = OpConstant %type_u32 66061296\n",
2060 "",
2061 "",
2062 "%srcVec = OpCompositeConstruct %type_f32_vec2 %c_fp32_denorm_fp16 %c_fp32_denorm_fp16\n"
2063 "%packedInt = OpExtInst %type_u32 %std450 PackHalf2x16 %srcVec\n"
2064 "%boolVal = OpIEqual %type_bool %c_ref %packedInt\n"
2065 "%result = OpSelect %type_f32 %boolVal %c_f32_1 %c_f32_0\n",
2066 B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_COMMANDS_CONST_FP32 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP32);
2067
2068 // UnpackHalf2x16 is a special case that operates on uint32 and returns two 32-bit floats,
2069 // this function is tested using constants
2070 mo[OID_UPH_DENORM] = Op("uph_denorm", FLOAT_STORAGE_ONLY,
2071 "",
2072 "",
2073 "%c_u32_2_16_pack = OpConstant %type_u32 66061296\n", // == packHalf2x16(vec2(denorm))
2074 "",
2075 "",
2076 "%tmpVec = OpExtInst %type_f32_vec2 %std450 UnpackHalf2x16 %c_u32_2_16_pack\n"
2077 "%result = OpCompositeExtract %type_f32 %tmpVec 0\n",
2078 B_STATEMENT_USAGE_COMMANDS_TYPE_FP32);
2079
2080 // PackDouble2x32 is a special case that operates on two uint32 and returns
2081 // double, this function is tested using constants
2082 mo[OID_PD_DENORM] = Op("pd_denorm", FLOAT_STORAGE_ONLY,
2083 "",
2084 "",
2085 "%c_p1 = OpConstant %type_u32 0\n"
2086 "%c_p2 = OpConstant %type_u32 262144\n", // == UnpackDouble2x32(denorm)
2087 "",
2088 "",
2089 "%srcVec = OpCompositeConstruct %type_u32_vec2 %c_p1 %c_p2\n"
2090 "%result = OpExtInst %type_f64 %std450 PackDouble2x32 %srcVec\n",
2091 B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2092
2093 // UnpackDouble2x32 is a special case as it operates only on FP64 and returns two ints,
2094 // the verification is done in SPIR-V code (if result is correct 1.0 will be written to SSBO)
2095 const char* unpackDouble2x32Types = "%type_bool_vec2 = OpTypeVector %type_bool 2\n";
2096 const char* unpackDouble2x32Source = "%refVec2 = OpCompositeConstruct %type_u32_vec2 %c_p1 %c_p2\n"
2097 "%resVec2 = OpExtInst %type_u32_vec2 %std450 UnpackDouble2x32 %arg1\n"
2098 "%boolVec2 = OpIEqual %type_bool_vec2 %refVec2 %resVec2\n"
2099 "%boolVal = OpAll %type_bool %boolVec2\n"
2100 "%result = OpSelect %type_f64 %boolVal %c_f64_1 %c_f64_0\n";
2101 mo[OID_UPD_DENORM_FLUSH] = Op("upd_denorm", FLOAT_STORAGE_ONLY, "",
2102 unpackDouble2x32Types,
2103 "%c_p1 = OpConstant %type_u32 0\n"
2104 "%c_p2 = OpConstant %type_u32 0\n",
2105 "",
2106 "",
2107 unpackDouble2x32Source,
2108 B_STATEMENT_USAGE_COMMANDS_CONST_FP64 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2109 mo[OID_UPD_DENORM_PRESERVE] = Op("upd_denorm", FLOAT_STORAGE_ONLY, "",
2110 unpackDouble2x32Types,
2111 "%c_p1 = OpConstant %type_u32 1008\n"
2112 "%c_p2 = OpConstant %type_u32 0\n",
2113 "",
2114 "",
2115 unpackDouble2x32Source,
2116 B_STATEMENT_USAGE_COMMANDS_CONST_FP64 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2117
2118 mo[OID_ORTE_ROUND] = Op("orte_round", FLOAT_STORAGE_ONLY, FP32,
2119 "OpDecorate %result FPRoundingMode RTE\n",
2120 "",
2121 "",
2122 "%result = OpFConvert %type_f16 %arg1\n",
2123 B_STATEMENT_USAGE_COMMANDS_TYPE_FP16);
2124 mo[OID_ORTZ_ROUND] = Op("ortz_round", FLOAT_STORAGE_ONLY, FP32,
2125 "OpDecorate %result FPRoundingMode RTZ\n",
2126 "",
2127 "",
2128 "%result = OpFConvert %type_f16 %arg1\n",
2129 B_STATEMENT_USAGE_COMMANDS_TYPE_FP16);
2130 }
2131
build(vector<OperationTestCase> & testCases,TypeTestResultsSP typeTestResults,bool argumentsFromInput)2132 void TestCasesBuilder::build(vector<OperationTestCase>& testCases, TypeTestResultsSP typeTestResults, bool argumentsFromInput)
2133 {
2134 // this method constructs a list of test cases; this list is a bit different
2135 // for every combination of float type, arguments preparation method and tested float control
2136
2137 testCases.reserve(750);
2138
2139 bool isFP16 = typeTestResults->floatType() == FP16;
2140
2141 // Denorm - FlushToZero - binary operations
2142 for (size_t i = 0 ; i < typeTestResults->binaryOpFTZ.size() ; ++i)
2143 {
2144 const BinaryCase& binaryCase = typeTestResults->binaryOpFTZ[i];
2145 OperationId operation = binaryCase.operationId;
2146 testCases.push_back(OTC("denorm_op_var_flush_to_zero", B_DENORM_FLUSH, operation, V_DENORM, V_ONE, binaryCase.opVarResult));
2147 testCases.push_back(OTC("denorm_op_denorm_flush_to_zero", B_DENORM_FLUSH, operation, V_DENORM, V_DENORM, binaryCase.opDenormResult));
2148 testCases.push_back(OTC("denorm_op_inf_flush_to_zero", B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_INF, binaryCase.opInfResult));
2149 testCases.push_back(OTC("denorm_op_nan_flush_to_zero", B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_NAN, binaryCase.opNanResult));
2150
2151 if (isFP16)
2152 {
2153 testCases.push_back(OTC("denorm_op_var_flush_to_zero_nostorage", B_DENORM_FLUSH, operation, V_DENORM, V_ONE, binaryCase.opVarResult, true));
2154 testCases.push_back(OTC("denorm_op_denorm_flush_to_zero_nostorage", B_DENORM_FLUSH, operation, V_DENORM, V_DENORM, binaryCase.opDenormResult, true));
2155 testCases.push_back(OTC("denorm_op_inf_flush_to_zero_nostorage", B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_INF, binaryCase.opInfResult, true));
2156 testCases.push_back(OTC("denorm_op_nan_flush_to_zero_nostorage", B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_NAN, binaryCase.opNanResult, true));
2157 }
2158 }
2159
2160 // Denorm - FlushToZero - unary operations
2161 for (size_t i = 0 ; i < typeTestResults->unaryOpFTZ.size() ; ++i)
2162 {
2163 const UnaryCase& unaryCase = typeTestResults->unaryOpFTZ[i];
2164 OperationId operation = unaryCase.operationId;
2165 testCases.push_back(OTC("op_denorm_flush_to_zero", B_DENORM_FLUSH, operation, V_DENORM, V_UNUSED, unaryCase.result));
2166 if (isFP16)
2167 testCases.push_back(OTC("op_denorm_flush_to_zero_nostorage", B_DENORM_FLUSH, operation, V_DENORM, V_UNUSED, unaryCase.result, true));
2168
2169 }
2170
2171 // Denorm - Preserve - binary operations
2172 for (size_t i = 0 ; i < typeTestResults->binaryOpDenormPreserve.size() ; ++i)
2173 {
2174 const BinaryCase& binaryCase = typeTestResults->binaryOpDenormPreserve[i];
2175 OperationId operation = binaryCase.operationId;
2176 testCases.push_back(OTC("denorm_op_var_preserve", B_DENORM_PRESERVE, operation, V_DENORM, V_ONE, binaryCase.opVarResult));
2177 testCases.push_back(OTC("denorm_op_denorm_preserve", B_DENORM_PRESERVE, operation, V_DENORM, V_DENORM, binaryCase.opDenormResult));
2178 testCases.push_back(OTC("denorm_op_inf_preserve", B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM, V_INF, binaryCase.opInfResult));
2179 testCases.push_back(OTC("denorm_op_nan_preserve", B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM, V_NAN, binaryCase.opNanResult));
2180
2181 if (isFP16)
2182 {
2183 testCases.push_back(OTC("denorm_op_var_preserve_nostorage", B_DENORM_PRESERVE, operation, V_DENORM, V_ONE, binaryCase.opVarResult, true));
2184 testCases.push_back(OTC("denorm_op_denorm_preserve_nostorage", B_DENORM_PRESERVE, operation, V_DENORM, V_DENORM, binaryCase.opDenormResult, true));
2185 testCases.push_back(OTC("denorm_op_inf_preserve_nostorage", B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM, V_INF, binaryCase.opInfResult, true));
2186 testCases.push_back(OTC("denorm_op_nan_preserve_nostorage", B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM, V_NAN, binaryCase.opNanResult, true));
2187 }
2188 }
2189
2190 // Denorm - Preserve - unary operations
2191 for (size_t i = 0 ; i < typeTestResults->unaryOpDenormPreserve.size() ; ++i)
2192 {
2193 const UnaryCase& unaryCase = typeTestResults->unaryOpDenormPreserve[i];
2194 OperationId operation = unaryCase.operationId;
2195 testCases.push_back(OTC("op_denorm_preserve", B_DENORM_PRESERVE, operation, V_DENORM, V_UNUSED, unaryCase.result));
2196 if (isFP16)
2197 testCases.push_back(OTC("op_denorm_preserve_nostorage", B_DENORM_PRESERVE, operation, V_DENORM, V_UNUSED, unaryCase.result, true));
2198 }
2199
2200 struct ZINCase
2201 {
2202 OperationId operationId;
2203 bool supportedByFP64;
2204 ValueId secondArgument;
2205 ValueId preserveZeroResult;
2206 ValueId preserveSZeroResult;
2207 ValueId preserveInfResult;
2208 ValueId preserveSInfResult;
2209 ValueId preserveNanResult;
2210 };
2211
2212 const ZINCase binaryOpZINPreserve[] = {
2213 // operation fp64 second arg preserve zero preserve szero preserve inf preserve sinf preserve nan
2214 { OID_PHI, true, V_INF, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2215 { OID_SELECT, true, V_ONE, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2216 { OID_ADD, true, V_ZERO, V_ZERO, V_ZERO, V_INF, V_MINUS_INF, V_NAN },
2217 { OID_SUB, true, V_ZERO, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2218 { OID_MUL, true, V_ONE, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2219 };
2220
2221 const ZINCase unaryOpZINPreserve[] = {
2222 // operation fp64 second arg preserve zero preserve szero preserve inf preserve sinf preserve nan
2223 { OID_RETURN_VAL, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2224 { OID_D_EXTRACT, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2225 { OID_D_INSERT, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2226 { OID_SHUFFLE, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2227 { OID_COMPOSITE, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2228 { OID_COMPOSITE_INS, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2229 { OID_COPY, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2230 { OID_TRANSPOSE, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN },
2231 { OID_NEGATE, true, V_UNUSED, V_MINUS_ZERO, V_ZERO, V_MINUS_INF, V_INF, V_NAN },
2232 };
2233
2234 bool isFP64 = typeTestResults->floatType() == FP64;
2235
2236 // Signed Zero Inf Nan - Preserve - binary operations
2237 for (size_t i = 0 ; i < DE_LENGTH_OF_ARRAY(binaryOpZINPreserve) ; ++i)
2238 {
2239 const ZINCase& zc = binaryOpZINPreserve[i];
2240 if (isFP64 && !zc.supportedByFP64)
2241 continue;
2242
2243 testCases.push_back(OTC("zero_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_ZERO, zc.secondArgument, zc.preserveZeroResult));
2244 testCases.push_back(OTC("signed_zero_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_MINUS_ZERO, zc.secondArgument, zc.preserveSZeroResult));
2245 testCases.push_back(OTC("inf_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_INF, zc.secondArgument, zc.preserveInfResult));
2246 testCases.push_back(OTC("signed_inf_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_MINUS_INF, zc.secondArgument, zc.preserveSInfResult));
2247 testCases.push_back(OTC("nan_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_NAN, zc.secondArgument, zc.preserveNanResult));
2248
2249 if (isFP16)
2250 {
2251 testCases.push_back(OTC("zero_op_var_preserve_nostorage", B_ZIN_PRESERVE, zc.operationId, V_ZERO, zc.secondArgument, zc.preserveZeroResult, true));
2252 testCases.push_back(OTC("signed_zero_op_var_preserve_nostorage", B_ZIN_PRESERVE, zc.operationId, V_MINUS_ZERO, zc.secondArgument, zc.preserveSZeroResult, true));
2253 testCases.push_back(OTC("inf_op_var_preserve_nostorage", B_ZIN_PRESERVE, zc.operationId, V_INF, zc.secondArgument, zc.preserveInfResult, true));
2254 testCases.push_back(OTC("signed_inf_op_var_preserve_nostorage", B_ZIN_PRESERVE, zc.operationId, V_MINUS_INF, zc.secondArgument, zc.preserveSInfResult, true));
2255 testCases.push_back(OTC("nan_op_var_preserve_nostorage", B_ZIN_PRESERVE, zc.operationId, V_NAN, zc.secondArgument, zc.preserveNanResult, true));
2256 }
2257 }
2258
2259 // Signed Zero Inf Nan - Preserve - unary operations
2260 for (size_t i = 0 ; i < DE_LENGTH_OF_ARRAY(unaryOpZINPreserve) ; ++i)
2261 {
2262 const ZINCase& zc = unaryOpZINPreserve[i];
2263 if (isFP64 && !zc.supportedByFP64)
2264 continue;
2265
2266 testCases.push_back(OTC("op_zero_preserve", B_ZIN_PRESERVE,zc.operationId, V_ZERO, V_UNUSED, zc.preserveZeroResult));
2267 testCases.push_back(OTC("op_signed_zero_preserve", B_ZIN_PRESERVE,zc.operationId, V_MINUS_ZERO, V_UNUSED, zc.preserveSZeroResult));
2268 testCases.push_back(OTC("op_inf_preserve", B_ZIN_PRESERVE,zc.operationId, V_INF, V_UNUSED, zc.preserveInfResult));
2269 testCases.push_back(OTC("op_signed_inf_preserve", B_ZIN_PRESERVE,zc.operationId, V_MINUS_INF, V_UNUSED, zc.preserveSInfResult));
2270 testCases.push_back(OTC("op_nan_preserve", B_ZIN_PRESERVE,zc.operationId, V_NAN, V_UNUSED, zc.preserveNanResult));
2271
2272 if (isFP16)
2273 {
2274 testCases.push_back(OTC("op_zero_preserve_nostorage", B_ZIN_PRESERVE,zc.operationId, V_ZERO, V_UNUSED, zc.preserveZeroResult, true));
2275 testCases.push_back(OTC("op_signed_zero_preserve_nostorage", B_ZIN_PRESERVE,zc.operationId, V_MINUS_ZERO, V_UNUSED, zc.preserveSZeroResult, true));
2276 testCases.push_back(OTC("op_inf_preserve_nostorage", B_ZIN_PRESERVE,zc.operationId, V_INF, V_UNUSED, zc.preserveInfResult, true));
2277 testCases.push_back(OTC("op_signed_inf_preserve_nostorage", B_ZIN_PRESERVE,zc.operationId, V_MINUS_INF, V_UNUSED, zc.preserveSInfResult, true));
2278 testCases.push_back(OTC("op_nan_preserve_nostorage", B_ZIN_PRESERVE,zc.operationId, V_NAN, V_UNUSED, zc.preserveNanResult, true));
2279 }
2280 }
2281
2282 // comparison operations - tested differently because they return true/false
2283 struct ComparisonCase
2284 {
2285 OperationId operationId;
2286 ValueId denormPreserveResult;
2287 };
2288 const ComparisonCase comparisonCases[] =
2289 {
2290 // operation denorm
2291 { OID_ORD_EQ, V_ZERO },
2292 { OID_UORD_EQ, V_ZERO },
2293 { OID_ORD_NEQ, V_ONE },
2294 { OID_UORD_NEQ, V_ONE },
2295 { OID_ORD_LS, V_ONE },
2296 { OID_UORD_LS, V_ONE },
2297 { OID_ORD_GT, V_ZERO },
2298 { OID_UORD_GT, V_ZERO },
2299 { OID_ORD_LE, V_ONE },
2300 { OID_UORD_LE, V_ONE },
2301 { OID_ORD_GE, V_ZERO },
2302 { OID_UORD_GE, V_ZERO }
2303 };
2304 for (int op = 0 ; op < DE_LENGTH_OF_ARRAY(comparisonCases) ; ++op)
2305 {
2306 const ComparisonCase& cc = comparisonCases[op];
2307 testCases.push_back(OTC("denorm_op_var_preserve", B_DENORM_PRESERVE, cc.operationId, V_DENORM, V_ONE, cc.denormPreserveResult));
2308 if (isFP16)
2309 testCases.push_back(OTC("denorm_op_var_preserve_nostorage", B_DENORM_PRESERVE, cc.operationId, V_DENORM, V_ONE, cc.denormPreserveResult, true));
2310 }
2311
2312 if (argumentsFromInput)
2313 {
2314 struct RoundingModeCase
2315 {
2316 OperationId operationId;
2317 ValueId arg1;
2318 ValueId arg2;
2319 ValueId expectedRTEResult;
2320 ValueId expectedRTZResult;
2321 };
2322
2323 const RoundingModeCase roundingCases[] =
2324 {
2325 { OID_ADD, V_ADD_ARG_A, V_ADD_ARG_B, V_ADD_RTE_RESULT, V_ADD_RTZ_RESULT },
2326 { OID_SUB, V_SUB_ARG_A, V_SUB_ARG_B, V_SUB_RTE_RESULT, V_SUB_RTZ_RESULT },
2327 { OID_MUL, V_MUL_ARG_A, V_MUL_ARG_B, V_MUL_RTE_RESULT, V_MUL_RTZ_RESULT },
2328 { OID_DOT, V_DOT_ARG_A, V_DOT_ARG_B, V_DOT_RTE_RESULT, V_DOT_RTZ_RESULT },
2329
2330 // in vect/mat multiplication by scalar operations only first element of result is checked
2331 // so argument and result values prepared for multiplication can be reused for those cases
2332 { OID_VEC_MUL_S, V_MUL_ARG_A, V_MUL_ARG_B, V_MUL_RTE_RESULT, V_MUL_RTZ_RESULT },
2333 { OID_MAT_MUL_S, V_MUL_ARG_A, V_MUL_ARG_B, V_MUL_RTE_RESULT, V_MUL_RTZ_RESULT },
2334 { OID_OUT_PROD, V_MUL_ARG_A, V_MUL_ARG_B, V_MUL_RTE_RESULT, V_MUL_RTZ_RESULT },
2335
2336 // in SPIR-V code we return first element of operation result so for following
2337 // cases argument and result values prepared for dot product can be reused
2338 { OID_VEC_MUL_M, V_DOT_ARG_A, V_DOT_ARG_B, V_DOT_RTE_RESULT, V_DOT_RTZ_RESULT },
2339 { OID_MAT_MUL_V, V_DOT_ARG_A, V_DOT_ARG_B, V_DOT_RTE_RESULT, V_DOT_RTZ_RESULT },
2340 { OID_MAT_MUL_M, V_DOT_ARG_A, V_DOT_ARG_B, V_DOT_RTE_RESULT, V_DOT_RTZ_RESULT },
2341
2342 // conversion operations are added separately - depending on float type width
2343 };
2344
2345 for (int c = 0 ; c < DE_LENGTH_OF_ARRAY(roundingCases) ; ++c)
2346 {
2347 const RoundingModeCase& rmc = roundingCases[c];
2348 testCases.push_back(OTC("rounding_rte_op", B_RTE_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTEResult));
2349 testCases.push_back(OTC("rounding_rtz_op", B_RTZ_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTZResult));
2350 if (isFP16)
2351 {
2352 testCases.push_back(OTC("rounding_rte_op_nostorage", B_RTE_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTEResult, true));
2353 testCases.push_back(OTC("rounding_rtz_op_nostorage", B_RTZ_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTZResult, true));
2354 }
2355 }
2356 }
2357
2358 // special cases
2359 if (typeTestResults->floatType() == FP16)
2360 {
2361 if (argumentsFromInput)
2362 {
2363 testCases.push_back(OTC("rounding_rte_conv_from_fp32", B_RTE_ROUNDING, OID_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2364 testCases.push_back(OTC("rounding_rtz_conv_from_fp32", B_RTZ_ROUNDING, OID_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2365 testCases.push_back(OTC("rounding_rte_conv_from_fp64", B_RTE_ROUNDING, OID_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2366 testCases.push_back(OTC("rounding_rtz_conv_from_fp64", B_RTZ_ROUNDING, OID_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2367
2368 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32", B_RTE_ROUNDING, OID_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2369 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32", B_RTZ_ROUNDING, OID_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2370 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64", B_RTE_ROUNDING, OID_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2371 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64", B_RTZ_ROUNDING, OID_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2372
2373 testCases.push_back(OTC("rounding_rte_conv_from_fp32_nostorage", B_RTE_ROUNDING, OID_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT, true));
2374 testCases.push_back(OTC("rounding_rtz_conv_from_fp32_nostorage", B_RTZ_ROUNDING, OID_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT, true));
2375 testCases.push_back(OTC("rounding_rte_conv_from_fp64_nostorage", B_RTE_ROUNDING, OID_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT, true));
2376 testCases.push_back(OTC("rounding_rtz_conv_from_fp64_nostorage", B_RTZ_ROUNDING, OID_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT, true));
2377
2378 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32_nostorage", B_RTE_ROUNDING, OID_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT, true));
2379 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32_nostorage", B_RTZ_ROUNDING, OID_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT, true));
2380 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_nostorage", B_RTE_ROUNDING, OID_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT, true));
2381 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_nostorage", B_RTZ_ROUNDING, OID_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT, true));
2382
2383 // verify that VkShaderFloatingPointRoundingModeKHR can be overridden for a given instruction by the FPRoundingMode decoration.
2384 // FPRoundingMode decoration requires VK_KHR_16bit_storage.
2385 testCases.push_back(OTC("rounding_rte_override", B_RTE_ROUNDING, OID_ORTZ_ROUND, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2386 testCases.push_back(OTC("rounding_rtz_override", B_RTZ_ROUNDING, OID_ORTE_ROUND, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2387 }
2388
2389 createUnaryTestCases(testCases, OID_CONV_FROM_FP32, V_CONV_DENORM_SMALLER, V_ZERO);
2390 createUnaryTestCases(testCases, OID_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO);
2391 createUnaryTestCases(testCases, OID_CONV_FROM_FP32, V_CONV_DENORM_SMALLER, V_ZERO, true);
2392 createUnaryTestCases(testCases, OID_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO, true);
2393
2394 }
2395 else if (typeTestResults->floatType() == FP32)
2396 {
2397 if (argumentsFromInput)
2398 {
2399 // convert from fp64 to fp32
2400 testCases.push_back(OTC("rounding_rte_conv_from_fp64", B_RTE_ROUNDING, OID_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP32_RTE_RESULT));
2401 testCases.push_back(OTC("rounding_rtz_conv_from_fp64", B_RTZ_ROUNDING, OID_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP32_RTZ_RESULT));
2402
2403 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64", B_RTE_ROUNDING, OID_SCONST_CONV_FROM_FP64_TO_FP32, V_UNUSED, V_UNUSED, V_CONV_TO_FP32_RTE_RESULT));
2404 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64", B_RTZ_ROUNDING, OID_SCONST_CONV_FROM_FP64_TO_FP32, V_UNUSED, V_UNUSED, V_CONV_TO_FP32_RTZ_RESULT));
2405 }
2406 else
2407 {
2408 // PackHalf2x16 - verification done in SPIR-V
2409 testCases.push_back(OTC("pack_half_denorm_preserve", B_DENORM_PRESERVE, OID_PH_DENORM, V_UNUSED, V_UNUSED, V_ONE));
2410
2411 // UnpackHalf2x16 - custom arguments defined as constants
2412 testCases.push_back(OTC("upack_half_denorm_flush_to_zero", B_DENORM_FLUSH, OID_UPH_DENORM, V_UNUSED, V_UNUSED, V_ZERO));
2413 testCases.push_back(OTC("upack_half_denorm_preserve", B_DENORM_PRESERVE, OID_UPH_DENORM, V_UNUSED, V_UNUSED, V_CONV_DENORM_SMALLER));
2414 }
2415
2416 createUnaryTestCases(testCases, OID_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP32);
2417 createUnaryTestCases(testCases, OID_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP32, true);
2418 createUnaryTestCases(testCases, OID_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO);
2419 }
2420 else // FP64
2421 {
2422 if (!argumentsFromInput)
2423 {
2424 // PackDouble2x32 - custom arguments defined as constants
2425 testCases.push_back(OTC("pack_double_denorm_preserve", B_DENORM_PRESERVE, OID_PD_DENORM, V_UNUSED, V_UNUSED, V_DENORM));
2426
2427 // UnpackDouble2x32 - verification done in SPIR-V
2428 testCases.push_back(OTC("upack_double_denorm_flush_to_zero", B_DENORM_FLUSH, OID_UPD_DENORM_FLUSH, V_DENORM, V_UNUSED, V_ONE));
2429 testCases.push_back(OTC("upack_double_denorm_preserve", B_DENORM_PRESERVE, OID_UPD_DENORM_PRESERVE, V_DENORM, V_UNUSED, V_ONE));
2430 }
2431
2432 createUnaryTestCases(testCases, OID_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP64);
2433 createUnaryTestCases(testCases, OID_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP64, true);
2434 createUnaryTestCases(testCases, OID_CONV_FROM_FP32, V_CONV_DENORM_BIGGER, V_ZERO_OR_FP32_DENORM_TO_FP64);
2435 }
2436 }
2437
getOperation(OperationId id) const2438 const Operation& TestCasesBuilder::getOperation(OperationId id) const
2439 {
2440 return m_operations.at(id);
2441 }
2442
createUnaryTestCases(vector<OperationTestCase> & testCases,OperationId operationId,ValueId denormPreserveResult,ValueId denormFTZResult,bool fp16WithoutStorage) const2443 void TestCasesBuilder::createUnaryTestCases(vector<OperationTestCase>& testCases, OperationId operationId, ValueId denormPreserveResult, ValueId denormFTZResult, bool fp16WithoutStorage) const
2444 {
2445 if (fp16WithoutStorage)
2446 {
2447 // Denom - Preserve
2448 testCases.push_back(OTC("op_denorm_preserve_nostorage", B_DENORM_PRESERVE, operationId, V_DENORM, V_UNUSED, denormPreserveResult, true));
2449
2450 // Denorm - FlushToZero
2451 testCases.push_back(OTC("op_denorm_flush_to_zero_nostorage", B_DENORM_FLUSH, operationId, V_DENORM, V_UNUSED, denormFTZResult, true));
2452
2453 // Signed Zero Inf Nan - Preserve
2454 testCases.push_back(OTC("op_zero_preserve_nostorage", B_ZIN_PRESERVE, operationId, V_ZERO, V_UNUSED, V_ZERO, true));
2455 testCases.push_back(OTC("op_signed_zero_preserve_nostorage", B_ZIN_PRESERVE, operationId, V_MINUS_ZERO, V_UNUSED, V_MINUS_ZERO, true));
2456 testCases.push_back(OTC("op_inf_preserve_nostorage", B_ZIN_PRESERVE, operationId, V_INF, V_UNUSED, V_INF, true));
2457 testCases.push_back(OTC("op_nan_preserve_nostorage", B_ZIN_PRESERVE, operationId, V_NAN, V_UNUSED, V_NAN, true));
2458 }
2459 else
2460 {
2461 // Denom - Preserve
2462 testCases.push_back(OTC("op_denorm_preserve", B_DENORM_PRESERVE, operationId, V_DENORM, V_UNUSED, denormPreserveResult));
2463
2464 // Denorm - FlushToZero
2465 testCases.push_back(OTC("op_denorm_flush_to_zero", B_DENORM_FLUSH, operationId, V_DENORM, V_UNUSED, denormFTZResult));
2466
2467 // Signed Zero Inf Nan - Preserve
2468 testCases.push_back(OTC("op_zero_preserve", B_ZIN_PRESERVE, operationId, V_ZERO, V_UNUSED, V_ZERO));
2469 testCases.push_back(OTC("op_signed_zero_preserve", B_ZIN_PRESERVE, operationId, V_MINUS_ZERO, V_UNUSED, V_MINUS_ZERO));
2470 testCases.push_back(OTC("op_inf_preserve", B_ZIN_PRESERVE, operationId, V_INF, V_UNUSED, V_INF));
2471 testCases.push_back(OTC("op_nan_preserve", B_ZIN_PRESERVE, operationId, V_NAN, V_UNUSED, V_NAN));
2472 }
2473 }
2474
2475 template <typename TYPE, typename FLOAT_TYPE>
isZeroOrOtherValue(const TYPE & returnedFloat,ValueId secondAcceptableResult,TestLog & log)2476 bool isZeroOrOtherValue(const TYPE& returnedFloat, ValueId secondAcceptableResult, TestLog& log)
2477 {
2478 if (returnedFloat.isZero() && !returnedFloat.signBit())
2479 return true;
2480
2481 TypeValues<FLOAT_TYPE> typeValues;
2482 typedef typename TYPE::StorageType SType;
2483 typename RawConvert<FLOAT_TYPE, SType>::Value value;
2484 value.fp = typeValues.getValue(secondAcceptableResult);
2485
2486 if (returnedFloat.bits() == value.ui)
2487 return true;
2488
2489 log << TestLog::Message << "Expected 0 or " << toHex(value.ui)
2490 << " (" << value.fp << ")" << TestLog::EndMessage;
2491 return false;
2492 }
2493
2494 template <typename TYPE>
isAcosResultCorrect(const TYPE & returnedFloat,TestLog & log)2495 bool isAcosResultCorrect(const TYPE& returnedFloat, TestLog& log)
2496 {
2497 // pi/2 is result of acos(0) which in the specs is defined as equivalent to
2498 // atan2(sqrt(1.0 - x^2), x), where atan2 has 4096 ULP, sqrt is equivalent to
2499 // 1.0 /inversesqrt(), inversesqrt() is 2 ULP and rcp is another 2.5 ULP
2500
2501 double precision = 0;
2502 const double piDiv2 = 3.14159265358979323846 / 2;
2503 if (returnedFloat.MANTISSA_BITS == 23)
2504 {
2505 FloatFormat fp32Format(-126, 127, 23, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
2506 precision = fp32Format.ulp(piDiv2, 4096.0);
2507 }
2508 else
2509 {
2510 FloatFormat fp16Format(-14, 15, 10, true, tcu::MAYBE);
2511 precision = fp16Format.ulp(piDiv2, 5.0);
2512 }
2513
2514 if (deAbs(returnedFloat.asDouble() - piDiv2) < precision)
2515 return true;
2516
2517 log << TestLog::Message << "Expected result to be in range"
2518 << " (" << piDiv2 - precision << ", " << piDiv2 + precision << "), got "
2519 << returnedFloat.asDouble() << TestLog::EndMessage;
2520 return false;
2521 }
2522
2523 template <typename TYPE>
isCosResultCorrect(const TYPE & returnedFloat,TestLog & log)2524 bool isCosResultCorrect(const TYPE& returnedFloat, TestLog& log)
2525 {
2526 // for cos(x) with x between -pi and pi, the precision error is 2^-11 for fp32 and 2^-7 for fp16.
2527 double precision = returnedFloat.MANTISSA_BITS == 23 ? dePow(2, -11) : dePow(2, -7);
2528 const double expected = 1.0;
2529
2530 if (deAbs(returnedFloat.asDouble() - expected) < precision)
2531 return true;
2532
2533 log << TestLog::Message << "Expected result to be in range"
2534 << " (" << expected - precision << ", " << expected + precision << "), got "
2535 << returnedFloat.asDouble() << TestLog::EndMessage;
2536 return false;
2537 }
2538
2539 template <typename FLOAT_TYPE>
getFloatTypeAsDouble(FLOAT_TYPE param)2540 double getFloatTypeAsDouble(FLOAT_TYPE param)
2541 {
2542 return param;
2543 }
getFloatTypeAsDouble(deFloat16 param)2544 template<> double getFloatTypeAsDouble(deFloat16 param)
2545 {
2546 return deFloat16To64(param);
2547 }
2548
2549
getPrecisionAt(double value,float ulp,int mantissaBits)2550 double getPrecisionAt(double value, float ulp, int mantissaBits)
2551 {
2552 if (mantissaBits == 23)
2553 {
2554 FloatFormat fp32Format(-126, 127, 23, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
2555 return fp32Format.ulp(value, ulp);
2556 }
2557 else if (mantissaBits == 52)
2558 {
2559 FloatFormat fp32Format(-1022, 1023, 52, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
2560 return fp32Format.ulp(value, ulp);
2561 }
2562 else
2563 {
2564 DE_ASSERT(mantissaBits == 10);
2565 FloatFormat fp16Format(-14, 15, 10, true, tcu::MAYBE);
2566 return fp16Format.ulp(value, ulp);
2567 }
2568 }
2569
2570 template <typename TYPE, typename FLOAT_TYPE, typename REF_FUNCTION>
isLogResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,REF_FUNCTION refFunction,TestLog & log)2571 bool isLogResultCorrect(const TYPE& returnedFloat, FLOAT_TYPE param, REF_FUNCTION refFunction, TestLog& log)
2572 {
2573 if (returnedFloat.isInf() && returnedFloat.signBit())
2574 return true;
2575
2576 const double expected = refFunction(getFloatTypeAsDouble(param));
2577 const double precision = getPrecisionAt(expected, 3.0, returnedFloat.MANTISSA_BITS);
2578
2579 if (deAbs(returnedFloat.asDouble() - expected) < precision)
2580 return true;
2581
2582 log << TestLog::Message << "Expected result to be -INF or in range"
2583 << " (" << expected - precision << ", " << expected + precision << "), got "
2584 << returnedFloat.asDouble() << TestLog::EndMessage;
2585 return false;
2586 }
2587
2588 template <typename TYPE, typename FLOAT_TYPE>
isInverseSqrtResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,TestLog & log)2589 bool isInverseSqrtResultCorrect(const TYPE& returnedFloat, FLOAT_TYPE param, TestLog& log)
2590 {
2591 if (returnedFloat.isInf() && !returnedFloat.signBit())
2592 return true;
2593
2594 const double expected = 1.0/ deSqrt(getFloatTypeAsDouble(param));
2595 const double precision = getPrecisionAt(expected, 2.0, returnedFloat.MANTISSA_BITS);
2596
2597 if (deAbs(returnedFloat.asDouble() - expected) < precision)
2598 return true;
2599
2600 log << TestLog::Message << "Expected result to be INF or in range"
2601 << " (" << expected - precision << ", " << expected + precision << "), got "
2602 << returnedFloat.asDouble() << TestLog::EndMessage;
2603 return false;
2604 }
2605
2606 template <typename TYPE, typename FLOAT_TYPE>
isSqrtResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,TestLog & log)2607 bool isSqrtResultCorrect(const TYPE& returnedFloat, FLOAT_TYPE param, TestLog& log)
2608 {
2609 if (returnedFloat.isZero() && !returnedFloat.signBit())
2610 return true;
2611
2612
2613 const double expected = deSqrt(getFloatTypeAsDouble(param));
2614 const double expectedInverseSqrt = 1.0 / expected;
2615 const double inverseSqrtPrecision = getPrecisionAt(expectedInverseSqrt, 2.0, returnedFloat.MANTISSA_BITS);
2616
2617 double expectedMin = deMin(1.0 / (expectedInverseSqrt - inverseSqrtPrecision), 1.0 / (expectedInverseSqrt + inverseSqrtPrecision));
2618 double expectedMax = deMax(1.0 / (expectedInverseSqrt - inverseSqrtPrecision), 1.0 / (expectedInverseSqrt + inverseSqrtPrecision));
2619
2620 expectedMin -= getPrecisionAt(expectedMin, 2.5, returnedFloat.MANTISSA_BITS);
2621 expectedMax += getPrecisionAt(expectedMax, 2.5, returnedFloat.MANTISSA_BITS);
2622
2623 if (returnedFloat.asDouble() >= expectedMin && returnedFloat.asDouble() <= expectedMax)
2624 return true;
2625
2626 log << TestLog::Message << "Expected result to be +0 or in range"
2627 << " (" << expectedMin << ", " << expectedMax << "), got "
2628 << returnedFloat.asDouble() << TestLog::EndMessage;
2629 return false;
2630 }
2631
2632 // Function used to compare test result with expected output.
2633 // TYPE can be Float16, Float32 or Float64.
2634 // FLOAT_TYPE can be deFloat16, float, double.
2635 template <typename TYPE, typename FLOAT_TYPE>
compareBytes(vector<deUint8> & expectedBytes,AllocationSp outputAlloc,TestLog & log)2636 bool compareBytes(vector<deUint8>& expectedBytes, AllocationSp outputAlloc, TestLog& log)
2637 {
2638 const TYPE* returned = static_cast<const TYPE*>(outputAlloc->getHostPtr());
2639 const TYPE* fValueId = reinterpret_cast<const TYPE*>(&expectedBytes.front());
2640
2641 // all test return single value
2642 // Fp16 nostorage tests get their values from a deUint32 value, but we create the
2643 // buffer with the same size for both cases: 4 bytes.
2644 if (sizeof(TYPE) == 2u)
2645 DE_ASSERT((expectedBytes.size() / sizeof(TYPE)) == 2);
2646 else
2647 DE_ASSERT((expectedBytes.size() / sizeof(TYPE)) == 1);
2648
2649 // during test setup we do not store expected value but id that can be used to
2650 // retrieve actual value - this is done to handle special cases like multiple
2651 // allowed results or epsilon checks for some cases
2652 // note that this is workaround - this should be done by changing
2653 // ComputerShaderCase and GraphicsShaderCase so that additional arguments can
2654 // be passed to this verification callback
2655 typedef typename TYPE::StorageType SType;
2656 SType expectedInt = fValueId[0].bits();
2657 ValueId expectedValueId = static_cast<ValueId>(expectedInt);
2658
2659 // something went wrong, expected value cant be V_UNUSED,
2660 // if this is the case then test shouldn't be created at all
2661 DE_ASSERT(expectedValueId != V_UNUSED);
2662
2663 TYPE returnedFloat = returned[0];
2664
2665 log << TestLog::Message << "Calculated result: " << toHex(returnedFloat.bits())
2666 << " (" << returnedFloat.asFloat() << ")" << TestLog::EndMessage;
2667
2668 if (expectedValueId == V_NAN)
2669 {
2670 if (returnedFloat.isNaN())
2671 return true;
2672
2673 log << TestLog::Message << "Expected NaN" << TestLog::EndMessage;
2674 return false;
2675 }
2676
2677 if (expectedValueId == V_DENORM)
2678 {
2679 if (returnedFloat.isDenorm())
2680 return true;
2681
2682 log << TestLog::Message << "Expected Denorm" << TestLog::EndMessage;
2683 return false;
2684 }
2685
2686 // handle multiple acceptable results cases
2687 if (expectedValueId == V_ZERO_OR_MINUS_ZERO)
2688 {
2689 if (returnedFloat.isZero())
2690 return true;
2691
2692 log << TestLog::Message << "Expected 0 or -0" << TestLog::EndMessage;
2693 return false;
2694 }
2695 if (expectedValueId == V_ZERO_OR_ONE)
2696 return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_ONE, log);
2697 if ((expectedValueId == V_ZERO_OR_FP16_DENORM_TO_FP32) || (expectedValueId == V_ZERO_OR_FP16_DENORM_TO_FP64))
2698 return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_CONV_DENORM_SMALLER, log);
2699 if (expectedValueId == V_ZERO_OR_FP32_DENORM_TO_FP64)
2700 return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_CONV_DENORM_BIGGER, log);
2701 if (expectedValueId == V_ZERO_OR_DENORM_TIMES_TWO)
2702 {
2703 // this expected value is only needed for fp16
2704 DE_ASSERT(returnedFloat.EXPONENT_BIAS == 15);
2705 return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_DENORM_TIMES_TWO, log);
2706 }
2707 if (expectedValueId == V_MINUS_ONE_OR_CLOSE)
2708 {
2709 // this expected value is only needed for fp16
2710 DE_ASSERT(returnedFloat.EXPONENT_BIAS == 15);
2711 typename TYPE::StorageType returnedValue = returnedFloat.bits();
2712 return (returnedValue == 0xbc00) || (returnedValue == 0xbbff);
2713 }
2714
2715 // handle trigonometric operations precision errors
2716 if (expectedValueId == V_TRIG_ONE)
2717 return isCosResultCorrect<TYPE>(returnedFloat, log);
2718
2719 // handle acos(0) case
2720 if (expectedValueId == V_PI_DIV_2)
2721 return isAcosResultCorrect<TYPE>(returnedFloat, log);
2722
2723 TypeValues<FLOAT_TYPE> typeValues;
2724
2725 if (expectedValueId == V_MINUS_INF_OR_LOG_DENORM)
2726 return isLogResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), deLog, log);
2727
2728 if (expectedValueId == V_MINUS_INF_OR_LOG2_DENORM)
2729 return isLogResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), deLog2, log);
2730
2731 if (expectedValueId == V_ZERO_OR_SQRT_DENORM)
2732 return isSqrtResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), log);
2733
2734 if (expectedValueId == V_INF_OR_INV_SQRT_DENORM)
2735 return isInverseSqrtResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), log);
2736
2737
2738 typename RawConvert<FLOAT_TYPE, SType>::Value value;
2739 value.fp = typeValues.getValue(expectedValueId);
2740
2741 if (returnedFloat.bits() == value.ui)
2742 return true;
2743
2744 log << TestLog::Message << "Expected " << toHex(value.ui)
2745 << " (" << value.fp << ")" << TestLog::EndMessage;
2746 return false;
2747 }
2748
2749 template <typename TYPE, typename FLOAT_TYPE>
checkFloats(const vector<Resource> &,const vector<AllocationSp> & outputAllocs,const vector<Resource> & expectedOutputs,TestLog & log)2750 bool checkFloats (const vector<Resource>& ,
2751 const vector<AllocationSp>& outputAllocs,
2752 const vector<Resource>& expectedOutputs,
2753 TestLog& log)
2754 {
2755 if (outputAllocs.size() != expectedOutputs.size())
2756 return false;
2757
2758 for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
2759 {
2760 vector<deUint8> expectedBytes;
2761 expectedOutputs[outputNdx].getBytes(expectedBytes);
2762
2763 if (!compareBytes<TYPE, FLOAT_TYPE>(expectedBytes, outputAllocs[outputNdx], log))
2764 return false;
2765 }
2766
2767 return true;
2768 }
2769
checkMixedFloats(const vector<Resource> &,const vector<AllocationSp> & outputAllocs,const vector<Resource> & expectedOutputs,TestLog & log)2770 bool checkMixedFloats (const vector<Resource>& ,
2771 const vector<AllocationSp>& outputAllocs,
2772 const vector<Resource>& expectedOutputs,
2773 TestLog& log)
2774 {
2775 // this function validates buffers containing floats of diferent widths, order is not important
2776
2777 if (outputAllocs.size() != expectedOutputs.size())
2778 return false;
2779
2780 // The comparison function depends on the data type stored in the resource.
2781 using compareFun = bool (*)(vector<deUint8>& expectedBytes, AllocationSp outputAlloc, TestLog& log);
2782 const map<BufferDataType, compareFun> compareMap =
2783 {
2784 { BufferDataType::DATA_FP16, compareBytes<Float16, deFloat16> },
2785 { BufferDataType::DATA_FP32, compareBytes<Float32, float> },
2786 { BufferDataType::DATA_FP64, compareBytes<Float64, double>},
2787 };
2788
2789 vector<deUint8> expectedBytes;
2790 bool allResultsAreCorrect = true;
2791 int resultIndex = static_cast<int>(outputAllocs.size());
2792
2793 while (resultIndex--)
2794 {
2795 expectedOutputs[resultIndex].getBytes(expectedBytes);
2796 BufferDataType type = static_cast<BufferDataType>(reinterpret_cast<std::uintptr_t>(expectedOutputs[resultIndex].getUserData()));
2797 allResultsAreCorrect &= compareMap.at(type)(expectedBytes, outputAllocs[resultIndex], log);
2798 }
2799
2800 return allResultsAreCorrect;
2801 }
2802
2803 // Base class for ComputeTestGroupBuilder and GrephicstestGroupBuilder classes.
2804 // It contains all functionalities that are used by both child classes.
2805 class TestGroupBuilderBase
2806 {
2807 public:
2808
2809 TestGroupBuilderBase();
2810 virtual ~TestGroupBuilderBase() = default;
2811
2812 virtual void createOperationTests(TestCaseGroup* parentGroup,
2813 const char* groupName,
2814 FloatType floatType,
2815 bool argumentsFromInput) = 0;
2816
2817 virtual void createSettingsTests(TestCaseGroup* parentGroup) = 0;
2818
2819 protected:
2820
2821 typedef vector<OperationTestCase> TestCaseVect;
2822
2823 // Structure containing all data required to create single operation test.
2824 struct OperationTestCaseInfo
2825 {
2826 FloatType outFloatType;
2827 bool argumentsFromInput;
2828 VkShaderStageFlagBits testedStage;
2829 const Operation& operation;
2830 const OperationTestCase& testCase;
2831 };
2832
2833 // Mode used by SettingsTestCaseInfo to specify what settings do we want to test.
2834 enum SettingsMode
2835 {
2836 SM_ROUNDING = 0,
2837 SM_DENORMS
2838 };
2839
2840 // Enum containing available options. When rounding is tested only SO_RTE and SO_RTZ
2841 // should be used. SO_FLUSH and SO_PRESERVE should be used only for denorm tests.
2842 enum SettingsOption
2843 {
2844 SO_UNUSED = 0,
2845 SO_RTE,
2846 SO_RTZ,
2847 SO_FLUSH,
2848 SO_PRESERVE
2849 };
2850
2851 // Structure containing all data required to create single settings test.
2852 struct SettingsTestCaseInfo
2853 {
2854 const char* name;
2855 SettingsMode testedMode;
2856 VkShaderFloatControlsIndependence independenceSetting;
2857
2858 SettingsOption fp16Option;
2859 SettingsOption fp32Option;
2860 SettingsOption fp64Option;
2861 bool fp16Without16BitStorage;
2862 };
2863
2864 void specializeOperation(const OperationTestCaseInfo& testCaseInfo,
2865 SpecializedOperation& specializedOperation) const;
2866
2867 void getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,
2868 const string inBitWidth,
2869 const string outBitWidth,
2870 string& capability,
2871 string& executionMode) const;
2872
2873 void setupVulkanFeatures(FloatType inFloatType,
2874 FloatType outFloatType,
2875 BehaviorFlags behaviorFlags,
2876 bool float64FeatureRequired,
2877 VulkanFeatures& features) const;
2878
2879 protected:
2880
2881 struct TypeData
2882 {
2883 TypeValuesSP values;
2884 TypeSnippetsSP snippets;
2885 TypeTestResultsSP testResults;
2886 };
2887
2888 // Type specific parameters are stored in this map.
2889 map<FloatType, TypeData> m_typeData;
2890
// Map converting behaviour id to the name used in OpCapability and OpExecutionMode instructions
2892 typedef map<BehaviorFlagBits, string> BehaviorNameMap;
2893 BehaviorNameMap m_behaviorToName;
2894 };
2895
TestGroupBuilderBase()2896 TestGroupBuilderBase::TestGroupBuilderBase()
2897 {
2898 m_typeData[FP16] = TypeData();
2899 m_typeData[FP16].values = TypeValuesSP(new TypeValues<deFloat16>);
2900 m_typeData[FP16].snippets = TypeSnippetsSP(new TypeSnippets<deFloat16>);
2901 m_typeData[FP16].testResults = TypeTestResultsSP(new TypeTestResults<deFloat16>);
2902 m_typeData[FP32] = TypeData();
2903 m_typeData[FP32].values = TypeValuesSP(new TypeValues<float>);
2904 m_typeData[FP32].snippets = TypeSnippetsSP(new TypeSnippets<float>);
2905 m_typeData[FP32].testResults = TypeTestResultsSP(new TypeTestResults<float>);
2906 m_typeData[FP64] = TypeData();
2907 m_typeData[FP64].values = TypeValuesSP(new TypeValues<double>);
2908 m_typeData[FP64].snippets = TypeSnippetsSP(new TypeSnippets<double>);
2909 m_typeData[FP64].testResults = TypeTestResultsSP(new TypeTestResults<double>);
2910
2911 m_behaviorToName[B_DENORM_PRESERVE] = "DenormPreserve";
2912 m_behaviorToName[B_DENORM_FLUSH] = "DenormFlushToZero";
2913 m_behaviorToName[B_ZIN_PRESERVE] = "SignedZeroInfNanPreserve";
2914 m_behaviorToName[B_RTE_ROUNDING] = "RoundingModeRTE";
2915 m_behaviorToName[B_RTZ_ROUNDING] = "RoundingModeRTZ";
2916 }
2917
specializeOperation(const OperationTestCaseInfo & testCaseInfo,SpecializedOperation & specializedOperation) const2918 void TestGroupBuilderBase::specializeOperation (const OperationTestCaseInfo& testCaseInfo,
2919 SpecializedOperation& specializedOperation) const
2920 {
2921 const string typeToken = "_float";
2922 const string widthToken = "${float_width}";
2923
2924 FloatType outFloatType = testCaseInfo.outFloatType;
2925 const Operation& operation = testCaseInfo.operation;
2926 const TypeSnippetsSP outTypeSnippets = m_typeData.at(outFloatType).snippets;
2927 const bool inputRestricted = operation.isInputTypeRestricted;
2928 FloatType inFloatType = operation.restrictedInputType;
2929
2930 // usually input type is same as output but this is not the case for conversion
2931 // operations; in those cases operation definitions have restricted input type
2932 inFloatType = inputRestricted ? inFloatType : outFloatType;
2933
2934 TypeSnippetsSP inTypeSnippets = m_typeData.at(inFloatType).snippets;
2935
2936 const string inTypePrefix = string("_f") + inTypeSnippets->bitWidth;
2937 const string outTypePrefix = string("_f") + outTypeSnippets->bitWidth;
2938
2939 specializedOperation.constants = replace(operation.constants, typeToken, inTypePrefix);
2940 specializedOperation.annotations = replace(operation.annotations, widthToken, outTypeSnippets->bitWidth);
2941 specializedOperation.types = replace(operation.types, typeToken, outTypePrefix);
2942 specializedOperation.variables = replace(operation.variables, typeToken, outTypePrefix);
2943 specializedOperation.functions = replace(operation.functions, typeToken, outTypePrefix);
2944 specializedOperation.commands = replace(operation.commands, typeToken, outTypePrefix);
2945
2946 specializedOperation.inFloatType = inFloatType;
2947 specializedOperation.inTypeSnippets = inTypeSnippets;
2948 specializedOperation.outTypeSnippets = outTypeSnippets;
2949 specializedOperation.argumentsUsesFloatConstant = 0;
2950
2951 if (operation.isSpecConstant)
2952 return;
2953
2954 // select way arguments are prepared
2955 if (testCaseInfo.argumentsFromInput)
2956 {
2957 // read arguments from input SSBO in main function
2958 specializedOperation.arguments = inTypeSnippets->argumentsFromInputSnippet;
2959
2960 if (inFloatType == FP16 && testCaseInfo.testCase.fp16Without16BitStorage)
2961 specializedOperation.arguments = inTypeSnippets->argumentsFromInputFp16Snippet;
2962 }
2963 else
2964 {
2965 // generate proper values in main function
2966 const string arg1 = "%arg1 = ";
2967 const string arg2 = "%arg2 = ";
2968
2969 const ValueId* inputArguments = testCaseInfo.testCase.input;
2970 if (inputArguments[0] != V_UNUSED)
2971 {
2972 specializedOperation.arguments = arg1 + inTypeSnippets->valueIdToSnippetArgMap.at(inputArguments[0]);
2973 specializedOperation.argumentsUsesFloatConstant |= B_STATEMENT_USAGE_ARGS_CONST_FLOAT;
2974 }
2975 if (inputArguments[1] != V_UNUSED)
2976 {
2977 specializedOperation.arguments += arg2 + inTypeSnippets->valueIdToSnippetArgMap.at(inputArguments[1]);
2978 specializedOperation.argumentsUsesFloatConstant |= B_STATEMENT_USAGE_ARGS_CONST_FLOAT;
2979 }
2980 }
2981 }
2982
2983
getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,const string inBitWidth,const string outBitWidth,string & capability,string & executionMode) const2984 void TestGroupBuilderBase::getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,
2985 const string inBitWidth,
2986 const string outBitWidth,
2987 string& capability,
2988 string& executionMode) const
2989 {
2990 // iterate over all behaviours and request those that are needed
2991 BehaviorNameMap::const_iterator it = m_behaviorToName.begin();
2992 while (it != m_behaviorToName.end())
2993 {
2994 BehaviorFlagBits behaviorId = it->first;
2995 string behaviorName = it->second;
2996
2997 if (behaviorFlags & behaviorId)
2998 {
2999 capability += "OpCapability " + behaviorName + "\n";
3000
3001 // rounding mode should be obeyed for destination type
3002 bool rounding = (behaviorId == B_RTE_ROUNDING) || (behaviorId == B_RTZ_ROUNDING);
3003 executionMode += "OpExecutionMode %main " + behaviorName + " " +
3004 (rounding ? outBitWidth : inBitWidth) + "\n";
3005 }
3006
3007 ++it;
3008 }
3009
3010 DE_ASSERT(!capability.empty() && !executionMode.empty());
3011 }
3012
setupVulkanFeatures(FloatType inFloatType,FloatType outFloatType,BehaviorFlags behaviorFlags,bool float64FeatureRequired,VulkanFeatures & features) const3013 void TestGroupBuilderBase::setupVulkanFeatures(FloatType inFloatType,
3014 FloatType outFloatType,
3015 BehaviorFlags behaviorFlags,
3016 bool float64FeatureRequired,
3017 VulkanFeatures& features) const
3018 {
3019 features.coreFeatures.shaderFloat64 = float64FeatureRequired;
3020
3021 // request proper float controls features
3022 vk::VkPhysicalDeviceFloatControlsProperties& floatControls = features.floatControlsProperties;
3023
3024 // rounding mode should obey the destination type
3025 bool rteRounding = (behaviorFlags & B_RTE_ROUNDING) != 0;
3026 bool rtzRounding = (behaviorFlags & B_RTZ_ROUNDING) != 0;
3027 if (rteRounding || rtzRounding)
3028 {
3029 switch(outFloatType)
3030 {
3031 case FP16:
3032 floatControls.shaderRoundingModeRTEFloat16 = rteRounding;
3033 floatControls.shaderRoundingModeRTZFloat16 = rtzRounding;
3034 return;
3035 case FP32:
3036 floatControls.shaderRoundingModeRTEFloat32 = rteRounding;
3037 floatControls.shaderRoundingModeRTZFloat32 = rtzRounding;
3038 return;
3039 case FP64:
3040 floatControls.shaderRoundingModeRTEFloat64 = rteRounding;
3041 floatControls.shaderRoundingModeRTZFloat64 = rtzRounding;
3042 return;
3043 }
3044 }
3045
3046 switch(inFloatType)
3047 {
3048 case FP16:
3049 floatControls.shaderDenormPreserveFloat16 = behaviorFlags & B_DENORM_PRESERVE;
3050 floatControls.shaderDenormFlushToZeroFloat16 = behaviorFlags & B_DENORM_FLUSH;
3051 floatControls.shaderSignedZeroInfNanPreserveFloat16 = behaviorFlags & B_ZIN_PRESERVE;
3052 return;
3053 case FP32:
3054 floatControls.shaderDenormPreserveFloat32 = behaviorFlags & B_DENORM_PRESERVE;
3055 floatControls.shaderDenormFlushToZeroFloat32 = behaviorFlags & B_DENORM_FLUSH;
3056 floatControls.shaderSignedZeroInfNanPreserveFloat32 = behaviorFlags & B_ZIN_PRESERVE;
3057 return;
3058 case FP64:
3059 floatControls.shaderDenormPreserveFloat64 = behaviorFlags & B_DENORM_PRESERVE;
3060 floatControls.shaderDenormFlushToZeroFloat64 = behaviorFlags & B_DENORM_FLUSH;
3061 floatControls.shaderSignedZeroInfNanPreserveFloat64 = behaviorFlags & B_ZIN_PRESERVE;
3062 return;
3063 }
3064 }
3065
3066 // Test case not related to SPIR-V but executed with compute tests. It checks if specified
3067 // features are set to the same value when specific independence settings are used.
verifyIndependenceSettings(Context & context)3068 tcu::TestStatus verifyIndependenceSettings(Context& context)
3069 {
3070 if (!context.isDeviceFunctionalitySupported("VK_KHR_shader_float_controls"))
3071 TCU_THROW(NotSupportedError, "VK_KHR_shader_float_controls not supported");
3072
3073 vk::VkPhysicalDeviceFloatControlsProperties fcProperties;
3074 fcProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES;
3075 fcProperties.pNext = DE_NULL;
3076
3077 vk::VkPhysicalDeviceProperties2 deviceProperties;
3078 deviceProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
3079 deviceProperties.pNext = &fcProperties;
3080
3081 auto fail = [](const string& featureGroup)
3082 {
3083 return tcu::TestStatus::fail(featureGroup + " features should be set to the same value");
3084 };
3085
3086 const VkPhysicalDevice physicalDevice = context.getPhysicalDevice();
3087 const vk::InstanceInterface& instanceInterface = context.getInstanceInterface();
3088 instanceInterface.getPhysicalDeviceProperties2(physicalDevice, &deviceProperties);
3089
3090 if (fcProperties.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE)
3091 {
3092 vk::VkBool32 fp16rte = fcProperties.shaderRoundingModeRTEFloat16;
3093 vk::VkBool32 fp32rte = fcProperties.shaderRoundingModeRTEFloat32;
3094 vk::VkBool32 fp64rte = fcProperties.shaderRoundingModeRTEFloat64;
3095 if ((fp16rte != fp32rte) || (fp32rte != fp64rte))
3096 return fail("shaderRoundingModeRTEFloat*");
3097
3098 vk::VkBool32 fp16rtz = fcProperties.shaderRoundingModeRTZFloat16;
3099 vk::VkBool32 fp32rtz = fcProperties.shaderRoundingModeRTZFloat32;
3100 vk::VkBool32 fp64rtz = fcProperties.shaderRoundingModeRTZFloat64;
3101 if ((fp16rtz != fp32rtz) || (fp32rtz != fp64rtz))
3102 return fail("shaderRoundingModeRTZFloat*");
3103 }
3104 else if (fcProperties.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY)
3105 {
3106 vk::VkBool32 fp16rte = fcProperties.shaderRoundingModeRTEFloat16;
3107 vk::VkBool32 fp64rte = fcProperties.shaderRoundingModeRTEFloat64;
3108 if ((fp16rte != fp64rte))
3109 return fail("shaderRoundingModeRTEFloat16 and 64");
3110
3111 vk::VkBool32 fp16rtz = fcProperties.shaderRoundingModeRTZFloat16;
3112 vk::VkBool32 fp64rtz = fcProperties.shaderRoundingModeRTZFloat64;
3113 if ((fp16rtz != fp64rtz))
3114 return fail("shaderRoundingModeRTZFloat16 and 64");
3115 }
3116
3117 if (fcProperties.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE)
3118 {
3119 vk::VkBool32 fp16flush = fcProperties.shaderDenormFlushToZeroFloat16;
3120 vk::VkBool32 fp32flush = fcProperties.shaderDenormFlushToZeroFloat32;
3121 vk::VkBool32 fp64flush = fcProperties.shaderDenormFlushToZeroFloat64;
3122 if ((fp16flush != fp32flush) || (fp32flush != fp64flush))
3123 return fail("shaderDenormFlushToZeroFloat*");
3124
3125 vk::VkBool32 fp16preserve = fcProperties.shaderDenormPreserveFloat16;
3126 vk::VkBool32 fp32preserve = fcProperties.shaderDenormPreserveFloat32;
3127 vk::VkBool32 fp64preserve = fcProperties.shaderDenormPreserveFloat64;
3128 if ((fp16preserve != fp32preserve) || (fp32preserve != fp64preserve))
3129 return fail("shaderDenormPreserveFloat*");
3130 }
3131 else if (fcProperties.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY)
3132 {
3133 vk::VkBool32 fp16flush = fcProperties.shaderDenormFlushToZeroFloat16;
3134 vk::VkBool32 fp64flush = fcProperties.shaderDenormFlushToZeroFloat64;
3135 if ((fp16flush != fp64flush))
3136 return fail("shaderDenormFlushToZeroFloat16 and 64");
3137
3138 vk::VkBool32 fp16preserve = fcProperties.shaderDenormPreserveFloat16;
3139 vk::VkBool32 fp64preserve = fcProperties.shaderDenormPreserveFloat64;
3140 if ((fp16preserve != fp64preserve))
3141 return fail("shaderDenormPreserveFloat16 and 64");
3142 }
3143
3144 return tcu::TestStatus::pass("Pass");
3145 }
3146
// ComputeTestGroupBuilder contains logic that creates compute shaders
// for all test cases. As most tests in spirv-assembly it uses functionality
// implemented in vktSpvAsmComputeShaderTestUtil.cpp.
class ComputeTestGroupBuilder: public TestGroupBuilderBase
{
public:

    // Initializes the operation test case builder and sets the strings of
    // both shader templates.
    void init();

    // Adds a child group named groupName to parentGroup and fills it with one
    // compute shader case per operation test case built for floatType; cases
    // whose expected output is V_UNUSED are skipped.
    void createOperationTests(TestCaseGroup* parentGroup,
                              const char* groupName,
                              FloatType floatType,
                              bool argumentsFromInput) override;

    // Adds the "independence_settings" group with the settings test cases and
    // the verifyIndependenceSettings function case.
    void createSettingsTests(TestCaseGroup* parentGroup) override;

protected:

    // Fill the compute shader specification for an operation test case and
    // for a settings test case respectively.
    void fillShaderSpec(const OperationTestCaseInfo& testCaseInfo,
                        ComputeShaderSpec& csSpec) const;
    void fillShaderSpec(const SettingsTestCaseInfo& testCaseInfo,
                        ComputeShaderSpec& csSpec) const;

private:


    StringTemplate m_operationShaderTemplate;       // template of shaders used by operation tests
    StringTemplate m_settingsShaderTemplate;        // template of shaders used by settings tests
    TestCasesBuilder m_operationTestCaseBuilder;    // source of operation test cases and operation definitions
};
3177
// Initializes the operation test case builder and sets the two SPIR-V assembly
// templates used by this builder. The ${...} tokens in the templates are
// placeholders that are substituted when a shader is specialized.
void ComputeTestGroupBuilder::init()
{
    m_operationTestCaseBuilder.init();

    // generic compute shader template with common code for all
    // float types and all possible operations listed in OperationId enum
    m_operationShaderTemplate.setString(
        "OpCapability Shader\n"
        "${capabilities}"

        "OpExtension \"SPV_KHR_float_controls\"\n"
        "${extensions}"

        "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
        "OpMemoryModel Logical GLSL450\n"
        "OpEntryPoint GLCompute %main \"main\" %id\n"
        "OpExecutionMode %main LocalSize 1 1 1\n"
        "${execution_mode}"

        "OpDecorate %id BuiltIn GlobalInvocationId\n"

        // some tests require additional annotations
        "${annotations}"

        "%type_void = OpTypeVoid\n"
        "%type_voidf = OpTypeFunction %type_void\n"
        "%type_bool = OpTypeBool\n"
        "%type_u32 = OpTypeInt 32 0\n"
        "%type_i32 = OpTypeInt 32 1\n"
        "%type_i32_fptr = OpTypePointer Function %type_i32\n"
        "%type_u32_vec2 = OpTypeVector %type_u32 2\n"
        "%type_u32_vec3 = OpTypeVector %type_u32 3\n"
        "%type_u32_vec3_ptr = OpTypePointer Input %type_u32_vec3\n"

        "%c_i32_0 = OpConstant %type_i32 0\n"
        "%c_i32_1 = OpConstant %type_i32 1\n"
        "%c_i32_2 = OpConstant %type_i32 2\n"
        "%c_u32_1 = OpConstant %type_u32 1\n"

        // if input float type has different width than output then
        // both types are defined here along with all types derived from
        // them that are commonly used by tests; some tests also define
        // their own types (those that are needed just by this single test)
        "${types}"

        // SSBO definitions
        "${io_definitions}"

        "%id = OpVariable %type_u32_vec3_ptr Input\n"

        // set of default constants per float type is placed here,
        // operation tests can also define additional constants.
        "${constants}"

        // O_RETURN_VAL defines function here and because
        // of that this token needs to be directly before main function
        "${functions}"

        "%main = OpFunction %type_void None %type_voidf\n"
        "%label = OpLabel\n"

        "${variables}"

        // depending on test case arguments are either read from input ssbo
        // or generated in spir-v code - in later case shader input is not used
        "${arguments}"

        // perform test commands
        "${commands}"

        // save result to SSBO
        "${save_result}"

        "OpReturn\n"
        "OpFunctionEnd\n");

    // compute shader template used by the settings tests; the input SSBO is
    // declared by the template itself, output definitions are supplied by
    // the test case
    m_settingsShaderTemplate.setString(
        "OpCapability Shader\n"
        "${capabilities}"

        "OpExtension \"SPV_KHR_float_controls\"\n"
        "${extensions}"

        "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
        "OpMemoryModel Logical GLSL450\n"
        "OpEntryPoint GLCompute %main \"main\" %id\n"
        "OpExecutionMode %main LocalSize 1 1 1\n"
        "${execution_modes}"

        // annotations
        "OpDecorate %SSBO_in BufferBlock\n"
        "OpDecorate %ssbo_in DescriptorSet 0\n"
        "OpDecorate %ssbo_in Binding 0\n"
        "OpDecorate %ssbo_in NonWritable\n"
        "${io_annotations}"

        "OpDecorate %id BuiltIn GlobalInvocationId\n"

        // types
        "%type_void = OpTypeVoid\n"
        "%type_voidf = OpTypeFunction %type_void\n"
        "%type_u32 = OpTypeInt 32 0\n"
        "%type_i32 = OpTypeInt 32 1\n"
        "%type_i32_fptr = OpTypePointer Function %type_i32\n"
        "%type_u32_vec3 = OpTypeVector %type_u32 3\n"
        "%type_u32_vec3_ptr = OpTypePointer Input %type_u32_vec3\n"

        "%c_i32_0 = OpConstant %type_i32 0\n"
        "%c_i32_1 = OpConstant %type_i32 1\n"
        "%c_i32_2 = OpConstant %type_i32 2\n"

        "${types}"

        // in SSBO definition
        "%SSBO_in = OpTypeStruct ${in_struct}\n"
        "%up_SSBO_in = OpTypePointer Uniform %SSBO_in\n"
        "%ssbo_in = OpVariable %up_SSBO_in Uniform\n"

        // out SSBO definitions
        "${out_definitions}"

        "%id = OpVariable %type_u32_vec3_ptr Input\n"
        "%main = OpFunction %type_void None %type_voidf\n"
        "%label = OpLabel\n"

        "${commands}"

        "${save_result}"

        "OpReturn\n"
        "OpFunctionEnd\n");
}
3310
createOperationTests(TestCaseGroup * parentGroup,const char * groupName,FloatType floatType,bool argumentsFromInput)3311 void ComputeTestGroupBuilder::createOperationTests(TestCaseGroup* parentGroup, const char* groupName, FloatType floatType, bool argumentsFromInput)
3312 {
3313 TestContext& testCtx = parentGroup->getTestContext();
3314 TestCaseGroup* group = new TestCaseGroup(testCtx, groupName);
3315 parentGroup->addChild(group);
3316
3317 TestCaseVect testCases;
3318 m_operationTestCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
3319
3320 TestCaseVect::const_iterator currTestCase = testCases.begin();
3321 TestCaseVect::const_iterator lastTestCase = testCases.end();
3322 while(currTestCase != lastTestCase)
3323 {
3324 const OperationTestCase& testCase = *currTestCase;
3325 ++currTestCase;
3326
3327 // skip cases with undefined output
3328 if (testCase.expectedOutput == V_UNUSED)
3329 continue;
3330
3331 OperationTestCaseInfo testCaseInfo =
3332 {
3333 floatType,
3334 argumentsFromInput,
3335 VK_SHADER_STAGE_COMPUTE_BIT,
3336 m_operationTestCaseBuilder.getOperation(testCase.operationId),
3337 testCase
3338 };
3339
3340 ComputeShaderSpec csSpec;
3341
3342 fillShaderSpec(testCaseInfo, csSpec);
3343
3344 string testName = replace(testCase.baseName, "op", testCaseInfo.operation.name);
3345 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), csSpec));
3346 }
3347 }
3348
createSettingsTests(TestCaseGroup * parentGroup)3349 void ComputeTestGroupBuilder::createSettingsTests(TestCaseGroup* parentGroup)
3350 {
3351 TestContext& testCtx = parentGroup->getTestContext();
3352 TestCaseGroup* group = new TestCaseGroup(testCtx, "independence_settings");
3353 parentGroup->addChild(group);
3354
3355 using SFCI = VkShaderFloatControlsIndependence;
3356 const SFCI independence32 = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
3357 const SFCI independenceAll = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
3358
3359 vector<SettingsTestCaseInfo> testCases =
3360 {
3361 // name mode independenceSetting fp16Option fp32Option fp64Option fp16Without16bitstorage
3362
3363 // test rounding modes when only two float widths are available
3364 { "rounding_ind_all_fp16_rte_fp32_rtz", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_UNUSED, false },
3365 { "rounding_ind_all_fp16_rtz_fp32_rte", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_UNUSED, false },
3366 { "rounding_ind_32_fp16_rte_fp32_rtz", SM_ROUNDING, independence32, SO_RTE, SO_RTZ, SO_UNUSED, false },
3367 { "rounding_ind_32_fp16_rtz_fp32_rte", SM_ROUNDING, independence32, SO_RTZ, SO_RTE, SO_UNUSED, false },
3368 { "rounding_ind_all_fp16_rte_fp64_rtz", SM_ROUNDING, independenceAll, SO_RTE, SO_UNUSED, SO_RTZ, false },
3369 { "rounding_ind_all_fp16_rtz_fp64_rte", SM_ROUNDING, independenceAll, SO_RTZ, SO_UNUSED, SO_RTE, false },
3370 { "rounding_ind_all_fp32_rte_fp64_rtz", SM_ROUNDING, independenceAll, SO_UNUSED, SO_RTE, SO_RTZ, false },
3371 { "rounding_ind_all_fp32_rtz_fp64_rte", SM_ROUNDING, independenceAll, SO_UNUSED, SO_RTZ, SO_RTE, false },
3372 { "rounding_ind_32_fp32_rte_fp64_rtz", SM_ROUNDING, independence32, SO_UNUSED, SO_RTE, SO_RTZ, false },
3373 { "rounding_ind_32_fp32_rtz_fp64_rte", SM_ROUNDING, independence32, SO_UNUSED, SO_RTZ, SO_RTE, false },
3374
3375 // test rounding modes when three widths are available
3376 { "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rtz", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_RTZ, false },
3377 { "rounding_ind_32_fp16_rtz_fp32_rte_fp64_rtz", SM_ROUNDING, independence32, SO_RTZ, SO_RTE, SO_RTZ, false },
3378 { "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rte", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_RTE, false },
3379 { "rounding_ind_32_fp16_rte_fp32_rtz_fp64_rte", SM_ROUNDING, independence32, SO_RTE, SO_RTZ, SO_RTE, false },
3380 { "rounding_ind_all_fp16_rtz_fp32_rtz_fp64_rte", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTZ, SO_RTE, false },
3381 { "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rte", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_RTE, false },
3382 { "rounding_ind_all_fp16_rte_fp32_rte_fp64_rtz", SM_ROUNDING, independenceAll, SO_RTE, SO_RTE, SO_RTZ, false },
3383 { "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rtz", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_RTZ, false },
3384
3385 // test denorm settings when only two float widths are available
3386 { "denorm_ind_all_fp16_flush_fp32_preserve", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE, SO_UNUSED, false },
3387 { "denorm_ind_all_fp16_preserve_fp32_flush", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH, SO_UNUSED, false },
3388 { "denorm_ind_32_fp16_flush_fp32_preserve", SM_DENORMS, independence32, SO_FLUSH, SO_PRESERVE, SO_UNUSED, false },
3389 { "denorm_ind_32_fp16_preserve_fp32_flush", SM_DENORMS, independence32, SO_PRESERVE, SO_FLUSH, SO_UNUSED, false },
3390 { "denorm_ind_all_fp16_flush_fp64_preserve", SM_DENORMS, independenceAll, SO_FLUSH, SO_UNUSED, SO_PRESERVE, false },
3391 { "denorm_ind_all_fp16_preserve_fp64_flush", SM_DENORMS, independenceAll, SO_PRESERVE, SO_UNUSED, SO_FLUSH, false },
3392 { "denorm_ind_all_fp32_flush_fp64_preserve", SM_DENORMS, independenceAll, SO_UNUSED, SO_FLUSH, SO_PRESERVE, false },
3393 { "denorm_ind_all_fp32_preserve_fp64_flush", SM_DENORMS, independenceAll, SO_UNUSED, SO_PRESERVE, SO_FLUSH, false },
3394 { "denorm_ind_32_fp32_flush_fp64_preserve", SM_DENORMS, independence32, SO_UNUSED, SO_FLUSH, SO_PRESERVE, false },
3395 { "denorm_ind_32_fp32_preserve_fp64_flush", SM_DENORMS, independence32, SO_UNUSED, SO_PRESERVE, SO_FLUSH, false },
3396
3397 // test denorm settings when three widths are available
3398 { "denorm_ind_all_fp16_preserve_fp32_flush_fp64_preserve", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH, SO_PRESERVE, false },
3399 { "denorm_ind_32_fp16_preserve_fp32_flush_fp64_preserve", SM_DENORMS, independence32, SO_PRESERVE, SO_FLUSH, SO_PRESERVE, false },
3400 { "denorm_ind_all_fp16_flush_fp32_preserve_fp64_flush", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE, SO_FLUSH, false },
3401 { "denorm_ind_32_fp16_flush_fp32_preserve_fp64_flush", SM_DENORMS, independence32, SO_FLUSH, SO_PRESERVE, SO_FLUSH, false },
3402 { "denorm_ind_all_fp16_preserve_fp32_preserve_fp64_flush", SM_DENORMS, independenceAll, SO_PRESERVE, SO_PRESERVE, SO_FLUSH, false },
3403 { "denorm_ind_all_fp16_preserve_fp32_flush_fp64_flush", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH, SO_FLUSH, false },
3404 { "denorm_ind_all_fp16_flush_fp32_flush_fp64_preserve", SM_DENORMS, independenceAll, SO_FLUSH, SO_FLUSH, SO_PRESERVE, false },
3405 { "denorm_ind_all_fp16_flush_fp32_preserve_fp64_preserve", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE, SO_PRESERVE, false },
3406
3407 // Same fp16 tests but without requiring VK_KHR_16bit_storage
3408 // test rounding modes when only two float widths are available
3409 { "rounding_ind_all_fp16_rte_fp32_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_UNUSED, true },
3410 { "rounding_ind_all_fp16_rtz_fp32_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_UNUSED, true },
3411 { "rounding_ind_32_fp16_rte_fp32_rtz_nostorage", SM_ROUNDING, independence32, SO_RTE, SO_RTZ, SO_UNUSED, true },
3412 { "rounding_ind_32_fp16_rtz_fp32_rte_nostorage", SM_ROUNDING, independence32, SO_RTZ, SO_RTE, SO_UNUSED, true },
3413 { "rounding_ind_all_fp16_rte_fp64_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_UNUSED, SO_RTZ, true },
3414 { "rounding_ind_all_fp16_rtz_fp64_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_UNUSED, SO_RTE, true },
3415
3416 // test rounding modes when three widths are available
3417 { "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_RTZ, true },
3418 { "rounding_ind_32_fp16_rtz_fp32_rte_fp64_rtz_nostorage", SM_ROUNDING, independence32, SO_RTZ, SO_RTE, SO_RTZ, true },
3419 { "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_RTE, true },
3420 { "rounding_ind_32_fp16_rte_fp32_rtz_fp64_rte_nostorage", SM_ROUNDING, independence32, SO_RTE, SO_RTZ, SO_RTE, true },
3421 { "rounding_ind_all_fp16_rtz_fp32_rtz_fp64_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTZ, SO_RTE, true },
3422 { "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_RTE, true },
3423 { "rounding_ind_all_fp16_rte_fp32_rte_fp64_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_RTE, SO_RTZ, true },
3424 { "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_RTZ, true },
3425
3426 // test denorm settings when only two float widths are available
3427 { "denorm_ind_all_fp16_flush_fp32_preserve_nostorage", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE, SO_UNUSED, true },
3428 { "denorm_ind_all_fp16_preserve_fp32_flush_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH, SO_UNUSED, true },
3429 { "denorm_ind_32_fp16_flush_fp32_preserve_nostorage", SM_DENORMS, independence32, SO_FLUSH, SO_PRESERVE, SO_UNUSED, true },
3430 { "denorm_ind_32_fp16_preserve_fp32_flush_nostorage", SM_DENORMS, independence32, SO_PRESERVE, SO_FLUSH, SO_UNUSED, true },
3431 { "denorm_ind_all_fp16_flush_fp64_preserve_nostorage", SM_DENORMS, independenceAll, SO_FLUSH, SO_UNUSED, SO_PRESERVE, true },
3432 { "denorm_ind_all_fp16_preserve_fp64_flush_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE, SO_UNUSED, SO_FLUSH, true },
3433
3434 // test denorm settings when three widths are available
3435 { "denorm_ind_all_fp16_preserve_fp32_flush_fp64_preserve_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH, SO_PRESERVE, true },
3436 { "denorm_ind_32_fp16_preserve_fp32_flush_fp64_preserve_nostorage", SM_DENORMS, independence32, SO_PRESERVE, SO_FLUSH, SO_PRESERVE, true },
3437 { "denorm_ind_all_fp16_flush_fp32_preserve_fp64_flush_nostorage", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE, SO_FLUSH, true },
3438 { "denorm_ind_32_fp16_flush_fp32_preserve_fp64_flush_nostorage", SM_DENORMS, independence32, SO_FLUSH, SO_PRESERVE, SO_FLUSH, true },
3439 { "denorm_ind_all_fp16_preserve_fp32_preserve_fp64_flush_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE, SO_PRESERVE, SO_FLUSH, true },
3440 { "denorm_ind_all_fp16_preserve_fp32_flush_fp64_flush_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH, SO_FLUSH, true },
3441 { "denorm_ind_all_fp16_flush_fp32_flush_fp64_preserve_nostorage", SM_DENORMS, independenceAll, SO_FLUSH, SO_FLUSH, SO_PRESERVE, true },
3442 { "denorm_ind_all_fp16_flush_fp32_preserve_fp64_preserve_nostorage", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE, SO_PRESERVE, true },
3443 };
3444
3445 for(const auto& testCase : testCases)
3446 {
3447 ComputeShaderSpec csSpec;
3448 fillShaderSpec(testCase, csSpec);
3449 group->addChild(new SpvAsmComputeShaderCase(testCtx, testCase.name, csSpec));
3450 }
3451
3452 addFunctionCase(group, "independence_settings", verifyIndependenceSettings);
3453 }
3454
fillShaderSpec(const OperationTestCaseInfo & testCaseInfo,ComputeShaderSpec & csSpec) const3455 void ComputeTestGroupBuilder::fillShaderSpec(const OperationTestCaseInfo& testCaseInfo,
3456 ComputeShaderSpec& csSpec) const
3457 {
3458 // LUT storing functions used to verify test results
3459 const VerifyIOFunc checkFloatsLUT[] =
3460 {
3461 checkFloats<Float16, deFloat16>,
3462 checkFloats<Float32, float>,
3463 checkFloats<Float64, double>
3464 };
3465
3466 const Operation& testOperation = testCaseInfo.operation;
3467 const OperationTestCase& testCase = testCaseInfo.testCase;
3468 FloatType outFloatType = testCaseInfo.outFloatType;
3469
3470 SpecializedOperation specOpData;
3471 specializeOperation(testCaseInfo, specOpData);
3472
3473 TypeSnippetsSP inTypeSnippets = specOpData.inTypeSnippets;
3474 TypeSnippetsSP outTypeSnippets = specOpData.outTypeSnippets;
3475 FloatType inFloatType = specOpData.inFloatType;
3476
3477 bool outFp16WithoutStorage = (outFloatType == FP16) && testCase.fp16Without16BitStorage;
3478 bool inFp16WithoutStorage = (inFloatType == FP16) && testCase.fp16Without16BitStorage;
3479
3480 // UnpackHalf2x16 is a corner case - it returns two 32-bit floats but
3481 // internaly operates on fp16 and this type should be used by float controls
3482 FloatType inFloatTypeForCaps = inFloatType;
3483 string inFloatWidthForCaps = inTypeSnippets->bitWidth;
3484 if (testCase.operationId == OID_UPH_DENORM)
3485 {
3486 inFloatTypeForCaps = FP16;
3487 inFloatWidthForCaps = "16";
3488 }
3489
3490 string behaviorCapability;
3491 string behaviorExecutionMode;
3492 getBehaviorCapabilityAndExecutionMode(testCase.behaviorFlags,
3493 inFloatWidthForCaps,
3494 outTypeSnippets->bitWidth,
3495 behaviorCapability,
3496 behaviorExecutionMode);
3497
3498 string capabilities = behaviorCapability + outTypeSnippets->capabilities;
3499 string extensions = outTypeSnippets->extensions;
3500 string annotations = inTypeSnippets->inputAnnotationsSnippet + outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
3501 string types = outTypeSnippets->typeDefinitionsSnippet;
3502 string constants = outTypeSnippets->constantsDefinitionsSnippet;
3503 string ioDefinitions = "";
3504
3505 // Getting rid of 16bit_storage dependency imply replacing lots of snippets.
3506 {
3507 if (inFp16WithoutStorage)
3508 {
3509 ioDefinitions = inTypeSnippets->inputDefinitionsFp16Snippet;
3510 }
3511 else
3512 {
3513 ioDefinitions = inTypeSnippets->inputDefinitionsSnippet;
3514 }
3515
3516 if (outFp16WithoutStorage)
3517 {
3518 extensions = outTypeSnippets->extensionsFp16Without16BitStorage;
3519 capabilities = behaviorCapability + outTypeSnippets->capabilitiesFp16Without16BitStorage;
3520 types += outTypeSnippets->typeDefinitionsFp16Snippet;
3521 annotations += outTypeSnippets->typeAnnotationsFp16Snippet;
3522 ioDefinitions += outTypeSnippets->outputDefinitionsFp16Snippet;
3523 }
3524 else
3525 {
3526 ioDefinitions += outTypeSnippets->outputDefinitionsSnippet;
3527 }
3528 }
3529
3530 bool outFp16TypeUsage = outTypeSnippets->loadStoreRequiresShaderFloat16;
3531 bool inFp16TypeUsage = false;
3532
3533 if (testOperation.isInputTypeRestricted)
3534 {
3535 annotations += inTypeSnippets->typeAnnotationsSnippet;
3536 types += inTypeSnippets->typeDefinitionsSnippet;
3537 constants += inTypeSnippets->constantsDefinitionsSnippet;
3538
3539 if (inFp16WithoutStorage)
3540 {
3541 annotations += inTypeSnippets->typeAnnotationsFp16Snippet;
3542 types += inTypeSnippets->typeDefinitionsFp16Snippet;
3543 capabilities += inTypeSnippets->capabilitiesFp16Without16BitStorage;
3544 extensions += inTypeSnippets->extensionsFp16Without16BitStorage;
3545 }
3546 else
3547 {
3548 capabilities += inTypeSnippets->capabilities;
3549 extensions += inTypeSnippets->extensions;
3550 }
3551
3552 inFp16TypeUsage = inTypeSnippets->loadStoreRequiresShaderFloat16;
3553 }
3554
3555 map<string, string> specializations;
3556 specializations["extensions"] = extensions;
3557 specializations["execution_mode"] = behaviorExecutionMode;
3558 specializations["annotations"] = annotations + specOpData.annotations;
3559 specializations["types"] = types + specOpData.types;
3560 specializations["io_definitions"] = ioDefinitions;
3561 specializations["variables"] = specOpData.variables;
3562 specializations["functions"] = specOpData.functions;
3563 specializations["save_result"] = (outFp16WithoutStorage ? outTypeSnippets->storeResultsFp16Snippet : outTypeSnippets->storeResultsSnippet);
3564 specializations["arguments"] = specOpData.arguments;
3565 specializations["commands"] = specOpData.commands;
3566
3567 // Build constants. They are only needed sometimes.
3568 const FloatStatementUsageFlags argsAnyFloatConstMask = B_STATEMENT_USAGE_ARGS_CONST_FLOAT | B_STATEMENT_USAGE_ARGS_CONST_FP16 | B_STATEMENT_USAGE_ARGS_CONST_FP32 | B_STATEMENT_USAGE_ARGS_CONST_FP64;
3569 const bool argsUseFPConstants = (specOpData.argumentsUsesFloatConstant & argsAnyFloatConstMask) != 0;
3570 const FloatStatementUsageFlags commandsAnyFloatConstMask = B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_CONST_FP16 | B_STATEMENT_USAGE_COMMANDS_CONST_FP32 | B_STATEMENT_USAGE_COMMANDS_CONST_FP64;
3571 const bool commandsUseFPConstants = (testCaseInfo.operation.statementUsageFlags & commandsAnyFloatConstMask) != 0;
3572 const bool needConstants = argsUseFPConstants || commandsUseFPConstants;
3573 const FloatStatementUsageFlags constsFloatTypeMask = B_STATEMENT_USAGE_CONSTS_TYPE_FLOAT | B_STATEMENT_USAGE_CONSTS_TYPE_FP16;
3574 const bool constsUsesFP16Type = (testCaseInfo.operation.statementUsageFlags & constsFloatTypeMask) != 0;
3575 const bool loadStoreRequiresShaderFloat16 = inFp16TypeUsage || outFp16TypeUsage;
3576 const bool usesFP16Constants = constsUsesFP16Type || (needConstants && loadStoreRequiresShaderFloat16);
3577
3578 specializations["constants"] = "";
3579 if (needConstants || outFp16WithoutStorage)
3580 {
3581 specializations["constants"] = constants;
3582 }
3583 specializations["constants"] += specOpData.constants;
3584
3585 // check which format features are needed
3586 bool float16FeatureRequired = (outFloatType == FP16) || (inFloatType == FP16);
3587 bool float64FeatureRequired = (outFloatType == FP64) || (inFloatType == FP64);
3588
3589 // Determine required capabilities.
3590 bool float16CapabilityAlreadyAdded = inFp16WithoutStorage || outFp16WithoutStorage;
3591 if ((testOperation.floatUsage == FLOAT_ARITHMETIC && float16FeatureRequired && !float16CapabilityAlreadyAdded) || usesFP16Constants)
3592 {
3593 capabilities += "OpCapability Float16\n";
3594 }
3595 specializations["capabilities"] = capabilities;
3596
3597 // specialize shader
3598 const string shaderCode = m_operationShaderTemplate.specialize(specializations);
3599
3600 // construct input and output buffers of proper types
3601 TypeValuesSP inTypeValues = m_typeData.at(inFloatType).values;
3602 TypeValuesSP outTypeValues = m_typeData.at(outFloatType).values;
3603 BufferSp inBufferSp = inTypeValues->constructInputBuffer(testCase.input);
3604 BufferSp outBufferSp = outTypeValues->constructOutputBuffer(testCase.expectedOutput);
3605 csSpec.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3606 csSpec.outputs.push_back(Resource(outBufferSp));
3607
3608 // check which format features are needed
3609 setupVulkanFeatures(inFloatTypeForCaps, // usualy same as inFloatType - different only for UnpackHalf2x16
3610 outFloatType,
3611 testCase.behaviorFlags,
3612 float64FeatureRequired,
3613 csSpec.requestedVulkanFeatures);
3614
3615 csSpec.assembly = shaderCode;
3616 csSpec.numWorkGroups = IVec3(1, 1, 1);
3617 csSpec.verifyIO = checkFloatsLUT[outFloatType];
3618
3619 csSpec.extensions.push_back("VK_KHR_shader_float_controls");
3620 bool needShaderFloat16 = float16CapabilityAlreadyAdded;
3621
3622 if (float16FeatureRequired && !testCase.fp16Without16BitStorage)
3623 {
3624 csSpec.extensions.push_back("VK_KHR_16bit_storage");
3625 csSpec.requestedVulkanFeatures.ext16BitStorage.storageBuffer16BitAccess = true;
3626 needShaderFloat16 |= testOperation.floatUsage == FLOAT_ARITHMETIC;
3627 }
3628 needShaderFloat16 |= usesFP16Constants;
3629 if (needShaderFloat16)
3630 {
3631 csSpec.extensions.push_back("VK_KHR_shader_float16_int8");
3632 csSpec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
3633 }
3634 if (float64FeatureRequired)
3635 csSpec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
3636 }
3637
fillShaderSpec(const SettingsTestCaseInfo & testCaseInfo,ComputeShaderSpec & csSpec) const3638 void ComputeTestGroupBuilder::fillShaderSpec(const SettingsTestCaseInfo& testCaseInfo,
3639 ComputeShaderSpec& csSpec) const
3640 {
3641 string capabilities;
3642 string fp16behaviorName;
3643 string fp32behaviorName;
3644 string fp64behaviorName;
3645
3646 ValueId addArgs[2];
3647 ValueId fp16resultValue;
3648 ValueId fp32resultValue;
3649 ValueId fp64resultValue;
3650
3651 vk::VkPhysicalDeviceFloatControlsProperties& floatControls = csSpec.requestedVulkanFeatures.floatControlsProperties;
3652 bool fp16Required = testCaseInfo.fp16Option != SO_UNUSED;
3653 bool fp32Required = testCaseInfo.fp32Option != SO_UNUSED;
3654 bool fp64Required = testCaseInfo.fp64Option != SO_UNUSED;
3655
3656 if (testCaseInfo.testedMode == SM_ROUNDING)
3657 {
3658 // make sure that only rounding options are used
3659 DE_ASSERT((testCaseInfo.fp16Option != SO_FLUSH) &&
3660 (testCaseInfo.fp16Option != SO_PRESERVE) &&
3661 (testCaseInfo.fp32Option != SO_FLUSH) &&
3662 (testCaseInfo.fp32Option != SO_PRESERVE) &&
3663 (testCaseInfo.fp64Option != SO_FLUSH) &&
3664 (testCaseInfo.fp64Option != SO_PRESERVE));
3665
3666 bool fp16RteRounding = testCaseInfo.fp16Option == SO_RTE;
3667 bool fp32RteRounding = testCaseInfo.fp32Option == SO_RTE;
3668 bool fp64RteRounding = testCaseInfo.fp64Option == SO_RTE;
3669
3670 const string& rte = m_behaviorToName.at(B_RTE_ROUNDING);
3671 const string& rtz = m_behaviorToName.at(B_RTZ_ROUNDING);
3672
3673 fp16behaviorName = fp16RteRounding ? rte : rtz;
3674 fp32behaviorName = fp32RteRounding ? rte : rtz;
3675 fp64behaviorName = fp64RteRounding ? rte : rtz;
3676
3677 addArgs[0] = V_ADD_ARG_A;
3678 addArgs[1] = V_ADD_ARG_B;
3679 fp16resultValue = fp16RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
3680 fp32resultValue = fp32RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
3681 fp64resultValue = fp64RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
3682
3683 capabilities = "OpCapability " + rte + "\n"
3684 "OpCapability " + rtz + "\n";
3685
3686 floatControls.roundingModeIndependence = testCaseInfo.independenceSetting;
3687 floatControls.denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;
3688 floatControls.shaderRoundingModeRTEFloat16 = fp16RteRounding;
3689 floatControls.shaderRoundingModeRTZFloat16 = fp16Required && !fp16RteRounding;
3690 floatControls.shaderRoundingModeRTEFloat32 = fp32RteRounding;
3691 floatControls.shaderRoundingModeRTZFloat32 = fp32Required && !fp32RteRounding;
3692 floatControls.shaderRoundingModeRTEFloat64 = fp64RteRounding;
3693 floatControls.shaderRoundingModeRTZFloat64 = fp64Required && !fp64RteRounding;
3694 }
3695 else // SM_DENORMS
3696 {
3697 // make sure that only denorm options are used
3698 DE_ASSERT((testCaseInfo.fp16Option != SO_RTE) &&
3699 (testCaseInfo.fp16Option != SO_RTZ) &&
3700 (testCaseInfo.fp32Option != SO_RTE) &&
3701 (testCaseInfo.fp32Option != SO_RTZ) &&
3702 (testCaseInfo.fp64Option != SO_RTE) &&
3703 (testCaseInfo.fp64Option != SO_RTZ));
3704
3705 bool fp16DenormPreserve = testCaseInfo.fp16Option == SO_PRESERVE;
3706 bool fp32DenormPreserve = testCaseInfo.fp32Option == SO_PRESERVE;
3707 bool fp64DenormPreserve = testCaseInfo.fp64Option == SO_PRESERVE;
3708
3709 const string& preserve = m_behaviorToName.at(B_DENORM_PRESERVE);
3710 const string& flush = m_behaviorToName.at(B_DENORM_FLUSH);
3711
3712 fp16behaviorName = fp16DenormPreserve ? preserve : flush;
3713 fp32behaviorName = fp32DenormPreserve ? preserve : flush;
3714 fp64behaviorName = fp64DenormPreserve ? preserve : flush;
3715
3716 addArgs[0] = V_DENORM;
3717 addArgs[1] = V_DENORM;
3718 fp16resultValue = fp16DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO_OR_DENORM_TIMES_TWO;
3719 fp32resultValue = fp32DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO;
3720 fp64resultValue = fp64DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO;
3721
3722 capabilities = "OpCapability " + preserve + "\n"
3723 "OpCapability " + flush + "\n";
3724
3725 floatControls.denormBehaviorIndependence = testCaseInfo.independenceSetting;
3726 floatControls.roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;
3727 floatControls.shaderDenormPreserveFloat16 = fp16DenormPreserve;
3728 floatControls.shaderDenormFlushToZeroFloat16 = fp16Required && !fp16DenormPreserve;
3729 floatControls.shaderDenormPreserveFloat32 = fp32DenormPreserve;
3730 floatControls.shaderDenormFlushToZeroFloat32 = fp32Required && !fp32DenormPreserve;
3731 floatControls.shaderDenormPreserveFloat64 = fp64DenormPreserve;
3732 floatControls.shaderDenormFlushToZeroFloat64 = fp64Required && !fp64DenormPreserve;
3733 }
3734
3735 const auto& fp64Data = m_typeData.at(FP64);
3736 const auto& fp32Data = m_typeData.at(FP32);
3737 const auto& fp16Data = m_typeData.at(FP16);
3738
3739 deUint32 attributeIndex = 0;
3740 deUint32 attributeOffset = 0;
3741 string attribute;
3742 string extensions = "";
3743 string executionModes = "";
3744 string ioAnnotations = "";
3745 string types = "";
3746 string inStruct = "";
3747 string outDefinitions = "";
3748 string commands = "";
3749 string saveResult = "";
3750
3751 // construct single input buffer containing arguments for all float widths
3752 // (maxPerStageDescriptorStorageBuffers can be min 4 and we need 3 for outputs)
3753 deUint32 inputOffset = 0;
3754 std::vector<deUint8> inputData ((fp64Required * sizeof(double) + sizeof(float) + fp16Required * sizeof(deFloat16)) * 2);
3755
3756 // to follow storage buffer layout rules we store data in ssbo in order 64 -> 16
3757 if (fp64Required)
3758 {
3759 capabilities += fp64Data.snippets->capabilities;
3760 executionModes += "OpExecutionMode %main " + fp64behaviorName + " 64\n";
3761 attribute = to_string(attributeIndex);
3762 ioAnnotations += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3763 fp64Data.snippets->multiOutputAnnotationsSnippet +
3764 "OpDecorate %ssbo_f64_out Binding " + to_string(attributeIndex+1) + "\n";
3765 types += fp64Data.snippets->minTypeDefinitionsSnippet;
3766 inStruct += " %type_f64_arr_2";
3767 outDefinitions += fp64Data.snippets->multiOutputDefinitionsSnippet;
3768 commands += replace(fp64Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
3769 "%result64 = OpFAdd %type_f64 %arg1_f64 %arg2_f64\n";
3770 saveResult += fp64Data.snippets->multiStoreResultsSnippet;
3771 attributeOffset += 2 * static_cast<deUint32>(sizeof(double));
3772 attributeIndex++;
3773
3774 fp64Data.values->fillInputData(addArgs, inputData, inputOffset);
3775
3776 // construct separate buffers for outputs to make validation easier
3777 BufferSp fp64OutBufferSp = fp64Data.values->constructOutputBuffer(fp64resultValue);
3778 csSpec.outputs.push_back(Resource(fp64OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, reinterpret_cast<void*>(BufferDataType::DATA_FP64)));
3779
3780 csSpec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
3781 }
3782 if (fp32Required)
3783 {
3784 executionModes += "OpExecutionMode %main " + fp32behaviorName + " 32\n";
3785 attribute = to_string(attributeIndex);
3786 ioAnnotations += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3787 fp32Data.snippets->multiOutputAnnotationsSnippet +
3788 "OpDecorate %ssbo_f32_out Binding " + to_string(attributeIndex+1) + "\n";
3789 types += fp32Data.snippets->minTypeDefinitionsSnippet;
3790 inStruct += " %type_f32_arr_2";
3791 outDefinitions += fp32Data.snippets->multiOutputDefinitionsSnippet;
3792 commands += replace(fp32Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
3793 "%result32 = OpFAdd %type_f32 %arg1_f32 %arg2_f32\n";
3794 saveResult += fp32Data.snippets->multiStoreResultsSnippet;
3795 attributeOffset += 2 * static_cast<deUint32>(sizeof(float));
3796 attributeIndex++;
3797
3798 fp32Data.values->fillInputData(addArgs, inputData, inputOffset);
3799
3800 BufferSp fp32OutBufferSp = fp32Data.values->constructOutputBuffer(fp32resultValue);
3801 csSpec.outputs.push_back(Resource(fp32OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, reinterpret_cast<void*>(BufferDataType::DATA_FP32)));
3802 }
3803 if (fp16Required)
3804 {
3805 if (testCaseInfo.fp16Without16BitStorage)
3806 {
3807 capabilities += fp16Data.snippets->capabilitiesFp16Without16BitStorage;
3808 extensions += fp16Data.snippets->extensionsFp16Without16BitStorage;
3809 executionModes += "OpExecutionMode %main " + fp16behaviorName + " 16\n";
3810 attribute = to_string(attributeIndex);
3811 ioAnnotations += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3812 fp16Data.snippets->multiOutputAnnotationsFp16Snippet +
3813 "OpDecorate %ssbo_u32_out Binding " + to_string(attributeIndex+1) + "\n";
3814 types += fp16Data.snippets->minTypeDefinitionsSnippet + fp16Data.snippets->typeDefinitionsFp16Snippet + "%type_f16_vec2 = OpTypeVector %type_f16 2\n";
3815 inStruct += " %type_u32_arr_1";
3816 outDefinitions += fp16Data.snippets->multiOutputDefinitionsFp16Snippet;
3817 commands += replace(fp16Data.snippets->multiArgumentsFromInputFp16Snippet, "${attr}", attribute) +
3818 "%result16 = OpFAdd %type_f16 %arg1_f16 %arg2_f16\n";
3819 saveResult += fp16Data.snippets->multiStoreResultsFp16Snippet;
3820
3821 csSpec.extensions.push_back("VK_KHR_shader_float16_int8");
3822 csSpec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
3823 }
3824 else
3825 {
3826 capabilities += fp16Data.snippets->capabilities +
3827 "OpCapability Float16\n";
3828 extensions += fp16Data.snippets->extensions;
3829 executionModes += "OpExecutionMode %main " + fp16behaviorName + " 16\n";
3830 attribute = to_string(attributeIndex);
3831 ioAnnotations += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3832 fp16Data.snippets->multiOutputAnnotationsSnippet +
3833 "OpDecorate %ssbo_f16_out Binding " + to_string(attributeIndex+1) + "\n";
3834 types += fp16Data.snippets->minTypeDefinitionsSnippet;
3835 inStruct += " %type_f16_arr_2";
3836 outDefinitions += fp16Data.snippets->multiOutputDefinitionsSnippet;
3837 commands += replace(fp16Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
3838 "%result16 = OpFAdd %type_f16 %arg1_f16 %arg2_f16\n";
3839 saveResult += fp16Data.snippets->multiStoreResultsSnippet;
3840
3841 csSpec.extensions.push_back("VK_KHR_16bit_storage");
3842 csSpec.requestedVulkanFeatures.ext16BitStorage.storageBuffer16BitAccess = true;
3843 }
3844
3845 fp16Data.values->fillInputData(addArgs, inputData, inputOffset);
3846
3847 BufferSp fp16OutBufferSp = fp16Data.values->constructOutputBuffer(fp16resultValue);
3848 csSpec.outputs.push_back(Resource(fp16OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, reinterpret_cast<void*>(BufferDataType::DATA_FP16)));
3849 }
3850
3851 BufferSp inBufferSp(new Buffer<deUint8>(inputData));
3852 csSpec.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3853
3854 map<string, string> specializations =
3855 {
3856 { "capabilities", capabilities },
3857 { "extensions", extensions },
3858 { "execution_modes", executionModes },
3859 { "io_annotations", ioAnnotations },
3860 { "types", types },
3861 { "in_struct", inStruct },
3862 { "out_definitions", outDefinitions },
3863 { "commands", commands },
3864 { "save_result", saveResult }
3865 };
3866
3867 // specialize shader
3868 const string shaderCode = m_settingsShaderTemplate.specialize(specializations);
3869
3870 csSpec.assembly = shaderCode;
3871 csSpec.numWorkGroups = IVec3(1, 1, 1);
3872 csSpec.verifyIO = checkMixedFloats;
3873 csSpec.extensions.push_back("VK_KHR_shader_float_controls");
3874 }
3875
getGraphicsShaderCode(vk::SourceCollections & dst,InstanceContext context)3876 void getGraphicsShaderCode (vk::SourceCollections& dst, InstanceContext context)
3877 {
3878 // this function is used only by GraphicsTestGroupBuilder but it couldn't
3879 // be implemented as a method because of how addFunctionCaseWithPrograms
3880 // was implemented
3881
3882 SpirvVersion targetSpirvVersion = context.resources.spirvVersion;
3883 const deUint32 vulkanVersion = dst.usedVulkanVersion;
3884
3885 static const string vertexTemplate =
3886 "OpCapability Shader\n"
3887 "${vert_capabilities}"
3888
3889 "OpExtension \"SPV_KHR_float_controls\"\n"
3890 "${vert_extensions}"
3891
3892 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
3893 "OpMemoryModel Logical GLSL450\n"
3894 "OpEntryPoint Vertex %main \"main\" %BP_stream %BP_position %BP_color %BP_gl_VertexIndex %BP_gl_InstanceIndex %BP_vertex_color %BP_vertex_result \n"
3895 "${vert_execution_mode}"
3896
3897 "OpMemberDecorate %BP_gl_PerVertex 0 BuiltIn Position\n"
3898 "OpMemberDecorate %BP_gl_PerVertex 1 BuiltIn PointSize\n"
3899 "OpMemberDecorate %BP_gl_PerVertex 2 BuiltIn ClipDistance\n"
3900 "OpMemberDecorate %BP_gl_PerVertex 3 BuiltIn CullDistance\n"
3901 "OpDecorate %BP_gl_PerVertex Block\n"
3902 "OpDecorate %BP_position Location 0\n"
3903 "OpDecorate %BP_color Location 1\n"
3904 "OpDecorate %BP_vertex_color Location 1\n"
3905 "OpDecorate %BP_vertex_result Location 2\n"
3906 "OpDecorate %BP_vertex_result Flat\n"
3907 "OpDecorate %BP_gl_VertexIndex BuiltIn VertexIndex\n"
3908 "OpDecorate %BP_gl_InstanceIndex BuiltIn InstanceIndex\n"
3909
3910 // some tests require additional annotations
3911 "${vert_annotations}"
3912
3913 // types required by most of tests
3914 "%type_void = OpTypeVoid\n"
3915 "%type_voidf = OpTypeFunction %type_void\n"
3916 "%type_bool = OpTypeBool\n"
3917 "%type_i32 = OpTypeInt 32 1\n"
3918 "%type_u32 = OpTypeInt 32 0\n"
3919 "%type_u32_vec2 = OpTypeVector %type_u32 2\n"
3920 "%type_i32_iptr = OpTypePointer Input %type_i32\n"
3921 "%type_i32_optr = OpTypePointer Output %type_i32\n"
3922 "%type_i32_fptr = OpTypePointer Function %type_i32\n"
3923
3924 // constants required by most of tests
3925 "%c_i32_0 = OpConstant %type_i32 0\n"
3926 "%c_i32_1 = OpConstant %type_i32 1\n"
3927 "%c_i32_2 = OpConstant %type_i32 2\n"
3928 "%c_u32_1 = OpConstant %type_u32 1\n"
3929
3930 // if input float type has different width then output then
3931 // both types are defined here along with all types derived from
3932 // them that are commonly used by tests; some tests also define
3933 // their own types (those that are needed just by this single test)
3934 "${vert_types}"
3935
3936 // SSBO is not universally supported for storing
3937 // data in vertex stages - it is onle read here
3938 "${vert_io_definitions}"
3939
3940 "%BP_gl_PerVertex = OpTypeStruct %type_f32_vec4 %type_f32 %type_f32_arr_1 %type_f32_arr_1\n"
3941 "%BP_gl_PerVertex_optr = OpTypePointer Output %BP_gl_PerVertex\n"
3942 "%BP_stream = OpVariable %BP_gl_PerVertex_optr Output\n"
3943 "%BP_position = OpVariable %type_f32_vec4_iptr Input\n"
3944 "%BP_color = OpVariable %type_f32_vec4_iptr Input\n"
3945 "%BP_gl_VertexIndex = OpVariable %type_i32_iptr Input\n"
3946 "%BP_gl_InstanceIndex = OpVariable %type_i32_iptr Input\n"
3947 "%BP_vertex_color = OpVariable %type_f32_vec4_optr Output\n"
3948
3949 // set of default constants per float type is placed here,
3950 // operation tests can also define additional constants.
3951 "${vert_constants}"
3952
3953 // O_RETURN_VAL defines function here and because
3954 // of that this token needs to be directly before main function.
3955 "${vert_functions}"
3956
3957 "%main = OpFunction %type_void None %type_voidf\n"
3958 "%label = OpLabel\n"
3959
3960 "${vert_variables}"
3961
3962 "%position = OpLoad %type_f32_vec4 %BP_position\n"
3963 "%gl_pos = OpAccessChain %type_f32_vec4_optr %BP_stream %c_i32_0\n"
3964 "OpStore %gl_pos %position\n"
3965 "%color = OpLoad %type_f32_vec4 %BP_color\n"
3966 "OpStore %BP_vertex_color %color\n"
3967
3968 // this token is filled only when vertex stage is tested;
3969 // depending on test case arguments are either read from input ssbo
3970 // or generated in spir-v code - in later case ssbo is not used
3971 "${vert_arguments}"
3972
3973 // when vertex shader is tested then test operations are performed
3974 // here and passed to fragment stage; if fragment stage ts tested
3975 // then ${comands} and ${vert_process_result} are rplaced with nop
3976 "${vert_commands}"
3977
3978 "${vert_process_result}"
3979
3980 "OpReturn\n"
3981 "OpFunctionEnd\n";
3982
3983
3984 static const string fragmentTemplate =
3985 "OpCapability Shader\n"
3986 "${frag_capabilities}"
3987
3988 "OpExtension \"SPV_KHR_float_controls\"\n"
3989 "${frag_extensions}"
3990
3991 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
3992 "OpMemoryModel Logical GLSL450\n"
3993 "OpEntryPoint Fragment %main \"main\" %BP_vertex_color %BP_vertex_result %BP_fragColor %BP_gl_FragCoord \n"
3994 "OpExecutionMode %main OriginUpperLeft\n"
3995 "${frag_execution_mode}"
3996
3997 "OpDecorate %BP_fragColor Location 0\n"
3998 "OpDecorate %BP_vertex_color Location 1\n"
3999 "OpDecorate %BP_vertex_result Location 2\n"
4000 "OpDecorate %BP_vertex_result Flat\n"
4001 "OpDecorate %BP_gl_FragCoord BuiltIn FragCoord\n"
4002
4003 // some tests require additional annotations
4004 "${frag_annotations}"
4005
4006 // types required by most of tests
4007 "%type_void = OpTypeVoid\n"
4008 "%type_voidf = OpTypeFunction %type_void\n"
4009 "%type_bool = OpTypeBool\n"
4010 "%type_i32 = OpTypeInt 32 1\n"
4011 "%type_u32 = OpTypeInt 32 0\n"
4012 "%type_u32_vec2 = OpTypeVector %type_u32 2\n"
4013 "%type_i32_iptr = OpTypePointer Input %type_i32\n"
4014 "%type_i32_optr = OpTypePointer Output %type_i32\n"
4015 "%type_i32_fptr = OpTypePointer Function %type_i32\n"
4016
4017 // constants required by most of tests
4018 "%c_i32_0 = OpConstant %type_i32 0\n"
4019 "%c_i32_1 = OpConstant %type_i32 1\n"
4020 "%c_i32_2 = OpConstant %type_i32 2\n"
4021 "%c_u32_1 = OpConstant %type_u32 1\n"
4022
4023 // if input float type has different width then output then
4024 // both types are defined here along with all types derived from
4025 // them that are commonly used by tests; some tests also define
4026 // their own types (those that are needed just by this single test)
4027 "${frag_types}"
4028
4029 "%BP_gl_FragCoord = OpVariable %type_f32_vec4_iptr Input\n"
4030 "%BP_vertex_color = OpVariable %type_f32_vec4_iptr Input\n"
4031 "%BP_fragColor = OpVariable %type_f32_vec4_optr Output\n"
4032
4033 // SSBO definitions
4034 "${frag_io_definitions}"
4035
4036 // set of default constants per float type is placed here,
4037 // operation tests can also define additional constants.
4038 "${frag_constants}"
4039
4040 // O_RETURN_VAL defines function here and because
4041 // of that this token needs to be directly before main function.
4042 "${frag_functions}"
4043
4044 "%main = OpFunction %type_void None %type_voidf\n"
4045 "%label = OpLabel\n"
4046
4047 "${frag_variables}"
4048
4049 // just pass vertex color - rendered image is not important in our case
4050 "%vertex_color = OpLoad %type_f32_vec4 %BP_vertex_color\n"
4051 "OpStore %BP_fragColor %vertex_color\n"
4052
4053 // this token is filled only when fragment stage is tested;
4054 // depending on test case arguments are either read from input ssbo or
4055 // generated in spir-v code - in later case ssbo is used only for output
4056 "${frag_arguments}"
4057
4058 // when fragment shader is tested then test operations are performed
4059 // here and saved to ssbo; if vertex stage was tested then its
4060 // result is just saved to ssbo here
4061 "${frag_commands}"
4062 "${frag_process_result}"
4063
4064 "OpReturn\n"
4065 "OpFunctionEnd\n";
4066
4067 dst.spirvAsmSources.add("vert", DE_NULL)
4068 << StringTemplate(vertexTemplate).specialize(context.testCodeFragments)
4069 << SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
4070 dst.spirvAsmSources.add("frag", DE_NULL)
4071 << StringTemplate(fragmentTemplate).specialize(context.testCodeFragments)
4072 << SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
4073 }
4074
// GraphicsTestGroupBuilder iterates over all test cases and creates tests for both
// vertex and fragment stages. As in most spirv-assembly tests, tests here are also
// executed using functionality defined in vktSpvAsmGraphicsShaderTestUtil.cpp, but
// because one of the requirements during development was that SSBO won't be used in
// the vertex stage we couldn't use the createTestForStage functions - we need a custom
// version for both vertex and fragment shaders at the same time. This was required
// as we needed to pass the result from the vertex stage to the fragment stage where
// it could be saved to an ssbo. To achieve that, InstanceContext is created manually
// in the createInstanceContext method.
4084 class GraphicsTestGroupBuilder: public TestGroupBuilderBase
4085 {
4086 public:
4087
4088 void init();
4089
4090 void createOperationTests(TestCaseGroup* parentGroup, const char* groupName, FloatType floatType, bool argumentsFromInput) override;
4091 void createSettingsTests(TestCaseGroup* parentGroup) override;
4092
4093 protected:
4094
4095 InstanceContext createInstanceContext(const OperationTestCaseInfo& testCaseInfo) const;
4096
4097 private:
4098
4099 TestCasesBuilder m_testCaseBuilder;
4100 };
4101
init()4102 void GraphicsTestGroupBuilder::init()
4103 {
4104 m_testCaseBuilder.init();
4105 }
4106
createOperationTests(TestCaseGroup * parentGroup,const char * groupName,FloatType floatType,bool argumentsFromInput)4107 void GraphicsTestGroupBuilder::createOperationTests(TestCaseGroup* parentGroup, const char* groupName, FloatType floatType, bool argumentsFromInput)
4108 {
4109 TestContext& testCtx = parentGroup->getTestContext();
4110 TestCaseGroup* group = new TestCaseGroup(testCtx, groupName);
4111 parentGroup->addChild(group);
4112
4113 // create test cases for vertex stage
4114 TestCaseVect testCases;
4115 m_testCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
4116
4117 TestCaseVect::const_iterator currTestCase = testCases.begin();
4118 TestCaseVect::const_iterator lastTestCase = testCases.end();
4119 while(currTestCase != lastTestCase)
4120 {
4121 const OperationTestCase& testCase = *currTestCase;
4122 ++currTestCase;
4123
4124 // skip cases with undefined output
4125 if (testCase.expectedOutput == V_UNUSED)
4126 continue;
4127
4128 // FPRoundingMode decoration can be applied only to conversion instruction that is used as the object
4129 // argument of an OpStore storing through a pointer to a 16-bit floating-point object in Uniform, or
4130 // PushConstant, or Input, or Output Storage Classes. SSBO writes are not commonly supported
4131 // in VS so this test case needs to be skiped for vertex stage.
4132 if ((testCase.operationId == OID_ORTZ_ROUND) || (testCase.operationId == OID_ORTE_ROUND))
4133 continue;
4134
4135 OperationTestCaseInfo testCaseInfo =
4136 {
4137 floatType,
4138 argumentsFromInput,
4139 VK_SHADER_STAGE_VERTEX_BIT,
4140 m_testCaseBuilder.getOperation(testCase.operationId),
4141 testCase
4142 };
4143
4144 InstanceContext ctxVertex = createInstanceContext(testCaseInfo);
4145 string testName = replace(testCase.baseName, "op", testCaseInfo.operation.name);
4146
4147 addFunctionCaseWithPrograms<InstanceContext>(group, testName + "_vert", getGraphicsShaderCode, runAndVerifyDefaultPipeline, ctxVertex);
4148 }
4149
4150 // create test cases for fragment stage
4151 testCases.clear();
4152 m_testCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
4153
4154 currTestCase = testCases.begin();
4155 lastTestCase = testCases.end();
4156 while(currTestCase != lastTestCase)
4157 {
4158 const OperationTestCase& testCase = *currTestCase;
4159 ++currTestCase;
4160
4161 // skip cases with undefined output
4162 if (testCase.expectedOutput == V_UNUSED)
4163 continue;
4164
4165 OperationTestCaseInfo testCaseInfo =
4166 {
4167 floatType,
4168 argumentsFromInput,
4169 VK_SHADER_STAGE_FRAGMENT_BIT,
4170 m_testCaseBuilder.getOperation(testCase.operationId),
4171 testCase
4172 };
4173
4174 InstanceContext ctxFragment = createInstanceContext(testCaseInfo);
4175 string testName = replace(testCase.baseName, "op", testCaseInfo.operation.name);
4176
4177 addFunctionCaseWithPrograms<InstanceContext>(group, testName + "_frag", getGraphicsShaderCode, runAndVerifyDefaultPipeline, ctxFragment);
4178 }
4179 }
4180
createSettingsTests(TestCaseGroup * parentGroup)4181 void GraphicsTestGroupBuilder::createSettingsTests(TestCaseGroup* parentGroup)
4182 {
4183 DE_UNREF(parentGroup);
4184
4185 // WG decided that testing settings only for compute stage is sufficient
4186 }
4187
createInstanceContext(const OperationTestCaseInfo & testCaseInfo) const4188 InstanceContext GraphicsTestGroupBuilder::createInstanceContext(const OperationTestCaseInfo& testCaseInfo) const
4189 {
4190 // LUT storing functions used to verify test results
4191 const VerifyIOFunc checkFloatsLUT[] =
4192 {
4193 checkFloats<Float16, deFloat16>,
4194 checkFloats<Float32, float>,
4195 checkFloats<Float64, double>
4196 };
4197
4198 // 32-bit float types are always needed for standard operations on color
4199 // if tested operation does not require fp32 for either input or output
4200 // then this minimal type definitions must be appended to types section
4201 const string f32TypeMinimalRequired =
4202 "%type_f32 = OpTypeFloat 32\n"
4203 "%type_f32_arr_1 = OpTypeArray %type_f32 %c_i32_1\n"
4204 "%type_f32_iptr = OpTypePointer Input %type_f32\n"
4205 "%type_f32_optr = OpTypePointer Output %type_f32\n"
4206 "%type_f32_vec4 = OpTypeVector %type_f32 4\n"
4207 "%type_f32_vec4_iptr = OpTypePointer Input %type_f32_vec4\n"
4208 "%type_f32_vec4_optr = OpTypePointer Output %type_f32_vec4\n";
4209
4210 const Operation& testOperation = testCaseInfo.operation;
4211 const OperationTestCase& testCase = testCaseInfo.testCase;
4212 FloatType outFloatType = testCaseInfo.outFloatType;
4213 VkShaderStageFlagBits testedStage = testCaseInfo.testedStage;
4214
4215 DE_ASSERT((testedStage == VK_SHADER_STAGE_VERTEX_BIT) || (testedStage == VK_SHADER_STAGE_FRAGMENT_BIT));
4216
4217 SpecializedOperation specOpData;
4218 specializeOperation(testCaseInfo, specOpData);
4219
4220 TypeSnippetsSP inTypeSnippets = specOpData.inTypeSnippets;
4221 TypeSnippetsSP outTypeSnippets = specOpData.outTypeSnippets;
4222 FloatType inFloatType = specOpData.inFloatType;
4223
4224 bool outFp16WithoutStorage = (outFloatType == FP16) && testCase.fp16Without16BitStorage;
4225 bool inFp16WithoutStorage = (inFloatType == FP16) && testCase.fp16Without16BitStorage;
4226
4227 // There may be several reasons why we need the shaderFloat16 Vulkan feature.
4228 bool needsShaderFloat16 = inFp16WithoutStorage || outFp16WithoutStorage;
4229 // There are some weird cases where we need the constants, but would otherwise drop them.
4230 bool needsSpecialConstants = false;
4231
4232 // UnpackHalf2x16 is a corner case - it returns two 32-bit floats but
4233 // internaly operates on fp16 and this type should be used by float controls
4234 FloatType inFloatTypeForCaps = inFloatType;
4235 string inFloatWidthForCaps = inTypeSnippets->bitWidth;
4236 if (testCase.operationId == OID_UPH_DENORM)
4237 {
4238 inFloatTypeForCaps = FP16;
4239 inFloatWidthForCaps = "16";
4240 }
4241
4242 string behaviorCapability;
4243 string behaviorExecutionMode;
4244 getBehaviorCapabilityAndExecutionMode(testCase.behaviorFlags,
4245 inFloatWidthForCaps,
4246 outTypeSnippets->bitWidth,
4247 behaviorCapability,
4248 behaviorExecutionMode);
4249
4250 // check which format features are needed
4251 bool float16FeatureRequired = (inFloatType == FP16) || (outFloatType == FP16);
4252 bool float64FeatureRequired = (inFloatType == FP64) || (outFloatType == FP64);
4253
4254 string vertExecutionMode;
4255 string fragExecutionMode;
4256 string vertCapabilities;
4257 string fragCapabilities;
4258 string vertExtensions;
4259 string fragExtensions;
4260 string vertAnnotations;
4261 string fragAnnotations;
4262 string vertTypes;
4263 string fragTypes;
4264 string vertConstants;
4265 string fragConstants;
4266 string vertFunctions;
4267 string fragFunctions;
4268 string vertIODefinitions;
4269 string fragIODefinitions;
4270 string vertArguments;
4271 string fragArguments;
4272 string vertVariables;
4273 string fragVariables;
4274 string vertCommands;
4275 string fragCommands;
4276 string vertProcessResult;
4277 string fragProcessResult;
4278
4279 // check if operation should be executed in vertex stage
4280 if (testedStage == VK_SHADER_STAGE_VERTEX_BIT)
4281 {
4282 vertAnnotations = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet;
4283 fragAnnotations = outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
4284 vertFunctions = specOpData.functions;
4285
4286 // check if input type is different from tested type (conversion operations)
4287 if (testOperation.isInputTypeRestricted)
4288 {
4289 vertCapabilities = behaviorCapability + inTypeSnippets->capabilities + outTypeSnippets->capabilities;
4290 fragCapabilities = outTypeSnippets->capabilities;
4291 vertExtensions = inTypeSnippets->extensions + outTypeSnippets->extensions;
4292 fragExtensions = outTypeSnippets->extensions;
4293 vertTypes = inTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
4294 if (inFp16WithoutStorage)
4295 vertTypes += inTypeSnippets->typeDefinitionsFp16Snippet;
4296
4297 fragTypes = outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
4298 vertConstants = inTypeSnippets->constantsDefinitionsSnippet + outTypeSnippets->constantsDefinitionsSnippet;
4299 fragConstants = outTypeSnippets->constantsDefinitionsSnippet;
4300 }
4301 else
4302 {
4303 // input and output types are the same (majority of operations)
4304
4305 vertCapabilities = behaviorCapability + outTypeSnippets->capabilities;
4306 fragCapabilities = vertCapabilities;
4307 vertExtensions = outTypeSnippets->extensions;
4308 fragExtensions = vertExtensions;
4309 vertTypes = outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
4310 fragTypes = vertTypes;
4311 vertConstants = outTypeSnippets->constantsDefinitionsSnippet;
4312 fragConstants = outTypeSnippets->constantsDefinitionsSnippet;
4313 }
4314
4315 if (outFloatType != FP32)
4316 {
4317 fragTypes += f32TypeMinimalRequired;
4318 if (inFloatType != FP32)
4319 vertTypes += f32TypeMinimalRequired;
4320 }
4321
4322 vertAnnotations += specOpData.annotations;
4323 vertTypes += specOpData.types;
4324 vertConstants += specOpData.constants;
4325
4326 vertExecutionMode = behaviorExecutionMode;
4327 fragExecutionMode = "";
4328 vertIODefinitions = inTypeSnippets->inputDefinitionsSnippet + outTypeSnippets->outputVaryingsSnippet;
4329 fragIODefinitions = outTypeSnippets->inputVaryingsSnippet + outTypeSnippets->outputDefinitionsSnippet;
4330 vertArguments = specOpData.arguments;
4331 fragArguments = "";
4332 vertVariables = specOpData.variables;
4333 fragVariables = "";
4334 vertCommands = specOpData.commands;
4335 fragCommands = "";
4336 vertProcessResult = outTypeSnippets->storeVertexResultSnippet;
4337 fragProcessResult = outTypeSnippets->loadVertexResultSnippet + outTypeSnippets->storeResultsSnippet;
4338
4339 if (inFp16WithoutStorage)
4340 {
4341 vertAnnotations += inTypeSnippets->typeAnnotationsFp16Snippet;
4342 vertIODefinitions = inTypeSnippets->inputDefinitionsFp16Snippet + outTypeSnippets->outputVaryingsSnippet;
4343 }
4344
4345 if (outFp16WithoutStorage)
4346 {
4347 vertTypes += outTypeSnippets->typeDefinitionsFp16Snippet;
4348 fragTypes += outTypeSnippets->typeDefinitionsFp16Snippet;
4349 fragAnnotations += outTypeSnippets->typeAnnotationsFp16Snippet;
4350 fragIODefinitions = outTypeSnippets->inputVaryingsSnippet + outTypeSnippets->outputDefinitionsFp16Snippet;
4351 fragProcessResult = outTypeSnippets->loadVertexResultSnippet + outTypeSnippets->storeResultsFp16Snippet;
4352
4353 }
4354
4355 needsShaderFloat16 |= outTypeSnippets->loadStoreRequiresShaderFloat16;
4356 }
4357 else // perform test in fragment stage - vertex stage is empty
4358 {
4359 fragFunctions = specOpData.functions;
4360 // check if input type is different from tested type
4361 if (testOperation.isInputTypeRestricted)
4362 {
4363 fragAnnotations = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet +
4364 outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
4365 fragCapabilities = behaviorCapability +
4366 (inFp16WithoutStorage ? inTypeSnippets->capabilitiesFp16Without16BitStorage : inTypeSnippets->capabilities) +
4367 (outFp16WithoutStorage ? outTypeSnippets->capabilitiesFp16Without16BitStorage : outTypeSnippets->capabilities);
4368 fragExtensions =
4369 (inFp16WithoutStorage ? inTypeSnippets->extensionsFp16Without16BitStorage : inTypeSnippets->extensions) +
4370 (outFp16WithoutStorage ? outTypeSnippets->extensionsFp16Without16BitStorage : outTypeSnippets->extensions);
4371 fragTypes = inTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->typeDefinitionsSnippet;
4372 fragConstants = inTypeSnippets->constantsDefinitionsSnippet + outTypeSnippets->constantsDefinitionsSnippet;
4373 }
4374 else
4375 {
4376 // input and output types are the same
4377
4378 fragAnnotations = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet +
4379 outTypeSnippets->outputAnnotationsSnippet;
4380 fragCapabilities = behaviorCapability +
4381 (outFp16WithoutStorage ? outTypeSnippets->capabilitiesFp16Without16BitStorage : outTypeSnippets->capabilities);
4382 fragExtensions = (outFp16WithoutStorage ? outTypeSnippets->extensionsFp16Without16BitStorage : outTypeSnippets->extensions);
4383 fragTypes = outTypeSnippets->typeDefinitionsSnippet;
4384 fragConstants = outTypeSnippets->constantsDefinitionsSnippet;
4385 }
4386
4387 // varying is not used but it needs to be specified so lets use type_i32 for it
4388 string unusedVertVarying = "%BP_vertex_result = OpVariable %type_i32_optr Output\n";
4389 string unusedFragVarying = "%BP_vertex_result = OpVariable %type_i32_iptr Input\n";
4390
4391 vertCapabilities = "";
4392 vertExtensions = "";
4393 vertAnnotations = "OpDecorate %type_f32_arr_1 ArrayStride 4\n";
4394 vertTypes = f32TypeMinimalRequired;
4395 vertConstants = "";
4396
4397 if ((outFloatType != FP32) && (inFloatType != FP32))
4398 fragTypes += f32TypeMinimalRequired;
4399
4400 fragAnnotations += specOpData.annotations;
4401 fragTypes += specOpData.types;
4402 fragConstants += specOpData.constants;
4403
4404 vertExecutionMode = "";
4405 fragExecutionMode = behaviorExecutionMode;
4406 vertIODefinitions = unusedVertVarying;
4407 fragIODefinitions = unusedFragVarying;
4408
4409 vertArguments = "";
4410 fragArguments = specOpData.arguments;
4411 vertVariables = "";
4412 fragVariables = specOpData.variables;
4413 vertCommands = "";
4414 fragCommands = specOpData.commands;
4415 vertProcessResult = "";
4416 fragProcessResult = outTypeSnippets->storeResultsSnippet;
4417
4418 if (inFp16WithoutStorage)
4419 {
4420 fragAnnotations += inTypeSnippets->typeAnnotationsFp16Snippet;
4421 if (testOperation.isInputTypeRestricted)
4422 {
4423 fragTypes += inTypeSnippets->typeDefinitionsFp16Snippet;
4424 }
4425 fragIODefinitions += inTypeSnippets->inputDefinitionsFp16Snippet;
4426 }
4427 else
4428 {
4429 fragIODefinitions += inTypeSnippets->inputDefinitionsSnippet;
4430 }
4431
4432 if (outFp16WithoutStorage)
4433 {
4434 if (testOperation.isInputTypeRestricted)
4435 {
4436 fragAnnotations += outTypeSnippets->typeAnnotationsFp16Snippet;
4437 }
4438 fragTypes += outTypeSnippets->typeDefinitionsFp16Snippet;
4439 fragIODefinitions += outTypeSnippets->outputDefinitionsFp16Snippet;
4440 fragProcessResult = outTypeSnippets->storeResultsFp16Snippet;
4441 }
4442 else
4443 {
4444 fragIODefinitions += outTypeSnippets->outputDefinitionsSnippet;
4445 }
4446
4447 if (!testCaseInfo.argumentsFromInput)
4448 {
4449 switch(testCaseInfo.testCase.operationId)
4450 {
4451 case OID_CONV_FROM_FP32:
4452 case OID_CONV_FROM_FP64:
4453 needsSpecialConstants = true;
4454 break;
4455 default:
4456 break;
4457 }
4458 }
4459 }
4460
4461 // Another reason we need shaderFloat16 is the executable instructions uses fp16
4462 // in a way not supported by the 16bit storage extension.
4463 needsShaderFloat16 |= float16FeatureRequired && testOperation.floatUsage == FLOAT_ARITHMETIC;
4464
4465 // Constants are only needed sometimes. Drop them in the fp16 case if the code doesn't need
4466 // them, and if we don't otherwise need shaderFloat16.
4467 bool needsFP16Constants = needsShaderFloat16 || needsSpecialConstants || outFp16WithoutStorage;
4468
4469 if (!needsFP16Constants && float16FeatureRequired)
4470 {
4471 // Check various code fragments
4472 const FloatStatementUsageFlags commandsFloatConstMask = B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_CONST_FP16;
4473 const bool commandsUsesFloatConstant = (testCaseInfo.operation.statementUsageFlags & commandsFloatConstMask) != 0;
4474 const FloatStatementUsageFlags argumentsFloatConstMask = B_STATEMENT_USAGE_ARGS_CONST_FLOAT | B_STATEMENT_USAGE_ARGS_CONST_FP16;
4475 const bool argumentsUsesFloatConstant = (specOpData.argumentsUsesFloatConstant & argumentsFloatConstMask) != 0;
4476 bool hasFP16ConstsInCommandsOrArguments = commandsUsesFloatConstant || argumentsUsesFloatConstant;
4477
4478 needsFP16Constants |= hasFP16ConstsInCommandsOrArguments;
4479
4480 if (!needsFP16Constants)
4481 {
4482 vertConstants = "";
4483 fragConstants = "";
4484 }
4485 }
4486 needsShaderFloat16 |= needsFP16Constants;
4487
4488 if (needsShaderFloat16)
4489 {
4490 vertCapabilities += "OpCapability Float16\n";
4491 fragCapabilities += "OpCapability Float16\n";
4492 }
4493
4494 map<string, string> specializations;
4495 specializations["vert_capabilities"] = vertCapabilities;
4496 specializations["vert_extensions"] = vertExtensions;
4497 specializations["vert_execution_mode"] = vertExecutionMode;
4498 specializations["vert_annotations"] = vertAnnotations;
4499 specializations["vert_types"] = vertTypes;
4500 specializations["vert_constants"] = vertConstants;
4501 specializations["vert_io_definitions"] = vertIODefinitions;
4502 specializations["vert_arguments"] = vertArguments;
4503 specializations["vert_variables"] = vertVariables;
4504 specializations["vert_functions"] = vertFunctions;
4505 specializations["vert_commands"] = vertCommands;
4506 specializations["vert_process_result"] = vertProcessResult;
4507 specializations["frag_capabilities"] = fragCapabilities;
4508 specializations["frag_extensions"] = fragExtensions;
4509 specializations["frag_execution_mode"] = fragExecutionMode;
4510 specializations["frag_annotations"] = fragAnnotations;
4511 specializations["frag_types"] = fragTypes;
4512 specializations["frag_constants"] = fragConstants;
4513 specializations["frag_functions"] = fragFunctions;
4514 specializations["frag_io_definitions"] = fragIODefinitions;
4515 specializations["frag_arguments"] = fragArguments;
4516 specializations["frag_variables"] = fragVariables;
4517 specializations["frag_commands"] = fragCommands;
4518 specializations["frag_process_result"] = fragProcessResult;
4519
4520 // colors are not used by the test - input is passed via uniform buffer
4521 RGBA defaultColors[4] = { RGBA::white(), RGBA::red(), RGBA::green(), RGBA::blue() };
4522
4523 // construct input and output buffers of proper types
4524 TypeValuesSP inTypeValues = m_typeData.at(inFloatType).values;
4525 TypeValuesSP outTypeValues = m_typeData.at(outFloatType).values;
4526 BufferSp inBufferSp = inTypeValues->constructInputBuffer(testCase.input);
4527 BufferSp outBufferSp = outTypeValues->constructOutputBuffer(testCase.expectedOutput);
4528
4529 vkt::SpirVAssembly::GraphicsResources resources;
4530 resources.inputs.push_back( Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4531 resources.outputs.push_back(Resource(outBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4532 resources.verifyIO = checkFloatsLUT[outFloatType];
4533
4534 StageToSpecConstantMap noSpecConstants;
4535 PushConstants noPushConstants;
4536 GraphicsInterfaces noInterfaces;
4537
4538 VulkanFeatures vulkanFeatures;
4539 setupVulkanFeatures(inFloatTypeForCaps, // usualy same as inFloatType - different only for UnpackHalf2x16
4540 outFloatType,
4541 testCase.behaviorFlags,
4542 float64FeatureRequired,
4543 vulkanFeatures);
4544 vulkanFeatures.coreFeatures.fragmentStoresAndAtomics = true;
4545
4546 vector<string> extensions;
4547 extensions.push_back("VK_KHR_shader_float_controls");
4548 if (needsShaderFloat16)
4549 {
4550 extensions.push_back("VK_KHR_shader_float16_int8");
4551 vulkanFeatures.extFloat16Int8.shaderFloat16 = true;
4552 }
4553 if (float16FeatureRequired && !testCase.fp16Without16BitStorage)
4554 {
4555 extensions.push_back("VK_KHR_16bit_storage");
4556 vulkanFeatures.ext16BitStorage.storageBuffer16BitAccess = true;
4557 }
4558
4559 InstanceContext ctx(defaultColors,
4560 defaultColors,
4561 specializations,
4562 noSpecConstants,
4563 noPushConstants,
4564 resources,
4565 noInterfaces,
4566 extensions,
4567 vulkanFeatures,
4568 testedStage);
4569
4570 ctx.moduleMap["vert"].push_back(std::make_pair("main", VK_SHADER_STAGE_VERTEX_BIT));
4571 ctx.moduleMap["frag"].push_back(std::make_pair("main", VK_SHADER_STAGE_FRAGMENT_BIT));
4572
4573 ctx.requiredStages = static_cast<VkShaderStageFlagBits>(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
4574 ctx.failResult = QP_TEST_RESULT_FAIL;
4575 ctx.failMessageTemplate = "Output doesn't match with expected";
4576
4577 return ctx;
4578 }
4579
4580 } // anonymous
4581
createFloatControlsTestGroup(TestContext & testCtx,TestGroupBuilderBase * groupBuilder)4582 tcu::TestCaseGroup* createFloatControlsTestGroup (TestContext& testCtx, TestGroupBuilderBase* groupBuilder)
4583 {
4584 de::MovePtr<TestCaseGroup> group(new TestCaseGroup(testCtx, "float_controls", "Tests for VK_KHR_shader_float_controls extension"));
4585
4586 struct TestGroup
4587 {
4588 FloatType floatType;
4589 const char* groupName;
4590 };
4591 TestGroup testGroups[] =
4592 {
4593 { FP16, "fp16" },
4594 { FP32, "fp32" },
4595 { FP64, "fp64" },
4596 };
4597
4598 for (int i = 0 ; i < DE_LENGTH_OF_ARRAY(testGroups) ; ++i)
4599 {
4600 const TestGroup& testGroup = testGroups[i];
4601 TestCaseGroup* typeGroup = new TestCaseGroup(testCtx, testGroup.groupName);
4602 group->addChild(typeGroup);
4603
4604 groupBuilder->createOperationTests(typeGroup, "input_args", testGroup.floatType, true);
4605 groupBuilder->createOperationTests(typeGroup, "generated_args", testGroup.floatType, false);
4606 }
4607
4608 groupBuilder->createSettingsTests(group.get());
4609
4610 return group.release();
4611 }
4612
createFloatControlsComputeGroup(TestContext & testCtx)4613 tcu::TestCaseGroup* createFloatControlsComputeGroup (TestContext& testCtx)
4614 {
4615 ComputeTestGroupBuilder computeTestGroupBuilder;
4616 computeTestGroupBuilder.init();
4617
4618 return createFloatControlsTestGroup(testCtx, &computeTestGroupBuilder);
4619 }
4620
createFloatControlsGraphicsGroup(TestContext & testCtx)4621 tcu::TestCaseGroup* createFloatControlsGraphicsGroup (TestContext& testCtx)
4622 {
4623 GraphicsTestGroupBuilder graphicsTestGroupBuilder;
4624 graphicsTestGroupBuilder.init();
4625
4626 return createFloatControlsTestGroup(testCtx, &graphicsTestGroupBuilder);
4627 }
4628
4629 } // SpirVAssembly
4630 } // vkt
4631