1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 Valve Corporation.
6 * Copyright (c) 2019 The Khronos Group Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief 64-bit data type comparison operations.
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktSpvAsm64bitCompareTests.hpp"
26 #include "vktTestGroupUtil.hpp"
27 #include "vktSpvAsmUtils.hpp"
28 #include "vkDefs.hpp"
29 #include "vktTestCase.hpp"
30 #include "vkQueryUtil.hpp"
31 #include "vkMemUtil.hpp"
32 #include "vkRefUtil.hpp"
33 #include "vkBuilderUtil.hpp"
34 #include "vkPrograms.hpp"
35 #include "vkCmdUtil.hpp"
36
37 #include "tcuStringTemplate.hpp"
38
39 #include <string>
40 #include <vector>
41 #include <utility>
42 #include <cmath>
43 #include <sstream>
44 #include <memory>
45 #include <limits>
46
47 namespace vkt
48 {
49 namespace SpirVAssembly
50 {
51 namespace
52 {
53
// Abstract interface for a 64-bit comparison operation.
// spirvName() provides the SPIR-V instruction name substituted into the shader
// templates below, and run() computes the reference result on the host.
template <typename T>
class CompareOperation
{
public:
	// Virtual destructor: instances are used polymorphically through base references.
	virtual				~CompareOperation	() {}

	// Name of the SPIR-V comparison instruction (e.g. "OpFOrdEqual").
	virtual std::string	spirvName			() const = 0;

	// Host-side comparison used to compute the expected output value.
	virtual bool		run					(T left, T right) const = 0;
};
61
62 // Helper intermediate class to be able to implement Ordered and Unordered floating point operations in a simpler way.
63 class DoubleCompareOperation: public CompareOperation<double>
64 {
65 public:
66 struct BasicImplementation
67 {
68 virtual std::string nameSuffix () const = 0;
69 virtual bool run (double left, double right) const = 0; // No NaNs here.
70 };
71
spirvName() const72 virtual std::string spirvName () const
73 {
74 return "OpF" + std::string(m_ordered ? "Ord" : "Unord") + m_impl.nameSuffix();
75 }
76
run(double left,double right) const77 virtual bool run (double left, double right) const
78 {
79 if (nanInvolved(left, right))
80 return !m_ordered; // Ordered operations return false when NaN is involved.
81 return m_impl.run(left, right);
82 }
83
DoubleCompareOperation(bool ordered,const BasicImplementation & impl)84 DoubleCompareOperation(bool ordered, const BasicImplementation& impl)
85 : m_ordered(ordered), m_impl(impl)
86 {}
87
88 private:
nanInvolved(double left,double right) const89 bool nanInvolved(double left, double right) const
90 {
91 return std::isnan(left) || std::isnan(right);
92 }
93
94 const bool m_ordered;
95 const BasicImplementation& m_impl;
96 };
97
// Generates, for the double type, a BasicImplementation subclass named
// <NAME>DoubleBasicImplClass applying OPERATION, plus one global instance of
// it named <NAME>DoubleBasicImplInstance.
#define GEN_DOUBLE_BASIC_IMPL(NAME, OPERATION) \
struct NAME##DoubleBasicImplClass : public DoubleCompareOperation::BasicImplementation \
{ \
virtual std::string nameSuffix () const { return #NAME; } \
virtual bool run (double left, double right) const { return left OPERATION right; } \
}; \
NAME##DoubleBasicImplClass NAME##DoubleBasicImplInstance;

// Basic (NaN-free) implementations for each double comparison operator.
GEN_DOUBLE_BASIC_IMPL(Equal, == )
GEN_DOUBLE_BASIC_IMPL(NotEqual, != )
GEN_DOUBLE_BASIC_IMPL(LessThan, < )
GEN_DOUBLE_BASIC_IMPL(GreaterThan, > )
GEN_DOUBLE_BASIC_IMPL(LessThanEqual, <= )
GEN_DOUBLE_BASIC_IMPL(GreaterThanEqual, >= )

// Creates the global operation objects, e.g. FOrderedEqualOp and
// FUnorderedEqualOp, wrapping the basic implementations above with the
// ordered/unordered NaN behavior.
#define GEN_FORDERED_OP(NAME) DoubleCompareOperation FOrdered##NAME##Op(true, NAME##DoubleBasicImplInstance)
#define GEN_FUNORDERED_OP(NAME) DoubleCompareOperation FUnordered##NAME##Op(false, NAME##DoubleBasicImplInstance)
#define GEN_FBOTH_OP(NAME) GEN_FORDERED_OP(NAME); GEN_FUNORDERED_OP(NAME);

GEN_FBOTH_OP(Equal)
GEN_FBOTH_OP(NotEqual)
GEN_FBOTH_OP(LessThan)
GEN_FBOTH_OP(GreaterThan)
GEN_FBOTH_OP(LessThanEqual)
GEN_FBOTH_OP(GreaterThanEqual)
123
124 template <typename IntClass>
125 class IntCompareOperation: public CompareOperation<IntClass>
126 {
127 public:
128 struct Implementation
129 {
130 virtual std::string typeChar () const = 0;
131 virtual std::string opName () const = 0;
132 virtual bool run (IntClass left, IntClass right) const = 0;
133 };
134
spirvName() const135 virtual std::string spirvName () const
136 {
137 return "Op" + m_impl.typeChar() + m_impl.opName();
138 }
139
run(IntClass left,IntClass right) const140 virtual bool run (IntClass left, IntClass right) const
141 {
142 return m_impl.run(left, right);
143 }
144
IntCompareOperation(const Implementation & impl)145 IntCompareOperation(const Implementation& impl)
146 : m_impl(impl)
147 {}
148
149 private:
150 const Implementation& m_impl;
151 };
152
// Generates an Implementation subclass for the given integer type and
// operator, named <INTTYPE><OPNAME>IntImplClass, plus one global instance
// named <INTTYPE><OPNAME>IntImplInstance.  TYPECHAR becomes the SPIR-V opcode
// type letter (I, S or U).
#define GEN_INT_IMPL(INTTYPE, TYPECHAR, OPNAME, OPERATOR) \
struct INTTYPE##OPNAME##IntImplClass : public IntCompareOperation<INTTYPE>::Implementation \
{ \
virtual std::string typeChar () const { return #TYPECHAR; } \
virtual std::string opName () const { return #OPNAME; } \
virtual bool run (INTTYPE left, INTTYPE right) const { return left OPERATOR right; } \
}; \
INTTYPE##OPNAME##IntImplClass INTTYPE##OPNAME##IntImplInstance;

// Equality comparisons always use the "I" opcodes (OpIEqual/OpINotEqual);
// relational comparisons use the signed/unsigned type character.
#define GEN_ALL_INT_TYPE_IMPL(INTTYPE, TYPECHAR) \
GEN_INT_IMPL(INTTYPE, I, Equal, == ) \
GEN_INT_IMPL(INTTYPE, I, NotEqual, != ) \
GEN_INT_IMPL(INTTYPE, TYPECHAR, GreaterThan, > ) \
GEN_INT_IMPL(INTTYPE, TYPECHAR, GreaterThanEqual, >= ) \
GEN_INT_IMPL(INTTYPE, TYPECHAR, LessThan, < ) \
GEN_INT_IMPL(INTTYPE, TYPECHAR, LessThanEqual, <= )

GEN_ALL_INT_TYPE_IMPL(deInt64, S)
GEN_ALL_INT_TYPE_IMPL(deUint64, U)

// Wraps each implementation instance in an IntCompareOperation subclass and
// creates a global operation object named <INTTYPE><OPNAME>Op.
#define GEN_INT_OP(INTTYPE, OPNAME) \
struct INTTYPE##OPNAME##OpClass: public IntCompareOperation<INTTYPE> \
{ \
INTTYPE##OPNAME##OpClass () : IntCompareOperation<INTTYPE>(INTTYPE##OPNAME##IntImplInstance) {} \
}; \
INTTYPE##OPNAME##OpClass INTTYPE##OPNAME##Op;

#define GEN_ALL_INT_OPS(INTTYPE) \
GEN_INT_OP(INTTYPE, Equal ) \
GEN_INT_OP(INTTYPE, NotEqual ) \
GEN_INT_OP(INTTYPE, GreaterThan ) \
GEN_INT_OP(INTTYPE, GreaterThanEqual ) \
GEN_INT_OP(INTTYPE, LessThan ) \
GEN_INT_OP(INTTYPE, LessThanEqual )

GEN_ALL_INT_OPS(deInt64)
GEN_ALL_INT_OPS(deUint64)
190
// Controls whether operands are compared one scalar at a time or packed into
// 4-component vectors (selects which shader template is used).
enum DataType {
	DATA_TYPE_SINGLE = 0,
	DATA_TYPE_VECTOR,
	DATA_TYPE_MAX_ENUM,
};
196
// List of (left, right) operand pairs fed to a comparison operation.
template <class T>
using OperandsVector = std::vector<std::pair<T, T>>;
199
// Aggregates everything needed to build one test case.
template <class T>
struct TestParameters
{
	DataType					dataType;			// Scalar or vector operands.
	const CompareOperation<T>&	operation;			// Comparison operation under test.
	vk::VkShaderStageFlagBits	stage;				// Shader stage that runs the comparison.
	const OperandsVector<T>&	operands;			// Input operand pairs.
	bool						requireNanPreserve;	// Presumably requests NaN-preservation support (see ${NANCAP}/${NANMODE} in the shader templates) — confirm against the template substitution code.
};
209
// Shader template for the compute stage using single scalars.
// Generated from the following GLSL shader, replacing some bits by template parameters.
#if 0
#version 430

// Left operands, right operands and results.
layout(binding = 0) buffer Input1 { double values[]; } input1;
layout(binding = 1) buffer Input2 { double values[]; } input2;
layout(binding = 2) buffer Output1 { int values[]; } output1;

void main()
{
	for (int i = 0; i < 20; i++) {
		output1.values[i] = int(input1.values[i] == input2.values[i]);
	}
}
#endif
// Template parameters:
//   ${OPCAPABILITY}                - extra capability declaration(s) for the 64-bit operand type.
//   ${NANCAP}/${NANEXT}/${NANMODE} - capability, extension and execution-mode lines for NaN handling (may expand to empty).
//   ${OPTYPE}                      - instruction declaring the operand type %tinput (ArrayStride 8, i.e. 64 bits).
//   ${OPNAME}                      - comparison instruction under test; result stored as int 0/1.
//   ${ITERS}                       - number of loop iterations (one operand pair per iteration).
const tcu::StringTemplate CompShaderSingle(R"(
OpCapability Shader
${OPCAPABILITY}
${NANCAP}
${NANEXT}
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
${NANMODE}
OpExecutionMode %main LocalSize 1 1 1
OpName %main "main"
OpName %i "i"
OpName %Output1 "Output1"
OpMemberName %Output1 0 "values"
OpName %output1 "output1"
OpName %Input1 "Input1"
OpMemberName %Input1 0 "values"
OpName %input1 "input1"
OpName %Input2 "Input2"
OpMemberName %Input2 0 "values"
OpName %input2 "input2"
OpDecorate %_runtimearr_int ArrayStride 4
OpMemberDecorate %Output1 0 Offset 0
OpDecorate %Output1 BufferBlock
OpDecorate %output1 DescriptorSet 0
OpDecorate %output1 Binding 2
OpDecorate %_runtimearr_tinput ArrayStride 8
OpMemberDecorate %Input1 0 Offset 0
OpDecorate %Input1 BufferBlock
OpDecorate %input1 DescriptorSet 0
OpDecorate %input1 Binding 0
OpDecorate %_runtimearr_tinput_0 ArrayStride 8
OpMemberDecorate %Input2 0 Offset 0
OpDecorate %Input2 BufferBlock
OpDecorate %input2 DescriptorSet 0
OpDecorate %input2 Binding 1
%void = OpTypeVoid
%3 = OpTypeFunction %void
%int = OpTypeInt 32 1
%_ptr_Function_int = OpTypePointer Function %int
%int_0 = OpConstant %int 0
%niters = OpConstant %int ${ITERS}
%bool = OpTypeBool
%_runtimearr_int = OpTypeRuntimeArray %int
%Output1 = OpTypeStruct %_runtimearr_int
%_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
%output1 = OpVariable %_ptr_Uniform_Output1 Uniform
%tinput = ${OPTYPE}
%_runtimearr_tinput = OpTypeRuntimeArray %tinput
%Input1 = OpTypeStruct %_runtimearr_tinput
%_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
%input1 = OpVariable %_ptr_Uniform_Input1 Uniform
%_ptr_Uniform_tinput = OpTypePointer Uniform %tinput
%_runtimearr_tinput_0 = OpTypeRuntimeArray %tinput
%Input2 = OpTypeStruct %_runtimearr_tinput_0
%_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
%input2 = OpVariable %_ptr_Uniform_Input2 Uniform
%int_1 = OpConstant %int 1
%_ptr_Uniform_int = OpTypePointer Uniform %int
%main = OpFunction %void None %3
%5 = OpLabel
%i = OpVariable %_ptr_Function_int Function
OpStore %i %int_0
OpBranch %10
%10 = OpLabel
OpLoopMerge %12 %13 None
OpBranch %14
%14 = OpLabel
%15 = OpLoad %int %i
%18 = OpSLessThan %bool %15 %niters
OpBranchConditional %18 %11 %12
%11 = OpLabel
%23 = OpLoad %int %i
%29 = OpLoad %int %i
%31 = OpAccessChain %_ptr_Uniform_tinput %input1 %int_0 %29
%32 = OpLoad %tinput %31
%37 = OpLoad %int %i
%38 = OpAccessChain %_ptr_Uniform_tinput %input2 %int_0 %37
%39 = OpLoad %tinput %38
%40 = ${OPNAME} %bool %32 %39
%42 = OpSelect %int %40 %int_1 %int_0
%44 = OpAccessChain %_ptr_Uniform_int %output1 %int_0 %23
OpStore %44 %42
OpBranch %13
%13 = OpLabel
%45 = OpLoad %int %i
%46 = OpIAdd %int %45 %int_1
OpStore %i %46
OpBranch %10
%12 = OpLabel
OpReturn
OpFunctionEnd
)");
320
// Shader template for the compute stage using vectors.
// Generated from the following GLSL shader, replacing some bits by template parameters.
// Note the number of iterations needs to be divided by 4 as the shader will consume 4 doubles at a time.
#if 0
#version 430

// Left operands, right operands and results.
layout(binding = 0) buffer Input1 { dvec4 values[]; } input1;
layout(binding = 1) buffer Input2 { dvec4 values[]; } input2;
layout(binding = 2) buffer Output1 { ivec4 values[]; } output1;

void main()
{
	for (int i = 0; i < 5; i++) {
		output1.values[i] = ivec4(equal(input1.values[i], input2.values[i]));
	}
}
#endif
// Template parameters: same as CompShaderSingle, except that operands are
// packed in 4-component vectors (%v4tinput, ArrayStride 32) and ${ITERS}
// counts vector iterations (operand count divided by 4).
const tcu::StringTemplate CompShaderVector(R"(
OpCapability Shader
${OPCAPABILITY}
${NANCAP}
${NANEXT}
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
${NANMODE}
OpExecutionMode %main LocalSize 1 1 1
OpName %main "main"
OpName %i "i"
OpName %Output1 "Output1"
OpMemberName %Output1 0 "values"
OpName %output1 "output1"
OpName %Input1 "Input1"
OpMemberName %Input1 0 "values"
OpName %input1 "input1"
OpName %Input2 "Input2"
OpMemberName %Input2 0 "values"
OpName %input2 "input2"
OpDecorate %_runtimearr_v4int ArrayStride 16
OpMemberDecorate %Output1 0 Offset 0
OpDecorate %Output1 BufferBlock
OpDecorate %output1 DescriptorSet 0
OpDecorate %output1 Binding 2
OpDecorate %_runtimearr_v4tinput ArrayStride 32
OpMemberDecorate %Input1 0 Offset 0
OpDecorate %Input1 BufferBlock
OpDecorate %input1 DescriptorSet 0
OpDecorate %input1 Binding 0
OpDecorate %_runtimearr_v4tinput_0 ArrayStride 32
OpMemberDecorate %Input2 0 Offset 0
OpDecorate %Input2 BufferBlock
OpDecorate %input2 DescriptorSet 0
OpDecorate %input2 Binding 1
%void = OpTypeVoid
%3 = OpTypeFunction %void
%int = OpTypeInt 32 1
%_ptr_Function_int = OpTypePointer Function %int
%int_0 = OpConstant %int 0
%niters = OpConstant %int ${ITERS}
%bool = OpTypeBool
%v4int = OpTypeVector %int 4
%_runtimearr_v4int = OpTypeRuntimeArray %v4int
%Output1 = OpTypeStruct %_runtimearr_v4int
%_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
%output1 = OpVariable %_ptr_Uniform_Output1 Uniform
%tinput = ${OPTYPE}
%v4tinput = OpTypeVector %tinput 4
%_runtimearr_v4tinput = OpTypeRuntimeArray %v4tinput
%Input1 = OpTypeStruct %_runtimearr_v4tinput
%_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
%input1 = OpVariable %_ptr_Uniform_Input1 Uniform
%_ptr_Uniform_v4tinput = OpTypePointer Uniform %v4tinput
%_runtimearr_v4tinput_0 = OpTypeRuntimeArray %v4tinput
%Input2 = OpTypeStruct %_runtimearr_v4tinput_0
%_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
%input2 = OpVariable %_ptr_Uniform_Input2 Uniform
%v4bool = OpTypeVector %bool 4
%int_1 = OpConstant %int 1
%45 = OpConstantComposite %v4int %int_0 %int_0 %int_0 %int_0
%46 = OpConstantComposite %v4int %int_1 %int_1 %int_1 %int_1
%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int
%main = OpFunction %void None %3
%5 = OpLabel
%i = OpVariable %_ptr_Function_int Function
OpStore %i %int_0
OpBranch %10
%10 = OpLabel
OpLoopMerge %12 %13 None
OpBranch %14
%14 = OpLabel
%15 = OpLoad %int %i
%18 = OpSLessThan %bool %15 %niters
OpBranchConditional %18 %11 %12
%11 = OpLabel
%24 = OpLoad %int %i
%31 = OpLoad %int %i
%33 = OpAccessChain %_ptr_Uniform_v4tinput %input1 %int_0 %31
%34 = OpLoad %v4tinput %33
%39 = OpLoad %int %i
%40 = OpAccessChain %_ptr_Uniform_v4tinput %input2 %int_0 %39
%41 = OpLoad %v4tinput %40
%43 = ${OPNAME} %v4bool %34 %41
%47 = OpSelect %v4int %43 %46 %45
%49 = OpAccessChain %_ptr_Uniform_v4int %output1 %int_0 %24
OpStore %49 %47
OpBranch %13
%13 = OpLabel
%50 = OpLoad %int %i
%51 = OpIAdd %int %50 %int_1
OpStore %i %51
OpBranch %10
%12 = OpLabel
OpReturn
OpFunctionEnd
)");
437
// Shader template for the vertex stage using single scalars.
// Generated from the following GLSL shader, replacing some bits by template parameters.
#if 0
#version 430

// Left operands, right operands and results.
layout(binding = 0) buffer Input1 { double values[]; } input1;
layout(binding = 1) buffer Input2 { double values[]; } input2;
layout(binding = 2) buffer Output1 { int values[]; } output1;

void main()
{
	gl_PointSize = 1;
	gl_Position = vec4(0.0, 0.0, 0.0, 1.0);

	for (int i = 0; i < 20; i++) {
		output1.values[i] = int(input1.values[i] == input2.values[i]);
	}
}
#endif
// Template parameters: same as CompShaderSingle.  In addition to the
// comparison loop, the shader writes gl_PointSize and gl_Position so the
// vertex stage produces a well-defined point.
const tcu::StringTemplate VertShaderSingle(R"(
OpCapability Shader
${OPCAPABILITY}
${NANCAP}
${NANEXT}
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Vertex %main "main" %_
${NANMODE}
OpName %main "main"
OpName %gl_PerVertex "gl_PerVertex"
OpMemberName %gl_PerVertex 0 "gl_Position"
OpMemberName %gl_PerVertex 1 "gl_PointSize"
OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
OpName %_ ""
OpName %i "i"
OpName %Output1 "Output1"
OpMemberName %Output1 0 "values"
OpName %output1 "output1"
OpName %Input1 "Input1"
OpMemberName %Input1 0 "values"
OpName %input1 "input1"
OpName %Input2 "Input2"
OpMemberName %Input2 0 "values"
OpName %input2 "input2"
OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
OpDecorate %gl_PerVertex Block
OpDecorate %_runtimearr_int ArrayStride 4
OpMemberDecorate %Output1 0 Offset 0
OpDecorate %Output1 BufferBlock
OpDecorate %output1 DescriptorSet 0
OpDecorate %output1 Binding 2
OpDecorate %_runtimearr_tinput ArrayStride 8
OpMemberDecorate %Input1 0 Offset 0
OpDecorate %Input1 BufferBlock
OpDecorate %input1 DescriptorSet 0
OpDecorate %input1 Binding 0
OpDecorate %_runtimearr_tinput_0 ArrayStride 8
OpMemberDecorate %Input2 0 Offset 0
OpDecorate %Input2 BufferBlock
OpDecorate %input2 DescriptorSet 0
OpDecorate %input2 Binding 1
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%uint = OpTypeInt 32 0
%uint_1 = OpConstant %uint 1
%_arr_float_uint_1 = OpTypeArray %float %uint_1
%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1
%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex
%_ = OpVariable %_ptr_Output_gl_PerVertex Output
%int = OpTypeInt 32 1
%int_1 = OpConstant %int 1
%float_1 = OpConstant %float 1
%_ptr_Output_float = OpTypePointer Output %float
%int_0 = OpConstant %int 0
%float_0 = OpConstant %float 0
%21 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1
%_ptr_Output_v4float = OpTypePointer Output %v4float
%_ptr_Function_int = OpTypePointer Function %int
%niters = OpConstant %int ${ITERS}
%bool = OpTypeBool
%_runtimearr_int = OpTypeRuntimeArray %int
%Output1 = OpTypeStruct %_runtimearr_int
%_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
%output1 = OpVariable %_ptr_Uniform_Output1 Uniform
%tinput = ${OPTYPE}
%_runtimearr_tinput = OpTypeRuntimeArray %tinput
%Input1 = OpTypeStruct %_runtimearr_tinput
%_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
%input1 = OpVariable %_ptr_Uniform_Input1 Uniform
%_ptr_Uniform_tinput = OpTypePointer Uniform %tinput
%_runtimearr_tinput_0 = OpTypeRuntimeArray %tinput
%Input2 = OpTypeStruct %_runtimearr_tinput_0
%_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
%input2 = OpVariable %_ptr_Uniform_Input2 Uniform
%_ptr_Uniform_int = OpTypePointer Uniform %int
%main = OpFunction %void None %3
%5 = OpLabel
%i = OpVariable %_ptr_Function_int Function
%18 = OpAccessChain %_ptr_Output_float %_ %int_1
OpStore %18 %float_1
%23 = OpAccessChain %_ptr_Output_v4float %_ %int_0
OpStore %23 %21
OpStore %i %int_0
OpBranch %26
%26 = OpLabel
OpLoopMerge %28 %29 None
OpBranch %30
%30 = OpLabel
%31 = OpLoad %int %i
%34 = OpSLessThan %bool %31 %niters
OpBranchConditional %34 %27 %28
%27 = OpLabel
%39 = OpLoad %int %i
%45 = OpLoad %int %i
%47 = OpAccessChain %_ptr_Uniform_tinput %input1 %int_0 %45
%48 = OpLoad %tinput %47
%53 = OpLoad %int %i
%54 = OpAccessChain %_ptr_Uniform_tinput %input2 %int_0 %53
%55 = OpLoad %tinput %54
%56 = ${OPNAME} %bool %48 %55
%57 = OpSelect %int %56 %int_1 %int_0
%59 = OpAccessChain %_ptr_Uniform_int %output1 %int_0 %39
OpStore %59 %57
OpBranch %29
%29 = OpLabel
%60 = OpLoad %int %i
%61 = OpIAdd %int %60 %int_1
OpStore %i %61
OpBranch %26
%28 = OpLabel
OpReturn
OpFunctionEnd
)");
576
// Shader template for the vertex stage using vectors.
// Generated from the following GLSL shader, replacing some bits by template parameters.
// Note the number of iterations needs to be divided by 4 as the shader will consume 4 doubles at a time.
#if 0
#version 430

// Left operands, right operands and results.
layout(binding = 0) buffer Input1 { dvec4 values[]; } input1;
layout(binding = 1) buffer Input2 { dvec4 values[]; } input2;
layout(binding = 2) buffer Output1 { ivec4 values[]; } output1;

void main()
{
	gl_PointSize = 1;
	gl_Position = vec4(0.0, 0.0, 0.0, 1.0);

	for (int i = 0; i < 5; i++) {
		output1.values[i] = ivec4(equal(input1.values[i], input2.values[i]));
	}
}
#endif
// Template parameters: same as VertShaderSingle, with operands packed in
// 4-component vectors and ${ITERS} counting vector iterations.
const tcu::StringTemplate VertShaderVector(R"(
OpCapability Shader
${OPCAPABILITY}
${NANCAP}
${NANEXT}
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Vertex %main "main" %_
${NANMODE}
OpName %main "main"
OpName %gl_PerVertex "gl_PerVertex"
OpMemberName %gl_PerVertex 0 "gl_Position"
OpMemberName %gl_PerVertex 1 "gl_PointSize"
OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
OpName %_ ""
OpName %i "i"
OpName %Output1 "Output1"
OpMemberName %Output1 0 "values"
OpName %output1 "output1"
OpName %Input1 "Input1"
OpMemberName %Input1 0 "values"
OpName %input1 "input1"
OpName %Input2 "Input2"
OpMemberName %Input2 0 "values"
OpName %input2 "input2"
OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
OpDecorate %gl_PerVertex Block
OpDecorate %_runtimearr_v4int ArrayStride 16
OpMemberDecorate %Output1 0 Offset 0
OpDecorate %Output1 BufferBlock
OpDecorate %output1 DescriptorSet 0
OpDecorate %output1 Binding 2
OpDecorate %_runtimearr_v4tinput ArrayStride 32
OpMemberDecorate %Input1 0 Offset 0
OpDecorate %Input1 BufferBlock
OpDecorate %input1 DescriptorSet 0
OpDecorate %input1 Binding 0
OpDecorate %_runtimearr_v4tinput_0 ArrayStride 32
OpMemberDecorate %Input2 0 Offset 0
OpDecorate %Input2 BufferBlock
OpDecorate %input2 DescriptorSet 0
OpDecorate %input2 Binding 1
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%uint = OpTypeInt 32 0
%uint_1 = OpConstant %uint 1
%_arr_float_uint_1 = OpTypeArray %float %uint_1
%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1
%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex
%_ = OpVariable %_ptr_Output_gl_PerVertex Output
%int = OpTypeInt 32 1
%int_1 = OpConstant %int 1
%float_1 = OpConstant %float 1
%_ptr_Output_float = OpTypePointer Output %float
%int_0 = OpConstant %int 0
%float_0 = OpConstant %float 0
%21 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1
%_ptr_Output_v4float = OpTypePointer Output %v4float
%_ptr_Function_int = OpTypePointer Function %int
%niters = OpConstant %int ${ITERS}
%bool = OpTypeBool
%v4int = OpTypeVector %int 4
%_runtimearr_v4int = OpTypeRuntimeArray %v4int
%Output1 = OpTypeStruct %_runtimearr_v4int
%_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
%output1 = OpVariable %_ptr_Uniform_Output1 Uniform
%tinput = ${OPTYPE}
%v4tinput = OpTypeVector %tinput 4
%_runtimearr_v4tinput = OpTypeRuntimeArray %v4tinput
%Input1 = OpTypeStruct %_runtimearr_v4tinput
%_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
%input1 = OpVariable %_ptr_Uniform_Input1 Uniform
%_ptr_Uniform_v4tinput = OpTypePointer Uniform %v4tinput
%_runtimearr_v4tinput_0 = OpTypeRuntimeArray %v4tinput
%Input2 = OpTypeStruct %_runtimearr_v4tinput_0
%_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
%input2 = OpVariable %_ptr_Uniform_Input2 Uniform
%v4bool = OpTypeVector %bool 4
%60 = OpConstantComposite %v4int %int_0 %int_0 %int_0 %int_0
%61 = OpConstantComposite %v4int %int_1 %int_1 %int_1 %int_1
%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int
%main = OpFunction %void None %3
%5 = OpLabel
%i = OpVariable %_ptr_Function_int Function
%18 = OpAccessChain %_ptr_Output_float %_ %int_1
OpStore %18 %float_1
%23 = OpAccessChain %_ptr_Output_v4float %_ %int_0
OpStore %23 %21
OpStore %i %int_0
OpBranch %26
%26 = OpLabel
OpLoopMerge %28 %29 None
OpBranch %30
%30 = OpLabel
%31 = OpLoad %int %i
%34 = OpSLessThan %bool %31 %niters
OpBranchConditional %34 %27 %28
%27 = OpLabel
%40 = OpLoad %int %i
%47 = OpLoad %int %i
%49 = OpAccessChain %_ptr_Uniform_v4tinput %input1 %int_0 %47
%50 = OpLoad %v4tinput %49
%55 = OpLoad %int %i
%56 = OpAccessChain %_ptr_Uniform_v4tinput %input2 %int_0 %55
%57 = OpLoad %v4tinput %56
%59 = ${OPNAME} %v4bool %50 %57
%62 = OpSelect %v4int %59 %61 %60
%64 = OpAccessChain %_ptr_Uniform_v4int %output1 %int_0 %40
OpStore %64 %62
OpBranch %29
%29 = OpLabel
%65 = OpLoad %int %i
%66 = OpIAdd %int %65 %int_1
OpStore %i %66
OpBranch %26
%28 = OpLabel
OpReturn
OpFunctionEnd
)");
721
// GLSL passthrough vertex shader to test the fragment shader.
// Emits a single point at the origin with a constant output color; the actual
// comparison work happens in the fragment shader.
const std::string VertShaderPassThrough = R"(
#version 430

layout(location = 0) out vec4 out_color;

void main()
{
	gl_PointSize = 1;
	gl_Position = vec4(0.0, 0.0, 0.0, 1.0);
	out_color = vec4(0.0, 0.0, 0.0, 1.0);
}
)";
735
// Shader template for the fragment stage using single scalars.
// Generated from the following GLSL shader, replacing some bits by template parameters.
#if 0
#version 430

// Left operands, right operands and results.
layout(binding = 0) buffer Input1 { double values[]; } input1;
layout(binding = 1) buffer Input2 { double values[]; } input2;
layout(binding = 2) buffer Output1 { int values[]; } output1;

void main()
{
	for (int i = 0; i < 20; i++) {
		output1.values[i] = int(input1.values[i] == input2.values[i]);
	}
}
#endif
// Template parameters: same as CompShaderSingle, with a Fragment entry point
// and OriginUpperLeft execution mode instead of a compute workgroup.
const tcu::StringTemplate FragShaderSingle(R"(
OpCapability Shader
${OPCAPABILITY}
${NANCAP}
${NANEXT}
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main"
${NANMODE}
OpExecutionMode %main OriginUpperLeft
OpSource GLSL 430
OpName %main "main"
OpName %i "i"
OpName %Output1 "Output1"
OpMemberName %Output1 0 "values"
OpName %output1 "output1"
OpName %Input1 "Input1"
OpMemberName %Input1 0 "values"
OpName %input1 "input1"
OpName %Input2 "Input2"
OpMemberName %Input2 0 "values"
OpName %input2 "input2"
OpDecorate %_runtimearr_int ArrayStride 4
OpMemberDecorate %Output1 0 Offset 0
OpDecorate %Output1 BufferBlock
OpDecorate %output1 DescriptorSet 0
OpDecorate %output1 Binding 2
OpDecorate %_runtimearr_tinput ArrayStride 8
OpMemberDecorate %Input1 0 Offset 0
OpDecorate %Input1 BufferBlock
OpDecorate %input1 DescriptorSet 0
OpDecorate %input1 Binding 0
OpDecorate %_runtimearr_tinput_0 ArrayStride 8
OpMemberDecorate %Input2 0 Offset 0
OpDecorate %Input2 BufferBlock
OpDecorate %input2 DescriptorSet 0
OpDecorate %input2 Binding 1
%void = OpTypeVoid
%3 = OpTypeFunction %void
%int = OpTypeInt 32 1
%_ptr_Function_int = OpTypePointer Function %int
%int_0 = OpConstant %int 0
%niters = OpConstant %int ${ITERS}
%bool = OpTypeBool
%_runtimearr_int = OpTypeRuntimeArray %int
%Output1 = OpTypeStruct %_runtimearr_int
%_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
%output1 = OpVariable %_ptr_Uniform_Output1 Uniform
%tinput = ${OPTYPE}
%_runtimearr_tinput = OpTypeRuntimeArray %tinput
%Input1 = OpTypeStruct %_runtimearr_tinput
%_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
%input1 = OpVariable %_ptr_Uniform_Input1 Uniform
%_ptr_Uniform_tinput = OpTypePointer Uniform %tinput
%_runtimearr_tinput_0 = OpTypeRuntimeArray %tinput
%Input2 = OpTypeStruct %_runtimearr_tinput_0
%_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
%input2 = OpVariable %_ptr_Uniform_Input2 Uniform
%int_1 = OpConstant %int 1
%_ptr_Uniform_int = OpTypePointer Uniform %int
%main = OpFunction %void None %3
%5 = OpLabel
%i = OpVariable %_ptr_Function_int Function
OpStore %i %int_0
OpBranch %10
%10 = OpLabel
OpLoopMerge %12 %13 None
OpBranch %14
%14 = OpLabel
%15 = OpLoad %int %i
%18 = OpSLessThan %bool %15 %niters
OpBranchConditional %18 %11 %12
%11 = OpLabel
%23 = OpLoad %int %i
%29 = OpLoad %int %i
%31 = OpAccessChain %_ptr_Uniform_tinput %input1 %int_0 %29
%32 = OpLoad %tinput %31
%37 = OpLoad %int %i
%38 = OpAccessChain %_ptr_Uniform_tinput %input2 %int_0 %37
%39 = OpLoad %tinput %38
%40 = ${OPNAME} %bool %32 %39
%42 = OpSelect %int %40 %int_1 %int_0
%44 = OpAccessChain %_ptr_Uniform_int %output1 %int_0 %23
OpStore %44 %42
OpBranch %13
%13 = OpLabel
%45 = OpLoad %int %i
%46 = OpIAdd %int %45 %int_1
OpStore %i %46
OpBranch %10
%12 = OpLabel
OpReturn
OpFunctionEnd
)");
847
// Shader template for the fragment stage using vectors.
// Generated from the following GLSL shader, replacing some bits by template parameters.
// Note the number of iterations needs to be divided by 4 as the shader will consume 4 doubles at a time.
#if 0
#version 430

// Left operands, right operands and results.
layout(binding = 0) buffer Input1 { dvec4 values[]; } input1;
layout(binding = 1) buffer Input2 { dvec4 values[]; } input2;
layout(binding = 2) buffer Output1 { ivec4 values[]; } output1;

void main()
{
	for (int i = 0; i < 5; i++) {
		output1.values[i] = ivec4(equal(input1.values[i], input2.values[i]));
	}
}
#endif
// Template parameters: same as FragShaderSingle, with operands packed in
// 4-component vectors and ${ITERS} counting vector iterations.
const tcu::StringTemplate FragShaderVector(R"(
OpCapability Shader
${OPCAPABILITY}
${NANCAP}
${NANEXT}
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main"
${NANMODE}
OpExecutionMode %main OriginUpperLeft
OpName %main "main"
OpName %i "i"
OpName %Output1 "Output1"
OpMemberName %Output1 0 "values"
OpName %output1 "output1"
OpName %Input1 "Input1"
OpMemberName %Input1 0 "values"
OpName %input1 "input1"
OpName %Input2 "Input2"
OpMemberName %Input2 0 "values"
OpName %input2 "input2"
OpDecorate %_runtimearr_v4int ArrayStride 16
OpMemberDecorate %Output1 0 Offset 0
OpDecorate %Output1 BufferBlock
OpDecorate %output1 DescriptorSet 0
OpDecorate %output1 Binding 2
OpDecorate %_runtimearr_v4tinput ArrayStride 32
OpMemberDecorate %Input1 0 Offset 0
OpDecorate %Input1 BufferBlock
OpDecorate %input1 DescriptorSet 0
OpDecorate %input1 Binding 0
OpDecorate %_runtimearr_v4tinput_0 ArrayStride 32
OpMemberDecorate %Input2 0 Offset 0
OpDecorate %Input2 BufferBlock
OpDecorate %input2 DescriptorSet 0
OpDecorate %input2 Binding 1
%void = OpTypeVoid
%3 = OpTypeFunction %void
%int = OpTypeInt 32 1
%_ptr_Function_int = OpTypePointer Function %int
%int_0 = OpConstant %int 0
%niters = OpConstant %int ${ITERS}
%bool = OpTypeBool
%v4int = OpTypeVector %int 4
%_runtimearr_v4int = OpTypeRuntimeArray %v4int
%Output1 = OpTypeStruct %_runtimearr_v4int
%_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
%output1 = OpVariable %_ptr_Uniform_Output1 Uniform
%tinput = ${OPTYPE}
%v4tinput = OpTypeVector %tinput 4
%_runtimearr_v4tinput = OpTypeRuntimeArray %v4tinput
%Input1 = OpTypeStruct %_runtimearr_v4tinput
%_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
%input1 = OpVariable %_ptr_Uniform_Input1 Uniform
%_ptr_Uniform_v4tinput = OpTypePointer Uniform %v4tinput
%_runtimearr_v4tinput_0 = OpTypeRuntimeArray %v4tinput
%Input2 = OpTypeStruct %_runtimearr_v4tinput_0
%_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
%input2 = OpVariable %_ptr_Uniform_Input2 Uniform
%v4bool = OpTypeVector %bool 4
%int_1 = OpConstant %int 1
%45 = OpConstantComposite %v4int %int_0 %int_0 %int_0 %int_0
%46 = OpConstantComposite %v4int %int_1 %int_1 %int_1 %int_1
%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int
%main = OpFunction %void None %3
%5 = OpLabel
%i = OpVariable %_ptr_Function_int Function
OpStore %i %int_0
OpBranch %10
%10 = OpLabel
OpLoopMerge %12 %13 None
OpBranch %14
%14 = OpLabel
%15 = OpLoad %int %i
%18 = OpSLessThan %bool %15 %niters
OpBranchConditional %18 %11 %12
%11 = OpLabel
%24 = OpLoad %int %i
%31 = OpLoad %int %i
%33 = OpAccessChain %_ptr_Uniform_v4tinput %input1 %int_0 %31
%34 = OpLoad %v4tinput %33
%39 = OpLoad %int %i
%40 = OpAccessChain %_ptr_Uniform_v4tinput %input2 %int_0 %39
%41 = OpLoad %v4tinput %40
%43 = ${OPNAME} %v4bool %34 %41
%47 = OpSelect %v4int %43 %46 %45
%49 = OpAccessChain %_ptr_Uniform_v4int %output1 %int_0 %24
OpStore %49 %47
OpBranch %13
%13 = OpLabel
%50 = OpLoad %int %i
%51 = OpIAdd %int %50 %int_1
OpStore %i %51
OpBranch %10
%12 = OpLabel
OpReturn
OpFunctionEnd
)");
964
965 struct SpirvTemplateManager
966 {
getTemplatevkt::SpirVAssembly::__anon897f443a0111::SpirvTemplateManager967 static const tcu::StringTemplate& getTemplate (DataType type, vk::VkShaderStageFlagBits stage)
968 {
969 DE_ASSERT(type == DATA_TYPE_SINGLE || type == DATA_TYPE_VECTOR);
970 DE_ASSERT( stage == vk::VK_SHADER_STAGE_COMPUTE_BIT ||
971 stage == vk::VK_SHADER_STAGE_VERTEX_BIT ||
972 stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT );
973
974 if (type == DATA_TYPE_SINGLE)
975 {
976 if (stage == vk::VK_SHADER_STAGE_COMPUTE_BIT) return CompShaderSingle;
977 if (stage == vk::VK_SHADER_STAGE_VERTEX_BIT) return VertShaderSingle;
978 else return FragShaderSingle;
979 }
980 else
981 {
982 if (stage == vk::VK_SHADER_STAGE_COMPUTE_BIT) return CompShaderVector;
983 if (stage == vk::VK_SHADER_STAGE_VERTEX_BIT) return VertShaderVector;
984 else return FragShaderVector;
985 }
986 }
987
988 // Specialized below for different types.
989 template <class T>
990 static std::string getOpCapability();
991
992 // Same.
993 template <class T>
994 static std::string getOpType();
995
996 // Return the capabilities, extensions and execution modes for NaN preservation.
997 static std::string getNanCapability (bool preserve);
998 static std::string getNanExtension (bool preserve);
999 static std::string getNanExeMode (bool preserve);
1000 };
1001
// SPIR-V capability required to operate on each 64-bit type in shaders.
template <> std::string SpirvTemplateManager::getOpCapability<double>() { return "OpCapability Float64"; }
template <> std::string SpirvTemplateManager::getOpCapability<deInt64>() { return "OpCapability Int64"; }
template <> std::string SpirvTemplateManager::getOpCapability<deUint64>() { return "OpCapability Int64"; }

// SPIR-V declaration of each 64-bit scalar type (float, signed int, unsigned int).
template <> std::string SpirvTemplateManager::getOpType<double>() { return "OpTypeFloat 64"; }
template <> std::string SpirvTemplateManager::getOpType<deInt64>() { return "OpTypeInt 64 1"; }
template <> std::string SpirvTemplateManager::getOpType<deUint64>() { return "OpTypeInt 64 0"; }
1009
getNanCapability(bool preserve)1010 std::string SpirvTemplateManager::getNanCapability (bool preserve)
1011 {
1012 return (preserve ? "OpCapability SignedZeroInfNanPreserve" : "");
1013 }
1014
getNanExtension(bool preserve)1015 std::string SpirvTemplateManager::getNanExtension (bool preserve)
1016 {
1017 return (preserve ? "OpExtension \"SPV_KHR_float_controls\"" : "");
1018 }
1019
getNanExeMode(bool preserve)1020 std::string SpirvTemplateManager::getNanExeMode (bool preserve)
1021 {
1022 return (preserve ? "OpExecutionMode %main SignedZeroInfNanPreserve 64" : "");
1023 }
1024
// Small pair holding a buffer and the device memory allocation bound to it.
// NOTE(review): the move constructor and move assignment copy-construct the
// members from an lvalue; this presumably relies on vk::Move and de::MovePtr
// transferring ownership when "copied" from a non-const reference (dEQP
// move-on-copy semantics) — confirm against vkRefUtil/deUniquePtr.
struct BufferWithMemory
{
	vk::Move<vk::VkBuffer> buffer;			// Buffer handle (owned).
	de::MovePtr<vk::Allocation> allocation;	// Memory bound to the buffer (owned).

	BufferWithMemory ()
		: buffer(), allocation()
	{}

	BufferWithMemory (BufferWithMemory&& other)
		: buffer(other.buffer), allocation(other.allocation)
	{}

	BufferWithMemory& operator= (BufferWithMemory&& other)
	{
		buffer = other.buffer;
		allocation = other.allocation;
		return *this;
	}
};
1045
1046 // Create storage buffer, bind memory to it and return both things.
createStorageBuffer(const vk::DeviceInterface & vkdi,const vk::VkDevice device,vk::Allocator & allocator,size_t numBytes)1047 BufferWithMemory createStorageBuffer(const vk::DeviceInterface& vkdi,
1048 const vk::VkDevice device,
1049 vk::Allocator& allocator,
1050 size_t numBytes)
1051 {
1052 const vk::VkBufferCreateInfo bufferCreateInfo =
1053 {
1054 vk::VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // sType
1055 DE_NULL, // pNext
1056 0u, // flags
1057 numBytes, // size
1058 vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, // usage
1059 vk::VK_SHARING_MODE_EXCLUSIVE, // sharingMode
1060 0u, // queueFamilyCount
1061 DE_NULL, // pQueueFamilyIndices
1062 };
1063
1064 BufferWithMemory bufmem;
1065
1066 bufmem.buffer = vk::createBuffer(vkdi, device, &bufferCreateInfo);
1067 const vk::VkMemoryRequirements requirements = getBufferMemoryRequirements(vkdi, device, *bufmem.buffer);
1068 bufmem.allocation = allocator.allocate(requirements, vk::MemoryRequirement::HostVisible);
1069
1070 VK_CHECK(vkdi.bindBufferMemory(device, *bufmem.buffer, bufmem.allocation->getMemory(), bufmem.allocation->getOffset()));
1071
1072 return bufmem;
1073 }
1074
// Creates a shader module from an already-built SPIR-V program binary.
vk::Move<vk::VkShaderModule> createShaderModule (const vk::DeviceInterface& deviceInterface,
												 vk::VkDevice device,
												 const vk::ProgramBinary& binary)
{
	DE_ASSERT(binary.getFormat() == vk::PROGRAM_FORMAT_SPIRV);

	const vk::VkShaderModuleCreateInfo moduleCreateInfo =
	{
		vk::VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,		// sType
		DE_NULL,												// pNext
		0,														// flags
		static_cast<deUintptr>(binary.getSize()),				// codeSize
		reinterpret_cast<const deUint32*>(binary.getBinary()),	// pCode
	};

	// Delegates to the vk:: overload taking the create-info structure.
	return createShaderModule(deviceInterface, device, &moduleCreateInfo);
}
1092
// Make sure the length of the following vectors is a multiple of 4. This will make sure operands can be reused for vectorized tests.

// 20 pairs covering less-than, greater-than and equality with negative, zero
// and positive values, plus NaN combinations to exercise ordered vs.
// unordered floating point comparisons.
const OperandsVector<double> DOUBLE_OPERANDS =
{
	{ -8.0, -5.0 },
	{ -5.0, -8.0 },
	{ -5.0, -5.0 },
	{ -5.0, 0.0 },
	{ 0.0, -5.0 },
	{ 5.0, 0.0 },
	{ 0.0, 5.0 },
	{ 0.0, 0.0 },
	{ -5.0, 5.0 },
	{ 5.0, -5.0 },
	{ 5.0, 8.0 },
	{ 8.0, 5.0 },
	{ 5.0, 5.0 },
	{ -6.0, -5.0 },
	{ 6.0, 5.0 },
	{ 0.0, 1.0 },
	{ 1.0, 0.0 },
	{ 0.0, NAN },
	{ NAN, 0.0 },
	{ NAN, NAN },
};

// 16 pairs covering signed comparisons around zero and with both signs.
const OperandsVector<deInt64> INT64_OPERANDS =
{
	{ -8, -5 },
	{ -5, -8 },
	{ -5, -5 },
	{ -5, 0 },
	{ 0, -5 },
	{ 5, 0 },
	{ 0, 5 },
	{ 0, 0 },
	{ -5, 5 },
	{ 5, -5 },
	{ 5, 8 },
	{ 8, 5 },
	{ 5, 5 },
	{ -6, -5 },
	{ 6, 5 },
	{ 0, 1 },
};

// 12 pairs covering unsigned comparisons, including values around the
// maximum representable deUint64 to catch signed/unsigned mix-ups.
constexpr auto MAX_DEUINT64 = std::numeric_limits<deUint64>::max();
const OperandsVector<deUint64> UINT64_OPERANDS =
{
	{ 0, 0 },
	{ 1, 0 },
	{ 0, 1 },
	{ 1, 1 },
	{ 5, 8 },
	{ 8, 5 },
	{ 5, 5 },
	{ 0, MAX_DEUINT64 },
	{ MAX_DEUINT64, 0 },
	{ MAX_DEUINT64 - 1, MAX_DEUINT64 },
	{ MAX_DEUINT64, MAX_DEUINT64 - 1 },
	{ MAX_DEUINT64, MAX_DEUINT64 },
};
1154
// Test instance running one 64-bit comparison operation over a vector of
// operand pairs in the chosen shader stage.
template <class T>
class T64bitCompareTestInstance : public TestInstance
{
public:
	T64bitCompareTestInstance (Context& ctx, const TestParameters<T>& params);
	tcu::TestStatus iterate (void);

private:
	const TestParameters<T> m_params;		// Operation, operands, stage and NaN-preservation requirement.
	const size_t m_numOperations;			// Number of operand pairs to compare.
	const size_t m_inputBufferSize;			// Byte size of each of the two input buffers.
	const size_t m_outputBufferSize;		// Byte size of the int results buffer.
};
1168
// Each input buffer holds one operand of type T per operation; the output
// buffer holds one 32-bit int result per operation.
template <class T>
T64bitCompareTestInstance<T>::T64bitCompareTestInstance (Context& ctx, const TestParameters<T>& params)
	: TestInstance(ctx)
	, m_params(params)
	, m_numOperations(m_params.operands.size())
	, m_inputBufferSize(m_numOperations * sizeof(T))
	, m_outputBufferSize(m_numOperations * sizeof(int))
{
}
1178
// Tells whether a value is a NaN. For non-double types this is trivially
// false; the double specialization below performs the actual check.
template<class T>
bool genericIsNan (T)
{
	return false;
}

// Doubles are the only tested type that can actually hold NaN values.
template<>
bool genericIsNan<double> (double arg)
{
	return std::isnan(arg);
}
1190
// Runs the comparison on every operand pair in the requested shader stage:
// creates the input/output storage buffers, a descriptor set pointing at
// them, a compute or graphics pipeline, records and submits the work, then
// reads back and verifies the results.
template <class T>
tcu::TestStatus T64bitCompareTestInstance<T>::iterate (void)
{
	DE_ASSERT(m_params.stage == vk::VK_SHADER_STAGE_COMPUTE_BIT ||
			  m_params.stage == vk::VK_SHADER_STAGE_VERTEX_BIT ||
			  m_params.stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT );

	auto& vkdi = m_context.getDeviceInterface();
	auto device = m_context.getDevice();
	auto& allocator = m_context.getDefaultAllocator();

	// Create storage buffers (left operands, right operands and results buffer).
	BufferWithMemory input1 = createStorageBuffer(vkdi, device, allocator, m_inputBufferSize);
	BufferWithMemory input2 = createStorageBuffer(vkdi, device, allocator, m_inputBufferSize);
	BufferWithMemory output1 = createStorageBuffer(vkdi, device, allocator, m_outputBufferSize);

	// Create an array of buffers. The index in this vector doubles as the
	// descriptor binding number below.
	std::vector<vk::VkBuffer> buffers;
	buffers.push_back(input1.buffer.get());
	buffers.push_back(input2.buffer.get());
	buffers.push_back(output1.buffer.get());

	// Create descriptor set layout: one storage buffer binding per buffer,
	// visible to the tested stage only.
	std::vector<vk::VkDescriptorSetLayoutBinding> bindings;
	for (size_t i = 0; i < buffers.size(); ++i)
	{
		vk::VkDescriptorSetLayoutBinding binding =
		{
			static_cast<deUint32>(i),								// uint32_t binding;
			vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,					// VkDescriptorType descriptorType;
			1u,														// uint32_t descriptorCount;
			static_cast<vk::VkShaderStageFlags>(m_params.stage),	// VkShaderStageFlags stageFlags;
			DE_NULL													// const VkSampler* pImmutableSamplers;
		};
		bindings.push_back(binding);
	}

	const vk::VkDescriptorSetLayoutCreateInfo layoutCreateInfo =
	{
		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,	// VkStructureType sType;
		DE_NULL,													// const void* pNext;
		0,															// VkDescriptorSetLayoutCreateFlags flags;
		static_cast<deUint32>(bindings.size()),						// uint32_t bindingCount;
		bindings.data()												// const VkDescriptorSetLayoutBinding* pBindings;
	};
	auto descriptorSetLayout = vk::createDescriptorSetLayout(vkdi, device, &layoutCreateInfo);

	// Create descriptor set.
	vk::DescriptorPoolBuilder poolBuilder;
	poolBuilder.addType(bindings[0].descriptorType, static_cast<deUint32>(bindings.size()));
	auto descriptorPool = poolBuilder.build(vkdi, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	const vk::VkDescriptorSetAllocateInfo allocateInfo =
	{
		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,	// VkStructureType sType;
		DE_NULL,											// const void* pNext;
		*descriptorPool,									// VkDescriptorPool descriptorPool;
		1u,													// uint32_t descriptorSetCount;
		&descriptorSetLayout.get()							// const VkDescriptorSetLayout* pSetLayouts;
	};
	auto descriptorSet = vk::allocateDescriptorSet(vkdi, device, &allocateInfo);

	// Update descriptor set. The buffer infos are collected first so the
	// writes can point at stable addresses (hence the reserve() calls).
	std::vector<vk::VkDescriptorBufferInfo> descriptorBufferInfos;
	std::vector<vk::VkWriteDescriptorSet> descriptorWrites;

	descriptorBufferInfos.reserve(buffers.size());
	descriptorWrites.reserve(buffers.size());

	for (size_t i = 0; i < buffers.size(); ++i)
	{
		vk::VkDescriptorBufferInfo bufferInfo =
		{
			buffers[i],		// VkBuffer buffer;
			0u,				// VkDeviceSize offset;
			VK_WHOLE_SIZE,	// VkDeviceSize range;
		};
		descriptorBufferInfos.push_back(bufferInfo);
	}

	for (size_t i = 0; i < buffers.size(); ++i)
	{
		vk::VkWriteDescriptorSet write =
		{
			vk::VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,	// VkStructureType sType;
			DE_NULL,									// const void* pNext;
			*descriptorSet,								// VkDescriptorSet dstSet;
			static_cast<deUint32>(i),					// uint32_t dstBinding;
			0u,											// uint32_t dstArrayElement;
			1u,											// uint32_t descriptorCount;
			bindings[i].descriptorType,					// VkDescriptorType descriptorType;
			DE_NULL,									// const VkDescriptorImageInfo* pImageInfo;
			&descriptorBufferInfos[i],					// const VkDescriptorBufferInfo* pBufferInfo;
			DE_NULL,									// const VkBufferView* pTexelBufferView;
		};
		descriptorWrites.push_back(write);
	}
	vkdi.updateDescriptorSets(device, static_cast<deUint32>(descriptorWrites.size()), descriptorWrites.data(), 0u, DE_NULL);

	// Fill storage buffers with data. Note: VkPhysicalDeviceLimits.minMemoryMapAlignment guarantees this cast is safe.
	T* input1Ptr = reinterpret_cast<T*> (input1.allocation->getHostPtr());
	T* input2Ptr = reinterpret_cast<T*> (input2.allocation->getHostPtr());
	int* output1Ptr = reinterpret_cast<int*> (output1.allocation->getHostPtr());

	for (size_t i = 0; i < m_numOperations; ++i)
	{
		input1Ptr[i] = m_params.operands[i].first;
		input2Ptr[i] = m_params.operands[i].second;
		// Sentinel value distinct from any expected result (0 or 1), so a
		// shader that never writes is detected.
		output1Ptr[i] = -9;
	}

	// Flush memory.
	vk::flushAlloc(vkdi, device, *input1.allocation);
	vk::flushAlloc(vkdi, device, *input2.allocation);
	vk::flushAlloc(vkdi, device, *output1.allocation);

	// Prepare barriers in advance so data is visible to the shaders and the host.
	std::vector<vk::VkBufferMemoryBarrier> hostToDevBarriers;
	std::vector<vk::VkBufferMemoryBarrier> devToHostBarriers;
	for (size_t i = 0; i < buffers.size(); ++i)
	{
		const vk::VkBufferMemoryBarrier hostDev =
		{
			vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,						// VkStructureType sType;
			DE_NULL,															// const void* pNext;
			vk::VK_ACCESS_HOST_WRITE_BIT,										// VkAccessFlags srcAccessMask;
			(vk::VK_ACCESS_SHADER_READ_BIT | vk::VK_ACCESS_SHADER_WRITE_BIT),	// VkAccessFlags dstAccessMask;
			VK_QUEUE_FAMILY_IGNORED,											// deUint32 srcQueueFamilyIndex;
			VK_QUEUE_FAMILY_IGNORED,											// deUint32 dstQueueFamilyIndex;
			buffers[i],															// VkBuffer buffer;
			0u,																	// VkDeviceSize offset;
			VK_WHOLE_SIZE,														// VkDeviceSize size;
		};
		hostToDevBarriers.push_back(hostDev);

		const vk::VkBufferMemoryBarrier devHost =
		{
			vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType sType;
			DE_NULL,										// const void* pNext;
			vk::VK_ACCESS_SHADER_WRITE_BIT,					// VkAccessFlags srcAccessMask;
			vk::VK_ACCESS_HOST_READ_BIT,					// VkAccessFlags dstAccessMask;
			VK_QUEUE_FAMILY_IGNORED,						// deUint32 srcQueueFamilyIndex;
			VK_QUEUE_FAMILY_IGNORED,						// deUint32 dstQueueFamilyIndex;
			buffers[i],										// VkBuffer buffer;
			0u,												// VkDeviceSize offset;
			VK_WHOLE_SIZE,									// VkDeviceSize size;
		};
		devToHostBarriers.push_back(devHost);
	}

	// Create command pool and command buffer.
	auto queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();

	const vk::VkCommandPoolCreateInfo cmdPoolCreateInfo =
	{
		vk::VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,	// VkStructureType sType;
		DE_NULL,										// const void* pNext;
		vk::VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,		// VkCommandPoolCreateFlags flags;
		queueFamilyIndex,								// deUint32 queueFamilyIndex;
	};
	auto cmdPool = vk::createCommandPool(vkdi, device, &cmdPoolCreateInfo);

	const vk::VkCommandBufferAllocateInfo cmdBufferAllocateInfo =
	{
		vk::VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,	// VkStructureType sType;
		DE_NULL,											// const void* pNext;
		*cmdPool,											// VkCommandPool commandPool;
		vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY,				// VkCommandBufferLevel level;
		1u,													// deUint32 commandBufferCount;
	};
	auto cmdBuffer = vk::allocateCommandBuffer(vkdi, device, &cmdBufferAllocateInfo);

	// Create pipeline layout.
	const vk::VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
	{
		vk::VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,	// VkStructureType sType;
		DE_NULL,											// const void* pNext;
		0,													// VkPipelineLayoutCreateFlags flags;
		1u,													// deUint32 setLayoutCount;
		&descriptorSetLayout.get(),							// const VkDescriptorSetLayout* pSetLayouts;
		0u,													// deUint32 pushConstantRangeCount;
		DE_NULL,											// const VkPushConstantRange* pPushConstantRanges;
	};
	auto pipelineLayout = vk::createPipelineLayout(vkdi, device, &pipelineLayoutCreateInfo);

	if (m_params.stage == vk::VK_SHADER_STAGE_COMPUTE_BIT)
	{
		// Create compute pipeline.
		auto compShaderModule = createShaderModule(vkdi, device, m_context.getBinaryCollection().get("comp"));

		const vk::VkComputePipelineCreateInfo computeCreateInfo =
		{
			vk::VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,				// VkStructureType sType;
			DE_NULL,														// const void* pNext;
			0,																// VkPipelineCreateFlags flags;
			{																// VkPipelineShaderStageCreateInfo stage;
				vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType sType;
				DE_NULL,													// const void* pNext;
				0,															// VkPipelineShaderStageCreateFlags flags;
				vk::VK_SHADER_STAGE_COMPUTE_BIT,							// VkShaderStageFlagBits stage;
				*compShaderModule,											// VkShaderModule module;
				"main",														// const char* pName;
				DE_NULL,													// const VkSpecializationInfo* pSpecializationInfo;
			},
			*pipelineLayout,												// VkPipelineLayout layout;
			DE_NULL,														// VkPipeline basePipelineHandle;
			0,																// int32_t basePipelineIndex;
		};
		auto computePipeline = vk::createComputePipeline(vkdi, device, DE_NULL, &computeCreateInfo);

		// Run the shader. A single workgroup loops over all operations.
		vk::beginCommandBuffer(vkdi, *cmdBuffer);
		vkdi.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
		vkdi.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1u, &descriptorSet.get(), 0u, DE_NULL);
		vkdi.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_HOST_BIT, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0u, DE_NULL, static_cast<deUint32>(hostToDevBarriers.size()), hostToDevBarriers.data(), 0u, DE_NULL);
		vkdi.cmdDispatch(*cmdBuffer, 1u, 1u, 1u);
		vkdi.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, 0, 0u, DE_NULL, static_cast<deUint32>(devToHostBarriers.size()), devToHostBarriers.data(), 0u, DE_NULL);
		vk::endCommandBuffer(vkdi, *cmdBuffer);
		vk::submitCommandsAndWait(vkdi, device, m_context.getUniversalQueue(), *cmdBuffer);
	}
	else if (m_params.stage == vk::VK_SHADER_STAGE_VERTEX_BIT ||
			 m_params.stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT )
	{
		const bool isFrag = (m_params.stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT);

		// Create graphics pipeline. The vertex-only variant discards
		// rasterization; the fragment variant draws one point into a 1x1
		// attachment-less framebuffer.
		auto vertShaderModule = createShaderModule(vkdi, device, m_context.getBinaryCollection().get("vert"));
		vk::Move<vk::VkShaderModule> fragShaderModule;
		std::vector<vk::VkPipelineShaderStageCreateInfo> shaderStages;

		const vk::VkPipelineShaderStageCreateInfo vertexStage =
		{
			vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType sType;
			DE_NULL,													// const void* pNext;
			0,															// VkPipelineShaderStageCreateFlags flags;
			vk::VK_SHADER_STAGE_VERTEX_BIT,								// VkShaderStageFlagBits stage;
			*vertShaderModule,											// VkShaderModule module;
			"main",														// const char* pName;
			DE_NULL,													// const VkSpecializationInfo* pSpecializationInfo;
		};
		shaderStages.push_back(vertexStage);

		if (isFrag)
		{
			fragShaderModule = createShaderModule(vkdi, device, m_context.getBinaryCollection().get("frag"));

			const vk::VkPipelineShaderStageCreateInfo fragmentStage =
			{
				vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType sType;
				DE_NULL,													// const void* pNext;
				0,															// VkPipelineShaderStageCreateFlags flags;
				vk::VK_SHADER_STAGE_FRAGMENT_BIT,							// VkShaderStageFlagBits stage;
				*fragShaderModule,											// VkShaderModule module;
				"main",														// const char* pName;
				DE_NULL,													// const VkSpecializationInfo* pSpecializationInfo;
			};
			shaderStages.push_back(fragmentStage);
		}

		// No vertex attributes: the vertex shader generates its own work.
		const vk::VkPipelineVertexInputStateCreateInfo vertexInputInfo =
		{
			vk::VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType sType;
			DE_NULL,														// const void* pNext;
			0,																// VkPipelineVertexInputStateCreateFlags flags;
			0u,																// deUint32 vertexBindingDescriptionCount;
			DE_NULL,														// const VkVertexInputBindingDescription* pVertexBindingDescriptions;
			0u,																// deUint32 vertexAttributeDescriptionCount;
			DE_NULL,														// const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
		};

		const vk::VkPipelineInputAssemblyStateCreateInfo inputAssembly =
		{
			vk::VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType sType;
			DE_NULL,															// const void* pNext;
			0u,																	// VkPipelineInputAssemblyStateCreateFlags flags;
			vk::VK_PRIMITIVE_TOPOLOGY_POINT_LIST,								// VkPrimitiveTopology topology;
			VK_FALSE,															// VkBool32 primitiveRestartEnable;
		};

		const vk::VkPipelineRasterizationStateCreateInfo rasterizationState =
		{
			vk::VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,	// VkStructureType sType;
			DE_NULL,														// const void* pNext;
			0,																// VkPipelineRasterizationStateCreateFlags flags;
			VK_FALSE,														// VkBool32 depthClampEnable;
			(isFrag ? VK_FALSE : VK_TRUE),									// VkBool32 rasterizerDiscardEnable;
			vk::VK_POLYGON_MODE_FILL,										// VkPolygonMode polygonMode;
			vk::VK_CULL_MODE_NONE,											// VkCullModeFlags cullMode;
			vk::VK_FRONT_FACE_COUNTER_CLOCKWISE,							// VkFrontFace frontFace;
			VK_FALSE,														// VkBool32 depthBiasEnable;
			0.0f,															// float depthBiasConstantFactor;
			0.0f,															// float depthBiasClamp;
			0.0f,															// float depthBiasSlopeFactor;
			1.0f,															// float lineWidth;
		};

		// Render pass with no attachments: results are written to the
		// storage buffer, not to a color target.
		const vk::VkSubpassDescription subpassDescription =
		{
			0,										// VkSubpassDescriptionFlags flags;
			vk::VK_PIPELINE_BIND_POINT_GRAPHICS,	// VkPipelineBindPoint pipelineBindPoint;
			0u,										// deUint32 inputAttachmentCount;
			DE_NULL,								// const VkAttachmentReference* pInputAttachments;
			0u,										// deUint32 colorAttachmentCount;
			DE_NULL,								// const VkAttachmentReference* pColorAttachments;
			DE_NULL,								// const VkAttachmentReference* pResolveAttachments;
			DE_NULL,								// const VkAttachmentReference* pDepthStencilAttachment;
			0u,										// deUint32 preserveAttachmentCount;
			0u,										// const deUint32* pPreserveAttachments;
		};

		const vk::VkRenderPassCreateInfo renderPassCreateInfo =
		{
			vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,	// VkStructureType sType;
			DE_NULL,										// const void* pNext;
			0,												// VkRenderPassCreateFlags flags;
			0u,												// deUint32 attachmentCount;
			DE_NULL,										// const VkAttachmentDescription* pAttachments;
			1u,												// deUint32 subpassCount;
			&subpassDescription,							// const VkSubpassDescription* pSubpasses;
			0u,												// deUint32 dependencyCount;
			DE_NULL,										// const VkSubpassDependency* pDependencies;
		};
		auto renderPass = vk::createRenderPass(vkdi, device, &renderPassCreateInfo);

		// Multisample and viewport state are only required when rasterization
		// is enabled (fragment variant).
		std::unique_ptr<vk::VkPipelineMultisampleStateCreateInfo> multisampleState;
		if (isFrag)
		{
			multisampleState.reset(new vk::VkPipelineMultisampleStateCreateInfo);
			*multisampleState =
			{
				vk::VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType sType;
				DE_NULL,														// const void* pNext;
				0,																// VkPipelineMultisampleStateCreateFlags flags;
				vk::VK_SAMPLE_COUNT_1_BIT,										// VkSampleCountFlagBits rasterizationSamples;
				VK_FALSE,														// VkBool32 sampleShadingEnable;
				0.0f,															// float minSampleShading;
				DE_NULL,														// const VkSampleMask* pSampleMask;
				VK_FALSE,														// VkBool32 alphaToCoverageEnable;
				VK_FALSE,														// VkBool32 alphaToOneEnable;
			};
		}

		const vk::VkViewport viewport =
		{
			0.0f,	// float x;
			0.0f,	// float y;
			1.0f,	// float width;
			1.0f,	// float height;
			0.0f,	// float minDepth;
			1.0f,	// float maxDepth;
		};

		const vk::VkRect2D renderArea = { { 0u, 0u }, { 1u, 1u } };

		std::unique_ptr<vk::VkPipelineViewportStateCreateInfo> viewportState;

		if (isFrag)
		{
			viewportState.reset(new vk::VkPipelineViewportStateCreateInfo);
			*viewportState =
			{
				vk::VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,	// VkStructureType sType;
				DE_NULL,													// const void* pNext;
				0,															// VkPipelineViewportStateCreateFlags flags;
				1u,															// deUint32 viewportCount;
				&viewport,													// const VkViewport* pViewports;
				1u,															// deUint32 scissorCount;
				&renderArea,												// const VkRect2D* pScissors;
			};
		}

		const vk::VkGraphicsPipelineCreateInfo graphicsCreateInfo =
		{
			vk::VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,	// VkStructureType sType;
			DE_NULL,												// const void* pNext;
			0,														// VkPipelineCreateFlags flags;
			static_cast<deUint32>(shaderStages.size()),				// deUint32 stageCount;
			shaderStages.data(),									// const VkPipelineShaderStageCreateInfo* pStages;
			&vertexInputInfo,										// const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
			&inputAssembly,											// const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
			DE_NULL,												// const VkPipelineTessellationStateCreateInfo* pTessellationState;
			viewportState.get(),									// const VkPipelineViewportStateCreateInfo* pViewportState;
			&rasterizationState,									// const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
			multisampleState.get(),									// const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
			DE_NULL,												// const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
			DE_NULL,												// const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
			DE_NULL,												// const VkPipelineDynamicStateCreateInfo* pDynamicState;
			*pipelineLayout,										// VkPipelineLayout layout;
			*renderPass,											// VkRenderPass renderPass;
			0u,														// deUint32 subpass;
			DE_NULL,												// VkPipeline basePipelineHandle;
			0u,														// deInt32 basePipelineIndex;
		};
		auto graphicsPipeline = vk::createGraphicsPipeline(vkdi, device, DE_NULL, &graphicsCreateInfo);

		const vk::VkFramebufferCreateInfo frameBufferCreateInfo =
		{
			vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,	// VkStructureType sType;
			DE_NULL,										// const void* pNext;
			0,												// VkFramebufferCreateFlags flags;
			*renderPass,									// VkRenderPass renderPass;
			0u,												// deUint32 attachmentCount;
			DE_NULL,										// const VkImageView* pAttachments;
			1u,												// deUint32 width;
			1u,												// deUint32 height;
			1u,												// deUint32 layers;
		};
		auto frameBuffer = vk::createFramebuffer(vkdi, device, &frameBufferCreateInfo);

		const vk::VkRenderPassBeginInfo renderPassBeginInfo =
		{
			vk::VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,	// VkStructureType sType;
			DE_NULL,										// const void* pNext;
			*renderPass,									// VkRenderPass renderPass;
			*frameBuffer,									// VkFramebuffer framebuffer;
			renderArea,										// VkRect2D renderArea;
			0u,												// deUint32 clearValueCount;
			DE_NULL,										// const VkClearValue* pClearValues;
		};

		// Run the shader. A single point draw; the tested shader loops over
		// all operations internally.
		vk::VkPipelineStageFlags pipelineStage = (isFrag ? vk::VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT : vk::VK_PIPELINE_STAGE_VERTEX_SHADER_BIT);

		vk::beginCommandBuffer(vkdi, *cmdBuffer);
		vkdi.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_HOST_BIT, pipelineStage, 0, 0u, DE_NULL, static_cast<deUint32>(hostToDevBarriers.size()), hostToDevBarriers.data(), 0u, DE_NULL);
		vkdi.cmdBeginRenderPass(*cmdBuffer, &renderPassBeginInfo, vk::VK_SUBPASS_CONTENTS_INLINE);
		vkdi.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
		vkdi.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0, 1u, &descriptorSet.get(), 0u, DE_NULL);
		vkdi.cmdDraw(*cmdBuffer, 1u, 1u, 0u, 0u);
		vkdi.cmdEndRenderPass(*cmdBuffer);
		vkdi.cmdPipelineBarrier(*cmdBuffer, pipelineStage, vk::VK_PIPELINE_STAGE_HOST_BIT, 0, 0u, DE_NULL, static_cast<deUint32>(devToHostBarriers.size()), devToHostBarriers.data(), 0u, DE_NULL);
		vk::endCommandBuffer(vkdi, *cmdBuffer);
		vk::submitCommandsAndWait(vkdi, device, m_context.getUniversalQueue(), *cmdBuffer);
	}

	// Invalidate allocations.
	vk::invalidateAlloc(vkdi, device, *input1.allocation);
	vk::invalidateAlloc(vkdi, device, *input2.allocation);
	vk::invalidateAlloc(vkdi, device, *output1.allocation);

	// Read and verify results. When NaN preservation is not required,
	// mismatches on operand pairs involving NaN are tolerated.
	std::vector<int> results(m_numOperations);
	deMemcpy(results.data(), output1.allocation->getHostPtr(), m_outputBufferSize);
	for (size_t i = 0; i < m_numOperations; ++i)
	{
		int expected = static_cast<int>(m_params.operation.run(m_params.operands[i].first, m_params.operands[i].second));
		if (results[i] != expected && (m_params.requireNanPreserve || (!genericIsNan<T>(m_params.operands[i].first) && !genericIsNan<T>(m_params.operands[i].second))))
		{
			std::ostringstream msg;
			msg << "Invalid result found in position " << i << ": expected " << expected << " and found " << results[i];
			return tcu::TestStatus::fail(msg.str());
		}
	}

	return tcu::TestStatus::pass("Pass");
}
1647
// Test case wrapper: checks feature support, builds the SPIR-V programs and
// creates T64bitCompareTestInstance<T> objects to run them.
template <class T>
class T64bitCompareTest : public TestCase
{
public:
	T64bitCompareTest (tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParameters<T>& params);
	virtual void checkSupport (Context& context) const;
	virtual void initPrograms (vk::SourceCollections& programCollection) const;
	virtual TestInstance* createInstance (Context& ctx) const;

private:
	const TestParameters<T> m_params;	// Operation, operands, stage and NaN requirements.
};
1660
template <class T>
T64bitCompareTest<T>::T64bitCompareTest (tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParameters<T>& params)
	: TestCase(testCtx, name, description), m_params(params)
{
	// This is needed so that the same operands can be used for single-element comparisons or for vectorized comparisons (which use *vec4 types).
	DE_ASSERT(m_params.operands.size() % 4 == 0);
}
1668
1669 // This template checks the needed type support features in shaders for type T.
1670 // Specializations are provided below.
1671 template <class T>
1672 void checkTypeSupport(const vk::VkPhysicalDeviceFeatures& features);
1673
1674 template <>
checkTypeSupport(const vk::VkPhysicalDeviceFeatures & features)1675 void checkTypeSupport<double>(const vk::VkPhysicalDeviceFeatures& features)
1676 {
1677 if (!features.shaderFloat64)
1678 TCU_THROW(NotSupportedError, "64-bit floats not supported in shaders");
1679 }
1680
check64bitIntegers(const vk::VkPhysicalDeviceFeatures & features)1681 void check64bitIntegers(const vk::VkPhysicalDeviceFeatures& features)
1682 {
1683 if (!features.shaderInt64)
1684 TCU_THROW(NotSupportedError, "64-bit integer types not supported in shaders");
1685 }
1686
// Signed 64-bit integers: delegate to the common integer feature check.
template <>
void checkTypeSupport<deInt64>(const vk::VkPhysicalDeviceFeatures& features)
{
	check64bitIntegers(features);
}

// Unsigned 64-bit integers: same feature requirement as the signed case.
template <>
void checkTypeSupport<deUint64>(const vk::VkPhysicalDeviceFeatures& features)
{
	check64bitIntegers(features);
}
1698
1699 template <class T>
checkSupport(Context & context) const1700 void T64bitCompareTest<T>::checkSupport (Context& context) const
1701 {
1702 auto& vki = context.getInstanceInterface();
1703 auto physicalDevice = context.getPhysicalDevice();
1704 auto features = vk::getPhysicalDeviceFeatures(vki, physicalDevice);
1705
1706 checkTypeSupport<T>(features);
1707
1708 switch (m_params.stage)
1709 {
1710 case vk::VK_SHADER_STAGE_COMPUTE_BIT:
1711 break;
1712 case vk::VK_SHADER_STAGE_VERTEX_BIT:
1713 if (!features.vertexPipelineStoresAndAtomics)
1714 TCU_THROW(NotSupportedError, "Vertex shader does not support stores");
1715 break;
1716 case vk::VK_SHADER_STAGE_FRAGMENT_BIT:
1717 if (!features.fragmentStoresAndAtomics)
1718 TCU_THROW(NotSupportedError, "Fragment shader does not support stores");
1719 break;
1720 default:
1721 DE_ASSERT(DE_NULL == "Invalid shader stage specified");
1722 }
1723
1724 ExtensionFloatControlsFeatures fcFeatures;
1725 deMemset(&fcFeatures, 0, sizeof(fcFeatures));
1726 fcFeatures.shaderSignedZeroInfNanPreserveFloat64 = VK_TRUE;
1727
1728 if (m_params.requireNanPreserve && !isFloatControlsFeaturesSupported(context, fcFeatures))
1729 TCU_THROW(NotSupportedError, "NaN preservation not supported");
1730 }
1731
1732 template <class T>
initPrograms(vk::SourceCollections & programCollection) const1733 void T64bitCompareTest<T>::initPrograms (vk::SourceCollections& programCollection) const
1734 {
1735 DE_ASSERT(m_params.stage == vk::VK_SHADER_STAGE_COMPUTE_BIT ||
1736 m_params.stage == vk::VK_SHADER_STAGE_VERTEX_BIT ||
1737 m_params.stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT );
1738
1739 std::map<std::string, std::string> replacements;
1740 replacements["ITERS"] = de::toString((m_params.dataType == DATA_TYPE_SINGLE) ? m_params.operands.size() : m_params.operands.size() / 4);
1741 replacements["OPNAME"] = m_params.operation.spirvName();
1742 replacements["OPCAPABILITY"] = SpirvTemplateManager::getOpCapability<T>();
1743 replacements["OPTYPE"] = SpirvTemplateManager::getOpType<T>();
1744 replacements["NANCAP"] = SpirvTemplateManager::getNanCapability(m_params.requireNanPreserve);
1745 replacements["NANEXT"] = SpirvTemplateManager::getNanExtension(m_params.requireNanPreserve);
1746 replacements["NANMODE"] = SpirvTemplateManager::getNanExeMode(m_params.requireNanPreserve);
1747
1748 static const std::map<vk::VkShaderStageFlagBits, std::string> sourceNames =
1749 {
1750 std::make_pair( vk::VK_SHADER_STAGE_COMPUTE_BIT, "comp" ),
1751 std::make_pair( vk::VK_SHADER_STAGE_VERTEX_BIT, "vert" ),
1752 std::make_pair( vk::VK_SHADER_STAGE_FRAGMENT_BIT, "frag" ),
1753 };
1754
1755 // Add the proper template under the proper name.
1756 programCollection.spirvAsmSources.add(sourceNames.find(m_params.stage)->second) << SpirvTemplateManager::getTemplate(m_params.dataType, m_params.stage).specialize(replacements);
1757
1758 // Add the passthrough vertex shader needed for the fragment shader.
1759 if (m_params.stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT)
1760 programCollection.glslSources.add("vert") << glu::VertexSource(VertShaderPassThrough);
1761 }
1762
// Creates the instance that runs the comparison workload on the device with the stored parameters.
template <class T>
TestInstance* T64bitCompareTest<T>::createInstance (Context& ctx) const
{
	return new T64bitCompareTestInstance<T>(ctx, m_params);
}
1768
1769 const std::map<bool, std::string> requireNanName =
1770 {
1771 std::make_pair( false, "nonan" ),
1772 std::make_pair( true, "withnan" ),
1773 };
1774
1775 const std::map<DataType, std::string> dataTypeName =
1776 {
1777 std::make_pair(DATA_TYPE_SINGLE, "single"),
1778 std::make_pair(DATA_TYPE_VECTOR, "vector"),
1779 };
1780
1781 using StageName = std::map<vk::VkShaderStageFlagBits, std::string>;
1782
createDoubleCompareTestsInGroup(tcu::TestCaseGroup * tests,const StageName * stageNames)1783 void createDoubleCompareTestsInGroup (tcu::TestCaseGroup* tests, const StageName* stageNames)
1784 {
1785 static const std::vector<const CompareOperation<double>*> operationList =
1786 {
1787 // Ordered operations.
1788 &FOrderedEqualOp,
1789 &FOrderedNotEqualOp,
1790 &FOrderedLessThanOp,
1791 &FOrderedLessThanEqualOp,
1792 &FOrderedGreaterThanOp,
1793 &FOrderedGreaterThanEqualOp,
1794 // Unordered operations.
1795 &FUnorderedEqualOp,
1796 &FUnorderedNotEqualOp,
1797 &FUnorderedLessThanOp,
1798 &FUnorderedLessThanEqualOp,
1799 &FUnorderedGreaterThanOp,
1800 &FUnorderedGreaterThanEqualOp,
1801 };
1802
1803 for (const auto& stageNamePair : *stageNames)
1804 for (const auto& typeNamePair : dataTypeName)
1805 for (const auto& requireNanPair : requireNanName)
1806 for (const auto opPtr : operationList)
1807 {
1808 TestParameters<double> params = { typeNamePair.first, *opPtr, stageNamePair.first, DOUBLE_OPERANDS, requireNanPair.first };
1809 std::string testName = stageNamePair.second + "_" + de::toLower(opPtr->spirvName()) + "_" + requireNanPair.second + "_" + typeNamePair.second;
1810 tests->addChild(new T64bitCompareTest<double>(tests->getTestContext(), testName, "", params));
1811 }
1812 }
1813
createInt64CompareTestsInGroup(tcu::TestCaseGroup * tests,const StageName * stageNames)1814 void createInt64CompareTestsInGroup (tcu::TestCaseGroup* tests, const StageName* stageNames)
1815 {
1816 static const std::vector<const CompareOperation<deInt64>*> operationList =
1817 {
1818 &deInt64EqualOp,
1819 &deInt64NotEqualOp,
1820 &deInt64LessThanOp,
1821 &deInt64LessThanEqualOp,
1822 &deInt64GreaterThanOp,
1823 &deInt64GreaterThanEqualOp,
1824 };
1825
1826 for (const auto& stageNamePair : *stageNames)
1827 for (const auto& typeNamePair : dataTypeName)
1828 for (const auto opPtr : operationList)
1829 {
1830 TestParameters<deInt64> params = { typeNamePair.first, *opPtr, stageNamePair.first, INT64_OPERANDS, false };
1831 std::string testName = stageNamePair.second + "_" + de::toLower(opPtr->spirvName()) + "_" + typeNamePair.second;
1832 tests->addChild(new T64bitCompareTest<deInt64>(tests->getTestContext(), testName, "", params));
1833 }
1834 }
1835
createUint64CompareTestsInGroup(tcu::TestCaseGroup * tests,const StageName * stageNames)1836 void createUint64CompareTestsInGroup (tcu::TestCaseGroup* tests, const StageName* stageNames)
1837 {
1838 static const std::vector<const CompareOperation<deUint64>*> operationList =
1839 {
1840 &deUint64EqualOp,
1841 &deUint64NotEqualOp,
1842 &deUint64LessThanOp,
1843 &deUint64LessThanEqualOp,
1844 &deUint64GreaterThanOp,
1845 &deUint64GreaterThanEqualOp,
1846 };
1847
1848 for (const auto& stageNamePair : *stageNames)
1849 for (const auto& typeNamePair : dataTypeName)
1850 for (const auto opPtr : operationList)
1851 {
1852 TestParameters<deUint64> params = { typeNamePair.first, *opPtr, stageNamePair.first, UINT64_OPERANDS, false };
1853 std::string testName = stageNamePair.second + "_" + de::toLower(opPtr->spirvName()) + "_" + typeNamePair.second;
1854 tests->addChild(new T64bitCompareTest<deUint64>(tests->getTestContext(), testName, "", params));
1855 }
1856 }
1857
// Groups the per-type names, descriptions and creation functions used to build the test tree below.
struct TestMgr
{
	// Signature of the functions that populate one data type's test group.
	typedef void (*CreationFunctionPtr)(tcu::TestCaseGroup*, const StageName*);

	static const char* getParentGroupName () { return "64bit_compare"; }
	static const char* getParentGroupDesc () { return "64-bit type comparison operations"; }

	// Specialized below for double, deInt64 and deUint64.
	template <class T>
	static std::string getGroupName ();

	template <class T>
	static std::string getGroupDesc ();

	template <class T>
	static CreationFunctionPtr getCreationFunction ();
};
1874
// Group names for each tested 64-bit type.
template <> std::string TestMgr::getGroupName<double>()		{ return "double"; }
template <> std::string TestMgr::getGroupName<deInt64>()	{ return "int64"; }
template <> std::string TestMgr::getGroupName<deUint64>()	{ return "uint64"; }

// Group descriptions for each tested 64-bit type.
template <> std::string TestMgr::getGroupDesc<double>()		{ return "64-bit floating point tests"; }
template <> std::string TestMgr::getGroupDesc<deInt64>()	{ return "64-bit signed integer tests"; }
template <> std::string TestMgr::getGroupDesc<deUint64>()	{ return "64-bit unsigned integer tests"; }

// Per-type functions that fill in the group contents.
template <> TestMgr::CreationFunctionPtr TestMgr::getCreationFunction<double> ()	{ return createDoubleCompareTestsInGroup; }
template <> TestMgr::CreationFunctionPtr TestMgr::getCreationFunction<deInt64> ()	{ return createInt64CompareTestsInGroup; }
template <> TestMgr::CreationFunctionPtr TestMgr::getCreationFunction<deUint64> ()	{ return createUint64CompareTestsInGroup; }
1886
1887 } // anonymous
1888
create64bitCompareGraphicsGroup(tcu::TestContext & testCtx)1889 tcu::TestCaseGroup* create64bitCompareGraphicsGroup (tcu::TestContext& testCtx)
1890 {
1891 static const StageName graphicStages =
1892 {
1893 std::make_pair(vk::VK_SHADER_STAGE_VERTEX_BIT, "vert"),
1894 std::make_pair(vk::VK_SHADER_STAGE_FRAGMENT_BIT, "frag"),
1895 };
1896
1897 tcu::TestCaseGroup* newGroup = new tcu::TestCaseGroup(testCtx, TestMgr::getParentGroupName(), TestMgr::getParentGroupDesc());
1898 newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<double>(), TestMgr::getGroupDesc<double>(), TestMgr::getCreationFunction<double>(), &graphicStages));
1899 newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<deInt64>(), TestMgr::getGroupDesc<deInt64>(), TestMgr::getCreationFunction<deInt64>(), &graphicStages));
1900 newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<deUint64>(), TestMgr::getGroupDesc<deUint64>(), TestMgr::getCreationFunction<deUint64>(), &graphicStages));
1901 return newGroup;
1902 }
1903
create64bitCompareComputeGroup(tcu::TestContext & testCtx)1904 tcu::TestCaseGroup* create64bitCompareComputeGroup (tcu::TestContext& testCtx)
1905 {
1906 static const StageName computeStages =
1907 {
1908 std::make_pair(vk::VK_SHADER_STAGE_COMPUTE_BIT, "comp"),
1909 };
1910
1911 tcu::TestCaseGroup* newGroup = new tcu::TestCaseGroup(testCtx, TestMgr::getParentGroupName(), TestMgr::getParentGroupDesc());
1912 newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<double>(), TestMgr::getGroupDesc<double>(), TestMgr::getCreationFunction<double>(), &computeStages));
1913 newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<deInt64>(), TestMgr::getGroupDesc<deInt64>(), TestMgr::getCreationFunction<deInt64>(), &computeStages));
1914 newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<deUint64>(), TestMgr::getGroupDesc<deUint64>(), TestMgr::getCreationFunction<deUint64>(), &computeStages));
1915 return newGroup;
1916 }
1917
1918 } // SpirVAssembly
1919 } // vkt
1920