1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 Valve Corporation.
6 * Copyright (c) 2019 The Khronos Group Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief 64-bit data type comparison operations.
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktSpvAsm64bitCompareTests.hpp"
26 #include "vktTestGroupUtil.hpp"
27 #include "vktSpvAsmUtils.hpp"
28 #include "vkDefs.hpp"
29 #include "vktTestCase.hpp"
30 #include "vkQueryUtil.hpp"
31 #include "vkMemUtil.hpp"
32 #include "vkRefUtil.hpp"
33 #include "vkBuilderUtil.hpp"
34 #include "vkPrograms.hpp"
35 #include "vkCmdUtil.hpp"
36
37 #include "tcuStringTemplate.hpp"
38
39 #include <string>
40 #include <vector>
41 #include <utility>
42 #include <cmath>
43 #include <sstream>
44 #include <memory>
45 #include <limits>
46
47 namespace vkt
48 {
49 namespace SpirVAssembly
50 {
51 namespace
52 {
53
// Abstract interface for a comparison operation between two values of type T.
// Implementations provide the name of the SPIR-V instruction under test and a
// host-side reference implementation used to compute expected results.
template <typename T>
class CompareOperation
{
public:
	// Instances are referenced polymorphically (see TestParameters); give the
	// base a virtual destructor so destruction through a base pointer or
	// reference is always well-defined.
	virtual ~CompareOperation () = default;

	// Name of the SPIR-V comparison instruction (e.g. "OpIEqual").
	virtual std::string spirvName () const = 0;

	// Reference result of applying the comparison to (left, right) on the host.
	virtual bool run (T left, T right) const = 0;
};
61
62 // Helper intermediate class to be able to implement Ordered and Unordered floating point operations in a simpler way.
63 class DoubleCompareOperation: public CompareOperation<double>
64 {
65 public:
66 struct BasicImplementation
67 {
68 virtual std::string nameSuffix () const = 0;
69 virtual bool run (double left, double right) const = 0; // No NaNs here.
70 };
71
spirvName() const72 virtual std::string spirvName () const
73 {
74 return "OpF" + std::string(m_ordered ? "Ord" : "Unord") + m_impl.nameSuffix();
75 }
76
run(double left,double right) const77 virtual bool run (double left, double right) const
78 {
79 if (nanInvolved(left, right))
80 return !m_ordered; // Ordered operations return false when NaN is involved.
81 return m_impl.run(left, right);
82 }
83
DoubleCompareOperation(bool ordered,const BasicImplementation & impl)84 DoubleCompareOperation(bool ordered, const BasicImplementation& impl)
85 : m_ordered(ordered), m_impl(impl)
86 {}
87
88 private:
nanInvolved(double left,double right) const89 bool nanInvolved(double left, double right) const
90 {
91 return std::isnan(left) || std::isnan(right);
92 }
93
94 const bool m_ordered;
95 const BasicImplementation& m_impl;
96 };
97
// Generates a BasicImplementation subclass <NAME>DoubleBasicImplClass whose run()
// applies the C++ operator OPERATION to the operands (callers guarantee no NaNs
// reach it), together with one global instance <NAME>DoubleBasicImplInstance.
#define GEN_DOUBLE_BASIC_IMPL(NAME, OPERATION) \
	struct NAME##DoubleBasicImplClass : public DoubleCompareOperation::BasicImplementation \
	{ \
		virtual std::string nameSuffix () const { return #NAME; } \
		virtual bool run (double left, double right) const { return left OPERATION right; } \
	}; \
	NAME##DoubleBasicImplClass NAME##DoubleBasicImplInstance;

// One basic implementation per supported comparison operator.
GEN_DOUBLE_BASIC_IMPL(Equal, == )
GEN_DOUBLE_BASIC_IMPL(NotEqual, != )
GEN_DOUBLE_BASIC_IMPL(LessThan, < )
GEN_DOUBLE_BASIC_IMPL(GreaterThan, > )
GEN_DOUBLE_BASIC_IMPL(LessThanEqual, <= )
GEN_DOUBLE_BASIC_IMPL(GreaterThanEqual, >= )

// Wraps each basic implementation in DoubleCompareOperation objects with ordered
// (FOrdered<NAME>Op) and unordered (FUnordered<NAME>Op) NaN semantics; GEN_FBOTH_OP
// emits both variants at once.
#define GEN_FORDERED_OP(NAME) DoubleCompareOperation FOrdered##NAME##Op(true, NAME##DoubleBasicImplInstance)
#define GEN_FUNORDERED_OP(NAME) DoubleCompareOperation FUnordered##NAME##Op(false, NAME##DoubleBasicImplInstance)
#define GEN_FBOTH_OP(NAME) GEN_FORDERED_OP(NAME); GEN_FUNORDERED_OP(NAME);

GEN_FBOTH_OP(Equal)
GEN_FBOTH_OP(NotEqual)
GEN_FBOTH_OP(LessThan)
GEN_FBOTH_OP(GreaterThan)
GEN_FBOTH_OP(LessThanEqual)
GEN_FBOTH_OP(GreaterThanEqual)
123
124 template <typename IntClass>
125 class IntCompareOperation: public CompareOperation<IntClass>
126 {
127 public:
128 struct Implementation
129 {
130 virtual std::string typeChar () const = 0;
131 virtual std::string opName () const = 0;
132 virtual bool run (IntClass left, IntClass right) const = 0;
133 };
134
spirvName() const135 virtual std::string spirvName () const
136 {
137 return "Op" + m_impl.typeChar() + m_impl.opName();
138 }
139
run(IntClass left,IntClass right) const140 virtual bool run (IntClass left, IntClass right) const
141 {
142 return m_impl.run(left, right);
143 }
144
IntCompareOperation(const Implementation & impl)145 IntCompareOperation(const Implementation& impl)
146 : m_impl(impl)
147 {}
148
149 private:
150 const Implementation& m_impl;
151 };
152
// Generates an Implementation subclass <INTTYPE><OPNAME>IntImplClass applying the
// C++ operator OPERATOR, plus one global instance <INTTYPE><OPNAME>IntImplInstance.
// TYPECHAR becomes the letter after "Op" in the SPIR-V instruction name.
#define GEN_INT_IMPL(INTTYPE, TYPECHAR, OPNAME, OPERATOR) \
	struct INTTYPE##OPNAME##IntImplClass : public IntCompareOperation<INTTYPE>::Implementation \
	{ \
		virtual std::string typeChar () const { return #TYPECHAR; } \
		virtual std::string opName () const { return #OPNAME; } \
		virtual bool run (INTTYPE left, INTTYPE right) const { return left OPERATOR right; } \
	}; \
	INTTYPE##OPNAME##IntImplClass INTTYPE##OPNAME##IntImplInstance;

// Note (in)equality always uses the "I" prefix (OpIEqual/OpINotEqual) regardless
// of signedness; the ordering comparisons use the signed/unsigned TYPECHAR.
#define GEN_ALL_INT_TYPE_IMPL(INTTYPE, TYPECHAR) \
	GEN_INT_IMPL(INTTYPE, I, Equal, == ) \
	GEN_INT_IMPL(INTTYPE, I, NotEqual, != ) \
	GEN_INT_IMPL(INTTYPE, TYPECHAR, GreaterThan, > ) \
	GEN_INT_IMPL(INTTYPE, TYPECHAR, GreaterThanEqual, >= ) \
	GEN_INT_IMPL(INTTYPE, TYPECHAR, LessThan, < ) \
	GEN_INT_IMPL(INTTYPE, TYPECHAR, LessThanEqual, <= )

GEN_ALL_INT_TYPE_IMPL(deInt64, S)
GEN_ALL_INT_TYPE_IMPL(deUint64, U)

// Generates a concrete IntCompareOperation subclass bound to the matching
// implementation instance, plus one global operation object <INTTYPE><OPNAME>Op.
#define GEN_INT_OP(INTTYPE, OPNAME) \
	struct INTTYPE##OPNAME##OpClass: public IntCompareOperation<INTTYPE> \
	{ \
		INTTYPE##OPNAME##OpClass () : IntCompareOperation<INTTYPE>(INTTYPE##OPNAME##IntImplInstance) {} \
	}; \
	INTTYPE##OPNAME##OpClass INTTYPE##OPNAME##Op;

#define GEN_ALL_INT_OPS(INTTYPE) \
	GEN_INT_OP(INTTYPE, Equal ) \
	GEN_INT_OP(INTTYPE, NotEqual ) \
	GEN_INT_OP(INTTYPE, GreaterThan ) \
	GEN_INT_OP(INTTYPE, GreaterThanEqual ) \
	GEN_INT_OP(INTTYPE, LessThan ) \
	GEN_INT_OP(INTTYPE, LessThanEqual )

GEN_ALL_INT_OPS(deInt64)
GEN_ALL_INT_OPS(deUint64)
190
// How operands are presented to the shader: one scalar comparison per loop
// iteration, or one 4-component vector comparison per iteration.
enum DataType {
	DATA_TYPE_SINGLE = 0,	// Scalar operands (double/int64/uint64).
	DATA_TYPE_VECTOR,		// 4-wide vector operands (dvec4-style).
	DATA_TYPE_MAX_ENUM,		// Sentinel; not a valid data type.
};
196
// List of (left, right) operand pairs fed to the comparison operation.
template <class T>
using OperandsVector = std::vector<std::pair<T, T>>;

// Aggregated parameters describing a single test case.
template <class T>
struct TestParameters
{
	DataType					dataType;			// Scalar or vector operands.
	const CompareOperation<T>&	operation;			// Comparison operation under test.
	vk::VkShaderStageFlagBits	stage;				// Shader stage executing the comparison.
	const OperandsVector<T>&	operands;			// Input operand pairs (referenced, not copied).
	bool						requireNanPreserve;	// Whether NaN preservation must be supported;
													// presumably drives the NANCAP/NANEXT/NANMODE
													// shader template bits — confirm at use site.
};
209
210 // Shader template for the compute stage using single scalars.
211 // Generated from the following GLSL shader, replacing some bits by template parameters.
212 #if 0
213 #version 430
214
215 // Left operands, right operands and results.
216 layout(binding = 0) buffer Input1 { double values[]; } input1;
217 layout(binding = 1) buffer Input2 { double values[]; } input2;
218 layout(binding = 2) buffer Output1 { int values[]; } output1;
219
220 void main()
221 {
222 for (int i = 0; i < 20; i++) {
223 output1.values[i] = int(input1.values[i] == input2.values[i]);
224 }
225 }
226 #endif
// Template parameters:
//   ${OPCAPABILITY}              capability line for the 64-bit operand type (filled in by the test).
//   ${NANCAP}/${NANEXT}/${NANMODE}  optional capability/extension/execution-mode lines for NaN
//                                handling; left empty when not required.
//   ${ITERS}                     loop iteration count (number of operand pairs).
//   ${OPTYPE}                    SPIR-V type instruction declaring %tinput (8-byte stride).
//   ${OPNAME}                    comparison instruction; must produce %bool from two %tinput values.
const tcu::StringTemplate CompShaderSingle(R"(
OpCapability Shader
${OPCAPABILITY}
${NANCAP}
${NANEXT}
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
${NANMODE}
OpExecutionMode %main LocalSize 1 1 1
OpName %main "main"
OpName %i "i"
OpName %Output1 "Output1"
OpMemberName %Output1 0 "values"
OpName %output1 "output1"
OpName %Input1 "Input1"
OpMemberName %Input1 0 "values"
OpName %input1 "input1"
OpName %Input2 "Input2"
OpMemberName %Input2 0 "values"
OpName %input2 "input2"
OpDecorate %_runtimearr_int ArrayStride 4
OpMemberDecorate %Output1 0 Offset 0
OpDecorate %Output1 BufferBlock
OpDecorate %output1 DescriptorSet 0
OpDecorate %output1 Binding 2
OpDecorate %_runtimearr_tinput ArrayStride 8
OpMemberDecorate %Input1 0 Offset 0
OpDecorate %Input1 BufferBlock
OpDecorate %input1 DescriptorSet 0
OpDecorate %input1 Binding 0
OpDecorate %_runtimearr_tinput_0 ArrayStride 8
OpMemberDecorate %Input2 0 Offset 0
OpDecorate %Input2 BufferBlock
OpDecorate %input2 DescriptorSet 0
OpDecorate %input2 Binding 1
%void = OpTypeVoid
%3 = OpTypeFunction %void
%int = OpTypeInt 32 1
%_ptr_Function_int = OpTypePointer Function %int
%int_0 = OpConstant %int 0
%niters = OpConstant %int ${ITERS}
%bool = OpTypeBool
%_runtimearr_int = OpTypeRuntimeArray %int
%Output1 = OpTypeStruct %_runtimearr_int
%_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
%output1 = OpVariable %_ptr_Uniform_Output1 Uniform
%tinput = ${OPTYPE}
%_runtimearr_tinput = OpTypeRuntimeArray %tinput
%Input1 = OpTypeStruct %_runtimearr_tinput
%_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
%input1 = OpVariable %_ptr_Uniform_Input1 Uniform
%_ptr_Uniform_tinput = OpTypePointer Uniform %tinput
%_runtimearr_tinput_0 = OpTypeRuntimeArray %tinput
%Input2 = OpTypeStruct %_runtimearr_tinput_0
%_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
%input2 = OpVariable %_ptr_Uniform_Input2 Uniform
%int_1 = OpConstant %int 1
%_ptr_Uniform_int = OpTypePointer Uniform %int
%main = OpFunction %void None %3
%5 = OpLabel
%i = OpVariable %_ptr_Function_int Function
OpStore %i %int_0
OpBranch %10
%10 = OpLabel
OpLoopMerge %12 %13 None
OpBranch %14
%14 = OpLabel
%15 = OpLoad %int %i
%18 = OpSLessThan %bool %15 %niters
OpBranchConditional %18 %11 %12
%11 = OpLabel
%23 = OpLoad %int %i
%29 = OpLoad %int %i
%31 = OpAccessChain %_ptr_Uniform_tinput %input1 %int_0 %29
%32 = OpLoad %tinput %31
%37 = OpLoad %int %i
%38 = OpAccessChain %_ptr_Uniform_tinput %input2 %int_0 %37
%39 = OpLoad %tinput %38
%40 = ${OPNAME} %bool %32 %39
%42 = OpSelect %int %40 %int_1 %int_0
%44 = OpAccessChain %_ptr_Uniform_int %output1 %int_0 %23
OpStore %44 %42
OpBranch %13
%13 = OpLabel
%45 = OpLoad %int %i
%46 = OpIAdd %int %45 %int_1
OpStore %i %46
OpBranch %10
%12 = OpLabel
OpReturn
OpFunctionEnd
)");
320
321 // Shader template for the compute stage using vectors.
322 // Generated from the following GLSL shader, replacing some bits by template parameters.
323 // Note the number of iterations needs to be divided by 4 as the shader will consume 4 doubles at a time.
324 #if 0
325 #version 430
326
327 // Left operands, right operands and results.
328 layout(binding = 0) buffer Input1 { dvec4 values[]; } input1;
329 layout(binding = 1) buffer Input2 { dvec4 values[]; } input2;
330 layout(binding = 2) buffer Output1 { ivec4 values[]; } output1;
331
332 void main()
333 {
334 for (int i = 0; i < 5; i++) {
335 output1.values[i] = ivec4(equal(input1.values[i], input2.values[i]));
336 }
337 }
338 #endif
// Template parameters: as in CompShaderSingle, except the operands are 4-wide
// vectors (%v4tinput, 32-byte stride) and ${OPNAME} must produce %v4bool from
// two %v4tinput values; ${ITERS} counts vector iterations (pairs / 4).
const tcu::StringTemplate CompShaderVector(R"(
OpCapability Shader
${OPCAPABILITY}
${NANCAP}
${NANEXT}
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main"
${NANMODE}
OpExecutionMode %main LocalSize 1 1 1
OpName %main "main"
OpName %i "i"
OpName %Output1 "Output1"
OpMemberName %Output1 0 "values"
OpName %output1 "output1"
OpName %Input1 "Input1"
OpMemberName %Input1 0 "values"
OpName %input1 "input1"
OpName %Input2 "Input2"
OpMemberName %Input2 0 "values"
OpName %input2 "input2"
OpDecorate %_runtimearr_v4int ArrayStride 16
OpMemberDecorate %Output1 0 Offset 0
OpDecorate %Output1 BufferBlock
OpDecorate %output1 DescriptorSet 0
OpDecorate %output1 Binding 2
OpDecorate %_runtimearr_v4tinput ArrayStride 32
OpMemberDecorate %Input1 0 Offset 0
OpDecorate %Input1 BufferBlock
OpDecorate %input1 DescriptorSet 0
OpDecorate %input1 Binding 0
OpDecorate %_runtimearr_v4tinput_0 ArrayStride 32
OpMemberDecorate %Input2 0 Offset 0
OpDecorate %Input2 BufferBlock
OpDecorate %input2 DescriptorSet 0
OpDecorate %input2 Binding 1
%void = OpTypeVoid
%3 = OpTypeFunction %void
%int = OpTypeInt 32 1
%_ptr_Function_int = OpTypePointer Function %int
%int_0 = OpConstant %int 0
%niters = OpConstant %int ${ITERS}
%bool = OpTypeBool
%v4int = OpTypeVector %int 4
%_runtimearr_v4int = OpTypeRuntimeArray %v4int
%Output1 = OpTypeStruct %_runtimearr_v4int
%_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
%output1 = OpVariable %_ptr_Uniform_Output1 Uniform
%tinput = ${OPTYPE}
%v4tinput = OpTypeVector %tinput 4
%_runtimearr_v4tinput = OpTypeRuntimeArray %v4tinput
%Input1 = OpTypeStruct %_runtimearr_v4tinput
%_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
%input1 = OpVariable %_ptr_Uniform_Input1 Uniform
%_ptr_Uniform_v4tinput = OpTypePointer Uniform %v4tinput
%_runtimearr_v4tinput_0 = OpTypeRuntimeArray %v4tinput
%Input2 = OpTypeStruct %_runtimearr_v4tinput_0
%_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
%input2 = OpVariable %_ptr_Uniform_Input2 Uniform
%v4bool = OpTypeVector %bool 4
%int_1 = OpConstant %int 1
%45 = OpConstantComposite %v4int %int_0 %int_0 %int_0 %int_0
%46 = OpConstantComposite %v4int %int_1 %int_1 %int_1 %int_1
%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int
%main = OpFunction %void None %3
%5 = OpLabel
%i = OpVariable %_ptr_Function_int Function
OpStore %i %int_0
OpBranch %10
%10 = OpLabel
OpLoopMerge %12 %13 None
OpBranch %14
%14 = OpLabel
%15 = OpLoad %int %i
%18 = OpSLessThan %bool %15 %niters
OpBranchConditional %18 %11 %12
%11 = OpLabel
%24 = OpLoad %int %i
%31 = OpLoad %int %i
%33 = OpAccessChain %_ptr_Uniform_v4tinput %input1 %int_0 %31
%34 = OpLoad %v4tinput %33
%39 = OpLoad %int %i
%40 = OpAccessChain %_ptr_Uniform_v4tinput %input2 %int_0 %39
%41 = OpLoad %v4tinput %40
%43 = ${OPNAME} %v4bool %34 %41
%47 = OpSelect %v4int %43 %46 %45
%49 = OpAccessChain %_ptr_Uniform_v4int %output1 %int_0 %24
OpStore %49 %47
OpBranch %13
%13 = OpLabel
%50 = OpLoad %int %i
%51 = OpIAdd %int %50 %int_1
OpStore %i %51
OpBranch %10
%12 = OpLabel
OpReturn
OpFunctionEnd
)");
437
438 // Shader template for the vertex stage using single scalars.
439 // Generated from the following GLSL shader, replacing some bits by template parameters.
440 #if 0
441 #version 430
442
443 // Left operands, right operands and results.
444 layout(binding = 0) buffer Input1 { double values[]; } input1;
445 layout(binding = 1) buffer Input2 { double values[]; } input2;
446 layout(binding = 2) buffer Output1 { int values[]; } output1;
447
448 void main()
449 {
450 gl_PointSize = 1;
451 gl_Position = vec4(0.0, 0.0, 0.0, 1.0);
452
453 for (int i = 0; i < 20; i++) {
454 output1.values[i] = int(input1.values[i] == input2.values[i]);
455 }
456 }
457 #endif
// Template parameters: as in CompShaderSingle. Additionally writes the
// gl_PerVertex builtins (PointSize = 1, Position = (0,0,0,1)) so the vertex
// stage produces a single point while performing the comparisons.
const tcu::StringTemplate VertShaderSingle(R"(
OpCapability Shader
${OPCAPABILITY}
${NANCAP}
${NANEXT}
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Vertex %main "main" %_
${NANMODE}
OpName %main "main"
OpName %gl_PerVertex "gl_PerVertex"
OpMemberName %gl_PerVertex 0 "gl_Position"
OpMemberName %gl_PerVertex 1 "gl_PointSize"
OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
OpName %_ ""
OpName %i "i"
OpName %Output1 "Output1"
OpMemberName %Output1 0 "values"
OpName %output1 "output1"
OpName %Input1 "Input1"
OpMemberName %Input1 0 "values"
OpName %input1 "input1"
OpName %Input2 "Input2"
OpMemberName %Input2 0 "values"
OpName %input2 "input2"
OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
OpDecorate %gl_PerVertex Block
OpDecorate %_runtimearr_int ArrayStride 4
OpMemberDecorate %Output1 0 Offset 0
OpDecorate %Output1 BufferBlock
OpDecorate %output1 DescriptorSet 0
OpDecorate %output1 Binding 2
OpDecorate %_runtimearr_tinput ArrayStride 8
OpMemberDecorate %Input1 0 Offset 0
OpDecorate %Input1 BufferBlock
OpDecorate %input1 DescriptorSet 0
OpDecorate %input1 Binding 0
OpDecorate %_runtimearr_tinput_0 ArrayStride 8
OpMemberDecorate %Input2 0 Offset 0
OpDecorate %Input2 BufferBlock
OpDecorate %input2 DescriptorSet 0
OpDecorate %input2 Binding 1
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%uint = OpTypeInt 32 0
%uint_1 = OpConstant %uint 1
%_arr_float_uint_1 = OpTypeArray %float %uint_1
%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1
%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex
%_ = OpVariable %_ptr_Output_gl_PerVertex Output
%int = OpTypeInt 32 1
%int_1 = OpConstant %int 1
%float_1 = OpConstant %float 1
%_ptr_Output_float = OpTypePointer Output %float
%int_0 = OpConstant %int 0
%float_0 = OpConstant %float 0
%21 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1
%_ptr_Output_v4float = OpTypePointer Output %v4float
%_ptr_Function_int = OpTypePointer Function %int
%niters = OpConstant %int ${ITERS}
%bool = OpTypeBool
%_runtimearr_int = OpTypeRuntimeArray %int
%Output1 = OpTypeStruct %_runtimearr_int
%_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
%output1 = OpVariable %_ptr_Uniform_Output1 Uniform
%tinput = ${OPTYPE}
%_runtimearr_tinput = OpTypeRuntimeArray %tinput
%Input1 = OpTypeStruct %_runtimearr_tinput
%_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
%input1 = OpVariable %_ptr_Uniform_Input1 Uniform
%_ptr_Uniform_tinput = OpTypePointer Uniform %tinput
%_runtimearr_tinput_0 = OpTypeRuntimeArray %tinput
%Input2 = OpTypeStruct %_runtimearr_tinput_0
%_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
%input2 = OpVariable %_ptr_Uniform_Input2 Uniform
%_ptr_Uniform_int = OpTypePointer Uniform %int
%main = OpFunction %void None %3
%5 = OpLabel
%i = OpVariable %_ptr_Function_int Function
%18 = OpAccessChain %_ptr_Output_float %_ %int_1
OpStore %18 %float_1
%23 = OpAccessChain %_ptr_Output_v4float %_ %int_0
OpStore %23 %21
OpStore %i %int_0
OpBranch %26
%26 = OpLabel
OpLoopMerge %28 %29 None
OpBranch %30
%30 = OpLabel
%31 = OpLoad %int %i
%34 = OpSLessThan %bool %31 %niters
OpBranchConditional %34 %27 %28
%27 = OpLabel
%39 = OpLoad %int %i
%45 = OpLoad %int %i
%47 = OpAccessChain %_ptr_Uniform_tinput %input1 %int_0 %45
%48 = OpLoad %tinput %47
%53 = OpLoad %int %i
%54 = OpAccessChain %_ptr_Uniform_tinput %input2 %int_0 %53
%55 = OpLoad %tinput %54
%56 = ${OPNAME} %bool %48 %55
%57 = OpSelect %int %56 %int_1 %int_0
%59 = OpAccessChain %_ptr_Uniform_int %output1 %int_0 %39
OpStore %59 %57
OpBranch %29
%29 = OpLabel
%60 = OpLoad %int %i
%61 = OpIAdd %int %60 %int_1
OpStore %i %61
OpBranch %26
%28 = OpLabel
OpReturn
OpFunctionEnd
)");
576
577 // Shader template for the vertex stage using vectors.
578 // Generated from the following GLSL shader, replacing some bits by template parameters.
579 // Note the number of iterations needs to be divided by 4 as the shader will consume 4 doubles at a time.
580 #if 0
581 #version 430
582
583 // Left operands, right operands and results.
584 layout(binding = 0) buffer Input1 { dvec4 values[]; } input1;
585 layout(binding = 1) buffer Input2 { dvec4 values[]; } input2;
586 layout(binding = 2) buffer Output1 { ivec4 values[]; } output1;
587
588 void main()
589 {
590 gl_PointSize = 1;
591 gl_Position = vec4(0.0, 0.0, 0.0, 1.0);
592
593 for (int i = 0; i < 5; i++) {
594 output1.values[i] = ivec4(equal(input1.values[i], input2.values[i]));
595 }
596 }
597 #endif
// Template parameters: as in VertShaderSingle, but operating on 4-wide vectors
// (%v4tinput, 32-byte stride); ${OPNAME} must produce %v4bool from two
// %v4tinput values and ${ITERS} counts vector iterations (pairs / 4).
const tcu::StringTemplate VertShaderVector(R"(
OpCapability Shader
${OPCAPABILITY}
${NANCAP}
${NANEXT}
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Vertex %main "main" %_
${NANMODE}
OpName %main "main"
OpName %gl_PerVertex "gl_PerVertex"
OpMemberName %gl_PerVertex 0 "gl_Position"
OpMemberName %gl_PerVertex 1 "gl_PointSize"
OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
OpName %_ ""
OpName %i "i"
OpName %Output1 "Output1"
OpMemberName %Output1 0 "values"
OpName %output1 "output1"
OpName %Input1 "Input1"
OpMemberName %Input1 0 "values"
OpName %input1 "input1"
OpName %Input2 "Input2"
OpMemberName %Input2 0 "values"
OpName %input2 "input2"
OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
OpDecorate %gl_PerVertex Block
OpDecorate %_runtimearr_v4int ArrayStride 16
OpMemberDecorate %Output1 0 Offset 0
OpDecorate %Output1 BufferBlock
OpDecorate %output1 DescriptorSet 0
OpDecorate %output1 Binding 2
OpDecorate %_runtimearr_v4tinput ArrayStride 32
OpMemberDecorate %Input1 0 Offset 0
OpDecorate %Input1 BufferBlock
OpDecorate %input1 DescriptorSet 0
OpDecorate %input1 Binding 0
OpDecorate %_runtimearr_v4tinput_0 ArrayStride 32
OpMemberDecorate %Input2 0 Offset 0
OpDecorate %Input2 BufferBlock
OpDecorate %input2 DescriptorSet 0
OpDecorate %input2 Binding 1
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%uint = OpTypeInt 32 0
%uint_1 = OpConstant %uint 1
%_arr_float_uint_1 = OpTypeArray %float %uint_1
%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1
%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex
%_ = OpVariable %_ptr_Output_gl_PerVertex Output
%int = OpTypeInt 32 1
%int_1 = OpConstant %int 1
%float_1 = OpConstant %float 1
%_ptr_Output_float = OpTypePointer Output %float
%int_0 = OpConstant %int 0
%float_0 = OpConstant %float 0
%21 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1
%_ptr_Output_v4float = OpTypePointer Output %v4float
%_ptr_Function_int = OpTypePointer Function %int
%niters = OpConstant %int ${ITERS}
%bool = OpTypeBool
%v4int = OpTypeVector %int 4
%_runtimearr_v4int = OpTypeRuntimeArray %v4int
%Output1 = OpTypeStruct %_runtimearr_v4int
%_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
%output1 = OpVariable %_ptr_Uniform_Output1 Uniform
%tinput = ${OPTYPE}
%v4tinput = OpTypeVector %tinput 4
%_runtimearr_v4tinput = OpTypeRuntimeArray %v4tinput
%Input1 = OpTypeStruct %_runtimearr_v4tinput
%_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
%input1 = OpVariable %_ptr_Uniform_Input1 Uniform
%_ptr_Uniform_v4tinput = OpTypePointer Uniform %v4tinput
%_runtimearr_v4tinput_0 = OpTypeRuntimeArray %v4tinput
%Input2 = OpTypeStruct %_runtimearr_v4tinput_0
%_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
%input2 = OpVariable %_ptr_Uniform_Input2 Uniform
%v4bool = OpTypeVector %bool 4
%60 = OpConstantComposite %v4int %int_0 %int_0 %int_0 %int_0
%61 = OpConstantComposite %v4int %int_1 %int_1 %int_1 %int_1
%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int
%main = OpFunction %void None %3
%5 = OpLabel
%i = OpVariable %_ptr_Function_int Function
%18 = OpAccessChain %_ptr_Output_float %_ %int_1
OpStore %18 %float_1
%23 = OpAccessChain %_ptr_Output_v4float %_ %int_0
OpStore %23 %21
OpStore %i %int_0
OpBranch %26
%26 = OpLabel
OpLoopMerge %28 %29 None
OpBranch %30
%30 = OpLabel
%31 = OpLoad %int %i
%34 = OpSLessThan %bool %31 %niters
OpBranchConditional %34 %27 %28
%27 = OpLabel
%40 = OpLoad %int %i
%47 = OpLoad %int %i
%49 = OpAccessChain %_ptr_Uniform_v4tinput %input1 %int_0 %47
%50 = OpLoad %v4tinput %49
%55 = OpLoad %int %i
%56 = OpAccessChain %_ptr_Uniform_v4tinput %input2 %int_0 %55
%57 = OpLoad %v4tinput %56
%59 = ${OPNAME} %v4bool %50 %57
%62 = OpSelect %v4int %59 %61 %60
%64 = OpAccessChain %_ptr_Uniform_v4int %output1 %int_0 %40
OpStore %64 %62
OpBranch %29
%29 = OpLabel
%65 = OpLoad %int %i
%66 = OpIAdd %int %65 %int_1
OpStore %i %66
OpBranch %26
%28 = OpLabel
OpReturn
OpFunctionEnd
)");
721
// GLSL passthrough vertex shader to test the fragment shader.
// Emits a single point at the origin (PointSize 1) with a constant output color
// so the fragment stage is invoked once to run the comparisons.
const std::string VertShaderPassThrough = R"(
#version 430

layout(location = 0) out vec4 out_color;

void main()
{
gl_PointSize = 1;
gl_Position = vec4(0.0, 0.0, 0.0, 1.0);
out_color = vec4(0.0, 0.0, 0.0, 1.0);
}
)";
735
736 // Shader template for the fragment stage using single scalars.
737 // Generated from the following GLSL shader, replacing some bits by template parameters.
738 #if 0
739 #version 430
740
741 // Left operands, right operands and results.
742 layout(binding = 0) buffer Input1 { double values[]; } input1;
743 layout(binding = 1) buffer Input2 { double values[]; } input2;
744 layout(binding = 2) buffer Output1 { int values[]; } output1;
745
746 void main()
747 {
748 for (int i = 0; i < 20; i++) {
749 output1.values[i] = int(input1.values[i] == input2.values[i]);
750 }
751 }
752 #endif
// Template parameters: as in CompShaderSingle (scalar operands, %bool result),
// executed in the fragment stage with OriginUpperLeft.
const tcu::StringTemplate FragShaderSingle(R"(
OpCapability Shader
${OPCAPABILITY}
${NANCAP}
${NANEXT}
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main"
${NANMODE}
OpExecutionMode %main OriginUpperLeft
OpSource GLSL 430
OpName %main "main"
OpName %i "i"
OpName %Output1 "Output1"
OpMemberName %Output1 0 "values"
OpName %output1 "output1"
OpName %Input1 "Input1"
OpMemberName %Input1 0 "values"
OpName %input1 "input1"
OpName %Input2 "Input2"
OpMemberName %Input2 0 "values"
OpName %input2 "input2"
OpDecorate %_runtimearr_int ArrayStride 4
OpMemberDecorate %Output1 0 Offset 0
OpDecorate %Output1 BufferBlock
OpDecorate %output1 DescriptorSet 0
OpDecorate %output1 Binding 2
OpDecorate %_runtimearr_tinput ArrayStride 8
OpMemberDecorate %Input1 0 Offset 0
OpDecorate %Input1 BufferBlock
OpDecorate %input1 DescriptorSet 0
OpDecorate %input1 Binding 0
OpDecorate %_runtimearr_tinput_0 ArrayStride 8
OpMemberDecorate %Input2 0 Offset 0
OpDecorate %Input2 BufferBlock
OpDecorate %input2 DescriptorSet 0
OpDecorate %input2 Binding 1
%void = OpTypeVoid
%3 = OpTypeFunction %void
%int = OpTypeInt 32 1
%_ptr_Function_int = OpTypePointer Function %int
%int_0 = OpConstant %int 0
%niters = OpConstant %int ${ITERS}
%bool = OpTypeBool
%_runtimearr_int = OpTypeRuntimeArray %int
%Output1 = OpTypeStruct %_runtimearr_int
%_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
%output1 = OpVariable %_ptr_Uniform_Output1 Uniform
%tinput = ${OPTYPE}
%_runtimearr_tinput = OpTypeRuntimeArray %tinput
%Input1 = OpTypeStruct %_runtimearr_tinput
%_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
%input1 = OpVariable %_ptr_Uniform_Input1 Uniform
%_ptr_Uniform_tinput = OpTypePointer Uniform %tinput
%_runtimearr_tinput_0 = OpTypeRuntimeArray %tinput
%Input2 = OpTypeStruct %_runtimearr_tinput_0
%_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
%input2 = OpVariable %_ptr_Uniform_Input2 Uniform
%int_1 = OpConstant %int 1
%_ptr_Uniform_int = OpTypePointer Uniform %int
%main = OpFunction %void None %3
%5 = OpLabel
%i = OpVariable %_ptr_Function_int Function
OpStore %i %int_0
OpBranch %10
%10 = OpLabel
OpLoopMerge %12 %13 None
OpBranch %14
%14 = OpLabel
%15 = OpLoad %int %i
%18 = OpSLessThan %bool %15 %niters
OpBranchConditional %18 %11 %12
%11 = OpLabel
%23 = OpLoad %int %i
%29 = OpLoad %int %i
%31 = OpAccessChain %_ptr_Uniform_tinput %input1 %int_0 %29
%32 = OpLoad %tinput %31
%37 = OpLoad %int %i
%38 = OpAccessChain %_ptr_Uniform_tinput %input2 %int_0 %37
%39 = OpLoad %tinput %38
%40 = ${OPNAME} %bool %32 %39
%42 = OpSelect %int %40 %int_1 %int_0
%44 = OpAccessChain %_ptr_Uniform_int %output1 %int_0 %23
OpStore %44 %42
OpBranch %13
%13 = OpLabel
%45 = OpLoad %int %i
%46 = OpIAdd %int %45 %int_1
OpStore %i %46
OpBranch %10
%12 = OpLabel
OpReturn
OpFunctionEnd
)");
847
848 // Shader template for the fragment stage using vectors.
849 // Generated from the following GLSL shader, replacing some bits by template parameters.
850 // Note the number of iterations needs to be divided by 4 as the shader will consume 4 doubles at a time.
851 #if 0
852 #version 430
853
854 // Left operands, right operands and results.
855 layout(binding = 0) buffer Input1 { dvec4 values[]; } input1;
856 layout(binding = 1) buffer Input2 { dvec4 values[]; } input2;
857 layout(binding = 2) buffer Output1 { ivec4 values[]; } output1;
858
859 void main()
860 {
861 for (int i = 0; i < 5; i++) {
862 output1.values[i] = ivec4(equal(input1.values[i], input2.values[i]));
863 }
864 }
865 #endif
// Template parameters: as in CompShaderVector (4-wide vector operands, %v4bool
// result, ${ITERS} = pairs / 4), executed in the fragment stage.
const tcu::StringTemplate FragShaderVector(R"(
OpCapability Shader
${OPCAPABILITY}
${NANCAP}
${NANEXT}
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main"
${NANMODE}
OpExecutionMode %main OriginUpperLeft
OpName %main "main"
OpName %i "i"
OpName %Output1 "Output1"
OpMemberName %Output1 0 "values"
OpName %output1 "output1"
OpName %Input1 "Input1"
OpMemberName %Input1 0 "values"
OpName %input1 "input1"
OpName %Input2 "Input2"
OpMemberName %Input2 0 "values"
OpName %input2 "input2"
OpDecorate %_runtimearr_v4int ArrayStride 16
OpMemberDecorate %Output1 0 Offset 0
OpDecorate %Output1 BufferBlock
OpDecorate %output1 DescriptorSet 0
OpDecorate %output1 Binding 2
OpDecorate %_runtimearr_v4tinput ArrayStride 32
OpMemberDecorate %Input1 0 Offset 0
OpDecorate %Input1 BufferBlock
OpDecorate %input1 DescriptorSet 0
OpDecorate %input1 Binding 0
OpDecorate %_runtimearr_v4tinput_0 ArrayStride 32
OpMemberDecorate %Input2 0 Offset 0
OpDecorate %Input2 BufferBlock
OpDecorate %input2 DescriptorSet 0
OpDecorate %input2 Binding 1
%void = OpTypeVoid
%3 = OpTypeFunction %void
%int = OpTypeInt 32 1
%_ptr_Function_int = OpTypePointer Function %int
%int_0 = OpConstant %int 0
%niters = OpConstant %int ${ITERS}
%bool = OpTypeBool
%v4int = OpTypeVector %int 4
%_runtimearr_v4int = OpTypeRuntimeArray %v4int
%Output1 = OpTypeStruct %_runtimearr_v4int
%_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
%output1 = OpVariable %_ptr_Uniform_Output1 Uniform
%tinput = ${OPTYPE}
%v4tinput = OpTypeVector %tinput 4
%_runtimearr_v4tinput = OpTypeRuntimeArray %v4tinput
%Input1 = OpTypeStruct %_runtimearr_v4tinput
%_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
%input1 = OpVariable %_ptr_Uniform_Input1 Uniform
%_ptr_Uniform_v4tinput = OpTypePointer Uniform %v4tinput
%_runtimearr_v4tinput_0 = OpTypeRuntimeArray %v4tinput
%Input2 = OpTypeStruct %_runtimearr_v4tinput_0
%_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
%input2 = OpVariable %_ptr_Uniform_Input2 Uniform
%v4bool = OpTypeVector %bool 4
%int_1 = OpConstant %int 1
%45 = OpConstantComposite %v4int %int_0 %int_0 %int_0 %int_0
%46 = OpConstantComposite %v4int %int_1 %int_1 %int_1 %int_1
%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int
%main = OpFunction %void None %3
%5 = OpLabel
%i = OpVariable %_ptr_Function_int Function
OpStore %i %int_0
OpBranch %10
%10 = OpLabel
OpLoopMerge %12 %13 None
OpBranch %14
%14 = OpLabel
%15 = OpLoad %int %i
%18 = OpSLessThan %bool %15 %niters
OpBranchConditional %18 %11 %12
%11 = OpLabel
%24 = OpLoad %int %i
%31 = OpLoad %int %i
%33 = OpAccessChain %_ptr_Uniform_v4tinput %input1 %int_0 %31
%34 = OpLoad %v4tinput %33
%39 = OpLoad %int %i
%40 = OpAccessChain %_ptr_Uniform_v4tinput %input2 %int_0 %39
%41 = OpLoad %v4tinput %40
%43 = ${OPNAME} %v4bool %34 %41
%47 = OpSelect %v4int %43 %46 %45
%49 = OpAccessChain %_ptr_Uniform_v4int %output1 %int_0 %24
OpStore %49 %47
OpBranch %13
%13 = OpLabel
%50 = OpLoad %int %i
%51 = OpIAdd %int %50 %int_1
OpStore %i %51
OpBranch %10
%12 = OpLabel
OpReturn
OpFunctionEnd
)");
964
965 struct SpirvTemplateManager
966 {
getTemplatevkt::SpirVAssembly::__anon69da27910111::SpirvTemplateManager967 static const tcu::StringTemplate& getTemplate (DataType type, vk::VkShaderStageFlagBits stage)
968 {
969 DE_ASSERT(type == DATA_TYPE_SINGLE || type == DATA_TYPE_VECTOR);
970 DE_ASSERT( stage == vk::VK_SHADER_STAGE_COMPUTE_BIT ||
971 stage == vk::VK_SHADER_STAGE_VERTEX_BIT ||
972 stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT );
973
974 if (type == DATA_TYPE_SINGLE)
975 {
976 if (stage == vk::VK_SHADER_STAGE_COMPUTE_BIT) return CompShaderSingle;
977 if (stage == vk::VK_SHADER_STAGE_VERTEX_BIT) return VertShaderSingle;
978 else return FragShaderSingle;
979 }
980 else
981 {
982 if (stage == vk::VK_SHADER_STAGE_COMPUTE_BIT) return CompShaderVector;
983 if (stage == vk::VK_SHADER_STAGE_VERTEX_BIT) return VertShaderVector;
984 else return FragShaderVector;
985 }
986 }
987
988 // Specialized below for different types.
989 template <class T>
990 static std::string getOpCapability();
991
992 // Same.
993 template <class T>
994 static std::string getOpType();
995
996 // Return the capabilities, extensions and execution modes for NaN preservation.
997 static std::string getNanCapability (bool preserve);
998 static std::string getNanExtension (bool preserve);
999 static std::string getNanExeMode (bool preserve);
1000 };
1001
getOpCapability()1002 template <> std::string SpirvTemplateManager::getOpCapability<double>() { return "OpCapability Float64"; }
getOpCapability()1003 template <> std::string SpirvTemplateManager::getOpCapability<deInt64>() { return "OpCapability Int64"; }
getOpCapability()1004 template <> std::string SpirvTemplateManager::getOpCapability<deUint64>() { return "OpCapability Int64"; }
1005
getOpType()1006 template <> std::string SpirvTemplateManager::getOpType<double>() { return "OpTypeFloat 64"; }
getOpType()1007 template <> std::string SpirvTemplateManager::getOpType<deInt64>() { return "OpTypeInt 64 1"; }
getOpType()1008 template <> std::string SpirvTemplateManager::getOpType<deUint64>() { return "OpTypeInt 64 0"; }
1009
getNanCapability(bool preserve)1010 std::string SpirvTemplateManager::getNanCapability (bool preserve)
1011 {
1012 return (preserve ? "OpCapability SignedZeroInfNanPreserve" : "");
1013 }
1014
getNanExtension(bool preserve)1015 std::string SpirvTemplateManager::getNanExtension (bool preserve)
1016 {
1017 return (preserve ? "OpExtension \"SPV_KHR_float_controls\"" : "");
1018 }
1019
getNanExeMode(bool preserve)1020 std::string SpirvTemplateManager::getNanExeMode (bool preserve)
1021 {
1022 return (preserve ? "OpExecutionMode %main SignedZeroInfNanPreserve 64" : "");
1023 }
1024
// Small aggregate pairing a buffer handle with its backing memory allocation so
// both are destroyed together.
struct BufferWithMemory
{
	vk::Move<vk::VkBuffer> buffer;
	de::MovePtr<vk::Allocation> allocation;

	BufferWithMemory ()
		: buffer(), allocation()
	{}

	// NOTE(review): the "move" operations below copy-assign the members. This
	// appears to rely on vk::Move and de::MovePtr transferring ownership on
	// copy (auto_ptr-style), leaving `other` empty — confirm against the
	// framework types before changing this to use std::move.
	BufferWithMemory (BufferWithMemory&& other)
		: buffer(other.buffer), allocation(other.allocation)
	{}

	BufferWithMemory& operator= (BufferWithMemory&& other)
	{
		buffer = other.buffer;
		allocation = other.allocation;
		return *this;
	}
};
1045
1046 // Create storage buffer, bind memory to it and return both things.
createStorageBuffer(const vk::DeviceInterface & vkdi,const vk::VkDevice device,vk::Allocator & allocator,size_t numBytes)1047 BufferWithMemory createStorageBuffer(const vk::DeviceInterface& vkdi,
1048 const vk::VkDevice device,
1049 vk::Allocator& allocator,
1050 size_t numBytes)
1051 {
1052 const vk::VkBufferCreateInfo bufferCreateInfo =
1053 {
1054 vk::VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // sType
1055 DE_NULL, // pNext
1056 0u, // flags
1057 numBytes, // size
1058 vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, // usage
1059 vk::VK_SHARING_MODE_EXCLUSIVE, // sharingMode
1060 0u, // queueFamilyCount
1061 DE_NULL, // pQueueFamilyIndices
1062 };
1063
1064 BufferWithMemory bufmem;
1065
1066 bufmem.buffer = vk::createBuffer(vkdi, device, &bufferCreateInfo);
1067 const vk::VkMemoryRequirements requirements = getBufferMemoryRequirements(vkdi, device, *bufmem.buffer);
1068 bufmem.allocation = allocator.allocate(requirements, vk::MemoryRequirement::HostVisible);
1069
1070 VK_CHECK(vkdi.bindBufferMemory(device, *bufmem.buffer, bufmem.allocation->getMemory(), bufmem.allocation->getOffset()));
1071
1072 return bufmem;
1073 }
1074
// Make sure the length of the following vectors is a multiple of 4. This will make sure operands can be reused for vectorized tests.
// Double operand pairs: cover <, ==, > with negative, zero and positive values,
// plus NaN combinations on one or both sides for the (un)ordered comparisons.
const OperandsVector<double> DOUBLE_OPERANDS =
{
	{ -8.0,	-5.0	},
	{ -5.0,	-8.0	},
	{ -5.0,	-5.0	},
	{ -5.0,	0.0		},
	{ 0.0,	-5.0	},
	{ 5.0,	0.0		},
	{ 0.0,	5.0		},
	{ 0.0,	0.0		},
	{ -5.0,	5.0		},
	{ 5.0,	-5.0	},
	{ 5.0,	8.0		},
	{ 8.0,	5.0		},
	{ 5.0,	5.0		},
	{ -6.0,	-5.0	},
	{ 6.0,	5.0		},
	{ 0.0,	1.0		},
	{ 1.0,	0.0		},
	{ 0.0,	NAN		},
	{ NAN,	0.0		},
	{ NAN,	NAN		},
};
1099
// Signed 64-bit operand pairs: negative/zero/positive values covering <, == and >.
// Length is a multiple of 4 so the same data feeds the vectorized (vec4) tests.
const OperandsVector<deInt64> INT64_OPERANDS =
{
	{ -8,	-5	},
	{ -5,	-8	},
	{ -5,	-5	},
	{ -5,	0	},
	{ 0,	-5	},
	{ 5,	0	},
	{ 0,	5	},
	{ 0,	0	},
	{ -5,	5	},
	{ 5,	-5	},
	{ 5,	8	},
	{ 8,	5	},
	{ 5,	5	},
	{ -6,	-5	},
	{ 6,	5	},
	{ 0,	1	},
};
1119
// Unsigned 64-bit operand pairs: small values plus the maximum representable
// value and its neighbor, to exercise comparisons near the upper boundary.
// Length is a multiple of 4 so the same data feeds the vectorized (vec4) tests.
constexpr auto MAX_DEUINT64 = std::numeric_limits<deUint64>::max();
const OperandsVector<deUint64> UINT64_OPERANDS =
{
	{ 0,					0					},
	{ 1,					0					},
	{ 0,					1					},
	{ 1,					1					},
	{ 5,					8					},
	{ 8,					5					},
	{ 5,					5					},
	{ 0,					MAX_DEUINT64		},
	{ MAX_DEUINT64,			0					},
	{ MAX_DEUINT64 - 1,		MAX_DEUINT64		},
	{ MAX_DEUINT64,			MAX_DEUINT64 - 1	},
	{ MAX_DEUINT64,			MAX_DEUINT64		},
};
1136
// Test instance: uploads the operand pairs to two input storage buffers, runs
// the comparison shader and verifies the integer results written to the output
// buffer. T is the 64-bit operand type (double, deInt64 or deUint64).
template <class T>
class T64bitCompareTestInstance : public TestInstance
{
public:
	T64bitCompareTestInstance (Context& ctx, const TestParameters<T>& params);
	tcu::TestStatus iterate (void);

private:
	const TestParameters<T> m_params;
	const size_t m_numOperations;		// Number of operand pairs to compare.
	const size_t m_inputBufferSize;		// Bytes per input buffer (one T per operation).
	const size_t m_outputBufferSize;	// Bytes in the output buffer (one int per operation).
};
1150
// Derive the buffer sizes from the number of operand pairs: each operation
// consumes one T per input buffer and produces one int in the output buffer.
template <class T>
T64bitCompareTestInstance<T>::T64bitCompareTestInstance (Context& ctx, const TestParameters<T>& params)
	: TestInstance(ctx)
	, m_params(params)
	, m_numOperations(m_params.operands.size())
	, m_inputBufferSize(m_numOperations * sizeof(T))
	, m_outputBufferSize(m_numOperations * sizeof(int))
{
}
1160
// NaN check usable with any operand type: integers can never hold a NaN, so
// the generic version always answers false.
template<class T>
bool genericIsNan (T)
{
	return false;
}

// Doubles are the only operand type where NaN exists; defer to std::isnan.
template<>
bool genericIsNan<double> (double value)
{
	return std::isnan(value);
}
1172
// Run one iteration of the test: create the three storage buffers, fill them
// with operands and a sentinel output value, dispatch or draw with the
// comparison shader for the configured stage, and verify every result matches
// the reference operation executed on the host.
template <class T>
tcu::TestStatus T64bitCompareTestInstance<T>::iterate (void)
{
	DE_ASSERT(m_params.stage == vk::VK_SHADER_STAGE_COMPUTE_BIT ||
		  m_params.stage == vk::VK_SHADER_STAGE_VERTEX_BIT ||
		  m_params.stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT );

	auto& vkdi = m_context.getDeviceInterface();
	auto device = m_context.getDevice();
	auto& allocator = m_context.getDefaultAllocator();

	// Create storage buffers (left operands, right operands and results buffer).
	BufferWithMemory input1 = createStorageBuffer(vkdi, device, allocator, m_inputBufferSize);
	BufferWithMemory input2 = createStorageBuffer(vkdi, device, allocator, m_inputBufferSize);
	BufferWithMemory output1 = createStorageBuffer(vkdi, device, allocator, m_outputBufferSize);

	// Create an array of buffers. The index in this vector doubles as the
	// binding number used for each buffer below.
	std::vector<vk::VkBuffer> buffers;
	buffers.push_back(input1.buffer.get());
	buffers.push_back(input2.buffer.get());
	buffers.push_back(output1.buffer.get());

	// Create descriptor set layout: one storage-buffer binding per buffer,
	// visible only to the stage under test.
	std::vector<vk::VkDescriptorSetLayoutBinding> bindings;
	for (size_t i = 0; i < buffers.size(); ++i)
	{
		vk::VkDescriptorSetLayoutBinding binding =
		{
			static_cast<deUint32>(i),								// uint32_t              binding;
			vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,					// VkDescriptorType      descriptorType;
			1u,														// uint32_t              descriptorCount;
			static_cast<vk::VkShaderStageFlags>(m_params.stage),	// VkShaderStageFlags    stageFlags;
			DE_NULL													// const VkSampler*      pImmutableSamplers;
		};
		bindings.push_back(binding);
	}

	const vk::VkDescriptorSetLayoutCreateInfo layoutCreateInfo =
	{
		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,	// VkStructureType                        sType;
		DE_NULL,													// const void*                            pNext;
		0,															// VkDescriptorSetLayoutCreateFlags       flags;
		static_cast<deUint32>(bindings.size()),						// uint32_t                               bindingCount;
		bindings.data()												// const VkDescriptorSetLayoutBinding*    pBindings;
	};
	auto descriptorSetLayout = vk::createDescriptorSetLayout(vkdi, device, &layoutCreateInfo);

	// Create descriptor set.
	vk::DescriptorPoolBuilder poolBuilder;
	poolBuilder.addType(bindings[0].descriptorType, static_cast<deUint32>(bindings.size()));
	auto descriptorPool = poolBuilder.build(vkdi, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	const vk::VkDescriptorSetAllocateInfo allocateInfo =
	{
		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,	// VkStructureType                 sType;
		DE_NULL,											// const void*                     pNext;
		*descriptorPool,									// VkDescriptorPool                descriptorPool;
		1u,													// uint32_t                        descriptorSetCount;
		&descriptorSetLayout.get()							// const VkDescriptorSetLayout*    pSetLayouts;
	};
	auto descriptorSet = vk::allocateDescriptorSet(vkdi, device, &allocateInfo);

	// Update descriptor set. The buffer infos are filled in a first pass (with
	// capacity reserved up front) so the pointers stored in the write
	// structures below remain stable while the vector grows.
	std::vector<vk::VkDescriptorBufferInfo>	descriptorBufferInfos;
	std::vector<vk::VkWriteDescriptorSet>	descriptorWrites;

	descriptorBufferInfos.reserve(buffers.size());
	descriptorWrites.reserve(buffers.size());

	for (size_t i = 0; i < buffers.size(); ++i)
	{
		vk::VkDescriptorBufferInfo bufferInfo =
		{
			buffers[i],		// VkBuffer        buffer;
			0u,				// VkDeviceSize    offset;
			VK_WHOLE_SIZE,	// VkDeviceSize    range;
		};
		descriptorBufferInfos.push_back(bufferInfo);
	}

	for (size_t i = 0; i < buffers.size(); ++i)
	{
		vk::VkWriteDescriptorSet write =
		{
			vk::VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,	// VkStructureType                  sType;
			DE_NULL,									// const void*                      pNext;
			*descriptorSet,								// VkDescriptorSet                  dstSet;
			static_cast<deUint32>(i),					// uint32_t                         dstBinding;
			0u,											// uint32_t                         dstArrayElement;
			1u,											// uint32_t                         descriptorCount;
			bindings[i].descriptorType,					// VkDescriptorType                 descriptorType;
			DE_NULL,									// const VkDescriptorImageInfo*     pImageInfo;
			&descriptorBufferInfos[i],					// const VkDescriptorBufferInfo*    pBufferInfo;
			DE_NULL,									// const VkBufferView*              pTexelBufferView;
		};
		descriptorWrites.push_back(write);
	}
	vkdi.updateDescriptorSets(device, static_cast<deUint32>(descriptorWrites.size()), descriptorWrites.data(), 0u, DE_NULL);

	// Fill storage buffers with data. Note: VkPhysicalDeviceLimits.minMemoryMapAlignment guarantees this cast is safe.
	T* input1Ptr = reinterpret_cast<T*> (input1.allocation->getHostPtr());
	T* input2Ptr = reinterpret_cast<T*> (input2.allocation->getHostPtr());
	int* output1Ptr = reinterpret_cast<int*> (output1.allocation->getHostPtr());

	// Output positions are seeded with -9, a value the shader never writes, so
	// an untouched result is guaranteed to be detected as a mismatch.
	for (size_t i = 0; i < m_numOperations; ++i)
	{
		input1Ptr[i] = m_params.operands[i].first;
		input2Ptr[i] = m_params.operands[i].second;
		output1Ptr[i] = -9;
	}

	// Flush memory.
	vk::flushAlloc(vkdi, device, *input1.allocation);
	vk::flushAlloc(vkdi, device, *input2.allocation);
	vk::flushAlloc(vkdi, device, *output1.allocation);

	// Prepare barriers in advance so data is visible to the shaders and the host.
	std::vector<vk::VkBufferMemoryBarrier> hostToDevBarriers;
	std::vector<vk::VkBufferMemoryBarrier> devToHostBarriers;
	for (size_t i = 0; i < buffers.size(); ++i)
	{
		const vk::VkBufferMemoryBarrier hostDev =
		{
			vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,						// VkStructureType    sType;
			DE_NULL,															// const void*        pNext;
			vk::VK_ACCESS_HOST_WRITE_BIT,										// VkAccessFlags      srcAccessMask;
			(vk::VK_ACCESS_SHADER_READ_BIT | vk::VK_ACCESS_SHADER_WRITE_BIT),	// VkAccessFlags      dstAccessMask;
			VK_QUEUE_FAMILY_IGNORED,											// deUint32           srcQueueFamilyIndex;
			VK_QUEUE_FAMILY_IGNORED,											// deUint32           dstQueueFamilyIndex;
			buffers[i],															// VkBuffer           buffer;
			0u,																	// VkDeviceSize       offset;
			VK_WHOLE_SIZE,														// VkDeviceSize       size;
		};
		hostToDevBarriers.push_back(hostDev);

		const vk::VkBufferMemoryBarrier devHost =
		{
			vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType    sType;
			DE_NULL,										// const void*        pNext;
			vk::VK_ACCESS_SHADER_WRITE_BIT,					// VkAccessFlags      srcAccessMask;
			vk::VK_ACCESS_HOST_READ_BIT,					// VkAccessFlags      dstAccessMask;
			VK_QUEUE_FAMILY_IGNORED,						// deUint32           srcQueueFamilyIndex;
			VK_QUEUE_FAMILY_IGNORED,						// deUint32           dstQueueFamilyIndex;
			buffers[i],										// VkBuffer           buffer;
			0u,												// VkDeviceSize       offset;
			VK_WHOLE_SIZE,									// VkDeviceSize       size;
		};
		devToHostBarriers.push_back(devHost);
	}

	// Create command pool and command buffer.
	auto queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();

	const vk::VkCommandPoolCreateInfo cmdPoolCreateInfo =
	{
		vk::VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,	// VkStructureType             sType;
		DE_NULL,										// const void*                 pNext;
		vk::VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,		// VkCommandPoolCreateFlags    flags;
		queueFamilyIndex,								// deUint32                    queueFamilyIndex;
	};
	auto cmdPool = vk::createCommandPool(vkdi, device, &cmdPoolCreateInfo);

	const vk::VkCommandBufferAllocateInfo cmdBufferAllocateInfo =
	{
		vk::VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,	// VkStructureType         sType;
		DE_NULL,											// const void*             pNext;
		*cmdPool,											// VkCommandPool           commandPool;
		vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY,				// VkCommandBufferLevel    level;
		1u,													// deUint32                commandBufferCount;
	};
	auto cmdBuffer = vk::allocateCommandBuffer(vkdi, device, &cmdBufferAllocateInfo);

	// Create pipeline layout.
	const vk::VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
	{
		vk::VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,	// VkStructureType                 sType;
		DE_NULL,											// const void*                     pNext;
		0,													// VkPipelineLayoutCreateFlags     flags;
		1u,													// deUint32                        setLayoutCount;
		&descriptorSetLayout.get(),							// const VkDescriptorSetLayout*    pSetLayouts;
		0u,													// deUint32                        pushConstantRangeCount;
		DE_NULL,											// const VkPushConstantRange*      pPushConstantRanges;
	};
	auto pipelineLayout = vk::createPipelineLayout(vkdi, device, &pipelineLayoutCreateInfo);

	if (m_params.stage == vk::VK_SHADER_STAGE_COMPUTE_BIT)
	{
		// Create compute pipeline.
		auto compShaderModule = createShaderModule(vkdi, device, m_context.getBinaryCollection().get("comp"));

		const vk::VkComputePipelineCreateInfo computeCreateInfo =
		{
			vk::VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// VkStructureType                    sType;
			DE_NULL,											// const void*                        pNext;
			0,													// VkPipelineCreateFlags              flags;
			{													// VkPipelineShaderStageCreateInfo    stage;
				vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType                     sType;
				DE_NULL,													// const void*                         pNext;
				0,															// VkPipelineShaderStageCreateFlags    flags;
				vk::VK_SHADER_STAGE_COMPUTE_BIT,							// VkShaderStageFlagBits               stage;
				*compShaderModule,											// VkShaderModule                      module;
				"main",														// const char*                         pName;
				DE_NULL,													// const VkSpecializationInfo*         pSpecializationInfo;
			},
			*pipelineLayout,									// VkPipelineLayout                   layout;
			DE_NULL,											// VkPipeline                         basePipelineHandle;
			0,													// int32_t                            basePipelineIndex;
		};
		auto computePipeline = vk::createComputePipeline(vkdi, device, DE_NULL, &computeCreateInfo);

		// Run the shader. A single workgroup is enough: the shader loops over
		// all operations internally.
		vk::beginCommandBuffer(vkdi, *cmdBuffer);
		vkdi.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
		vkdi.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1u, &descriptorSet.get(), 0u, DE_NULL);
		vkdi.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_HOST_BIT, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0u, DE_NULL, static_cast<deUint32>(hostToDevBarriers.size()), hostToDevBarriers.data(), 0u, DE_NULL);
		vkdi.cmdDispatch(*cmdBuffer, 1u, 1u, 1u);
		vkdi.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, 0, 0u, DE_NULL, static_cast<deUint32>(devToHostBarriers.size()), devToHostBarriers.data(), 0u, DE_NULL);
		vk::endCommandBuffer(vkdi, *cmdBuffer);
		vk::submitCommandsAndWait(vkdi, device, m_context.getUniversalQueue(), *cmdBuffer);
	}
	else if (m_params.stage == vk::VK_SHADER_STAGE_VERTEX_BIT ||
		 m_params.stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT )
	{
		const bool isFrag = (m_params.stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT);

		// Create graphics pipeline. The vertex shader is always present; the
		// fragment stage (plus viewport and multisample state) is only added
		// when the fragment variant is tested.
		auto vertShaderModule = createShaderModule(vkdi, device, m_context.getBinaryCollection().get("vert"));
		vk::Move<vk::VkShaderModule> fragShaderModule;
		std::vector<vk::VkPipelineShaderStageCreateInfo> shaderStages;

		const vk::VkPipelineShaderStageCreateInfo vertexStage =
		{
			vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType                     sType;
			DE_NULL,													// const void*                         pNext;
			0,															// VkPipelineShaderStageCreateFlags    flags;
			vk::VK_SHADER_STAGE_VERTEX_BIT,								// VkShaderStageFlagBits               stage;
			*vertShaderModule,											// VkShaderModule                      module;
			"main",														// const char*                         pName;
			DE_NULL,													// const VkSpecializationInfo*         pSpecializationInfo;
		};
		shaderStages.push_back(vertexStage);

		if (isFrag)
		{
			fragShaderModule = createShaderModule(vkdi, device, m_context.getBinaryCollection().get("frag"));

			const vk::VkPipelineShaderStageCreateInfo fragmentStage =
			{
				vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType                     sType;
				DE_NULL,													// const void*                         pNext;
				0,															// VkPipelineShaderStageCreateFlags    flags;
				vk::VK_SHADER_STAGE_FRAGMENT_BIT,							// VkShaderStageFlagBits               stage;
				*fragShaderModule,											// VkShaderModule                      module;
				"main",														// const char*                         pName;
				DE_NULL,													// const VkSpecializationInfo*         pSpecializationInfo;
			};
			shaderStages.push_back(fragmentStage);
		}

		// No vertex input: the shader generates everything it needs.
		const vk::VkPipelineVertexInputStateCreateInfo vertexInputInfo =
		{
			vk::VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType                             sType;
			DE_NULL,														// const void*                                 pNext;
			0,																// VkPipelineVertexInputStateCreateFlags       flags;
			0u,																// deUint32                                    vertexBindingDescriptionCount;
			DE_NULL,														// const VkVertexInputBindingDescription*      pVertexBindingDescriptions;
			0u,																// deUint32                                    vertexAttributeDescriptionCount;
			DE_NULL,														// const VkVertexInputAttributeDescription*    pVertexAttributeDescriptions;
		};

		const vk::VkPipelineInputAssemblyStateCreateInfo inputAssembly =
		{
			vk::VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType                            sType;
			DE_NULL,															// const void*                                pNext;
			0u,																	// VkPipelineInputAssemblyStateCreateFlags    flags;
			vk::VK_PRIMITIVE_TOPOLOGY_POINT_LIST,								// VkPrimitiveTopology                        topology;
			VK_FALSE,															// VkBool32                                   primitiveRestartEnable;
		};

		// For the vertex-only variant rasterization is discarded: the work
		// happens in the vertex shader's buffer stores.
		const vk::VkPipelineRasterizationStateCreateInfo rasterizationState =
		{
			vk::VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,	// VkStructureType                            sType;
			DE_NULL,														// const void*                                pNext;
			0,																// VkPipelineRasterizationStateCreateFlags    flags;
			VK_FALSE,														// VkBool32                                   depthClampEnable;
			(isFrag ? VK_FALSE : VK_TRUE),									// VkBool32                                   rasterizerDiscardEnable;
			vk::VK_POLYGON_MODE_FILL,										// VkPolygonMode                              polygonMode;
			vk::VK_CULL_MODE_NONE,											// VkCullModeFlags                            cullMode;
			vk::VK_FRONT_FACE_COUNTER_CLOCKWISE,							// VkFrontFace                                frontFace;
			VK_FALSE,														// VkBool32                                   depthBiasEnable;
			0.0f,															// float                                      depthBiasConstantFactor;
			0.0f,															// float                                      depthBiasClamp;
			0.0f,															// float                                      depthBiasSlopeFactor;
			1.0f,															// float                                      lineWidth;
		};

		// Attachment-less subpass and render pass: results go to storage buffers.
		const vk::VkSubpassDescription subpassDescription =
		{
			0,										// VkSubpassDescriptionFlags       flags;
			vk::VK_PIPELINE_BIND_POINT_GRAPHICS,	// VkPipelineBindPoint             pipelineBindPoint;
			0u,										// deUint32                        inputAttachmentCount;
			DE_NULL,								// const VkAttachmentReference*    pInputAttachments;
			0u,										// deUint32                        colorAttachmentCount;
			DE_NULL,								// const VkAttachmentReference*    pColorAttachments;
			DE_NULL,								// const VkAttachmentReference*    pResolveAttachments;
			DE_NULL,								// const VkAttachmentReference*    pDepthStencilAttachment;
			0u,										// deUint32                        preserveAttachmentCount;
			0u,										// const deUint32*                 pPreserveAttachments;
		};

		const vk::VkRenderPassCreateInfo renderPassCreateInfo =
		{
			vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,	// VkStructureType                   sType;
			DE_NULL,										// const void*                       pNext;
			0,												// VkRenderPassCreateFlags           flags;
			0u,												// deUint32                          attachmentCount;
			DE_NULL,										// const VkAttachmentDescription*    pAttachments;
			1u,												// deUint32                          subpassCount;
			&subpassDescription,							// const VkSubpassDescription*       pSubpasses;
			0u,												// deUint32                          dependencyCount;
			DE_NULL,										// const VkSubpassDependency*        pDependencies;
		};
		auto renderPass = vk::createRenderPass(vkdi, device, &renderPassCreateInfo);

		std::unique_ptr<vk::VkPipelineMultisampleStateCreateInfo> multisampleState;
		if (isFrag)
		{
			multisampleState.reset(new vk::VkPipelineMultisampleStateCreateInfo);
			*multisampleState =
			{
				vk::VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType                          sType;
				DE_NULL,														// const void*                              pNext;
				0,																// VkPipelineMultisampleStateCreateFlags    flags;
				vk::VK_SAMPLE_COUNT_1_BIT,										// VkSampleCountFlagBits                    rasterizationSamples;
				VK_FALSE,														// VkBool32                                 sampleShadingEnable;
				0.0f,															// float                                    minSampleShading;
				DE_NULL,														// const VkSampleMask*                      pSampleMask;
				VK_FALSE,														// VkBool32                                 alphaToCoverageEnable;
				VK_FALSE,														// VkBool32                                 alphaToOneEnable;
			};
		}

		// A minimal 1x1 viewport: one point is drawn, one fragment shaded.
		const vk::VkViewport viewport =
		{
			0.0f,	// float    x;
			0.0f,	// float    y;
			1.0f,	// float    width;
			1.0f,	// float    height;
			0.0f,	// float    minDepth;
			1.0f,	// float    maxDepth;
		};

		const vk::VkRect2D renderArea = { { 0u, 0u }, { 1u, 1u } };

		std::unique_ptr<vk::VkPipelineViewportStateCreateInfo> viewportState;

		if (isFrag)
		{
			viewportState.reset(new vk::VkPipelineViewportStateCreateInfo);
			*viewportState =
			{
				vk::VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,	// VkStructureType                       sType;
				DE_NULL,													// const void*                           pNext;
				0,															// VkPipelineViewportStateCreateFlags    flags;
				1u,															// deUint32                              viewportCount;
				&viewport,													// const VkViewport*                     pViewports;
				1u,															// deUint32                              scissorCount;
				&renderArea,												// const VkRect2D*                       pScissors;
			};
		}

		// viewportState/multisampleState are null for the vertex-only variant,
		// which is legal because rasterization is discarded there.
		const vk::VkGraphicsPipelineCreateInfo graphicsCreateInfo =
		{
			vk::VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,	// VkStructureType                                  sType;
			DE_NULL,												// const void*                                      pNext;
			0,														// VkPipelineCreateFlags                            flags;
			static_cast<deUint32>(shaderStages.size()),				// deUint32                                         stageCount;
			shaderStages.data(),									// const VkPipelineShaderStageCreateInfo*           pStages;
			&vertexInputInfo,										// const VkPipelineVertexInputStateCreateInfo*      pVertexInputState;
			&inputAssembly,											// const VkPipelineInputAssemblyStateCreateInfo*    pInputAssemblyState;
			DE_NULL,												// const VkPipelineTessellationStateCreateInfo*     pTessellationState;
			viewportState.get(),									// const VkPipelineViewportStateCreateInfo*         pViewportState;
			&rasterizationState,									// const VkPipelineRasterizationStateCreateInfo*    pRasterizationState;
			multisampleState.get(),									// const VkPipelineMultisampleStateCreateInfo*      pMultisampleState;
			DE_NULL,												// const VkPipelineDepthStencilStateCreateInfo*     pDepthStencilState;
			DE_NULL,												// const VkPipelineColorBlendStateCreateInfo*       pColorBlendState;
			DE_NULL,												// const VkPipelineDynamicStateCreateInfo*          pDynamicState;
			*pipelineLayout,										// VkPipelineLayout                                 layout;
			*renderPass,											// VkRenderPass                                     renderPass;
			0u,														// deUint32                                         subpass;
			DE_NULL,												// VkPipeline                                       basePipelineHandle;
			0u,														// deInt32                                          basePipelineIndex;
		};
		auto graphicsPipeline = vk::createGraphicsPipeline(vkdi, device, DE_NULL, &graphicsCreateInfo);

		const vk::VkFramebufferCreateInfo frameBufferCreateInfo =
		{
			vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,	// VkStructureType             sType;
			DE_NULL,										// const void*                 pNext;
			0,												// VkFramebufferCreateFlags    flags;
			*renderPass,									// VkRenderPass                renderPass;
			0u,												// deUint32                    attachmentCount;
			DE_NULL,										// const VkImageView*          pAttachments;
			1u,												// deUint32                    width;
			1u,												// deUint32                    height;
			1u,												// deUint32                    layers;
		};
		auto frameBuffer = vk::createFramebuffer(vkdi, device, &frameBufferCreateInfo);

		const vk::VkRenderPassBeginInfo renderPassBeginInfo =
		{
			vk::VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,	// VkStructureType        sType;
			DE_NULL,										// const void*            pNext;
			*renderPass,									// VkRenderPass           renderPass;
			*frameBuffer,									// VkFramebuffer          framebuffer;
			renderArea,										// VkRect2D               renderArea;
			0u,												// deUint32               clearValueCount;
			DE_NULL,										// const VkClearValue*    pClearValues;
		};

		// Run the shader. The shader stage under test determines which stage
		// the host-to-device barrier must reach.
		vk::VkPipelineStageFlags pipelineStage = (isFrag ? vk::VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT : vk::VK_PIPELINE_STAGE_VERTEX_SHADER_BIT);

		vk::beginCommandBuffer(vkdi, *cmdBuffer);
		vkdi.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_HOST_BIT, pipelineStage, 0, 0u, DE_NULL, static_cast<deUint32>(hostToDevBarriers.size()), hostToDevBarriers.data(), 0u, DE_NULL);
		vkdi.cmdBeginRenderPass(*cmdBuffer, &renderPassBeginInfo, vk::VK_SUBPASS_CONTENTS_INLINE);
		vkdi.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
		vkdi.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0, 1u, &descriptorSet.get(), 0u, DE_NULL);
		vkdi.cmdDraw(*cmdBuffer, 1u, 1u, 0u, 0u);
		vkdi.cmdEndRenderPass(*cmdBuffer);
		vkdi.cmdPipelineBarrier(*cmdBuffer, pipelineStage, vk::VK_PIPELINE_STAGE_HOST_BIT, 0, 0u, DE_NULL, static_cast<deUint32>(devToHostBarriers.size()), devToHostBarriers.data(), 0u, DE_NULL);
		vk::endCommandBuffer(vkdi, *cmdBuffer);
		vk::submitCommandsAndWait(vkdi, device, m_context.getUniversalQueue(), *cmdBuffer);
	}

	// Invalidate allocations.
	vk::invalidateAlloc(vkdi, device, *input1.allocation);
	vk::invalidateAlloc(vkdi, device, *input2.allocation);
	vk::invalidateAlloc(vkdi, device, *output1.allocation);

	// Read and verify results. The reference value is the host-side operation
	// cast to int (0 or 1), matching what the shader selects per comparison.
	// Mismatches involving NaN operands are only fatal when the test requires
	// NaN preservation; otherwise they are tolerated.
	std::vector<int> results(m_numOperations);
	deMemcpy(results.data(), output1.allocation->getHostPtr(), m_outputBufferSize);
	for (size_t i = 0; i < m_numOperations; ++i)
	{
		int expected = static_cast<int>(m_params.operation.run(m_params.operands[i].first, m_params.operands[i].second));
		if (results[i] != expected && (m_params.requireNanPreserve || (!genericIsNan<T>(m_params.operands[i].first) && !genericIsNan<T>(m_params.operands[i].second))))
		{
			std::ostringstream msg;
			msg << "Invalid result found in position " << i << ": expected " << expected << " and found " << results[i];
			return tcu::TestStatus::fail(msg.str());
		}
	}

	return tcu::TestStatus::pass("Pass");
}
1629
// Test case wrapper: checks device support, generates the SPIR-V shader from
// the templates and creates the runtime instance above.
template <class T>
class T64bitCompareTest : public TestCase
{
public:
	T64bitCompareTest (tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParameters<T>& params);
	virtual void checkSupport (Context& context) const;
	virtual void initPrograms (vk::SourceCollections& programCollection) const;
	virtual TestInstance* createInstance (Context& ctx) const;

private:
	const TestParameters<T> m_params;
};
1642
// Store the test parameters and validate the operand count up front.
template <class T>
T64bitCompareTest<T>::T64bitCompareTest (tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParameters<T>& params)
	: TestCase(testCtx, name, description), m_params(params)
{
	// This is needed so that the same operands can be used for single-element comparisons or for vectorized comparisons (which use *vec4 types).
	DE_ASSERT(m_params.operands.size() % 4 == 0);
}
1650
// This template checks the needed type support features in shaders for type T.
// Specializations are provided below.
template <class T>
void checkTypeSupport(const vk::VkPhysicalDeviceFeatures& features);

// Doubles require the shaderFloat64 feature.
template <>
void checkTypeSupport<double>(const vk::VkPhysicalDeviceFeatures& features)
{
	if (!features.shaderFloat64)
		TCU_THROW(NotSupportedError, "64-bit floats not supported in shaders");
}

// Shared helper for both 64-bit integer specializations: requires shaderInt64.
void check64bitIntegers(const vk::VkPhysicalDeviceFeatures& features)
{
	if (!features.shaderInt64)
		TCU_THROW(NotSupportedError, "64-bit integer types not supported in shaders");
}

template <>
void checkTypeSupport<deInt64>(const vk::VkPhysicalDeviceFeatures& features)
{
	check64bitIntegers(features);
}

template <>
void checkTypeSupport<deUint64>(const vk::VkPhysicalDeviceFeatures& features)
{
	check64bitIntegers(features);
}
1680
// Verify the device supports the operand type, buffer stores in the selected
// shader stage and, when required, 64-bit NaN preservation; throws
// NotSupportedError otherwise.
template <class T>
void T64bitCompareTest<T>::checkSupport (Context& context) const
{
	auto& vki = context.getInstanceInterface();
	auto physicalDevice = context.getPhysicalDevice();
	auto features = vk::getPhysicalDeviceFeatures(vki, physicalDevice);

	checkTypeSupport<T>(features);

	switch (m_params.stage)
	{
	case vk::VK_SHADER_STAGE_COMPUTE_BIT:
		break;
	case vk::VK_SHADER_STAGE_VERTEX_BIT:
		if (!features.vertexPipelineStoresAndAtomics)
			TCU_THROW(NotSupportedError, "Vertex shader does not support stores");
		break;
	case vk::VK_SHADER_STAGE_FRAGMENT_BIT:
		if (!features.fragmentStoresAndAtomics)
			TCU_THROW(NotSupportedError, "Fragment shader does not support stores");
		break;
	default:
		// Always-false assertion carrying a message (null pointer can never equal a string literal's address).
		DE_ASSERT(DE_NULL == "Invalid shader stage specified");
	}

	// Only the 64-bit NaN-preservation bit is requested; all other float
	// controls fields are zeroed so they are not checked.
	vk::VkPhysicalDeviceFloatControlsProperties fcFeatures;
	deMemset(&fcFeatures, 0, sizeof(fcFeatures));
	fcFeatures.shaderSignedZeroInfNanPreserveFloat64 = VK_TRUE;

	const char *unused;
	if (m_params.requireNanPreserve && !isFloatControlsFeaturesSupported(context, fcFeatures, &unused))
		TCU_THROW(NotSupportedError, "NaN preservation not supported");
}
1714
1715 template <class T>
initPrograms(vk::SourceCollections & programCollection) const1716 void T64bitCompareTest<T>::initPrograms (vk::SourceCollections& programCollection) const
1717 {
1718 DE_ASSERT(m_params.stage == vk::VK_SHADER_STAGE_COMPUTE_BIT ||
1719 m_params.stage == vk::VK_SHADER_STAGE_VERTEX_BIT ||
1720 m_params.stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT );
1721
1722 std::map<std::string, std::string> replacements;
1723 replacements["ITERS"] = de::toString((m_params.dataType == DATA_TYPE_SINGLE) ? m_params.operands.size() : m_params.operands.size() / 4);
1724 replacements["OPNAME"] = m_params.operation.spirvName();
1725 replacements["OPCAPABILITY"] = SpirvTemplateManager::getOpCapability<T>();
1726 replacements["OPTYPE"] = SpirvTemplateManager::getOpType<T>();
1727 replacements["NANCAP"] = SpirvTemplateManager::getNanCapability(m_params.requireNanPreserve);
1728 replacements["NANEXT"] = SpirvTemplateManager::getNanExtension(m_params.requireNanPreserve);
1729 replacements["NANMODE"] = SpirvTemplateManager::getNanExeMode(m_params.requireNanPreserve);
1730
1731 static const std::map<vk::VkShaderStageFlagBits, std::string> sourceNames =
1732 {
1733 std::make_pair( vk::VK_SHADER_STAGE_COMPUTE_BIT, "comp" ),
1734 std::make_pair( vk::VK_SHADER_STAGE_VERTEX_BIT, "vert" ),
1735 std::make_pair( vk::VK_SHADER_STAGE_FRAGMENT_BIT, "frag" ),
1736 };
1737
1738 // Add the proper template under the proper name.
1739 programCollection.spirvAsmSources.add(sourceNames.find(m_params.stage)->second) << SpirvTemplateManager::getTemplate(m_params.dataType, m_params.stage).specialize(replacements);
1740
1741 // Add the passthrough vertex shader needed for the fragment shader.
1742 if (m_params.stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT)
1743 programCollection.glslSources.add("vert") << glu::VertexSource(VertShaderPassThrough);
1744 }
1745
// Creates the runtime instance that uploads operands, runs the shader and checks results.
template <class T>
TestInstance* T64bitCompareTest<T>::createInstance (Context& ctx) const
{
	return new T64bitCompareTestInstance<T>(ctx, m_params);
}
1751
1752 const std::map<bool, std::string> requireNanName =
1753 {
1754 std::make_pair( false, "nonan" ),
1755 std::make_pair( true, "withnan" ),
1756 };
1757
1758 const std::map<DataType, std::string> dataTypeName =
1759 {
1760 std::make_pair(DATA_TYPE_SINGLE, "single"),
1761 std::make_pair(DATA_TYPE_VECTOR, "vector"),
1762 };
1763
1764 using StageName = std::map<vk::VkShaderStageFlagBits, std::string>;
1765
createDoubleCompareTestsInGroup(tcu::TestCaseGroup * tests,const StageName * stageNames)1766 void createDoubleCompareTestsInGroup (tcu::TestCaseGroup* tests, const StageName* stageNames)
1767 {
1768 static const std::vector<const CompareOperation<double>*> operationList =
1769 {
1770 // Ordered operations.
1771 &FOrderedEqualOp,
1772 &FOrderedNotEqualOp,
1773 &FOrderedLessThanOp,
1774 &FOrderedLessThanEqualOp,
1775 &FOrderedGreaterThanOp,
1776 &FOrderedGreaterThanEqualOp,
1777 // Unordered operations.
1778 &FUnorderedEqualOp,
1779 &FUnorderedNotEqualOp,
1780 &FUnorderedLessThanOp,
1781 &FUnorderedLessThanEqualOp,
1782 &FUnorderedGreaterThanOp,
1783 &FUnorderedGreaterThanEqualOp,
1784 };
1785
1786 for (const auto& stageNamePair : *stageNames)
1787 for (const auto& typeNamePair : dataTypeName)
1788 for (const auto& requireNanPair : requireNanName)
1789 for (const auto opPtr : operationList)
1790 {
1791 TestParameters<double> params = { typeNamePair.first, *opPtr, stageNamePair.first, DOUBLE_OPERANDS, requireNanPair.first };
1792 std::string testName = stageNamePair.second + "_" + de::toLower(opPtr->spirvName()) + "_" + requireNanPair.second + "_" + typeNamePair.second;
1793 tests->addChild(new T64bitCompareTest<double>(tests->getTestContext(), testName, "", params));
1794 }
1795 }
1796
createInt64CompareTestsInGroup(tcu::TestCaseGroup * tests,const StageName * stageNames)1797 void createInt64CompareTestsInGroup (tcu::TestCaseGroup* tests, const StageName* stageNames)
1798 {
1799 static const std::vector<const CompareOperation<deInt64>*> operationList =
1800 {
1801 &deInt64EqualOp,
1802 &deInt64NotEqualOp,
1803 &deInt64LessThanOp,
1804 &deInt64LessThanEqualOp,
1805 &deInt64GreaterThanOp,
1806 &deInt64GreaterThanEqualOp,
1807 };
1808
1809 for (const auto& stageNamePair : *stageNames)
1810 for (const auto& typeNamePair : dataTypeName)
1811 for (const auto opPtr : operationList)
1812 {
1813 TestParameters<deInt64> params = { typeNamePair.first, *opPtr, stageNamePair.first, INT64_OPERANDS, false };
1814 std::string testName = stageNamePair.second + "_" + de::toLower(opPtr->spirvName()) + "_" + typeNamePair.second;
1815 tests->addChild(new T64bitCompareTest<deInt64>(tests->getTestContext(), testName, "", params));
1816 }
1817 }
1818
createUint64CompareTestsInGroup(tcu::TestCaseGroup * tests,const StageName * stageNames)1819 void createUint64CompareTestsInGroup (tcu::TestCaseGroup* tests, const StageName* stageNames)
1820 {
1821 static const std::vector<const CompareOperation<deUint64>*> operationList =
1822 {
1823 &deUint64EqualOp,
1824 &deUint64NotEqualOp,
1825 &deUint64LessThanOp,
1826 &deUint64LessThanEqualOp,
1827 &deUint64GreaterThanOp,
1828 &deUint64GreaterThanEqualOp,
1829 };
1830
1831 for (const auto& stageNamePair : *stageNames)
1832 for (const auto& typeNamePair : dataTypeName)
1833 for (const auto opPtr : operationList)
1834 {
1835 TestParameters<deUint64> params = { typeNamePair.first, *opPtr, stageNamePair.first, UINT64_OPERANDS, false };
1836 std::string testName = stageNamePair.second + "_" + de::toLower(opPtr->spirvName()) + "_" + typeNamePair.second;
1837 tests->addChild(new T64bitCompareTest<deUint64>(tests->getTestContext(), testName, "", params));
1838 }
1839 }
1840
// Traits-style helper grouping the per-type group names, descriptions and
// creation functions used to build the test hierarchy below.
struct TestMgr
{
	// Signature of the per-type group population functions above.
	typedef void (*CreationFunctionPtr)(tcu::TestCaseGroup*, const StageName*);

	static const char* getParentGroupName () { return "64bit_compare"; }
	static const char* getParentGroupDesc () { return "64-bit type comparison operations"; }

	// Name of the per-type subgroup; specialized per tested type below.
	template <class T>
	static std::string getGroupName ();

	// Description of the per-type subgroup; specialized per tested type below.
	template <class T>
	static std::string getGroupDesc ();

	// Function that fills the per-type subgroup; specialized per tested type below.
	template <class T>
	static CreationFunctionPtr getCreationFunction ();
};
1857
// TestMgr specializations for the three 64-bit types under test.
template <> std::string TestMgr::getGroupName<double>() { return "double"; }
template <> std::string TestMgr::getGroupName<deInt64>() { return "int64"; }
template <> std::string TestMgr::getGroupName<deUint64>() { return "uint64"; }

template <> std::string TestMgr::getGroupDesc<double>() { return "64-bit floating point tests"; }
template <> std::string TestMgr::getGroupDesc<deInt64>() { return "64-bit signed integer tests"; }
template <> std::string TestMgr::getGroupDesc<deUint64>() { return "64-bit unsigned integer tests"; }

template <> TestMgr::CreationFunctionPtr TestMgr::getCreationFunction<double> () { return createDoubleCompareTestsInGroup; }
template <> TestMgr::CreationFunctionPtr TestMgr::getCreationFunction<deInt64> () { return createInt64CompareTestsInGroup; }
template <> TestMgr::CreationFunctionPtr TestMgr::getCreationFunction<deUint64> () { return createUint64CompareTestsInGroup; }
1869
1870 } // anonymous
1871
create64bitCompareGraphicsGroup(tcu::TestContext & testCtx)1872 tcu::TestCaseGroup* create64bitCompareGraphicsGroup (tcu::TestContext& testCtx)
1873 {
1874 static const StageName graphicStages =
1875 {
1876 std::make_pair(vk::VK_SHADER_STAGE_VERTEX_BIT, "vert"),
1877 std::make_pair(vk::VK_SHADER_STAGE_FRAGMENT_BIT, "frag"),
1878 };
1879
1880 tcu::TestCaseGroup* newGroup = new tcu::TestCaseGroup(testCtx, TestMgr::getParentGroupName(), TestMgr::getParentGroupDesc());
1881 newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<double>(), TestMgr::getGroupDesc<double>(), TestMgr::getCreationFunction<double>(), &graphicStages));
1882 newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<deInt64>(), TestMgr::getGroupDesc<deInt64>(), TestMgr::getCreationFunction<deInt64>(), &graphicStages));
1883 newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<deUint64>(), TestMgr::getGroupDesc<deUint64>(), TestMgr::getCreationFunction<deUint64>(), &graphicStages));
1884 return newGroup;
1885 }
1886
create64bitCompareComputeGroup(tcu::TestContext & testCtx)1887 tcu::TestCaseGroup* create64bitCompareComputeGroup (tcu::TestContext& testCtx)
1888 {
1889 static const StageName computeStages =
1890 {
1891 std::make_pair(vk::VK_SHADER_STAGE_COMPUTE_BIT, "comp"),
1892 };
1893
1894 tcu::TestCaseGroup* newGroup = new tcu::TestCaseGroup(testCtx, TestMgr::getParentGroupName(), TestMgr::getParentGroupDesc());
1895 newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<double>(), TestMgr::getGroupDesc<double>(), TestMgr::getCreationFunction<double>(), &computeStages));
1896 newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<deInt64>(), TestMgr::getGroupDesc<deInt64>(), TestMgr::getCreationFunction<deInt64>(), &computeStages));
1897 newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<deUint64>(), TestMgr::getGroupDesc<deUint64>(), TestMgr::getCreationFunction<deUint64>(), &computeStages));
1898 return newGroup;
1899 }
1900
1901 } // SpirVAssembly
1902 } // vkt
1903