1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2015 The Khronos Group Inc.
6 * Copyright (c) 2017 Google Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Atomic operations (OpAtomic*) tests.
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktAtomicOperationTests.hpp"
26 #include "vktShaderExecutor.hpp"
27
28 #include "vkRefUtil.hpp"
29 #include "vkMemUtil.hpp"
30 #include "vkQueryUtil.hpp"
31 #include "vkObjUtil.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkCmdUtil.hpp"
34 #include "vktTestGroupUtil.hpp"
35
36 #include "tcuTestLog.hpp"
37 #include "tcuStringTemplate.hpp"
38 #include "tcuResultCollector.hpp"
39
40 #include "deFloat16.h"
41 #include "deMath.hpp"
42 #include "deStringUtil.hpp"
43 #include "deSharedPtr.hpp"
44 #include "deRandom.hpp"
45 #include "deArrayUtil.hpp"
46
47 #include <string>
48 #include <memory>
49 #include <cmath>
50
51 namespace vkt
52 {
53 namespace shaderexecutor
54 {
55
56 namespace
57 {
58
59 using de::UniquePtr;
60 using de::MovePtr;
61 using std::vector;
62
63 using namespace vk;
64
65 enum class AtomicMemoryType
66 {
67 BUFFER = 0, // Normal buffer.
68 SHARED, // Shared global struct in a compute workgroup.
69 REFERENCE, // Buffer passed as a reference.
70 };
71
72 // Helper class to indicate the shader type and which memory type the atomic operations use.
73 class AtomicShaderType
74 {
75 public:
76 AtomicShaderType (glu::ShaderType type, AtomicMemoryType memoryType)
77 : m_type (type)
78 , m_atomicMemoryType (memoryType)
79 {
80 // Shared memory can only be used with compute shaders.
81 DE_ASSERT(memoryType != AtomicMemoryType::SHARED || type == glu::SHADERTYPE_COMPUTE);
82 }
83
84 glu::ShaderType getType (void) const { return m_type; }
85 AtomicMemoryType getMemoryType (void) const { return m_atomicMemoryType; }
86
87 private:
88 glu::ShaderType m_type;
89 AtomicMemoryType m_atomicMemoryType;
90 };
91
92 // Buffer helper
93 class Buffer
94 {
95 public:
96 Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef);
97
98 VkBuffer getBuffer (void) const { return *m_buffer; }
99 void* getHostPtr (void) const { return m_allocation->getHostPtr(); }
100 void flush (void);
101 void invalidate (void);
102
103 private:
104 const DeviceInterface& m_vkd;
105 const VkDevice m_device;
106 const VkQueue m_queue;
107 const deUint32 m_queueIndex;
108 const Unique<VkBuffer> m_buffer;
109 const UniquePtr<Allocation> m_allocation;
110 };
111
112 typedef de::SharedPtr<Buffer> BufferSp;
113
114 Move<VkBuffer> createBuffer (const DeviceInterface& vkd, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usageFlags)
115 {
116 const VkBufferCreateInfo createInfo =
117 {
118 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
119 DE_NULL,
120 (VkBufferCreateFlags)0,
121 size,
122 usageFlags,
123 VK_SHARING_MODE_EXCLUSIVE,
124 0u,
125 DE_NULL
126 };
127 return createBuffer(vkd, device, &createInfo);
128 }
129
130 MovePtr<Allocation> allocateAndBindMemory (const DeviceInterface& vkd, VkDevice device, Allocator& allocator, VkBuffer buffer, bool useRef)
131 {
132 const MemoryRequirement allocationType = (MemoryRequirement::HostVisible | (useRef ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any));
133 MovePtr<Allocation> alloc(allocator.allocate(getBufferMemoryRequirements(vkd, device, buffer), allocationType));
134
135 VK_CHECK(vkd.bindBufferMemory(device, buffer, alloc->getMemory(), alloc->getOffset()));
136
137 return alloc;
138 }
139
140 Buffer::Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef)
141 : m_vkd (context.getDeviceInterface())
142 , m_device (context.getDevice())
143 , m_queue (context.getUniversalQueue())
144 , m_queueIndex (context.getUniversalQueueFamilyIndex())
145 , m_buffer (createBuffer (context.getDeviceInterface(),
146 context.getDevice(),
147 (VkDeviceSize)size,
148 usage))
149 , m_allocation (allocateAndBindMemory (context.getDeviceInterface(),
150 context.getDevice(),
151 context.getDefaultAllocator(),
152 *m_buffer,
153 useRef))
154 {
155 }
156
157 void Buffer::flush (void)
158 {
159 flushMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
160 }
161
162 void Buffer::invalidate (void)
163 {
164 const auto cmdPool = vk::makeCommandPool(m_vkd, m_device, m_queueIndex);
165 const auto cmdBufferPtr = vk::allocateCommandBuffer(m_vkd, m_device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
166 const auto cmdBuffer = cmdBufferPtr.get();
167 const auto bufferBarrier = vk::makeBufferMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, m_buffer.get(), 0ull, VK_WHOLE_SIZE);
168
169 beginCommandBuffer(m_vkd, cmdBuffer);
170 m_vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
171 endCommandBuffer(m_vkd, cmdBuffer);
172 submitCommandsAndWait(m_vkd, m_device, m_queue, cmdBuffer);
173
174 invalidateMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
175 }
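// Note on the sequence above: invalidate() first submits an ALL_COMMANDS -> HOST barrier
// (MEMORY_WRITE -> HOST_READ) so that any device-side writes, including the atomic results,
// are made available to the host before the mapped range is invalidated and read back on the CPU.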
176
177 // Tests
178
179 enum AtomicOperation
180 {
181 ATOMIC_OP_EXCHANGE = 0,
182 ATOMIC_OP_COMP_SWAP,
183 ATOMIC_OP_ADD,
184 ATOMIC_OP_MIN,
185 ATOMIC_OP_MAX,
186 ATOMIC_OP_AND,
187 ATOMIC_OP_OR,
188 ATOMIC_OP_XOR,
189
190 ATOMIC_OP_LAST
191 };
192
193 std::string atomicOp2Str (AtomicOperation op)
194 {
195 static const char* const s_names[] =
196 {
197 "atomicExchange",
198 "atomicCompSwap",
199 "atomicAdd",
200 "atomicMin",
201 "atomicMax",
202 "atomicAnd",
203 "atomicOr",
204 "atomicXor"
205 };
206 return de::getSizedArrayElement<ATOMIC_OP_LAST>(s_names, op);
207 }
208
209 enum
210 {
211 NUM_ELEMENTS = 32
212 };
213
214 enum DataType
215 {
216 DATA_TYPE_FLOAT16 = 0,
217 DATA_TYPE_INT32,
218 DATA_TYPE_UINT32,
219 DATA_TYPE_FLOAT32,
220 DATA_TYPE_INT64,
221 DATA_TYPE_UINT64,
222 DATA_TYPE_FLOAT64,
223
224 DATA_TYPE_LAST
225 };
226
227 std::string dataType2Str (DataType type)
228 {
229 static const char* const s_names[] =
230 {
231 "float16_t",
232 "int",
233 "uint",
234 "float",
235 "int64_t",
236 "uint64_t",
237 "double",
238 };
239 return de::getSizedArrayElement<DATA_TYPE_LAST>(s_names, type);
240 }
241
242 class BufferInterface
243 {
244 public:
245 virtual void setBuffer(void* ptr) = 0;
246
247 virtual size_t bufferSize() = 0;
248
249 virtual void fillWithTestData(de::Random &rnd) = 0;
250
251 virtual void checkResults(tcu::ResultCollector& resultCollector) = 0;
252
253 virtual ~BufferInterface() {}
254 };
255
256 template<typename dataTypeT>
257 class TestBuffer : public BufferInterface
258 {
259 public:
260
261 TestBuffer (AtomicOperation atomicOp)
262 : m_atomicOp(atomicOp)
263 {}
264
265 template<typename T>
266 struct BufferData
267 {
268 // Use half the number of elements for inout to cause overlap between atomic operations.
269 // Each inout element at index i will have two atomic operations using input from
270 // indices i and i + NUM_ELEMENTS / 2.
271 T inout[NUM_ELEMENTS / 2];
272 T input[NUM_ELEMENTS];
273 T compare[NUM_ELEMENTS];
274 T output[NUM_ELEMENTS];
275 T invocationHitCount[NUM_ELEMENTS];
276 deInt32 index;
277 };
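// A minimal sketch of the index mapping implied by the layout above (with NUM_ELEMENTS = 32):
//   operation with idx = 0  uses inout[0] and input[0]  and writes output[0]
//   operation with idx = 16 uses inout[0] and input[16] and writes output[16]
// i.e. operations idx and idx + NUM_ELEMENTS / 2 race on inout[idx % (NUM_ELEMENTS / 2)].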
278
279 virtual void setBuffer (void* ptr)
280 {
281 m_ptr = static_cast<BufferData<dataTypeT>*>(ptr);
282 }
283
284 virtual size_t bufferSize ()
285 {
286 return sizeof(BufferData<dataTypeT>);
287 }
288
289 virtual void fillWithTestData (de::Random& rnd)
290 {
291 dataTypeT pattern;
292 deMemset(&pattern, 0xcd, sizeof(dataTypeT));
293
294 for (int i = 0; i < NUM_ELEMENTS / 2; i++)
295 {
296 m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getUint64());
297 // The first half of the compare elements matches the original inout value at even
298 // indices and the second half matches at odd indices, so only one of the two
299 // overlapping compSwap operations can succeed against the original value.
300 m_ptr->compare[i] = m_ptr->inout[i] + (i % 2);
301 m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + 1 - (i % 2);
302 }
303 for (int i = 0; i < NUM_ELEMENTS; i++)
304 {
305 m_ptr->input[i] = static_cast<dataTypeT>(rnd.getUint64());
306 m_ptr->output[i] = pattern;
307 m_ptr->invocationHitCount[i] = 0;
308 }
309 m_ptr->index = 0;
310
311 // Take a copy to be used when calculating expected values.
312 m_original = *m_ptr;
313 }
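// Worked example of the compare layout above (used by atomicCompSwap), taking i = 0 and i = 1:
//   compare[0]  == inout[0]      -> the compSwap using input[0] can succeed against the original value
//   compare[16] == inout[0] + 1  -> the overlapping compSwap using input[16] cannot
//   compare[1]  == inout[1] + 1  -> the compSwap using input[1] cannot succeed
//   compare[17] == inout[1]      -> the overlapping compSwap using input[17] can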
314
315 virtual void checkResults (tcu::ResultCollector& resultCollector)
316 {
317 checkOperation(m_original, *m_ptr, resultCollector);
318 }
319
320 template<typename T>
321 struct Expected
322 {
323 T m_inout;
324 T m_output[2];
325
326 Expected (T inout, T output0, T output1)
327 : m_inout(inout)
328 {
329 m_output[0] = output0;
330 m_output[1] = output1;
331 }
332
333 bool compare (T inout, T output0, T output1)
334 {
335 return (deMemCmp((const void*)&m_inout, (const void*)&inout, sizeof(inout)) == 0
336 && deMemCmp((const void*)&m_output[0], (const void*)&output0, sizeof(output0)) == 0
337 && deMemCmp((const void*)&m_output[1], (const void*)&output1, sizeof(output1)) == 0);
338 }
339 };
340
341 void checkOperation (const BufferData<dataTypeT>& original,
342 const BufferData<dataTypeT>& result,
343 tcu::ResultCollector& resultCollector);
344
345 const AtomicOperation m_atomicOp;
346
347 BufferData<dataTypeT>* m_ptr;
348 BufferData<dataTypeT> m_original;
349
350 };
351
352 template<typename T>
353 bool nanSafeSloppyEquals (T x, T y)
354 {
355 if (deIsIEEENaN(x) && deIsIEEENaN(y))
356 return true;
357
358 if (deIsIEEENaN(x) || deIsIEEENaN(y))
359 return false;
360
361 return fabs(deToDouble(x) - deToDouble(y)) < 0.00001;
362 }
363
364 template<typename dataTypeT>
365 class TestBufferFloatingPoint : public BufferInterface
366 {
367 public:
368
369 TestBufferFloatingPoint (AtomicOperation atomicOp)
370 : m_atomicOp(atomicOp)
371 {}
372
373 template<typename T>
374 struct BufferDataFloatingPoint
375 {
376 // Use half the number of elements for inout to cause overlap between atomic operations.
377 // Each inout element at index i will have two atomic operations using input from
378 // indices i and i + NUM_ELEMENTS / 2.
379 T inout[NUM_ELEMENTS / 2];
380 T input[NUM_ELEMENTS];
381 T compare[NUM_ELEMENTS];
382 T output[NUM_ELEMENTS];
383 deInt32 invocationHitCount[NUM_ELEMENTS];
384 deInt32 index;
385 };
386
387 virtual void setBuffer (void* ptr)
388 {
389 m_ptr = static_cast<BufferDataFloatingPoint<dataTypeT>*>(ptr);
390 }
391
392 virtual size_t bufferSize ()
393 {
394 return sizeof(BufferDataFloatingPoint<dataTypeT>);
395 }
396
397 virtual void fillWithTestData (de::Random& rnd)
398 {
399 dataTypeT pattern;
400 deMemset(&pattern, 0xcd, sizeof(dataTypeT));
401
402 for (int i = 0; i < NUM_ELEMENTS / 2; i++)
403 {
404 m_ptr->inout[i] = deToFloatType<dataTypeT>(rnd.getFloat());
405 // These aren't used by any of the float tests
406 m_ptr->compare[i] = deToFloatType<dataTypeT>(0.0);
407 }
408 // Add special cases for NaN and +/-0
409 // 0: min(sNaN, x)
410 m_ptr->inout[0] = deSignalingNaN<dataTypeT>();
411 // 1: min(x, sNaN)
412 m_ptr->input[1 * 2 + 0] = deSignalingNaN<dataTypeT>();
413 // 2: min(qNaN, x)
414 m_ptr->inout[2] = deQuietNaN<dataTypeT>();
415 // 3: min(x, qNaN)
416 m_ptr->input[3 * 2 + 0] = deQuietNaN<dataTypeT>();
417 // 4: min(NaN, NaN)
418 m_ptr->inout[4] = deSignalingNaN<dataTypeT>();
419 m_ptr->input[4 * 2 + 0] = deQuietNaN<dataTypeT>();
420 m_ptr->input[4 * 2 + 1] = deQuietNaN<dataTypeT>();
421 // 5: min(+0, -0)
422 m_ptr->inout[5] = deToFloatType<dataTypeT>(-0.0);
423 m_ptr->input[5 * 2 + 0] = deToFloatType<dataTypeT>(0.0);
424 m_ptr->input[5 * 2 + 1] = deToFloatType<dataTypeT>(0.0);
425
426 for (int i = 0; i < NUM_ELEMENTS; i++)
427 {
428 m_ptr->input[i] = deToFloatType<dataTypeT>(rnd.getFloat());
429 m_ptr->output[i] = pattern;
430 m_ptr->invocationHitCount[i] = 0;
431 }
432
433 m_ptr->index = 0;
434
435 // Take a copy to be used when calculating expected values.
436 m_original = *m_ptr;
437 }
438
439 virtual void checkResults (tcu::ResultCollector& resultCollector)
440 {
441 checkOperationFloatingPoint(m_original, *m_ptr, resultCollector);
442 }
443
444 template<typename T>
445 struct Expected
446 {
447 T m_inout;
448 T m_output[2];
449
450 Expected (T inout, T output0, T output1)
451 : m_inout(inout)
452 {
453 m_output[0] = output0;
454 m_output[1] = output1;
455 }
456
457 bool compare (T inout, T output0, T output1)
458 {
459 return nanSafeSloppyEquals(m_inout, inout) &&
460 nanSafeSloppyEquals(m_output[0], output0) &&
461 nanSafeSloppyEquals(m_output[1], output1);
462 }
463 };
464
465 void checkOperationFloatingPoint(const BufferDataFloatingPoint<dataTypeT>& original,
466 const BufferDataFloatingPoint<dataTypeT>& result,
467 tcu::ResultCollector& resultCollector);
468
469 const AtomicOperation m_atomicOp;
470
471 BufferDataFloatingPoint<dataTypeT>* m_ptr;
472 BufferDataFloatingPoint<dataTypeT> m_original;
473
474 };
475
476 static BufferInterface* createTestBuffer (DataType type, AtomicOperation atomicOp)
477 {
478 switch (type)
479 {
480 case DATA_TYPE_FLOAT16:
481 return new TestBufferFloatingPoint<deFloat16>(atomicOp);
482 case DATA_TYPE_INT32:
483 return new TestBuffer<deInt32>(atomicOp);
484 case DATA_TYPE_UINT32:
485 return new TestBuffer<deUint32>(atomicOp);
486 case DATA_TYPE_FLOAT32:
487 return new TestBufferFloatingPoint<float>(atomicOp);
488 case DATA_TYPE_INT64:
489 return new TestBuffer<deInt64>(atomicOp);
490 case DATA_TYPE_UINT64:
491 return new TestBuffer<deUint64>(atomicOp);
492 case DATA_TYPE_FLOAT64:
493 return new TestBufferFloatingPoint<double>(atomicOp);
494 default:
495 DE_ASSERT(false);
496 return DE_NULL;
497 }
498 }
499
500 // Use template to handle both signed and unsigned cases. SPIR-V should
501 // have separate operations for both.
502 template<typename T>
503 void TestBuffer<T>::checkOperation (const BufferData<T>& original,
504 const BufferData<T>& result,
505 tcu::ResultCollector& resultCollector)
506 {
507 // originalInout = original inout
508 // input0 = input at index i
509 // input1 = input at index i + NUM_ELEMENTS / 2
510 //
511 // The atomic operation returns the memory contents before
512 // the operation, and this value is stored as the output. Two operations
513 // are executed for each InOut value (using input0 and input1).
514 //
515 // Since two operations overlap on each
516 // InOut element, the resulting InOut value and
517 // the outputs of the operations each have two result candidates
518 // depending on the execution order. Verification passes
519 // if the results match one of these options.
520
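// For example (an illustrative sketch, not actual test data): with atomicAdd, originalInout = 1,
// input0 = 2 and input1 = 3, the final inout is always 6, while the per-operation outputs are
// (output0, output1) = (1, 3) if the input0 operation executes first, or (4, 1) if the input1
// operation executes first; both orderings are accepted below.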
521 for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
522 {
523 // Needed when reinterpreting the data as signed values.
524 const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
525 const T input0 = *reinterpret_cast<const T*>(&original.input[elementNdx]);
526 const T input1 = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);
527
528 // Expected results are collected to this vector.
529 vector<Expected<T> > exp;
530
531 switch (m_atomicOp)
532 {
533 case ATOMIC_OP_ADD:
534 {
535 exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
536 exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
537 }
538 break;
539
540 case ATOMIC_OP_AND:
541 {
542 exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout, originalInout & input0));
543 exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout & input1, originalInout));
544 }
545 break;
546
547 case ATOMIC_OP_OR:
548 {
549 exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout, originalInout | input0));
550 exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout | input1, originalInout));
551 }
552 break;
553
554 case ATOMIC_OP_XOR:
555 {
556 exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout, originalInout ^ input0));
557 exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout ^ input1, originalInout));
558 }
559 break;
560
561 case ATOMIC_OP_MIN:
562 {
563 exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), originalInout, de::min(originalInout, input0)));
564 exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), de::min(originalInout, input1), originalInout));
565 }
566 break;
567
568 case ATOMIC_OP_MAX:
569 {
570 exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), originalInout, de::max(originalInout, input0)));
571 exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), de::max(originalInout, input1), originalInout));
572 }
573 break;
574
575 case ATOMIC_OP_EXCHANGE:
576 {
577 exp.push_back(Expected<T>(input1, originalInout, input0));
578 exp.push_back(Expected<T>(input0, input1, originalInout));
579 }
580 break;
581
582 case ATOMIC_OP_COMP_SWAP:
583 {
584 if (elementNdx % 2 == 0)
585 {
586 exp.push_back(Expected<T>(input0, originalInout, input0));
587 exp.push_back(Expected<T>(input0, originalInout, originalInout));
588 }
589 else
590 {
591 exp.push_back(Expected<T>(input1, input1, originalInout));
592 exp.push_back(Expected<T>(input1, originalInout, originalInout));
593 }
594 }
595 break;
596
597
598 default:
599 DE_FATAL("Unexpected atomic operation.");
600 break;
601 }
602
603 const T resIo = result.inout[elementNdx];
604 const T resOutput0 = result.output[elementNdx];
605 const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];
606
607
608 if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
609 {
610 std::ostringstream errorMessage;
611 errorMessage << "ERROR: Result value check failed at index " << elementNdx
612 << ". Expected one of the two outcomes: InOut = " << tcu::toHex(exp[0].m_inout)
613 << ", Output0 = " << tcu::toHex(exp[0].m_output[0]) << ", Output1 = "
614 << tcu::toHex(exp[0].m_output[1]) << ", or InOut = " << tcu::toHex(exp[1].m_inout)
615 << ", Output0 = " << tcu::toHex(exp[1].m_output[0]) << ", Output1 = "
616 << tcu::toHex(exp[1].m_output[1]) << ". Got: InOut = " << tcu::toHex(resIo)
617 << ", Output0 = " << tcu::toHex(resOutput0) << ", Output1 = "
618 << tcu::toHex(resOutput1) << ". Using Input0 = " << tcu::toHex(original.input[elementNdx])
619 << " and Input1 = " << tcu::toHex(original.input[elementNdx + NUM_ELEMENTS / 2]) << ".";
620
621 resultCollector.fail(errorMessage.str());
622 }
623 }
624 }
625
626 template<typename T>
627 void handleExceptionalFloatMinMaxValues (vector<T>& values, T x, T y)
628 {
629
630 if (deIsSignalingNaN(x) && deIsSignalingNaN(y))
631 {
632 values.push_back(deQuietNaN<T>());
633 values.push_back(deSignalingNaN<T>());
634 }
635 else if (deIsSignalingNaN(x))
636 {
637 values.push_back(deQuietNaN<T>());
638 values.push_back(deSignalingNaN<T>());
639 if (!deIsIEEENaN(y))
640 values.push_back(y);
641 }
642 else if (deIsSignalingNaN(y))
643 {
644 values.push_back(deQuietNaN<T>());
645 values.push_back(deSignalingNaN<T>());
646 if (!deIsIEEENaN(x))
647 values.push_back(x);
648 }
649 else if (deIsIEEENaN(x) && deIsIEEENaN(y))
650 {
651 // Both quiet NaNs
652 values.push_back(deQuietNaN<T>());
653 }
654 else if (deIsIEEENaN(x))
655 {
656 // One quiet NaN and one non-NaN.
657 values.push_back(y);
658 }
659 else if (deIsIEEENaN(y))
660 {
661 // One quiet NaN and one non-NaN.
662 values.push_back(x);
663 }
664 else if ((deIsPositiveZero(x) && deIsNegativeZero(y)) || (deIsNegativeZero(x) && deIsPositiveZero(y)))
665 {
666 values.push_back(deToFloatType<T>(0.0));
667 values.push_back(deToFloatType<T>(-0.0));
668 }
669 }
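// The candidate lists above are intentionally permissive: when a signaling NaN is involved, a quiet
// NaN, a signaling NaN or the non-NaN operand (when there is one) are all accepted, and min/max of
// +0.0 and -0.0 may return either zero, since implementations may legitimately differ here.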
670
671 template<typename T>
672 T floatAdd (T x, T y)
673 {
674 if (deIsIEEENaN(x) || deIsIEEENaN(y))
675 return deQuietNaN<T>();
676 return deToFloatType<T>(deToDouble(x) + deToDouble(y));
677 }
678
679 template<typename T>
680 vector<T> floatMinValues (T x, T y)
681 {
682 vector<T> values;
683 handleExceptionalFloatMinMaxValues(values, x, y);
684 if (values.empty())
685 {
686 values.push_back(deToDouble(x) < deToDouble(y) ? x : y);
687 }
688 return values;
689 }
690
691 template<typename T>
692 vector<T> floatMaxValues (T x, T y)
693 {
694 vector<T> values;
695 handleExceptionalFloatMinMaxValues(values, x, y);
696 if (values.empty())
697 {
698 values.push_back(deToDouble(x) > deToDouble(y) ? x : y);
699 }
700 return values;
701 }
702
703 // Use template to handle both float and double cases. SPIR-V should
704 // have separate operations for both.
705 template<typename T>
706 void TestBufferFloatingPoint<T>::checkOperationFloatingPoint (const BufferDataFloatingPoint<T>& original,
707 const BufferDataFloatingPoint<T>& result,
708 tcu::ResultCollector& resultCollector)
709 {
710 // originalInout = original inout
711 // input0 = input at index i
712 // input1 = input at index i + NUM_ELEMENTS / 2
713 //
714 // The atomic operation returns the memory contents before
715 // the operation, and this value is stored as the output. Two operations
716 // are executed for each InOut value (using input0 and input1).
717 //
718 // Since two operations overlap on each
719 // InOut element, the resulting InOut value and
720 // the outputs of the operations each have two result candidates
721 // depending on the execution order. Verification passes
722 // if the results match one of these options.
723
724 for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
725 {
726 // Needed when reinterpreting the data as signed values.
727 const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
728 const T input0 = *reinterpret_cast<const T*>(&original.input[elementNdx]);
729 const T input1 = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);
730
731 // Expected results are collected to this vector.
732 vector<Expected<T> > exp;
733
734 switch (m_atomicOp)
735 {
736 case ATOMIC_OP_ADD:
737 {
738 exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), originalInout, floatAdd(originalInout, input0)));
739 exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), floatAdd(originalInout, input1), originalInout));
740 }
741 break;
742
743 case ATOMIC_OP_MIN:
744 {
745 // The case where input0 is combined first
746 vector<T> minOriginalAndInput0 = floatMinValues(originalInout, input0);
747 for (T x : minOriginalAndInput0)
748 {
749 vector<T> minAll = floatMinValues(x, input1);
750 for (T y : minAll)
751 {
752 exp.push_back(Expected<T>(y, originalInout, x));
753 }
754 }
755
756 // The case where input1 is combined first
757 vector<T> minOriginalAndInput1 = floatMinValues(originalInout, input1);
758 for (T x : minOriginalAndInput1)
759 {
760 vector<T> minAll = floatMinValues(x, input0);
761 for (T y : minAll)
762 {
763 exp.push_back(Expected<T>(y, x, originalInout));
764 }
765 }
766 }
767 break;
768
769 case ATOMIC_OP_MAX:
770 {
771 // The case where input0 is combined first
772 vector<T> minOriginalAndInput0 = floatMaxValues(originalInout, input0);
773 for (T x : minOriginalAndInput0)
774 {
775 vector<T> minAll = floatMaxValues(x, input1);
776 for (T y : minAll)
777 {
778 exp.push_back(Expected<T>(y, originalInout, x));
779 }
780 }
781
782 // The case where input1 is combined first
783 vector<T> minOriginalAndInput1 = floatMaxValues(originalInout, input1);
784 for (T x : minOriginalAndInput1)
785 {
786 vector<T> minAll = floatMaxValues(x, input0);
787 for (T y : minAll)
788 {
789 exp.push_back(Expected<T>(y, x, originalInout));
790 }
791 }
792 }
793 break;
794
795 case ATOMIC_OP_EXCHANGE:
796 {
797 exp.push_back(Expected<T>(input1, originalInout, input0));
798 exp.push_back(Expected<T>(input0, input1, originalInout));
799 }
800 break;
801
802 default:
803 DE_FATAL("Unexpected atomic operation.");
804 break;
805 }
806
807 const T resIo = result.inout[elementNdx];
808 const T resOutput0 = result.output[elementNdx];
809 const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];
810
811
812 bool hasMatch = false;
813 for (Expected<T> e : exp)
814 {
815 if (e.compare(resIo, resOutput0, resOutput1))
816 {
817 hasMatch = true;
818 break;
819 }
820 }
821 if (!hasMatch)
822 {
823 std::ostringstream errorMessage;
824 errorMessage << "ERROR: Result value check failed at index " << elementNdx
825 << ". Expected one of the outcomes:";
826
827 bool first = true;
828 for (Expected<T> e : exp)
829 {
830 if (!first)
831 errorMessage << ", or";
832 first = false;
833
834 errorMessage << " InOut = " << e.m_inout
835 << ", Output0 = " << e.m_output[0]
836 << ", Output1 = " << e.m_output[1];
837 }
838
839 errorMessage << ". Got: InOut = " << resIo
840 << ", Output0 = " << resOutput0
841 << ", Output1 = " << resOutput1
842 << ". Using Input0 = " << original.input[elementNdx]
843 << " and Input1 = " << original.input[elementNdx + NUM_ELEMENTS / 2] << ".";
844
845 resultCollector.fail(errorMessage.str());
846 }
847 }
848 }
849
850 class AtomicOperationCaseInstance : public TestInstance
851 {
852 public:
853 AtomicOperationCaseInstance (Context& context,
854 const ShaderSpec& shaderSpec,
855 AtomicShaderType shaderType,
856 DataType dataType,
857 AtomicOperation atomicOp);
858
859 virtual tcu::TestStatus iterate (void);
860
861 private:
862 const ShaderSpec& m_shaderSpec;
863 AtomicShaderType m_shaderType;
864 const DataType m_dataType;
865 AtomicOperation m_atomicOp;
866
867 };
868
869 AtomicOperationCaseInstance::AtomicOperationCaseInstance (Context& context,
870 const ShaderSpec& shaderSpec,
871 AtomicShaderType shaderType,
872 DataType dataType,
873 AtomicOperation atomicOp)
874 : TestInstance (context)
875 , m_shaderSpec (shaderSpec)
876 , m_shaderType (shaderType)
877 , m_dataType (dataType)
878 , m_atomicOp (atomicOp)
879 {
880 }
881
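// iterate() below, in outline: create the test buffer (plus an address-holding uniform buffer in
// the buffer-reference case), fill it with input data and flush, build a one-binding descriptor
// set for it, run the shader through the ShaderExecutor, invalidate the buffer and then verify
// the atomic results on the host.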
882 tcu::TestStatus AtomicOperationCaseInstance::iterate (void)
883 {
884 de::UniquePtr<BufferInterface> testBuffer (createTestBuffer(m_dataType, m_atomicOp));
885 tcu::TestLog& log = m_context.getTestContext().getLog();
886 const DeviceInterface& vkd = m_context.getDeviceInterface();
887 const VkDevice device = m_context.getDevice();
888 de::Random rnd (0x62a15e34);
889 const bool useRef = (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE);
890 const VkDescriptorType descType = (useRef ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
891 const VkBufferUsageFlags usageFlags = (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | (useRef ? static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : 0u));
892
893 // The main buffer will hold test data. When using buffer references, the buffer's address will be indirectly passed as part of
894 // a uniform buffer. If not, it will be passed directly as a descriptor.
895 Buffer buffer (m_context, usageFlags, testBuffer->bufferSize(), useRef);
896 std::unique_ptr<Buffer> auxBuffer;
897
898 if (useRef)
899 {
900 // Pass the main buffer address inside a uniform buffer.
901 const VkBufferDeviceAddressInfo addressInfo =
902 {
903 VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
904 nullptr, // const void* pNext;
905 buffer.getBuffer(), // VkBuffer buffer;
906 };
907 const auto address = vkd.getBufferDeviceAddress(device, &addressInfo);
908
909 auxBuffer.reset(new Buffer(m_context, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, sizeof(address), false));
910 deMemcpy(auxBuffer->getHostPtr(), &address, sizeof(address));
911 auxBuffer->flush();
912 }
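// In the reference case the descriptor set will point at auxBuffer, whose only content is the
// 64-bit device address of the main buffer; the generated shader declares the corresponding
// uniform block as "References { AtomicBuffer buf; }", so "buf.data" dereferences the same
// memory the host writes above and reads back below.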
913
914 testBuffer->setBuffer(buffer.getHostPtr());
915 testBuffer->fillWithTestData(rnd);
916
917 buffer.flush();
918
919 Move<VkDescriptorSetLayout> extraResourcesLayout;
920 Move<VkDescriptorPool> extraResourcesSetPool;
921 Move<VkDescriptorSet> extraResourcesSet;
922
923 const VkDescriptorSetLayoutBinding bindings[] =
924 {
925 { 0u, descType, 1, VK_SHADER_STAGE_ALL, DE_NULL }
926 };
927
928 const VkDescriptorSetLayoutCreateInfo layoutInfo =
929 {
930 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
931 DE_NULL,
932 (VkDescriptorSetLayoutCreateFlags)0u,
933 DE_LENGTH_OF_ARRAY(bindings),
934 bindings
935 };
936
937 extraResourcesLayout = createDescriptorSetLayout(vkd, device, &layoutInfo);
938
939 const VkDescriptorPoolSize poolSizes[] =
940 {
941 { descType, 1u }
942 };
943
944 const VkDescriptorPoolCreateInfo poolInfo =
945 {
946 VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
947 DE_NULL,
948 (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
949 1u, // maxSets
950 DE_LENGTH_OF_ARRAY(poolSizes),
951 poolSizes
952 };
953
954 extraResourcesSetPool = createDescriptorPool(vkd, device, &poolInfo);
955
956 const VkDescriptorSetAllocateInfo allocInfo =
957 {
958 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
959 DE_NULL,
960 *extraResourcesSetPool,
961 1u,
962 &extraResourcesLayout.get()
963 };
964
965 extraResourcesSet = allocateDescriptorSet(vkd, device, &allocInfo);
966
967 VkDescriptorBufferInfo bufferInfo;
968 bufferInfo.buffer = (useRef ? auxBuffer->getBuffer() : buffer.getBuffer());
969 bufferInfo.offset = 0u;
970 bufferInfo.range = VK_WHOLE_SIZE;
971
972 const VkWriteDescriptorSet descriptorWrite =
973 {
974 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
975 DE_NULL,
976 *extraResourcesSet,
977 0u, // dstBinding
978 0u, // dstArrayElement
979 1u,
980 descType,
981 (const VkDescriptorImageInfo*)DE_NULL,
982 &bufferInfo,
983 (const VkBufferView*)DE_NULL
984 };
985
986 vkd.updateDescriptorSets(device, 1u, &descriptorWrite, 0u, DE_NULL);
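// The single binding written above corresponds to the "set = ${SETIDX}, binding = 0" block (or
// uniform reference block) declared by the generated shader; the ShaderExecutor is expected to
// bind it as its extra-resources descriptor set alongside its own input/output buffers.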
987
988 // Storage for output varying data.
989 std::vector<deUint32> outputs (NUM_ELEMENTS);
990 std::vector<void*> outputPtr (NUM_ELEMENTS);
991
992 for (size_t i = 0; i < NUM_ELEMENTS; i++)
993 {
994 outputs[i] = 0xcdcdcdcd;
995 outputPtr[i] = &outputs[i];
996 }
997
998 const int numWorkGroups = ((m_shaderType.getMemoryType() == AtomicMemoryType::SHARED) ? 1 : static_cast<int>(NUM_ELEMENTS));
999 UniquePtr<ShaderExecutor> executor (createExecutor(m_context, m_shaderType.getType(), m_shaderSpec, *extraResourcesLayout));
1000
1001 executor->execute(numWorkGroups, DE_NULL, &outputPtr[0], *extraResourcesSet);
1002 buffer.invalidate();
1003
1004 tcu::ResultCollector resultCollector(log);
1005
1006 // Check the results of the atomic operation
1007 testBuffer->checkResults(resultCollector);
1008
1009 return tcu::TestStatus(resultCollector.getResult(), resultCollector.getMessage());
1010 }
1011
1012 class AtomicOperationCase : public TestCase
1013 {
1014 public:
1015 AtomicOperationCase (tcu::TestContext& testCtx,
1016 const char* name,
1017 const char* description,
1018 AtomicShaderType type,
1019 DataType dataType,
1020 AtomicOperation atomicOp);
1021 virtual ~AtomicOperationCase (void);
1022
1023 virtual TestInstance* createInstance (Context& ctx) const;
1024 virtual void checkSupport (Context& ctx) const;
1025 virtual void initPrograms (vk::SourceCollections& programCollection) const
1026 {
1027 generateSources(m_shaderType.getType(), m_shaderSpec, programCollection);
1028 }
1029
1030 private:
1031
1032 void createShaderSpec();
1033 ShaderSpec m_shaderSpec;
1034 const AtomicShaderType m_shaderType;
1035 const DataType m_dataType;
1036 const AtomicOperation m_atomicOp;
1037 };
1038
1039 AtomicOperationCase::AtomicOperationCase (tcu::TestContext& testCtx,
1040 const char* name,
1041 const char* description,
1042 AtomicShaderType shaderType,
1043 DataType dataType,
1044 AtomicOperation atomicOp)
1045 : TestCase (testCtx, name, description)
1046 , m_shaderType (shaderType)
1047 , m_dataType (dataType)
1048 , m_atomicOp (atomicOp)
1049 {
1050 createShaderSpec();
1051 init();
1052 }
1053
1054 AtomicOperationCase::~AtomicOperationCase (void)
1055 {
1056 }
1057
1058 TestInstance* AtomicOperationCase::createInstance (Context& ctx) const
1059 {
1060 return new AtomicOperationCaseInstance(ctx, m_shaderSpec, m_shaderType, m_dataType, m_atomicOp);
1061 }
1062
1063 void AtomicOperationCase::checkSupport (Context& ctx) const
1064 {
1065 if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
1066 {
1067 ctx.requireDeviceFunctionality("VK_KHR_shader_atomic_int64");
1068
1069 const auto atomicInt64Features = ctx.getShaderAtomicInt64Features();
1070 const bool isSharedMemory = (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED);
1071
1072 if (!isSharedMemory && atomicInt64Features.shaderBufferInt64Atomics == VK_FALSE)
1073 {
1074 TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for buffers");
1075 }
1076 if (isSharedMemory && atomicInt64Features.shaderSharedInt64Atomics == VK_FALSE)
1077 {
1078 TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for shared memory");
1079 }
1080 }
1081
1082 if (m_dataType == DATA_TYPE_FLOAT16)
1083 {
1084 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
1085 if (m_atomicOp == ATOMIC_OP_ADD)
1086 {
1087 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1088 {
1089 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicAdd)
1090 {
1091 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared add atomic operation not supported");
1092 }
1093 }
1094 else
1095 {
1096 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicAdd)
1097 {
1098 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer add atomic operation not supported");
1099 }
1100 }
1101 }
1102 if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
1103 {
1104 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1105 {
1106 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicMinMax)
1107 {
1108 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared min/max atomic operation not supported");
1109 }
1110 }
1111 else
1112 {
1113 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicMinMax)
1114 {
1115 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer min/max atomic operation not supported");
1116 }
1117 }
1118 }
1119 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
1120 {
1121 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1122 {
1123 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16Atomics)
1124 {
1125 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared atomic operations not supported");
1126 }
1127 }
1128 else
1129 {
1130 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16Atomics)
1131 {
1132 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer atomic operations not supported");
1133 }
1134 }
1135 }
1136 }
1137
1138 if (m_dataType == DATA_TYPE_FLOAT32)
1139 {
1140 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
1141 if (m_atomicOp == ATOMIC_OP_ADD)
1142 {
1143 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1144 {
1145 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32AtomicAdd)
1146 {
1147 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared add atomic operation not supported");
1148 }
1149 }
1150 else
1151 {
1152 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32AtomicAdd)
1153 {
1154 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer add atomic operation not supported");
1155 }
1156 }
1157 }
1158 if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
1159 {
1160 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
1161 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1162 {
1163 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat32AtomicMinMax)
1164 {
1165 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared min/max atomic operation not supported");
1166 }
1167 }
1168 else
1169 {
1170 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat32AtomicMinMax)
1171 {
1172 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer min/max atomic operation not supported");
1173 }
1174 }
1175 }
1176 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
1177 {
1178 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1179 {
1180 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)
1181 {
1182 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
1183 }
1184 }
1185 else
1186 {
1187 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)
1188 {
1189 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
1190 }
1191 }
1192 }
1193 }
1194
1195 if (m_dataType == DATA_TYPE_FLOAT64)
1196 {
1197 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
1198 if (m_atomicOp == ATOMIC_OP_ADD)
1199 {
1200 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1201 {
1202 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64AtomicAdd)
1203 {
1204 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared add atomic operation not supported");
1205 }
1206 }
1207 else
1208 {
1209 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64AtomicAdd)
1210 {
1211 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer add atomic operation not supported");
1212 }
1213 }
1214 }
1215 if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
1216 {
1217 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
1218 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1219 {
1220 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat64AtomicMinMax)
1221 {
1222 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared min/max atomic operation not supported");
1223 }
1224 }
1225 else
1226 {
1227 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat64AtomicMinMax)
1228 {
1229 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer min/max atomic operation not supported");
1230 }
1231 }
1232 }
1233 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
1234 {
1235 if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
1236 {
1237 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)
1238 {
1239 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
1240 }
1241 }
1242 else
1243 {
1244 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)
1245 {
1246 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
1247 }
1248 }
1249 }
1250 }
1251
1252 if (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE)
1253 {
1254 ctx.requireDeviceFunctionality("VK_KHR_buffer_device_address");
1255 }
1256
1257 // Check stores and atomic operation support.
1258 switch (m_shaderType.getType())
1259 {
1260 case glu::SHADERTYPE_VERTEX:
1261 case glu::SHADERTYPE_TESSELLATION_CONTROL:
1262 case glu::SHADERTYPE_TESSELLATION_EVALUATION:
1263 case glu::SHADERTYPE_GEOMETRY:
1264 if (!ctx.getDeviceFeatures().vertexPipelineStoresAndAtomics)
1265 TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in Vertex, Tessellation, and Geometry shader.");
1266 break;
1267 case glu::SHADERTYPE_FRAGMENT:
1268 if (!ctx.getDeviceFeatures().fragmentStoresAndAtomics)
1269 TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in fragment shader.");
1270 break;
1271 case glu::SHADERTYPE_COMPUTE:
1272 break;
1273 default:
1274 DE_FATAL("Unsupported shader type");
1275 }
1276
1277 checkSupportShader(ctx, m_shaderType.getType());
1278 }
1279
1280 void AtomicOperationCase::createShaderSpec (void)
1281 {
1282 const AtomicMemoryType memoryType = m_shaderType.getMemoryType();
1283
1284 // Global declarations.
1285 std::ostringstream shaderTemplateGlobalStream;
1286
1287 // Structure in use for atomic operations.
1288 shaderTemplateGlobalStream
1289 << "${EXTENSIONS}\n"
1290 << "\n"
1291 << "struct AtomicStruct\n"
1292 << "{\n"
1293 << " ${DATATYPE} inoutValues[${N}/2];\n"
1294 << " ${DATATYPE} inputValues[${N}];\n"
1295 << " ${DATATYPE} compareValues[${N}];\n"
1296 << " ${DATATYPE} outputValues[${N}];\n"
1297 << " int invocationHitCount[${N}];\n"
1298 << " int index;\n"
1299 << "};\n"
1300 << "\n"
1301 ;
1302
1303 // The name dance and declarations below will make sure the structure that will be used with atomic operations can be accessed
1304 // as "buf.data", which is the name used in the atomic operation statements.
1305 //
1306 // * When using a buffer directly, RESULT_BUFFER_NAME will be "buf" and the inner struct will be "data".
1307 // * When using a workgroup-shared global variable, the "data" struct will be nested in an auxiliary "buf" struct.
1308 // * When using buffer references, the uniform buffer reference will be called "buf" and its contents "data".
1309 //
1310 if (memoryType != AtomicMemoryType::REFERENCE)
1311 {
1312 shaderTemplateGlobalStream
1313 << "layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {\n"
1314 << " AtomicStruct data;\n"
1315 << "} ${RESULT_BUFFER_NAME};\n"
1316 << "\n"
1317 ;
1318
1319 // When using global shared memory in the compute variant, invocations will use a shared global structure instead of a
1320 // descriptor set as the sources and results of each tested operation.
1321 if (memoryType == AtomicMemoryType::SHARED)
1322 {
1323 shaderTemplateGlobalStream
1324 << "shared struct { AtomicStruct data; } buf;\n"
1325 << "\n"
1326 ;
1327 }
1328 }
1329 else
1330 {
1331 shaderTemplateGlobalStream
1332 << "layout (buffer_reference) buffer AtomicBuffer {\n"
1333 << " AtomicStruct data;\n"
1334 << "};\n"
1335 << "\n"
1336 << "layout (set = ${SETIDX}, binding = 0) uniform References {\n"
1337 << " AtomicBuffer buf;\n"
1338 << "};\n"
1339 << "\n"
1340 ;
1341 }
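// For illustration, assuming ${SETIDX} specializes to 0 and ${DATATYPE} to uint, the plain buffer
// variant above expands to roughly:
//
//   layout (set = 0, binding = 0) buffer AtomicBuffer {
//       AtomicStruct data;
//   } buf;
//
// so the atomic statements below can always address the test data as "buf.data.*".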
1342
1343 const auto shaderTemplateGlobalString = shaderTemplateGlobalStream.str();
1344 const tcu::StringTemplate shaderTemplateGlobal (shaderTemplateGlobalString);
1345
1346 // Shader body for the non-vertex case.
1347 std::ostringstream nonVertexShaderTemplateStream;
1348
1349 if (memoryType == AtomicMemoryType::SHARED)
1350 {
1351 // Invocation zero will initialize the shared structure from the descriptor set.
1352 nonVertexShaderTemplateStream
1353 << "if (gl_LocalInvocationIndex == 0u)\n"
1354 << "{\n"
1355 << " buf.data = ${RESULT_BUFFER_NAME}.data;\n"
1356 << "}\n"
1357 << "barrier();\n"
1358 ;
1359 }
1360
1361 if (m_shaderType.getType() == glu::SHADERTYPE_FRAGMENT)
1362 {
1363 nonVertexShaderTemplateStream
1364 << "if (!gl_HelperInvocation) {\n"
1365 << " int idx = atomicAdd(buf.data.index, 1);\n"
1366 << " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1367 << "}\n"
1368 ;
1369 }
1370 else
1371 {
1372 nonVertexShaderTemplateStream
1373 << "if (atomicAdd(buf.data.invocationHitCount[0], 1) < ${N})\n"
1374 << "{\n"
1375 << " int idx = atomicAdd(buf.data.index, 1);\n"
1376 << " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1377 << "}\n"
1378 ;
1379 }
1380
1381 if (memoryType == AtomicMemoryType::SHARED)
1382 {
1383 // Invocation zero will copy results back to the descriptor set.
1384 nonVertexShaderTemplateStream
1385 << "barrier();\n"
1386 << "if (gl_LocalInvocationIndex == 0u)\n"
1387 << "{\n"
1388 << " ${RESULT_BUFFER_NAME}.data = buf.data;\n"
1389 << "}\n"
1390 ;
1391 }
1392
1393 const auto nonVertexShaderTemplateStreamStr = nonVertexShaderTemplateStream.str();
1394 const tcu::StringTemplate nonVertexShaderTemplateSrc (nonVertexShaderTemplateStreamStr);
1395
1396 // Shader body for the vertex case.
1397 const tcu::StringTemplate vertexShaderTemplateSrc(
1398 "int idx = gl_VertexIndex;\n"
1399 "if (atomicAdd(buf.data.invocationHitCount[idx], 1) == 0)\n"
1400 "{\n"
1401 " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1402 "}\n");
1403
1404 // Extensions.
1405 std::ostringstream extensions;
1406
1407 if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
1408 {
1409 extensions
1410 << "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
1411 << "#extension GL_EXT_shader_atomic_int64 : enable\n"
1412 ;
1413 }
1414 else if ((m_dataType == DATA_TYPE_FLOAT16) || (m_dataType == DATA_TYPE_FLOAT32) || (m_dataType == DATA_TYPE_FLOAT64))
1415 {
1416 extensions
1417 << "#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable\n"
1418 << "#extension GL_EXT_shader_atomic_float : enable\n"
1419 << "#extension GL_EXT_shader_atomic_float2 : enable\n"
1420 << "#extension GL_KHR_memory_scope_semantics : enable\n"
1421 ;
1422 }
1423
1424 if (memoryType == AtomicMemoryType::REFERENCE)
1425 {
1426 extensions << "#extension GL_EXT_buffer_reference : require\n";
1427 }
1428
1429 // Specializations.
1430 std::map<std::string, std::string> specializations;
1431
1432 specializations["EXTENSIONS"] = extensions.str();
1433 specializations["DATATYPE"] = dataType2Str(m_dataType);
1434 specializations["ATOMICOP"] = atomicOp2Str(m_atomicOp);
1435 specializations["SETIDX"] = de::toString((int)EXTRA_RESOURCES_DESCRIPTOR_SET_INDEX);
1436 specializations["N"] = de::toString((int)NUM_ELEMENTS);
1437 specializations["COMPARE_ARG"] = ((m_atomicOp == ATOMIC_OP_COMP_SWAP) ? "buf.data.compareValues[idx], " : "");
1438 specializations["RESULT_BUFFER_NAME"] = ((memoryType == AtomicMemoryType::SHARED) ? "result" : "buf");
1439
1440 // Shader spec.
1441 m_shaderSpec.outputs.push_back(Symbol("outData", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
1442 m_shaderSpec.glslVersion = glu::GLSL_VERSION_450;
1443 m_shaderSpec.globalDeclarations = shaderTemplateGlobal.specialize(specializations);
1444 m_shaderSpec.source = ((m_shaderType.getType() == glu::SHADERTYPE_VERTEX)
1445 ? vertexShaderTemplateSrc.specialize(specializations)
1446 : nonVertexShaderTemplateSrc.specialize(specializations));
1447
1448 if (memoryType == AtomicMemoryType::SHARED)
1449 {
1450 // When using global shared memory, use a single workgroup and an appropriate number of local invocations.
1451 m_shaderSpec.localSizeX = static_cast<int>(NUM_ELEMENTS);
1452 }
1453 }
1454
1455 void addAtomicOperationTests (tcu::TestCaseGroup* atomicOperationTestsGroup)
1456 {
1457 tcu::TestContext& testCtx = atomicOperationTestsGroup->getTestContext();
1458
1459 static const struct
1460 {
1461 glu::ShaderType type;
1462 const char* name;
1463 } shaderTypes[] =
1464 {
1465 { glu::SHADERTYPE_VERTEX, "vertex" },
1466 { glu::SHADERTYPE_FRAGMENT, "fragment" },
1467 { glu::SHADERTYPE_GEOMETRY, "geometry" },
1468 { glu::SHADERTYPE_TESSELLATION_CONTROL, "tess_ctrl" },
1469 { glu::SHADERTYPE_TESSELLATION_EVALUATION, "tess_eval" },
1470 { glu::SHADERTYPE_COMPUTE, "compute" },
1471 };
1472
1473 static const struct
1474 {
1475 AtomicMemoryType type;
1476 const char* suffix;
1477 } kMemoryTypes[] =
1478 {
1479 { AtomicMemoryType::BUFFER, "" },
1480 { AtomicMemoryType::SHARED, "_shared" },
1481 { AtomicMemoryType::REFERENCE, "_reference" },
1482 };
1483
1484 static const struct
1485 {
1486 DataType dataType;
1487 const char* name;
1488 const char* description;
1489 } dataSign[] =
1490 {
1491 { DATA_TYPE_FLOAT16,"float16", "Tests using 16-bit float data" },
1492 { DATA_TYPE_INT32, "signed", "Tests using signed data (int)" },
1493 { DATA_TYPE_UINT32, "unsigned", "Tests using unsigned data (uint)" },
1494 { DATA_TYPE_FLOAT32,"float32", "Tests using 32-bit float data" },
1495 { DATA_TYPE_INT64, "signed64bit", "Tests using 64 bit signed data (int64)" },
1496 { DATA_TYPE_UINT64, "unsigned64bit", "Tests using 64 bit unsigned data (uint64)" },
1497 { DATA_TYPE_FLOAT64,"float64", "Tests using 64-bit float data)" }
1498 };
1499
1500 static const struct
1501 {
1502 AtomicOperation value;
1503 const char* name;
1504 } atomicOp[] =
1505 {
1506 { ATOMIC_OP_EXCHANGE, "exchange" },
1507 { ATOMIC_OP_COMP_SWAP, "comp_swap" },
1508 { ATOMIC_OP_ADD, "add" },
1509 { ATOMIC_OP_MIN, "min" },
1510 { ATOMIC_OP_MAX, "max" },
1511 { ATOMIC_OP_AND, "and" },
1512 { ATOMIC_OP_OR, "or" },
1513 { ATOMIC_OP_XOR, "xor" }
1514 };
1515
1516 for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(atomicOp); opNdx++)
1517 {
1518 for (int signNdx = 0; signNdx < DE_LENGTH_OF_ARRAY(dataSign); signNdx++)
1519 {
1520 for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(shaderTypes); shaderTypeNdx++)
1521 {
1522 // Only ADD, MIN, MAX and EXCHANGE are supported on floating-point types.
1523 if (dataSign[signNdx].dataType == DATA_TYPE_FLOAT16 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT32 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT64)
1524 {
1525 if (atomicOp[opNdx].value != ATOMIC_OP_ADD &&
1526 atomicOp[opNdx].value != ATOMIC_OP_MIN &&
1527 atomicOp[opNdx].value != ATOMIC_OP_MAX &&
1528 atomicOp[opNdx].value != ATOMIC_OP_EXCHANGE)
1529 {
1530 continue;
1531 }
1532 }
1533
1534 for (int memoryTypeNdx = 0; memoryTypeNdx < DE_LENGTH_OF_ARRAY(kMemoryTypes); ++memoryTypeNdx)
1535 {
1536 // Shared memory only available in compute shaders.
1537 if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::SHARED && shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_COMPUTE)
1538 continue;
1539
1540 const std::string description = std::string("Tests atomic operation ") + atomicOp2Str(atomicOp[opNdx].value) + std::string(".");
1541 const std::string name = std::string(atomicOp[opNdx].name) + "_" + std::string(dataSign[signNdx].name) + "_" + std::string(shaderTypes[shaderTypeNdx].name) + kMemoryTypes[memoryTypeNdx].suffix;
1542
1543 atomicOperationTestsGroup->addChild(new AtomicOperationCase(testCtx, name.c_str(), description.c_str(), AtomicShaderType(shaderTypes[shaderTypeNdx].type, kMemoryTypes[memoryTypeNdx].type), dataSign[signNdx].dataType, atomicOp[opNdx].value));
1544 }
1545 }
1546 }
1547 }
1548 }
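// For reference, the loops above generate test names of the form "<op>_<data>_<stage><memory_suffix>",
// e.g. "add_unsigned_compute_shared", "comp_swap_signed64bit_fragment" or "min_float32_vertex_reference".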
1549
1550 } // anonymous
1551
1552 tcu::TestCaseGroup* createAtomicOperationTests (tcu::TestContext& testCtx)
1553 {
1554 return createTestGroup(testCtx, "atomic_operations", "Atomic Operation Tests", addAtomicOperationTests);
1555 }
1556
1557 } // shaderexecutor
1558 } // vkt
1559