1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2015-2024 The Khronos Group Inc.
6 * Copyright (c) 2017 Google Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Atomic operations (OpAtomic*) tests.
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktAtomicOperationTests.hpp"
26 #include "vktShaderExecutor.hpp"
27
28 #include "vkRefUtil.hpp"
29 #include "vkMemUtil.hpp"
30 #include "vkQueryUtil.hpp"
31 #include "vkObjUtil.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkCmdUtil.hpp"
34 #include "vktTestGroupUtil.hpp"
35
36 #include "tcuTestLog.hpp"
37 #include "tcuStringTemplate.hpp"
38 #include "tcuResultCollector.hpp"
39
40 #include "deFloat16.h"
41 #include "deMath.hpp"
42 #include "deStringUtil.hpp"
43 #include "deSharedPtr.hpp"
44 #include "deRandom.hpp"
45 #include "deArrayUtil.hpp"
46
47 #include <string>
48 #include <memory>
49 #include <cmath>
50
51 namespace vkt
52 {
53 namespace shaderexecutor
54 {
55
56 namespace
57 {
58
59 using de::MovePtr;
60 using de::UniquePtr;
61 using std::vector;
62
63 using namespace vk;
64
65 enum class AtomicMemoryType
66 {
67 BUFFER = 0, // Normal buffer.
68 SHARED, // Shared global struct in a compute workgroup.
69 REFERENCE, // Buffer passed as a reference.
70 PAYLOAD, // Task payload.
71 };
72
73 // Helper struct to indicate the shader type and if it should use shared global memory.
74 class AtomicShaderType
75 {
76 public:
AtomicShaderType(glu::ShaderType type, AtomicMemoryType memoryType) : m_type(type), m_atomicMemoryType(memoryType)
78 {
// The shared memory type can only be used with compute, task and mesh shaders.
80 DE_ASSERT(memoryType != AtomicMemoryType::SHARED || type == glu::SHADERTYPE_COMPUTE ||
81 type == glu::SHADERTYPE_TASK || type == glu::SHADERTYPE_MESH);
82
83 // Task payload memory can only be tested in task shaders.
84 DE_ASSERT(memoryType != AtomicMemoryType::PAYLOAD || type == glu::SHADERTYPE_TASK);
85 }
86
glu::ShaderType getType(void) const
88 {
89 return m_type;
90 }
AtomicMemoryType getMemoryType(void) const
92 {
93 return m_atomicMemoryType;
94 }
bool isSharedLike(void) const
96 {
97 return m_atomicMemoryType == AtomicMemoryType::SHARED || m_atomicMemoryType == AtomicMemoryType::PAYLOAD;
98 }
bool isMeshShadingStage(void) const
100 {
101 return (m_type == glu::SHADERTYPE_TASK || m_type == glu::SHADERTYPE_MESH);
102 }
103
104 private:
105 glu::ShaderType m_type;
106 AtomicMemoryType m_atomicMemoryType;
107 };
108
109 // Buffer helper
110 class Buffer
111 {
112 public:
113 Buffer(Context &context, VkBufferUsageFlags usage, size_t size, bool useRef);
114
VkBuffer getBuffer(void) const
116 {
117 return *m_buffer;
118 }
void *getHostPtr(void) const
120 {
121 return m_allocation->getHostPtr();
122 }
123 void flush(void);
124 void invalidate(void);
125
126 private:
127 const DeviceInterface &m_vkd;
128 const VkDevice m_device;
129 const VkQueue m_queue;
130 const uint32_t m_queueIndex;
131 const Unique<VkBuffer> m_buffer;
132 const UniquePtr<Allocation> m_allocation;
133 };
134
135 typedef de::SharedPtr<Buffer> BufferSp;
136
Move<VkBuffer> createBuffer(const DeviceInterface &vkd, VkDevice device, VkDeviceSize size,
138 VkBufferUsageFlags usageFlags)
139 {
140 const VkBufferCreateInfo createInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
141 nullptr,
142 (VkBufferCreateFlags)0,
143 size,
144 usageFlags,
145 VK_SHARING_MODE_EXCLUSIVE,
146 0u,
147 nullptr};
148 return createBuffer(vkd, device, &createInfo);
149 }
150
MovePtr<Allocation> allocateAndBindMemory(const DeviceInterface &vkd, VkDevice device, Allocator &allocator,
152 VkBuffer buffer, bool useRef)
153 {
154 const MemoryRequirement allocationType =
155 (MemoryRequirement::HostVisible | (useRef ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any));
156 MovePtr<Allocation> alloc(allocator.allocate(getBufferMemoryRequirements(vkd, device, buffer), allocationType));
157
158 VK_CHECK(vkd.bindBufferMemory(device, buffer, alloc->getMemory(), alloc->getOffset()));
159
160 return alloc;
161 }
162
Buffer::Buffer(Context &context, VkBufferUsageFlags usage, size_t size, bool useRef)
164 : m_vkd(context.getDeviceInterface())
165 , m_device(context.getDevice())
166 , m_queue(context.getUniversalQueue())
167 , m_queueIndex(context.getUniversalQueueFamilyIndex())
168 , m_buffer(createBuffer(context.getDeviceInterface(), context.getDevice(), (VkDeviceSize)size, usage))
169 , m_allocation(allocateAndBindMemory(context.getDeviceInterface(), context.getDevice(),
170 context.getDefaultAllocator(), *m_buffer, useRef))
171 {
172 }
173
void Buffer::flush(void)
175 {
176 flushMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
177 }
178
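// Makes device writes visible to the host: submits a command buffer with a memory barrier to the
// host stage, then invalidates the mapped memory range before results are read back.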
void Buffer::invalidate(void)
180 {
181 const auto cmdPool = vk::makeCommandPool(m_vkd, m_device, m_queueIndex);
182 const auto cmdBufferPtr =
183 vk::allocateCommandBuffer(m_vkd, m_device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
184 const auto cmdBuffer = cmdBufferPtr.get();
185 const auto bufferBarrier = vk::makeBufferMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_HOST_READ_BIT,
186 m_buffer.get(), 0ull, VK_WHOLE_SIZE);
187
188 beginCommandBuffer(m_vkd, cmdBuffer);
189 m_vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr,
190 1u, &bufferBarrier, 0u, nullptr);
191 endCommandBuffer(m_vkd, cmdBuffer);
192 submitCommandsAndWait(m_vkd, m_device, m_queue, cmdBuffer);
193
194 invalidateMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
195 }
196
197 // Tests
198
199 enum AtomicOperation
200 {
201 ATOMIC_OP_EXCHANGE = 0,
202 ATOMIC_OP_COMP_SWAP,
203 ATOMIC_OP_ADD,
204 ATOMIC_OP_MIN,
205 ATOMIC_OP_MAX,
206 ATOMIC_OP_AND,
207 ATOMIC_OP_OR,
208 ATOMIC_OP_XOR,
209
210 ATOMIC_OP_LAST
211 };
212
std::string atomicOp2Str(AtomicOperation op)
214 {
215 static const char *const s_names[] = {"atomicExchange", "atomicCompSwap", "atomicAdd", "atomicMin",
216 "atomicMax", "atomicAnd", "atomicOr", "atomicXor"};
217 return de::getSizedArrayElement<ATOMIC_OP_LAST>(s_names, op);
218 }
219
220 enum
221 {
222 NUM_ELEMENTS = 32
223 };
224
225 enum DataType
226 {
227 DATA_TYPE_FLOAT16 = 0,
228 DATA_TYPE_FLOAT16X2,
229 DATA_TYPE_FLOAT16X4,
230 DATA_TYPE_INT32,
231 DATA_TYPE_UINT32,
232 DATA_TYPE_FLOAT32,
233 DATA_TYPE_INT64,
234 DATA_TYPE_UINT64,
235 DATA_TYPE_FLOAT64,
236
237 DATA_TYPE_LAST
238 };
239
std::string dataType2Str(DataType type)
241 {
242 static const char *const s_names[] = {
243 "float16_t", "f16vec2", "f16vec4", "int", "uint", "float", "int64_t", "uint64_t", "double",
244 };
245 return de::getSizedArrayElement<DATA_TYPE_LAST>(s_names, type);
246 }
247
248 class BufferInterface
249 {
250 public:
251 virtual void setBuffer(void *ptr) = 0;
252
253 virtual size_t bufferSize() = 0;
254
255 virtual void fillWithTestData(de::Random &rnd) = 0;
256
257 virtual void checkResults(tcu::ResultCollector &resultCollector) = 0;
258
virtual ~BufferInterface()
260 {
261 }
262 };
263
264 template <typename dataTypeT>
265 class TestBuffer : public BufferInterface
266 {
267 public:
TestBuffer(AtomicOperation atomicOp) : m_atomicOp(atomicOp)
269 {
270 }
271
272 template <typename T>
273 struct BufferData
274 {
275 // Use half the number of elements for inout to cause overlap between atomic operations.
276 // Each inout element at index i will have two atomic operations using input from
277 // indices i and i + NUM_ELEMENTS / 2.
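// For example, with NUM_ELEMENTS == 32, inout[3] is the target of the two operations
// that use input[3] and input[19].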
278 T inout[NUM_ELEMENTS / 2];
279 T input[NUM_ELEMENTS];
280 T compare[NUM_ELEMENTS];
281 T output[NUM_ELEMENTS];
282 T invocationHitCount[NUM_ELEMENTS];
283 int32_t index;
284 };
285
virtual void setBuffer(void *ptr)
287 {
288 m_ptr = static_cast<BufferData<dataTypeT> *>(ptr);
289 }
290
virtual size_t bufferSize()
292 {
293 return sizeof(BufferData<dataTypeT>);
294 }
295
virtual void fillWithTestData(de::Random &rnd)
297 {
298 dataTypeT pattern;
299 deMemset(&pattern, 0xcd, sizeof(dataTypeT));
300
301 for (int i = 0; i < NUM_ELEMENTS / 2; i++)
302 {
303 m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getUint64());
304 // The first half of compare elements match with every even index.
305 // The second half matches with odd indices. This causes the
306 // overlapping operations to only select one.
307 m_ptr->compare[i] = m_ptr->inout[i] + (i % 2);
308 m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + 1 - (i % 2);
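// Example: for i == 0, compare[0] == inout[0] while compare[16] == inout[0] + 1,
// so only the operation using input[0] can perform the swap.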
309 }
310 for (int i = 0; i < NUM_ELEMENTS; i++)
311 {
312 m_ptr->input[i] = static_cast<dataTypeT>(rnd.getUint64());
313 m_ptr->output[i] = pattern;
314 m_ptr->invocationHitCount[i] = 0;
315 }
316 m_ptr->index = 0;
317
318 // Take a copy to be used when calculating expected values.
319 m_original = *m_ptr;
320 }
321
virtual void checkResults(tcu::ResultCollector &resultCollector)
323 {
324 checkOperation(m_original, *m_ptr, resultCollector);
325 }
326
327 template <typename T>
328 struct Expected
329 {
330 T m_inout;
331 T m_output[2];
332
Expected(T inout, T output0, T output1) : m_inout(inout)
334 {
335 m_output[0] = output0;
336 m_output[1] = output1;
337 }
338
bool compare(T inout, T output0, T output1)
340 {
341 return (deMemCmp((const void *)&m_inout, (const void *)&inout, sizeof(inout)) == 0 &&
342 deMemCmp((const void *)&m_output[0], (const void *)&output0, sizeof(output0)) == 0 &&
343 deMemCmp((const void *)&m_output[1], (const void *)&output1, sizeof(output1)) == 0);
344 }
345 };
346
347 void checkOperation(const BufferData<dataTypeT> &original, const BufferData<dataTypeT> &result,
348 tcu::ResultCollector &resultCollector);
349
350 const AtomicOperation m_atomicOp;
351
352 BufferData<dataTypeT> *m_ptr;
353 BufferData<dataTypeT> m_original;
354 };
355
356 template <typename T>
bool sloppyFPCompare(T x, T y)
358 {
359 return fabs(deToDouble(x) - deToDouble(y)) < 0.00001;
360 }
361
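// Half floats carry only ~10 mantissa bits, so the fp16 specialization below uses a much looser tolerance.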
362 template <>
bool sloppyFPCompare<deFloat16>(deFloat16 x, deFloat16 y)
364 {
365 return fabs(deToDouble(x) - deToDouble(y)) < 0.01;
366 }
367
368 template <typename T>
bool nanSafeSloppyEquals(T x, T y)
370 {
371 if (deIsIEEENaN(x) && deIsIEEENaN(y))
372 return true;
373
374 if (deIsIEEENaN(x) || deIsIEEENaN(y))
375 return false;
376
377 return sloppyFPCompare(x, y);
378 }
379
380 template <typename dataTypeT, uint32_t VecSize = 1>
381 class TestBufferFloatingPoint : public BufferInterface
382 {
383 public:
TestBufferFloatingPoint(AtomicOperation atomicOp) : m_atomicOp(atomicOp)
385 {
386 }
387
388 template <typename T, uint32_t VecSize2>
389 struct BufferDataFloatingPoint
390 {
391 // Use half the number of elements for inout to cause overlap between atomic operations.
392 // Each inout element at index i will have two atomic operations using input from
393 // indices i and i + NUM_ELEMENTS / 2.
394 T inout[NUM_ELEMENTS / 2 * VecSize2];
395 T input[NUM_ELEMENTS * VecSize2];
396 T compare[NUM_ELEMENTS * VecSize2];
397 T output[NUM_ELEMENTS * VecSize2];
398 int32_t invocationHitCount[NUM_ELEMENTS];
399 int32_t index;
400 };
401
virtual void setBuffer(void *ptr)
403 {
404 m_ptr = static_cast<BufferDataFloatingPoint<dataTypeT, VecSize> *>(ptr);
405 }
406
virtual size_t bufferSize()
408 {
409 return sizeof(BufferDataFloatingPoint<dataTypeT, VecSize>);
410 }
411
virtual void fillWithTestData(de::Random &rnd)
413 {
414 dataTypeT pattern;
415 deMemset(&pattern, 0xcd, sizeof(dataTypeT));
416
417 for (uint32_t i = 0; i < (NUM_ELEMENTS / 2) * VecSize; i++)
418 {
419 m_ptr->inout[i] = deToFloatType<dataTypeT>(rnd.getFloat());
420 }
421 for (uint32_t i = 0; i < NUM_ELEMENTS * VecSize; i++)
422 {
423 m_ptr->input[i] = deToFloatType<dataTypeT>(rnd.getFloat());
424 m_ptr->output[i] = pattern;
425 // These aren't used by any of the float tests
426 m_ptr->compare[i] = deToFloatType<dataTypeT>(0.0);
427 }
428 for (int i = 0; i < NUM_ELEMENTS; i++)
429 {
430 m_ptr->invocationHitCount[i] = 0;
431 }
432 // Add special cases for NaN and +/-0
433 // 0: min(sNaN, x)
434 m_ptr->inout[0] = deSignalingNaN<dataTypeT>();
435 // 1: min(x, sNaN)
436 m_ptr->input[1 * 2 + 0] = deSignalingNaN<dataTypeT>();
437 // 2: min(qNaN, x)
438 m_ptr->inout[2] = deQuietNaN<dataTypeT>();
439 // 3: min(x, qNaN)
440 m_ptr->input[3 * 2 + 0] = deQuietNaN<dataTypeT>();
441 // 4: min(NaN, NaN)
442 m_ptr->inout[4] = deSignalingNaN<dataTypeT>();
443 m_ptr->input[4 * 2 + 0] = deQuietNaN<dataTypeT>();
444 m_ptr->input[4 * 2 + 1] = deQuietNaN<dataTypeT>();
445 // 5: min(+0, -0)
446 m_ptr->inout[5] = deToFloatType<dataTypeT>(-0.0);
447 m_ptr->input[5 * 2 + 0] = deToFloatType<dataTypeT>(0.0);
448 m_ptr->input[5 * 2 + 1] = deToFloatType<dataTypeT>(0.0);
449
450 m_ptr->index = 0;
451
452 // Take a copy to be used when calculating expected values.
453 m_original = *m_ptr;
454 }
455
virtual void checkResults(tcu::ResultCollector &resultCollector)
457 {
458 checkOperationFloatingPoint(m_original, *m_ptr, resultCollector);
459 }
460
461 template <typename T>
462 struct Expected
463 {
464 T m_inout;
465 T m_output[2];
466
Expected(T inout, T output0, T output1) : m_inout(inout)
468 {
469 m_output[0] = output0;
470 m_output[1] = output1;
471 }
472
bool compare(T inout, T output0, T output1)
474 {
475 return nanSafeSloppyEquals(m_inout, inout) && nanSafeSloppyEquals(m_output[0], output0) &&
476 nanSafeSloppyEquals(m_output[1], output1);
477 }
478 };
479
480 void checkOperationFloatingPoint(const BufferDataFloatingPoint<dataTypeT, VecSize> &original,
481 const BufferDataFloatingPoint<dataTypeT, VecSize> &result,
482 tcu::ResultCollector &resultCollector);
483
484 const AtomicOperation m_atomicOp;
485
486 BufferDataFloatingPoint<dataTypeT, VecSize> *m_ptr;
487 BufferDataFloatingPoint<dataTypeT, VecSize> m_original;
488 };
489
static BufferInterface *createTestBuffer(DataType type, AtomicOperation atomicOp)
491 {
492 switch (type)
493 {
494 case DATA_TYPE_FLOAT16:
495 return new TestBufferFloatingPoint<deFloat16>(atomicOp);
496 case DATA_TYPE_FLOAT16X2:
497 return new TestBufferFloatingPoint<deFloat16, 2>(atomicOp);
498 case DATA_TYPE_FLOAT16X4:
499 return new TestBufferFloatingPoint<deFloat16, 4>(atomicOp);
500 case DATA_TYPE_INT32:
501 return new TestBuffer<int32_t>(atomicOp);
502 case DATA_TYPE_UINT32:
503 return new TestBuffer<uint32_t>(atomicOp);
504 case DATA_TYPE_FLOAT32:
505 return new TestBufferFloatingPoint<float>(atomicOp);
506 case DATA_TYPE_INT64:
507 return new TestBuffer<int64_t>(atomicOp);
508 case DATA_TYPE_UINT64:
509 return new TestBuffer<uint64_t>(atomicOp);
510 case DATA_TYPE_FLOAT64:
511 return new TestBufferFloatingPoint<double>(atomicOp);
512 default:
513 DE_ASSERT(false);
514 return nullptr;
515 }
516 }
517
518 // Use template to handle both signed and unsigned cases. SPIR-V should
519 // have separate operations for both.
520 template <typename T>
void TestBuffer<T>::checkOperation(const BufferData<T> &original, const BufferData<T> &result,
522 tcu::ResultCollector &resultCollector)
523 {
// originalInout = original inout value
// input0        = input at index i
// input1        = input at index i + NUM_ELEMENTS / 2
//
// The atomic operation returns the memory contents before
// the operation, and this value is stored as output. Two operations
// are executed for each InOut value (using input0 and input1).
//
// Since two operations overlap on each InOut element, the
// resulting InOut value and the outputs of the operations have
// two result candidates depending on the execution order.
// Verification passes if the results match one of these options.
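// Example for atomicAdd with originalInout = 1, input0 = 2 and input1 = 4: the final InOut
// value must be 7, and (Output0, Output1) must be either (1, 3) or (5, 1) depending on order.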
537
538 for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
539 {
// Needed when reinterpreting the data as signed values.
541 const T originalInout = *reinterpret_cast<const T *>(&original.inout[elementNdx]);
542 const T input0 = *reinterpret_cast<const T *>(&original.input[elementNdx]);
543 const T input1 = *reinterpret_cast<const T *>(&original.input[elementNdx + NUM_ELEMENTS / 2]);
544
545 // Expected results are collected to this vector.
546 vector<Expected<T>> exp;
547
548 switch (m_atomicOp)
549 {
550 case ATOMIC_OP_ADD:
551 {
552 exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
553 exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
554 }
555 break;
556
557 case ATOMIC_OP_AND:
558 {
559 exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout, originalInout & input0));
560 exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout & input1, originalInout));
561 }
562 break;
563
564 case ATOMIC_OP_OR:
565 {
566 exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout, originalInout | input0));
567 exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout | input1, originalInout));
568 }
569 break;
570
571 case ATOMIC_OP_XOR:
572 {
573 exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout, originalInout ^ input0));
574 exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout ^ input1, originalInout));
575 }
576 break;
577
578 case ATOMIC_OP_MIN:
579 {
580 exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), originalInout,
581 de::min(originalInout, input0)));
582 exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), de::min(originalInout, input1),
583 originalInout));
584 }
585 break;
586
587 case ATOMIC_OP_MAX:
588 {
589 exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), originalInout,
590 de::max(originalInout, input0)));
591 exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), de::max(originalInout, input1),
592 originalInout));
593 }
594 break;
595
596 case ATOMIC_OP_EXCHANGE:
597 {
598 exp.push_back(Expected<T>(input1, originalInout, input0));
599 exp.push_back(Expected<T>(input0, input1, originalInout));
600 }
601 break;
602
603 case ATOMIC_OP_COMP_SWAP:
604 {
605 if (elementNdx % 2 == 0)
606 {
607 exp.push_back(Expected<T>(input0, originalInout, input0));
608 exp.push_back(Expected<T>(input0, originalInout, originalInout));
609 }
610 else
611 {
612 exp.push_back(Expected<T>(input1, input1, originalInout));
613 exp.push_back(Expected<T>(input1, originalInout, originalInout));
614 }
615 }
616 break;
617
618 default:
619 DE_FATAL("Unexpected atomic operation.");
620 break;
621 }
622
623 const T resIo = result.inout[elementNdx];
624 const T resOutput0 = result.output[elementNdx];
625 const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];
626
627 if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
628 {
629 std::ostringstream errorMessage;
630 errorMessage << "ERROR: Result value check failed at index " << elementNdx
631 << ". Expected one of the two outcomes: InOut = " << tcu::toHex(exp[0].m_inout)
632 << ", Output0 = " << tcu::toHex(exp[0].m_output[0])
633 << ", Output1 = " << tcu::toHex(exp[0].m_output[1])
634 << ", or InOut = " << tcu::toHex(exp[1].m_inout)
635 << ", Output0 = " << tcu::toHex(exp[1].m_output[0])
636 << ", Output1 = " << tcu::toHex(exp[1].m_output[1]) << ". Got: InOut = " << tcu::toHex(resIo)
637 << ", Output0 = " << tcu::toHex(resOutput0) << ", Output1 = " << tcu::toHex(resOutput1)
638 << ". Using Input0 = " << tcu::toHex(original.input[elementNdx])
639 << " and Input1 = " << tcu::toHex(original.input[elementNdx + NUM_ELEMENTS / 2]) << ".";
640
641 resultCollector.fail(errorMessage.str());
642 }
643 }
644 }
645
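// Collects every result allowed for a floating-point atomic min/max when either operand is a NaN
// or when +0 and -0 are compared; implementations may return any of the recorded candidates.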
646 template <typename T>
void handleExceptionalFloatMinMaxValues(vector<T> &values, T x, T y)
648 {
649
650 if (deIsSignalingNaN(x) && deIsSignalingNaN(y))
651 {
652 values.push_back(deQuietNaN<T>());
653 values.push_back(deSignalingNaN<T>());
654 }
655 else if (deIsSignalingNaN(x))
656 {
657 values.push_back(deQuietNaN<T>());
658 values.push_back(deSignalingNaN<T>());
659 if (!deIsIEEENaN(y))
660 values.push_back(y);
661 }
662 else if (deIsSignalingNaN(y))
663 {
664 values.push_back(deQuietNaN<T>());
665 values.push_back(deSignalingNaN<T>());
666 if (!deIsIEEENaN(x))
667 values.push_back(x);
668 }
669 else if (deIsIEEENaN(x) && deIsIEEENaN(y))
670 {
671 // Both quiet NaNs
672 values.push_back(deQuietNaN<T>());
673 }
674 else if (deIsIEEENaN(x))
675 {
676 // One quiet NaN and one non-NaN.
677 values.push_back(y);
678 }
679 else if (deIsIEEENaN(y))
680 {
681 // One quiet NaN and one non-NaN.
682 values.push_back(x);
683 }
684 else if ((deIsPositiveZero(x) && deIsNegativeZero(y)) || (deIsNegativeZero(x) && deIsPositiveZero(y)))
685 {
686 values.push_back(deToFloatType<T>(0.0));
687 values.push_back(deToFloatType<T>(-0.0));
688 }
689 }
690
691 template <typename T>
T floatAdd(T x, T y)
693 {
694 if (deIsIEEENaN(x) || deIsIEEENaN(y))
695 return deQuietNaN<T>();
696 return deToFloatType<T>(deToDouble(x) + deToDouble(y));
697 }
698
699 template <typename T>
vector<T> floatMinValues(T x, T y)
701 {
702 vector<T> values;
703 handleExceptionalFloatMinMaxValues(values, x, y);
704 if (values.empty())
705 {
706 values.push_back(deToDouble(x) < deToDouble(y) ? x : y);
707 }
708 return values;
709 }
710
711 template <typename T>
vector<T> floatMaxValues(T x, T y)
713 {
714 vector<T> values;
715 handleExceptionalFloatMinMaxValues(values, x, y);
716 if (values.empty())
717 {
718 values.push_back(deToDouble(x) > deToDouble(y) ? x : y);
719 }
720 return values;
721 }
722
// Use template to handle the half, float and double cases. SPIR-V should
// have separate operations for each.
725 template <typename T, uint32_t VecSize>
void TestBufferFloatingPoint<T, VecSize>::checkOperationFloatingPoint(
727 const BufferDataFloatingPoint<T, VecSize> &original, const BufferDataFloatingPoint<T, VecSize> &result,
728 tcu::ResultCollector &resultCollector)
729 {
// originalInout = original inout value
// input0        = input at index i
// input1        = input at index i + NUM_ELEMENTS / 2
//
// The atomic operation returns the memory contents before
// the operation, and this value is stored as output. Two operations
// are executed for each InOut value (using input0 and input1).
//
// Since two operations overlap on each InOut element, the
// resulting InOut value and the outputs of the operations have
// several acceptable candidates depending on the execution order
// (and, for min/max, on NaN and signed-zero handling). Verification
// passes if the results match one of these options.
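// Unlike the integer case, floating-point addition is not associative under rounding, and
// min/max must tolerate NaN and signed-zero variability, so more than two candidates may be
// collected for a single element.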
743
744 for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
745 {
746 for (uint32_t vecIdx = 0; vecIdx < VecSize; ++vecIdx)
747 {
// Needed when reinterpreting the data as floating-point values.
749 const T originalInout = *reinterpret_cast<const T *>(&original.inout[elementNdx * VecSize + vecIdx]);
750 const T input0 = *reinterpret_cast<const T *>(&original.input[elementNdx * VecSize + vecIdx]);
751 const T input1 =
752 *reinterpret_cast<const T *>(&original.input[(elementNdx + NUM_ELEMENTS / 2) * VecSize + vecIdx]);
753
754 // Expected results are collected to this vector.
755 vector<Expected<T>> exp;
756
757 switch (m_atomicOp)
758 {
759 case ATOMIC_OP_ADD:
760 {
761 exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), originalInout,
762 floatAdd(originalInout, input0)));
763 exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input1), input0),
764 floatAdd(originalInout, input1), originalInout));
765 }
766 break;
767
768 case ATOMIC_OP_MIN:
769 {
770 // The case where input0 is combined first
771 vector<T> minOriginalAndInput0 = floatMinValues(originalInout, input0);
772 for (T x : minOriginalAndInput0)
773 {
774 vector<T> minAll = floatMinValues(x, input1);
775 for (T y : minAll)
776 {
777 exp.push_back(Expected<T>(y, originalInout, x));
778 }
779 }
780
781 // The case where input1 is combined first
782 vector<T> minOriginalAndInput1 = floatMinValues(originalInout, input1);
783 for (T x : minOriginalAndInput1)
784 {
785 vector<T> minAll = floatMinValues(x, input0);
786 for (T y : minAll)
787 {
788 exp.push_back(Expected<T>(y, x, originalInout));
789 }
790 }
791 }
792 break;
793
794 case ATOMIC_OP_MAX:
795 {
// The case where input0 is combined first
vector<T> maxOriginalAndInput0 = floatMaxValues(originalInout, input0);
for (T x : maxOriginalAndInput0)
{
vector<T> maxAll = floatMaxValues(x, input1);
for (T y : maxAll)
{
exp.push_back(Expected<T>(y, originalInout, x));
}
}

// The case where input1 is combined first
vector<T> maxOriginalAndInput1 = floatMaxValues(originalInout, input1);
for (T x : maxOriginalAndInput1)
{
vector<T> maxAll = floatMaxValues(x, input0);
for (T y : maxAll)
{
exp.push_back(Expected<T>(y, x, originalInout));
}
}
817 }
818 break;
819
820 case ATOMIC_OP_EXCHANGE:
821 {
822 exp.push_back(Expected<T>(input1, originalInout, input0));
823 exp.push_back(Expected<T>(input0, input1, originalInout));
824 }
825 break;
826
827 default:
828 DE_FATAL("Unexpected atomic operation.");
829 break;
830 }
831
832 const T resIo = result.inout[elementNdx * VecSize + vecIdx];
833 const T resOutput0 = result.output[elementNdx * VecSize + vecIdx];
834 const T resOutput1 = result.output[(elementNdx + NUM_ELEMENTS / 2) * VecSize + vecIdx];
835
836 bool hasMatch = false;
837 for (Expected<T> e : exp)
838 {
839 if (e.compare(resIo, resOutput0, resOutput1))
840 {
841 hasMatch = true;
842 break;
843 }
844 }
845 if (!hasMatch)
846 {
847 std::ostringstream errorMessage;
848 errorMessage << "ERROR: Result value check failed at index (" << elementNdx << ", " << vecIdx << ")"
849 << ". Expected one of the outcomes:";
850
851 bool first = true;
852 for (Expected<T> e : exp)
853 {
854 if (!first)
855 errorMessage << ", or";
856 first = false;
857
858 errorMessage << " InOut = " << e.m_inout << ", Output0 = " << e.m_output[0]
859 << ", Output1 = " << e.m_output[1];
860 }
861
862 errorMessage << ". Got: InOut = " << resIo << ", Output0 = " << resOutput0
863 << ", Output1 = " << resOutput1
864 << ". Using Input0 = " << original.input[elementNdx * VecSize + vecIdx]
865 << " and Input1 = " << original.input[(elementNdx + NUM_ELEMENTS / 2) * VecSize + vecIdx]
866 << ".";
867
868 resultCollector.fail(errorMessage.str());
869 }
870 }
871 }
872 }
873
874 class AtomicOperationCaseInstance : public TestInstance
875 {
876 public:
877 AtomicOperationCaseInstance(Context &context, const ShaderSpec &shaderSpec, AtomicShaderType shaderType,
878 DataType dataType, AtomicOperation atomicOp);
879
880 virtual tcu::TestStatus iterate(void);
881
882 private:
883 const ShaderSpec &m_shaderSpec;
884 AtomicShaderType m_shaderType;
885 const DataType m_dataType;
886 AtomicOperation m_atomicOp;
887 };
888
AtomicOperationCaseInstance::AtomicOperationCaseInstance(Context &context, const ShaderSpec &shaderSpec,
890 AtomicShaderType shaderType, DataType dataType,
891 AtomicOperation atomicOp)
892 : TestInstance(context)
893 , m_shaderSpec(shaderSpec)
894 , m_shaderType(shaderType)
895 , m_dataType(dataType)
896 , m_atomicOp(atomicOp)
897 {
898 }
899
tcu::TestStatus AtomicOperationCaseInstance::iterate(void)
901 {
902 de::UniquePtr<BufferInterface> testBuffer(createTestBuffer(m_dataType, m_atomicOp));
903 tcu::TestLog &log = m_context.getTestContext().getLog();
904 const DeviceInterface &vkd = m_context.getDeviceInterface();
905 const VkDevice device = m_context.getDevice();
906 de::Random rnd(0x62a15e34);
907 const bool useRef = (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE);
908 const VkDescriptorType descType = (useRef ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
909 const VkBufferUsageFlags usageFlags =
910 (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
911 (useRef ? static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : 0u));
912
913 // The main buffer will hold test data. When using buffer references, the buffer's address will be indirectly passed as part of
914 // a uniform buffer. If not, it will be passed directly as a descriptor.
915 Buffer buffer(m_context, usageFlags, testBuffer->bufferSize(), useRef);
916 std::unique_ptr<Buffer> auxBuffer;
917
918 if (useRef)
919 {
920 // Pass the main buffer address inside a uniform buffer.
921 const VkBufferDeviceAddressInfo addressInfo = {
922 VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
923 nullptr, // const void* pNext;
924 buffer.getBuffer(), // VkBuffer buffer;
925 };
926 const auto address = vkd.getBufferDeviceAddress(device, &addressInfo);
927
928 auxBuffer.reset(new Buffer(m_context, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, sizeof(address), false));
929 deMemcpy(auxBuffer->getHostPtr(), &address, sizeof(address));
930 auxBuffer->flush();
931 }
932
933 testBuffer->setBuffer(buffer.getHostPtr());
934 testBuffer->fillWithTestData(rnd);
935
936 buffer.flush();
937
938 Move<VkDescriptorSetLayout> extraResourcesLayout;
939 Move<VkDescriptorPool> extraResourcesSetPool;
940 Move<VkDescriptorSet> extraResourcesSet;
941
942 const VkDescriptorSetLayoutBinding bindings[] = {{0u, descType, 1, VK_SHADER_STAGE_ALL, nullptr}};
943
944 const VkDescriptorSetLayoutCreateInfo layoutInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr,
945 (VkDescriptorSetLayoutCreateFlags)0u,
946 DE_LENGTH_OF_ARRAY(bindings), bindings};
947
948 extraResourcesLayout = createDescriptorSetLayout(vkd, device, &layoutInfo);
949
950 const VkDescriptorPoolSize poolSizes[] = {{descType, 1u}};
951
952 const VkDescriptorPoolCreateInfo poolInfo = {
953 VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
954 nullptr,
955 (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
956 1u, // maxSets
957 DE_LENGTH_OF_ARRAY(poolSizes),
958 poolSizes};
959
960 extraResourcesSetPool = createDescriptorPool(vkd, device, &poolInfo);
961
962 const VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr,
963 *extraResourcesSetPool, 1u, &extraResourcesLayout.get()};
964
965 extraResourcesSet = allocateDescriptorSet(vkd, device, &allocInfo);
966
967 VkDescriptorBufferInfo bufferInfo;
968 bufferInfo.buffer = (useRef ? auxBuffer->getBuffer() : buffer.getBuffer());
969 bufferInfo.offset = 0u;
970 bufferInfo.range = VK_WHOLE_SIZE;
971
972 const VkWriteDescriptorSet descriptorWrite = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
973 nullptr,
974 *extraResourcesSet,
975 0u, // dstBinding
976 0u, // dstArrayElement
977 1u,
978 descType,
979 nullptr,
980 &bufferInfo,
981 nullptr};
982
983 vkd.updateDescriptorSets(device, 1u, &descriptorWrite, 0u, nullptr);
984
985 // Storage for output varying data.
986 std::vector<uint32_t> outputs(NUM_ELEMENTS);
987 std::vector<void *> outputPtr(NUM_ELEMENTS);
988
989 for (size_t i = 0; i < NUM_ELEMENTS; i++)
990 {
991 outputs[i] = 0xcdcdcdcd;
992 outputPtr[i] = &outputs[i];
993 }
994
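// Shared-like variants run a single workgroup whose local size is NUM_ELEMENTS (set up in
// createShaderSpec); the other variants request NUM_ELEMENTS work items.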
995 const int numWorkGroups = (m_shaderType.isSharedLike() ? 1 : static_cast<int>(NUM_ELEMENTS));
996 UniquePtr<ShaderExecutor> executor(
997 createExecutor(m_context, m_shaderType.getType(), m_shaderSpec, *extraResourcesLayout));
998
999 executor->execute(numWorkGroups, nullptr, &outputPtr[0], *extraResourcesSet);
1000 buffer.invalidate();
1001
1002 tcu::ResultCollector resultCollector(log);
1003
1004 // Check the results of the atomic operation
1005 testBuffer->checkResults(resultCollector);
1006
1007 return tcu::TestStatus(resultCollector.getResult(), resultCollector.getMessage());
1008 }
1009
1010 class AtomicOperationCase : public TestCase
1011 {
1012 public:
1013 AtomicOperationCase(tcu::TestContext &testCtx, const char *name, AtomicShaderType type, DataType dataType,
1014 AtomicOperation atomicOp);
1015 virtual ~AtomicOperationCase(void);
1016
1017 virtual TestInstance *createInstance(Context &ctx) const;
1018 virtual void checkSupport(Context &ctx) const;
virtual void initPrograms(vk::SourceCollections &programCollection) const
1020 {
1021 const bool useSpv14 = m_shaderType.isMeshShadingStage();
1022 const auto spvVersion = (useSpv14 ? vk::SPIRV_VERSION_1_4 : vk::SPIRV_VERSION_1_0);
1023 const ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, spvVersion, 0u, useSpv14);
1024 ShaderSpec sourcesSpec(m_shaderSpec);
1025
1026 sourcesSpec.buildOptions = buildOptions;
1027 generateSources(m_shaderType.getType(), sourcesSpec, programCollection);
1028 }
1029
1030 private:
1031 void createShaderSpec();
1032 ShaderSpec m_shaderSpec;
1033 const AtomicShaderType m_shaderType;
1034 const DataType m_dataType;
1035 const AtomicOperation m_atomicOp;
1036 };
1037
AtomicOperationCase::AtomicOperationCase(tcu::TestContext &testCtx, const char *name, AtomicShaderType shaderType,
1039 DataType dataType, AtomicOperation atomicOp)
1040 : TestCase(testCtx, name)
1041 , m_shaderType(shaderType)
1042 , m_dataType(dataType)
1043 , m_atomicOp(atomicOp)
1044 {
1045 createShaderSpec();
1046 init();
1047 }
1048
AtomicOperationCase::~AtomicOperationCase(void)
1050 {
1051 }
1052
TestInstance *AtomicOperationCase::createInstance(Context &ctx) const
1054 {
1055 return new AtomicOperationCaseInstance(ctx, m_shaderSpec, m_shaderType, m_dataType, m_atomicOp);
1056 }
1057
void AtomicOperationCase::checkSupport(Context &ctx) const
1059 {
1060 if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
1061 {
1062 ctx.requireDeviceFunctionality("VK_KHR_shader_atomic_int64");
1063
1064 const auto atomicInt64Features = ctx.getShaderAtomicInt64Features();
1065 const bool isSharedMemory = m_shaderType.isSharedLike();
1066
1067 if (!isSharedMemory && atomicInt64Features.shaderBufferInt64Atomics == VK_FALSE)
1068 {
1069 TCU_THROW(NotSupportedError,
1070 "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for buffers");
1071 }
1072 if (isSharedMemory && atomicInt64Features.shaderSharedInt64Atomics == VK_FALSE)
1073 {
1074 TCU_THROW(NotSupportedError,
1075 "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for shared memory");
1076 }
1077 }
1078
1079 if (m_dataType == DATA_TYPE_FLOAT16)
1080 {
1081 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
1082 #ifndef CTS_USES_VULKANSC
1083 if (m_atomicOp == ATOMIC_OP_ADD)
1084 {
1085 if (m_shaderType.isSharedLike())
1086 {
1087 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicAdd)
1088 {
1089 TCU_THROW(NotSupportedError,
1090 "VkShaderAtomicFloat16: 16-bit floating point shared add atomic operation not supported");
1091 }
1092 }
1093 else
1094 {
1095 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicAdd)
1096 {
1097 TCU_THROW(NotSupportedError,
1098 "VkShaderAtomicFloat16: 16-bit floating point buffer add atomic operation not supported");
1099 }
1100 }
1101 }
1102 if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
1103 {
1104 if (m_shaderType.isSharedLike())
1105 {
1106 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicMinMax)
1107 {
1108 TCU_THROW(
1109 NotSupportedError,
1110 "VkShaderAtomicFloat16: 16-bit floating point shared min/max atomic operation not supported");
1111 }
1112 }
1113 else
1114 {
1115 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicMinMax)
1116 {
1117 TCU_THROW(
1118 NotSupportedError,
1119 "VkShaderAtomicFloat16: 16-bit floating point buffer min/max atomic operation not supported");
1120 }
1121 }
1122 }
1123 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
1124 {
1125 if (m_shaderType.isSharedLike())
1126 {
1127 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16Atomics)
1128 {
1129 TCU_THROW(NotSupportedError,
1130 "VkShaderAtomicFloat16: 16-bit floating point shared atomic operations not supported");
1131 }
1132 }
1133 else
1134 {
1135 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16Atomics)
1136 {
1137 TCU_THROW(NotSupportedError,
1138 "VkShaderAtomicFloat16: 16-bit floating point buffer atomic operations not supported");
1139 }
1140 }
1141 }
1142 #endif // CTS_USES_VULKANSC
1143 }
1144
1145 #ifndef CTS_USES_VULKANSC
1146 if (m_dataType == DATA_TYPE_FLOAT16X2 || m_dataType == DATA_TYPE_FLOAT16X4)
1147 {
1148 ctx.requireDeviceFunctionality("VK_NV_shader_atomic_float16_vector");
1149 if (!ctx.getShaderAtomicFloat16VectorFeaturesNV().shaderFloat16VectorAtomics)
1150 {
1151 TCU_THROW(NotSupportedError, "16-bit floating point vector atomic operations not supported");
1152 }
1153 }
1154 #endif // CTS_USES_VULKANSC
1155
1156 if (m_dataType == DATA_TYPE_FLOAT32)
1157 {
1158 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
1159 if (m_atomicOp == ATOMIC_OP_ADD)
1160 {
1161 if (m_shaderType.isSharedLike())
1162 {
1163 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32AtomicAdd)
1164 {
1165 TCU_THROW(NotSupportedError,
1166 "VkShaderAtomicFloat32: 32-bit floating point shared add atomic operation not supported");
1167 }
1168 }
1169 else
1170 {
1171 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32AtomicAdd)
1172 {
1173 TCU_THROW(NotSupportedError,
1174 "VkShaderAtomicFloat32: 32-bit floating point buffer add atomic operation not supported");
1175 }
1176 }
1177 }
1178 if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
1179 {
1180 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
1181 #ifndef CTS_USES_VULKANSC
1182 if (m_shaderType.isSharedLike())
1183 {
1184 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat32AtomicMinMax)
1185 {
1186 TCU_THROW(
1187 NotSupportedError,
1188 "VkShaderAtomicFloat32: 32-bit floating point shared min/max atomic operation not supported");
1189 }
1190 }
1191 else
1192 {
1193 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat32AtomicMinMax)
1194 {
1195 TCU_THROW(
1196 NotSupportedError,
1197 "VkShaderAtomicFloat32: 32-bit floating point buffer min/max atomic operation not supported");
1198 }
1199 }
1200 #endif // CTS_USES_VULKANSC
1201 }
1202 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
1203 {
1204 if (m_shaderType.isSharedLike())
1205 {
1206 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)
1207 {
1208 TCU_THROW(NotSupportedError,
1209 "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
1210 }
1211 }
1212 else
1213 {
1214 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)
1215 {
1216 TCU_THROW(NotSupportedError,
1217 "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
1218 }
1219 }
1220 }
1221 }
1222
1223 if (m_dataType == DATA_TYPE_FLOAT64)
1224 {
1225 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
1226 if (m_atomicOp == ATOMIC_OP_ADD)
1227 {
1228 if (m_shaderType.isSharedLike())
1229 {
1230 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64AtomicAdd)
1231 {
1232 TCU_THROW(NotSupportedError,
1233 "VkShaderAtomicFloat64: 64-bit floating point shared add atomic operation not supported");
1234 }
1235 }
1236 else
1237 {
1238 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64AtomicAdd)
1239 {
1240 TCU_THROW(NotSupportedError,
1241 "VkShaderAtomicFloat64: 64-bit floating point buffer add atomic operation not supported");
1242 }
1243 }
1244 }
1245 if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
1246 {
1247 ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
1248 #ifndef CTS_USES_VULKANSC
1249 if (m_shaderType.isSharedLike())
1250 {
1251 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat64AtomicMinMax)
1252 {
1253 TCU_THROW(
1254 NotSupportedError,
1255 "VkShaderAtomicFloat64: 64-bit floating point shared min/max atomic operation not supported");
1256 }
1257 }
1258 else
1259 {
1260 if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat64AtomicMinMax)
1261 {
1262 TCU_THROW(
1263 NotSupportedError,
1264 "VkShaderAtomicFloat64: 64-bit floating point buffer min/max atomic operation not supported");
1265 }
1266 }
1267 #endif // CTS_USES_VULKANSC
1268 }
1269 if (m_atomicOp == ATOMIC_OP_EXCHANGE)
1270 {
1271 if (m_shaderType.isSharedLike())
1272 {
1273 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)
1274 {
1275 TCU_THROW(NotSupportedError,
1276 "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
1277 }
1278 }
1279 else
1280 {
1281 if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)
1282 {
1283 TCU_THROW(NotSupportedError,
1284 "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
1285 }
1286 }
1287 }
1288 }
1289
1290 if (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE)
1291 {
1292 ctx.requireDeviceFunctionality("VK_KHR_buffer_device_address");
1293 }
1294
1295 checkSupportShader(ctx, m_shaderType.getType());
1296 }
1297
void AtomicOperationCase::createShaderSpec(void)
1299 {
1300 const AtomicMemoryType memoryType = m_shaderType.getMemoryType();
1301 const bool isSharedLike = m_shaderType.isSharedLike();
1302
1303 // Global declarations.
1304 std::ostringstream shaderTemplateGlobalStream;
1305
1306 // Structure in use for atomic operations.
1307 shaderTemplateGlobalStream << "${EXTENSIONS}\n"
1308 << "\n"
1309 << "struct AtomicStruct\n"
1310 << "{\n"
1311 << " ${DATATYPE} inoutValues[${N}/2];\n"
1312 << " ${DATATYPE} inputValues[${N}];\n"
1313 << " ${DATATYPE} compareValues[${N}];\n"
1314 << " ${DATATYPE} outputValues[${N}];\n"
1315 << " int invocationHitCount[${N}];\n"
1316 << " int index;\n"
1317 << "};\n"
1318 << "\n";
1319
1320 // The name dance and declarations below will make sure the structure that will be used with atomic operations can be accessed
1321 // as "buf.data", which is the name used in the atomic operation statements.
1322 //
1323 // * When using a buffer directly, RESULT_BUFFER_NAME will be "buf" and the inner struct will be "data".
// * When using a workgroup-shared global variable, the "data" struct will be nested in an auxiliary "buf" struct.
1325 // * When using buffer references, the uniform buffer reference will be called "buf" and its contents "data".
1326 //
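// As an illustration (not the verbatim generated source), the plain BUFFER case specializes to roughly:
//
//   layout (set = <SETIDX>, binding = 0) buffer AtomicBuffer {
//       AtomicStruct data;
//   } buf;
//
// so every atomic statement can address the tested structure as "buf.data".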
1327 if (memoryType != AtomicMemoryType::REFERENCE)
1328 {
1329 shaderTemplateGlobalStream << "layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {\n"
1330 << " AtomicStruct data;\n"
1331 << "} ${RESULT_BUFFER_NAME};\n"
1332 << "\n";
1333
1334 // When using global shared memory in the compute, task or mesh variants, invocations will use a shared global structure
1335 // instead of a descriptor set as the sources and results of each tested operation.
1336 if (memoryType == AtomicMemoryType::SHARED)
1337 {
1338 shaderTemplateGlobalStream << "shared struct { AtomicStruct data; } buf;\n"
1339 << "\n";
1340 }
1341 else if (memoryType == AtomicMemoryType::PAYLOAD)
1342 {
1343 shaderTemplateGlobalStream << "struct TaskData { AtomicStruct data; };\n"
1344 << "taskPayloadSharedEXT TaskData buf;\n";
1345 }
1346 }
1347 else
1348 {
1349 shaderTemplateGlobalStream << "layout (buffer_reference) buffer AtomicBuffer {\n"
1350 << " AtomicStruct data;\n"
1351 << "};\n"
1352 << "\n"
1353 << "layout (set = ${SETIDX}, binding = 0) uniform References {\n"
1354 << " AtomicBuffer buf;\n"
1355 << "};\n"
1356 << "\n";
1357 }
1358
1359 const auto shaderTemplateGlobalString = shaderTemplateGlobalStream.str();
1360 const tcu::StringTemplate shaderTemplateGlobal(shaderTemplateGlobalString);
1361
1362 // Shader body for the non-vertex case.
1363 std::ostringstream nonVertexShaderTemplateStream;
1364
1365 if (isSharedLike)
1366 {
1367 // Invocation zero will initialize the shared structure from the descriptor set.
1368 nonVertexShaderTemplateStream << "if (gl_LocalInvocationIndex == 0u)\n"
1369 << "{\n"
1370 << " buf.data = ${RESULT_BUFFER_NAME}.data;\n"
1371 << "}\n"
1372 << "barrier();\n";
1373 }
1374
1375 if (m_shaderType.getType() == glu::SHADERTYPE_FRAGMENT)
1376 {
1377 nonVertexShaderTemplateStream << "if (!gl_HelperInvocation) {\n"
1378 << " int idx = atomicAdd(buf.data.index, 1);\n"
1379 << " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % "
1380 "(${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1381 << "}\n";
1382 }
1383 else
1384 {
1385 nonVertexShaderTemplateStream << "if (atomicAdd(buf.data.invocationHitCount[0], 1) < ${N})\n"
1386 << "{\n"
1387 << " int idx = atomicAdd(buf.data.index, 1);\n"
1388 << " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % "
1389 "(${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1390 << "}\n";
1391 }
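// The remaining stages gate on invocationHitCount[0] so that at most ${N} atomic operations are
// performed even if the stage runs more invocations than there are output slots.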
1392
1393 if (isSharedLike)
1394 {
1395 // Invocation zero will copy results back to the descriptor set.
1396 nonVertexShaderTemplateStream << "barrier();\n"
1397 << "if (gl_LocalInvocationIndex == 0u)\n"
1398 << "{\n"
1399 << " ${RESULT_BUFFER_NAME}.data = buf.data;\n"
1400 << "}\n";
1401 }
1402
1403 const auto nonVertexShaderTemplateStreamStr = nonVertexShaderTemplateStream.str();
1404 const tcu::StringTemplate nonVertexShaderTemplateSrc(nonVertexShaderTemplateStreamStr);
1405
1406 // Shader body for the vertex case.
1407 const tcu::StringTemplate vertexShaderTemplateSrc(
1408 "int idx = gl_VertexIndex;\n"
1409 "if (atomicAdd(buf.data.invocationHitCount[idx], 1) == 0)\n"
1410 "{\n"
1411 " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], "
1412 "${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1413 "}\n");
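// The vertex variant indexes by gl_VertexIndex and uses invocationHitCount to make sure each index
// performs its atomic operation only once, even if the same vertex is processed multiple times.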
1414
1415 // Extensions.
1416 std::ostringstream extensions;
1417
1418 if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
1419 {
1420 extensions << "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
1421 << "#extension GL_EXT_shader_atomic_int64 : enable\n";
1422 }
1423 else if ((m_dataType == DATA_TYPE_FLOAT16) || (m_dataType == DATA_TYPE_FLOAT16X2) ||
1424 (m_dataType == DATA_TYPE_FLOAT16X4) || (m_dataType == DATA_TYPE_FLOAT32) ||
1425 (m_dataType == DATA_TYPE_FLOAT64))
1426 {
1427 extensions << "#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable\n"
1428 << "#extension GL_EXT_shader_atomic_float : enable\n"
1429 << "#extension GL_EXT_shader_atomic_float2 : enable\n"
1430 << "#extension GL_KHR_memory_scope_semantics : enable\n";
1431 if (m_dataType == DATA_TYPE_FLOAT16X2 || m_dataType == DATA_TYPE_FLOAT16X4)
1432 {
1433 extensions << "#extension GL_NV_shader_atomic_fp16_vector : require\n";
1434 }
1435 }
1436
1437 if (memoryType == AtomicMemoryType::REFERENCE)
1438 {
1439 extensions << "#extension GL_EXT_buffer_reference : require\n";
1440 }
1441
1442 // Specializations.
1443 std::map<std::string, std::string> specializations;
1444
1445 specializations["EXTENSIONS"] = extensions.str();
1446 specializations["DATATYPE"] = dataType2Str(m_dataType);
1447 specializations["ATOMICOP"] = atomicOp2Str(m_atomicOp);
1448 specializations["SETIDX"] = de::toString((int)EXTRA_RESOURCES_DESCRIPTOR_SET_INDEX);
1449 specializations["N"] = de::toString((int)NUM_ELEMENTS);
1450 specializations["COMPARE_ARG"] = ((m_atomicOp == ATOMIC_OP_COMP_SWAP) ? "buf.data.compareValues[idx], " : "");
1451 specializations["RESULT_BUFFER_NAME"] = (isSharedLike ? "result" : "buf");
1452
1453 // Shader spec.
1454 m_shaderSpec.outputs.push_back(Symbol("outData", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
1455 m_shaderSpec.glslVersion = glu::GLSL_VERSION_450;
1456 m_shaderSpec.globalDeclarations = shaderTemplateGlobal.specialize(specializations);
1457 m_shaderSpec.source =
1458 ((m_shaderType.getType() == glu::SHADERTYPE_VERTEX) ? vertexShaderTemplateSrc.specialize(specializations) :
1459 nonVertexShaderTemplateSrc.specialize(specializations));
1460
1461 if (isSharedLike)
1462 {
1463 // When using global shared memory, use a single workgroup and an appropriate number of local invocations.
1464 m_shaderSpec.localSizeX = static_cast<int>(NUM_ELEMENTS);
1465 }
1466 }
1467
void addAtomicOperationTests(tcu::TestCaseGroup *atomicOperationTestsGroup)
1469 {
1470 tcu::TestContext &testCtx = atomicOperationTestsGroup->getTestContext();
1471
1472 static const struct
1473 {
1474 glu::ShaderType type;
1475 const char *name;
1476 } shaderTypes[] = {
1477 {glu::SHADERTYPE_VERTEX, "vertex"},
1478 {glu::SHADERTYPE_FRAGMENT, "fragment"},
1479 {glu::SHADERTYPE_GEOMETRY, "geometry"},
1480 {glu::SHADERTYPE_TESSELLATION_CONTROL, "tess_ctrl"},
1481 {glu::SHADERTYPE_TESSELLATION_EVALUATION, "tess_eval"},
1482 {glu::SHADERTYPE_COMPUTE, "compute"},
1483 {glu::SHADERTYPE_TASK, "task"},
1484 {glu::SHADERTYPE_MESH, "mesh"},
1485 };
1486
1487 static const struct
1488 {
1489 AtomicMemoryType type;
1490 const char *suffix;
1491 } kMemoryTypes[] = {
1492 {AtomicMemoryType::BUFFER, ""},
1493 {AtomicMemoryType::SHARED, "_shared"},
1494 {AtomicMemoryType::REFERENCE, "_reference"},
1495 {AtomicMemoryType::PAYLOAD, "_payload"},
1496 };
1497
1498 static const struct
1499 {
1500 DataType dataType;
1501 const char *name;
1502 } dataSign[] = {
1503 #ifndef CTS_USES_VULKANSC
1504 // Tests using 16-bit float data
1505 {DATA_TYPE_FLOAT16, "float16"},
1506 // Tests using f16vec2 data
1507 {DATA_TYPE_FLOAT16X2, "f16vec2"},
1508 // Tests using f16vec4 data
1509 {DATA_TYPE_FLOAT16X4, "f16vec4"},
1510 #endif // CTS_USES_VULKANSC
1511 // Tests using signed data (int)
1512 {DATA_TYPE_INT32, "signed"},
1513 // Tests using unsigned data (uint)
1514 {DATA_TYPE_UINT32, "unsigned"},
1515 // Tests using 32-bit float data
1516 {DATA_TYPE_FLOAT32, "float32"},
1517 // Tests using 64 bit signed data (int64)
1518 {DATA_TYPE_INT64, "signed64bit"},
1519 // Tests using 64 bit unsigned data (uint64)
1520 {DATA_TYPE_UINT64, "unsigned64bit"},
1521 // Tests using 64-bit float data)
1522 {DATA_TYPE_FLOAT64, "float64"}};
1523
1524 static const struct
1525 {
1526 AtomicOperation value;
1527 const char *name;
1528 } atomicOp[] = {{ATOMIC_OP_EXCHANGE, "exchange"},
1529 {ATOMIC_OP_COMP_SWAP, "comp_swap"},
1530 {ATOMIC_OP_ADD, "add"},
1531 {ATOMIC_OP_MIN, "min"},
1532 {ATOMIC_OP_MAX, "max"},
1533 {ATOMIC_OP_AND, "and"},
1534 {ATOMIC_OP_OR, "or"},
1535 {ATOMIC_OP_XOR, "xor"}};
1536
1537 for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(atomicOp); opNdx++)
1538 {
1539 for (int signNdx = 0; signNdx < DE_LENGTH_OF_ARRAY(dataSign); signNdx++)
1540 {
1541 for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(shaderTypes); shaderTypeNdx++)
1542 {
// Only ADD, MIN, MAX and EXCHANGE are tested on floating-point types (MIN and MAX only outside VulkanSC)
1544 if (dataSign[signNdx].dataType == DATA_TYPE_FLOAT16 ||
1545 dataSign[signNdx].dataType == DATA_TYPE_FLOAT16X2 ||
1546 dataSign[signNdx].dataType == DATA_TYPE_FLOAT16X4 ||
1547 dataSign[signNdx].dataType == DATA_TYPE_FLOAT32 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT64)
1548 {
1549 if (atomicOp[opNdx].value != ATOMIC_OP_ADD &&
1550 #ifndef CTS_USES_VULKANSC
1551 atomicOp[opNdx].value != ATOMIC_OP_MIN && atomicOp[opNdx].value != ATOMIC_OP_MAX &&
1552 #endif // CTS_USES_VULKANSC
1553 atomicOp[opNdx].value != ATOMIC_OP_EXCHANGE)
1554 {
1555 continue;
1556 }
1557 }
1558
1559 for (int memoryTypeNdx = 0; memoryTypeNdx < DE_LENGTH_OF_ARRAY(kMemoryTypes); ++memoryTypeNdx)
1560 {
1561 // Shared memory only available in compute, task and mesh shaders.
1562 if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::SHARED &&
1563 shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_COMPUTE &&
1564 shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_TASK &&
1565 shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_MESH)
1566 continue;
1567
1568 // Payload memory is only available for atomics in task shaders (in mesh shaders it's read-only)
1569 if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::PAYLOAD &&
1570 shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_TASK)
1571 continue;
1572
1573 const std::string name =
1574 std::string(atomicOp[opNdx].name) + "_" + std::string(dataSign[signNdx].name) + "_" +
1575 std::string(shaderTypes[shaderTypeNdx].name) + kMemoryTypes[memoryTypeNdx].suffix;
1576
1577 atomicOperationTestsGroup->addChild(new AtomicOperationCase(
1578 testCtx, name.c_str(),
1579 AtomicShaderType(shaderTypes[shaderTypeNdx].type, kMemoryTypes[memoryTypeNdx].type),
1580 dataSign[signNdx].dataType, atomicOp[opNdx].value));
1581 }
1582 }
1583 }
1584 }
1585 }
1586
1587 } // namespace
1588
tcu::TestCaseGroup *createAtomicOperationTests(tcu::TestContext &testCtx)
1590 {
1591 return createTestGroup(testCtx, "atomic_operations", addAtomicOperationTests);
1592 }
1593
1594 } // namespace shaderexecutor
1595 } // namespace vkt
1596