/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2015 The Khronos Group Inc.
 * Copyright (c) 2017 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Atomic operations (OpAtomic*) tests.
 *//*--------------------------------------------------------------------*/

#include "vktAtomicOperationTests.hpp"
#include "vktShaderExecutor.hpp"

#include "vkRefUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vktTestGroupUtil.hpp"

#include "tcuTestLog.hpp"
#include "tcuStringTemplate.hpp"
#include "tcuResultCollector.hpp"

#include "deFloat16.h"
#include "deMath.hpp"
#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deRandom.hpp"
#include "deArrayUtil.hpp"

#include <string>
#include <memory>
#include <cmath>

namespace vkt
{
namespace shaderexecutor
{

namespace
{

using de::UniquePtr;
using de::MovePtr;
using std::vector;

using namespace vk;
enum class AtomicMemoryType
{
    BUFFER = 0, // Normal buffer.
    SHARED,     // Shared global struct in a compute workgroup.
    REFERENCE,  // Buffer passed as a reference.
    PAYLOAD,    // Task payload.
};

// Helper class indicating the shader type and the type of memory the atomic operations act on.
class AtomicShaderType
{
public:
    AtomicShaderType (glu::ShaderType type, AtomicMemoryType memoryType)
        : m_type             (type)
        , m_atomicMemoryType (memoryType)
    {
        // Shared memory can only be used with compute, task and mesh shaders.
        DE_ASSERT(memoryType != AtomicMemoryType::SHARED
                  || type == glu::SHADERTYPE_COMPUTE
                  || type == glu::SHADERTYPE_TASK
                  || type == glu::SHADERTYPE_MESH);

        // Task payload memory can only be tested in task shaders.
        DE_ASSERT(memoryType != AtomicMemoryType::PAYLOAD || type == glu::SHADERTYPE_TASK);
    }

    glu::ShaderType  getType            (void) const { return m_type; }
    AtomicMemoryType getMemoryType      (void) const { return m_atomicMemoryType; }
    bool             isSharedLike       (void) const { return m_atomicMemoryType == AtomicMemoryType::SHARED || m_atomicMemoryType == AtomicMemoryType::PAYLOAD; }
    bool             isMeshShadingStage (void) const { return (m_type == glu::SHADERTYPE_TASK || m_type == glu::SHADERTYPE_MESH); }

private:
    glu::ShaderType  m_type;
    AtomicMemoryType m_atomicMemoryType;
};

// Buffer helper
class Buffer
{
public:
    Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef);

    VkBuffer getBuffer   (void) const { return *m_buffer; }
    void*    getHostPtr  (void) const { return m_allocation->getHostPtr(); }
    void     flush       (void);
    void     invalidate  (void);

private:
    const DeviceInterface&      m_vkd;
    const VkDevice              m_device;
    const VkQueue               m_queue;
    const deUint32              m_queueIndex;
    const Unique<VkBuffer>      m_buffer;
    const UniquePtr<Allocation> m_allocation;
};

typedef de::SharedPtr<Buffer> BufferSp;

Move<VkBuffer> createBuffer (const DeviceInterface& vkd, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usageFlags)
{
    const VkBufferCreateInfo createInfo =
    {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        DE_NULL,
        (VkBufferCreateFlags)0,
        size,
        usageFlags,
        VK_SHARING_MODE_EXCLUSIVE,
        0u,
        DE_NULL
    };
    return createBuffer(vkd, device, &createInfo);
}

MovePtr<Allocation> allocateAndBindMemory (const DeviceInterface& vkd, VkDevice device, Allocator& allocator, VkBuffer buffer, bool useRef)
{
    const MemoryRequirement allocationType = (MemoryRequirement::HostVisible | (useRef ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any));
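    // Note: buffers that will be accessed through their device address (the REFERENCE memory type) must be backed by
    // memory allocated with the device-address requirement in addition to being host-visible.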
    MovePtr<Allocation> alloc(allocator.allocate(getBufferMemoryRequirements(vkd, device, buffer), allocationType));

    VK_CHECK(vkd.bindBufferMemory(device, buffer, alloc->getMemory(), alloc->getOffset()));

    return alloc;
}

Buffer::Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef)
    : m_vkd        (context.getDeviceInterface())
    , m_device     (context.getDevice())
    , m_queue      (context.getUniversalQueue())
    , m_queueIndex (context.getUniversalQueueFamilyIndex())
    , m_buffer     (createBuffer          (context.getDeviceInterface(),
                                           context.getDevice(),
                                           (VkDeviceSize)size,
                                           usage))
    , m_allocation (allocateAndBindMemory (context.getDeviceInterface(),
                                           context.getDevice(),
                                           context.getDefaultAllocator(),
                                           *m_buffer,
                                           useRef))
{
}

void Buffer::flush (void)
{
    flushMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

void Buffer::invalidate (void)
{
    const auto cmdPool       = vk::makeCommandPool(m_vkd, m_device, m_queueIndex);
    const auto cmdBufferPtr  = vk::allocateCommandBuffer(m_vkd, m_device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
    const auto cmdBuffer     = cmdBufferPtr.get();
    const auto bufferBarrier = vk::makeBufferMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, m_buffer.get(), 0ull, VK_WHOLE_SIZE);
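    // Record and submit a device-to-host memory barrier so that any device writes to the buffer are made available and
    // visible to host reads before the mapped range is invalidated below.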

    beginCommandBuffer(m_vkd, cmdBuffer);
    m_vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
    endCommandBuffer(m_vkd, cmdBuffer);
    submitCommandsAndWait(m_vkd, m_device, m_queue, cmdBuffer);

    invalidateMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

// Tests

enum AtomicOperation
{
    ATOMIC_OP_EXCHANGE = 0,
    ATOMIC_OP_COMP_SWAP,
    ATOMIC_OP_ADD,
    ATOMIC_OP_MIN,
    ATOMIC_OP_MAX,
    ATOMIC_OP_AND,
    ATOMIC_OP_OR,
    ATOMIC_OP_XOR,

    ATOMIC_OP_LAST
};

std::string atomicOp2Str (AtomicOperation op)
{
    static const char* const s_names[] =
    {
        "atomicExchange",
        "atomicCompSwap",
        "atomicAdd",
        "atomicMin",
        "atomicMax",
        "atomicAnd",
        "atomicOr",
        "atomicXor"
    };
    return de::getSizedArrayElement<ATOMIC_OP_LAST>(s_names, op);
}

enum
{
    NUM_ELEMENTS = 32
};

enum DataType
{
    DATA_TYPE_FLOAT16 = 0,
    DATA_TYPE_INT32,
    DATA_TYPE_UINT32,
    DATA_TYPE_FLOAT32,
    DATA_TYPE_INT64,
    DATA_TYPE_UINT64,
    DATA_TYPE_FLOAT64,

    DATA_TYPE_LAST
};

std::string dataType2Str (DataType type)
{
    static const char* const s_names[] =
    {
        "float16_t",
        "int",
        "uint",
        "float",
        "int64_t",
        "uint64_t",
        "double",
    };
    return de::getSizedArrayElement<DATA_TYPE_LAST>(s_names, type);
}

class BufferInterface
{
public:
    virtual void setBuffer(void* ptr) = 0;

    virtual size_t bufferSize() = 0;

    virtual void fillWithTestData(de::Random& rnd) = 0;

    virtual void checkResults(tcu::ResultCollector& resultCollector) = 0;

    virtual ~BufferInterface() {}
};

template<typename dataTypeT>
class TestBuffer : public BufferInterface
{
public:

    TestBuffer (AtomicOperation atomicOp)
        : m_atomicOp(atomicOp)
    {}

    template<typename T>
    struct BufferData
    {
        // Use half the number of elements for inout to cause overlap between atomic operations.
        // Each inout element at index i will have two atomic operations using input from
        // indices i and i + NUM_ELEMENTS / 2.
        T       inout[NUM_ELEMENTS / 2];
        T       input[NUM_ELEMENTS];
        T       compare[NUM_ELEMENTS];
        T       output[NUM_ELEMENTS];
        T       invocationHitCount[NUM_ELEMENTS];
        deInt32 index;
    };

    virtual void setBuffer (void* ptr)
    {
        m_ptr = static_cast<BufferData<dataTypeT>*>(ptr);
    }

    virtual size_t bufferSize ()
    {
        return sizeof(BufferData<dataTypeT>);
    }

    virtual void fillWithTestData (de::Random& rnd)
    {
        dataTypeT pattern;
        deMemset(&pattern, 0xcd, sizeof(dataTypeT));

        for (int i = 0; i < NUM_ELEMENTS / 2; i++)
        {
            m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getUint64());
            // The compare element in the first half matches the inout value for even indices and the
            // one in the second half matches it for odd indices. This causes only one of the two
            // overlapping compare-swap operations to take effect.
            m_ptr->compare[i] = m_ptr->inout[i] + (i % 2);
            m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + 1 - (i % 2);
        }
        for (int i = 0; i < NUM_ELEMENTS; i++)
        {
            m_ptr->input[i] = static_cast<dataTypeT>(rnd.getUint64());
            m_ptr->output[i] = pattern;
            m_ptr->invocationHitCount[i] = 0;
        }
        m_ptr->index = 0;

        // Take a copy to be used when calculating expected values.
        m_original = *m_ptr;
    }

    virtual void checkResults (tcu::ResultCollector& resultCollector)
    {
        checkOperation(m_original, *m_ptr, resultCollector);
    }

    template<typename T>
    struct Expected
    {
        T m_inout;
        T m_output[2];

        Expected (T inout, T output0, T output1)
            : m_inout(inout)
        {
            m_output[0] = output0;
            m_output[1] = output1;
        }

        bool compare (T inout, T output0, T output1)
        {
            return (deMemCmp((const void*)&m_inout, (const void*)&inout, sizeof(inout)) == 0
                    && deMemCmp((const void*)&m_output[0], (const void*)&output0, sizeof(output0)) == 0
                    && deMemCmp((const void*)&m_output[1], (const void*)&output1, sizeof(output1)) == 0);
        }
    };

    void checkOperation (const BufferData<dataTypeT>& original,
                         const BufferData<dataTypeT>& result,
                         tcu::ResultCollector& resultCollector);

    const AtomicOperation  m_atomicOp;

    BufferData<dataTypeT>* m_ptr;
    BufferData<dataTypeT>  m_original;

};

template<typename T>
bool nanSafeSloppyEquals (T x, T y)
{
    if (deIsIEEENaN(x) && deIsIEEENaN(y))
        return true;

    if (deIsIEEENaN(x) || deIsIEEENaN(y))
        return false;

    return fabs(deToDouble(x) - deToDouble(y)) < 0.00001;
}

template<typename dataTypeT>
class TestBufferFloatingPoint : public BufferInterface
{
public:

    TestBufferFloatingPoint (AtomicOperation atomicOp)
        : m_atomicOp(atomicOp)
    {}

    template<typename T>
    struct BufferDataFloatingPoint
    {
        // Use half the number of elements for inout to cause overlap between atomic operations.
        // Each inout element at index i will have two atomic operations using input from
        // indices i and i + NUM_ELEMENTS / 2.
        T       inout[NUM_ELEMENTS / 2];
        T       input[NUM_ELEMENTS];
        T       compare[NUM_ELEMENTS];
        T       output[NUM_ELEMENTS];
        deInt32 invocationHitCount[NUM_ELEMENTS];
        deInt32 index;
    };

    virtual void setBuffer (void* ptr)
    {
        m_ptr = static_cast<BufferDataFloatingPoint<dataTypeT>*>(ptr);
    }

    virtual size_t bufferSize ()
    {
        return sizeof(BufferDataFloatingPoint<dataTypeT>);
    }
    virtual void fillWithTestData (de::Random& rnd)
    {
        dataTypeT pattern;
        deMemset(&pattern, 0xcd, sizeof(dataTypeT));

        for (int i = 0; i < NUM_ELEMENTS / 2; i++)
        {
            m_ptr->inout[i] = deToFloatType<dataTypeT>(rnd.getFloat());
            // These aren't used by any of the float tests
            m_ptr->compare[i] = deToFloatType<dataTypeT>(0.0);
        }
        for (int i = 0; i < NUM_ELEMENTS; i++)
        {
            m_ptr->input[i] = deToFloatType<dataTypeT>(rnd.getFloat());
            m_ptr->output[i] = pattern;
            m_ptr->invocationHitCount[i] = 0;
        }

        // Add special cases for NaN and +/-0 after the random fill above so they are not overwritten.
        // 0: min(sNaN, x)
        m_ptr->inout[0] = deSignalingNaN<dataTypeT>();
        // 1: min(x, sNaN)
        m_ptr->input[1 * 2 + 0] = deSignalingNaN<dataTypeT>();
        // 2: min(qNaN, x)
        m_ptr->inout[2] = deQuietNaN<dataTypeT>();
        // 3: min(x, qNaN)
        m_ptr->input[3 * 2 + 0] = deQuietNaN<dataTypeT>();
        // 4: min(NaN, NaN)
        m_ptr->inout[4] = deSignalingNaN<dataTypeT>();
        m_ptr->input[4 * 2 + 0] = deQuietNaN<dataTypeT>();
        m_ptr->input[4 * 2 + 1] = deQuietNaN<dataTypeT>();
        // 5: min(+0, -0)
        m_ptr->inout[5] = deToFloatType<dataTypeT>(-0.0);
        m_ptr->input[5 * 2 + 0] = deToFloatType<dataTypeT>(0.0);
        m_ptr->input[5 * 2 + 1] = deToFloatType<dataTypeT>(0.0);

        m_ptr->index = 0;

        // Take a copy to be used when calculating expected values.
        m_original = *m_ptr;
    }

    virtual void checkResults (tcu::ResultCollector& resultCollector)
    {
        checkOperationFloatingPoint(m_original, *m_ptr, resultCollector);
    }

    template<typename T>
    struct Expected
    {
        T m_inout;
        T m_output[2];

        Expected (T inout, T output0, T output1)
            : m_inout(inout)
        {
            m_output[0] = output0;
            m_output[1] = output1;
        }

        bool compare (T inout, T output0, T output1)
        {
            return nanSafeSloppyEquals(m_inout, inout) &&
                   nanSafeSloppyEquals(m_output[0], output0) &&
                   nanSafeSloppyEquals(m_output[1], output1);
        }
    };

    void checkOperationFloatingPoint (const BufferDataFloatingPoint<dataTypeT>& original,
                                      const BufferDataFloatingPoint<dataTypeT>& result,
                                      tcu::ResultCollector& resultCollector);

    const AtomicOperation               m_atomicOp;

    BufferDataFloatingPoint<dataTypeT>* m_ptr;
    BufferDataFloatingPoint<dataTypeT>  m_original;

};

static BufferInterface* createTestBuffer (DataType type, AtomicOperation atomicOp)
{
    switch (type)
    {
    case DATA_TYPE_FLOAT16:
        return new TestBufferFloatingPoint<deFloat16>(atomicOp);
    case DATA_TYPE_INT32:
        return new TestBuffer<deInt32>(atomicOp);
    case DATA_TYPE_UINT32:
        return new TestBuffer<deUint32>(atomicOp);
    case DATA_TYPE_FLOAT32:
        return new TestBufferFloatingPoint<float>(atomicOp);
    case DATA_TYPE_INT64:
        return new TestBuffer<deInt64>(atomicOp);
    case DATA_TYPE_UINT64:
        return new TestBuffer<deUint64>(atomicOp);
    case DATA_TYPE_FLOAT64:
        return new TestBufferFloatingPoint<double>(atomicOp);
    default:
        DE_ASSERT(false);
        return DE_NULL;
    }
}

// Use a template to handle both the signed and unsigned cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBuffer<T>::checkOperation (const BufferData<T>& original,
                                    const BufferData<T>& result,
                                    tcu::ResultCollector& resultCollector)
{
    // originalInout = original inout
    // input0        = input at index i
    // input1        = input at index i + NUM_ELEMENTS / 2
    //
    // The atomic operation returns the memory contents before the operation,
    // and this is stored as output. Two operations are executed for each InOut
    // value (using input0 and input1).
    //
    // Since two operations overlap on each InOut element, the resulting InOut
    // value and the outputs of the operations have two result candidates
    // depending on the execution order. Verification passes
    // if the results match one of these options.
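    //
    // For example, for ATOMIC_OP_ADD with originalInout = 1, input0 = 2 and input1 = 3, the accepted
    // outcomes are (InOut = 6, Output0 = 1, Output1 = 3) if the input0 operation executes first, or
    // (InOut = 6, Output0 = 4, Output1 = 1) if the input1 operation executes first.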

    for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
    {
        // Needed when reinterpreting the data as signed values.
        const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
        const T input0        = *reinterpret_cast<const T*>(&original.input[elementNdx]);
        const T input1        = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

        // Expected results are collected to this vector.
        vector<Expected<T> > exp;

        switch (m_atomicOp)
        {
        case ATOMIC_OP_ADD:
        {
            exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
            exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
        }
        break;

        case ATOMIC_OP_AND:
        {
            exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout, originalInout & input0));
            exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout & input1, originalInout));
        }
        break;

        case ATOMIC_OP_OR:
        {
            exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout, originalInout | input0));
            exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout | input1, originalInout));
        }
        break;

        case ATOMIC_OP_XOR:
        {
            exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout, originalInout ^ input0));
            exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout ^ input1, originalInout));
        }
        break;

        case ATOMIC_OP_MIN:
        {
            exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), originalInout, de::min(originalInout, input0)));
            exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), de::min(originalInout, input1), originalInout));
        }
        break;

        case ATOMIC_OP_MAX:
        {
            exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), originalInout, de::max(originalInout, input0)));
            exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), de::max(originalInout, input1), originalInout));
        }
        break;

        case ATOMIC_OP_EXCHANGE:
        {
            exp.push_back(Expected<T>(input1, originalInout, input0));
            exp.push_back(Expected<T>(input0, input1, originalInout));
        }
        break;

        case ATOMIC_OP_COMP_SWAP:
        {
            if (elementNdx % 2 == 0)
            {
                exp.push_back(Expected<T>(input0, originalInout, input0));
                exp.push_back(Expected<T>(input0, originalInout, originalInout));
            }
            else
            {
                exp.push_back(Expected<T>(input1, input1, originalInout));
                exp.push_back(Expected<T>(input1, originalInout, originalInout));
            }
        }
        break;

        default:
            DE_FATAL("Unexpected atomic operation.");
            break;
        }

        const T resIo      = result.inout[elementNdx];
        const T resOutput0 = result.output[elementNdx];
        const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];

        if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
        {
            std::ostringstream errorMessage;
            errorMessage << "ERROR: Result value check failed at index " << elementNdx
                         << ". Expected one of the two outcomes: InOut = " << tcu::toHex(exp[0].m_inout)
                         << ", Output0 = " << tcu::toHex(exp[0].m_output[0]) << ", Output1 = "
                         << tcu::toHex(exp[0].m_output[1]) << ", or InOut = " << tcu::toHex(exp[1].m_inout)
                         << ", Output0 = " << tcu::toHex(exp[1].m_output[0]) << ", Output1 = "
                         << tcu::toHex(exp[1].m_output[1]) << ". Got: InOut = " << tcu::toHex(resIo)
                         << ", Output0 = " << tcu::toHex(resOutput0) << ", Output1 = "
                         << tcu::toHex(resOutput1) << ". Using Input0 = " << tcu::toHex(original.input[elementNdx])
                         << " and Input1 = " << tcu::toHex(original.input[elementNdx + NUM_ELEMENTS / 2]) << ".";

            resultCollector.fail(errorMessage.str());
        }
    }
}

template<typename T>
void handleExceptionalFloatMinMaxValues (vector<T>& values, T x, T y)
{
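    // Collects the set of result values accepted for an atomic float min/max when either operand is a
    // NaN or when comparing +0.0 and -0.0. If neither operand is special, the vector is left empty and
    // the caller falls back to the regular min/max comparison.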

    if (deIsSignalingNaN(x) && deIsSignalingNaN(y))
    {
        values.push_back(deQuietNaN<T>());
        values.push_back(deSignalingNaN<T>());
    }
    else if (deIsSignalingNaN(x))
    {
        values.push_back(deQuietNaN<T>());
        values.push_back(deSignalingNaN<T>());
        if (!deIsIEEENaN(y))
            values.push_back(y);
    }
    else if (deIsSignalingNaN(y))
    {
        values.push_back(deQuietNaN<T>());
        values.push_back(deSignalingNaN<T>());
        if (!deIsIEEENaN(x))
            values.push_back(x);
    }
    else if (deIsIEEENaN(x) && deIsIEEENaN(y))
    {
        // Both quiet NaNs
        values.push_back(deQuietNaN<T>());
    }
    else if (deIsIEEENaN(x))
    {
        // One quiet NaN and one non-NaN.
        values.push_back(y);
    }
    else if (deIsIEEENaN(y))
    {
        // One quiet NaN and one non-NaN.
        values.push_back(x);
    }
    else if ((deIsPositiveZero(x) && deIsNegativeZero(y)) || (deIsNegativeZero(x) && deIsPositiveZero(y)))
    {
        values.push_back(deToFloatType<T>(0.0));
        values.push_back(deToFloatType<T>(-0.0));
    }
}

template<typename T>
T floatAdd (T x, T y)
{
    if (deIsIEEENaN(x) || deIsIEEENaN(y))
        return deQuietNaN<T>();
    return deToFloatType<T>(deToDouble(x) + deToDouble(y));
}

template<typename T>
vector<T> floatMinValues (T x, T y)
{
    vector<T> values;
    handleExceptionalFloatMinMaxValues(values, x, y);
    if (values.empty())
    {
        values.push_back(deToDouble(x) < deToDouble(y) ? x : y);
    }
    return values;
}

template<typename T>
vector<T> floatMaxValues (T x, T y)
{
    vector<T> values;
    handleExceptionalFloatMinMaxValues(values, x, y);
    if (values.empty())
    {
        values.push_back(deToDouble(x) > deToDouble(y) ? x : y);
    }
    return values;
}
// Use a template to handle both the float and double cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBufferFloatingPoint<T>::checkOperationFloatingPoint (const BufferDataFloatingPoint<T>& original,
                                                              const BufferDataFloatingPoint<T>& result,
                                                              tcu::ResultCollector& resultCollector)
{
    // originalInout = original inout
    // input0        = input at index i
    // input1        = input at index i + NUM_ELEMENTS / 2
    //
    // The atomic operation returns the memory contents before the operation,
    // and this is stored as output. Two operations are executed for each InOut
    // value (using input0 and input1).
    //
    // Since two operations overlap on each InOut element, the resulting InOut
    // value and the outputs of the operations have multiple result candidates
    // depending on the execution order. Verification passes
    // if the results match one of these options.

    for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
    {
        // Needed when reinterpreting the data as signed values.
        const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
        const T input0        = *reinterpret_cast<const T*>(&original.input[elementNdx]);
        const T input1        = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

        // Expected results are collected to this vector.
        vector<Expected<T> > exp;

        switch (m_atomicOp)
        {
        case ATOMIC_OP_ADD:
        {
            exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), originalInout, floatAdd(originalInout, input0)));
            exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), floatAdd(originalInout, input1), originalInout));
        }
        break;

        case ATOMIC_OP_MIN:
        {
            // The case where input0 is combined first
            vector<T> minOriginalAndInput0 = floatMinValues(originalInout, input0);
            for (T x : minOriginalAndInput0)
            {
                vector<T> minAll = floatMinValues(x, input1);
                for (T y : minAll)
                {
                    exp.push_back(Expected<T>(y, originalInout, x));
                }
            }

            // The case where input1 is combined first
            vector<T> minOriginalAndInput1 = floatMinValues(originalInout, input1);
            for (T x : minOriginalAndInput1)
            {
                vector<T> minAll = floatMinValues(x, input0);
                for (T y : minAll)
                {
                    exp.push_back(Expected<T>(y, x, originalInout));
                }
            }
        }
        break;

        case ATOMIC_OP_MAX:
        {
            // The case where input0 is combined first
            vector<T> maxOriginalAndInput0 = floatMaxValues(originalInout, input0);
            for (T x : maxOriginalAndInput0)
            {
                vector<T> maxAll = floatMaxValues(x, input1);
                for (T y : maxAll)
                {
                    exp.push_back(Expected<T>(y, originalInout, x));
                }
            }

            // The case where input1 is combined first
            vector<T> maxOriginalAndInput1 = floatMaxValues(originalInout, input1);
            for (T x : maxOriginalAndInput1)
            {
                vector<T> maxAll = floatMaxValues(x, input0);
                for (T y : maxAll)
                {
                    exp.push_back(Expected<T>(y, x, originalInout));
                }
            }
        }
        break;

        case ATOMIC_OP_EXCHANGE:
        {
            exp.push_back(Expected<T>(input1, originalInout, input0));
            exp.push_back(Expected<T>(input0, input1, originalInout));
        }
        break;

        default:
            DE_FATAL("Unexpected atomic operation.");
            break;
        }

        const T resIo      = result.inout[elementNdx];
        const T resOutput0 = result.output[elementNdx];
        const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];

        bool hasMatch = false;
        for (Expected<T> e : exp)
        {
            if (e.compare(resIo, resOutput0, resOutput1))
            {
                hasMatch = true;
                break;
            }
        }
        if (!hasMatch)
        {
            std::ostringstream errorMessage;
            errorMessage << "ERROR: Result value check failed at index " << elementNdx
                         << ". Expected one of the outcomes:";

            bool first = true;
            for (Expected<T> e : exp)
            {
                if (!first)
                    errorMessage << ", or";
                first = false;

                errorMessage << " InOut = " << e.m_inout
                             << ", Output0 = " << e.m_output[0]
                             << ", Output1 = " << e.m_output[1];
            }

            errorMessage << ". Got: InOut = " << resIo
                         << ", Output0 = " << resOutput0
                         << ", Output1 = " << resOutput1
                         << ". Using Input0 = " << original.input[elementNdx]
                         << " and Input1 = " << original.input[elementNdx + NUM_ELEMENTS / 2] << ".";

            resultCollector.fail(errorMessage.str());
        }
    }
}

class AtomicOperationCaseInstance : public TestInstance
{
public:
    AtomicOperationCaseInstance (Context&          context,
                                 const ShaderSpec& shaderSpec,
                                 AtomicShaderType  shaderType,
                                 DataType          dataType,
                                 AtomicOperation   atomicOp);

    virtual tcu::TestStatus iterate (void);

private:
    const ShaderSpec& m_shaderSpec;
    AtomicShaderType  m_shaderType;
    const DataType    m_dataType;
    AtomicOperation   m_atomicOp;

};

AtomicOperationCaseInstance::AtomicOperationCaseInstance (Context&          context,
                                                          const ShaderSpec& shaderSpec,
                                                          AtomicShaderType  shaderType,
                                                          DataType          dataType,
                                                          AtomicOperation   atomicOp)
    : TestInstance (context)
    , m_shaderSpec (shaderSpec)
    , m_shaderType (shaderType)
    , m_dataType   (dataType)
    , m_atomicOp   (atomicOp)
{
}

tcu::TestStatus AtomicOperationCaseInstance::iterate (void)
{
    de::UniquePtr<BufferInterface> testBuffer (createTestBuffer(m_dataType, m_atomicOp));
    tcu::TestLog&                  log        = m_context.getTestContext().getLog();
    const DeviceInterface&         vkd        = m_context.getDeviceInterface();
    const VkDevice                 device     = m_context.getDevice();
    de::Random                     rnd        (0x62a15e34);
    const bool                     useRef     = (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE);
    const VkDescriptorType         descType   = (useRef ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
    const VkBufferUsageFlags       usageFlags = (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | (useRef ? static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : 0u));

    // The main buffer will hold test data. When using buffer references, the buffer's address will be indirectly passed as part of
    // a uniform buffer. If not, it will be passed directly as a descriptor.
    Buffer buffer (m_context, usageFlags, testBuffer->bufferSize(), useRef);
    std::unique_ptr<Buffer> auxBuffer;

    if (useRef)
    {
        // Pass the main buffer address inside a uniform buffer.
        const VkBufferDeviceAddressInfo addressInfo =
        {
            VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
            nullptr,                                      // const void*     pNext;
            buffer.getBuffer(),                           // VkBuffer        buffer;
        };
        const auto address = vkd.getBufferDeviceAddress(device, &addressInfo);

        auxBuffer.reset(new Buffer(m_context, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, sizeof(address), false));
        deMemcpy(auxBuffer->getHostPtr(), &address, sizeof(address));
        auxBuffer->flush();
    }

    testBuffer->setBuffer(buffer.getHostPtr());
    testBuffer->fillWithTestData(rnd);

    buffer.flush();

    Move<VkDescriptorSetLayout> extraResourcesLayout;
    Move<VkDescriptorPool>      extraResourcesSetPool;
    Move<VkDescriptorSet>       extraResourcesSet;

    const VkDescriptorSetLayoutBinding bindings[] =
    {
        { 0u, descType, 1, VK_SHADER_STAGE_ALL, DE_NULL }
    };

    const VkDescriptorSetLayoutCreateInfo layoutInfo =
    {
        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        DE_NULL,
        (VkDescriptorSetLayoutCreateFlags)0u,
        DE_LENGTH_OF_ARRAY(bindings),
        bindings
    };

    extraResourcesLayout = createDescriptorSetLayout(vkd, device, &layoutInfo);

    const VkDescriptorPoolSize poolSizes[] =
    {
        { descType, 1u }
    };

    const VkDescriptorPoolCreateInfo poolInfo =
    {
        VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        DE_NULL,
        (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
        1u, // maxSets
        DE_LENGTH_OF_ARRAY(poolSizes),
        poolSizes
    };

    extraResourcesSetPool = createDescriptorPool(vkd, device, &poolInfo);

    const VkDescriptorSetAllocateInfo allocInfo =
    {
        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
        DE_NULL,
        *extraResourcesSetPool,
        1u,
        &extraResourcesLayout.get()
    };

    extraResourcesSet = allocateDescriptorSet(vkd, device, &allocInfo);

    VkDescriptorBufferInfo bufferInfo;
    bufferInfo.buffer = (useRef ? auxBuffer->getBuffer() : buffer.getBuffer());
    bufferInfo.offset = 0u;
    bufferInfo.range  = VK_WHOLE_SIZE;

    const VkWriteDescriptorSet descriptorWrite =
    {
        VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        DE_NULL,
        *extraResourcesSet,
        0u, // dstBinding
        0u, // dstArrayElement
        1u,
        descType,
        (const VkDescriptorImageInfo*)DE_NULL,
        &bufferInfo,
        (const VkBufferView*)DE_NULL
    };

    vkd.updateDescriptorSets(device, 1u, &descriptorWrite, 0u, DE_NULL);

    // Storage for output varying data.
    std::vector<deUint32> outputs   (NUM_ELEMENTS);
    std::vector<void*>    outputPtr (NUM_ELEMENTS);

    for (size_t i = 0; i < NUM_ELEMENTS; i++)
    {
        outputs[i]   = 0xcdcdcdcd;
        outputPtr[i] = &outputs[i];
    }

    const int numWorkGroups = (m_shaderType.isSharedLike() ? 1 : static_cast<int>(NUM_ELEMENTS));
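    // Shared and payload variants need all invocations in the same workgroup (the shader spec sets the local size to
    // NUM_ELEMENTS for them, see createShaderSpec), so a single workgroup is used; other variants use NUM_ELEMENTS
    // separate invocations.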
    UniquePtr<ShaderExecutor> executor (createExecutor(m_context, m_shaderType.getType(), m_shaderSpec, *extraResourcesLayout));

    executor->execute(numWorkGroups, DE_NULL, &outputPtr[0], *extraResourcesSet);
    buffer.invalidate();

    tcu::ResultCollector resultCollector(log);

    // Check the results of the atomic operation
    testBuffer->checkResults(resultCollector);

    return tcu::TestStatus(resultCollector.getResult(), resultCollector.getMessage());
}

class AtomicOperationCase : public TestCase
{
public:
    AtomicOperationCase  (tcu::TestContext& testCtx,
                          const char*       name,
                          const char*       description,
                          AtomicShaderType  type,
                          DataType          dataType,
                          AtomicOperation   atomicOp);
    virtual ~AtomicOperationCase (void);

    virtual TestInstance* createInstance (Context& ctx) const;
    virtual void          checkSupport   (Context& ctx) const;
    virtual void          initPrograms   (vk::SourceCollections& programCollection) const
    {
        const bool               useSpv14     = m_shaderType.isMeshShadingStage();
        const auto               spvVersion   = (useSpv14 ? vk::SPIRV_VERSION_1_4 : vk::SPIRV_VERSION_1_0);
        const ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, spvVersion, 0u, useSpv14);
        ShaderSpec               sourcesSpec  (m_shaderSpec);

        sourcesSpec.buildOptions = buildOptions;
        generateSources(m_shaderType.getType(), sourcesSpec, programCollection);
    }

private:

    void createShaderSpec();
    ShaderSpec             m_shaderSpec;
    const AtomicShaderType m_shaderType;
    const DataType         m_dataType;
    const AtomicOperation  m_atomicOp;
};

AtomicOperationCase::AtomicOperationCase (tcu::TestContext& testCtx,
                                          const char*       name,
                                          const char*       description,
                                          AtomicShaderType  shaderType,
                                          DataType          dataType,
                                          AtomicOperation   atomicOp)
    : TestCase     (testCtx, name, description)
    , m_shaderType (shaderType)
    , m_dataType   (dataType)
    , m_atomicOp   (atomicOp)
{
    createShaderSpec();
    init();
}

AtomicOperationCase::~AtomicOperationCase (void)
{
}

TestInstance* AtomicOperationCase::createInstance (Context& ctx) const
{
    return new AtomicOperationCaseInstance(ctx, m_shaderSpec, m_shaderType, m_dataType, m_atomicOp);
}

void AtomicOperationCase::checkSupport (Context& ctx) const
{
    if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
    {
        ctx.requireDeviceFunctionality("VK_KHR_shader_atomic_int64");

        const auto atomicInt64Features = ctx.getShaderAtomicInt64Features();
        const bool isSharedMemory      = m_shaderType.isSharedLike();

        if (!isSharedMemory && atomicInt64Features.shaderBufferInt64Atomics == VK_FALSE)
        {
            TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for buffers");
        }
        if (isSharedMemory && atomicInt64Features.shaderSharedInt64Atomics == VK_FALSE)
        {
            TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for shared memory");
        }
    }

    if (m_dataType == DATA_TYPE_FLOAT16)
    {
        ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
        if (m_atomicOp == ATOMIC_OP_ADD)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicAdd)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared add atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicAdd)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer add atomic operation not supported");
                }
            }
        }
        if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicMinMax)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared min/max atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicMinMax)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer min/max atomic operation not supported");
                }
            }
        }
        if (m_atomicOp == ATOMIC_OP_EXCHANGE)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16Atomics)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared atomic operations not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16Atomics)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer atomic operations not supported");
                }
            }
        }
#endif // CTS_USES_VULKANSC
    }

    if (m_dataType == DATA_TYPE_FLOAT32)
    {
        ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
        if (m_atomicOp == ATOMIC_OP_ADD)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32AtomicAdd)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared add atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32AtomicAdd)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer add atomic operation not supported");
                }
            }
        }
        if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
        {
            ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat32AtomicMinMax)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared min/max atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat32AtomicMinMax)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer min/max atomic operation not supported");
                }
            }
#endif // CTS_USES_VULKANSC
        }
        if (m_atomicOp == ATOMIC_OP_EXCHANGE)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
                }
            }
        }
    }

    if (m_dataType == DATA_TYPE_FLOAT64)
    {
        ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
        if (m_atomicOp == ATOMIC_OP_ADD)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64AtomicAdd)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared add atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64AtomicAdd)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer add atomic operation not supported");
                }
            }
        }
        if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
        {
            ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat64AtomicMinMax)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared min/max atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat64AtomicMinMax)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer min/max atomic operation not supported");
                }
            }
#endif // CTS_USES_VULKANSC
        }
        if (m_atomicOp == ATOMIC_OP_EXCHANGE)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
                }
            }
        }
    }

    if (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE)
    {
        ctx.requireDeviceFunctionality("VK_KHR_buffer_device_address");
    }

    checkSupportShader(ctx, m_shaderType.getType());
}

void AtomicOperationCase::createShaderSpec (void)
{
    const AtomicMemoryType memoryType   = m_shaderType.getMemoryType();
    const bool             isSharedLike = m_shaderType.isSharedLike();

    // Global declarations.
    std::ostringstream shaderTemplateGlobalStream;

    // Structure in use for atomic operations.
    shaderTemplateGlobalStream
        << "${EXTENSIONS}\n"
        << "\n"
        << "struct AtomicStruct\n"
        << "{\n"
        << " ${DATATYPE} inoutValues[${N}/2];\n"
        << " ${DATATYPE} inputValues[${N}];\n"
        << " ${DATATYPE} compareValues[${N}];\n"
        << " ${DATATYPE} outputValues[${N}];\n"
        << " int invocationHitCount[${N}];\n"
        << " int index;\n"
        << "};\n"
        << "\n"
        ;

    // The name dance and declarations below will make sure the structure that will be used with atomic operations can be accessed
    // as "buf.data", which is the name used in the atomic operation statements.
    //
    // * When using a buffer directly, RESULT_BUFFER_NAME will be "buf" and the inner struct will be "data".
    // * When using a workgroup-shared global variable, the "data" struct will be nested in an auxiliary "buf" struct.
    // * When using buffer references, the reference stored in the uniform buffer will be called "buf" and the contents it
    //   points to will be called "data".
    //
    if (memoryType != AtomicMemoryType::REFERENCE)
    {
        shaderTemplateGlobalStream
            << "layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {\n"
            << " AtomicStruct data;\n"
            << "} ${RESULT_BUFFER_NAME};\n"
            << "\n"
            ;

        // When using global shared memory in the compute, task or mesh variants, invocations will use a shared global structure
        // instead of a descriptor set as the sources and results of each tested operation.
        if (memoryType == AtomicMemoryType::SHARED)
        {
            shaderTemplateGlobalStream
                << "shared struct { AtomicStruct data; } buf;\n"
                << "\n"
                ;
        }
        else if (memoryType == AtomicMemoryType::PAYLOAD)
        {
            shaderTemplateGlobalStream
                << "struct TaskData { AtomicStruct data; };\n"
                << "taskPayloadSharedEXT TaskData buf;\n"
                ;
        }
    }
    else
    {
        shaderTemplateGlobalStream
            << "layout (buffer_reference) buffer AtomicBuffer {\n"
            << " AtomicStruct data;\n"
            << "};\n"
            << "\n"
            << "layout (set = ${SETIDX}, binding = 0) uniform References {\n"
            << " AtomicBuffer buf;\n"
            << "};\n"
            << "\n"
            ;
    }

    const auto                shaderTemplateGlobalString = shaderTemplateGlobalStream.str();
    const tcu::StringTemplate shaderTemplateGlobal       (shaderTemplateGlobalString);

    // Shader body for the non-vertex case.
    std::ostringstream nonVertexShaderTemplateStream;

    if (isSharedLike)
    {
        // Invocation zero will initialize the shared structure from the descriptor set.
        nonVertexShaderTemplateStream
            << "if (gl_LocalInvocationIndex == 0u)\n"
            << "{\n"
            << " buf.data = ${RESULT_BUFFER_NAME}.data;\n"
            << "}\n"
            << "barrier();\n"
            ;
    }

    if (m_shaderType.getType() == glu::SHADERTYPE_FRAGMENT)
    {
        nonVertexShaderTemplateStream
            << "if (!gl_HelperInvocation) {\n"
            << " int idx = atomicAdd(buf.data.index, 1);\n"
            << " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
            << "}\n"
            ;
    }
    else
    {
        nonVertexShaderTemplateStream
            << "if (atomicAdd(buf.data.invocationHitCount[0], 1) < ${N})\n"
            << "{\n"
            << " int idx = atomicAdd(buf.data.index, 1);\n"
            << " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
            << "}\n"
            ;
    }

    if (isSharedLike)
    {
        // Invocation zero will copy results back to the descriptor set.
        nonVertexShaderTemplateStream
            << "barrier();\n"
            << "if (gl_LocalInvocationIndex == 0u)\n"
            << "{\n"
            << " ${RESULT_BUFFER_NAME}.data = buf.data;\n"
            << "}\n"
            ;
    }

    const auto                nonVertexShaderTemplateStreamStr = nonVertexShaderTemplateStream.str();
    const tcu::StringTemplate nonVertexShaderTemplateSrc       (nonVertexShaderTemplateStreamStr);

    // Shader body for the vertex case.
    const tcu::StringTemplate vertexShaderTemplateSrc(
        "int idx = gl_VertexIndex;\n"
        "if (atomicAdd(buf.data.invocationHitCount[idx], 1) == 0)\n"
        "{\n"
        " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
        "}\n");

    // Extensions.
    std::ostringstream extensions;

    if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
    {
        extensions
            << "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
            << "#extension GL_EXT_shader_atomic_int64 : enable\n"
            ;
    }
    else if ((m_dataType == DATA_TYPE_FLOAT16) || (m_dataType == DATA_TYPE_FLOAT32) || (m_dataType == DATA_TYPE_FLOAT64))
    {
        extensions
            << "#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable\n"
            << "#extension GL_EXT_shader_atomic_float : enable\n"
            << "#extension GL_EXT_shader_atomic_float2 : enable\n"
            << "#extension GL_KHR_memory_scope_semantics : enable\n"
            ;
    }

    if (memoryType == AtomicMemoryType::REFERENCE)
    {
        extensions << "#extension GL_EXT_buffer_reference : require\n";
    }

    // Specializations.
    std::map<std::string, std::string> specializations;

    specializations["EXTENSIONS"]         = extensions.str();
    specializations["DATATYPE"]           = dataType2Str(m_dataType);
    specializations["ATOMICOP"]           = atomicOp2Str(m_atomicOp);
    specializations["SETIDX"]             = de::toString((int)EXTRA_RESOURCES_DESCRIPTOR_SET_INDEX);
    specializations["N"]                  = de::toString((int)NUM_ELEMENTS);
    specializations["COMPARE_ARG"]        = ((m_atomicOp == ATOMIC_OP_COMP_SWAP) ? "buf.data.compareValues[idx], " : "");
    specializations["RESULT_BUFFER_NAME"] = (isSharedLike ? "result" : "buf");

    // Shader spec.
    m_shaderSpec.outputs.push_back(Symbol("outData", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
    m_shaderSpec.glslVersion        = glu::GLSL_VERSION_450;
    m_shaderSpec.globalDeclarations = shaderTemplateGlobal.specialize(specializations);
    m_shaderSpec.source             = ((m_shaderType.getType() == glu::SHADERTYPE_VERTEX)
                                       ? vertexShaderTemplateSrc.specialize(specializations)
                                       : nonVertexShaderTemplateSrc.specialize(specializations));

    if (isSharedLike)
    {
        // When using global shared memory, use a single workgroup and an appropriate number of local invocations.
        m_shaderSpec.localSizeX = static_cast<int>(NUM_ELEMENTS);
    }
}

void addAtomicOperationTests (tcu::TestCaseGroup* atomicOperationTestsGroup)
{
    tcu::TestContext& testCtx = atomicOperationTestsGroup->getTestContext();

    static const struct
    {
        glu::ShaderType type;
        const char*     name;
    } shaderTypes[] =
    {
        { glu::SHADERTYPE_VERTEX,                  "vertex"    },
        { glu::SHADERTYPE_FRAGMENT,                "fragment"  },
        { glu::SHADERTYPE_GEOMETRY,                "geometry"  },
        { glu::SHADERTYPE_TESSELLATION_CONTROL,    "tess_ctrl" },
        { glu::SHADERTYPE_TESSELLATION_EVALUATION, "tess_eval" },
        { glu::SHADERTYPE_COMPUTE,                 "compute"   },
        { glu::SHADERTYPE_TASK,                    "task"      },
        { glu::SHADERTYPE_MESH,                    "mesh"      },
    };

    static const struct
    {
        AtomicMemoryType type;
        const char*      suffix;
    } kMemoryTypes[] =
    {
        { AtomicMemoryType::BUFFER,    ""           },
        { AtomicMemoryType::SHARED,    "_shared"    },
        { AtomicMemoryType::REFERENCE, "_reference" },
        { AtomicMemoryType::PAYLOAD,   "_payload"   },
    };

    static const struct
    {
        DataType    dataType;
        const char* name;
        const char* description;
    } dataSign[] =
    {
#ifndef CTS_USES_VULKANSC
        { DATA_TYPE_FLOAT16, "float16",       "Tests using 16-bit float data"             },
#endif // CTS_USES_VULKANSC
        { DATA_TYPE_INT32,   "signed",        "Tests using signed data (int)"             },
        { DATA_TYPE_UINT32,  "unsigned",      "Tests using unsigned data (uint)"          },
        { DATA_TYPE_FLOAT32, "float32",       "Tests using 32-bit float data"             },
        { DATA_TYPE_INT64,   "signed64bit",   "Tests using 64 bit signed data (int64)"    },
        { DATA_TYPE_UINT64,  "unsigned64bit", "Tests using 64 bit unsigned data (uint64)" },
        { DATA_TYPE_FLOAT64, "float64",       "Tests using 64-bit float data"             }
    };

    static const struct
    {
        AtomicOperation value;
        const char*     name;
    } atomicOp[] =
    {
        { ATOMIC_OP_EXCHANGE,  "exchange"  },
        { ATOMIC_OP_COMP_SWAP, "comp_swap" },
        { ATOMIC_OP_ADD,       "add"       },
        { ATOMIC_OP_MIN,       "min"       },
        { ATOMIC_OP_MAX,       "max"       },
        { ATOMIC_OP_AND,       "and"       },
        { ATOMIC_OP_OR,        "or"        },
        { ATOMIC_OP_XOR,       "xor"       }
    };

    for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(atomicOp); opNdx++)
    {
        for (int signNdx = 0; signNdx < DE_LENGTH_OF_ARRAY(dataSign); signNdx++)
        {
            for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(shaderTypes); shaderTypeNdx++)
            {
                // Only a subset of operations (add, exchange and, outside VulkanSC, min/max) is supported on floating-point types.
                if (dataSign[signNdx].dataType == DATA_TYPE_FLOAT16 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT32 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT64)
                {
                    if (atomicOp[opNdx].value != ATOMIC_OP_ADD &&
#ifndef CTS_USES_VULKANSC
                        atomicOp[opNdx].value != ATOMIC_OP_MIN &&
                        atomicOp[opNdx].value != ATOMIC_OP_MAX &&
#endif // CTS_USES_VULKANSC
                        atomicOp[opNdx].value != ATOMIC_OP_EXCHANGE)
                    {
                        continue;
                    }
                }

                for (int memoryTypeNdx = 0; memoryTypeNdx < DE_LENGTH_OF_ARRAY(kMemoryTypes); ++memoryTypeNdx)
                {
                    // Shared memory is only available in compute, task and mesh shaders.
                    if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::SHARED
                        && shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_COMPUTE
                        && shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_TASK
                        && shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_MESH)
                        continue;

                    // Payload memory is only available for atomics in task shaders (in mesh shaders it's read-only).
                    if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::PAYLOAD && shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_TASK)
                        continue;

                    const std::string description = std::string("Tests atomic operation ") + atomicOp2Str(atomicOp[opNdx].value) + std::string(".");
                    const std::string name        = std::string(atomicOp[opNdx].name) + "_" + std::string(dataSign[signNdx].name) + "_" + std::string(shaderTypes[shaderTypeNdx].name) + kMemoryTypes[memoryTypeNdx].suffix;

                    atomicOperationTestsGroup->addChild(new AtomicOperationCase(testCtx, name.c_str(), description.c_str(), AtomicShaderType(shaderTypes[shaderTypeNdx].type, kMemoryTypes[memoryTypeNdx].type), dataSign[signNdx].dataType, atomicOp[opNdx].value));
                }
            }
        }
    }
}

} // anonymous

tcu::TestCaseGroup* createAtomicOperationTests (tcu::TestContext& testCtx)
{
    return createTestGroup(testCtx, "atomic_operations", "Atomic Operation Tests", addAtomicOperationTests);
}

} // shaderexecutor
} // vkt