/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2015 The Khronos Group Inc.
 * Copyright (c) 2017 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Atomic operations (OpAtomic*) tests.
 *//*--------------------------------------------------------------------*/

#include "vktAtomicOperationTests.hpp"
#include "vktShaderExecutor.hpp"

#include "vkRefUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vktTestGroupUtil.hpp"

#include "tcuTestLog.hpp"
#include "tcuStringTemplate.hpp"
#include "tcuResultCollector.hpp"

#include "deFloat16.h"
#include "deMath.hpp"
#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deRandom.hpp"
#include "deArrayUtil.hpp"

#include <string>
#include <memory>
#include <cmath>

namespace vkt
{
namespace shaderexecutor
{

namespace
{

using de::UniquePtr;
using de::MovePtr;
using std::vector;

using namespace vk;

enum class AtomicMemoryType
{
	BUFFER = 0,	// Normal buffer.
	SHARED,		// Shared global struct in a compute workgroup.
	REFERENCE,	// Buffer passed as a reference.
	PAYLOAD,	// Task payload.
};

// Helper class to indicate the shader type and the kind of memory the atomic operations use.
class AtomicShaderType
{
public:
	AtomicShaderType (glu::ShaderType type, AtomicMemoryType memoryType)
		: m_type				(type)
		, m_atomicMemoryType	(memoryType)
	{
		// Shared global memory can only be used with compute, task and mesh shaders.
		DE_ASSERT(memoryType != AtomicMemoryType::SHARED
					|| type == glu::SHADERTYPE_COMPUTE
					|| type == glu::SHADERTYPE_TASK
					|| type == glu::SHADERTYPE_MESH);

		// Task payload memory can only be tested in task shaders.
		DE_ASSERT(memoryType != AtomicMemoryType::PAYLOAD || type == glu::SHADERTYPE_TASK);
	}

	glu::ShaderType		getType					(void) const	{ return m_type; }
	AtomicMemoryType	getMemoryType			(void) const	{ return m_atomicMemoryType; }
	bool				isSharedLike			(void) const	{ return m_atomicMemoryType == AtomicMemoryType::SHARED || m_atomicMemoryType == AtomicMemoryType::PAYLOAD; }
	bool				isMeshShadingStage		(void) const	{ return (m_type == glu::SHADERTYPE_TASK || m_type == glu::SHADERTYPE_MESH); }

private:
	glu::ShaderType		m_type;
	AtomicMemoryType	m_atomicMemoryType;
};

// Buffer helper
class Buffer
{
public:
						Buffer				(Context& context, VkBufferUsageFlags usage, size_t size, bool useRef);

	VkBuffer			getBuffer			(void) const { return *m_buffer;					}
	void*				getHostPtr			(void) const { return m_allocation->getHostPtr();	}
	void				flush				(void);
	void				invalidate			(void);

private:
	const DeviceInterface&		m_vkd;
	const VkDevice				m_device;
	const VkQueue				m_queue;
	const deUint32				m_queueIndex;
	const Unique<VkBuffer>		m_buffer;
	const UniquePtr<Allocation>	m_allocation;
};

typedef de::SharedPtr<Buffer> BufferSp;

Move<VkBuffer> createBuffer (const DeviceInterface& vkd, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usageFlags)
{
	const VkBufferCreateInfo createInfo	=
	{
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
		DE_NULL,
		(VkBufferCreateFlags)0,
		size,
		usageFlags,
		VK_SHARING_MODE_EXCLUSIVE,
		0u,
		DE_NULL
	};
	return createBuffer(vkd, device, &createInfo);
}

MovePtr<Allocation> allocateAndBindMemory (const DeviceInterface& vkd, VkDevice device, Allocator& allocator, VkBuffer buffer, bool useRef)
{
	const MemoryRequirement allocationType = (MemoryRequirement::HostVisible | (useRef ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any));
	MovePtr<Allocation>	alloc(allocator.allocate(getBufferMemoryRequirements(vkd, device, buffer), allocationType));

	VK_CHECK(vkd.bindBufferMemory(device, buffer, alloc->getMemory(), alloc->getOffset()));

	return alloc;
}

Buffer::Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef)
	: m_vkd			(context.getDeviceInterface())
	, m_device		(context.getDevice())
	, m_queue		(context.getUniversalQueue())
	, m_queueIndex	(context.getUniversalQueueFamilyIndex())
	, m_buffer		(createBuffer			(context.getDeviceInterface(),
											 context.getDevice(),
											 (VkDeviceSize)size,
											 usage))
	, m_allocation	(allocateAndBindMemory	(context.getDeviceInterface(),
											 context.getDevice(),
											 context.getDefaultAllocator(),
											 *m_buffer,
											 useRef))
{
}

void Buffer::flush (void)
{
	flushMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

void Buffer::invalidate (void)
{
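	// Before invalidating the mapped range, submit a barrier that makes the
	// device's shader writes available to the host; otherwise the CPU could
	// read stale data through the mapped pointer.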
	const auto	cmdPool			= vk::makeCommandPool(m_vkd, m_device, m_queueIndex);
	const auto	cmdBufferPtr	= vk::allocateCommandBuffer(m_vkd, m_device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
	const auto	cmdBuffer		= cmdBufferPtr.get();
	const auto	bufferBarrier	= vk::makeBufferMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, m_buffer.get(), 0ull, VK_WHOLE_SIZE);

	beginCommandBuffer(m_vkd, cmdBuffer);
	m_vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
	endCommandBuffer(m_vkd, cmdBuffer);
	submitCommandsAndWait(m_vkd, m_device, m_queue, cmdBuffer);

	invalidateMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

// Tests

enum AtomicOperation
{
	ATOMIC_OP_EXCHANGE = 0,
	ATOMIC_OP_COMP_SWAP,
	ATOMIC_OP_ADD,
	ATOMIC_OP_MIN,
	ATOMIC_OP_MAX,
	ATOMIC_OP_AND,
	ATOMIC_OP_OR,
	ATOMIC_OP_XOR,

	ATOMIC_OP_LAST
};

std::string atomicOp2Str (AtomicOperation op)
{
	static const char* const s_names[] =
	{
		"atomicExchange",
		"atomicCompSwap",
		"atomicAdd",
		"atomicMin",
		"atomicMax",
		"atomicAnd",
		"atomicOr",
		"atomicXor"
	};
	return de::getSizedArrayElement<ATOMIC_OP_LAST>(s_names, op);
}

enum
{
	NUM_ELEMENTS = 32
};

enum DataType
{
	DATA_TYPE_FLOAT16 = 0,
	DATA_TYPE_INT32,
	DATA_TYPE_UINT32,
	DATA_TYPE_FLOAT32,
	DATA_TYPE_INT64,
	DATA_TYPE_UINT64,
	DATA_TYPE_FLOAT64,

	DATA_TYPE_LAST
};

std::string dataType2Str (DataType type)
{
	static const char* const s_names[] =
	{
		"float16_t",
		"int",
		"uint",
		"float",
		"int64_t",
		"uint64_t",
		"double",
	};
	return de::getSizedArrayElement<DATA_TYPE_LAST>(s_names, type);
}

class BufferInterface
{
public:
	virtual void setBuffer(void* ptr) = 0;

	virtual size_t bufferSize() = 0;

	virtual void fillWithTestData(de::Random& rnd) = 0;

	virtual void checkResults(tcu::ResultCollector& resultCollector) = 0;

	virtual ~BufferInterface() {}
};

template<typename dataTypeT>
class TestBuffer : public BufferInterface
{
public:

	TestBuffer (AtomicOperation atomicOp)
		: m_atomicOp(atomicOp)
	{}

	template<typename T>
	struct BufferData
	{
		// Use half the number of elements for inout to cause overlap between atomic operations.
		// Each inout element at index i will have two atomic operations using input from
		// indices i and i + NUM_ELEMENTS / 2.
		T			inout[NUM_ELEMENTS / 2];
		T			input[NUM_ELEMENTS];
		T			compare[NUM_ELEMENTS];
		T			output[NUM_ELEMENTS];
		T			invocationHitCount[NUM_ELEMENTS];
		deInt32		index;
	};

	virtual void setBuffer(void* ptr)
	{
		m_ptr = static_cast<BufferData<dataTypeT>*>(ptr);
	}

	virtual size_t bufferSize()
	{
		return sizeof(BufferData<dataTypeT>);
	}

	virtual void fillWithTestData(de::Random& rnd)
	{
		dataTypeT pattern;
		deMemset(&pattern, 0xcd, sizeof(dataTypeT));

		for (int i = 0; i < NUM_ELEMENTS / 2; i++)
		{
			m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getUint64());
			// The first half of compare elements match with every even index.
			// The second half matches with odd indices. This causes the
			// overlapping operations to only select one.
			m_ptr->compare[i] = m_ptr->inout[i] + (i % 2);
			m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + 1 - (i % 2);
		}
		for (int i = 0; i < NUM_ELEMENTS; i++)
		{
			m_ptr->input[i] = static_cast<dataTypeT>(rnd.getUint64());
			m_ptr->output[i] = pattern;
			m_ptr->invocationHitCount[i] = 0;
		}
		m_ptr->index = 0;

		// Take a copy to be used when calculating expected values.
		m_original = *m_ptr;
	}

	virtual void checkResults(tcu::ResultCollector& resultCollector)
	{
		checkOperation(m_original, *m_ptr, resultCollector);
	}

	template<typename T>
	struct Expected
	{
		T m_inout;
		T m_output[2];

		Expected (T inout, T output0, T output1)
		: m_inout(inout)
		{
			m_output[0] = output0;
			m_output[1] = output1;
		}

		bool compare (T inout, T output0, T output1)
		{
			return (deMemCmp((const void*)&m_inout, (const void*)&inout, sizeof(inout)) == 0
					&& deMemCmp((const void*)&m_output[0], (const void*)&output0, sizeof(output0)) == 0
					&& deMemCmp((const void*)&m_output[1], (const void*)&output1, sizeof(output1)) == 0);
		}
	};

	void checkOperation	(const BufferData<dataTypeT>&	original,
						 const BufferData<dataTypeT>&	result,
						 tcu::ResultCollector&			resultCollector);

	const AtomicOperation	m_atomicOp;

	BufferData<dataTypeT>* m_ptr;
	BufferData<dataTypeT>  m_original;

};

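// Floating point results are compared with a NaN-tolerant, approximate
// equality: any two NaNs compare equal (sign, payload and signaling bit may
// differ), and all other values must match within an absolute tolerance.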
template<typename T>
bool nanSafeSloppyEquals(T x, T y)
{
	if (deIsIEEENaN(x) && deIsIEEENaN(y))
		return true;

	if (deIsIEEENaN(x) || deIsIEEENaN(y))
		return false;

	return fabs(deToDouble(x) - deToDouble(y)) < 0.00001;
}

template<typename dataTypeT>
class TestBufferFloatingPoint : public BufferInterface
{
public:

	TestBufferFloatingPoint (AtomicOperation atomicOp)
		: m_atomicOp(atomicOp)
	{}

	template<typename T>
	struct BufferDataFloatingPoint
	{
		// Use half the number of elements for inout to cause overlap between atomic operations.
		// Each inout element at index i will have two atomic operations using input from
		// indices i and i + NUM_ELEMENTS / 2.
		T			inout[NUM_ELEMENTS / 2];
		T			input[NUM_ELEMENTS];
		T			compare[NUM_ELEMENTS];
		T			output[NUM_ELEMENTS];
		deInt32		invocationHitCount[NUM_ELEMENTS];
		deInt32		index;
	};

	virtual void setBuffer(void* ptr)
	{
		m_ptr = static_cast<BufferDataFloatingPoint<dataTypeT>*>(ptr);
	}

	virtual size_t bufferSize()
	{
		return sizeof(BufferDataFloatingPoint<dataTypeT>);
	}

	virtual void fillWithTestData(de::Random& rnd)
	{
		dataTypeT pattern;
		deMemset(&pattern, 0xcd, sizeof(dataTypeT));

		for (int i = 0; i < NUM_ELEMENTS / 2; i++)
		{
			m_ptr->inout[i] = deToFloatType<dataTypeT>(rnd.getFloat());
			// These aren't used by any of the float tests
			m_ptr->compare[i] = deToFloatType<dataTypeT>(0.0);
		}
		for (int i = 0; i < NUM_ELEMENTS; i++)
		{
			m_ptr->input[i] = deToFloatType<dataTypeT>(rnd.getFloat());
			m_ptr->output[i] = pattern;
			m_ptr->invocationHitCount[i] = 0;
		}

		// Add special cases for NaN and +/-0. These must be written after the
		// random fill above so they are not immediately overwritten by it.
		// 0: min(sNaN, x)
		m_ptr->inout[0] = deSignalingNaN<dataTypeT>();
		// 1: min(x, sNaN)
		m_ptr->input[1 * 2 + 0] = deSignalingNaN<dataTypeT>();
		// 2: min(qNaN, x)
		m_ptr->inout[2] = deQuietNaN<dataTypeT>();
		// 3: min(x, qNaN)
		m_ptr->input[3 * 2 + 0] = deQuietNaN<dataTypeT>();
		// 4: min(NaN, NaN)
		m_ptr->inout[4] = deSignalingNaN<dataTypeT>();
		m_ptr->input[4 * 2 + 0] = deQuietNaN<dataTypeT>();
		m_ptr->input[4 * 2 + 1] = deQuietNaN<dataTypeT>();
		// 5: min(+0, -0)
		m_ptr->inout[5] = deToFloatType<dataTypeT>(-0.0);
		m_ptr->input[5 * 2 + 0] = deToFloatType<dataTypeT>(0.0);
		m_ptr->input[5 * 2 + 1] = deToFloatType<dataTypeT>(0.0);

		m_ptr->index = 0;

		// Take a copy to be used when calculating expected values.
		m_original = *m_ptr;
	}

	virtual void checkResults(tcu::ResultCollector& resultCollector)
	{
		checkOperationFloatingPoint(m_original, *m_ptr, resultCollector);
	}

	template<typename T>
	struct Expected
	{
		T m_inout;
		T m_output[2];

		Expected (T inout, T output0, T output1)
			: m_inout(inout)
		{
			m_output[0] = output0;
			m_output[1] = output1;
		}

		bool compare (T inout, T output0, T output1)
		{
			return nanSafeSloppyEquals(m_inout, inout) &&
			       nanSafeSloppyEquals(m_output[0], output0) &&
			       nanSafeSloppyEquals(m_output[1], output1);
		}
	};

	void checkOperationFloatingPoint(const BufferDataFloatingPoint<dataTypeT>& original,
		const BufferDataFloatingPoint<dataTypeT>& result,
		tcu::ResultCollector& resultCollector);

	const AtomicOperation	m_atomicOp;

	BufferDataFloatingPoint<dataTypeT>* m_ptr;
	BufferDataFloatingPoint<dataTypeT>  m_original;

};

static BufferInterface* createTestBuffer(DataType type, AtomicOperation atomicOp)
{
	switch (type)
	{
	case DATA_TYPE_FLOAT16:
		return new TestBufferFloatingPoint<deFloat16>(atomicOp);
	case DATA_TYPE_INT32:
		return new TestBuffer<deInt32>(atomicOp);
	case DATA_TYPE_UINT32:
		return new TestBuffer<deUint32>(atomicOp);
	case DATA_TYPE_FLOAT32:
		return new TestBufferFloatingPoint<float>(atomicOp);
	case DATA_TYPE_INT64:
		return new TestBuffer<deInt64>(atomicOp);
	case DATA_TYPE_UINT64:
		return new TestBuffer<deUint64>(atomicOp);
	case DATA_TYPE_FLOAT64:
		return new TestBufferFloatingPoint<double>(atomicOp);
	default:
		DE_ASSERT(false);
		return DE_NULL;
	}
}

// Use template to handle both signed and unsigned cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBuffer<T>::checkOperation (const BufferData<T>&	original,
									const BufferData<T>&	result,
									tcu::ResultCollector&	resultCollector)
{
	// originalInout = original inout
	// input0 = input at index i
	// input1 = input at index i + NUM_ELEMENTS / 2
	//
	// The atomic operation returns the memory contents before the operation,
	// and this is stored as output. Two operations are executed for each
	// InOut value (using input0 and input1).
	//
	// Since two operations overlap on each InOut element, the resulting
	// InOut value and the outputs of the operations have two result
	// candidates depending on the execution order. Verification passes
	// if the results match one of these options.
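	//
	// For example (hypothetical values), with ATOMIC_OP_ADD, originalInout = 1,
	// input0 = 2 and input1 = 4:
	//   op0 first: Output0 = 1, Output1 = 3, final InOut = 7
	//   op1 first: Output1 = 1, Output0 = 5, final InOut = 7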

	for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
	{
		// Needed when reinterpreting the data as signed values.
		const T originalInout	= *reinterpret_cast<const T*>(&original.inout[elementNdx]);
		const T input0			= *reinterpret_cast<const T*>(&original.input[elementNdx]);
		const T input1			= *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

		// Expected results are collected to this vector.
		vector<Expected<T> > exp;

		switch (m_atomicOp)
		{
			case ATOMIC_OP_ADD:
			{
				exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
				exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
			}
			break;

			case ATOMIC_OP_AND:
			{
				exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout, originalInout & input0));
				exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout & input1, originalInout));
			}
			break;

			case ATOMIC_OP_OR:
			{
				exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout, originalInout | input0));
				exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout | input1, originalInout));
			}
			break;

			case ATOMIC_OP_XOR:
			{
				exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout, originalInout ^ input0));
				exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout ^ input1, originalInout));
			}
			break;

			case ATOMIC_OP_MIN:
			{
				exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), originalInout, de::min(originalInout, input0)));
				exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), de::min(originalInout, input1), originalInout));
			}
			break;

			case ATOMIC_OP_MAX:
			{
				exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), originalInout, de::max(originalInout, input0)));
				exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), de::max(originalInout, input1), originalInout));
			}
			break;

			case ATOMIC_OP_EXCHANGE:
			{
				exp.push_back(Expected<T>(input1, originalInout, input0));
				exp.push_back(Expected<T>(input0, input1, originalInout));
			}
			break;

			case ATOMIC_OP_COMP_SWAP:
			{
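				// compare[] was seeded so that even elements match the compare
				// value of the input0 operation and odd elements match the
				// input1 operation; only the matching swap can succeed.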
				if (elementNdx % 2 == 0)
				{
					exp.push_back(Expected<T>(input0, originalInout, input0));
					exp.push_back(Expected<T>(input0, originalInout, originalInout));
				}
				else
				{
					exp.push_back(Expected<T>(input1, input1, originalInout));
					exp.push_back(Expected<T>(input1, originalInout, originalInout));
				}
			}
			break;

			default:
				DE_FATAL("Unexpected atomic operation.");
				break;
		}

		const T resIo		= result.inout[elementNdx];
		const T resOutput0	= result.output[elementNdx];
		const T resOutput1	= result.output[elementNdx + NUM_ELEMENTS / 2];

		if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
		{
			std::ostringstream errorMessage;
			errorMessage	<< "ERROR: Result value check failed at index " << elementNdx
							<< ". Expected one of the two outcomes: InOut = " << tcu::toHex(exp[0].m_inout)
							<< ", Output0 = " << tcu::toHex(exp[0].m_output[0]) << ", Output1 = "
							<< tcu::toHex(exp[0].m_output[1]) << ", or InOut = " << tcu::toHex(exp[1].m_inout)
							<< ", Output0 = " << tcu::toHex(exp[1].m_output[0]) << ", Output1 = "
							<< tcu::toHex(exp[1].m_output[1]) << ". Got: InOut = " << tcu::toHex(resIo)
							<< ", Output0 = " << tcu::toHex(resOutput0) << ", Output1 = "
							<< tcu::toHex(resOutput1) << ". Using Input0 = " << tcu::toHex(original.input[elementNdx])
							<< " and Input1 = " << tcu::toHex(original.input[elementNdx + NUM_ELEMENTS / 2]) << ".";

			resultCollector.fail(errorMessage.str());
		}
	}
}

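// For float atomic min/max, signaling-NaN operands have several legal
// outcomes (a quiet NaN, a signaling NaN, or the non-NaN operand), so all
// acceptable values are collected instead of a single expected result.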
template<typename T>
void handleExceptionalFloatMinMaxValues(vector<T>& values, T x, T y)
{
	if (deIsSignalingNaN(x) && deIsSignalingNaN(y))
	{
		values.push_back(deQuietNaN<T>());
		values.push_back(deSignalingNaN<T>());
	}
	else if (deIsSignalingNaN(x))
	{
		values.push_back(deQuietNaN<T>());
		values.push_back(deSignalingNaN<T>());
		if (!deIsIEEENaN(y))
			values.push_back(y);
	}
	else if (deIsSignalingNaN(y))
	{
		values.push_back(deQuietNaN<T>());
		values.push_back(deSignalingNaN<T>());
		if (!deIsIEEENaN(x))
			values.push_back(x);
	}
	else if (deIsIEEENaN(x) && deIsIEEENaN(y))
	{
		// Both quiet NaNs
		values.push_back(deQuietNaN<T>());
	}
	else if (deIsIEEENaN(x))
	{
		// One quiet NaN and one non-NaN.
		values.push_back(y);
	}
	else if (deIsIEEENaN(y))
	{
		// One quiet NaN and one non-NaN.
		values.push_back(x);
	}
	else if ((deIsPositiveZero(x) && deIsNegativeZero(y)) || (deIsNegativeZero(x) && deIsPositiveZero(y)))
	{
		values.push_back(deToFloatType<T>(0.0));
		values.push_back(deToFloatType<T>(-0.0));
	}
}

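// Reference addition is computed in double precision; if either operand is a
// NaN, the result is a quiet NaN, matching IEEE 754 addition.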
template<typename T>
T floatAdd(T x, T y)
{
	if (deIsIEEENaN(x) || deIsIEEENaN(y))
		return deQuietNaN<T>();
	return deToFloatType<T>(deToDouble(x) + deToDouble(y));
}

template<typename T>
vector<T> floatMinValues(T x, T y)
{
	vector<T> values;
	handleExceptionalFloatMinMaxValues(values, x, y);
	if (values.empty())
	{
		values.push_back(deToDouble(x) < deToDouble(y) ? x : y);
	}
	return values;
}

template<typename T>
vector<T> floatMaxValues(T x, T y)
{
	vector<T> values;
	handleExceptionalFloatMinMaxValues(values, x, y);
	if (values.empty())
	{
		values.push_back(deToDouble(x) > deToDouble(y) ? x : y);
	}
	return values;
}

// Use template to handle both float and double cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBufferFloatingPoint<T>::checkOperationFloatingPoint(const BufferDataFloatingPoint<T>& original,
	const BufferDataFloatingPoint<T>& result,
	tcu::ResultCollector& resultCollector)
{
	// originalInout = original inout
	// input0 = input at index i
	// input1 = input at index i + NUM_ELEMENTS / 2
	//
	// The atomic operation returns the memory contents before the operation,
	// and this is stored as output. Two operations are executed for each
	// InOut value (using input0 and input1).
	//
	// Since two operations overlap on each InOut element, the resulting
	// InOut value and the outputs of the operations have several result
	// candidates depending on the execution order and the NaN rules above.
	// Verification passes if the results match one of these options.

	for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
	{
		// Needed when reinterpreting the data as floating-point values.
		const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
		const T input0 = *reinterpret_cast<const T*>(&original.input[elementNdx]);
		const T input1 = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

		// Expected results are collected to this vector.
		vector<Expected<T> > exp;

		switch (m_atomicOp)
		{
		case ATOMIC_OP_ADD:
		{
			exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), originalInout, floatAdd(originalInout, input0)));
			exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), floatAdd(originalInout, input1), originalInout));
		}
		break;

		case ATOMIC_OP_MIN:
		{
			// The case where input0 is combined first
			vector<T> minOriginalAndInput0 = floatMinValues(originalInout, input0);
			for (T x : minOriginalAndInput0)
			{
				vector<T> minAll = floatMinValues(x, input1);
				for (T y : minAll)
				{
					exp.push_back(Expected<T>(y, originalInout, x));
				}
			}

			// The case where input1 is combined first
			vector<T> minOriginalAndInput1 = floatMinValues(originalInout, input1);
			for (T x : minOriginalAndInput1)
			{
				vector<T> minAll = floatMinValues(x, input0);
				for (T y : minAll)
				{
					exp.push_back(Expected<T>(y, x, originalInout));
				}
			}
		}
		break;

		case ATOMIC_OP_MAX:
		{
			// The case where input0 is combined first
			vector<T> maxOriginalAndInput0 = floatMaxValues(originalInout, input0);
			for (T x : maxOriginalAndInput0)
			{
				vector<T> maxAll = floatMaxValues(x, input1);
				for (T y : maxAll)
				{
					exp.push_back(Expected<T>(y, originalInout, x));
				}
			}

			// The case where input1 is combined first
			vector<T> maxOriginalAndInput1 = floatMaxValues(originalInout, input1);
			for (T x : maxOriginalAndInput1)
			{
				vector<T> maxAll = floatMaxValues(x, input0);
				for (T y : maxAll)
				{
					exp.push_back(Expected<T>(y, x, originalInout));
				}
			}
		}
		break;

		case ATOMIC_OP_EXCHANGE:
		{
			exp.push_back(Expected<T>(input1, originalInout, input0));
			exp.push_back(Expected<T>(input0, input1, originalInout));
		}
		break;

		default:
			DE_FATAL("Unexpected atomic operation.");
			break;
		}

		const T resIo = result.inout[elementNdx];
		const T resOutput0 = result.output[elementNdx];
		const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];

		bool hasMatch = false;
		for (Expected<T> e : exp)
		{
			if (e.compare(resIo, resOutput0, resOutput1))
			{
				hasMatch = true;
				break;
			}
		}
		if (!hasMatch)
		{
			std::ostringstream errorMessage;
			errorMessage << "ERROR: Result value check failed at index " << elementNdx
				<< ". Expected one of the outcomes:";

			bool first = true;
			for (Expected<T> e : exp)
			{
				if (!first)
					errorMessage << ", or";
				first = false;

				errorMessage << " InOut = " << e.m_inout
					<< ", Output0 = " << e.m_output[0]
					<< ", Output1 = " << e.m_output[1];
			}

			errorMessage << ". Got: InOut = " << resIo
				<< ", Output0 = " << resOutput0
				<< ", Output1 = " << resOutput1
				<< ". Using Input0 = " << original.input[elementNdx]
				<< " and Input1 = " << original.input[elementNdx + NUM_ELEMENTS / 2] << ".";

			resultCollector.fail(errorMessage.str());
		}
	}
}

class AtomicOperationCaseInstance : public TestInstance
{
public:
									AtomicOperationCaseInstance		(Context&			context,
																	 const ShaderSpec&	shaderSpec,
																	 AtomicShaderType	shaderType,
																	 DataType			dataType,
																	 AtomicOperation	atomicOp);

	virtual tcu::TestStatus			iterate							(void);

private:
	const ShaderSpec&				m_shaderSpec;
	AtomicShaderType				m_shaderType;
	const DataType					m_dataType;
	AtomicOperation					m_atomicOp;

};

AtomicOperationCaseInstance::AtomicOperationCaseInstance (Context&				context,
														  const ShaderSpec&		shaderSpec,
														  AtomicShaderType		shaderType,
														  DataType				dataType,
														  AtomicOperation		atomicOp)
	: TestInstance	(context)
	, m_shaderSpec	(shaderSpec)
	, m_shaderType	(shaderType)
	, m_dataType	(dataType)
	, m_atomicOp	(atomicOp)
{
}

tcu::TestStatus AtomicOperationCaseInstance::iterate (void)
{
	de::UniquePtr<BufferInterface>	testBuffer	(createTestBuffer(m_dataType, m_atomicOp));
	tcu::TestLog&					log			= m_context.getTestContext().getLog();
	const DeviceInterface&			vkd			= m_context.getDeviceInterface();
	const VkDevice					device		= m_context.getDevice();
	de::Random						rnd			(0x62a15e34);
	const bool						useRef		= (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE);
	const VkDescriptorType			descType	= (useRef ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	const VkBufferUsageFlags		usageFlags	= (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | (useRef ? static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : 0u));

	// The main buffer will hold test data. When using buffer references, the buffer's address will be indirectly passed as part of
	// a uniform buffer. If not, it will be passed directly as a descriptor.
	Buffer							buffer		(m_context, usageFlags, testBuffer->bufferSize(), useRef);
	std::unique_ptr<Buffer>			auxBuffer;

	if (useRef)
	{
		// Pass the main buffer address inside a uniform buffer.
		const VkBufferDeviceAddressInfo addressInfo =
		{
			VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,	//	VkStructureType	sType;
			nullptr,										//	const void*		pNext;
			buffer.getBuffer(),								//	VkBuffer		buffer;
		};
		const auto address = vkd.getBufferDeviceAddress(device, &addressInfo);

		auxBuffer.reset(new Buffer(m_context, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, sizeof(address), false));
		deMemcpy(auxBuffer->getHostPtr(), &address, sizeof(address));
		auxBuffer->flush();
	}

	testBuffer->setBuffer(buffer.getHostPtr());
	testBuffer->fillWithTestData(rnd);

	buffer.flush();

	Move<VkDescriptorSetLayout>	extraResourcesLayout;
	Move<VkDescriptorPool>		extraResourcesSetPool;
	Move<VkDescriptorSet>		extraResourcesSet;

	const VkDescriptorSetLayoutBinding bindings[] =
	{
		{ 0u, descType, 1, VK_SHADER_STAGE_ALL, DE_NULL }
	};

	const VkDescriptorSetLayoutCreateInfo	layoutInfo	=
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		DE_NULL,
		(VkDescriptorSetLayoutCreateFlags)0u,
		DE_LENGTH_OF_ARRAY(bindings),
		bindings
	};

	extraResourcesLayout = createDescriptorSetLayout(vkd, device, &layoutInfo);

	const VkDescriptorPoolSize poolSizes[] =
	{
		{ descType, 1u }
	};

	const VkDescriptorPoolCreateInfo poolInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
		DE_NULL,
		(VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
		1u,		// maxSets
		DE_LENGTH_OF_ARRAY(poolSizes),
		poolSizes
	};

	extraResourcesSetPool = createDescriptorPool(vkd, device, &poolInfo);

	const VkDescriptorSetAllocateInfo allocInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
		DE_NULL,
		*extraResourcesSetPool,
		1u,
		&extraResourcesLayout.get()
	};

	extraResourcesSet = allocateDescriptorSet(vkd, device, &allocInfo);

	VkDescriptorBufferInfo bufferInfo;
	bufferInfo.buffer	= (useRef ? auxBuffer->getBuffer() : buffer.getBuffer());
	bufferInfo.offset	= 0u;
	bufferInfo.range	= VK_WHOLE_SIZE;

	const VkWriteDescriptorSet descriptorWrite =
	{
		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		DE_NULL,
		*extraResourcesSet,
		0u,		// dstBinding
		0u,		// dstArrayElement
		1u,
		descType,
		(const VkDescriptorImageInfo*)DE_NULL,
		&bufferInfo,
		(const VkBufferView*)DE_NULL
	};

	vkd.updateDescriptorSets(device, 1u, &descriptorWrite, 0u, DE_NULL);

	// Storage for output varying data.
	std::vector<deUint32>	outputs		(NUM_ELEMENTS);
	std::vector<void*>		outputPtr	(NUM_ELEMENTS);

	for (size_t i = 0; i < NUM_ELEMENTS; i++)
	{
		outputs[i] = 0xcdcdcdcd;
		outputPtr[i] = &outputs[i];
	}

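	// With shared or payload memory all invocations must live in the same
	// workgroup, so a single workgroup is used; otherwise, roughly one
	// invocation per element is requested from the executor.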
	const int					numWorkGroups	= (m_shaderType.isSharedLike() ? 1 : static_cast<int>(NUM_ELEMENTS));
	UniquePtr<ShaderExecutor>	executor		(createExecutor(m_context, m_shaderType.getType(), m_shaderSpec, *extraResourcesLayout));

	executor->execute(numWorkGroups, DE_NULL, &outputPtr[0], *extraResourcesSet);
	buffer.invalidate();

	tcu::ResultCollector resultCollector(log);

	// Check the results of the atomic operation
	testBuffer->checkResults(resultCollector);

	return tcu::TestStatus(resultCollector.getResult(), resultCollector.getMessage());
}

class AtomicOperationCase : public TestCase
{
public:
							AtomicOperationCase		(tcu::TestContext&		testCtx,
													 const char*			name,
													 const char*			description,
													 AtomicShaderType		type,
													 DataType				dataType,
													 AtomicOperation		atomicOp);
	virtual					~AtomicOperationCase	(void);

	virtual TestInstance*	createInstance			(Context& ctx) const;
	virtual void			checkSupport			(Context& ctx) const;
	virtual void			initPrograms			(vk::SourceCollections& programCollection) const
	{
		const bool					useSpv14		= m_shaderType.isMeshShadingStage();
		const auto					spvVersion		= (useSpv14 ? vk::SPIRV_VERSION_1_4 : vk::SPIRV_VERSION_1_0);
		const ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, spvVersion, 0u, useSpv14);
		ShaderSpec					sourcesSpec		(m_shaderSpec);

		sourcesSpec.buildOptions = buildOptions;
		generateSources(m_shaderType.getType(), sourcesSpec, programCollection);
	}

private:

	void					createShaderSpec();
	ShaderSpec				m_shaderSpec;
	const AtomicShaderType	m_shaderType;
	const DataType			m_dataType;
	const AtomicOperation	m_atomicOp;
};

AtomicOperationCase::AtomicOperationCase (tcu::TestContext&	testCtx,
										  const char*		name,
										  const char*		description,
										  AtomicShaderType	shaderType,
										  DataType			dataType,
										  AtomicOperation	atomicOp)
	: TestCase			(testCtx, name, description)
	, m_shaderType		(shaderType)
	, m_dataType		(dataType)
	, m_atomicOp		(atomicOp)
{
	createShaderSpec();
	init();
}

AtomicOperationCase::~AtomicOperationCase (void)
{
}

TestInstance* AtomicOperationCase::createInstance (Context& ctx) const
{
	return new AtomicOperationCaseInstance(ctx, m_shaderSpec, m_shaderType, m_dataType, m_atomicOp);
}

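// Maps the tested data type and operation to the device features that must be
// present: 64-bit integer atomics come from VK_KHR_shader_atomic_int64,
// 32/64-bit float add and exchange from VK_EXT_shader_atomic_float, and all
// 16-bit float atomics plus float min/max from VK_EXT_shader_atomic_float2.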
void AtomicOperationCase::checkSupport (Context& ctx) const
{
	if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
	{
		ctx.requireDeviceFunctionality("VK_KHR_shader_atomic_int64");

		const auto atomicInt64Features	= ctx.getShaderAtomicInt64Features();
		const bool isSharedMemory		= m_shaderType.isSharedLike();

		if (!isSharedMemory && atomicInt64Features.shaderBufferInt64Atomics == VK_FALSE)
		{
			TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for buffers");
		}
		if (isSharedMemory && atomicInt64Features.shaderSharedInt64Atomics == VK_FALSE)
		{
			TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for shared memory");
		}
	}

	if (m_dataType == DATA_TYPE_FLOAT16)
	{
		ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
		if (m_atomicOp == ATOMIC_OP_ADD)
		{
			if (m_shaderType.isSharedLike())
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared add atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer add atomic operation not supported");
				}
			}
		}
		if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
		{
			if (m_shaderType.isSharedLike())
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicMinMax)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared min/max atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicMinMax)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer min/max atomic operation not supported");
				}
			}
		}
		if (m_atomicOp == ATOMIC_OP_EXCHANGE)
		{
			if (m_shaderType.isSharedLike())
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared atomic operations not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer atomic operations not supported");
				}
			}
		}
#endif // CTS_USES_VULKANSC
	}

	if (m_dataType == DATA_TYPE_FLOAT32)
	{
		ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
		if (m_atomicOp == ATOMIC_OP_ADD)
		{
			if (m_shaderType.isSharedLike())
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared add atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer add atomic operation not supported");
				}
			}
		}
		if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
		{
			ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
			if (m_shaderType.isSharedLike())
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat32AtomicMinMax)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared min/max atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat32AtomicMinMax)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer min/max atomic operation not supported");
				}
			}
#endif // CTS_USES_VULKANSC
		}
		if (m_atomicOp == ATOMIC_OP_EXCHANGE)
		{
			if (m_shaderType.isSharedLike())
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
				}
			}
		}
	}

	if (m_dataType == DATA_TYPE_FLOAT64)
	{
		ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
		if (m_atomicOp == ATOMIC_OP_ADD)
		{
			if (m_shaderType.isSharedLike())
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared add atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer add atomic operation not supported");
				}
			}
		}
		if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
		{
			ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
			if (m_shaderType.isSharedLike())
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat64AtomicMinMax)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared min/max atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat64AtomicMinMax)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer min/max atomic operation not supported");
				}
			}
#endif // CTS_USES_VULKANSC
		}
		if (m_atomicOp == ATOMIC_OP_EXCHANGE)
		{
			if (m_shaderType.isSharedLike())
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
				}
			}
		}
	}

	if (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE)
	{
		ctx.requireDeviceFunctionality("VK_KHR_buffer_device_address");
	}

	checkSupportShader(ctx, m_shaderType.getType());
}

void AtomicOperationCase::createShaderSpec (void)
{
	const AtomicMemoryType	memoryType		= m_shaderType.getMemoryType();
	const bool				isSharedLike	= m_shaderType.isSharedLike();

	// Global declarations.
	std::ostringstream shaderTemplateGlobalStream;

	// Structure in use for atomic operations.
	shaderTemplateGlobalStream
		<< "${EXTENSIONS}\n"
		<< "\n"
		<< "struct AtomicStruct\n"
		<< "{\n"
		<< "    ${DATATYPE} inoutValues[${N}/2];\n"
		<< "    ${DATATYPE} inputValues[${N}];\n"
		<< "    ${DATATYPE} compareValues[${N}];\n"
		<< "    ${DATATYPE} outputValues[${N}];\n"
		<< "    int invocationHitCount[${N}];\n"
		<< "    int index;\n"
		<< "};\n"
		<< "\n"
		;

	// The name dance and declarations below will make sure the structure that will be used with atomic operations can be accessed
	// as "buf.data", which is the name used in the atomic operation statements.
	//
	// * When using a buffer directly, RESULT_BUFFER_NAME will be "buf" and the inner struct will be "data".
	// * When using a workgroup-shared global variable, the "data" struct will be nested in an auxiliary "buf" struct.
	// * When using buffer references, the uniform buffer reference will be called "buf" and its contents "data".
	//
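	// For the plain buffer case, for instance, the specialized declaration
	// comes out roughly as:
	//
	//     layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {
	//         AtomicStruct data;
	//     } buf;
	//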
	if (memoryType != AtomicMemoryType::REFERENCE)
	{
		shaderTemplateGlobalStream
			<< "layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {\n"
			<< "    AtomicStruct data;\n"
			<< "} ${RESULT_BUFFER_NAME};\n"
			<< "\n"
			;

		// When using global shared memory in the compute, task or mesh variants, invocations will use a shared global structure
		// instead of a descriptor set as the sources and results of each tested operation.
		if (memoryType == AtomicMemoryType::SHARED)
		{
			shaderTemplateGlobalStream
				<< "shared struct { AtomicStruct data; } buf;\n"
				<< "\n"
				;
		}
		else if (memoryType == AtomicMemoryType::PAYLOAD)
		{
			shaderTemplateGlobalStream
				<< "struct TaskData { AtomicStruct data; };\n"
				<< "taskPayloadSharedEXT TaskData buf;\n"
				;
		}
	}
	else
	{
		shaderTemplateGlobalStream
			<< "layout (buffer_reference) buffer AtomicBuffer {\n"
			<< "    AtomicStruct data;\n"
			<< "};\n"
			<< "\n"
			<< "layout (set = ${SETIDX}, binding = 0) uniform References {\n"
			<< "    AtomicBuffer buf;\n"
			<< "};\n"
			<< "\n"
			;
	}

	const auto					shaderTemplateGlobalString	= shaderTemplateGlobalStream.str();
	const tcu::StringTemplate	shaderTemplateGlobal		(shaderTemplateGlobalString);

	// Shader body for the non-vertex case.
	std::ostringstream nonVertexShaderTemplateStream;

	if (isSharedLike)
	{
		// Invocation zero will initialize the shared structure from the descriptor set.
		nonVertexShaderTemplateStream
			<< "if (gl_LocalInvocationIndex == 0u)\n"
			<< "{\n"
			<< "    buf.data = ${RESULT_BUFFER_NAME}.data;\n"
			<< "}\n"
			<< "barrier();\n"
			;
	}

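	// The fragment variant must exclude helper invocations from the atomics
	// (and from bumping the shared index), so they are filtered out with
	// gl_HelperInvocation. Other non-vertex stages instead cap the number of
	// participating invocations at ${N} via the invocationHitCount counter.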
	if (m_shaderType.getType() == glu::SHADERTYPE_FRAGMENT)
	{
		nonVertexShaderTemplateStream
			<< "if (!gl_HelperInvocation) {\n"
			<< "    int idx = atomicAdd(buf.data.index, 1);\n"
			<< "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
			<< "}\n"
			;
	}
	else
	{
		nonVertexShaderTemplateStream
			<< "if (atomicAdd(buf.data.invocationHitCount[0], 1) < ${N})\n"
			<< "{\n"
			<< "    int idx = atomicAdd(buf.data.index, 1);\n"
			<< "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
			<< "}\n"
			;
	}

	if (isSharedLike)
	{
		// Invocation zero will copy results back to the descriptor set.
		nonVertexShaderTemplateStream
			<< "barrier();\n"
			<< "if (gl_LocalInvocationIndex == 0u)\n"
			<< "{\n"
			<< "    ${RESULT_BUFFER_NAME}.data = buf.data;\n"
			<< "}\n"
			;
	}

	const auto					nonVertexShaderTemplateStreamStr	= nonVertexShaderTemplateStream.str();
	const tcu::StringTemplate	nonVertexShaderTemplateSrc			(nonVertexShaderTemplateStreamStr);

	// Shader body for the vertex case.
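	// A vertex may be processed more than once; the per-vertex hit count
	// ensures only the first invocation for a given gl_VertexIndex performs
	// the tested atomic.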
	const tcu::StringTemplate vertexShaderTemplateSrc(
		"int idx = gl_VertexIndex;\n"
		"if (atomicAdd(buf.data.invocationHitCount[idx], 1) == 0)\n"
		"{\n"
		"    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
		"}\n");

	// Extensions.
	std::ostringstream extensions;

	if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
	{
		extensions
			<< "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
			<< "#extension GL_EXT_shader_atomic_int64 : enable\n"
			;
	}
	else if ((m_dataType == DATA_TYPE_FLOAT16) || (m_dataType == DATA_TYPE_FLOAT32) || (m_dataType == DATA_TYPE_FLOAT64))
	{
		extensions
			<< "#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable\n"
			<< "#extension GL_EXT_shader_atomic_float : enable\n"
			<< "#extension GL_EXT_shader_atomic_float2 : enable\n"
			<< "#extension GL_KHR_memory_scope_semantics : enable\n"
			;
	}

	if (memoryType == AtomicMemoryType::REFERENCE)
	{
		extensions << "#extension GL_EXT_buffer_reference : require\n";
	}

	// Specializations.
	std::map<std::string, std::string> specializations;

	specializations["EXTENSIONS"]			= extensions.str();
	specializations["DATATYPE"]				= dataType2Str(m_dataType);
	specializations["ATOMICOP"]				= atomicOp2Str(m_atomicOp);
	specializations["SETIDX"]				= de::toString((int)EXTRA_RESOURCES_DESCRIPTOR_SET_INDEX);
	specializations["N"]					= de::toString((int)NUM_ELEMENTS);
	specializations["COMPARE_ARG"]			= ((m_atomicOp == ATOMIC_OP_COMP_SWAP) ? "buf.data.compareValues[idx], " : "");
	specializations["RESULT_BUFFER_NAME"]	= (isSharedLike ? "result" : "buf");

	// Shader spec.
	m_shaderSpec.outputs.push_back(Symbol("outData", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
	m_shaderSpec.glslVersion		= glu::GLSL_VERSION_450;
	m_shaderSpec.globalDeclarations	= shaderTemplateGlobal.specialize(specializations);
	m_shaderSpec.source				= ((m_shaderType.getType() == glu::SHADERTYPE_VERTEX)
										? vertexShaderTemplateSrc.specialize(specializations)
										: nonVertexShaderTemplateSrc.specialize(specializations));

	if (isSharedLike)
	{
		// When using global shared memory, use a single workgroup and an appropriate number of local invocations.
		m_shaderSpec.localSizeX = static_cast<int>(NUM_ELEMENTS);
	}
}

void addAtomicOperationTests (tcu::TestCaseGroup* atomicOperationTestsGroup)
{
	tcu::TestContext& testCtx = atomicOperationTestsGroup->getTestContext();

	static const struct
	{
		glu::ShaderType		type;
		const char*			name;
	} shaderTypes[] =
	{
		{ glu::SHADERTYPE_VERTEX,					"vertex"	},
		{ glu::SHADERTYPE_FRAGMENT,					"fragment"	},
		{ glu::SHADERTYPE_GEOMETRY,					"geometry"	},
		{ glu::SHADERTYPE_TESSELLATION_CONTROL,		"tess_ctrl"	},
		{ glu::SHADERTYPE_TESSELLATION_EVALUATION,	"tess_eval"	},
		{ glu::SHADERTYPE_COMPUTE,					"compute"	},
		{ glu::SHADERTYPE_TASK,						"task"		},
		{ glu::SHADERTYPE_MESH,						"mesh"		},
	};

	static const struct
	{
		AtomicMemoryType	type;
		const char*			suffix;
	} kMemoryTypes[] =
	{
		{ AtomicMemoryType::BUFFER,		""				},
		{ AtomicMemoryType::SHARED,		"_shared"		},
		{ AtomicMemoryType::REFERENCE,	"_reference"	},
		{ AtomicMemoryType::PAYLOAD,	"_payload"		},
	};

	static const struct
	{
		DataType		dataType;
		const char*		name;
		const char*		description;
	} dataSign[] =
	{
#ifndef CTS_USES_VULKANSC
		{ DATA_TYPE_FLOAT16,	"float16",			"Tests using 16-bit float data"				},
#endif // CTS_USES_VULKANSC
		{ DATA_TYPE_INT32,		"signed",			"Tests using signed data (int)"				},
		{ DATA_TYPE_UINT32,		"unsigned",			"Tests using unsigned data (uint)"			},
		{ DATA_TYPE_FLOAT32,	"float32",			"Tests using 32-bit float data"				},
		{ DATA_TYPE_INT64,		"signed64bit",		"Tests using 64-bit signed data (int64)"	},
		{ DATA_TYPE_UINT64,		"unsigned64bit",	"Tests using 64-bit unsigned data (uint64)"	},
		{ DATA_TYPE_FLOAT64,	"float64",			"Tests using 64-bit float data"				}
	};

	static const struct
	{
		AtomicOperation		value;
		const char*			name;
	} atomicOp[] =
	{
		{ ATOMIC_OP_EXCHANGE,	"exchange"	},
		{ ATOMIC_OP_COMP_SWAP,	"comp_swap"	},
		{ ATOMIC_OP_ADD,		"add"		},
		{ ATOMIC_OP_MIN,		"min"		},
		{ ATOMIC_OP_MAX,		"max"		},
		{ ATOMIC_OP_AND,		"and"		},
		{ ATOMIC_OP_OR,			"or"		},
		{ ATOMIC_OP_XOR,		"xor"		}
	};

	for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(atomicOp); opNdx++)
	{
		for (int signNdx = 0; signNdx < DE_LENGTH_OF_ARRAY(dataSign); signNdx++)
		{
			for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(shaderTypes); shaderTypeNdx++)
			{
				// Only ADD, MIN, MAX and EXCHANGE are supported on floating-point types
				// (MIN and MAX are excluded for Vulkan SC).
				if (dataSign[signNdx].dataType == DATA_TYPE_FLOAT16 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT32 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT64)
				{
					if (atomicOp[opNdx].value != ATOMIC_OP_ADD &&
#ifndef CTS_USES_VULKANSC
					    atomicOp[opNdx].value != ATOMIC_OP_MIN &&
					    atomicOp[opNdx].value != ATOMIC_OP_MAX &&
#endif // CTS_USES_VULKANSC
						atomicOp[opNdx].value != ATOMIC_OP_EXCHANGE)
					{
						continue;
					}
				}

				for (int memoryTypeNdx = 0; memoryTypeNdx < DE_LENGTH_OF_ARRAY(kMemoryTypes); ++memoryTypeNdx)
				{
					// Shared memory is only available in compute, task and mesh shaders.
					if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::SHARED
						&& shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_COMPUTE
						&& shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_TASK
						&& shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_MESH)
						continue;

					// Payload memory is only available for atomics in task shaders (in mesh shaders it's read-only).
					if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::PAYLOAD && shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_TASK)
						continue;

					const std::string description	= std::string("Tests atomic operation ") + atomicOp2Str(atomicOp[opNdx].value) + std::string(".");
					const std::string name			= std::string(atomicOp[opNdx].name) + "_" + std::string(dataSign[signNdx].name) + "_" + std::string(shaderTypes[shaderTypeNdx].name) + kMemoryTypes[memoryTypeNdx].suffix;

					atomicOperationTestsGroup->addChild(new AtomicOperationCase(testCtx, name.c_str(), description.c_str(), AtomicShaderType(shaderTypes[shaderTypeNdx].type, kMemoryTypes[memoryTypeNdx].type), dataSign[signNdx].dataType, atomicOp[opNdx].value));
				}
			}
		}
	}
}

} // anonymous

tcu::TestCaseGroup* createAtomicOperationTests (tcu::TestContext& testCtx)
{
	return createTestGroup(testCtx, "atomic_operations", "Atomic Operation Tests", addAtomicOperationTests);
}

} // shaderexecutor
} // vkt