/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2015 The Khronos Group Inc.
 * Copyright (c) 2017 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Atomic operations (OpAtomic*) tests.
 *//*--------------------------------------------------------------------*/

#include "vktAtomicOperationTests.hpp"
#include "vktShaderExecutor.hpp"

#include "vkRefUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vktTestGroupUtil.hpp"

#include "tcuTestLog.hpp"
#include "tcuStringTemplate.hpp"
#include "tcuResultCollector.hpp"

#include "deFloat16.h"
#include "deMath.hpp"
#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deRandom.hpp"
#include "deArrayUtil.hpp"

#include <string>
#include <memory>
#include <cmath>

namespace vkt
{
namespace shaderexecutor
{

namespace
{

using de::UniquePtr;
using de::MovePtr;
using std::vector;

using namespace vk;

enum class AtomicMemoryType
{
	BUFFER = 0,	// Normal buffer.
	SHARED,		// Shared global struct in a compute workgroup.
	REFERENCE,	// Buffer passed as a reference.
};

// Helper struct to indicate the shader type and if it should use shared global memory.
class AtomicShaderType
{
public:
	AtomicShaderType (glu::ShaderType type, AtomicMemoryType memoryType)
		: m_type				(type)
		, m_atomicMemoryType	(memoryType)
	{
		// Shared global memory can only be used with compute shaders.
		DE_ASSERT(memoryType != AtomicMemoryType::SHARED || type == glu::SHADERTYPE_COMPUTE);
	}

	glu::ShaderType		getType					(void) const	{ return m_type; }
	AtomicMemoryType	getMemoryType			(void) const	{ return m_atomicMemoryType; }

private:
	glu::ShaderType		m_type;
	AtomicMemoryType	m_atomicMemoryType;
};

// Buffer helper
class Buffer
{
public:
						Buffer				(Context& context, VkBufferUsageFlags usage, size_t size, bool useRef);

	VkBuffer			getBuffer			(void) const { return *m_buffer;					}
	void*				getHostPtr			(void) const { return m_allocation->getHostPtr();	}
	void				flush				(void);
	void				invalidate			(void);

private:
	const DeviceInterface&		m_vkd;
	const VkDevice				m_device;
	const VkQueue				m_queue;
	const deUint32				m_queueIndex;
	const Unique<VkBuffer>		m_buffer;
	const UniquePtr<Allocation>	m_allocation;
};

typedef de::SharedPtr<Buffer> BufferSp;

Move<VkBuffer> createBuffer (const DeviceInterface& vkd, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usageFlags)
{
	const VkBufferCreateInfo createInfo	=
	{
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
		DE_NULL,
		(VkBufferCreateFlags)0,
		size,
		usageFlags,
		VK_SHARING_MODE_EXCLUSIVE,
		0u,
		DE_NULL
	};
	return createBuffer(vkd, device, &createInfo);
}

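// Note: when the buffer is going to be accessed through a buffer device address
// (useRef), the allocation below requests MemoryRequirement::DeviceAddress in
// addition to being host-visible, so that vkGetBufferDeviceAddress can later be
// used on the bound buffer.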
MovePtr<Allocation> allocateAndBindMemory (const DeviceInterface& vkd, VkDevice device, Allocator& allocator, VkBuffer buffer, bool useRef)
{
	const MemoryRequirement allocationType = (MemoryRequirement::HostVisible | (useRef ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any));
	MovePtr<Allocation>	alloc(allocator.allocate(getBufferMemoryRequirements(vkd, device, buffer), allocationType));

	VK_CHECK(vkd.bindBufferMemory(device, buffer, alloc->getMemory(), alloc->getOffset()));

	return alloc;
}

Buffer::Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef)
	: m_vkd			(context.getDeviceInterface())
	, m_device		(context.getDevice())
	, m_queue		(context.getUniversalQueue())
	, m_queueIndex	(context.getUniversalQueueFamilyIndex())
	, m_buffer		(createBuffer			(context.getDeviceInterface(),
											 context.getDevice(),
											 (VkDeviceSize)size,
											 usage))
	, m_allocation	(allocateAndBindMemory	(context.getDeviceInterface(),
											 context.getDevice(),
											 context.getDefaultAllocator(),
											 *m_buffer,
											 useRef))
{
}

void Buffer::flush (void)
{
	flushMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

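// Makes device writes visible to the host: a barrier from
// VK_ACCESS_MEMORY_WRITE_BIT to VK_ACCESS_HOST_READ_BIT is submitted and waited
// on before the mapped memory range is invalidated.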
void Buffer::invalidate (void)
{
	const auto	cmdPool			= vk::makeCommandPool(m_vkd, m_device, m_queueIndex);
	const auto	cmdBufferPtr	= vk::allocateCommandBuffer(m_vkd, m_device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
	const auto	cmdBuffer		= cmdBufferPtr.get();
	const auto	bufferBarrier	= vk::makeBufferMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, m_buffer.get(), 0ull, VK_WHOLE_SIZE);

	beginCommandBuffer(m_vkd, cmdBuffer);
	m_vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
	endCommandBuffer(m_vkd, cmdBuffer);
	submitCommandsAndWait(m_vkd, m_device, m_queue, cmdBuffer);

	invalidateMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

// Tests

enum AtomicOperation
{
	ATOMIC_OP_EXCHANGE = 0,
	ATOMIC_OP_COMP_SWAP,
	ATOMIC_OP_ADD,
	ATOMIC_OP_MIN,
	ATOMIC_OP_MAX,
	ATOMIC_OP_AND,
	ATOMIC_OP_OR,
	ATOMIC_OP_XOR,

	ATOMIC_OP_LAST
};

std::string atomicOp2Str (AtomicOperation op)
{
	static const char* const s_names[] =
	{
		"atomicExchange",
		"atomicCompSwap",
		"atomicAdd",
		"atomicMin",
		"atomicMax",
		"atomicAnd",
		"atomicOr",
		"atomicXor"
	};
	return de::getSizedArrayElement<ATOMIC_OP_LAST>(s_names, op);
}

enum
{
	NUM_ELEMENTS = 32
};

enum DataType
{
	DATA_TYPE_FLOAT16 = 0,
	DATA_TYPE_INT32,
	DATA_TYPE_UINT32,
	DATA_TYPE_FLOAT32,
	DATA_TYPE_INT64,
	DATA_TYPE_UINT64,
	DATA_TYPE_FLOAT64,

	DATA_TYPE_LAST
};

std::string dataType2Str(DataType type)
{
	static const char* const s_names[] =
	{
		"float16_t",
		"int",
		"uint",
		"float",
		"int64_t",
		"uint64_t",
		"double",
	};
	return de::getSizedArrayElement<DATA_TYPE_LAST>(s_names, type);
}

class BufferInterface
{
public:
	virtual void setBuffer(void* ptr) = 0;

	virtual size_t bufferSize() = 0;

	virtual void fillWithTestData(de::Random &rnd) = 0;

	virtual void checkResults(tcu::ResultCollector& resultCollector) = 0;

	virtual ~BufferInterface() {}
};

template<typename dataTypeT>
class TestBuffer : public BufferInterface
{
public:

	TestBuffer(AtomicOperation	atomicOp)
		: m_atomicOp(atomicOp)
	{}

	template<typename T>
	struct BufferData
	{
		// Use half the number of elements for inout to cause overlap between atomic operations.
		// Each inout element at index i will have two atomic operations using input from
		// indices i and i + NUM_ELEMENTS / 2.
		T			inout[NUM_ELEMENTS / 2];
		T			input[NUM_ELEMENTS];
		T			compare[NUM_ELEMENTS];
		T			output[NUM_ELEMENTS];
		T			invocationHitCount[NUM_ELEMENTS];
		deInt32		index;
	};

	virtual void setBuffer(void* ptr)
	{
		m_ptr = static_cast<BufferData<dataTypeT>*>(ptr);
	}

	virtual size_t bufferSize()
	{
		return sizeof(BufferData<dataTypeT>);
	}

	virtual void fillWithTestData(de::Random &rnd)
	{
		dataTypeT pattern;
		deMemset(&pattern, 0xcd, sizeof(dataTypeT));

		for (int i = 0; i < NUM_ELEMENTS / 2; i++)
		{
			m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getUint64());
			// The first half of the compare elements matches inout at even
			// indices; the second half matches at odd indices. As a result,
			// exactly one of the two overlapping compSwap operations succeeds.
			m_ptr->compare[i] = m_ptr->inout[i] + (i % 2);
			m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + 1 - (i % 2);
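			// For example, with i = 0 (even): compare[0] == inout[0], so the
			// compSwap using input[0] succeeds, while compare[16] == inout[0] + 1
			// is expected not to match the value the other operation observes.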
		}
		for (int i = 0; i < NUM_ELEMENTS; i++)
		{
			m_ptr->input[i] = static_cast<dataTypeT>(rnd.getUint64());
			m_ptr->output[i] = pattern;
			m_ptr->invocationHitCount[i] = 0;
		}
		m_ptr->index = 0;

		// Take a copy to be used when calculating expected values.
		m_original = *m_ptr;
	}

	virtual void checkResults(tcu::ResultCollector&	resultCollector)
	{
		checkOperation(m_original, *m_ptr, resultCollector);
	}

	template<typename T>
	struct Expected
	{
		T m_inout;
		T m_output[2];

		Expected (T inout, T output0, T output1)
		: m_inout(inout)
		{
			m_output[0] = output0;
			m_output[1] = output1;
		}

		bool compare (T inout, T output0, T output1)
		{
			return (deMemCmp((const void*)&m_inout, (const void*)&inout, sizeof(inout)) == 0
					&& deMemCmp((const void*)&m_output[0], (const void*)&output0, sizeof(output0)) == 0
					&& deMemCmp((const void*)&m_output[1], (const void*)&output1, sizeof(output1)) == 0);
		}
	};

	void checkOperation	(const BufferData<dataTypeT>&	original,
						 const BufferData<dataTypeT>&	result,
						 tcu::ResultCollector&			resultCollector);

	const AtomicOperation	m_atomicOp;

	BufferData<dataTypeT>* m_ptr;
	BufferData<dataTypeT>  m_original;

};

template<typename T>
bool nanSafeSloppyEquals(T x, T y)
{
	if (deIsIEEENaN(x) && deIsIEEENaN(y))
		return true;

	if (deIsIEEENaN(x) || deIsIEEENaN(y))
		return false;

	return fabs(deToDouble(x) - deToDouble(y)) < 0.00001;
}

template<typename dataTypeT>
class TestBufferFloatingPoint : public BufferInterface
{
public:

	TestBufferFloatingPoint(AtomicOperation	atomicOp)
		: m_atomicOp(atomicOp)
	{}

	template<typename T>
	struct BufferDataFloatingPoint
	{
		// Use half the number of elements for inout to cause overlap between atomic operations.
		// Each inout element at index i will have two atomic operations using input from
		// indices i and i + NUM_ELEMENTS / 2.
		T			inout[NUM_ELEMENTS / 2];
		T			input[NUM_ELEMENTS];
		T			compare[NUM_ELEMENTS];
		T			output[NUM_ELEMENTS];
		deInt32		invocationHitCount[NUM_ELEMENTS];
		deInt32		index;
	};

	virtual void setBuffer(void* ptr)
	{
		m_ptr = static_cast<BufferDataFloatingPoint<dataTypeT>*>(ptr);
	}

	virtual size_t bufferSize()
	{
		return sizeof(BufferDataFloatingPoint<dataTypeT>);
	}

	virtual void fillWithTestData(de::Random& rnd)
	{
		dataTypeT pattern;
		deMemset(&pattern, 0xcd, sizeof(dataTypeT));

		for (int i = 0; i < NUM_ELEMENTS / 2; i++)
		{
			m_ptr->inout[i] = deToFloatType<dataTypeT>(rnd.getFloat());
			// These aren't used by any of the float tests
			m_ptr->compare[i] = deToFloatType<dataTypeT>(0.0);
		}
		for (int i = 0; i < NUM_ELEMENTS; i++)
		{
			m_ptr->input[i] = deToFloatType<dataTypeT>(rnd.getFloat());
			m_ptr->output[i] = pattern;
			m_ptr->invocationHitCount[i] = 0;
		}

		// Add special cases for NaN and +/-0. These are written after the
		// random fill above so that the loop does not overwrite them.
		// 0: min(sNaN, x)
		m_ptr->inout[0] = deSignalingNaN<dataTypeT>();
		// 1: min(x, sNaN)
		m_ptr->input[1 * 2 + 0] = deSignalingNaN<dataTypeT>();
		// 2: min(qNaN, x)
		m_ptr->inout[2] = deQuietNaN<dataTypeT>();
		// 3: min(x, qNaN)
		m_ptr->input[3 * 2 + 0] = deQuietNaN<dataTypeT>();
		// 4: min(NaN, NaN)
		m_ptr->inout[4] = deSignalingNaN<dataTypeT>();
		m_ptr->input[4 * 2 + 0] = deQuietNaN<dataTypeT>();
		m_ptr->input[4 * 2 + 1] = deQuietNaN<dataTypeT>();
		// 5: min(+0, -0)
		m_ptr->inout[5] = deToFloatType<dataTypeT>(-0.0);
		m_ptr->input[5 * 2 + 0] = deToFloatType<dataTypeT>(0.0);
		m_ptr->input[5 * 2 + 1] = deToFloatType<dataTypeT>(0.0);

		m_ptr->index = 0;

		// Take a copy to be used when calculating expected values.
		m_original = *m_ptr;
	}

	virtual void checkResults(tcu::ResultCollector& resultCollector)
	{
		checkOperationFloatingPoint(m_original, *m_ptr, resultCollector);
	}

	template<typename T>
	struct Expected
	{
		T m_inout;
		T m_output[2];

		Expected(T inout, T output0, T output1)
			: m_inout(inout)
		{
			m_output[0] = output0;
			m_output[1] = output1;
		}

		bool compare(T inout, T output0, T output1)
		{
			return nanSafeSloppyEquals(m_inout, inout) &&
			       nanSafeSloppyEquals(m_output[0], output0) &&
			       nanSafeSloppyEquals(m_output[1], output1);
		}
	};

	void checkOperationFloatingPoint(const BufferDataFloatingPoint<dataTypeT>& original,
		const BufferDataFloatingPoint<dataTypeT>& result,
		tcu::ResultCollector& resultCollector);

	const AtomicOperation	m_atomicOp;

	BufferDataFloatingPoint<dataTypeT>* m_ptr;
	BufferDataFloatingPoint<dataTypeT>  m_original;

};

static BufferInterface* createTestBuffer(DataType type, AtomicOperation atomicOp)
{
	switch (type)
	{
	case DATA_TYPE_FLOAT16:
		return new TestBufferFloatingPoint<deFloat16>(atomicOp);
	case DATA_TYPE_INT32:
		return new TestBuffer<deInt32>(atomicOp);
	case DATA_TYPE_UINT32:
		return new TestBuffer<deUint32>(atomicOp);
	case DATA_TYPE_FLOAT32:
		return new TestBufferFloatingPoint<float>(atomicOp);
	case DATA_TYPE_INT64:
		return new TestBuffer<deInt64>(atomicOp);
	case DATA_TYPE_UINT64:
		return new TestBuffer<deUint64>(atomicOp);
	case DATA_TYPE_FLOAT64:
		return new TestBufferFloatingPoint<double>(atomicOp);
	default:
		DE_ASSERT(false);
		return DE_NULL;
	}
}

// Use template to handle both signed and unsigned cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBuffer<T>::checkOperation (const BufferData<T>&	original,
									const BufferData<T>&	result,
									tcu::ResultCollector&	resultCollector)
{
	// originalInout = original inout
	// input0 = input at index i
	// input1 = input at index i + NUM_ELEMENTS / 2
	//
	// The atomic operation returns the memory contents before
	// the operation, and this is stored as output. Two operations
	// are executed for each InOut value (using input0 and input1).
	//
	// Since two operations overlap on each InOut element, the
	// resulting InOut value and the outputs of the operations have
	// two result candidates depending on the execution order.
	// Verification passes if the results match one of these options.

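	// For example, for atomicAdd with originalInout = 1, input0 = 2, input1 = 3,
	// the accepted outcomes are (InOut, Output0, Output1) = (6, 1, 3) if the
	// input0 operation executed first, or (6, 4, 1) if the input1 operation did.
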
	for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
	{
		// Needed when reinterpreting the data as signed values.
		const T originalInout	= *reinterpret_cast<const T*>(&original.inout[elementNdx]);
		const T input0			= *reinterpret_cast<const T*>(&original.input[elementNdx]);
		const T input1			= *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

		// Expected results are collected to this vector.
		vector<Expected<T> > exp;

		switch (m_atomicOp)
		{
			case ATOMIC_OP_ADD:
			{
				exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
				exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
			}
			break;

			case ATOMIC_OP_AND:
			{
				exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout, originalInout & input0));
				exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout & input1, originalInout));
			}
			break;

			case ATOMIC_OP_OR:
			{
				exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout, originalInout | input0));
				exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout | input1, originalInout));
			}
			break;

			case ATOMIC_OP_XOR:
			{
				exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout, originalInout ^ input0));
				exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout ^ input1, originalInout));
			}
			break;

			case ATOMIC_OP_MIN:
			{
				exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), originalInout, de::min(originalInout, input0)));
				exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), de::min(originalInout, input1), originalInout));
			}
			break;

			case ATOMIC_OP_MAX:
			{
				exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), originalInout, de::max(originalInout, input0)));
				exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), de::max(originalInout, input1), originalInout));
			}
			break;

			case ATOMIC_OP_EXCHANGE:
			{
				exp.push_back(Expected<T>(input1, originalInout, input0));
				exp.push_back(Expected<T>(input0, input1, originalInout));
			}
			break;

			case ATOMIC_OP_COMP_SWAP:
			{
				if (elementNdx % 2 == 0)
				{
					exp.push_back(Expected<T>(input0, originalInout, input0));
					exp.push_back(Expected<T>(input0, originalInout, originalInout));
				}
				else
				{
					exp.push_back(Expected<T>(input1, input1, originalInout));
					exp.push_back(Expected<T>(input1, originalInout, originalInout));
				}
			}
			break;

			default:
				DE_FATAL("Unexpected atomic operation.");
				break;
		}

		const T resIo		= result.inout[elementNdx];
		const T resOutput0	= result.output[elementNdx];
		const T resOutput1	= result.output[elementNdx + NUM_ELEMENTS / 2];

		if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
		{
			std::ostringstream errorMessage;
			errorMessage	<< "ERROR: Result value check failed at index " << elementNdx
							<< ". Expected one of the two outcomes: InOut = " << tcu::toHex(exp[0].m_inout)
							<< ", Output0 = " << tcu::toHex(exp[0].m_output[0]) << ", Output1 = "
							<< tcu::toHex(exp[0].m_output[1]) << ", or InOut = " << tcu::toHex(exp[1].m_inout)
							<< ", Output0 = " << tcu::toHex(exp[1].m_output[0]) << ", Output1 = "
							<< tcu::toHex(exp[1].m_output[1]) << ". Got: InOut = " << tcu::toHex(resIo)
							<< ", Output0 = " << tcu::toHex(resOutput0) << ", Output1 = "
							<< tcu::toHex(resOutput1) << ". Using Input0 = " << tcu::toHex(original.input[elementNdx])
							<< " and Input1 = " << tcu::toHex(original.input[elementNdx + NUM_ELEMENTS / 2]) << ".";

			resultCollector.fail(errorMessage.str());
		}
	}
}

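// Collects every outcome the checker will accept for min/max when NaNs or
// signed zeros are involved. When a signaling NaN is an operand, both a quiet
// and a signaling NaN result are accepted (and, if the other operand is not a
// NaN, that operand as well), since implementations may or may not quiet
// signaling NaNs. min/max of +0 and -0 may return either zero.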
template<typename T>
void handleExceptionalFloatMinMaxValues(vector<T> &values, T x, T y)
{
	if (deIsSignalingNaN(x) && deIsSignalingNaN(y))
	{
		values.push_back(deQuietNaN<T>());
		values.push_back(deSignalingNaN<T>());
	}
	else if (deIsSignalingNaN(x))
	{
		values.push_back(deQuietNaN<T>());
		values.push_back(deSignalingNaN<T>());
		if (!deIsIEEENaN(y))
			values.push_back(y);
	}
	else if (deIsSignalingNaN(y))
	{
		values.push_back(deQuietNaN<T>());
		values.push_back(deSignalingNaN<T>());
		if (!deIsIEEENaN(x))
			values.push_back(x);
	}
	else if (deIsIEEENaN(x) && deIsIEEENaN(y))
	{
		// Both quiet NaNs
		values.push_back(deQuietNaN<T>());
	}
	else if (deIsIEEENaN(x))
	{
		// One quiet NaN and one non-NaN.
		values.push_back(y);
	}
	else if (deIsIEEENaN(y))
	{
		// One quiet NaN and one non-NaN.
		values.push_back(x);
	}
	else if ((deIsPositiveZero(x) && deIsNegativeZero(y)) || (deIsNegativeZero(x) && deIsPositiveZero(y)))
	{
		values.push_back(deToFloatType<T>(0.0));
		values.push_back(deToFloatType<T>(-0.0));
	}
}

template<typename T>
T floatAdd(T x, T y)
{
	if (deIsIEEENaN(x) || deIsIEEENaN(y))
		return deQuietNaN<T>();
	return deToFloatType<T>(deToDouble(x) + deToDouble(y));
}

template<typename T>
vector<T> floatMinValues(T x, T y)
{
	vector<T> values;
	handleExceptionalFloatMinMaxValues(values, x, y);
	if (values.empty())
	{
		values.push_back(deToDouble(x) < deToDouble(y) ? x : y);
	}
	return values;
}

template<typename T>
vector<T> floatMaxValues(T x, T y)
{
	vector<T> values;
	handleExceptionalFloatMinMaxValues(values, x, y);
	if (values.empty())
	{
		values.push_back(deToDouble(x) > deToDouble(y) ? x : y);
	}
	return values;
}

// Use template to handle both float and double cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBufferFloatingPoint<T>::checkOperationFloatingPoint(const BufferDataFloatingPoint<T>& original,
	const BufferDataFloatingPoint<T>& result,
	tcu::ResultCollector& resultCollector)
{
	// originalInout = original inout
	// input0 = input at index i
	// input1 = input at index i + NUM_ELEMENTS / 2
	//
	// The atomic operation returns the memory contents before
	// the operation, and this is stored as output. Two operations
	// are executed for each InOut value (using input0 and input1).
	//
	// Since two operations overlap on each InOut element, the
	// resulting InOut value and the outputs of the operations have
	// multiple result candidates depending on the execution order.
	// Verification passes if the results match one of these options.

	for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
	{
		// Needed when reinterpreting the raw buffer data.
		const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
		const T input0 = *reinterpret_cast<const T*>(&original.input[elementNdx]);
		const T input1 = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

		// Expected results are collected to this vector.
		vector<Expected<T> > exp;

		switch (m_atomicOp)
		{
		case ATOMIC_OP_ADD:
		{
			exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), originalInout, floatAdd(originalInout, input0)));
			exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), floatAdd(originalInout, input1), originalInout));
		}
		break;

		case ATOMIC_OP_MIN:
		{
			// The case where input0 is combined first
			vector<T> minOriginalAndInput0 = floatMinValues(originalInout, input0);
			for (T x : minOriginalAndInput0)
			{
				vector<T> minAll = floatMinValues(x, input1);
				for (T y : minAll)
				{
					exp.push_back(Expected<T>(y, originalInout, x));
				}
			}

			// The case where input1 is combined first
			vector<T> minOriginalAndInput1 = floatMinValues(originalInout, input1);
			for (T x : minOriginalAndInput1)
			{
				vector<T> minAll = floatMinValues(x, input0);
				for (T y : minAll)
				{
					exp.push_back(Expected<T>(y, x, originalInout));
				}
			}
		}
		break;

		case ATOMIC_OP_MAX:
		{
			// The case where input0 is combined first
			vector<T> maxOriginalAndInput0 = floatMaxValues(originalInout, input0);
			for (T x : maxOriginalAndInput0)
			{
				vector<T> maxAll = floatMaxValues(x, input1);
				for (T y : maxAll)
				{
					exp.push_back(Expected<T>(y, originalInout, x));
				}
			}

			// The case where input1 is combined first
			vector<T> maxOriginalAndInput1 = floatMaxValues(originalInout, input1);
			for (T x : maxOriginalAndInput1)
			{
				vector<T> maxAll = floatMaxValues(x, input0);
				for (T y : maxAll)
				{
					exp.push_back(Expected<T>(y, x, originalInout));
				}
			}
		}
		break;

		case ATOMIC_OP_EXCHANGE:
		{
			exp.push_back(Expected<T>(input1, originalInout, input0));
			exp.push_back(Expected<T>(input0, input1, originalInout));
		}
		break;

		default:
			DE_FATAL("Unexpected atomic operation.");
			break;
		}

		const T resIo = result.inout[elementNdx];
		const T resOutput0 = result.output[elementNdx];
		const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];

		bool hasMatch = false;
		for (Expected<T> e : exp)
		{
			if (e.compare(resIo, resOutput0, resOutput1))
			{
				hasMatch = true;
				break;
			}
		}
		if (!hasMatch)
		{
			std::ostringstream errorMessage;
			errorMessage << "ERROR: Result value check failed at index " << elementNdx
				<< ". Expected one of the outcomes:";

			bool first = true;
			for (Expected<T> e : exp)
			{
				if (!first)
					errorMessage << ", or";
				first = false;

				errorMessage << " InOut = " << e.m_inout
					<< ", Output0 = " << e.m_output[0]
					<< ", Output1 = " << e.m_output[1];
			}

			errorMessage << ". Got: InOut = " << resIo
				<< ", Output0 = " << resOutput0
				<< ", Output1 = " << resOutput1
				<< ". Using Input0 = " << original.input[elementNdx]
				<< " and Input1 = " << original.input[elementNdx + NUM_ELEMENTS / 2] << ".";

			resultCollector.fail(errorMessage.str());
		}
	}
}

class AtomicOperationCaseInstance : public TestInstance
{
public:
									AtomicOperationCaseInstance		(Context&			context,
																	 const ShaderSpec&	shaderSpec,
																	 AtomicShaderType	shaderType,
																	 DataType			dataType,
																	 AtomicOperation	atomicOp);

	virtual tcu::TestStatus			iterate							(void);

private:
	const ShaderSpec&				m_shaderSpec;
	AtomicShaderType				m_shaderType;
	const DataType					m_dataType;
	AtomicOperation					m_atomicOp;

};

AtomicOperationCaseInstance::AtomicOperationCaseInstance (Context&				context,
														  const ShaderSpec&		shaderSpec,
														  AtomicShaderType		shaderType,
														  DataType				dataType,
														  AtomicOperation		atomicOp)
	: TestInstance	(context)
	, m_shaderSpec	(shaderSpec)
	, m_shaderType	(shaderType)
	, m_dataType	(dataType)
	, m_atomicOp	(atomicOp)
{
}

tcu::TestStatus AtomicOperationCaseInstance::iterate(void)
{
	de::UniquePtr<BufferInterface>	testBuffer	(createTestBuffer(m_dataType, m_atomicOp));
	tcu::TestLog&					log			= m_context.getTestContext().getLog();
	const DeviceInterface&			vkd			= m_context.getDeviceInterface();
	const VkDevice					device		= m_context.getDevice();
	de::Random						rnd			(0x62a15e34);
	const bool						useRef		= (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE);
	const VkDescriptorType			descType	= (useRef ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	const VkBufferUsageFlags		usageFlags	= (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | (useRef ? static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : 0u));
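	// Note: with useRef, the storage buffer is reached through a buffer device
	// address read from a uniform buffer, so the descriptor type switches to
	// VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER and the buffer itself additionally needs
	// VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT.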

	// The main buffer will hold test data. When using buffer references, the buffer's address will be indirectly passed as part of
	// a uniform buffer. If not, it will be passed directly as a descriptor.
	Buffer							buffer		(m_context, usageFlags, testBuffer->bufferSize(), useRef);
	std::unique_ptr<Buffer>			auxBuffer;

	if (useRef)
	{
		// Pass the main buffer address inside a uniform buffer.
		const VkBufferDeviceAddressInfo addressInfo =
		{
			VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,	//	VkStructureType	sType;
			nullptr,										//	const void*		pNext;
			buffer.getBuffer(),								//	VkBuffer		buffer;
		};
		const auto address = vkd.getBufferDeviceAddress(device, &addressInfo);

		auxBuffer.reset(new Buffer(m_context, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, sizeof(address), false));
		deMemcpy(auxBuffer->getHostPtr(), &address, sizeof(address));
		auxBuffer->flush();
	}

	testBuffer->setBuffer(buffer.getHostPtr());
	testBuffer->fillWithTestData(rnd);

	buffer.flush();

	Move<VkDescriptorSetLayout>	extraResourcesLayout;
	Move<VkDescriptorPool>		extraResourcesSetPool;
	Move<VkDescriptorSet>		extraResourcesSet;

	const VkDescriptorSetLayoutBinding bindings[] =
	{
		{ 0u, descType, 1, VK_SHADER_STAGE_ALL, DE_NULL }
	};

	const VkDescriptorSetLayoutCreateInfo	layoutInfo	=
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		DE_NULL,
		(VkDescriptorSetLayoutCreateFlags)0u,
		DE_LENGTH_OF_ARRAY(bindings),
		bindings
	};

	extraResourcesLayout = createDescriptorSetLayout(vkd, device, &layoutInfo);

	const VkDescriptorPoolSize poolSizes[] =
	{
		{ descType, 1u }
	};

	const VkDescriptorPoolCreateInfo poolInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
		DE_NULL,
		(VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
		1u,		// maxSets
		DE_LENGTH_OF_ARRAY(poolSizes),
		poolSizes
	};

	extraResourcesSetPool = createDescriptorPool(vkd, device, &poolInfo);

	const VkDescriptorSetAllocateInfo allocInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
		DE_NULL,
		*extraResourcesSetPool,
		1u,
		&extraResourcesLayout.get()
	};

	extraResourcesSet = allocateDescriptorSet(vkd, device, &allocInfo);

	VkDescriptorBufferInfo bufferInfo;
	bufferInfo.buffer	= (useRef ? auxBuffer->getBuffer() : buffer.getBuffer());
	bufferInfo.offset	= 0u;
	bufferInfo.range	= VK_WHOLE_SIZE;

	const VkWriteDescriptorSet descriptorWrite =
	{
		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		DE_NULL,
		*extraResourcesSet,
		0u,		// dstBinding
		0u,		// dstArrayElement
		1u,
		descType,
		(const VkDescriptorImageInfo*)DE_NULL,
		&bufferInfo,
		(const VkBufferView*)DE_NULL
	};

	vkd.updateDescriptorSets(device, 1u, &descriptorWrite, 0u, DE_NULL);

	// Storage for output varying data.
	std::vector<deUint32>	outputs		(NUM_ELEMENTS);
	std::vector<void*>		outputPtr	(NUM_ELEMENTS);

	for (size_t i = 0; i < NUM_ELEMENTS; i++)
	{
		outputs[i] = 0xcdcdcdcd;
		outputPtr[i] = &outputs[i];
	}

	const int					numWorkGroups	= ((m_shaderType.getMemoryType() == AtomicMemoryType::SHARED) ? 1 : static_cast<int>(NUM_ELEMENTS));
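	// With shared memory a single workgroup is used; createShaderSpec() then sets
	// the local size to NUM_ELEMENTS so every element still gets an invocation.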
	UniquePtr<ShaderExecutor>	executor		(createExecutor(m_context, m_shaderType.getType(), m_shaderSpec, *extraResourcesLayout));

	executor->execute(numWorkGroups, DE_NULL, &outputPtr[0], *extraResourcesSet);
	buffer.invalidate();

	tcu::ResultCollector resultCollector(log);

	// Check the results of the atomic operation
	testBuffer->checkResults(resultCollector);

	return tcu::TestStatus(resultCollector.getResult(), resultCollector.getMessage());
}

class AtomicOperationCase : public TestCase
{
public:
							AtomicOperationCase		(tcu::TestContext&		testCtx,
													 const char*			name,
													 const char*			description,
													 AtomicShaderType		type,
													 DataType				dataType,
													 AtomicOperation		atomicOp);
	virtual					~AtomicOperationCase	(void);

	virtual TestInstance*	createInstance			(Context& ctx) const;
	virtual void			checkSupport			(Context& ctx) const;
	virtual void			initPrograms			(vk::SourceCollections& programCollection) const
	{
		generateSources(m_shaderType.getType(), m_shaderSpec, programCollection);
	}

private:

	void					createShaderSpec();
	ShaderSpec				m_shaderSpec;
	const AtomicShaderType	m_shaderType;
	const DataType			m_dataType;
	const AtomicOperation	m_atomicOp;
};

AtomicOperationCase::AtomicOperationCase (tcu::TestContext&	testCtx,
										  const char*		name,
										  const char*		description,
										  AtomicShaderType	shaderType,
										  DataType			dataType,
										  AtomicOperation	atomicOp)
	: TestCase			(testCtx, name, description)
	, m_shaderType		(shaderType)
	, m_dataType		(dataType)
	, m_atomicOp		(atomicOp)
{
	createShaderSpec();
	init();
}

AtomicOperationCase::~AtomicOperationCase (void)
{
}

TestInstance* AtomicOperationCase::createInstance (Context& ctx) const
{
	return new AtomicOperationCaseInstance(ctx, m_shaderSpec, m_shaderType, m_dataType, m_atomicOp);
}

void AtomicOperationCase::checkSupport (Context& ctx) const
{
	if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
	{
		ctx.requireDeviceFunctionality("VK_KHR_shader_atomic_int64");

		const auto atomicInt64Features	= ctx.getShaderAtomicInt64Features();
		const bool isSharedMemory		= (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED);

		if (!isSharedMemory && atomicInt64Features.shaderBufferInt64Atomics == VK_FALSE)
		{
			TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for buffers");
		}
		if (isSharedMemory && atomicInt64Features.shaderSharedInt64Atomics == VK_FALSE)
		{
			TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for shared memory");
		}
	}

	if (m_dataType == DATA_TYPE_FLOAT16)
	{
		ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
		if (m_atomicOp == ATOMIC_OP_ADD)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared add atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer add atomic operation not supported");
				}
			}
		}
		if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicMinMax)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared min/max atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicMinMax)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer min/max atomic operation not supported");
				}
			}
		}
		if (m_atomicOp == ATOMIC_OP_EXCHANGE)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared atomic operations not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer atomic operations not supported");
				}
			}
		}
	}

	if (m_dataType == DATA_TYPE_FLOAT32)
	{
		ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
		if (m_atomicOp == ATOMIC_OP_ADD)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared add atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer add atomic operation not supported");
				}
			}
		}
		if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
		{
			ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat32AtomicMinMax)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared min/max atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat32AtomicMinMax)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer min/max atomic operation not supported");
				}
			}
		}
		if (m_atomicOp == ATOMIC_OP_EXCHANGE)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
				}
			}
		}
	}

	if (m_dataType == DATA_TYPE_FLOAT64)
	{
		ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
		if (m_atomicOp == ATOMIC_OP_ADD)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared add atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer add atomic operation not supported");
				}
			}
		}
		if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
		{
			ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat64AtomicMinMax)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared min/max atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat64AtomicMinMax)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer min/max atomic operation not supported");
				}
			}
		}
		if (m_atomicOp == ATOMIC_OP_EXCHANGE)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
				}
			}
		}
	}

	if (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE)
	{
		ctx.requireDeviceFunctionality("VK_KHR_buffer_device_address");
	}

	// Check stores and atomic operation support.
	switch (m_shaderType.getType())
	{
	case glu::SHADERTYPE_VERTEX:
	case glu::SHADERTYPE_TESSELLATION_CONTROL:
	case glu::SHADERTYPE_TESSELLATION_EVALUATION:
	case glu::SHADERTYPE_GEOMETRY:
		if (!ctx.getDeviceFeatures().vertexPipelineStoresAndAtomics)
			TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in vertex, tessellation, and geometry shaders.");
		break;
	case glu::SHADERTYPE_FRAGMENT:
		if (!ctx.getDeviceFeatures().fragmentStoresAndAtomics)
			TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in fragment shader.");
		break;
	case glu::SHADERTYPE_COMPUTE:
		break;
	default:
		DE_FATAL("Unsupported shader type");
	}

	checkSupportShader(ctx, m_shaderType.getType());
}

void AtomicOperationCase::createShaderSpec (void)
{
	const AtomicMemoryType memoryType = m_shaderType.getMemoryType();

	// Global declarations.
	std::ostringstream shaderTemplateGlobalStream;

	// Structure in use for atomic operations.
	shaderTemplateGlobalStream
		<< "${EXTENSIONS}\n"
		<< "\n"
		<< "struct AtomicStruct\n"
		<< "{\n"
		<< "    ${DATATYPE} inoutValues[${N}/2];\n"
		<< "    ${DATATYPE} inputValues[${N}];\n"
		<< "    ${DATATYPE} compareValues[${N}];\n"
		<< "    ${DATATYPE} outputValues[${N}];\n"
		<< "    int invocationHitCount[${N}];\n"
		<< "    int index;\n"
		<< "};\n"
		<< "\n"
		;

	// The name dance and declarations below will make sure the structure that will be used with atomic operations can be accessed
	// as "buf.data", which is the name used in the atomic operation statements.
	//
	// * When using a buffer directly, RESULT_BUFFER_NAME will be "buf" and the inner struct will be "data".
	// * When using a workgroup-shared global variable, the "data" struct will be nested in an auxiliary "buf" struct.
	// * When using buffer references, the uniform buffer reference will be called "buf" and its contents "data".
	//
	if (memoryType != AtomicMemoryType::REFERENCE)
	{
		shaderTemplateGlobalStream
			<< "layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {\n"
			<< "    AtomicStruct data;\n"
			<< "} ${RESULT_BUFFER_NAME};\n"
			<< "\n"
			;

		// When using global shared memory in the compute variant, invocations will use a shared global structure instead of a
		// descriptor set as the sources and results of each tested operation.
		if (memoryType == AtomicMemoryType::SHARED)
		{
			shaderTemplateGlobalStream
				<< "shared struct { AtomicStruct data; } buf;\n"
				<< "\n"
				;
		}
	}
	else
	{
		shaderTemplateGlobalStream
			<< "layout (buffer_reference) buffer AtomicBuffer {\n"
			<< "    AtomicStruct data;\n"
			<< "};\n"
			<< "\n"
			<< "layout (set = ${SETIDX}, binding = 0) uniform References {\n"
			<< "    AtomicBuffer buf;\n"
			<< "};\n"
			<< "\n"
			;
	}

	const auto					shaderTemplateGlobalString	= shaderTemplateGlobalStream.str();
	const tcu::StringTemplate	shaderTemplateGlobal		(shaderTemplateGlobalString);

	// Shader body for the non-vertex case.
	std::ostringstream nonVertexShaderTemplateStream;

	if (memoryType == AtomicMemoryType::SHARED)
	{
		// Invocation zero will initialize the shared structure from the descriptor set.
		nonVertexShaderTemplateStream
			<< "if (gl_LocalInvocationIndex == 0u)\n"
			<< "{\n"
			<< "    buf.data = ${RESULT_BUFFER_NAME}.data;\n"
			<< "}\n"
			<< "barrier();\n"
			;
	}

	if (m_shaderType.getType() == glu::SHADERTYPE_FRAGMENT)
	{
		nonVertexShaderTemplateStream
			<< "if (!gl_HelperInvocation) {\n"
			<< "    int idx = atomicAdd(buf.data.index, 1);\n"
			<< "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
			<< "}\n"
			;
	}
	else
	{
		nonVertexShaderTemplateStream
			<< "if (atomicAdd(buf.data.invocationHitCount[0], 1) < ${N})\n"
			<< "{\n"
			<< "    int idx = atomicAdd(buf.data.index, 1);\n"
			<< "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
			<< "}\n"
			;
	}

	if (memoryType == AtomicMemoryType::SHARED)
	{
		// Invocation zero will copy results back to the descriptor set.
		nonVertexShaderTemplateStream
			<< "barrier();\n"
			<< "if (gl_LocalInvocationIndex == 0u)\n"
			<< "{\n"
			<< "    ${RESULT_BUFFER_NAME}.data = buf.data;\n"
			<< "}\n"
			;
	}

	const auto					nonVertexShaderTemplateStreamStr	= nonVertexShaderTemplateStream.str();
	const tcu::StringTemplate	nonVertexShaderTemplateSrc			(nonVertexShaderTemplateStreamStr);

	// Shader body for the vertex case.
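	// Each vertex runs the operation at most once: the invocationHitCount guard
	// filters out repeated vertex shader invocations for the same gl_VertexIndex.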
	const tcu::StringTemplate vertexShaderTemplateSrc(
		"int idx = gl_VertexIndex;\n"
		"if (atomicAdd(buf.data.invocationHitCount[idx], 1) == 0)\n"
		"{\n"
		"    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
		"}\n");

	// Extensions.
	std::ostringstream extensions;

	if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
	{
		extensions
			<< "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
			<< "#extension GL_EXT_shader_atomic_int64 : enable\n"
			;
	}
	else if ((m_dataType == DATA_TYPE_FLOAT16) || (m_dataType == DATA_TYPE_FLOAT32) || (m_dataType == DATA_TYPE_FLOAT64))
	{
		extensions
			<< "#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable\n"
			<< "#extension GL_EXT_shader_atomic_float : enable\n"
			<< "#extension GL_EXT_shader_atomic_float2 : enable\n"
			<< "#extension GL_KHR_memory_scope_semantics : enable\n"
			;
	}

	if (memoryType == AtomicMemoryType::REFERENCE)
	{
		extensions << "#extension GL_EXT_buffer_reference : require\n";
	}

	// Specializations.
	std::map<std::string, std::string> specializations;

	specializations["EXTENSIONS"]			= extensions.str();
	specializations["DATATYPE"]				= dataType2Str(m_dataType);
	specializations["ATOMICOP"]				= atomicOp2Str(m_atomicOp);
	specializations["SETIDX"]				= de::toString((int)EXTRA_RESOURCES_DESCRIPTOR_SET_INDEX);
	specializations["N"]					= de::toString((int)NUM_ELEMENTS);
	specializations["COMPARE_ARG"]			= ((m_atomicOp == ATOMIC_OP_COMP_SWAP) ? "buf.data.compareValues[idx], " : "");
	specializations["RESULT_BUFFER_NAME"]	= ((memoryType == AtomicMemoryType::SHARED) ? "result" : "buf");
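
	// As an example, for a "uint" atomicAdd buffer test the specialized atomic
	// statement in the shader body becomes:
	//     buf.data.outputValues[idx] = atomicAdd(buf.data.inoutValues[idx % (32/2)], buf.data.inputValues[idx]);
	// and COMPARE_ARG expands to "buf.data.compareValues[idx], " only for comp_swap.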

	// Shader spec.
	m_shaderSpec.outputs.push_back(Symbol("outData", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
	m_shaderSpec.glslVersion		= glu::GLSL_VERSION_450;
	m_shaderSpec.globalDeclarations	= shaderTemplateGlobal.specialize(specializations);
	m_shaderSpec.source				= ((m_shaderType.getType() == glu::SHADERTYPE_VERTEX)
										? vertexShaderTemplateSrc.specialize(specializations)
										: nonVertexShaderTemplateSrc.specialize(specializations));

	if (memoryType == AtomicMemoryType::SHARED)
	{
		// When using global shared memory, use a single workgroup and an appropriate number of local invocations.
		m_shaderSpec.localSizeX = static_cast<int>(NUM_ELEMENTS);
	}
}

void addAtomicOperationTests (tcu::TestCaseGroup* atomicOperationTestsGroup)
{
	tcu::TestContext& testCtx = atomicOperationTestsGroup->getTestContext();

	static const struct
	{
		glu::ShaderType		type;
		const char*			name;
	} shaderTypes[] =
	{
		{ glu::SHADERTYPE_VERTEX,							"vertex"			},
		{ glu::SHADERTYPE_FRAGMENT,							"fragment"			},
		{ glu::SHADERTYPE_GEOMETRY,							"geometry"			},
		{ glu::SHADERTYPE_TESSELLATION_CONTROL,				"tess_ctrl"			},
		{ glu::SHADERTYPE_TESSELLATION_EVALUATION,			"tess_eval"			},
		{ glu::SHADERTYPE_COMPUTE,							"compute"			},
	};

	static const struct
	{
		AtomicMemoryType	type;
		const char*			suffix;
	} kMemoryTypes[] =
	{
		{ AtomicMemoryType::BUFFER,		""				},
		{ AtomicMemoryType::SHARED,		"_shared"		},
		{ AtomicMemoryType::REFERENCE,	"_reference"	},
	};

	static const struct
	{
		DataType		dataType;
		const char*		name;
		const char*		description;
	} dataSign[] =
	{
		{ DATA_TYPE_FLOAT16,	"float16",			"Tests using 16-bit float data"				},
		{ DATA_TYPE_INT32,		"signed",			"Tests using signed data (int)"				},
		{ DATA_TYPE_UINT32,		"unsigned",			"Tests using unsigned data (uint)"			},
		{ DATA_TYPE_FLOAT32,	"float32",			"Tests using 32-bit float data"				},
		{ DATA_TYPE_INT64,		"signed64bit",		"Tests using 64 bit signed data (int64)"	},
		{ DATA_TYPE_UINT64,		"unsigned64bit",	"Tests using 64 bit unsigned data (uint64)"	},
		{ DATA_TYPE_FLOAT64,	"float64",			"Tests using 64-bit float data"				}
	};

	static const struct
	{
		AtomicOperation		value;
		const char*			name;
	} atomicOp[] =
	{
		{ ATOMIC_OP_EXCHANGE,	"exchange"	},
		{ ATOMIC_OP_COMP_SWAP,	"comp_swap"	},
		{ ATOMIC_OP_ADD,		"add"		},
		{ ATOMIC_OP_MIN,		"min"		},
		{ ATOMIC_OP_MAX,		"max"		},
		{ ATOMIC_OP_AND,		"and"		},
		{ ATOMIC_OP_OR,			"or"		},
		{ ATOMIC_OP_XOR,		"xor"		}
	};

	for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(atomicOp); opNdx++)
	{
		for (int signNdx = 0; signNdx < DE_LENGTH_OF_ARRAY(dataSign); signNdx++)
		{
			for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(shaderTypes); shaderTypeNdx++)
			{
				// Only ADD, MIN, MAX and EXCHANGE are supported on floating-point types.
				if (dataSign[signNdx].dataType == DATA_TYPE_FLOAT16 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT32 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT64)
				{
					if (atomicOp[opNdx].value != ATOMIC_OP_ADD &&
					    atomicOp[opNdx].value != ATOMIC_OP_MIN &&
					    atomicOp[opNdx].value != ATOMIC_OP_MAX &&
					    atomicOp[opNdx].value != ATOMIC_OP_EXCHANGE)
					{
						continue;
					}
				}

				for (int memoryTypeNdx = 0; memoryTypeNdx < DE_LENGTH_OF_ARRAY(kMemoryTypes); ++memoryTypeNdx)
				{
					// Shared memory only available in compute shaders.
					if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::SHARED && shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_COMPUTE)
						continue;

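					// The resulting name concatenates operation, data type, shader stage
					// and memory suffix, e.g. "add_signed_compute_shared".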
					const std::string description	= std::string("Tests atomic operation ") + atomicOp2Str(atomicOp[opNdx].value) + std::string(".");
					const std::string name			= std::string(atomicOp[opNdx].name) + "_" + std::string(dataSign[signNdx].name) + "_" + std::string(shaderTypes[shaderTypeNdx].name) + kMemoryTypes[memoryTypeNdx].suffix;

					atomicOperationTestsGroup->addChild(new AtomicOperationCase(testCtx, name.c_str(), description.c_str(), AtomicShaderType(shaderTypes[shaderTypeNdx].type, kMemoryTypes[memoryTypeNdx].type), dataSign[signNdx].dataType, atomicOp[opNdx].value));
				}
			}
		}
	}
}

} // anonymous

tcu::TestCaseGroup* createAtomicOperationTests (tcu::TestContext& testCtx)
{
	return createTestGroup(testCtx, "atomic_operations", "Atomic Operation Tests", addAtomicOperationTests);
}

} // shaderexecutor
} // vkt