/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file  vktImageAtomicOperationTests.cpp
 * \brief Image atomic operation tests
 *//*--------------------------------------------------------------------*/

#include "vktImageAtomicOperationTests.hpp"
#include "vktImageAtomicSpirvShaders.hpp"

#include "deUniquePtr.hpp"
#include "deStringUtil.hpp"
#include "deSTLUtil.hpp"

#include "vktTestCaseUtil.hpp"
#include "vkPrograms.hpp"
#include "vkImageUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vktImageTestsUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBufferWithMemory.hpp"

#include "tcuTextureUtil.hpp"
#include "tcuTexture.hpp"
#include "tcuVectorType.hpp"
#include "tcuStringTemplate.hpp"

namespace vkt
{
namespace image
{
namespace
{

using namespace vk;
using namespace std;
using de::toString;

using tcu::TextureFormat;
using tcu::IVec2;
using tcu::IVec3;
using tcu::UVec3;
using tcu::Vec4;
using tcu::IVec4;
using tcu::UVec4;
using tcu::CubeFace;
using tcu::Texture1D;
using tcu::Texture2D;
using tcu::Texture3D;
using tcu::Texture2DArray;
using tcu::TextureCube;
using tcu::PixelBufferAccess;
using tcu::ConstPixelBufferAccess;
using tcu::Vector;
using tcu::TestContext;

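//! Number of shader invocations that operate atomically on each pixel of the result image.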
enum
{
	NUM_INVOCATIONS_PER_PIXEL = 5u
};

enum AtomicOperation
{
	ATOMIC_OPERATION_ADD = 0,
	ATOMIC_OPERATION_SUB,
	ATOMIC_OPERATION_INC,
	ATOMIC_OPERATION_DEC,
	ATOMIC_OPERATION_MIN,
	ATOMIC_OPERATION_MAX,
	ATOMIC_OPERATION_AND,
	ATOMIC_OPERATION_OR,
	ATOMIC_OPERATION_XOR,
	ATOMIC_OPERATION_EXCHANGE,
	ATOMIC_OPERATION_COMPARE_EXCHANGE,

	ATOMIC_OPERATION_LAST
};

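//! Whether the verification shader reads the image with a plain imageLoad() or additionally with a sparse load (sparseImageLoadARB).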
enum class ShaderReadType
{
	NORMAL = 0,
	SPARSE,
};

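//! Whether the test image is bound to regular device memory or to sparse-resident memory.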
enum class ImageBackingType
{
	NORMAL = 0,
	SPARSE,
};

static string getCoordStr (const ImageType		imageType,
						   const std::string&	x,
						   const std::string&	y,
						   const std::string&	z)
{
	switch (imageType)
	{
		case IMAGE_TYPE_1D:
		case IMAGE_TYPE_BUFFER:
			return x;
		case IMAGE_TYPE_1D_ARRAY:
		case IMAGE_TYPE_2D:
			return string("ivec2(" + x + "," + y + ")");
		case IMAGE_TYPE_2D_ARRAY:
		case IMAGE_TYPE_3D:
		case IMAGE_TYPE_CUBE:
		case IMAGE_TYPE_CUBE_ARRAY:
			return string("ivec3(" + x + "," + y + "," + z + ")");
		default:
			DE_ASSERT(false);
			return "";
	}
}

static string getComponentTypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
{
	DE_ASSERT(intFormat || uintFormat || floatFormat);

	const bool is64 = (componentWidth == 64);

	if (intFormat)
		return (is64 ? "int64_t" : "int");
	if (uintFormat)
		return (is64 ? "uint64_t" : "uint");
	if (floatFormat)
		return (is64 ? "double" : "float");

	return "";
}

static string getVec4TypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
{
	DE_ASSERT(intFormat || uintFormat || floatFormat);

	const bool is64 = (componentWidth == 64);

	if (intFormat)
		return (is64 ? "i64vec4" : "ivec4");
	if (uintFormat)
		return (is64 ? "u64vec4" : "uvec4");
	if (floatFormat)
		return (is64 ? "f64vec4" : "vec4");

	return "";
}

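//! Returns the GLSL expression used as the atomic function argument; e.g. for ATOMIC_OPERATION_ADD with x="gx", y="gy", z="gz" this yields "(gx*gx + gy*gy + gz*gz)".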
static string getAtomicFuncArgumentShaderStr (const AtomicOperation	op,
											  const string&			x,
											  const string&			y,
											  const string&			z,
											  const IVec3&			gridSize)
{
	switch (op)
	{
		case ATOMIC_OPERATION_ADD:
		case ATOMIC_OPERATION_AND:
		case ATOMIC_OPERATION_OR:
		case ATOMIC_OPERATION_XOR:
			return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
		case ATOMIC_OPERATION_MIN:
		case ATOMIC_OPERATION_MAX:
			// multiply by (1 - 2*(value % 2)) to make half of the data negative;
			// this will result in generating large numbers for uint formats
			return string("((1 - 2*(" + x + " % 2)) * (" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + "))");
		case ATOMIC_OPERATION_EXCHANGE:
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:
			return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
		default:
			DE_ASSERT(false);
			return "";
	}
}

static string getAtomicOperationCaseName (const AtomicOperation op)
{
	switch (op)
	{
		case ATOMIC_OPERATION_ADD:				return string("add");
		case ATOMIC_OPERATION_SUB:				return string("sub");
		case ATOMIC_OPERATION_INC:				return string("inc");
		case ATOMIC_OPERATION_DEC:				return string("dec");
		case ATOMIC_OPERATION_MIN:				return string("min");
		case ATOMIC_OPERATION_MAX:				return string("max");
		case ATOMIC_OPERATION_AND:				return string("and");
		case ATOMIC_OPERATION_OR:				return string("or");
		case ATOMIC_OPERATION_XOR:				return string("xor");
		case ATOMIC_OPERATION_EXCHANGE:			return string("exchange");
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return string("compare_exchange");
		default:
			DE_ASSERT(false);
			return "";
	}
}

static string getAtomicOperationShaderFuncName (const AtomicOperation op)
{
	switch (op)
	{
		case ATOMIC_OPERATION_ADD:				return string("imageAtomicAdd");
		case ATOMIC_OPERATION_MIN:				return string("imageAtomicMin");
		case ATOMIC_OPERATION_MAX:				return string("imageAtomicMax");
		case ATOMIC_OPERATION_AND:				return string("imageAtomicAnd");
		case ATOMIC_OPERATION_OR:				return string("imageAtomicOr");
		case ATOMIC_OPERATION_XOR:				return string("imageAtomicXor");
		case ATOMIC_OPERATION_EXCHANGE:			return string("imageAtomicExchange");
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return string("imageAtomicCompSwap");
		default:
			DE_ASSERT(false);
			return "";
	}
}

template <typename T>
T getOperationInitialValue (const AtomicOperation op)
{
	switch (op)
	{
		// \note 18 is just an arbitrary small nonzero value.
		case ATOMIC_OPERATION_ADD:				return 18;
		case ATOMIC_OPERATION_INC:				return 18;
		case ATOMIC_OPERATION_SUB:				return (1 << 24) - 1;
		case ATOMIC_OPERATION_DEC:				return (1 << 24) - 1;
		case ATOMIC_OPERATION_MIN:				return (1 << 15) - 1;
		case ATOMIC_OPERATION_MAX:				return 18;
		case ATOMIC_OPERATION_AND:				return (1 << 15) - 1;
		case ATOMIC_OPERATION_OR:				return 18;
		case ATOMIC_OPERATION_XOR:				return 18;
		case ATOMIC_OPERATION_EXCHANGE:			return 18;
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return 18;
		default:
			DE_ASSERT(false);
			return 0xFFFFFFFF;
	}
}

template <>
deInt64 getOperationInitialValue<deInt64>(const AtomicOperation op)
{
	switch (op)
	{
		// \note 0x000000BEFFFFFF18 is just an arbitrary nonzero value.
		case ATOMIC_OPERATION_ADD:				return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_INC:				return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_SUB:				return (1ull << 56) - 1;
		case ATOMIC_OPERATION_DEC:				return (1ull << 56) - 1;
		case ATOMIC_OPERATION_MIN:				return (1ull << 47) - 1;
		case ATOMIC_OPERATION_MAX:				return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_AND:				return (1ull << 47) - 1;
		case ATOMIC_OPERATION_OR:				return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_XOR:				return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_EXCHANGE:			return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return 0x000000BEFFFFFF18;
		default:
			DE_ASSERT(false);
			return 0xFFFFFFFFFFFFFFFF;
	}
}

template <>
deUint64 getOperationInitialValue<deUint64>(const AtomicOperation op)
{
	return (deUint64)getOperationInitialValue<deInt64>(op);
}

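// Host-side counterpart of getAtomicFuncArgumentShaderStr(): computes the same per-invocation argument so the results can be verified on the CPU.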
template <typename T>
static T getAtomicFuncArgument (const AtomicOperation	op,
								const IVec3&			invocationID,
								const IVec3&			gridSize)
{
	const T x = static_cast<T>(invocationID.x());
	const T y = static_cast<T>(invocationID.y());
	const T z = static_cast<T>(invocationID.z());

	switch (op)
	{
		// \note Fall-throughs.
		case ATOMIC_OPERATION_ADD:
		case ATOMIC_OPERATION_SUB:
		case ATOMIC_OPERATION_AND:
		case ATOMIC_OPERATION_OR:
		case ATOMIC_OPERATION_XOR:
			return x*x + y*y + z*z;
		case ATOMIC_OPERATION_INC:
		case ATOMIC_OPERATION_DEC:
			return 1;
		case ATOMIC_OPERATION_MIN:
		case ATOMIC_OPERATION_MAX:
			// multiply half of the data by -1
			return (1-2*(x % 2))*(x*x + y*y + z*z);
		case ATOMIC_OPERATION_EXCHANGE:
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:
			return (z*static_cast<T>(gridSize.x()) + x)*static_cast<T>(gridSize.y()) + y;
		default:
			DE_ASSERT(false);
			return -1;
	}
}

//! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried out (i.e. it is both commutative and associative).
static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
{
	return	op == ATOMIC_OPERATION_ADD ||
			op == ATOMIC_OPERATION_SUB ||
			op == ATOMIC_OPERATION_INC ||
			op == ATOMIC_OPERATION_DEC ||
			op == ATOMIC_OPERATION_MIN ||
			op == ATOMIC_OPERATION_MAX ||
			op == ATOMIC_OPERATION_AND ||
			op == ATOMIC_OPERATION_OR ||
			op == ATOMIC_OPERATION_XOR;
}

//! Checks if the operation needs a SPIR-V shader.
static bool isSpirvAtomicOperation (const AtomicOperation op)
{
	return	op == ATOMIC_OPERATION_SUB ||
			op == ATOMIC_OPERATION_INC ||
			op == ATOMIC_OPERATION_DEC;
}

//! Returns the SPIR-V assembler name of the given operation.
static std::string getSpirvAtomicOpName (const AtomicOperation op)
{
	switch (op)
	{
	case ATOMIC_OPERATION_SUB:	return "OpAtomicISub";
	case ATOMIC_OPERATION_INC:	return "OpAtomicIIncrement";
	case ATOMIC_OPERATION_DEC:	return "OpAtomicIDecrement";
	default:					break;
	}

	DE_ASSERT(false);
	return "";
}

//! Returns true if the given SPIR-V operation does not need the last argument, compared to OpAtomicIAdd.
static bool isSpirvAtomicNoLastArgOp (const AtomicOperation op)
{
	switch (op)
	{
	case ATOMIC_OPERATION_SUB:	return false;
	case ATOMIC_OPERATION_INC:	// fallthrough
	case ATOMIC_OPERATION_DEC:	return true;
	default:					break;
	}

	DE_ASSERT(false);
	return false;
}

//! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
template <typename T>
static T computeBinaryAtomicOperationResult (const AtomicOperation op, const T a, const T b)
{
	switch (op)
	{
		case ATOMIC_OPERATION_INC:				// fallthrough.
		case ATOMIC_OPERATION_ADD:				return a + b;
		case ATOMIC_OPERATION_DEC:				// fallthrough.
		case ATOMIC_OPERATION_SUB:				return a - b;
		case ATOMIC_OPERATION_MIN:				return de::min(a, b);
		case ATOMIC_OPERATION_MAX:				return de::max(a, b);
		case ATOMIC_OPERATION_AND:				return a & b;
		case ATOMIC_OPERATION_OR:				return a | b;
		case ATOMIC_OPERATION_XOR:				return a ^ b;
		case ATOMIC_OPERATION_EXCHANGE:			return b;
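		// The compare value below equals the initial image value from getOperationInitialValue() (18, or 0xBEFFFFFF18 for 64-bit types), so an exchange only succeeds against the untouched value.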
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return (a == (sizeof(T) == 8 ? 0xBEFFFFFF18 : 18)) ? b : a;
		default:
			DE_ASSERT(false);
			return -1;
	}
}

VkImageUsageFlags getUsageFlags (bool useTransfer)
{
	VkImageUsageFlags usageFlags = VK_IMAGE_USAGE_STORAGE_BIT;

	if (useTransfer)
		usageFlags |= (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);

	return usageFlags;
}

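//! Registers the compute shaders used to initialize the image from a buffer ("fillShader") and to read it back into a buffer ("readShader"/"readShaderResidency") when transfer operations are not used.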
void AddFillReadShader (SourceCollections&			sourceCollections,
						const ImageType&			imageType,
						const tcu::TextureFormat&	format,
						const string&				componentType,
						const string&				vec4Type)
{
	const string	imageInCoord			= getCoordStr(imageType, "gx", "gy", "gz");
	const string	shaderImageFormatStr	= getShaderImageFormatQualifier(format);
	const string	shaderImageTypeStr		= getShaderImageType(format, imageType);
	const auto		componentWidth			= getFormatComponentWidth(mapTextureFormat(format), 0u);
	const string	extensions				= ((componentWidth == 64u)
											?	"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
												"#extension GL_EXT_shader_image_int64 : require\n"
											:	"");

	const string fillShader =	"#version 450\n"
								+ extensions +
								"precision highp " + shaderImageTypeStr + ";\n"
								"\n"
								"layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
								"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
								"\n"
								"layout(std430, binding = 1) buffer inputBuffer\n"
								"{\n"
								"	" + componentType + " data[];\n"
								"} inBuffer;\n"
								"\n"
								"void main(void)\n"
								"{\n"
								"	int gx = int(gl_GlobalInvocationID.x);\n"
								"	int gy = int(gl_GlobalInvocationID.y);\n"
								"	int gz = int(gl_GlobalInvocationID.z);\n"
								"	uint index = gx + (gy * gl_NumWorkGroups.x) + (gz * gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
								"	imageStore(u_resultImage, " + imageInCoord + ", " + vec4Type + "(inBuffer.data[index]));\n"
								"}\n";

	const string readShader =	"#version 450\n"
								+ extensions +
								"precision highp " + shaderImageTypeStr + ";\n"
								"\n"
								"layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
								"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
								"\n"
								"layout(std430, binding = 1) buffer outputBuffer\n"
								"{\n"
								"	" + componentType + " data[];\n"
								"} outBuffer;\n"
								"\n"
								"void main(void)\n"
								"{\n"
								"	int gx = int(gl_GlobalInvocationID.x);\n"
								"	int gy = int(gl_GlobalInvocationID.y);\n"
								"	int gz = int(gl_GlobalInvocationID.z);\n"
								"	uint index = gx + (gy * gl_NumWorkGroups.x) + (gz * gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
								"	outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
								"}\n";

	if ((imageType != IMAGE_TYPE_1D) &&
		(imageType != IMAGE_TYPE_1D_ARRAY) &&
		(imageType != IMAGE_TYPE_BUFFER))
	{
		const string readShaderResidency = "#version 450\n"
											"#extension GL_ARB_sparse_texture2 : require\n"
											+ extensions +
											"precision highp " + shaderImageTypeStr + ";\n"
											"\n"
											"layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
											"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
											"\n"
											"layout(std430, binding = 1) buffer outputBuffer\n"
											"{\n"
											"	" + componentType + " data[];\n"
											"} outBuffer;\n"
											"\n"
											"void main(void)\n"
											"{\n"
											"	int gx = int(gl_GlobalInvocationID.x);\n"
											"	int gy = int(gl_GlobalInvocationID.y);\n"
											"	int gz = int(gl_GlobalInvocationID.z);\n"
											"	uint index = gx + (gy * gl_NumWorkGroups.x) + (gz * gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
											"	outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
											"	" + vec4Type + " sparseValue;\n"
											"	sparseImageLoadARB(u_resultImage, " + imageInCoord + ", sparseValue);\n"
											"	if (outBuffer.data[index] != sparseValue.x)\n"
											"		outBuffer.data[index] = " + vec4Type + "(1234).x;\n"
											"}\n";

		sourceCollections.glslSources.add("readShaderResidency") << glu::ComputeSource(readShaderResidency.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
	}

	sourceCollections.glslSources.add("fillShader") << glu::ComputeSource(fillShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
	sourceCollections.glslSources.add("readShader") << glu::ComputeSource(readShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
}

//! Prepare the initial data for the image.
static void initDataForImage (const VkDevice			device,
							  const DeviceInterface&	deviceInterface,
							  const TextureFormat&		format,
							  const AtomicOperation		operation,
							  const tcu::UVec3&			gridSize,
							  BufferWithMemory&			buffer)
{
	Allocation&				bufferAllocation	= buffer.getAllocation();
	const VkFormat			imageFormat			= mapTextureFormat(format);
	tcu::PixelBufferAccess	pixelBuffer			(format, gridSize.x(), gridSize.y(), gridSize.z(), bufferAllocation.getHostPtr());

	if (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)
	{
		const deInt64 initialValue(getOperationInitialValue<deInt64>(operation));

		for (deUint32 z = 0; z < gridSize.z(); z++)
		for (deUint32 y = 0; y < gridSize.y(); y++)
		for (deUint32 x = 0; x < gridSize.x(); x++)
		{
			*((deInt64*)pixelBuffer.getPixelPtr(x, y, z)) = initialValue;
		}
	}
	else
	{
		const tcu::IVec4 initialValue(getOperationInitialValue<deInt32>(operation));

		for (deUint32 z = 0; z < gridSize.z(); z++)
		for (deUint32 y = 0; y < gridSize.y(); y++)
		for (deUint32 x = 0; x < gridSize.x(); x++)
		{
			pixelBuffer.setPixel(initialValue, x, y, z);
		}
	}

	flushAlloc(deviceInterface, device, bufferAllocation);
}

void commonCheckSupport (Context& context, const tcu::TextureFormat& tcuFormat, VkImageTiling tiling, ImageType imageType, const tcu::UVec3& imageSize, AtomicOperation operation, bool useTransfer, ShaderReadType readType, ImageBackingType backingType)
{
	const VkFormat				format				= mapTextureFormat(tcuFormat);
	const VkImageType			vkImgType			= mapImageType(imageType);
	const VkFormatFeatureFlags	texelBufferSupport	= (VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT);

	const auto&	vki				= context.getInstanceInterface();
	const auto	physicalDevice	= context.getPhysicalDevice();
	const auto	usageFlags		= getUsageFlags(useTransfer);

	VkImageFormatProperties	vkImageFormatProperties;
	const auto result = vki.getPhysicalDeviceImageFormatProperties(physicalDevice, format, vkImgType, tiling, usageFlags, 0, &vkImageFormatProperties);
	if (result != VK_SUCCESS)
	{
		if (result == VK_ERROR_FORMAT_NOT_SUPPORTED)
			TCU_THROW(NotSupportedError, "Format unsupported for tiling");
		else
			TCU_FAIL("vkGetPhysicalDeviceImageFormatProperties returned unexpected error");
	}

	if (vkImageFormatProperties.maxArrayLayers < (uint32_t)getNumLayers(imageType, imageSize))
	{
		TCU_THROW(NotSupportedError, "This format and tiling combination does not support this number of array layers");
	}

	const VkFormatProperties	formatProperties	= getPhysicalDeviceFormatProperties(context.getInstanceInterface(),
																						context.getPhysicalDevice(), format);
	if ((imageType == IMAGE_TYPE_BUFFER) &&
		((formatProperties.bufferFeatures & texelBufferSupport) != texelBufferSupport))
		TCU_THROW(NotSupportedError, "Atomic storage texel buffers not supported");

	const VkFormatFeatureFlags requiredFeaturesLinear = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
	if (tiling == vk::VK_IMAGE_TILING_LINEAR &&
		((formatProperties.linearTilingFeatures & requiredFeaturesLinear) != requiredFeaturesLinear))
	{
		TCU_THROW(NotSupportedError, "Format doesn't support atomic storage with linear tiling");
	}

	if (imageType == IMAGE_TYPE_CUBE_ARRAY)
		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_IMAGE_CUBE_ARRAY);

#ifndef CTS_USES_VULKANSC
	if (backingType == ImageBackingType::SPARSE)
	{
		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);

		switch (vkImgType)
		{
		case VK_IMAGE_TYPE_2D:	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE2D); break;
		case VK_IMAGE_TYPE_3D:	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE3D); break;
		default:				DE_ASSERT(false); break;
		}

		if (!checkSparseImageFormatSupport(context.getPhysicalDevice(), context.getInstanceInterface(), format, vkImgType, VK_SAMPLE_COUNT_1_BIT, usageFlags, tiling))
			TCU_THROW(NotSupportedError, "Format does not support sparse images");
	}
#endif // CTS_USES_VULKANSC

	if (isFloatFormat(format))
	{
		context.requireDeviceFunctionality("VK_EXT_shader_atomic_float");

		const VkFormatFeatureFlags	requiredFeatures	= (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
		const auto&					atomicFloatFeatures	= context.getShaderAtomicFloatFeaturesEXT();

		if (!atomicFloatFeatures.shaderImageFloat32Atomics)
			TCU_THROW(NotSupportedError, "shaderImageFloat32Atomics not supported");

		if ((operation == ATOMIC_OPERATION_ADD) && !atomicFloatFeatures.shaderImageFloat32AtomicAdd)
			TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicAdd not supported");

		if (operation == ATOMIC_OPERATION_MIN || operation == ATOMIC_OPERATION_MAX)
		{
			context.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
			if (!context.getShaderAtomicFloat2FeaturesEXT().shaderImageFloat32AtomicMinMax)
			{
				TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicMinMax not supported");
			}
#endif // CTS_USES_VULKANSC
		}

		if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
			TCU_FAIL("Required format feature bits not supported");

		if (backingType == ImageBackingType::SPARSE)
		{
			if (!atomicFloatFeatures.sparseImageFloat32Atomics)
				TCU_THROW(NotSupportedError, "sparseImageFloat32Atomics not supported");

			if (operation == ATOMIC_OPERATION_ADD && !atomicFloatFeatures.sparseImageFloat32AtomicAdd)
				TCU_THROW(NotSupportedError, "sparseImageFloat32AtomicAdd not supported");
		}
	}
	else if (format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
	{
		context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");

		const VkFormatFeatureFlags	requiredFeatures	= (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
		const auto&					atomicInt64Features	= context.getShaderImageAtomicInt64FeaturesEXT();

		if (!atomicInt64Features.shaderImageInt64Atomics)
			TCU_THROW(NotSupportedError, "shaderImageInt64Atomics not supported");

		if (backingType == ImageBackingType::SPARSE && !atomicInt64Features.sparseImageInt64Atomics)
			TCU_THROW(NotSupportedError, "sparseImageInt64Atomics not supported");

		if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
			TCU_FAIL("Mandatory format features not supported");
	}

	if (useTransfer)
	{
		const VkFormatFeatureFlags transferFeatures = (VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT);
		if ((formatProperties.optimalTilingFeatures & transferFeatures) != transferFeatures)
			TCU_THROW(NotSupportedError, "Transfer features not supported for this format");
	}

	if (readType == ShaderReadType::SPARSE)
	{
		DE_ASSERT(imageType != IMAGE_TYPE_1D && imageType != IMAGE_TYPE_1D_ARRAY && imageType != IMAGE_TYPE_BUFFER);
		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_RESOURCE_RESIDENCY);
	}
}

class BinaryAtomicEndResultCase : public vkt::TestCase
{
public:
								BinaryAtomicEndResultCase	(tcu::TestContext&			testCtx,
															 const string&				name,
															 const ImageType			imageType,
															 const tcu::UVec3&			imageSize,
															 const tcu::TextureFormat&	format,
															 const VkImageTiling		tiling,
															 const AtomicOperation		operation,
															 const bool					useTransfer,
															 const ShaderReadType		shaderReadType,
															 const ImageBackingType		backingType,
															 const glu::GLSLVersion		glslVersion);

	void						initPrograms				(SourceCollections&			sourceCollections) const;
	TestInstance*				createInstance				(Context&					context) const;
	virtual void				checkSupport				(Context&					context) const;

private:
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const tcu::TextureFormat	m_format;
	const VkImageTiling			m_tiling;
	const AtomicOperation		m_operation;
	const bool					m_useTransfer;
	const ShaderReadType		m_readType;
	const ImageBackingType		m_backingType;
	const glu::GLSLVersion		m_glslVersion;
};

BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext&			testCtx,
													  const string&				name,
													  const ImageType			imageType,
													  const tcu::UVec3&			imageSize,
													  const tcu::TextureFormat&	format,
													  const VkImageTiling		tiling,
													  const AtomicOperation		operation,
													  const bool				useTransfer,
													  const ShaderReadType		shaderReadType,
													  const ImageBackingType	backingType,
													  const glu::GLSLVersion	glslVersion)
	: TestCase		(testCtx, name)
	, m_imageType	(imageType)
	, m_imageSize	(imageSize)
	, m_format		(format)
	, m_tiling		(tiling)
	, m_operation	(operation)
	, m_useTransfer	(useTransfer)
	, m_readType	(shaderReadType)
	, m_backingType	(backingType)
	, m_glslVersion	(glslVersion)
{
}

void BinaryAtomicEndResultCase::checkSupport (Context& context) const
{
	commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType, m_backingType);
}

void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
{
	const VkFormat	imageFormat		= mapTextureFormat(m_format);
	const deUint32	componentWidth	= getFormatComponentWidth(imageFormat, 0);
	const bool		intFormat		= isIntFormat(imageFormat);
	const bool		uintFormat		= isUintFormat(imageFormat);
	const bool		floatFormat		= isFloatFormat(imageFormat);
	const string	type			= getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
	const string	vec4Type		= getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);

	AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);

	if (isSpirvAtomicOperation(m_operation))
	{
		const CaseVariant					caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_END_RESULTS};
		const tcu::StringTemplate			shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
		std::map<std::string, std::string>	specializations;

		specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
		if (isSpirvAtomicNoLastArgOp(m_operation))
			specializations["LASTARG"] = "";

		sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
	}
	else
	{
		const string	versionDecl				= glu::getGLSLVersionDeclaration(m_glslVersion);

		const UVec3		gridSize				= getShaderGridSize(m_imageType, m_imageSize);
		const string	atomicCoord				= getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");

		const string	atomicArgExpr			= type + getAtomicFuncArgumentShaderStr(m_operation,
																						"gx", "gy", "gz",
																						IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));

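		// For compare-exchange, pass the image's initial value as the compare argument; 820338753304 is 0x000000BEFFFFFF18, matching getOperationInitialValue() for 64-bit formats.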
		const string	compareExchangeStr		= (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
												  (componentWidth == 64 ? ", 820338753304" : ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "") :
												  "";
		const string	atomicInvocation		= getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
		const string	shaderImageFormatStr	= getShaderImageFormatQualifier(m_format);
		const string	shaderImageTypeStr		= getShaderImageType(m_format, m_imageType);
		const string	extensions				= "#extension GL_EXT_shader_atomic_float : enable\n"
												  "#extension GL_EXT_shader_atomic_float2 : enable\n"
												  "#extension GL_KHR_memory_scope_semantics : enable";

		string source = versionDecl + "\n" + extensions + "\n";

		if (64 == componentWidth)
		{
			source +=	"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
						"#extension GL_EXT_shader_image_int64 : require\n";
		}

		source +=	"precision highp " + shaderImageTypeStr + ";\n"
					"\n"
					"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
					"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
					"\n"
					"void main (void)\n"
					"{\n"
					"	int gx = int(gl_GlobalInvocationID.x);\n"
					"	int gy = int(gl_GlobalInvocationID.y);\n"
					"	int gz = int(gl_GlobalInvocationID.z);\n"
					"	" + atomicInvocation + ";\n"
					"}\n";

		sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
	}
}

class BinaryAtomicIntermValuesCase : public vkt::TestCase
{
public:
								BinaryAtomicIntermValuesCase	(tcu::TestContext&			testCtx,
																 const string&				name,
																 const ImageType			imageType,
																 const tcu::UVec3&			imageSize,
																 const tcu::TextureFormat&	format,
																 const VkImageTiling		tiling,
																 const AtomicOperation		operation,
																 const bool					useTransfer,
																 const ShaderReadType		shaderReadType,
																 const ImageBackingType		backingType,
																 const glu::GLSLVersion		glslVersion);

	void						initPrograms					(SourceCollections&			sourceCollections) const;
	TestInstance*				createInstance					(Context&					context) const;
	virtual void				checkSupport					(Context&					context) const;

private:
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const tcu::TextureFormat	m_format;
	const VkImageTiling			m_tiling;
	const AtomicOperation		m_operation;
	const bool					m_useTransfer;
	const ShaderReadType		m_readType;
	const ImageBackingType		m_backingType;
	const glu::GLSLVersion		m_glslVersion;
};

BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext&			testCtx,
															const string&			name,
															const ImageType			imageType,
															const tcu::UVec3&		imageSize,
															const TextureFormat&	format,
															const VkImageTiling		tiling,
															const AtomicOperation	operation,
															const bool				useTransfer,
															const ShaderReadType	shaderReadType,
															const ImageBackingType	backingType,
															const glu::GLSLVersion	glslVersion)
	: TestCase		(testCtx, name)
	, m_imageType	(imageType)
	, m_imageSize	(imageSize)
	, m_format		(format)
	, m_tiling		(tiling)
	, m_operation	(operation)
	, m_useTransfer	(useTransfer)
	, m_readType	(shaderReadType)
	, m_backingType	(backingType)
	, m_glslVersion	(glslVersion)
{
}

void BinaryAtomicIntermValuesCase::checkSupport (Context& context) const
{
	commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType, m_backingType);
}

void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
{
	const VkFormat	imageFormat		= mapTextureFormat(m_format);
	const deUint32	componentWidth	= getFormatComponentWidth(imageFormat, 0);
	const bool		intFormat		= isIntFormat(imageFormat);
	const bool		uintFormat		= isUintFormat(imageFormat);
	const bool		floatFormat		= isFloatFormat(imageFormat);
	const string	type			= getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
	const string	vec4Type		= getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);

	AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);

	if (isSpirvAtomicOperation(m_operation))
	{
		const CaseVariant					caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_INTERMEDIATE_RESULTS};
		const tcu::StringTemplate			shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
		std::map<std::string, std::string>	specializations;

		specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
		if (isSpirvAtomicNoLastArgOp(m_operation))
			specializations["LASTARG"] = "";

		sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
	}
	else
	{
		const string	versionDecl				= glu::getGLSLVersionDeclaration(m_glslVersion);
		const UVec3		gridSize				= getShaderGridSize(m_imageType, m_imageSize);
		const string	atomicCoord				= getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
		const string	invocationCoord			= getCoordStr(m_imageType, "gx", "gy", "gz");
		const string	atomicArgExpr			= type + getAtomicFuncArgumentShaderStr(m_operation,
																						"gx", "gy", "gz",
																						IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));

		const string	compareExchangeStr		= (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
												  (componentWidth == 64 ? ", 820338753304" : ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "") :
												  "";
		const string	atomicInvocation		= getAtomicOperationShaderFuncName(m_operation) +
												"(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
		const string	shaderImageFormatStr	= getShaderImageFormatQualifier(m_format);
		const string	shaderImageTypeStr		= getShaderImageType(m_format, m_imageType);
		const string	extensions				= "#extension GL_EXT_shader_atomic_float : enable\n"
												  "#extension GL_EXT_shader_atomic_float2 : enable\n"
												  "#extension GL_KHR_memory_scope_semantics : enable";

		string source = versionDecl + "\n" + extensions + "\n"
						"\n";

		if (64 == componentWidth)
		{
			source +=	"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
						"#extension GL_EXT_shader_image_int64 : require\n";
		}

		source +=	"precision highp " + shaderImageTypeStr + ";\n"
					"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
					"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
					"layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
					"\n"
					"void main (void)\n"
					"{\n"
					"	int gx = int(gl_GlobalInvocationID.x);\n"
					"	int gy = int(gl_GlobalInvocationID.y);\n"
					"	int gz = int(gl_GlobalInvocationID.z);\n"
					"	imageStore(u_intermValuesImage, " + invocationCoord + ", " + vec4Type + "(" + atomicInvocation + "));\n"
					"}\n";

		sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
	}
}

class BinaryAtomicInstanceBase : public vkt::TestInstance
{
public:

								BinaryAtomicInstanceBase (Context&						context,
														  const string&					name,
														  const ImageType				imageType,
														  const tcu::UVec3&				imageSize,
														  const TextureFormat&			format,
														  const VkImageTiling			tiling,
														  const AtomicOperation			operation,
														  const bool					useTransfer,
														  const ShaderReadType			shaderReadType,
														  const ImageBackingType		backingType);

	tcu::TestStatus				iterate					 (void);

	virtual deUint32			getOutputBufferSize		 (void) const = 0;

	virtual void				prepareResources		 (const bool					useTransfer) = 0;
	virtual void				prepareDescriptors		 (const bool					isTexelBuffer) = 0;

	virtual void				commandsBeforeCompute	 (const VkCommandBuffer			cmdBuffer) const = 0;
	virtual void				commandsAfterCompute	 (const VkCommandBuffer			cmdBuffer,
														  const VkPipeline				pipeline,
														  const VkPipelineLayout		pipelineLayout,
														  const VkDescriptorSet			descriptorSet,
														  const VkDeviceSize&			range,
														  const bool					useTransfer) = 0;

	virtual bool				verifyResult			 (Allocation&					outputBufferAllocation,
														  const bool					is64Bit) const = 0;

protected:

	void						shaderFillImage			 (const VkCommandBuffer			cmdBuffer,
														  const VkBuffer&				buffer,
														  const VkPipeline				pipeline,
														  const VkPipelineLayout		pipelineLayout,
														  const VkDescriptorSet			descriptorSet,
														  const VkDeviceSize&			range,
														  const tcu::UVec3&				gridSize);

	void						createImageAndView		(VkFormat						imageFormat,
														 const tcu::UVec3&				imageExtent,
														 bool							useTransfer,
														 de::MovePtr<Image>&			imagePtr,
														 Move<VkImageView>&				imageViewPtr);

	void						createImageResources	(const VkFormat&				imageFormat,
														 const bool						useTransfer);

	const string					m_name;
	const ImageType					m_imageType;
	const tcu::UVec3				m_imageSize;
	const TextureFormat				m_format;
	const VkImageTiling				m_tiling;
	const AtomicOperation			m_operation;
	const bool						m_useTransfer;
	const ShaderReadType			m_readType;
	const ImageBackingType			m_backingType;

	de::MovePtr<BufferWithMemory>	m_inputBuffer;
	de::MovePtr<BufferWithMemory>	m_outputBuffer;
	Move<VkBufferView>				m_descResultBufferView;
	Move<VkBufferView>				m_descIntermResultsBufferView;
	Move<VkDescriptorPool>			m_descriptorPool;
	Move<VkDescriptorSetLayout>		m_descriptorSetLayout;
	Move<VkDescriptorSet>			m_descriptorSet;

	Move<VkDescriptorSetLayout>		m_descriptorSetLayoutNoTransfer;
	Move<VkDescriptorPool>			m_descriptorPoolNoTransfer;

	de::MovePtr<Image>				m_resultImage;
	Move<VkImageView>				m_resultImageView;

	std::vector<VkSemaphore>		m_waitSemaphores;
};

BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context&				context,
													const string&			name,
													const ImageType			imageType,
													const tcu::UVec3&		imageSize,
													const TextureFormat&	format,
													const VkImageTiling		tiling,
													const AtomicOperation	operation,
													const bool				useTransfer,
													const ShaderReadType	shaderReadType,
													const ImageBackingType	backingType)
	: vkt::TestInstance	(context)
	, m_name			(name)
	, m_imageType		(imageType)
	, m_imageSize		(imageSize)
	, m_format			(format)
	, m_tiling			(tiling)
	, m_operation		(operation)
	, m_useTransfer		(useTransfer)
	, m_readType		(shaderReadType)
	, m_backingType		(backingType)
{
}

tcu::TestStatus	BinaryAtomicInstanceBase::iterate (void)
{
	const VkDevice			device				= m_context.getDevice();
	const DeviceInterface&	deviceInterface		= m_context.getDeviceInterface();
	const VkQueue			queue				= m_context.getUniversalQueue();
	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
	Allocator&				allocator			= m_context.getDefaultAllocator();
	const VkDeviceSize		imageSizeInBytes	= tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
	const VkDeviceSize		outBuffSizeInBytes	= getOutputBufferSize();
	const VkFormat			imageFormat			= mapTextureFormat(m_format);
	const bool				isTexelBuffer		= (m_imageType == IMAGE_TYPE_BUFFER);

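	// Texel buffers are accessed through buffer views instead of a backing image, so image resources are only created for the other image types.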
	if (!isTexelBuffer)
	{
		createImageResources(imageFormat, m_useTransfer);
	}

	tcu::UVec3				gridSize			= getShaderGridSize(m_imageType, m_imageSize);

	// Prepare the buffer with the initial data for the image.
	m_inputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(deviceInterface,
													device,
													allocator,
													makeBufferCreateInfo(imageSizeInBytes,
																		 VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
																		 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
																		 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
													MemoryRequirement::HostVisible));

	// Fill in buffer with initial data used for image.
	initDataForImage(device, deviceInterface, m_format, m_operation, gridSize, *m_inputBuffer);

	// Create a buffer to store shader output copied from the result image.
	m_outputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(deviceInterface,
													device,
													allocator,
													makeBufferCreateInfo(outBuffSizeInBytes,
																		 VK_BUFFER_USAGE_TRANSFER_DST_BIT |
																		 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
																		 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
													MemoryRequirement::HostVisible));

	if (!isTexelBuffer)
	{
		prepareResources(m_useTransfer);
	}

	prepareDescriptors(isTexelBuffer);

	Move<VkDescriptorSet>	descriptorSetFillImage;
	Move<VkShaderModule>	shaderModuleFillImage;
	Move<VkPipelineLayout>	pipelineLayoutFillImage;
	Move<VkPipeline>		pipelineFillImage;

	Move<VkDescriptorSet>	descriptorSetReadImage;
	Move<VkShaderModule>	shaderModuleReadImage;
	Move<VkPipelineLayout>	pipelineLayoutReadImage;
	Move<VkPipeline>		pipelineReadImage;

	if (!m_useTransfer)
	{
		m_descriptorSetLayoutNoTransfer =
			DescriptorSetLayoutBuilder()
			.addSingleBinding((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), VK_SHADER_STAGE_COMPUTE_BIT)
			.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
			.build(deviceInterface, device);

		m_descriptorPoolNoTransfer =
			DescriptorPoolBuilder()
			.addType((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), 2)
			.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2)
			.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);

		descriptorSetFillImage = makeDescriptorSet(deviceInterface,
			device,
			*m_descriptorPoolNoTransfer,
			*m_descriptorSetLayoutNoTransfer);

		descriptorSetReadImage = makeDescriptorSet(deviceInterface,
			device,
			*m_descriptorPoolNoTransfer,
			*m_descriptorSetLayoutNoTransfer);

		shaderModuleFillImage	= createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("fillShader"), 0);
		pipelineLayoutFillImage	= makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
		pipelineFillImage		= makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleFillImage);

		if (m_readType == ShaderReadType::SPARSE)
		{
			shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShaderResidency"), 0);
		}
		else
		{
			shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShader"), 0);
		}
		pipelineLayoutReadImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
		pipelineReadImage		= makeComputePipeline(deviceInterface, device, *pipelineLayoutReadImage, *shaderModuleReadImage);
	}

	// Create pipeline
	const Unique<VkShaderModule>	shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
	const Unique<VkPipelineLayout>	pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
	const Unique<VkPipeline>		pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));

	// Create command buffer
	const Unique<VkCommandPool>		cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
	const Unique<VkCommandBuffer>	cmdBuffer(allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	beginCommandBuffer(deviceInterface, *cmdBuffer);

	if (!isTexelBuffer)
	{
		if (m_useTransfer)
		{
			const vector<VkBufferImageCopy>	bufferImageCopy(1, makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize)));
			copyBufferToImage(deviceInterface,
							  *cmdBuffer,
							  *(*m_inputBuffer),
							  imageSizeInBytes,
							  bufferImageCopy,
							  VK_IMAGE_ASPECT_COLOR_BIT,
							  1,
							  getNumLayers(m_imageType, m_imageSize), m_resultImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
		}
		else
		{
			shaderFillImage(*cmdBuffer, *(*m_inputBuffer), *pipelineFillImage, *pipelineLayoutFillImage, *descriptorSetFillImage, imageSizeInBytes, gridSize);
		}
		commandsBeforeCompute(*cmdBuffer);
	}

	deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
	deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);

	deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());

	commandsAfterCompute(*cmdBuffer,
						 *pipelineReadImage,
						 *pipelineLayoutReadImage,
						 *descriptorSetReadImage,
						 outBuffSizeInBytes,
						 m_useTransfer);

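	// Make the shader or transfer writes to the output buffer visible to the host before it is mapped for verification.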
1171 	const VkBufferMemoryBarrier	outputBufferPreHostReadBarrier
1172 		= makeBufferMemoryBarrier(((m_useTransfer || isTexelBuffer) ? VK_ACCESS_TRANSFER_WRITE_BIT : VK_ACCESS_SHADER_WRITE_BIT),
1173 								  VK_ACCESS_HOST_READ_BIT,
1174 								  m_outputBuffer->get(),
1175 								  0ull,
1176 								  outBuffSizeInBytes);
1177 
1178 	deviceInterface.cmdPipelineBarrier(*cmdBuffer,
1179 									   ((m_useTransfer || isTexelBuffer) ? VK_PIPELINE_STAGE_TRANSFER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT),
1180 									   VK_PIPELINE_STAGE_HOST_BIT,
1181 									   DE_FALSE, 0u, DE_NULL,
1182 									   1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);
1183 
1184 	endCommandBuffer(deviceInterface, *cmdBuffer);
1185 
1186 	std::vector<VkPipelineStageFlags> waitStages(m_waitSemaphores.size(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
1187 	submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer, false, 1u,
1188 		static_cast<deUint32>(m_waitSemaphores.size()), de::dataOrNull(m_waitSemaphores), de::dataOrNull(waitStages));
1189 
1190 	Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();
1191 
1192 	invalidateAlloc(deviceInterface, device, outputBufferAllocation);
1193 
1194 	if (verifyResult(outputBufferAllocation, (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)))
1195 		return tcu::TestStatus::pass("Comparison succeeded");
1196 	else
1197 		return tcu::TestStatus::fail("Comparison failed");
1198 }
1199 
void BinaryAtomicInstanceBase::shaderFillImage (const VkCommandBuffer	cmdBuffer,
												const VkBuffer&			buffer,
												const VkPipeline		pipeline,
												const VkPipelineLayout	pipelineLayout,
												const VkDescriptorSet	descriptorSet,
												const VkDeviceSize&		range,
												const tcu::UVec3&		gridSize)
{
	const VkDevice					device					= m_context.getDevice();
	const DeviceInterface&			deviceInterface			= m_context.getDeviceInterface();
	const VkDescriptorImageInfo		descResultImageInfo		= makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
	const VkDescriptorBufferInfo	descResultBufferInfo	= makeDescriptorBufferInfo(buffer, 0, range);
	const VkImageSubresourceRange	subresourceRange		= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

	DescriptorSetUpdateBuilder()
		.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
		.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
		.update(deviceInterface, device);

	const VkImageMemoryBarrier imageBarrierPre = makeImageMemoryBarrier(0,
																		VK_ACCESS_SHADER_WRITE_BIT,
																		VK_IMAGE_LAYOUT_UNDEFINED,
																		VK_IMAGE_LAYOUT_GENERAL,
																		m_resultImage->get(),
																		subresourceRange);

	deviceInterface.cmdPipelineBarrier(	cmdBuffer,
										VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
										VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
										(VkDependencyFlags)0,
										0, (const VkMemoryBarrier*)DE_NULL,
										0, (const VkBufferMemoryBarrier*)DE_NULL,
										1, &imageBarrierPre);

	deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
	deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);

	deviceInterface.cmdDispatch(cmdBuffer, gridSize.x(), gridSize.y(), gridSize.z());

	const VkImageMemoryBarrier imageBarrierPost = makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT,
																		 VK_ACCESS_SHADER_READ_BIT,
																		 VK_IMAGE_LAYOUT_GENERAL,
																		 VK_IMAGE_LAYOUT_GENERAL,
																		 m_resultImage->get(),
																		 subresourceRange);

	deviceInterface.cmdPipelineBarrier(	cmdBuffer,
										VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
										VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
										(VkDependencyFlags)0,
										0, (const VkMemoryBarrier*)DE_NULL,
										0, (const VkBufferMemoryBarrier*)DE_NULL,
										1, &imageBarrierPost);
}

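// Creates the backing image and its view. With sparse backing, the image gets
// sparse binding/residency flags, and the semaphore signaled by the sparse
// bind is queued in m_waitSemaphores so that command submission waits for the
// memory to be bound.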
void BinaryAtomicInstanceBase::createImageAndView	(VkFormat						imageFormat,
													 const tcu::UVec3&				imageExtent,
													 bool							useTransfer,
													 de::MovePtr<Image>&			imagePtr,
													 Move<VkImageView>&				imageViewPtr)
{
	const VkDevice			device			= m_context.getDevice();
	const DeviceInterface&	deviceInterface	= m_context.getDeviceInterface();
	Allocator&				allocator		= m_context.getDefaultAllocator();
	const VkImageUsageFlags	usageFlags		= getUsageFlags(useTransfer);
	VkImageCreateFlags		createFlags		= 0u;

	if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
		createFlags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;

	const auto numLayers = getNumLayers(m_imageType, m_imageSize);

	VkImageCreateInfo createInfo =
	{
		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,					// VkStructureType			sType;
		DE_NULL,												// const void*				pNext;
		createFlags,											// VkImageCreateFlags		flags;
		mapImageType(m_imageType),								// VkImageType				imageType;
		imageFormat,											// VkFormat					format;
		makeExtent3D(imageExtent),								// VkExtent3D				extent;
		1u,														// deUint32					mipLevels;
		numLayers,												// deUint32					arrayLayers;
		VK_SAMPLE_COUNT_1_BIT,									// VkSampleCountFlagBits	samples;
		m_tiling,												// VkImageTiling			tiling;
		usageFlags,												// VkImageUsageFlags		usage;
		VK_SHARING_MODE_EXCLUSIVE,								// VkSharingMode			sharingMode;
		0u,														// deUint32					queueFamilyIndexCount;
		DE_NULL,												// const deUint32*			pQueueFamilyIndices;
		VK_IMAGE_LAYOUT_UNDEFINED,								// VkImageLayout			initialLayout;
	};

#ifndef CTS_USES_VULKANSC
	if (m_backingType == ImageBackingType::SPARSE)
	{
		const auto&		vki				= m_context.getInstanceInterface();
		const auto		physicalDevice	= m_context.getPhysicalDevice();
		const auto		sparseQueue		= m_context.getSparseQueue();
		const auto		sparseQueueIdx	= m_context.getSparseQueueFamilyIndex();
		const auto		universalQIdx	= m_context.getUniversalQueueFamilyIndex();
		const deUint32	queueIndices[]	= { universalQIdx, sparseQueueIdx };

		createInfo.flags |= (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);

		// The image is bound on the sparse queue but used on the universal queue,
		// so it must be shared between both families when they differ.
		if (sparseQueueIdx != universalQIdx)
		{
			createInfo.sharingMode				= VK_SHARING_MODE_CONCURRENT;
			createInfo.queueFamilyIndexCount	= static_cast<deUint32>(DE_LENGTH_OF_ARRAY(queueIndices));
			createInfo.pQueueFamilyIndices		= queueIndices;
		}

		const auto sparseImage = new SparseImage(deviceInterface, device, physicalDevice, vki, createInfo, sparseQueue, allocator, m_format);
		m_waitSemaphores.push_back(sparseImage->getSemaphore());
		imagePtr = de::MovePtr<Image>(sparseImage);
	}
	else
#endif // CTS_USES_VULKANSC
		imagePtr = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, createInfo, MemoryRequirement::Any));

	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numLayers);

	imageViewPtr = makeImageView(deviceInterface, device, imagePtr->get(), mapImageViewType(m_imageType), imageFormat, subresourceRange);
}

void BinaryAtomicInstanceBase::createImageResources (const VkFormat&	imageFormat,
													 const bool			useTransfer)
{
	// Create the image that will store the results of the atomic operations.
	createImageAndView(imageFormat, getLayerSize(m_imageType, m_imageSize), useTransfer, m_resultImage, m_resultImageView);
}

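// Checks only the final contents of the result image: every pixel must hold a
// value reachable by applying the atomic operations of its
// NUM_INVOCATIONS_PER_PIXEL invocations in some order.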
class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
{
public:

						BinaryAtomicEndResultInstance  (Context&					context,
														const string&				name,
														const ImageType				imageType,
														const tcu::UVec3&			imageSize,
														const TextureFormat&		format,
														const VkImageTiling			tiling,
														const AtomicOperation		operation,
														const bool					useTransfer,
														const ShaderReadType		shaderReadType,
														const ImageBackingType		backingType)
							: BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer, shaderReadType, backingType) {}

	virtual deUint32	getOutputBufferSize			   (void) const;

	virtual void		prepareResources			   (const bool					useTransfer) { DE_UNREF(useTransfer); }
	virtual void		prepareDescriptors			   (const bool					isTexelBuffer);

	virtual void		commandsBeforeCompute		   (const VkCommandBuffer) const {}
	virtual void		commandsAfterCompute		   (const VkCommandBuffer		cmdBuffer,
														const VkPipeline			pipeline,
														const VkPipelineLayout		pipelineLayout,
														const VkDescriptorSet		descriptorSet,
														const VkDeviceSize&			range,
														const bool					useTransfer);

	virtual bool		verifyResult				   (Allocation&					outputBufferAllocation,
														const bool					is64Bit) const;

protected:

	template <typename T>
	bool				isValueCorrect				   (const T						resultValue,
														deInt32						x,
														deInt32						y,
														deInt32						z,
														const UVec3&				gridSize,
														const IVec3					extendedGridSize) const;
};

deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
{
	return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
}

void BinaryAtomicEndResultInstance::prepareDescriptors (const bool	isTexelBuffer)
{
	const VkDescriptorType	descriptorType	= isTexelBuffer ?
											VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
											VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
	const VkDevice			device			= m_context.getDevice();
	const DeviceInterface&	deviceInterface = m_context.getDeviceInterface();

	m_descriptorSetLayout =
		DescriptorSetLayoutBuilder()
		.addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(deviceInterface, device);

	m_descriptorPool =
		DescriptorPoolBuilder()
		.addType(descriptorType)
		.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);

	if (isTexelBuffer)
	{
		m_descResultBufferView = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);

		DescriptorSetUpdateBuilder()
			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
			.update(deviceInterface, device);
	}
	else
	{
		const VkDescriptorImageInfo	descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);

		DescriptorSetUpdateBuilder()
			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
			.update(deviceInterface, device);
	}
}

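// Makes the atomic results available in m_outputBuffer: texel buffers already
// operate on the input buffer directly, transfer variants copy the image with
// cmdCopyImageToBuffer, and the remaining variants run a read-back compute
// dispatch that writes the image contents into a storage buffer.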
void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer		cmdBuffer,
														  const VkPipeline			pipeline,
														  const VkPipelineLayout	pipelineLayout,
														  const VkDescriptorSet		descriptorSet,
														  const VkDeviceSize&		range,
														  const bool				useTransfer)
{
	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
	const UVec3						layerSize			= getLayerSize(m_imageType, m_imageSize);

	if (m_imageType == IMAGE_TYPE_BUFFER)
	{
		m_outputBuffer = m_inputBuffer;
	}
	else if (useTransfer)
	{
		const VkImageMemoryBarrier	resultImagePostDispatchBarrier =
			makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
									VK_ACCESS_TRANSFER_READ_BIT,
									VK_IMAGE_LAYOUT_GENERAL,
									VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
									m_resultImage->get(),
									subresourceRange);

		deviceInterface.cmdPipelineBarrier(	cmdBuffer,
											VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
											VK_PIPELINE_STAGE_TRANSFER_BIT,
											(VkDependencyFlags)0, 0u, DE_NULL, 0u, DE_NULL,
											1u, &resultImagePostDispatchBarrier);

		const VkBufferImageCopy		bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(layerSize), getNumLayers(m_imageType, m_imageSize));

		deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
	}
	else
	{
		const VkDevice					device					= m_context.getDevice();
		const VkDescriptorImageInfo		descResultImageInfo		= makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
		const VkDescriptorBufferInfo	descResultBufferInfo	= makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);

		DescriptorSetUpdateBuilder()
			.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
			.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
			.update(deviceInterface, device);

		const VkImageMemoryBarrier	resultImagePostDispatchBarrier =
			makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
									VK_ACCESS_SHADER_READ_BIT,
									VK_IMAGE_LAYOUT_GENERAL,
									VK_IMAGE_LAYOUT_GENERAL,
									m_resultImage->get(),
									subresourceRange);

		deviceInterface.cmdPipelineBarrier(	cmdBuffer,
											VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
											VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
											(VkDependencyFlags)0, 0u, DE_NULL, 0u, DE_NULL,
											1u, &resultImagePostDispatchBarrier);

		deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
		deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);

		switch (m_imageType)
		{
			case IMAGE_TYPE_1D_ARRAY:
				deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), subresourceRange.layerCount, layerSize.z());
				break;
			case IMAGE_TYPE_2D_ARRAY:
			case IMAGE_TYPE_CUBE:
			case IMAGE_TYPE_CUBE_ARRAY:
				deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), subresourceRange.layerCount);
				break;
			default:
				deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), layerSize.z());
				break;
		}
	}
}

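// For order-independent operations (e.g. add, min, xor) the end result must
// match a reference folded from all invocation arguments; for exchange and
// compare-exchange, where the final value depends on which invocation wrote
// last, it merely has to equal one of the arguments.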
bool BinaryAtomicEndResultInstance::verifyResult (Allocation&	outputBufferAllocation,
												  const bool	is64Bit) const
{
	const UVec3	gridSize			= getShaderGridSize(m_imageType, m_imageSize);
	const IVec3 extendedGridSize	= IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());

	tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());

	for (deInt32 z = 0; z < resultBuffer.getDepth();  z++)
	for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
	for (deInt32 x = 0; x < resultBuffer.getWidth();  x++)
	{
		const void* resultValue = resultBuffer.getPixelPtr(x, y, z);
		deInt32 floatToIntValue = 0;
		bool isFloatValue = false;
		if (isFloatFormat(mapTextureFormat(m_format)))
		{
			isFloatValue = true;
			floatToIntValue = static_cast<deInt32>(*((float*)resultValue));
		}

		if (isOrderIndependentAtomicOperation(m_operation))
		{
			if (isUintFormat(mapTextureFormat(m_format)))
			{
				if (is64Bit)
				{
					if (!isValueCorrect<deUint64>(*((deUint64*)resultValue), x, y, z, gridSize, extendedGridSize))
						return false;
				}
				else
				{
					if (!isValueCorrect<deUint32>(*((deUint32*)resultValue), x, y, z, gridSize, extendedGridSize))
						return false;
				}
			}
			else if (isIntFormat(mapTextureFormat(m_format)))
			{
				if (is64Bit)
				{
					if (!isValueCorrect<deInt64>(*((deInt64*)resultValue), x, y, z, gridSize, extendedGridSize))
						return false;
				}
				else
				{
					if (!isValueCorrect<deInt32>(*((deInt32*)resultValue), x, y, z, gridSize, extendedGridSize))
						return false;
				}
			}
			else
			{
				// 32-bit floating point
				if (!isValueCorrect<deInt32>(floatToIntValue, x, y, z, gridSize, extendedGridSize))
					return false;
			}
		}
		else if (m_operation == ATOMIC_OPERATION_EXCHANGE || m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
		{
			// Check if the end result equals one of the atomic args.
			bool matchFound = false;

			for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
			{
				const IVec3 gid(x + i*gridSize.x(), y, z);
				matchFound = is64Bit ?
					(*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
					isFloatValue ?
					floatToIntValue == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize) :
					(*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
			}

			if (!matchFound)
				return false;
		}
		else
			DE_ASSERT(false);
	}
	return true;
}

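// Reference for order-independent operations: fold the arguments of all
// NUM_INVOCATIONS_PER_PIXEL invocations into the operation's initial value;
// commutativity and associativity make the fold order irrelevant.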
template <typename T>
bool BinaryAtomicEndResultInstance::isValueCorrect(const T resultValue, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
{
	T reference = getOperationInitialValue<T>(m_operation);
	for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
	{
		const IVec3 gid(x + i*gridSize.x(), y, z);
		T			arg = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
		reference = computeBinaryAtomicOperationResult(m_operation, reference, arg);
	}
	return (resultValue == reference);
}

TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
{
	return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling, m_operation, m_useTransfer, m_readType, m_backingType);
}

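// Checks the value returned by each atomic call rather than the end result.
// The returned ("intermediate") values are written to a second image extended
// NUM_INVOCATIONS_PER_PIXEL times along x, one texel per invocation.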
class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
{
public:

						BinaryAtomicIntermValuesInstance   (Context&				context,
															const string&			name,
															const ImageType			imageType,
															const tcu::UVec3&		imageSize,
															const TextureFormat&	format,
															const VkImageTiling		tiling,
															const AtomicOperation	operation,
															const bool				useTransfer,
															const ShaderReadType	shaderReadType,
															const ImageBackingType	backingType)
							: BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer, shaderReadType, backingType) {}

	virtual deUint32	getOutputBufferSize				   (void) const;

	virtual void		prepareResources				   (const bool				useTransfer);
	virtual void		prepareDescriptors				   (const bool				isTexelBuffer);

	virtual void		commandsBeforeCompute			   (const VkCommandBuffer	cmdBuffer) const;
	virtual void		commandsAfterCompute			   (const VkCommandBuffer	cmdBuffer,
															const VkPipeline		pipeline,
															const VkPipelineLayout	pipelineLayout,
															const VkDescriptorSet	descriptorSet,
															const VkDeviceSize&		range,
															const bool				useTransfer);

	virtual bool		verifyResult					   (Allocation&				outputBufferAllocation,
															const bool				is64Bit) const;

protected:

	template <typename T>
	bool				areValuesCorrect				   (tcu::ConstPixelBufferAccess& resultBuffer,
															const bool isFloatingPoint,
															deInt32 x,
															deInt32 y,
															deInt32 z,
															const UVec3& gridSize,
															const IVec3 extendedGridSize) const;

	template <typename T>
	bool				verifyRecursive					   (const deInt32			index,
															const T					valueSoFar,
															bool					argsUsed[NUM_INVOCATIONS_PER_PIXEL],
															const T					atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
															const T					resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;

	de::MovePtr<Image>	m_intermResultsImage;
	Move<VkImageView>	m_intermResultsImageView;
};

deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
{
	return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
}

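// The intermediate-results image stores one texel per invocation, hence the
// layer size is extended NUM_INVOCATIONS_PER_PIXEL times along x. Cube-based
// images are extended along y as well, keeping the faces square as cube maps
// require.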
void BinaryAtomicIntermValuesInstance::prepareResources (const bool useTransfer)
{
	const UVec3 layerSize			= getLayerSize(m_imageType, m_imageSize);
	const bool  isCubeBasedImage	= (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
	const UVec3 extendedLayerSize	= isCubeBasedImage	? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
														: UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());

	createImageAndView(mapTextureFormat(m_format), extendedLayerSize, useTransfer, m_intermResultsImage, m_intermResultsImageView);
}

void BinaryAtomicIntermValuesInstance::prepareDescriptors (const bool	isTexelBuffer)
{
	const VkDescriptorType	descriptorType	= isTexelBuffer ?
											VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
											VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;

	const VkDevice			device			= m_context.getDevice();
	const DeviceInterface&	deviceInterface = m_context.getDeviceInterface();

	m_descriptorSetLayout =
		DescriptorSetLayoutBuilder()
		.addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
		.addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(deviceInterface, device);

	m_descriptorPool =
		DescriptorPoolBuilder()
		.addType(descriptorType, 2u)
		.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);

	if (isTexelBuffer)
	{
		m_descResultBufferView			= makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
		m_descIntermResultsBufferView	= makeBufferView(deviceInterface, device, *(*m_outputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);

		DescriptorSetUpdateBuilder()
			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &(m_descIntermResultsBufferView.get()))
			.update(deviceInterface, device);
	}
	else
	{
		const VkDescriptorImageInfo	descResultImageInfo			= makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
		const VkDescriptorImageInfo	descIntermResultsImageInfo	= makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);

		DescriptorSetUpdateBuilder()
			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &descIntermResultsImageInfo)
			.update(deviceInterface, device);
	}
}

void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
{
	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

	const VkImageMemoryBarrier	imagePreDispatchBarrier =
		makeImageMemoryBarrier(	0u,
								VK_ACCESS_SHADER_WRITE_BIT,
								VK_IMAGE_LAYOUT_UNDEFINED,
								VK_IMAGE_LAYOUT_GENERAL,
								m_intermResultsImage->get(),
								subresourceRange);

	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
}

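// Read-back analogue of the end-result variant, sourcing the (extended)
// intermediate-results image; the dispatch dimensions below therefore scale x
// by NUM_INVOCATIONS_PER_PIXEL.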
void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer		cmdBuffer,
															 const VkPipeline			pipeline,
															 const VkPipelineLayout		pipelineLayout,
															 const VkDescriptorSet		descriptorSet,
															 const VkDeviceSize&		range,
															 const bool					useTransfer)
{
	// Nothing to do for texel buffers: the intermediate values were written
	// directly through the output buffer's texel view.
	if (m_imageType == IMAGE_TYPE_BUFFER)
		return;

	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
	const UVec3						layerSize			= getLayerSize(m_imageType, m_imageSize);

	if (useTransfer)
	{
		const VkImageMemoryBarrier	imagePostDispatchBarrier =
			makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
									VK_ACCESS_TRANSFER_READ_BIT,
									VK_IMAGE_LAYOUT_GENERAL,
									VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
									m_intermResultsImage->get(),
									subresourceRange);

		deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);

		const UVec3					extendedLayerSize		= UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
		const VkBufferImageCopy		bufferImageCopyParams	= makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));

		deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
	}
	else
	{
		const VkDevice					device					= m_context.getDevice();
		const VkDescriptorImageInfo		descResultImageInfo		= makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
		const VkDescriptorBufferInfo	descResultBufferInfo	= makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);

		DescriptorSetUpdateBuilder()
			.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
			.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
			.update(deviceInterface, device);

		const VkImageMemoryBarrier	resultImagePostDispatchBarrier =
			makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
									VK_ACCESS_SHADER_READ_BIT,
									VK_IMAGE_LAYOUT_GENERAL,
									VK_IMAGE_LAYOUT_GENERAL,
									m_intermResultsImage->get(),
									subresourceRange);

		deviceInterface.cmdPipelineBarrier(	cmdBuffer,
											VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
											VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
											(VkDependencyFlags)0, 0u, DE_NULL, 0u, DE_NULL,
											1u, &resultImagePostDispatchBarrier);

		deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
		deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);

		switch (m_imageType)
		{
			case IMAGE_TYPE_1D_ARRAY:
				deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), subresourceRange.layerCount, layerSize.z());
				break;
			case IMAGE_TYPE_2D_ARRAY:
			case IMAGE_TYPE_CUBE:
			case IMAGE_TYPE_CUBE_ARRAY:
				deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), subresourceRange.layerCount);
				break;
			default:
				deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
				break;
		}
	}
}

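// The read-back data is interpreted as a grid NUM_INVOCATIONS_PER_PIXEL times
// wider than the logical one: invocation i's return value for pixel (x, y, z)
// is stored at (x + i * gridSize.x(), y, z).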
bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation&	outputBufferAllocation,
													 const bool		is64Bit) const
{
	const UVec3	gridSize		 = getShaderGridSize(m_imageType, m_imageSize);
	const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());

	tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());

	for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
	for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
	for (deUint32 x = 0; x < gridSize.x(); x++)
	{
		if (isUintFormat(mapTextureFormat(m_format)))
		{
			if (is64Bit)
			{
				if (!areValuesCorrect<deUint64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
					return false;
			}
			else
			{
				if (!areValuesCorrect<deUint32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
					return false;
			}
		}
		else if (isIntFormat(mapTextureFormat(m_format)))
		{
			if (is64Bit)
			{
				if (!areValuesCorrect<deInt64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
					return false;
			}
			else
			{
				if (!areValuesCorrect<deInt32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
					return false;
			}
		}
		else
		{
			// 32-bit floating point
			if (!areValuesCorrect<deInt32>(resultBuffer, true, x, y, z, gridSize, extendedGridSize))
				return false;
		}
	}

	return true;
}

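// Collects the observed return values and the corresponding atomic arguments
// for one logical pixel, then checks them with a backtracking search. Float
// texels are bit-cast and converted to the integer comparison type, so
// comparisons happen in the integer domain.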
template <typename T>
bool BinaryAtomicIntermValuesInstance::areValuesCorrect(tcu::ConstPixelBufferAccess& resultBuffer, const bool isFloatingPoint, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
{
	T		resultValues[NUM_INVOCATIONS_PER_PIXEL];
	T		atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
	bool	argsUsed[NUM_INVOCATIONS_PER_PIXEL];

	for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
	{
		IVec3 gid(x + i*gridSize.x(), y, z);
		T data = *((T*)resultBuffer.getPixelPtr(gid.x(), gid.y(), gid.z()));
		if (isFloatingPoint)
		{
			float fData;
			deMemcpy(&fData, &data, sizeof(fData));
			data = static_cast<T>(fData);
		}
		resultValues[i] = data;
		atomicArgs[i]	= getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
		argsUsed[i]		= false;
	}

	// Verify that the return values form a valid sequence.
	return verifyRecursive(0, getOperationInitialValue<T>(m_operation), argsUsed, atomicArgs, resultValues);
}

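// Backtracking search: at each depth pick any unused invocation whose observed
// return value equals the value accumulated so far, apply its argument and
// recurse. Success at depth NUM_INVOCATIONS_PER_PIXEL means the observations
// are consistent with some serialization of the atomics; the factorial worst
// case is harmless for five invocations per pixel.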
template <typename T>
bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32	index,
														const T			valueSoFar,
														bool			argsUsed[NUM_INVOCATIONS_PER_PIXEL],
														const T			atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
														const T			resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
{
	if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
		return true;

	for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
	{
		if (!argsUsed[i] && resultValues[i] == valueSoFar)
		{
			argsUsed[i] = true;

			if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
			{
				return true;
			}

			argsUsed[i] = false;
		}
	}

	return false;
}

TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
{
	return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling, m_operation, m_useTransfer, m_readType, m_backingType);
}

} // anonymous ns

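// Builds the group hierarchy: operation / image type / (no)transfer /
// read type / backing type, adding an end-result and an intermediate-values
// case for every supported format and tiling combination.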
tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations"));

	struct ImageParams
	{
		ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
			: m_imageType	(imageType)
			, m_imageSize	(imageSize)
		{
		}
		const ImageType		m_imageType;
		const tcu::UVec3	m_imageSize;
	};

	const ImageParams imageParamsArray[] =
	{
		ImageParams(IMAGE_TYPE_1D,			tcu::UVec3(64u, 1u, 1u)),
		ImageParams(IMAGE_TYPE_1D_ARRAY,	tcu::UVec3(64u, 1u, 8u)),
		ImageParams(IMAGE_TYPE_2D,			tcu::UVec3(64u, 64u, 1u)),
		ImageParams(IMAGE_TYPE_2D_ARRAY,	tcu::UVec3(64u, 64u, 8u)),
		ImageParams(IMAGE_TYPE_3D,			tcu::UVec3(48u, 48u, 8u)),
		ImageParams(IMAGE_TYPE_CUBE,		tcu::UVec3(64u, 64u, 1u)),
		ImageParams(IMAGE_TYPE_CUBE_ARRAY,	tcu::UVec3(64u, 64u, 2u)),
		ImageParams(IMAGE_TYPE_BUFFER,		tcu::UVec3(64u, 1u, 1u))
	};

	const tcu::TextureFormat formats[] =
	{
		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::FLOAT),
		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT64),
		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT64)
	};

	static const VkImageTiling s_tilings[] =
	{
		VK_IMAGE_TILING_OPTIMAL,
		VK_IMAGE_TILING_LINEAR,
	};

	const struct
	{
		ShaderReadType		type;
		const char*			name;
	} readTypes[] =
	{
		{	ShaderReadType::NORMAL,	"normal_read"	},
#ifndef CTS_USES_VULKANSC
		{	ShaderReadType::SPARSE,	"sparse_read"	},
#endif // CTS_USES_VULKANSC
	};

	const struct
	{
		ImageBackingType	type;
		const char*			name;
	} backingTypes[] =
	{
		{	ImageBackingType::NORMAL,	"normal_img"	},
#ifndef CTS_USES_VULKANSC
		{	ImageBackingType::SPARSE,	"sparse_img"	},
#endif // CTS_USES_VULKANSC
	};

	for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
	{
		const AtomicOperation operation = (AtomicOperation)operationI;

		de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str()));

		for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
		{
			const ImageType	 imageType = imageParamsArray[imageTypeNdx].m_imageType;
			const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;

			de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str()));

			for (int useTransferIdx = 0; useTransferIdx < 2; ++useTransferIdx)
			{
				const bool				useTransfer	= (useTransferIdx > 0);
				const string			groupName	= (!useTransfer ? "no" : "") + string("transfer");

				de::MovePtr<tcu::TestCaseGroup> transferGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str()));

				for (int readTypeIdx = 0; readTypeIdx < DE_LENGTH_OF_ARRAY(readTypes); ++readTypeIdx)
				{
					const auto& readType = readTypes[readTypeIdx];

					de::MovePtr<tcu::TestCaseGroup> readTypeGroup(new tcu::TestCaseGroup(testCtx, readType.name));

					for (int backingTypeIdx = 0; backingTypeIdx < DE_LENGTH_OF_ARRAY(backingTypes); ++backingTypeIdx)
					{
						const auto& backingType = backingTypes[backingTypeIdx];

						de::MovePtr<tcu::TestCaseGroup> backingTypeGroup(new tcu::TestCaseGroup(testCtx, backingType.name));

						for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
						{
							for (int tilingNdx = 0; tilingNdx < DE_LENGTH_OF_ARRAY(s_tilings); tilingNdx++)
							{
								const TextureFormat&	format		= formats[formatNdx];
								const std::string		formatName	= getShaderImageFormatQualifier(format);
								const char*				suffix		= (s_tilings[tilingNdx] == VK_IMAGE_TILING_OPTIMAL) ? "" : "_linear";

								// Non-float buffer cases would need additional SPIR-V programs
								// in vktImageAtomicSpirvShaders.cpp; skip them.
								if (imageType == IMAGE_TYPE_BUFFER && (format.type != tcu::TextureFormat::FLOAT))
								{
									continue;
								}

								// Only 2D and 3D images may support sparse residency, and
								// VK_IMAGE_TILING_LINEAR does not support it at all.
								const auto vkImageType = mapImageType(imageType);
								if (backingType.type == ImageBackingType::SPARSE && ((vkImageType != VK_IMAGE_TYPE_2D && vkImageType != VK_IMAGE_TYPE_3D) || (s_tilings[tilingNdx] == VK_IMAGE_TILING_LINEAR)))
									continue;

								// Only some operations are supported on floating-point formats.
								if (format.type == tcu::TextureFormat::FLOAT)
								{
									if (operation != ATOMIC_OPERATION_ADD &&
#ifndef CTS_USES_VULKANSC
										operation != ATOMIC_OPERATION_MIN &&
										operation != ATOMIC_OPERATION_MAX &&
#endif // CTS_USES_VULKANSC
										operation != ATOMIC_OPERATION_EXCHANGE)
									{
										continue;
									}
								}

								if (readType.type == ShaderReadType::SPARSE)
								{
									// When using transfer, shader reads will not be used, so avoid creating two identical cases.
									if (useTransfer)
										continue;

									// Sparse reads are not supported for all types of images.
									if (imageType == IMAGE_TYPE_1D || imageType == IMAGE_TYPE_1D_ARRAY || imageType == IMAGE_TYPE_BUFFER)
										continue;
								}

								// Atomic case that checks the end result of the operations, and not the intermediate return values.
								const string caseEndResult = formatName + "_end_result" + suffix;
								backingTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, imageType, imageSize, format, s_tilings[tilingNdx], operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));

								// Atomic case that checks the return values of the atomic function, and not the end result.
								const string caseIntermValues = formatName + "_intermediate_values" + suffix;
								backingTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, imageType, imageSize, format, s_tilings[tilingNdx], operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
							}
						}

						readTypeGroup->addChild(backingTypeGroup.release());
					}

					transferGroup->addChild(readTypeGroup.release());
				}

				imageTypeGroup->addChild(transferGroup.release());
			}

			operationGroup->addChild(imageTypeGroup.release());
		}

		imageAtomicOperationsTests->addChild(operationGroup.release());
	}

	return imageAtomicOperationsTests.release();
}

} // image
} // vkt
