/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file  vktImageAtomicOperationTests.cpp
 * \brief Image atomic operation tests
 *//*--------------------------------------------------------------------*/

#include "vktImageAtomicOperationTests.hpp"
#include "vktImageAtomicSpirvShaders.hpp"

#include "deUniquePtr.hpp"
#include "deStringUtil.hpp"
#include "deSTLUtil.hpp"

#include "vktTestCaseUtil.hpp"
#include "vkPrograms.hpp"
#include "vkImageUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vktImageTestsUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBufferWithMemory.hpp"

#include "tcuTextureUtil.hpp"
#include "tcuTexture.hpp"
#include "tcuVectorType.hpp"
#include "tcuStringTemplate.hpp"

namespace vkt
{
namespace image
{
namespace
{

using namespace vk;
using namespace std;
using de::toString;

using tcu::TextureFormat;
using tcu::IVec2;
using tcu::IVec3;
using tcu::UVec3;
using tcu::Vec4;
using tcu::IVec4;
using tcu::UVec4;
using tcu::CubeFace;
using tcu::Texture1D;
using tcu::Texture2D;
using tcu::Texture3D;
using tcu::Texture2DArray;
using tcu::TextureCube;
using tcu::PixelBufferAccess;
using tcu::ConstPixelBufferAccess;
using tcu::Vector;
using tcu::TestContext;

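// Number of shader invocations that hit each pixel: the dispatch in
// BinaryAtomicInstanceBase::iterate() multiplies the grid width by this
// constant, so several invocations contend on every texel.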
enum
{
	NUM_INVOCATIONS_PER_PIXEL = 5u
};

enum AtomicOperation
{
	ATOMIC_OPERATION_ADD = 0,
	ATOMIC_OPERATION_SUB,
	ATOMIC_OPERATION_INC,
	ATOMIC_OPERATION_DEC,
	ATOMIC_OPERATION_MIN,
	ATOMIC_OPERATION_MAX,
	ATOMIC_OPERATION_AND,
	ATOMIC_OPERATION_OR,
	ATOMIC_OPERATION_XOR,
	ATOMIC_OPERATION_EXCHANGE,
	ATOMIC_OPERATION_COMPARE_EXCHANGE,

	ATOMIC_OPERATION_LAST
};

enum class ShaderReadType
{
	NORMAL = 0,
	SPARSE,
};

enum class ImageBackingType
{
	NORMAL = 0,
	SPARSE,
};

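//! Builds the GLSL texel-coordinate expression for the given image dimensionality,
//! e.g. getCoordStr(IMAGE_TYPE_2D, "gx", "gy", "gz") yields "ivec2(gx,gy)".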
static string getCoordStr (const ImageType		imageType,
						   const std::string&	x,
						   const std::string&	y,
						   const std::string&	z)
{
	switch (imageType)
	{
		case IMAGE_TYPE_1D:
		case IMAGE_TYPE_BUFFER:
			return x;
		case IMAGE_TYPE_1D_ARRAY:
		case IMAGE_TYPE_2D:
			return string("ivec2(" + x + "," + y + ")");
		case IMAGE_TYPE_2D_ARRAY:
		case IMAGE_TYPE_3D:
		case IMAGE_TYPE_CUBE:
		case IMAGE_TYPE_CUBE_ARRAY:
			return string("ivec3(" + x + "," + y + "," + z + ")");
		default:
			DE_ASSERT(false);
			return "";
	}
}

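//! Returns the GLSL scalar type used for the format's texel data: a 32- or
//! 64-bit signed integer, unsigned integer or floating-point type.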
static string getComponentTypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
{
	DE_ASSERT(intFormat || uintFormat || floatFormat);

	const bool is64 = (componentWidth == 64);

	if (intFormat)
		return (is64 ? "int64_t" : "int");
	if (uintFormat)
		return (is64 ? "uint64_t" : "uint");
	if (floatFormat)
		return (is64 ? "double" : "float");

	return "";
}

static string getVec4TypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
{
	DE_ASSERT(intFormat || uintFormat || floatFormat);

	const bool is64 = (componentWidth == 64);

	if (intFormat)
		return (is64 ? "i64vec4" : "ivec4");
	if (uintFormat)
		return (is64 ? "u64vec4" : "uvec4");
	if (floatFormat)
		return (is64 ? "f64vec4" : "vec4");

	return "";
}

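//! Builds the GLSL expression for the value handed to the atomic function,
//! derived from the invocation coordinates; e.g. for ATOMIC_OPERATION_ADD with
//! x="gx", y="gy", z="gz" it yields "(gx*gx + gy*gy + gz*gz)".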
static string getAtomicFuncArgumentShaderStr (const AtomicOperation	op,
											  const string&			x,
											  const string&			y,
											  const string&			z,
											  const IVec3&			gridSize)
{
	switch (op)
	{
		case ATOMIC_OPERATION_ADD:
		case ATOMIC_OPERATION_AND:
		case ATOMIC_OPERATION_OR:
		case ATOMIC_OPERATION_XOR:
			return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
		case ATOMIC_OPERATION_MIN:
		case ATOMIC_OPERATION_MAX:
			// multiply by (1-2*(value % 2)) to make half of the data negative;
			// for uint formats this wraps around and generates large numbers
			return string("((1 - 2*(" + x + " % 2)) * (" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + "))");
		case ATOMIC_OPERATION_EXCHANGE:
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:
			return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
		default:
			DE_ASSERT(false);
			return "";
	}
}

static string getAtomicOperationCaseName (const AtomicOperation op)
{
	switch (op)
	{
		case ATOMIC_OPERATION_ADD:				return string("add");
		case ATOMIC_OPERATION_SUB:				return string("sub");
		case ATOMIC_OPERATION_INC:				return string("inc");
		case ATOMIC_OPERATION_DEC:				return string("dec");
		case ATOMIC_OPERATION_MIN:				return string("min");
		case ATOMIC_OPERATION_MAX:				return string("max");
		case ATOMIC_OPERATION_AND:				return string("and");
		case ATOMIC_OPERATION_OR:				return string("or");
		case ATOMIC_OPERATION_XOR:				return string("xor");
		case ATOMIC_OPERATION_EXCHANGE:			return string("exchange");
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return string("compare_exchange");
		default:
			DE_ASSERT(false);
			return "";
	}
}

static string getAtomicOperationShaderFuncName (const AtomicOperation op)
{
	switch (op)
	{
		case ATOMIC_OPERATION_ADD:				return string("imageAtomicAdd");
		case ATOMIC_OPERATION_MIN:				return string("imageAtomicMin");
		case ATOMIC_OPERATION_MAX:				return string("imageAtomicMax");
		case ATOMIC_OPERATION_AND:				return string("imageAtomicAnd");
		case ATOMIC_OPERATION_OR:				return string("imageAtomicOr");
		case ATOMIC_OPERATION_XOR:				return string("imageAtomicXor");
		case ATOMIC_OPERATION_EXCHANGE:			return string("imageAtomicExchange");
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return string("imageAtomicCompSwap");
		default:
			DE_ASSERT(false);
			return "";
	}
}

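//! Returns the value the test image is initialized with before the atomics run.
//! \note Sub/dec start from a large value, presumably so that repeated
//!       subtraction cannot underflow; the other constants are arbitrary.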
template <typename T>
T getOperationInitialValue (const AtomicOperation op)
{
	switch (op)
	{
		// \note 18 is just an arbitrary small nonzero value.
		case ATOMIC_OPERATION_ADD:				return 18;
		case ATOMIC_OPERATION_INC:				return 18;
		case ATOMIC_OPERATION_SUB:				return (1 << 24) - 1;
		case ATOMIC_OPERATION_DEC:				return (1 << 24) - 1;
		case ATOMIC_OPERATION_MIN:				return (1 << 15) - 1;
		case ATOMIC_OPERATION_MAX:				return 18;
		case ATOMIC_OPERATION_AND:				return (1 << 15) - 1;
		case ATOMIC_OPERATION_OR:				return 18;
		case ATOMIC_OPERATION_XOR:				return 18;
		case ATOMIC_OPERATION_EXCHANGE:			return 18;
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return 18;
		default:
			DE_ASSERT(false);
			return 0xFFFFFFFF;
	}
}

template <>
deInt64 getOperationInitialValue<deInt64>(const AtomicOperation op)
{
	switch (op)
	{
		// \note 0x000000BEFFFFFF18 is just an arbitrary nonzero value.
		case ATOMIC_OPERATION_ADD:				return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_INC:				return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_SUB:				return (1ull << 56) - 1;
		case ATOMIC_OPERATION_DEC:				return (1ull << 56) - 1;
		case ATOMIC_OPERATION_MIN:				return (1ull << 47) - 1;
		case ATOMIC_OPERATION_MAX:				return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_AND:				return (1ull << 47) - 1;
		case ATOMIC_OPERATION_OR:				return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_XOR:				return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_EXCHANGE:			return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return 0x000000BEFFFFFF18;
		default:
			DE_ASSERT(false);
			return 0xFFFFFFFFFFFFFFFF;
	}
}

template <>
deUint64 getOperationInitialValue<deUint64>(const AtomicOperation op)
{
	return (deUint64)getOperationInitialValue<deInt64>(op);
}


template <typename T>
static T getAtomicFuncArgument (const AtomicOperation	op,
								const IVec3&			invocationID,
								const IVec3&			gridSize)
{
	const T x = static_cast<T>(invocationID.x());
	const T y = static_cast<T>(invocationID.y());
	const T z = static_cast<T>(invocationID.z());

	switch (op)
	{
		// \note Fall-throughs.
		case ATOMIC_OPERATION_ADD:
		case ATOMIC_OPERATION_SUB:
		case ATOMIC_OPERATION_AND:
		case ATOMIC_OPERATION_OR:
		case ATOMIC_OPERATION_XOR:
			return x*x + y*y + z*z;
		case ATOMIC_OPERATION_INC:
		case ATOMIC_OPERATION_DEC:
			return 1;
		case ATOMIC_OPERATION_MIN:
		case ATOMIC_OPERATION_MAX:
			// multiply half of the data by -1
			return (1-2*(x % 2))*(x*x + y*y + z*z);
		case ATOMIC_OPERATION_EXCHANGE:
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:
			return (z*static_cast<T>(gridSize.x()) + x)*static_cast<T>(gridSize.y()) + y;
		default:
			DE_ASSERT(false);
			return -1;
	}
}

//! An order-independent operation is one whose end result does not depend on the order in which the operations are carried out (i.e. it is both commutative and associative).
static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
{
	return	op == ATOMIC_OPERATION_ADD ||
			op == ATOMIC_OPERATION_SUB ||
			op == ATOMIC_OPERATION_INC ||
			op == ATOMIC_OPERATION_DEC ||
			op == ATOMIC_OPERATION_MIN ||
			op == ATOMIC_OPERATION_MAX ||
			op == ATOMIC_OPERATION_AND ||
			op == ATOMIC_OPERATION_OR ||
			op == ATOMIC_OPERATION_XOR;
}

//! Checks if the operation needs a hand-written SPIR-V shader (GLSL has no
//! imageAtomicSub/Inc/Dec built-ins).
static bool isSpirvAtomicOperation (const AtomicOperation op)
{
	return	op == ATOMIC_OPERATION_SUB ||
			op == ATOMIC_OPERATION_INC ||
			op == ATOMIC_OPERATION_DEC;
}

//! Returns the SPIR-V assembler name of the given operation.
static std::string getSpirvAtomicOpName (const AtomicOperation op)
{
	switch (op)
	{
	case ATOMIC_OPERATION_SUB:	return "OpAtomicISub";
	case ATOMIC_OPERATION_INC:	return "OpAtomicIIncrement";
	case ATOMIC_OPERATION_DEC:	return "OpAtomicIDecrement";
	default:					break;
	}

	DE_ASSERT(false);
	return "";
}

//! Returns true if the given SPIR-V operation, unlike OpAtomicIAdd, takes no final value operand.
static bool isSpirvAtomicNoLastArgOp (const AtomicOperation op)
{
	switch (op)
	{
	case ATOMIC_OPERATION_SUB:	return false;
	case ATOMIC_OPERATION_INC:	// fallthrough
	case ATOMIC_OPERATION_DEC:	return true;
	default:					break;
	}

	DE_ASSERT(false);
	return false;
}

//! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
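//! \note For compare-exchange the comparator must match the initial image value
//!       from getOperationInitialValue(): 18 for 32-bit formats and
//!       0xBEFFFFFF18 for 64-bit ones (820338753304 in the shader sources below).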
template <typename T>
static T computeBinaryAtomicOperationResult (const AtomicOperation op, const T a, const T b)
{
	switch (op)
	{
		case ATOMIC_OPERATION_INC:				// fallthrough.
		case ATOMIC_OPERATION_ADD:				return a + b;
		case ATOMIC_OPERATION_DEC:				// fallthrough.
		case ATOMIC_OPERATION_SUB:				return a - b;
		case ATOMIC_OPERATION_MIN:				return de::min(a, b);
		case ATOMIC_OPERATION_MAX:				return de::max(a, b);
		case ATOMIC_OPERATION_AND:				return a & b;
		case ATOMIC_OPERATION_OR:				return a | b;
		case ATOMIC_OPERATION_XOR:				return a ^ b;
		case ATOMIC_OPERATION_EXCHANGE:			return b;
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return (a == (sizeof(T) == 8 ? 0xBEFFFFFF18 : 18)) ? b : a;
		default:
			DE_ASSERT(false);
			return -1;
	}
}

VkImageUsageFlags getUsageFlags (bool useTransfer)
{
	VkImageUsageFlags usageFlags = VK_IMAGE_USAGE_STORAGE_BIT;

	if (useTransfer)
		usageFlags |= (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);

	return usageFlags;
}

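//! Registers the "fillShader" and "readShader" compute programs that initialize
//! and read back the image when transfer usage is not used, plus a
//! sparse-residency variant of the read shader for image types that allow it.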
void AddFillReadShader (SourceCollections&			sourceCollections,
						const ImageType&			imageType,
						const tcu::TextureFormat&	format,
						const string&				componentType,
						const string&				vec4Type)
{
	const string	imageInCoord			= getCoordStr(imageType, "gx", "gy", "gz");
	const string	shaderImageFormatStr	= getShaderImageFormatQualifier(format);
	const string	shaderImageTypeStr		= getShaderImageType(format, imageType);
	const auto		componentWidth			= getFormatComponentWidth(mapTextureFormat(format), 0u);
	const string	extensions				= ((componentWidth == 64u)
											?	"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
												"#extension GL_EXT_shader_image_int64 : require\n"
											:	"");

	const string fillShader =	"#version 450\n"
								+ extensions +
								"precision highp " + shaderImageTypeStr + ";\n"
								"\n"
								"layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
								"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
								"\n"
								"layout(std430, binding = 1) buffer inputBuffer\n"
								"{\n"
								"	" + componentType + " data[];\n"
								"} inBuffer;\n"
								"\n"
								"void main(void)\n"
								"{\n"
								"	int gx = int(gl_GlobalInvocationID.x);\n"
								"	int gy = int(gl_GlobalInvocationID.y);\n"
								"	int gz = int(gl_GlobalInvocationID.z);\n"
								"	uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
								"	imageStore(u_resultImage, " + imageInCoord + ", " + vec4Type + "(inBuffer.data[index]));\n"
								"}\n";

	const string readShader =	"#version 450\n"
								+ extensions +
								"precision highp " + shaderImageTypeStr + ";\n"
								"\n"
								"layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
								"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
								"\n"
								"layout(std430, binding = 1) buffer outputBuffer\n"
								"{\n"
								"	" + componentType + " data[];\n"
								"} outBuffer;\n"
								"\n"
								"void main(void)\n"
								"{\n"
								"	int gx = int(gl_GlobalInvocationID.x);\n"
								"	int gy = int(gl_GlobalInvocationID.y);\n"
								"	int gz = int(gl_GlobalInvocationID.z);\n"
								"	uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
								"	outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
								"}\n";

	if ((imageType != IMAGE_TYPE_1D) &&
		(imageType != IMAGE_TYPE_1D_ARRAY) &&
		(imageType != IMAGE_TYPE_BUFFER))
	{
		const string readShaderResidency  = "#version 450\n"
											"#extension GL_ARB_sparse_texture2 : require\n"
											+ extensions +
											"precision highp " + shaderImageTypeStr + ";\n"
											"\n"
											"layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
											"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
											"\n"
											"layout(std430, binding = 1) buffer outputBuffer\n"
											"{\n"
											"	" + componentType + " data[];\n"
											"} outBuffer;\n"
											"\n"
											"void main(void)\n"
											"{\n"
											"	int gx = int(gl_GlobalInvocationID.x);\n"
											"	int gy = int(gl_GlobalInvocationID.y);\n"
											"	int gz = int(gl_GlobalInvocationID.z);\n"
											"	uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
											"	outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
											"	" + vec4Type + " sparseValue;\n"
											"	sparseImageLoadARB(u_resultImage, " + imageInCoord + ", sparseValue);\n"
											"	if (outBuffer.data[index] != sparseValue.x)\n"
											"		outBuffer.data[index] = " + vec4Type + "(1234).x;\n"
											"}\n";

		sourceCollections.glslSources.add("readShaderResidency") << glu::ComputeSource(readShaderResidency.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
	}

	sourceCollections.glslSources.add("fillShader") << glu::ComputeSource(fillShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
	sourceCollections.glslSources.add("readShader") << glu::ComputeSource(readShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
}

//! Prepare the initial data for the image
static void initDataForImage (const VkDevice			device,
							  const DeviceInterface&	deviceInterface,
							  const TextureFormat&		format,
							  const AtomicOperation		operation,
							  const tcu::UVec3&			gridSize,
							  BufferWithMemory&			buffer)
{
	Allocation&				bufferAllocation	= buffer.getAllocation();
	const VkFormat			imageFormat			= mapTextureFormat(format);
	tcu::PixelBufferAccess	pixelBuffer			(format, gridSize.x(), gridSize.y(), gridSize.z(), bufferAllocation.getHostPtr());

	if (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)
	{
		const deInt64 initialValue(getOperationInitialValue<deInt64>(operation));

		for (deUint32 z = 0; z < gridSize.z(); z++)
		for (deUint32 y = 0; y < gridSize.y(); y++)
		for (deUint32 x = 0; x < gridSize.x(); x++)
		{
			*((deInt64*)pixelBuffer.getPixelPtr(x, y, z)) = initialValue;
		}
	}
	else
	{
		const tcu::IVec4 initialValue(getOperationInitialValue<deInt32>(operation));

		for (deUint32 z = 0; z < gridSize.z(); z++)
		for (deUint32 y = 0; y < gridSize.y(); y++)
		for (deUint32 x = 0; x < gridSize.x(); x++)
		{
			pixelBuffer.setPixel(initialValue, x, y, z);
		}
	}

	flushAlloc(deviceInterface, device, bufferAllocation);
}

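//! Common support checks: image format/tiling/usage support, texel-buffer and
//! linear-tiling atomic features, and the extensions implied by float
//! (VK_EXT_shader_atomic_float/float2) and 64-bit integer
//! (VK_EXT_shader_image_atomic_int64) atomics, as well as sparse features.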
void commonCheckSupport (Context& context, const tcu::TextureFormat& tcuFormat, VkImageTiling tiling, ImageType imageType, const tcu::UVec3& imageSize, AtomicOperation operation, bool useTransfer, ShaderReadType readType, ImageBackingType backingType)
{
	const VkFormat				format				= mapTextureFormat(tcuFormat);
	const VkImageType			vkImgType			= mapImageType(imageType);
	const VkFormatFeatureFlags	texelBufferSupport	= (VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT);

	const auto&	vki				= context.getInstanceInterface();
	const auto	physicalDevice	= context.getPhysicalDevice();
	const auto	usageFlags		= getUsageFlags(useTransfer);

	VkImageFormatProperties	vkImageFormatProperties;
	const auto result = vki.getPhysicalDeviceImageFormatProperties(physicalDevice, format, vkImgType, tiling, usageFlags, 0, &vkImageFormatProperties);
	if (result != VK_SUCCESS)
	{
		if (result == VK_ERROR_FORMAT_NOT_SUPPORTED)
			TCU_THROW(NotSupportedError, "Format unsupported for tiling");
		else
			TCU_FAIL("vkGetPhysicalDeviceImageFormatProperties returned unexpected error");
	}

	if (vkImageFormatProperties.maxArrayLayers < (uint32_t)getNumLayers(imageType, imageSize))
	{
		TCU_THROW(NotSupportedError, "This format and tiling combination does not support this number of array layers");
	}

	const VkFormatProperties	formatProperties	= getPhysicalDeviceFormatProperties(context.getInstanceInterface(),
																						context.getPhysicalDevice(), format);
	if ((imageType == IMAGE_TYPE_BUFFER) &&
		((formatProperties.bufferFeatures & texelBufferSupport) != texelBufferSupport))
		TCU_THROW(NotSupportedError, "Atomic storage texel buffers not supported");

	const VkFormatFeatureFlags requiredFeaturesLinear = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
	if (tiling == vk::VK_IMAGE_TILING_LINEAR &&
		((formatProperties.linearTilingFeatures & requiredFeaturesLinear) != requiredFeaturesLinear))
	{
		TCU_THROW(NotSupportedError, "Format doesn't support atomic storage with linear tiling");
	}

	if (imageType == IMAGE_TYPE_CUBE_ARRAY)
		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_IMAGE_CUBE_ARRAY);

#ifndef CTS_USES_VULKANSC
	if (backingType == ImageBackingType::SPARSE)
	{
		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);

		switch (vkImgType)
		{
		case VK_IMAGE_TYPE_2D:	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE2D); break;
		case VK_IMAGE_TYPE_3D:	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE3D); break;
		default:				DE_ASSERT(false); break;
		}

		if (!checkSparseImageFormatSupport(context.getPhysicalDevice(), context.getInstanceInterface(), format, vkImgType, VK_SAMPLE_COUNT_1_BIT, usageFlags, tiling))
			TCU_THROW(NotSupportedError, "Format does not support sparse images");
	}
#endif // CTS_USES_VULKANSC

	if (isFloatFormat(format))
	{
		context.requireDeviceFunctionality("VK_EXT_shader_atomic_float");

		const VkFormatFeatureFlags	requiredFeatures	= (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
		const auto&					atomicFloatFeatures	= context.getShaderAtomicFloatFeaturesEXT();

		if (!atomicFloatFeatures.shaderImageFloat32Atomics)
			TCU_THROW(NotSupportedError, "shaderImageFloat32Atomics not supported");

		if ((operation == ATOMIC_OPERATION_ADD) && !atomicFloatFeatures.shaderImageFloat32AtomicAdd)
			TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicAdd not supported");

		if (operation == ATOMIC_OPERATION_MIN || operation == ATOMIC_OPERATION_MAX)
		{
			context.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
			if (!context.getShaderAtomicFloat2FeaturesEXT().shaderImageFloat32AtomicMinMax)
			{
				TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicMinMax not supported");
			}
#endif // CTS_USES_VULKANSC
		}

		if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
			TCU_FAIL("Required format feature bits not supported");

		if (backingType == ImageBackingType::SPARSE)
		{
			if (!atomicFloatFeatures.sparseImageFloat32Atomics)
				TCU_THROW(NotSupportedError, "sparseImageFloat32Atomics not supported");

			if (operation == ATOMIC_OPERATION_ADD && !atomicFloatFeatures.sparseImageFloat32AtomicAdd)
				TCU_THROW(NotSupportedError, "sparseImageFloat32AtomicAdd not supported");
		}
	}
	else if (format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
	{
		context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");

		const VkFormatFeatureFlags	requiredFeatures	= (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
		const auto&					atomicInt64Features	= context.getShaderImageAtomicInt64FeaturesEXT();

		if (!atomicInt64Features.shaderImageInt64Atomics)
			TCU_THROW(NotSupportedError, "shaderImageInt64Atomics not supported");

		if (backingType == ImageBackingType::SPARSE && !atomicInt64Features.sparseImageInt64Atomics)
			TCU_THROW(NotSupportedError, "sparseImageInt64Atomics not supported");

		if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
			TCU_FAIL("Mandatory format features not supported");
	}

	if (useTransfer)
	{
		const VkFormatFeatureFlags transferFeatures = (VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT);
		if ((formatProperties.optimalTilingFeatures & transferFeatures) != transferFeatures)
			TCU_THROW(NotSupportedError, "Transfer features not supported for this format");
	}

	if (readType == ShaderReadType::SPARSE)
	{
		DE_ASSERT(imageType != IMAGE_TYPE_1D && imageType != IMAGE_TYPE_1D_ARRAY && imageType != IMAGE_TYPE_BUFFER);
		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_RESOURCE_RESIDENCY);
	}
}

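//! Test case that only verifies the final image contents after all atomic
//! invocations have completed.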
class BinaryAtomicEndResultCase : public vkt::TestCase
{
public:
								BinaryAtomicEndResultCase	(tcu::TestContext&			testCtx,
															 const string&				name,
															 const string&				description,
															 const ImageType			imageType,
															 const tcu::UVec3&			imageSize,
															 const tcu::TextureFormat&	format,
															 const VkImageTiling		tiling,
															 const AtomicOperation		operation,
															 const bool					useTransfer,
															 const ShaderReadType		shaderReadType,
															 const ImageBackingType		backingType,
															 const glu::GLSLVersion		glslVersion);

	void						initPrograms				(SourceCollections&			sourceCollections) const;
	TestInstance*				createInstance				(Context&					context) const;
	virtual void				checkSupport				(Context&					context) const;

private:
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const tcu::TextureFormat	m_format;
	const VkImageTiling			m_tiling;
	const AtomicOperation		m_operation;
	const bool					m_useTransfer;
	const ShaderReadType		m_readType;
	const ImageBackingType		m_backingType;
	const glu::GLSLVersion		m_glslVersion;
};

BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext&			testCtx,
													  const string&				name,
													  const string&				description,
													  const ImageType			imageType,
													  const tcu::UVec3&			imageSize,
													  const tcu::TextureFormat&	format,
													  const VkImageTiling		tiling,
													  const AtomicOperation		operation,
													  const bool				useTransfer,
													  const ShaderReadType		shaderReadType,
													  const ImageBackingType	backingType,
													  const glu::GLSLVersion	glslVersion)
	: TestCase		(testCtx, name, description)
	, m_imageType	(imageType)
	, m_imageSize	(imageSize)
	, m_format		(format)
	, m_tiling		(tiling)
	, m_operation	(operation)
	, m_useTransfer	(useTransfer)
	, m_readType	(shaderReadType)
	, m_backingType	(backingType)
	, m_glslVersion	(glslVersion)
{
}

void BinaryAtomicEndResultCase::checkSupport (Context& context) const
{
	commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType, m_backingType);
}

void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
{
	const VkFormat	imageFormat		= mapTextureFormat(m_format);
	const deUint32	componentWidth	= getFormatComponentWidth(imageFormat, 0);
	const bool		intFormat		= isIntFormat(imageFormat);
	const bool		uintFormat		= isUintFormat(imageFormat);
	const bool		floatFormat		= isFloatFormat(imageFormat);
	const string	type			= getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
	const string	vec4Type		= getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);

	AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);

	if (isSpirvAtomicOperation(m_operation))
	{
		const CaseVariant					caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_END_RESULTS};
		const tcu::StringTemplate			shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
		std::map<std::string, std::string>	specializations;

		specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
		if (isSpirvAtomicNoLastArgOp(m_operation))
			specializations["LASTARG"] = "";

		sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
	}
	else
	{
		const string	versionDecl				= glu::getGLSLVersionDeclaration(m_glslVersion);

		const UVec3		gridSize				= getShaderGridSize(m_imageType, m_imageSize);
		const string	atomicCoord				= getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");

		const string	atomicArgExpr			= type + getAtomicFuncArgumentShaderStr(m_operation,
																						"gx", "gy", "gz",
																						IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));

		const string	compareExchangeStr		= (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
												  (componentWidth == 64 ? ", 820338753304" : ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "") :
												  "";
		const string	atomicInvocation		= getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
		const string	shaderImageFormatStr	= getShaderImageFormatQualifier(m_format);
		const string	shaderImageTypeStr		= getShaderImageType(m_format, m_imageType);
		const string	extensions				= "#extension GL_EXT_shader_atomic_float : enable\n"
												  "#extension GL_EXT_shader_atomic_float2 : enable\n"
												  "#extension GL_KHR_memory_scope_semantics : enable";

		string source = versionDecl + "\n" + extensions + "\n";

		if (64 == componentWidth)
		{
			source +=	"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
						"#extension GL_EXT_shader_image_int64 : require\n";
		}

		source +=	"precision highp " + shaderImageTypeStr + ";\n"
					"\n"
					"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
					"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
					"\n"
					"void main (void)\n"
					"{\n"
					"	int gx = int(gl_GlobalInvocationID.x);\n"
					"	int gy = int(gl_GlobalInvocationID.y);\n"
					"	int gz = int(gl_GlobalInvocationID.z);\n"
					"	" + atomicInvocation + ";\n"
					"}\n";

		sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
	}
}

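//! Test case that stores the value returned by each atomic invocation in a
//! second image and verifies those intermediate values.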
class BinaryAtomicIntermValuesCase : public vkt::TestCase
{
public:
								BinaryAtomicIntermValuesCase	(tcu::TestContext&			testCtx,
																 const string&				name,
																 const string&				description,
																 const ImageType			imageType,
																 const tcu::UVec3&			imageSize,
																 const tcu::TextureFormat&	format,
																 const VkImageTiling		tiling,
																 const AtomicOperation		operation,
																 const bool					useTransfer,
																 const ShaderReadType		shaderReadType,
																 const ImageBackingType		backingType,
																 const glu::GLSLVersion		glslVersion);

	void						initPrograms					(SourceCollections&			sourceCollections) const;
	TestInstance*				createInstance					(Context&					context) const;
	virtual void				checkSupport					(Context&					context) const;

private:
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const tcu::TextureFormat	m_format;
	const VkImageTiling			m_tiling;
	const AtomicOperation		m_operation;
	const bool					m_useTransfer;
	const ShaderReadType		m_readType;
	const ImageBackingType		m_backingType;
	const glu::GLSLVersion		m_glslVersion;
};

BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext&			testCtx,
															const string&			name,
															const string&			description,
															const ImageType			imageType,
															const tcu::UVec3&		imageSize,
															const TextureFormat&	format,
															const VkImageTiling		tiling,
															const AtomicOperation	operation,
															const bool				useTransfer,
															const ShaderReadType	shaderReadType,
															const ImageBackingType	backingType,
															const glu::GLSLVersion	glslVersion)
	: TestCase		(testCtx, name, description)
	, m_imageType	(imageType)
	, m_imageSize	(imageSize)
	, m_format		(format)
	, m_tiling		(tiling)
	, m_operation	(operation)
	, m_useTransfer	(useTransfer)
	, m_readType	(shaderReadType)
	, m_backingType	(backingType)
	, m_glslVersion	(glslVersion)
{
}

void BinaryAtomicIntermValuesCase::checkSupport (Context& context) const
{
	commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType, m_backingType);
}

void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
{
	const VkFormat	imageFormat		= mapTextureFormat(m_format);
	const deUint32	componentWidth	= getFormatComponentWidth(imageFormat, 0);
	const bool		intFormat		= isIntFormat(imageFormat);
	const bool		uintFormat		= isUintFormat(imageFormat);
	const bool		floatFormat		= isFloatFormat(imageFormat);
	const string	type			= getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
	const string	vec4Type		= getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);

	AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);

	if (isSpirvAtomicOperation(m_operation))
	{
		const CaseVariant					caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_INTERMEDIATE_RESULTS};
		const tcu::StringTemplate			shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
		std::map<std::string, std::string>	specializations;

		specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
		if (isSpirvAtomicNoLastArgOp(m_operation))
			specializations["LASTARG"] = "";

		sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
	}
	else
	{
		const string	versionDecl				= glu::getGLSLVersionDeclaration(m_glslVersion);
		const UVec3		gridSize				= getShaderGridSize(m_imageType, m_imageSize);
		const string	atomicCoord				= getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
		const string	invocationCoord			= getCoordStr(m_imageType, "gx", "gy", "gz");
		const string	atomicArgExpr			= type + getAtomicFuncArgumentShaderStr(m_operation,
																						"gx", "gy", "gz",
																						IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));

		const string	compareExchangeStr		= (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
												  (componentWidth == 64 ? ", 820338753304" : ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "") :
												  "";
		const string	atomicInvocation		= getAtomicOperationShaderFuncName(m_operation) +
												"(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
		const string	shaderImageFormatStr	= getShaderImageFormatQualifier(m_format);
		const string	shaderImageTypeStr		= getShaderImageType(m_format, m_imageType);
		const string	extensions				= "#extension GL_EXT_shader_atomic_float : enable\n"
												  "#extension GL_EXT_shader_atomic_float2 : enable\n"
												  "#extension GL_KHR_memory_scope_semantics : enable";

		string source = versionDecl + "\n" + extensions + "\n"
						"\n";

		if (64 == componentWidth)
		{
			source +=	"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
						"#extension GL_EXT_shader_image_int64 : require\n";
		}

		source +=	"precision highp " + shaderImageTypeStr + "; \n"
					"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
					"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
					"layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
					"\n"
					"void main (void)\n"
					"{\n"
					"	int gx = int(gl_GlobalInvocationID.x);\n"
					"	int gy = int(gl_GlobalInvocationID.y);\n"
					"	int gz = int(gl_GlobalInvocationID.z);\n"
					"	imageStore(u_intermValuesImage, " + invocationCoord + ", " + vec4Type + "(" + atomicInvocation + "));\n"
					"}\n";

		sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
	}
}

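//! Base class for the test instances: creates the buffers, image and
//! descriptors, records fill/atomic/read-back commands and checks the output.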
class BinaryAtomicInstanceBase : public vkt::TestInstance
{
public:

								BinaryAtomicInstanceBase (Context&						context,
														  const string&					name,
														  const ImageType				imageType,
														  const tcu::UVec3&				imageSize,
														  const TextureFormat&			format,
														  const VkImageTiling			tiling,
														  const AtomicOperation			operation,
														  const bool					useTransfer,
														  const ShaderReadType			shaderReadType,
														  const ImageBackingType		backingType);

	tcu::TestStatus				iterate					 (void);

	virtual deUint32			getOutputBufferSize		 (void) const = 0;

	virtual void				prepareResources		 (const bool					useTransfer) = 0;
	virtual void				prepareDescriptors		 (const bool					isTexelBuffer) = 0;

	virtual void				commandsBeforeCompute	 (const VkCommandBuffer			cmdBuffer) const = 0;
	virtual void				commandsAfterCompute	 (const VkCommandBuffer			cmdBuffer,
														  const VkPipeline				pipeline,
														  const VkPipelineLayout		pipelineLayout,
														  const VkDescriptorSet			descriptorSet,
														  const VkDeviceSize&			range,
														  const bool					useTransfer) = 0;

	virtual bool				verifyResult			 (Allocation&					outputBufferAllocation,
														  const bool					is64Bit) const = 0;

protected:

	void						shaderFillImage			 (const VkCommandBuffer			cmdBuffer,
														  const VkBuffer&				buffer,
														  const VkPipeline				pipeline,
														  const VkPipelineLayout		pipelineLayout,
														  const VkDescriptorSet			descriptorSet,
														  const VkDeviceSize&			range,
														  const tcu::UVec3&				gridSize);

	void						createImageAndView		(VkFormat						imageFormat,
														 const tcu::UVec3&				imageExtent,
														 bool							useTransfer,
														 de::MovePtr<Image>&			imagePtr,
														 Move<VkImageView>&				imageViewPtr);

	void						createImageResources	(const VkFormat&				imageFormat,
														 const bool						useTransfer);

	const string					m_name;
	const ImageType					m_imageType;
	const tcu::UVec3				m_imageSize;
	const TextureFormat				m_format;
	const VkImageTiling				m_tiling;
	const AtomicOperation			m_operation;
	const bool						m_useTransfer;
	const ShaderReadType			m_readType;
	const ImageBackingType			m_backingType;

	de::MovePtr<BufferWithMemory>	m_inputBuffer;
	de::MovePtr<BufferWithMemory>	m_outputBuffer;
	Move<VkBufferView>				m_descResultBufferView;
	Move<VkBufferView>				m_descIntermResultsBufferView;
	Move<VkDescriptorPool>			m_descriptorPool;
	Move<VkDescriptorSetLayout>		m_descriptorSetLayout;
	Move<VkDescriptorSet>			m_descriptorSet;

	Move<VkDescriptorSetLayout>		m_descriptorSetLayoutNoTransfer;
	Move<VkDescriptorPool>			m_descriptorPoolNoTransfer;

	de::MovePtr<Image>				m_resultImage;
	Move<VkImageView>				m_resultImageView;

	std::vector<VkSemaphore>		m_waitSemaphores;
};

BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context&				context,
													const string&			name,
													const ImageType			imageType,
													const tcu::UVec3&		imageSize,
													const TextureFormat&	format,
													const VkImageTiling		tiling,
													const AtomicOperation	operation,
													const bool				useTransfer,
													const ShaderReadType	shaderReadType,
													const ImageBackingType	backingType)
	: vkt::TestInstance	(context)
	, m_name			(name)
	, m_imageType		(imageType)
	, m_imageSize		(imageSize)
	, m_format			(format)
	, m_tiling			(tiling)
	, m_operation		(operation)
	, m_useTransfer		(useTransfer)
	, m_readType		(shaderReadType)
	, m_backingType		(backingType)
{
}

tcu::TestStatus	BinaryAtomicInstanceBase::iterate (void)
{
	const VkDevice			device				= m_context.getDevice();
	const DeviceInterface&	deviceInterface		= m_context.getDeviceInterface();
	const VkQueue			queue				= m_context.getUniversalQueue();
	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
	Allocator&				allocator			= m_context.getDefaultAllocator();
	const VkDeviceSize		imageSizeInBytes	= tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
	const VkDeviceSize		outBuffSizeInBytes	= getOutputBufferSize();
	const VkFormat			imageFormat			= mapTextureFormat(m_format);
	const bool				isTexelBuffer		= (m_imageType == IMAGE_TYPE_BUFFER);

	if (!isTexelBuffer)
	{
		createImageResources(imageFormat, m_useTransfer);
	}

	tcu::UVec3				gridSize			= getShaderGridSize(m_imageType, m_imageSize);

	// Prepare the buffer with the initial data for the image
	m_inputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(deviceInterface,
													device,
													allocator,
													makeBufferCreateInfo(imageSizeInBytes,
																		 VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
																		 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
																		 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
													MemoryRequirement::HostVisible));

	// Fill in buffer with initial data used for image.
	initDataForImage(device, deviceInterface, m_format, m_operation, gridSize, *m_inputBuffer);

	// Create a buffer to store shader output copied from result image
	m_outputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(deviceInterface,
													device,
													allocator,
													makeBufferCreateInfo(outBuffSizeInBytes,
																		 VK_BUFFER_USAGE_TRANSFER_DST_BIT |
																		 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
																		 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
													MemoryRequirement::HostVisible));

	if (!isTexelBuffer)
	{
		prepareResources(m_useTransfer);
	}

	prepareDescriptors(isTexelBuffer);

	Move<VkDescriptorSet>	descriptorSetFillImage;
	Move<VkShaderModule>	shaderModuleFillImage;
	Move<VkPipelineLayout>	pipelineLayoutFillImage;
	Move<VkPipeline>		pipelineFillImage;

	Move<VkDescriptorSet>	descriptorSetReadImage;
	Move<VkShaderModule>	shaderModuleReadImage;
	Move<VkPipelineLayout>	pipelineLayoutReadImage;
	Move<VkPipeline>		pipelineReadImage;

	if (!m_useTransfer)
	{
		m_descriptorSetLayoutNoTransfer =
			DescriptorSetLayoutBuilder()
			.addSingleBinding((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), VK_SHADER_STAGE_COMPUTE_BIT)
			.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
			.build(deviceInterface, device);

		m_descriptorPoolNoTransfer =
			DescriptorPoolBuilder()
			.addType((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), 2)
			.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2)
			.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);

		descriptorSetFillImage = makeDescriptorSet(deviceInterface,
			device,
			*m_descriptorPoolNoTransfer,
			*m_descriptorSetLayoutNoTransfer);

		descriptorSetReadImage = makeDescriptorSet(deviceInterface,
			device,
			*m_descriptorPoolNoTransfer,
			*m_descriptorSetLayoutNoTransfer);

		shaderModuleFillImage	= createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("fillShader"), 0);
		pipelineLayoutFillImage	= makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
		pipelineFillImage		= makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleFillImage);

		if (m_readType == ShaderReadType::SPARSE)
		{
			shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShaderResidency"), 0);
		}
		else
		{
			shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShader"), 0);
		}
		pipelineLayoutReadImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
		pipelineReadImage		= makeComputePipeline(deviceInterface, device, *pipelineLayoutReadImage, *shaderModuleReadImage);
	}

	// Create pipeline
	const Unique<VkShaderModule>	shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
	const Unique<VkPipelineLayout>	pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
	const Unique<VkPipeline>		pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));

	// Create command buffer
	const Unique<VkCommandPool>		cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
	const Unique<VkCommandBuffer>	cmdBuffer(allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	beginCommandBuffer(deviceInterface, *cmdBuffer);

	if (!isTexelBuffer)
	{
		if (m_useTransfer)
		{
			const vector<VkBufferImageCopy>	bufferImageCopy(1, makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize)));
			copyBufferToImage(deviceInterface,
							  *cmdBuffer,
							  *(*m_inputBuffer),
							  imageSizeInBytes,
							  bufferImageCopy,
							  VK_IMAGE_ASPECT_COLOR_BIT,
							  1,
							  getNumLayers(m_imageType, m_imageSize), m_resultImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
		}
		else
		{
			shaderFillImage(*cmdBuffer, *(*m_inputBuffer), *pipelineFillImage, *pipelineLayoutFillImage, *descriptorSetFillImage, imageSizeInBytes, gridSize);
		}
		commandsBeforeCompute(*cmdBuffer);
	}

	deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
	deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);

	deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());

	commandsAfterCompute(*cmdBuffer,
						 *pipelineReadImage,
						 *pipelineLayoutReadImage,
						 *descriptorSetReadImage,
						 outBuffSizeInBytes,
						 m_useTransfer);

	const VkBufferMemoryBarrier	outputBufferPreHostReadBarrier
		= makeBufferMemoryBarrier(((m_useTransfer || isTexelBuffer) ? VK_ACCESS_TRANSFER_WRITE_BIT : VK_ACCESS_SHADER_WRITE_BIT),
								  VK_ACCESS_HOST_READ_BIT,
								  m_outputBuffer->get(),
								  0ull,
								  outBuffSizeInBytes);

	deviceInterface.cmdPipelineBarrier(*cmdBuffer,
									   ((m_useTransfer || isTexelBuffer) ? VK_PIPELINE_STAGE_TRANSFER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT),
									   VK_PIPELINE_STAGE_HOST_BIT,
									   DE_FALSE, 0u, DE_NULL,
									   1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);

	endCommandBuffer(deviceInterface, *cmdBuffer);

	std::vector<VkPipelineStageFlags> waitStages(m_waitSemaphores.size(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
	submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer, false, 1u,
		static_cast<deUint32>(m_waitSemaphores.size()), de::dataOrNull(m_waitSemaphores), de::dataOrNull(waitStages));

	Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();

	invalidateAlloc(deviceInterface, device, outputBufferAllocation);

	if (verifyResult(outputBufferAllocation, (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)))
		return tcu::TestStatus::pass("Comparison succeeded");
	else
		return tcu::TestStatus::fail("Comparison failed");
}

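//! Initializes the result image from the input buffer with a compute shader;
//! used instead of a buffer-to-image copy when useTransfer is false.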
void BinaryAtomicInstanceBase::shaderFillImage (const VkCommandBuffer	cmdBuffer,
												const VkBuffer&			buffer,
												const VkPipeline		pipeline,
												const VkPipelineLayout	pipelineLayout,
												const VkDescriptorSet	descriptorSet,
												const VkDeviceSize&		range,
												const tcu::UVec3&		gridSize)
{
	const VkDevice					device					= m_context.getDevice();
	const DeviceInterface&			deviceInterface			= m_context.getDeviceInterface();
	const VkDescriptorImageInfo		descResultImageInfo		= makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
	const VkDescriptorBufferInfo	descResultBufferInfo	= makeDescriptorBufferInfo(buffer, 0, range);
	const VkImageSubresourceRange	subresourceRange		= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

	DescriptorSetUpdateBuilder()
		.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
		.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
		.update(deviceInterface, device);

	const VkImageMemoryBarrier imageBarrierPre = makeImageMemoryBarrier(0,
																		VK_ACCESS_SHADER_WRITE_BIT,
																		VK_IMAGE_LAYOUT_UNDEFINED,
																		VK_IMAGE_LAYOUT_GENERAL,
																		m_resultImage->get(),
																		subresourceRange);

	deviceInterface.cmdPipelineBarrier(	cmdBuffer,
										VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
										VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
										(VkDependencyFlags)0,
										0, (const VkMemoryBarrier*)DE_NULL,
										0, (const VkBufferMemoryBarrier*)DE_NULL,
										1, &imageBarrierPre);

	deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
	deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);

	deviceInterface.cmdDispatch(cmdBuffer, gridSize.x(), gridSize.y(), gridSize.z());

	const VkImageMemoryBarrier imageBarrierPost = makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT,
																		 VK_ACCESS_SHADER_READ_BIT,
																		 VK_IMAGE_LAYOUT_GENERAL,
																		 VK_IMAGE_LAYOUT_GENERAL,
																		 m_resultImage->get(),
																		 subresourceRange);

	deviceInterface.cmdPipelineBarrier(	cmdBuffer,
										VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
										VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
										(VkDependencyFlags)0,
										0, (const VkMemoryBarrier*)DE_NULL,
										0, (const VkBufferMemoryBarrier*)DE_NULL,
										1, &imageBarrierPost);
}

1259 void BinaryAtomicInstanceBase::createImageAndView	(VkFormat						imageFormat,
1260 													 const tcu::UVec3&				imageExtent,
1261 													 bool							useTransfer,
1262 													 de::MovePtr<Image>&			imagePtr,
1263 													 Move<VkImageView>&				imageViewPtr)
1264 {
1265 	const VkDevice			device			= m_context.getDevice();
1266 	const DeviceInterface&	deviceInterface	= m_context.getDeviceInterface();
1267 	Allocator&				allocator		= m_context.getDefaultAllocator();
1268 	const VkImageUsageFlags	usageFlags		= getUsageFlags(useTransfer);
1269 	VkImageCreateFlags		createFlags		= 0u;
1270 
1271 	if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
1272 		createFlags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
1273 
1274 	const auto numLayers = getNumLayers(m_imageType, m_imageSize);
1275 
1276 	VkImageCreateInfo createInfo =
1277 	{
1278 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,					// VkStructureType			sType;
1279 		DE_NULL,												// const void*				pNext;
1280 		createFlags,											// VkImageCreateFlags		flags;
1281 		mapImageType(m_imageType),								// VkImageType				imageType;
1282 		imageFormat,											// VkFormat					format;
1283 		makeExtent3D(imageExtent),								// VkExtent3D				extent;
1284 		1u,														// deUint32					mipLevels;
1285 		numLayers,												// deUint32					arrayLayers;
1286 		VK_SAMPLE_COUNT_1_BIT,									// VkSampleCountFlagBits	samples;
1287 		m_tiling,												// VkImageTiling			tiling;
1288 		usageFlags,												// VkImageUsageFlags		usage;
1289 		VK_SHARING_MODE_EXCLUSIVE,								// VkSharingMode			sharingMode;
1290 		0u,														// deUint32					queueFamilyIndexCount;
1291 		DE_NULL,												// const deUint32*			pQueueFamilyIndices;
1292 		VK_IMAGE_LAYOUT_UNDEFINED,								// VkImageLayout			initialLayout;
1293 	};
1294 
1295 #ifndef CTS_USES_VULKANSC
1296 	if (m_backingType == ImageBackingType::SPARSE)
1297 	{
1298 		const auto&		vki				= m_context.getInstanceInterface();
1299 		const auto		physicalDevice	= m_context.getPhysicalDevice();
1300 		const auto		sparseQueue		= m_context.getSparseQueue();
1301 		const auto		sparseQueueIdx	= m_context.getSparseQueueFamilyIndex();
1302 		const auto		universalQIdx	= m_context.getUniversalQueueFamilyIndex();
1303 		const deUint32	queueIndices[]	= { universalQIdx, sparseQueueIdx };
1304 
1305 		createInfo.flags |= (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);
1306 
1307 		if (sparseQueueIdx != universalQIdx)
1308 		{
1309 			createInfo.sharingMode				= VK_SHARING_MODE_CONCURRENT;
1310 			createInfo.queueFamilyIndexCount	= static_cast<deUint32>(DE_LENGTH_OF_ARRAY(queueIndices));
1311 			createInfo.pQueueFamilyIndices		= queueIndices;
1312 		}
1313 
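		// Note: binding the sparse image's memory happens on the sparse queue and
		// signals a semaphore once the binding completes; that semaphore is collected
		// in m_waitSemaphores and waited on by the compute submission passed to
		// submitCommandsAndWait() above.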
1314 		const auto sparseImage = new SparseImage(deviceInterface, device, physicalDevice, vki, createInfo, sparseQueue, allocator, m_format);
1315 		m_waitSemaphores.push_back(sparseImage->getSemaphore());
1316 		imagePtr = de::MovePtr<Image>(sparseImage);
1317 	}
1318 	else
1319 #endif // CTS_USES_VULKANSC
1320 		imagePtr = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, createInfo, MemoryRequirement::Any));
1321 
1322 	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numLayers);
1323 
1324 	imageViewPtr = makeImageView(deviceInterface, device, imagePtr->get(), mapImageViewType(m_imageType), imageFormat, subresourceRange);
1325 }
1326 
1327 void BinaryAtomicInstanceBase::createImageResources (const VkFormat&	imageFormat,
1328 													 const bool			useTransfer)
1329 {
1330 	// Create the image that will store the results of the atomic operations.
1331 	createImageAndView(imageFormat, getLayerSize(m_imageType, m_imageSize), useTransfer, m_resultImage, m_resultImageView);
1332 }
1333 
1334 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
1335 {
1336 public:
1337 
1338 						BinaryAtomicEndResultInstance  (Context&					context,
1339 														const string&				name,
1340 														const ImageType				imageType,
1341 														const tcu::UVec3&			imageSize,
1342 														const TextureFormat&		format,
1343 														const VkImageTiling			tiling,
1344 														const AtomicOperation		operation,
1345 														const bool					useTransfer,
1346 														const ShaderReadType		shaderReadType,
1347 														const ImageBackingType		backingType)
1348 							: BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer, shaderReadType, backingType) {}
1349 
1350 	virtual deUint32	getOutputBufferSize			   (void) const;
1351 
1352 	virtual void		prepareResources			   (const bool					useTransfer) { DE_UNREF(useTransfer); }
1353 	virtual void		prepareDescriptors			   (const bool					isTexelBuffer);
1354 
1355 	virtual void		commandsBeforeCompute		   (const VkCommandBuffer) const {}
1356 	virtual void		commandsAfterCompute		   (const VkCommandBuffer		cmdBuffer,
1357 														const VkPipeline			pipeline,
1358 														const VkPipelineLayout		pipelineLayout,
1359 														const VkDescriptorSet		descriptorSet,
1360 														const VkDeviceSize&			range,
1361 														const bool					useTransfer);
1362 
1363 	virtual bool		verifyResult				   (Allocation&					outputBufferAllocation,
1364 														const bool					is64Bit) const;
1365 
1366 protected:
1367 
1368 	template <typename T>
1369 	bool				isValueCorrect				   (const T						resultValue,
1370 														deInt32						x,
1371 														deInt32						y,
1372 														deInt32						z,
1373 														const UVec3&				gridSize,
1374 														const IVec3					extendedGridSize) const;
1375 };
1376 
1377 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
1378 {
1379 	return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1380 }
1381 
1382 void BinaryAtomicEndResultInstance::prepareDescriptors (const bool	isTexelBuffer)
1383 {
1384 	const VkDescriptorType	descriptorType	= isTexelBuffer ?
1385 											VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
1386 											VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1387 	const VkDevice			device			= m_context.getDevice();
1388 	const DeviceInterface&	deviceInterface = m_context.getDeviceInterface();
1389 
1390 	m_descriptorSetLayout =
1391 		DescriptorSetLayoutBuilder()
1392 		.addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1393 		.build(deviceInterface, device);
1394 
1395 	m_descriptorPool =
1396 		DescriptorPoolBuilder()
1397 		.addType(descriptorType)
1398 		.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1399 
1400 	m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1401 
1402 	if (isTexelBuffer)
1403 	{
1404 		m_descResultBufferView = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1405 
1406 		DescriptorSetUpdateBuilder()
1407 			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
1408 			.update(deviceInterface, device);
1409 	}
1410 	else
1411 	{
1412 		const VkDescriptorImageInfo	descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1413 
1414 		DescriptorSetUpdateBuilder()
1415 			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
1416 			.update(deviceInterface, device);
1417 	}
1418 }
1419 
1420 void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer		cmdBuffer,
1421 														  const VkPipeline			pipeline,
1422 														  const VkPipelineLayout	pipelineLayout,
1423 														  const VkDescriptorSet		descriptorSet,
1424 														  const VkDeviceSize&		range,
1425 														  const bool				useTransfer)
1426 {
1427 	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
1428 	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1429 	const UVec3						layerSize			= getLayerSize(m_imageType, m_imageSize);
1430 
1431 	if (m_imageType == IMAGE_TYPE_BUFFER)
1432 	{
1433 		m_outputBuffer = m_inputBuffer;
1434 	}
1435 	else if (useTransfer)
1436 	{
1437 		const VkImageMemoryBarrier	resultImagePostDispatchBarrier =
1438 			makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
1439 									VK_ACCESS_TRANSFER_READ_BIT,
1440 									VK_IMAGE_LAYOUT_GENERAL,
1441 									VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1442 									m_resultImage->get(),
1443 									subresourceRange);
1444 
1445 		deviceInterface.cmdPipelineBarrier(	cmdBuffer,
1446 											VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1447 											VK_PIPELINE_STAGE_TRANSFER_BIT,
1448 											(VkDependencyFlags)0u, 0u, DE_NULL, 0u, DE_NULL,
1449 											1u, &resultImagePostDispatchBarrier);
1450 
1451 		const VkBufferImageCopy		bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(layerSize), getNumLayers(m_imageType, m_imageSize));
1452 
1453 		deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
1454 	}
1455 	else
1456 	{
1457 		const VkDevice					device					= m_context.getDevice();
1458 		const VkDescriptorImageInfo		descResultImageInfo		= makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1459 		const VkDescriptorBufferInfo	descResultBufferInfo	= makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1460 
1461 		DescriptorSetUpdateBuilder()
1462 			.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1463 			.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1464 			.update(deviceInterface, device);
1465 
1466 		const VkImageMemoryBarrier	resultImagePostDispatchBarrier =
1467 			makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
1468 									VK_ACCESS_SHADER_READ_BIT,
1469 									VK_IMAGE_LAYOUT_GENERAL,
1470 									VK_IMAGE_LAYOUT_GENERAL,
1471 									m_resultImage->get(),
1472 									subresourceRange);
1473 
1474 		deviceInterface.cmdPipelineBarrier(	cmdBuffer,
1475 											VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1476 											VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1477 											(VkDependencyFlags)0u, 0u, DE_NULL, 0u, DE_NULL,
1478 											1u, &resultImagePostDispatchBarrier);
1479 
1480 		deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1481 		deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
1482 
1483 		switch (m_imageType)
1484 		{
1485 			case IMAGE_TYPE_1D_ARRAY:
1486 				deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), subresourceRange.layerCount, layerSize.z());
1487 				break;
1488 			case IMAGE_TYPE_2D_ARRAY:
1489 			case IMAGE_TYPE_CUBE:
1490 			case IMAGE_TYPE_CUBE_ARRAY:
1491 				deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1492 				break;
1493 			default:
1494 				deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), layerSize.z());
1495 				break;
1496 		}
1497 	}
1498 }
1499 
1500 bool BinaryAtomicEndResultInstance::verifyResult (Allocation&	outputBufferAllocation,
1501 												  const bool	is64Bit) const
1502 {
1503 	const UVec3	gridSize			= getShaderGridSize(m_imageType, m_imageSize);
1504 	const IVec3 extendedGridSize	= IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
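	// Each pixel is written by NUM_INVOCATIONS_PER_PIXEL invocations, laid out
	// along X in the extended grid: invocation i of pixel (x, y, z) uses
	// gid = (x + i*gridSize.x(), y, z).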
1505 
1506 	tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());
1507 
1508 	for (deInt32 z = 0; z < resultBuffer.getDepth();  z++)
1509 	for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1510 	for (deInt32 x = 0; x < resultBuffer.getWidth();  x++)
1511 	{
1512 		const void* resultValue = resultBuffer.getPixelPtr(x, y, z);
1513 		deInt32 floatToIntValue = 0;
1514 		bool isFloatValue = false;
1515 		if (isFloatFormat(mapTextureFormat(m_format)))
1516 		{
1517 			isFloatValue = true;
1518 			floatToIntValue = static_cast<deInt32>(*((float*)resultValue));
1519 		}
1520 
1521 		if (isOrderIndependentAtomicOperation(m_operation))
1522 		{
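			// For order-independent operations (add, sub, inc, dec, min, max, and,
			// or, xor) the end result does not depend on the order in which the
			// invocations executed, so it can be compared against a single
			// reference value computed by isValueCorrect().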
1523 			if (isUintFormat(mapTextureFormat(m_format)))
1524 			{
1525 				if(is64Bit)
1526 				{
1527 					if (!isValueCorrect<deUint64>(*((deUint64*)resultValue), x, y, z, gridSize, extendedGridSize))
1528 						return false;
1529 				}
1530 				else
1531 				{
1532 					if (!isValueCorrect<deUint32>(*((deUint32*)resultValue), x, y, z, gridSize, extendedGridSize))
1533 						return false;
1534 				}
1535 			}
1536 			else if (isIntFormat(mapTextureFormat(m_format)))
1537 			{
1538 				if (is64Bit)
1539 				{
1540 					if (!isValueCorrect<deInt64>(*((deInt64*)resultValue), x, y, z, gridSize, extendedGridSize))
1541 						return false;
1542 				}
1543 				else
1544 				{
1545 					if (!isValueCorrect<deInt32>(*((deInt32*)resultValue), x, y, z, gridSize, extendedGridSize))
1546 						return false;
1547 				}
1548 			}
1549 			else
1550 			{
1551 				// 32-bit floating point
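				// The per-invocation arguments are derived from integer grid
				// coordinates, so the float result can be compared through its
				// truncated integer representation.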
1552 				if (!isValueCorrect<deInt32>(floatToIntValue, x, y, z, gridSize, extendedGridSize))
1553 					return false;
1554 			}
1555 		}
1556 		else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
1557 		{
1558 			// Check if the end result equals one of the atomic args.
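			// For exchange the final value is simply the argument of whichever
			// invocation happened to write last; since that order is unknown, any
			// of the per-pixel arguments is accepted.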
1559 			bool matchFound = false;
1560 
1561 			for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1562 			{
1563 				const IVec3 gid(x + i*gridSize.x(), y, z);
1564 				matchFound = is64Bit ?
1565 					(*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
1566 					isFloatValue ?
1567 					floatToIntValue == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize) :
1568 					(*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
1569 
1570 			}
1571 
1572 			if (!matchFound)
1573 				return false;
1574 		}
1575 		else if (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
1576 		{
1577 			// Check if the end result equals one of the atomic args.
1578 			bool matchFound = false;
1579 
1580 			for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1581 			{
1582 				const IVec3 gid(x + i*gridSize.x(), y, z);
1583 				matchFound = is64Bit ?
1584 					(*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
1585 					isFloatValue ?
1586 					floatToIntValue == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize) :
1587 					(*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
1588 			}
1589 
1590 			if (!matchFound)
1591 				return false;
1592 		}
1593 		else
1594 			DE_ASSERT(false);
1595 	}
1596 	return true;
1597 }
1598 
1599 template <typename T>
1600 bool BinaryAtomicEndResultInstance::isValueCorrect(const T resultValue, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
1601 {
1602 	T reference = getOperationInitialValue<T>(m_operation);
1603 	for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1604 	{
1605 		const IVec3 gid(x + i*gridSize.x(), y, z);
1606 		T			arg = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1607 		reference = computeBinaryAtomicOperationResult(m_operation, reference, arg);
1608 	}
1609 	return (resultValue == reference);
1610 }
1611 
1612 TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
1613 {
1614 	return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling, m_operation, m_useTransfer, m_readType, m_backingType);
1615 }
1616 
1617 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
1618 {
1619 public:
1620 
1621 						BinaryAtomicIntermValuesInstance   (Context&				context,
1622 															const string&			name,
1623 															const ImageType			imageType,
1624 															const tcu::UVec3&		imageSize,
1625 															const TextureFormat&	format,
1626 															const VkImageTiling		tiling,
1627 															const AtomicOperation	operation,
1628 															const bool				useTransfer,
1629 															const ShaderReadType	shaderReadType,
1630 															const ImageBackingType	backingType)
1631 							: BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer, shaderReadType, backingType) {}
1632 
1633 	virtual deUint32	getOutputBufferSize				   (void) const;
1634 
1635 	virtual void		prepareResources				   (const bool				useTransfer);
1636 	virtual void		prepareDescriptors				   (const bool				isTexelBuffer);
1637 
1638 	virtual void		commandsBeforeCompute			   (const VkCommandBuffer	cmdBuffer) const;
1639 	virtual void		commandsAfterCompute			   (const VkCommandBuffer	cmdBuffer,
1640 															const VkPipeline		pipeline,
1641 															const VkPipelineLayout	pipelineLayout,
1642 															const VkDescriptorSet	descriptorSet,
1643 															const VkDeviceSize&		range,
1644 															const bool				useTransfer);
1645 
1646 	virtual bool		verifyResult					   (Allocation&				outputBufferAllocation,
1647 															const bool				is64Bit) const;
1648 
1649 protected:
1650 
1651 	template <typename T>
1652 	bool				areValuesCorrect				   (tcu::ConstPixelBufferAccess& resultBuffer,
1653 															const bool isFloatingPoint,
1654 															deInt32 x,
1655 															deInt32 y,
1656 															deInt32 z,
1657 															const UVec3& gridSize,
1658 															const IVec3 extendedGridSize) const;
1659 
1660 	template <typename T>
1661 	bool				verifyRecursive					   (const deInt32			index,
1662 															const T					valueSoFar,
1663 															bool					argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1664 															const T					atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1665 															const T					resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
1666 	de::MovePtr<Image>	m_intermResultsImage;
1667 	Move<VkImageView>	m_intermResultsImageView;
1668 };
1669 
1670 deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
1671 {
1672 	return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1673 }
1674 
1675 void BinaryAtomicIntermValuesInstance::prepareResources (const bool useTransfer)
1676 {
1677 	const UVec3 layerSize			= getLayerSize(m_imageType, m_imageSize);
1678 	const bool  isCubeBasedImage	= (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
1679 	const UVec3 extendedLayerSize	= isCubeBasedImage	? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
1680 														: UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
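	// The intermediate-results image is NUM_INVOCATIONS_PER_PIXEL times wider than
	// the result image so that every invocation can store its atomic return value
	// in a unique texel; cube-based images scale Y as well so the image stays square.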
1681 
1682 	createImageAndView(mapTextureFormat(m_format), extendedLayerSize, useTransfer, m_intermResultsImage, m_intermResultsImageView);
1683 }
1684 
1685 void BinaryAtomicIntermValuesInstance::prepareDescriptors (const bool	isTexelBuffer)
1686 {
1687 	const VkDescriptorType	descriptorType	= isTexelBuffer ?
1688 											VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
1689 											VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1690 
1691 	const VkDevice			device			= m_context.getDevice();
1692 	const DeviceInterface&	deviceInterface = m_context.getDeviceInterface();
1693 
1694 	m_descriptorSetLayout =
1695 		DescriptorSetLayoutBuilder()
1696 		.addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1697 		.addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1698 		.build(deviceInterface, device);
1699 
1700 	m_descriptorPool =
1701 		DescriptorPoolBuilder()
1702 		.addType(descriptorType, 2u)
1703 		.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1704 
1705 	m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1706 
1707 	if (isTexelBuffer)
1708 	{
1709 		m_descResultBufferView			= makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1710 		m_descIntermResultsBufferView	= makeBufferView(deviceInterface, device, *(*m_outputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1711 
1712 		DescriptorSetUpdateBuilder()
1713 			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
1714 			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &(m_descIntermResultsBufferView.get()))
1715 			.update(deviceInterface, device);
1716 	}
1717 	else
1718 	{
1719 		const VkDescriptorImageInfo	descResultImageInfo			= makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1720 		const VkDescriptorImageInfo	descIntermResultsImageInfo	= makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1721 
1722 		DescriptorSetUpdateBuilder()
1723 			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
1724 			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &descIntermResultsImageInfo)
1725 			.update(deviceInterface, device);
1726 	}
1727 }
1728 
1729 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
1730 {
1731 	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
1732 	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1733 
1734 	const VkImageMemoryBarrier	imagePreDispatchBarrier =
1735 		makeImageMemoryBarrier(	0u,
1736 								VK_ACCESS_SHADER_WRITE_BIT,
1737 								VK_IMAGE_LAYOUT_UNDEFINED,
1738 								VK_IMAGE_LAYOUT_GENERAL,
1739 								m_intermResultsImage->get(),
1740 								subresourceRange);
1741 
1742 	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
1743 }
1744 
1745 void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer		cmdBuffer,
1746 															 const VkPipeline			pipeline,
1747 															 const VkPipelineLayout		pipelineLayout,
1748 															 const VkDescriptorSet		descriptorSet,
1749 															 const VkDeviceSize&		range,
1750 															 const bool					useTransfer)
1751 {
1752 	// No post-compute commands are needed for texel buffer images.
1753 	if (m_imageType == IMAGE_TYPE_BUFFER)
1754 		return;
1755 
1756 	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
1757 	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1758 	const UVec3						layerSize			= getLayerSize(m_imageType, m_imageSize);
1759 
1760 	if (useTransfer)
1761 	{
1762 		const VkImageMemoryBarrier	imagePostDispatchBarrier =
1763 			makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
1764 									VK_ACCESS_TRANSFER_READ_BIT,
1765 									VK_IMAGE_LAYOUT_GENERAL,
1766 									VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1767 									m_intermResultsImage->get(),
1768 									subresourceRange);
1769 
1770 		deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);
1771 
1772 		const UVec3					extendedLayerSize		= UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1773 		const VkBufferImageCopy		bufferImageCopyParams	= makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
1774 
1775 		deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
1776 	}
1777 	else
1778 	{
1779 		const VkDevice					device					= m_context.getDevice();
1780 		const VkDescriptorImageInfo		descResultImageInfo		= makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1781 		const VkDescriptorBufferInfo	descResultBufferInfo	= makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1782 
1783 		DescriptorSetUpdateBuilder()
1784 			.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1785 			.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1786 			.update(deviceInterface, device);
1787 
1788 		const VkImageMemoryBarrier	resultImagePostDispatchBarrier =
1789 		makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
1790 								VK_ACCESS_SHADER_READ_BIT,
1791 								VK_IMAGE_LAYOUT_GENERAL,
1792 								VK_IMAGE_LAYOUT_GENERAL,
1793 								m_intermResultsImage->get(),
1794 								subresourceRange);
1795 
1796 		deviceInterface.cmdPipelineBarrier(	cmdBuffer,
1797 									VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1798 									VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1799 									(VkDependencyFlags)0u, 0u, DE_NULL, 0u, DE_NULL,
1800 									1u, &resultImagePostDispatchBarrier);
1801 
1802 		deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1803 		deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
1804 
1805 		switch (m_imageType)
1806 		{
1807 			case IMAGE_TYPE_1D_ARRAY:
1808 				deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), subresourceRange.layerCount, layerSize.z());
1809 				break;
1810 			case IMAGE_TYPE_2D_ARRAY:
1811 			case IMAGE_TYPE_CUBE:
1812 			case IMAGE_TYPE_CUBE_ARRAY:
1813 				deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1814 				break;
1815 			default:
1816 				deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1817 				break;
1818 		}
1819 	}
1820 }
1821 
1822 bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation&	outputBufferAllocation,
1823 													 const bool		is64Bit) const
1824 {
1825 	const UVec3	gridSize		 = getShaderGridSize(m_imageType, m_imageSize);
1826 	const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
1827 
1828 	tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());
1829 
1830 	for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
1831 	for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1832 	for (deInt32 x = 0; x < static_cast<deInt32>(gridSize.x()); x++)
1833 	{
1834 		if (isUintFormat(mapTextureFormat(m_format)))
1835 		{
1836 			if (is64Bit)
1837 			{
1838 				if (!areValuesCorrect<deUint64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1839 					return false;
1840 			}
1841 			else
1842 			{
1843 				if (!areValuesCorrect<deUint32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1844 					return false;
1845 			}
1846 		}
1847 		else if (isIntFormat(mapTextureFormat(m_format)))
1848 		{
1849 			if (is64Bit)
1850 			{
1851 				if (!areValuesCorrect<deInt64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1852 					return false;
1853 			}
1854 			else
1855 			{
1856 				if (!areValuesCorrect<deInt32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1857 					return false;
1858 			}
1859 		}
1860 		else
1861 		{
1862 			// 32-bit floating point
1863 			if (!areValuesCorrect<deInt32>(resultBuffer, true, x, y, z, gridSize, extendedGridSize))
1864 				return false;
1865 		}
1866 	}
1867 
1868 	return true;
1869 }
1870 
1871 template <typename T>
1872 bool BinaryAtomicIntermValuesInstance::areValuesCorrect(tcu::ConstPixelBufferAccess& resultBuffer, const bool isFloatingPoint, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
1873 {
1874 	T		resultValues[NUM_INVOCATIONS_PER_PIXEL];
1875 	T		atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
1876 	bool	argsUsed[NUM_INVOCATIONS_PER_PIXEL];
1877 
1878 	for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1879 	{
1880 		IVec3 gid(x + i*gridSize.x(), y, z);
1881 		T data = *((T*)resultBuffer.getPixelPtr(gid.x(), gid.y(), gid.z()));
1882 		if (isFloatingPoint)
1883 		{
1884 			float fData;
1885 			deMemcpy(&fData, &data, sizeof(fData));
1886 			data = static_cast<T>(fData);
1887 		}
1888 		resultValues[i] = data;
1889 		atomicArgs[i]	= getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1890 		argsUsed[i]		= false;
1891 	}
1892 
1893 	// Verify that the return values form a valid sequence.
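	// Illustrative example (not executed): for ATOMIC_OPERATION_ADD with initial
	// value 0 and atomicArgs {1, 2, 3, 4, 5}, resultValues {0, 1, 3, 6, 10} form a
	// valid sequence (the arguments were applied in order 1, 2, 3, 4, 5), whereas
	// {0, 1, 3, 6, 9} cannot be produced by any ordering and would fail.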
1894 	return verifyRecursive(0, getOperationInitialValue<T>(m_operation), argsUsed, atomicArgs, resultValues);
1895 }
1896 
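// Depth-first search over the possible orderings of the atomic arguments. With
// NUM_INVOCATIONS_PER_PIXEL == 5 this explores at most 5! = 120 paths per pixel,
// which keeps exhaustive verification cheap.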
1897 template <typename T>
1898 bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32	index,
1899 														const T			valueSoFar,
1900 														bool			argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1901 														const T			atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1902 														const T			resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
1903 {
1904 	if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
1905 		return true;
1906 
1907 	for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1908 	{
1909 		if (!argsUsed[i] && resultValues[i] == valueSoFar)
1910 		{
1911 			argsUsed[i] = true;
1912 
1913 			if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
1914 			{
1915 				return true;
1916 			}
1917 
1918 			argsUsed[i] = false;
1919 		}
1920 	}
1921 
1922 	return false;
1923 }
1924 
1925 TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
1926 {
1927 	return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling, m_operation, m_useTransfer, m_readType, m_backingType);
1928 }
1929 
1930 } // anonymous namespace
1931 
1932 tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
1933 {
1934 	de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));
1935 
1936 	struct ImageParams
1937 	{
1938 		ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
1939 			: m_imageType	(imageType)
1940 			, m_imageSize	(imageSize)
1941 		{
1942 		}
1943 		const ImageType		m_imageType;
1944 		const tcu::UVec3	m_imageSize;
1945 	};
1946 
1947 	const ImageParams imageParamsArray[] =
1948 	{
1949 		ImageParams(IMAGE_TYPE_1D,			tcu::UVec3(64u, 1u, 1u)),
1950 		ImageParams(IMAGE_TYPE_1D_ARRAY,	tcu::UVec3(64u, 1u, 8u)),
1951 		ImageParams(IMAGE_TYPE_2D,			tcu::UVec3(64u, 64u, 1u)),
1952 		ImageParams(IMAGE_TYPE_2D_ARRAY,	tcu::UVec3(64u, 64u, 8u)),
1953 		ImageParams(IMAGE_TYPE_3D,			tcu::UVec3(48u, 48u, 8u)),
1954 		ImageParams(IMAGE_TYPE_CUBE,		tcu::UVec3(64u, 64u, 1u)),
1955 		ImageParams(IMAGE_TYPE_CUBE_ARRAY,	tcu::UVec3(64u, 64u, 2u)),
1956 		ImageParams(IMAGE_TYPE_BUFFER,		tcu::UVec3(64u, 1u, 1u))
1957 	};
1958 
1959 	const tcu::TextureFormat formats[] =
1960 	{
1961 		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
1962 		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
1963 		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::FLOAT),
1964 		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT64),
1965 		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT64)
1966 	};
1967 
1968 	static const VkImageTiling s_tilings[] = {
1969 		VK_IMAGE_TILING_OPTIMAL,
1970 		VK_IMAGE_TILING_LINEAR,
1971 	};
1972 
1973 	const struct
1974 	{
1975 		ShaderReadType		type;
1976 		const char*			name;
1977 	} readTypes[] =
1978 	{
1979 		{	ShaderReadType::NORMAL,	"normal_read"	},
1980 #ifndef CTS_USES_VULKANSC
1981 		{	ShaderReadType::SPARSE,	"sparse_read"	},
1982 #endif // CTS_USES_VULKANSC
1983 	};
1984 
1985 	const struct
1986 	{
1987 		ImageBackingType	type;
1988 		const char*			name;
1989 	} backingTypes[] =
1990 	{
1991 		{	ImageBackingType::NORMAL,	"normal_img"	},
1992 #ifndef CTS_USES_VULKANSC
1993 		{	ImageBackingType::SPARSE,	"sparse_img"	},
1994 #endif // CTS_USES_VULKANSC
1995 	};
1996 
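	// The resulting hierarchy is operation / image type / transfer mode / read
	// type / backing type / format case, e.g. (assuming the usual CTS naming)
	// atomic_operations.add.2d.notransfer.normal_read.normal_img.r32ui_end_result.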
1997 	for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
1998 	{
1999 		const AtomicOperation operation = (AtomicOperation)operationI;
2000 
2001 		de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));
2002 
2003 		for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
2004 		{
2005 			const ImageType	 imageType = imageParamsArray[imageTypeNdx].m_imageType;
2006 			const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
2007 
2008 			de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
2009 
2010 			for (int useTransferIdx = 0; useTransferIdx < 2; ++useTransferIdx)
2011 			{
2012 				const bool				useTransfer	= (useTransferIdx > 0);
2013 				const string			groupName	= (!useTransfer ? "no" : "") + string("transfer");
2014 
2015 				de::MovePtr<tcu::TestCaseGroup> transferGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str(), ""));
2016 
2017 				for (int readTypeIdx = 0; readTypeIdx < DE_LENGTH_OF_ARRAY(readTypes); ++readTypeIdx)
2018 				{
2019 					const auto& readType = readTypes[readTypeIdx];
2020 
2021 					de::MovePtr<tcu::TestCaseGroup> readTypeGroup(new tcu::TestCaseGroup(testCtx, readType.name, ""));
2022 
2023 					for (int backingTypeIdx = 0; backingTypeIdx < DE_LENGTH_OF_ARRAY(backingTypes); ++backingTypeIdx)
2024 					{
2025 						const auto& backingType = backingTypes[backingTypeIdx];
2026 
2027 						de::MovePtr<tcu::TestCaseGroup> backingTypeGroup(new tcu::TestCaseGroup(testCtx, backingType.name, ""));
2028 
2029 						for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
2030 						{
2031 							for (int tilingNdx = 0; tilingNdx < DE_LENGTH_OF_ARRAY(s_tilings); tilingNdx++)
2032 							{
2033 								const TextureFormat&	format		= formats[formatNdx];
2034 								const std::string		formatName	= getShaderImageFormatQualifier(format);
2035 								const char* suffix = (s_tilings[tilingNdx] == VK_IMAGE_TILING_OPTIMAL) ? "" : "_linear";
2036 
2037 								// Non-float buffer cases would require additional SPIR-V programs in vktImageAtomicSpirvShaders.cpp; skip them.
2038 								if (imageType == IMAGE_TYPE_BUFFER && (format.type != tcu::TextureFormat::FLOAT))
2039 								{
2040 									continue;
2041 								}
2042 
2043 								// Only 2D and 3D images may support sparse residency.
2044 								// VK_IMAGE_TILING_LINEAR does not support sparse residency.
2045 								const auto vkImageType = mapImageType(imageType);
2046 								if (backingType.type == ImageBackingType::SPARSE && ((vkImageType != VK_IMAGE_TYPE_2D && vkImageType != VK_IMAGE_TYPE_3D) || (s_tilings[tilingNdx] == VK_IMAGE_TILING_LINEAR)))
2047 									continue;
2048 
2049 								// Only some operations are supported on floating-point
2050 								if (format.type == tcu::TextureFormat::FLOAT)
2051 								{
2052 									if (operation != ATOMIC_OPERATION_ADD &&
2053 #ifndef CTS_USES_VULKANSC
2054 										operation != ATOMIC_OPERATION_MIN &&
2055 										operation != ATOMIC_OPERATION_MAX &&
2056 #endif // CTS_USES_VULKANSC
2057 										operation != ATOMIC_OPERATION_EXCHANGE)
2058 									{
2059 										continue;
2060 									}
2061 								}
2062 
2063 								if (readType.type == ShaderReadType::SPARSE)
2064 								{
2065 									// When using transfer, shader reads will not be used, so avoid creating two identical cases.
2066 									if (useTransfer)
2067 										continue;
2068 
2069 									// Sparse reads are not supported for all types of images.
2070 									if (imageType == IMAGE_TYPE_1D || imageType == IMAGE_TYPE_1D_ARRAY || imageType == IMAGE_TYPE_BUFFER)
2071 										continue;
2072 								}
2073 
2074 								// End-result cases check the final value after all the atomic operations, not the intermediate return values.
2075 								const string caseEndResult = formatName + "_end_result" + suffix;
2076 								backingTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, s_tilings[tilingNdx], operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2077 
2078 								// Intermediate-value cases check the values returned by the atomic function, not the end result.
2079 								const string caseIntermValues = formatName + "_intermediate_values" + suffix;
2080 								backingTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, s_tilings[tilingNdx], operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2081 							}
2082 						}
2083 
2084 						readTypeGroup->addChild(backingTypeGroup.release());
2085 					}
2086 
2087 					transferGroup->addChild(readTypeGroup.release());
2088 				}
2089 
2090 				imageTypeGroup->addChild(transferGroup.release());
2091 			}
2092 
2093 			operationGroup->addChild(imageTypeGroup.release());
2094 		}
2095 
2096 		imageAtomicOperationsTests->addChild(operationGroup.release());
2097 	}
2098 
2099 	return imageAtomicOperationsTests.release();
2100 }
2101 
2102 } // image
2103 } // vkt
2104