/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file  vktImageAtomicOperationTests.cpp
 * \brief Image atomic operation tests
 *//*--------------------------------------------------------------------*/

#include "vktImageAtomicOperationTests.hpp"
#include "vktImageAtomicSpirvShaders.hpp"

#include "deUniquePtr.hpp"
#include "deStringUtil.hpp"
#include "deSTLUtil.hpp"

#include "vktTestCaseUtil.hpp"
#include "vkPrograms.hpp"
#include "vkImageUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vktImageTestsUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"

#include "tcuTextureUtil.hpp"
#include "tcuTexture.hpp"
#include "tcuVectorType.hpp"
#include "tcuStringTemplate.hpp"

namespace vkt
{
namespace image
{
namespace
{

using namespace vk;
using namespace std;
using de::toString;

using tcu::TextureFormat;
using tcu::IVec2;
using tcu::IVec3;
using tcu::UVec3;
using tcu::Vec4;
using tcu::IVec4;
using tcu::UVec4;
using tcu::CubeFace;
using tcu::Texture1D;
using tcu::Texture2D;
using tcu::Texture3D;
using tcu::Texture2DArray;
using tcu::TextureCube;
using tcu::PixelBufferAccess;
using tcu::ConstPixelBufferAccess;
using tcu::Vector;
using tcu::TestContext;

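// Each pixel of the result image is targeted by this many shader invocations,
// so the atomic operations on a given texel actually contend with each other.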
enum
{
	NUM_INVOCATIONS_PER_PIXEL = 5u
};

enum AtomicOperation
{
	ATOMIC_OPERATION_ADD = 0,
	ATOMIC_OPERATION_SUB,
	ATOMIC_OPERATION_INC,
	ATOMIC_OPERATION_DEC,
	ATOMIC_OPERATION_MIN,
	ATOMIC_OPERATION_MAX,
	ATOMIC_OPERATION_AND,
	ATOMIC_OPERATION_OR,
	ATOMIC_OPERATION_XOR,
	ATOMIC_OPERATION_EXCHANGE,
	ATOMIC_OPERATION_COMPARE_EXCHANGE,

	ATOMIC_OPERATION_LAST
};

enum class ShaderReadType
{
	NORMAL = 0,
	SPARSE,
};

enum class ImageBackingType
{
	NORMAL = 0,
	SPARSE,
};

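// Builds the GLSL coordinate expression for the given image type, e.g.
// getCoordStr(IMAGE_TYPE_2D, "gx", "gy", "gz") yields "ivec2(gx,gy)".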
static string getCoordStr (const ImageType		imageType,
						   const std::string&	x,
						   const std::string&	y,
						   const std::string&	z)
{
	switch (imageType)
	{
		case IMAGE_TYPE_1D:
		case IMAGE_TYPE_BUFFER:
			return x;
		case IMAGE_TYPE_1D_ARRAY:
		case IMAGE_TYPE_2D:
			return string("ivec2(" + x + "," + y + ")");
		case IMAGE_TYPE_2D_ARRAY:
		case IMAGE_TYPE_3D:
		case IMAGE_TYPE_CUBE:
		case IMAGE_TYPE_CUBE_ARRAY:
			return string("ivec3(" + x + "," + y + "," + z + ")");
		default:
			DE_ASSERT(false);
			return "";
	}
}

static string getComponentTypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
{
	DE_ASSERT(intFormat || uintFormat || floatFormat);

	const bool is64 = (componentWidth == 64);

	if (intFormat)
		return (is64 ? "int64_t" : "int");
	if (uintFormat)
		return (is64 ? "uint64_t" : "uint");
	if (floatFormat)
		return (is64 ? "double" : "float");

	return "";
}

static string getVec4TypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
{
	DE_ASSERT(intFormat || uintFormat || floatFormat);

	const bool is64 = (componentWidth == 64);

	if (intFormat)
		return (is64 ? "i64vec4" : "ivec4");
	if (uintFormat)
		return (is64 ? "u64vec4" : "uvec4");
	if (floatFormat)
		return (is64 ? "f64vec4" : "vec4");

	return "";
}

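// Builds the GLSL expression for the value passed to the atomic function.
// getAtomicFuncArgument() below computes the same value on the host so the
// expected results can be recomputed during verification.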
static string getAtomicFuncArgumentShaderStr (const AtomicOperation	op,
											  const string&			x,
											  const string&			y,
											  const string&			z,
											  const IVec3&			gridSize)
{
	switch (op)
	{
		case ATOMIC_OPERATION_ADD:
		case ATOMIC_OPERATION_AND:
		case ATOMIC_OPERATION_OR:
		case ATOMIC_OPERATION_XOR:
			return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
		case ATOMIC_OPERATION_MIN:
		case ATOMIC_OPERATION_MAX:
			// multiply by (1 - 2*(value % 2)) to make half of the data negative;
			// for uint formats the negative values wrap around and generate large numbers
			return string("((1 - 2*(" + x + " % 2)) * (" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + "))");
		case ATOMIC_OPERATION_EXCHANGE:
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:
			return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
		default:
			DE_ASSERT(false);
			return "";
	}
}

static string getAtomicOperationCaseName (const AtomicOperation op)
{
	switch (op)
	{
		case ATOMIC_OPERATION_ADD:				return string("add");
		case ATOMIC_OPERATION_SUB:				return string("sub");
		case ATOMIC_OPERATION_INC:				return string("inc");
		case ATOMIC_OPERATION_DEC:				return string("dec");
		case ATOMIC_OPERATION_MIN:				return string("min");
		case ATOMIC_OPERATION_MAX:				return string("max");
		case ATOMIC_OPERATION_AND:				return string("and");
		case ATOMIC_OPERATION_OR:				return string("or");
		case ATOMIC_OPERATION_XOR:				return string("xor");
		case ATOMIC_OPERATION_EXCHANGE:			return string("exchange");
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return string("compare_exchange");
		default:
			DE_ASSERT(false);
			return "";
	}
}

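// Maps an operation to its GLSL imageAtomic* built-in. SUB, INC and DEC have no
// built-in of their own and are emitted as SPIR-V assembly instead (see
// isSpirvAtomicOperation() below), so they are intentionally absent here.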
static string getAtomicOperationShaderFuncName (const AtomicOperation op)
{
	switch (op)
	{
		case ATOMIC_OPERATION_ADD:				return string("imageAtomicAdd");
		case ATOMIC_OPERATION_MIN:				return string("imageAtomicMin");
		case ATOMIC_OPERATION_MAX:				return string("imageAtomicMax");
		case ATOMIC_OPERATION_AND:				return string("imageAtomicAnd");
		case ATOMIC_OPERATION_OR:				return string("imageAtomicOr");
		case ATOMIC_OPERATION_XOR:				return string("imageAtomicXor");
		case ATOMIC_OPERATION_EXCHANGE:			return string("imageAtomicExchange");
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return string("imageAtomicCompSwap");
		default:
			DE_ASSERT(false);
			return "";
	}
}

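// The initial texel value is chosen per operation, presumably so the running
// result stays in a representable range: SUB and DEC start high to avoid
// wrapping below zero, while MIN and AND start at a large all-ones-style
// pattern that the per-invocation arguments can actually lower or mask down.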
template <typename T>
T getOperationInitialValue (const AtomicOperation op)
{
	switch (op)
	{
		// \note 18 is just an arbitrary small nonzero value.
		case ATOMIC_OPERATION_ADD:				return 18;
		case ATOMIC_OPERATION_INC:				return 18;
		case ATOMIC_OPERATION_SUB:				return (1 << 24) - 1;
		case ATOMIC_OPERATION_DEC:				return (1 << 24) - 1;
		case ATOMIC_OPERATION_MIN:				return (1 << 15) - 1;
		case ATOMIC_OPERATION_MAX:				return 18;
		case ATOMIC_OPERATION_AND:				return (1 << 15) - 1;
		case ATOMIC_OPERATION_OR:				return 18;
		case ATOMIC_OPERATION_XOR:				return 18;
		case ATOMIC_OPERATION_EXCHANGE:			return 18;
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return 18;
		default:
			DE_ASSERT(false);
			return 0xFFFFFFFF;
	}
}

template <>
deInt64 getOperationInitialValue<deInt64>(const AtomicOperation op)
{
	switch (op)
	{
		// \note 0x000000BEFFFFFF18 is just an arbitrary nonzero value.
		case ATOMIC_OPERATION_ADD:				return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_INC:				return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_SUB:				return (1ull << 56) - 1;
		case ATOMIC_OPERATION_DEC:				return (1ull << 56) - 1;
		case ATOMIC_OPERATION_MIN:				return (1ull << 47) - 1;
		case ATOMIC_OPERATION_MAX:				return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_AND:				return (1ull << 47) - 1;
		case ATOMIC_OPERATION_OR:				return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_XOR:				return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_EXCHANGE:			return 0x000000BEFFFFFF18;
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return 0x000000BEFFFFFF18;
		default:
			DE_ASSERT(false);
			return 0xFFFFFFFFFFFFFFFF;
	}
}

template <>
deUint64 getOperationInitialValue<deUint64>(const AtomicOperation op)
{
	return (deUint64)getOperationInitialValue<deInt64>(op);
}


template <typename T>
static T getAtomicFuncArgument (const AtomicOperation	op,
								const IVec3&			invocationID,
								const IVec3&			gridSize)
{
	const T x = static_cast<T>(invocationID.x());
	const T y = static_cast<T>(invocationID.y());
	const T z = static_cast<T>(invocationID.z());

	switch (op)
	{
		// \note Fall-throughs.
		case ATOMIC_OPERATION_ADD:
		case ATOMIC_OPERATION_SUB:
		case ATOMIC_OPERATION_AND:
		case ATOMIC_OPERATION_OR:
		case ATOMIC_OPERATION_XOR:
			return x*x + y*y + z*z;
		case ATOMIC_OPERATION_INC:
		case ATOMIC_OPERATION_DEC:
			return 1;
		case ATOMIC_OPERATION_MIN:
		case ATOMIC_OPERATION_MAX:
			// multiply half of the data by -1
			return (1-2*(x % 2))*(x*x + y*y + z*z);
		case ATOMIC_OPERATION_EXCHANGE:
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:
			return (z*static_cast<T>(gridSize.x()) + x)*static_cast<T>(gridSize.y()) + y;
		default:
			DE_ASSERT(false);
			return -1;
	}
}

//! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried out (i.e. it is both commutative and associative).
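//! EXCHANGE and COMPARE_EXCHANGE are excluded below, since the final texel value depends on which invocation executes last.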
static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
{
	return	op == ATOMIC_OPERATION_ADD ||
			op == ATOMIC_OPERATION_SUB ||
			op == ATOMIC_OPERATION_INC ||
			op == ATOMIC_OPERATION_DEC ||
			op == ATOMIC_OPERATION_MIN ||
			op == ATOMIC_OPERATION_MAX ||
			op == ATOMIC_OPERATION_AND ||
			op == ATOMIC_OPERATION_OR ||
			op == ATOMIC_OPERATION_XOR;
}

//! Checks if the operation needs a SPIR-V shader.
static bool isSpirvAtomicOperation (const AtomicOperation op)
{
	return	op == ATOMIC_OPERATION_SUB ||
			op == ATOMIC_OPERATION_INC ||
			op == ATOMIC_OPERATION_DEC;
}

//! Returns the SPIR-V assembler name of the given operation.
static std::string getSpirvAtomicOpName (const AtomicOperation op)
{
	switch (op)
	{
	case ATOMIC_OPERATION_SUB:	return "OpAtomicISub";
	case ATOMIC_OPERATION_INC:	return "OpAtomicIIncrement";
	case ATOMIC_OPERATION_DEC:	return "OpAtomicIDecrement";
	default:					break;
	}

	DE_ASSERT(false);
	return "";
}

//! Returns true if the given SPIR-V operation does not need the last argument, compared to OpAtomicIAdd.
static bool isSpirvAtomicNoLastArgOp (const AtomicOperation op)
{
	switch (op)
	{
	case ATOMIC_OPERATION_SUB:	return false;
	case ATOMIC_OPERATION_INC:	// fallthrough
	case ATOMIC_OPERATION_DEC:	return true;
	default:					break;
	}

	DE_ASSERT(false);
	return false;
}

//! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
template <typename T>
static T computeBinaryAtomicOperationResult (const AtomicOperation op, const T a, const T b)
{
	switch (op)
	{
		case ATOMIC_OPERATION_INC:				// fallthrough.
		case ATOMIC_OPERATION_ADD:				return a + b;
		case ATOMIC_OPERATION_DEC:				// fallthrough.
		case ATOMIC_OPERATION_SUB:				return a - b;
		case ATOMIC_OPERATION_MIN:				return de::min(a, b);
		case ATOMIC_OPERATION_MAX:				return de::max(a, b);
		case ATOMIC_OPERATION_AND:				return a & b;
		case ATOMIC_OPERATION_OR:				return a | b;
		case ATOMIC_OPERATION_XOR:				return a ^ b;
		case ATOMIC_OPERATION_EXCHANGE:			return b;
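		// The compare value below mirrors getOperationInitialValue(): 18 for 32-bit
		// data and 0xBEFFFFFF18 for 64-bit, so a swap succeeds only while the texel
		// still holds its initial value.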
		case ATOMIC_OPERATION_COMPARE_EXCHANGE:	return (a == (sizeof(T) == 8 ? 0xBEFFFFFF18 : 18)) ? b : a;
		default:
			DE_ASSERT(false);
			return -1;
	}
}

VkImageUsageFlags getUsageFlags (bool useTransfer)
{
	VkImageUsageFlags usageFlags = VK_IMAGE_USAGE_STORAGE_BIT;

	if (useTransfer)
		usageFlags |= (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);

	return usageFlags;
}

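// Adds the compute shaders used to fill the image from a buffer and to read it
// back. These stand in for transfer operations when a test runs with
// useTransfer == false; the "readShaderResidency" variant additionally
// cross-checks imageLoad() against sparseImageLoadARB() for sparse-backed images.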
void AddFillReadShader (SourceCollections&			sourceCollections,
						const ImageType&			imageType,
						const tcu::TextureFormat&	format,
						const string&				componentType,
						const string&				vec4Type)
{
	const string	imageInCoord			= getCoordStr(imageType, "gx", "gy", "gz");
	const string	shaderImageFormatStr	= getShaderImageFormatQualifier(format);
	const string	shaderImageTypeStr		= getShaderImageType(format, imageType);
	const auto		componentWidth			= getFormatComponentWidth(mapTextureFormat(format), 0u);
	const string	extensions				= ((componentWidth == 64u)
											?	"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
												"#extension GL_EXT_shader_image_int64 : require\n"
											:	"");

	const string fillShader =	"#version 450\n"
								+ extensions +
								"precision highp " + shaderImageTypeStr + ";\n"
								"\n"
								"layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
								"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
								"\n"
								"layout(std430, binding = 1) buffer inputBuffer\n"
								"{\n"
								"	" + componentType + " data[];\n"
								"} inBuffer;\n"
								"\n"
								"void main(void)\n"
								"{\n"
								"	int gx = int(gl_GlobalInvocationID.x);\n"
								"	int gy = int(gl_GlobalInvocationID.y);\n"
								"	int gz = int(gl_GlobalInvocationID.z);\n"
								"	uint index = gx + (gy * gl_NumWorkGroups.x) + (gz * gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
								"	imageStore(u_resultImage, " + imageInCoord + ", " + vec4Type + "(inBuffer.data[index]));\n"
								"}\n";

	const string readShader =	"#version 450\n"
								+ extensions +
								"precision highp " + shaderImageTypeStr + ";\n"
								"\n"
								"layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
								"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
								"\n"
								"layout(std430, binding = 1) buffer outputBuffer\n"
								"{\n"
								"	" + componentType + " data[];\n"
								"} outBuffer;\n"
								"\n"
								"void main(void)\n"
								"{\n"
								"	int gx = int(gl_GlobalInvocationID.x);\n"
								"	int gy = int(gl_GlobalInvocationID.y);\n"
								"	int gz = int(gl_GlobalInvocationID.z);\n"
								"	uint index = gx + (gy * gl_NumWorkGroups.x) + (gz * gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
								"	outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
								"}\n";

	if ((imageType != IMAGE_TYPE_1D) &&
		(imageType != IMAGE_TYPE_1D_ARRAY) &&
		(imageType != IMAGE_TYPE_BUFFER))
	{
		const string readShaderResidency  = "#version 450\n"
											"#extension GL_ARB_sparse_texture2 : require\n"
											+ extensions +
											"precision highp " + shaderImageTypeStr + ";\n"
											"\n"
											"layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
											"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
											"\n"
											"layout(std430, binding = 1) buffer outputBuffer\n"
											"{\n"
											"	" + componentType + " data[];\n"
											"} outBuffer;\n"
											"\n"
											"void main(void)\n"
											"{\n"
											"	int gx = int(gl_GlobalInvocationID.x);\n"
											"	int gy = int(gl_GlobalInvocationID.y);\n"
											"	int gz = int(gl_GlobalInvocationID.z);\n"
											"	uint index = gx + (gy * gl_NumWorkGroups.x) + (gz * gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
											"	outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
											"	" + vec4Type + " sparseValue;\n"
											"	sparseImageLoadARB(u_resultImage, " + imageInCoord + ", sparseValue);\n"
											"	if (outBuffer.data[index] != sparseValue.x)\n"
											"		outBuffer.data[index] = " + vec4Type + "(1234).x;\n"
											"}\n";

		sourceCollections.glslSources.add("readShaderResidency") << glu::ComputeSource(readShaderResidency.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
	}

	sourceCollections.glslSources.add("fillShader") << glu::ComputeSource(fillShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
	sourceCollections.glslSources.add("readShader") << glu::ComputeSource(readShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
}

//! Prepare the initial data for the image
static void initDataForImage (const VkDevice			device,
							  const DeviceInterface&	deviceInterface,
							  const TextureFormat&		format,
							  const AtomicOperation		operation,
							  const tcu::UVec3&			gridSize,
							  Buffer&					buffer)
{
	Allocation&				bufferAllocation	= buffer.getAllocation();
	const VkFormat			imageFormat			= mapTextureFormat(format);
	tcu::PixelBufferAccess	pixelBuffer			(format, gridSize.x(), gridSize.y(), gridSize.z(), bufferAllocation.getHostPtr());

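	// R64 texels are written directly through the mapped pointer; the generic
	// setPixel() path below is used for everything else.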
	if (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)
	{
		const deInt64 initialValue(getOperationInitialValue<deInt64>(operation));

		for (deUint32 z = 0; z < gridSize.z(); z++)
		for (deUint32 y = 0; y < gridSize.y(); y++)
		for (deUint32 x = 0; x < gridSize.x(); x++)
		{
			*((deInt64*)pixelBuffer.getPixelPtr(x, y, z)) = initialValue;
		}
	}
	else
	{
		const tcu::IVec4 initialValue(getOperationInitialValue<deInt32>(operation));

		for (deUint32 z = 0; z < gridSize.z(); z++)
		for (deUint32 y = 0; y < gridSize.y(); y++)
		for (deUint32 x = 0; x < gridSize.x(); x++)
		{
			pixelBuffer.setPixel(initialValue, x, y, z);
		}
	}

	flushAlloc(deviceInterface, device, bufferAllocation);
}

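// Throws NotSupportedError when an optional feature is missing; format features
// that the checked extensions are expected to make mandatory are reported with
// TCU_FAIL instead.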
void commonCheckSupport (Context& context, const tcu::TextureFormat& tcuFormat, ImageType imageType, AtomicOperation operation, bool useTransfer, ShaderReadType readType, ImageBackingType backingType)
{
	const VkFormat				format				= mapTextureFormat(tcuFormat);
	const VkImageType			vkImgType			= mapImageType(imageType);
	const VkFormatFeatureFlags	texelBufferSupport	= (VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT);
	const VkFormatProperties	formatProperties	= getPhysicalDeviceFormatProperties(context.getInstanceInterface(),
																						context.getPhysicalDevice(), format);

	if ((imageType == IMAGE_TYPE_BUFFER) &&
		((formatProperties.bufferFeatures & texelBufferSupport) != texelBufferSupport))
		TCU_THROW(NotSupportedError, "Atomic storage texel buffers not supported");

	if (imageType == IMAGE_TYPE_CUBE_ARRAY)
		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_IMAGE_CUBE_ARRAY);

	if (backingType == ImageBackingType::SPARSE)
	{
		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);

		switch (vkImgType)
		{
		case VK_IMAGE_TYPE_2D:	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE2D); break;
		case VK_IMAGE_TYPE_3D:	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE3D); break;
		default:				DE_ASSERT(false); break;
		}

		if (!checkSparseImageFormatSupport(context.getPhysicalDevice(), context.getInstanceInterface(), format, vkImgType, VK_SAMPLE_COUNT_1_BIT, getUsageFlags(useTransfer), VK_IMAGE_TILING_OPTIMAL))
			TCU_THROW(NotSupportedError, "Format does not support sparse images");
	}

	if (isFloatFormat(format))
	{
		context.requireDeviceFunctionality("VK_EXT_shader_atomic_float");

		const VkFormatFeatureFlags	requiredFeatures	= (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
		const auto&					atomicFloatFeatures	= context.getShaderAtomicFloatFeaturesEXT();

		if (!atomicFloatFeatures.shaderImageFloat32Atomics)
			TCU_THROW(NotSupportedError, "shaderImageFloat32Atomics not supported");

		if ((operation == ATOMIC_OPERATION_ADD) && !atomicFloatFeatures.shaderImageFloat32AtomicAdd)
			TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicAdd not supported");

		if (operation == ATOMIC_OPERATION_MIN || operation == ATOMIC_OPERATION_MAX)
		{
			context.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
			if (!context.getShaderAtomicFloat2FeaturesEXT().shaderImageFloat32AtomicMinMax)
			{
				TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicMinMax not supported");
			}
		}

		if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
			TCU_FAIL("Required format feature bits not supported");

		if (backingType == ImageBackingType::SPARSE)
		{
			if (!atomicFloatFeatures.sparseImageFloat32Atomics)
				TCU_THROW(NotSupportedError, "sparseImageFloat32Atomics not supported");

			if (operation == ATOMIC_OPERATION_ADD && !atomicFloatFeatures.sparseImageFloat32AtomicAdd)
				TCU_THROW(NotSupportedError, "sparseImageFloat32AtomicAdd not supported");
		}
	}
	else if (format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
	{
		context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");

		const VkFormatFeatureFlags	requiredFeatures	= (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
		const auto&					atomicInt64Features	= context.getShaderImageAtomicInt64FeaturesEXT();

		if (!atomicInt64Features.shaderImageInt64Atomics)
			TCU_THROW(NotSupportedError, "shaderImageInt64Atomics not supported");

		if (backingType == ImageBackingType::SPARSE && !atomicInt64Features.sparseImageInt64Atomics)
			TCU_THROW(NotSupportedError, "sparseImageInt64Atomics not supported");

		if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
			TCU_FAIL("Mandatory format features not supported");
	}

	if (useTransfer)
	{
		const VkFormatFeatureFlags transferFeatures = (VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT);
		if ((formatProperties.optimalTilingFeatures & transferFeatures) != transferFeatures)
			TCU_THROW(NotSupportedError, "Transfer features not supported for this format");
	}

	if (readType == ShaderReadType::SPARSE)
	{
		DE_ASSERT(imageType != IMAGE_TYPE_1D && imageType != IMAGE_TYPE_1D_ARRAY && imageType != IMAGE_TYPE_BUFFER);
		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_RESOURCE_RESIDENCY);
	}
}

class BinaryAtomicEndResultCase : public vkt::TestCase
{
public:
								BinaryAtomicEndResultCase	(tcu::TestContext&			testCtx,
															 const string&				name,
															 const string&				description,
															 const ImageType			imageType,
															 const tcu::UVec3&			imageSize,
															 const tcu::TextureFormat&	format,
															 const AtomicOperation		operation,
															 const bool					useTransfer,
															 const ShaderReadType		shaderReadType,
															 const ImageBackingType		backingType,
															 const glu::GLSLVersion		glslVersion);

	void						initPrograms				(SourceCollections&			sourceCollections) const;
	TestInstance*				createInstance				(Context&					context) const;
	virtual void				checkSupport				(Context&					context) const;

private:
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const tcu::TextureFormat	m_format;
	const AtomicOperation		m_operation;
	const bool					m_useTransfer;
	const ShaderReadType		m_readType;
	const ImageBackingType		m_backingType;
	const glu::GLSLVersion		m_glslVersion;
};

BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext&			testCtx,
													  const string&				name,
													  const string&				description,
													  const ImageType			imageType,
													  const tcu::UVec3&			imageSize,
													  const tcu::TextureFormat&	format,
													  const AtomicOperation		operation,
													  const bool				useTransfer,
													  const ShaderReadType		shaderReadType,
													  const ImageBackingType	backingType,
													  const glu::GLSLVersion	glslVersion)
	: TestCase		(testCtx, name, description)
	, m_imageType	(imageType)
	, m_imageSize	(imageSize)
	, m_format		(format)
	, m_operation	(operation)
	, m_useTransfer	(useTransfer)
	, m_readType	(shaderReadType)
	, m_backingType	(backingType)
	, m_glslVersion	(glslVersion)
{
}

void BinaryAtomicEndResultCase::checkSupport (Context& context) const
{
	commonCheckSupport(context, m_format, m_imageType, m_operation, m_useTransfer, m_readType, m_backingType);
}

void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
{
	const VkFormat	imageFormat		= mapTextureFormat(m_format);
	const deUint32	componentWidth	= getFormatComponentWidth(imageFormat, 0);
	const bool		intFormat		= isIntFormat(imageFormat);
	const bool		uintFormat		= isUintFormat(imageFormat);
	const bool		floatFormat		= isFloatFormat(imageFormat);
	const string	type			= getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
	const string	vec4Type		= getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);

	AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);

	if (isSpirvAtomicOperation(m_operation))
	{
		const CaseVariant					caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_END_RESULTS};
		const tcu::StringTemplate			shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
		std::map<std::string, std::string>	specializations;

		specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
		if (isSpirvAtomicNoLastArgOp(m_operation))
			specializations["LASTARG"] = "";

		sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
	}
	else
	{
		const string	versionDecl				= glu::getGLSLVersionDeclaration(m_glslVersion);

		const UVec3		gridSize				= getShaderGridSize(m_imageType, m_imageSize);
		const string	atomicCoord				= getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");

		const string	atomicArgExpr			= type + getAtomicFuncArgumentShaderStr(m_operation,
																						"gx", "gy", "gz",
																						IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));

		const string	compareExchangeStr		= (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
												(componentWidth == 64 ? ", 820338753304" : ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "")
												: "";
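		// \note 820338753304 == 0xBEFFFFFF18, i.e. the 64-bit initial value
		//       returned by getOperationInitialValue<deInt64>().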
		const string	atomicInvocation		= getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
		const string	shaderImageFormatStr	= getShaderImageFormatQualifier(m_format);
		const string	shaderImageTypeStr		= getShaderImageType(m_format, m_imageType);
		const string	extensions				= "#extension GL_EXT_shader_atomic_float : enable\n"
												  "#extension GL_EXT_shader_atomic_float2 : enable\n"
												  "#extension GL_KHR_memory_scope_semantics : enable";

		string source = versionDecl + "\n" + extensions + "\n";

		if (64 == componentWidth)
		{
			source +=	"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
						"#extension GL_EXT_shader_image_int64 : require\n";
		}

		source +=	"precision highp " + shaderImageTypeStr + ";\n"
					"\n"
					"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
					"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
					"\n"
					"void main (void)\n"
					"{\n"
					"	int gx = int(gl_GlobalInvocationID.x);\n"
					"	int gy = int(gl_GlobalInvocationID.y);\n"
					"	int gz = int(gl_GlobalInvocationID.z);\n"
					"	" + atomicInvocation + ";\n"
					"}\n";

		sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
	}
}

class BinaryAtomicIntermValuesCase : public vkt::TestCase
{
public:
								BinaryAtomicIntermValuesCase	(tcu::TestContext&			testCtx,
																 const string&				name,
																 const string&				description,
																 const ImageType			imageType,
																 const tcu::UVec3&			imageSize,
																 const tcu::TextureFormat&	format,
																 const AtomicOperation		operation,
																 const bool					useTransfer,
																 const ShaderReadType		shaderReadType,
																 const ImageBackingType		backingType,
																 const glu::GLSLVersion		glslVersion);

	void						initPrograms					(SourceCollections&			sourceCollections) const;
	TestInstance*				createInstance					(Context&					context) const;
	virtual void				checkSupport					(Context&					context) const;

private:
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const tcu::TextureFormat	m_format;
	const AtomicOperation		m_operation;
	const bool					m_useTransfer;
	const ShaderReadType		m_readType;
	const ImageBackingType		m_backingType;
	const glu::GLSLVersion		m_glslVersion;
};

BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext&			testCtx,
															const string&			name,
															const string&			description,
															const ImageType			imageType,
															const tcu::UVec3&		imageSize,
															const TextureFormat&	format,
															const AtomicOperation	operation,
															const bool				useTransfer,
															const ShaderReadType	shaderReadType,
															const ImageBackingType	backingType,
															const glu::GLSLVersion	glslVersion)
	: TestCase		(testCtx, name, description)
	, m_imageType	(imageType)
	, m_imageSize	(imageSize)
	, m_format		(format)
	, m_operation	(operation)
	, m_useTransfer	(useTransfer)
	, m_readType	(shaderReadType)
	, m_backingType	(backingType)
	, m_glslVersion	(glslVersion)
{
}

void BinaryAtomicIntermValuesCase::checkSupport (Context& context) const
{
	commonCheckSupport(context, m_format, m_imageType, m_operation, m_useTransfer, m_readType, m_backingType);
}

void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
{
	const VkFormat	imageFormat		= mapTextureFormat(m_format);
	const deUint32	componentWidth	= getFormatComponentWidth(imageFormat, 0);
	const bool		intFormat		= isIntFormat(imageFormat);
	const bool		uintFormat		= isUintFormat(imageFormat);
	const bool		floatFormat		= isFloatFormat(imageFormat);
	const string	type			= getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
	const string	vec4Type		= getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);

	AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);

	if (isSpirvAtomicOperation(m_operation))
	{
		const CaseVariant					caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_INTERMEDIATE_RESULTS};
		const tcu::StringTemplate			shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
		std::map<std::string, std::string>	specializations;

		specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
		if (isSpirvAtomicNoLastArgOp(m_operation))
			specializations["LASTARG"] = "";

		sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
	}
	else
	{
		const string	versionDecl				= glu::getGLSLVersionDeclaration(m_glslVersion);
		const UVec3		gridSize				= getShaderGridSize(m_imageType, m_imageSize);
		const string	atomicCoord				= getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
		const string	invocationCoord			= getCoordStr(m_imageType, "gx", "gy", "gz");
		const string	atomicArgExpr			= type + getAtomicFuncArgumentShaderStr(m_operation,
																						"gx", "gy", "gz",
																						IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));

		const string	compareExchangeStr		= (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
												  (componentWidth == 64 ? ", 820338753304" : ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "") :
												  "";
		const string	atomicInvocation		= getAtomicOperationShaderFuncName(m_operation) +
												"(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
		const string	shaderImageFormatStr	= getShaderImageFormatQualifier(m_format);
		const string	shaderImageTypeStr		= getShaderImageType(m_format, m_imageType);
		const string	extensions				= "#extension GL_EXT_shader_atomic_float : enable\n"
												  "#extension GL_EXT_shader_atomic_float2 : enable\n"
												  "#extension GL_KHR_memory_scope_semantics : enable";

		string source = versionDecl + "\n" + extensions + "\n"
						"\n";

		if (64 == componentWidth)
		{
			source +=	"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
						"#extension GL_EXT_shader_image_int64 : require\n";
		}

		source +=	"precision highp " + shaderImageTypeStr + ";\n"
					"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
					"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
					"layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
					"\n"
					"void main (void)\n"
					"{\n"
					"	int gx = int(gl_GlobalInvocationID.x);\n"
					"	int gy = int(gl_GlobalInvocationID.y);\n"
					"	int gz = int(gl_GlobalInvocationID.z);\n"
					"	imageStore(u_intermValuesImage, " + invocationCoord + ", " + vec4Type + "(" + atomicInvocation + "));\n"
					"}\n";

		sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
	}
}

class BinaryAtomicInstanceBase : public vkt::TestInstance
{
public:

								BinaryAtomicInstanceBase (Context&						context,
														  const string&					name,
														  const ImageType				imageType,
														  const tcu::UVec3&				imageSize,
														  const TextureFormat&			format,
														  const AtomicOperation			operation,
														  const bool					useTransfer,
														  const ShaderReadType			shaderReadType,
														  const ImageBackingType		backingType);

	tcu::TestStatus				iterate					 (void);

	virtual deUint32			getOutputBufferSize		 (void) const = 0;

	virtual void				prepareResources		 (const bool					useTransfer) = 0;
	virtual void				prepareDescriptors		 (const bool					isTexelBuffer) = 0;

	virtual void				commandsBeforeCompute	 (const VkCommandBuffer			cmdBuffer) const = 0;
	virtual void				commandsAfterCompute	 (const VkCommandBuffer			cmdBuffer,
														  const VkPipeline				pipeline,
														  const VkPipelineLayout		pipelineLayout,
														  const VkDescriptorSet			descriptorSet,
														  const VkDeviceSize&			range,
														  const bool					useTransfer) = 0;

	virtual bool				verifyResult			 (Allocation&					outputBufferAllocation,
														  const bool					is64Bit) const = 0;

protected:

	void						shaderFillImage			 (const VkCommandBuffer			cmdBuffer,
														  const VkBuffer&				buffer,
														  const VkPipeline				pipeline,
														  const VkPipelineLayout		pipelineLayout,
														  const VkDescriptorSet			descriptorSet,
														  const VkDeviceSize&			range,
														  const tcu::UVec3&				gridSize);

	void						createImageAndView		(VkFormat						imageFormat,
														 const tcu::UVec3&				imageExtent,
														 bool							useTransfer,
														 de::MovePtr<Image>&			imagePtr,
														 Move<VkImageView>&				imageViewPtr);

	void						createImageResources	(const VkFormat&				imageFormat,
														 const bool						useTransfer);

	const string				m_name;
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const TextureFormat			m_format;
	const AtomicOperation		m_operation;
	const bool					m_useTransfer;
	const ShaderReadType		m_readType;
	const ImageBackingType		m_backingType;

	de::MovePtr<Buffer>			m_inputBuffer;
	de::MovePtr<Buffer>			m_outputBuffer;
	Move<VkBufferView>			m_descResultBufferView;
	Move<VkBufferView>			m_descIntermResultsBufferView;
	Move<VkDescriptorPool>		m_descriptorPool;
	Move<VkDescriptorSetLayout>	m_descriptorSetLayout;
	Move<VkDescriptorSet>		m_descriptorSet;

	Move<VkDescriptorSetLayout>	m_descriptorSetLayoutNoTransfer;
	Move<VkDescriptorPool>		m_descriptorPoolNoTransfer;

	de::MovePtr<Image>			m_resultImage;
	Move<VkImageView>			m_resultImageView;

	std::vector<VkSemaphore>	m_waitSemaphores;
};

BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context&				context,
													const string&			name,
													const ImageType			imageType,
													const tcu::UVec3&		imageSize,
													const TextureFormat&	format,
													const AtomicOperation	operation,
													const bool				useTransfer,
													const ShaderReadType	shaderReadType,
													const ImageBackingType	backingType)
	: vkt::TestInstance	(context)
	, m_name			(name)
	, m_imageType		(imageType)
	, m_imageSize		(imageSize)
	, m_format			(format)
	, m_operation		(operation)
	, m_useTransfer		(useTransfer)
	, m_readType		(shaderReadType)
	, m_backingType		(backingType)
{
}

tcu::TestStatus	BinaryAtomicInstanceBase::iterate (void)
{
	const VkDevice			device				= m_context.getDevice();
	const DeviceInterface&	deviceInterface		= m_context.getDeviceInterface();
	const VkQueue			queue				= m_context.getUniversalQueue();
	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
	Allocator&				allocator			= m_context.getDefaultAllocator();
	const VkDeviceSize		imageSizeInBytes	= tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
	const VkDeviceSize		outBuffSizeInBytes	= getOutputBufferSize();
	const VkFormat			imageFormat			= mapTextureFormat(m_format);
	const bool				isTexelBuffer		= (m_imageType == IMAGE_TYPE_BUFFER);

	if (!isTexelBuffer)
	{
		createImageResources(imageFormat, m_useTransfer);
	}

	tcu::UVec3				gridSize			= getShaderGridSize(m_imageType, m_imageSize);

	// Prepare the buffer with the initial data for the image.
	m_inputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface,
													device,
													allocator,
													makeBufferCreateInfo(imageSizeInBytes,
																		 VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
																		 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
																		 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
													MemoryRequirement::HostVisible));

	// Fill in buffer with initial data used for image.
	initDataForImage(device, deviceInterface, m_format, m_operation, gridSize, *m_inputBuffer);

	// Create a buffer to store shader output copied from result image.
	m_outputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface,
													device,
													allocator,
													makeBufferCreateInfo(outBuffSizeInBytes,
																		 VK_BUFFER_USAGE_TRANSFER_DST_BIT |
																		 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
																		 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
													MemoryRequirement::HostVisible));

	if (!isTexelBuffer)
	{
		prepareResources(m_useTransfer);
	}

	prepareDescriptors(isTexelBuffer);

	Move<VkDescriptorSet>	descriptorSetFillImage;
	Move<VkShaderModule>	shaderModuleFillImage;
	Move<VkPipelineLayout>	pipelineLayoutFillImage;
	Move<VkPipeline>		pipelineFillImage;

	Move<VkDescriptorSet>	descriptorSetReadImage;
	Move<VkShaderModule>	shaderModuleReadImage;
	Move<VkPipelineLayout>	pipelineLayoutReadImage;
	Move<VkPipeline>		pipelineReadImage;

	if (!m_useTransfer)
	{
		m_descriptorSetLayoutNoTransfer =
			DescriptorSetLayoutBuilder()
			.addSingleBinding((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), VK_SHADER_STAGE_COMPUTE_BIT)
			.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
			.build(deviceInterface, device);

		m_descriptorPoolNoTransfer =
			DescriptorPoolBuilder()
			.addType((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), 2)
			.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2)
			.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);

		descriptorSetFillImage = makeDescriptorSet(deviceInterface,
			device,
			*m_descriptorPoolNoTransfer,
			*m_descriptorSetLayoutNoTransfer);

		descriptorSetReadImage = makeDescriptorSet(deviceInterface,
			device,
			*m_descriptorPoolNoTransfer,
			*m_descriptorSetLayoutNoTransfer);

		shaderModuleFillImage	= createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("fillShader"), 0);
		pipelineLayoutFillImage	= makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
		pipelineFillImage		= makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleFillImage);

		if (m_readType == ShaderReadType::SPARSE)
		{
			shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShaderResidency"), 0);
		}
		else
		{
			shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShader"), 0);
		}
		pipelineLayoutReadImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
		pipelineReadImage		= makeComputePipeline(deviceInterface, device, *pipelineLayoutReadImage, *shaderModuleReadImage);
	}

	// Create pipeline
	const Unique<VkShaderModule>	shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
	const Unique<VkPipelineLayout>	pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
	const Unique<VkPipeline>		pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));

	// Create command buffer
	const Unique<VkCommandPool>		cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
	const Unique<VkCommandBuffer>	cmdBuffer(allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	beginCommandBuffer(deviceInterface, *cmdBuffer);

	if (!isTexelBuffer)
	{
		if (m_useTransfer)
		{
			const vector<VkBufferImageCopy>	bufferImageCopy(1, makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize)));
			copyBufferToImage(deviceInterface,
							  *cmdBuffer,
							  *(*m_inputBuffer),
							  imageSizeInBytes,
							  bufferImageCopy,
							  VK_IMAGE_ASPECT_COLOR_BIT,
							  1,
							  getNumLayers(m_imageType, m_imageSize), m_resultImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
		}
		else
		{
			shaderFillImage(*cmdBuffer, *(*m_inputBuffer), *pipelineFillImage, *pipelineLayoutFillImage, *descriptorSetFillImage, imageSizeInBytes, gridSize);
		}
		commandsBeforeCompute(*cmdBuffer);
	}

	deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
	deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);

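	// Dispatch NUM_INVOCATIONS_PER_PIXEL workgroups per pixel along X; the shaders
	// fold gx back onto the image with "gx % gridSize.x()", so several invocations
	// hit each texel.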
	deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());

	commandsAfterCompute(*cmdBuffer,
						 *pipelineReadImage,
						 *pipelineLayoutReadImage,
						 *descriptorSetReadImage,
						 outBuffSizeInBytes,
						 m_useTransfer);

	const VkBufferMemoryBarrier	outputBufferPreHostReadBarrier
		= makeBufferMemoryBarrier(((m_useTransfer || isTexelBuffer) ? VK_ACCESS_TRANSFER_WRITE_BIT : VK_ACCESS_SHADER_WRITE_BIT),
								  VK_ACCESS_HOST_READ_BIT,
								  m_outputBuffer->get(),
								  0ull,
								  outBuffSizeInBytes);

	deviceInterface.cmdPipelineBarrier(*cmdBuffer,
									   ((m_useTransfer || isTexelBuffer) ? VK_PIPELINE_STAGE_TRANSFER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT),
									   VK_PIPELINE_STAGE_HOST_BIT,
									   DE_FALSE, 0u, DE_NULL,
									   1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);

	endCommandBuffer(deviceInterface, *cmdBuffer);

	std::vector<VkPipelineStageFlags> waitStages(m_waitSemaphores.size(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
	submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer, false, 1u,
		static_cast<deUint32>(m_waitSemaphores.size()), de::dataOrNull(m_waitSemaphores), de::dataOrNull(waitStages));

	Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();

	invalidateAlloc(deviceInterface, device, outputBufferAllocation);

	if (verifyResult(outputBufferAllocation, (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)))
		return tcu::TestStatus::pass("Comparison succeeded");
	else
		return tcu::TestStatus::fail("Comparison failed");
}

void BinaryAtomicInstanceBase::shaderFillImage (const VkCommandBuffer	cmdBuffer,
												const VkBuffer&			buffer,
												const VkPipeline		pipeline,
												const VkPipelineLayout	pipelineLayout,
												const VkDescriptorSet	descriptorSet,
												const VkDeviceSize&		range,
												const tcu::UVec3&		gridSize)
{
	const VkDevice					device					= m_context.getDevice();
	const DeviceInterface&			deviceInterface			= m_context.getDeviceInterface();
	const VkDescriptorImageInfo		descResultImageInfo		= makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
	const VkDescriptorBufferInfo	descResultBufferInfo	= makeDescriptorBufferInfo(buffer, 0, range);
	const VkImageSubresourceRange	subresourceRange		= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

	DescriptorSetUpdateBuilder()
		.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
		.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
		.update(deviceInterface, device);

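	// Transition the whole image UNDEFINED -> GENERAL so the fill shader can write it.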
	const VkImageMemoryBarrier imageBarrierPre = makeImageMemoryBarrier(0,
																		VK_ACCESS_SHADER_WRITE_BIT,
																		VK_IMAGE_LAYOUT_UNDEFINED,
																		VK_IMAGE_LAYOUT_GENERAL,
																		m_resultImage->get(),
																		subresourceRange);

	deviceInterface.cmdPipelineBarrier(	cmdBuffer,
										VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
										VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
										(VkDependencyFlags)0,
										0, (const VkMemoryBarrier*)DE_NULL,
										0, (const VkBufferMemoryBarrier*)DE_NULL,
										1, &imageBarrierPre);

	deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
	deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);

	deviceInterface.cmdDispatch(cmdBuffer, gridSize.x(), gridSize.y(), gridSize.z());

	const VkImageMemoryBarrier imageBarrierPost = makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT,
																		 VK_ACCESS_SHADER_READ_BIT,
																		 VK_IMAGE_LAYOUT_GENERAL,
																		 VK_IMAGE_LAYOUT_GENERAL,
																		 m_resultImage->get(),
																		 subresourceRange);

	deviceInterface.cmdPipelineBarrier(	cmdBuffer,
										VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
										VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
										(VkDependencyFlags)0,
										0, (const VkMemoryBarrier*)DE_NULL,
										0, (const VkBufferMemoryBarrier*)DE_NULL,
										1, &imageBarrierPost);
}

createImageAndView(VkFormat imageFormat,const tcu::UVec3 & imageExent,bool useTransfer,de::MovePtr<Image> & imagePtr,Move<VkImageView> & imageViewPtr)1218 void BinaryAtomicInstanceBase::createImageAndView	(VkFormat						imageFormat,
1219 													 const tcu::UVec3&				imageExent,
1220 													 bool							useTransfer,
1221 													 de::MovePtr<Image>&			imagePtr,
1222 													 Move<VkImageView>&				imageViewPtr)
1223 {
1224 	const VkDevice			device			= m_context.getDevice();
1225 	const DeviceInterface&	deviceInterface	= m_context.getDeviceInterface();
1226 	Allocator&				allocator		= m_context.getDefaultAllocator();
1227 	const VkImageUsageFlags	usageFlags		= getUsageFlags(useTransfer);
1228 	VkImageCreateFlags		createFlags		= 0u;
1229 
1230 	if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
1231 		createFlags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
1232 
1233 	const auto numLayers = getNumLayers(m_imageType, m_imageSize);
1234 
1235 	VkImageCreateInfo createInfo =
1236 	{
1237 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,					// VkStructureType			sType;
1238 		DE_NULL,												// const void*				pNext;
1239 		createFlags,											// VkImageCreateFlags		flags;
1240 		mapImageType(m_imageType),								// VkImageType				imageType;
1241 		imageFormat,											// VkFormat					format;
1242 		makeExtent3D(imageExent),								// VkExtent3D				extent;
1243 		1u,														// deUint32					mipLevels;
1244 		numLayers,												// deUint32					arrayLayers;
1245 		VK_SAMPLE_COUNT_1_BIT,									// VkSampleCountFlagBits	samples;
1246 		VK_IMAGE_TILING_OPTIMAL,								// VkImageTiling			tiling;
1247 		usageFlags,												// VkImageUsageFlags		usage;
1248 		VK_SHARING_MODE_EXCLUSIVE,								// VkSharingMode			sharingMode;
1249 		0u,														// deUint32					queueFamilyIndexCount;
1250 		DE_NULL,												// const deUint32*			pQueueFamilyIndices;
1251 		VK_IMAGE_LAYOUT_UNDEFINED,								// VkImageLayout			initialLayout;
1252 	};
1253 
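	// For sparse backing the image additionally needs the sparse binding/residency flags,
	// and concurrent sharing between the universal and sparse queue families when they
	// differ, so both queues can access it without ownership transfers.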
1254 	if (m_backingType == ImageBackingType::SPARSE)
1255 	{
1256 		const auto&		vki				= m_context.getInstanceInterface();
1257 		const auto		physicalDevice	= m_context.getPhysicalDevice();
1258 		const auto		sparseQueue		= m_context.getSparseQueue();
1259 		const auto		sparseQueueIdx	= m_context.getSparseQueueFamilyIndex();
1260 		const auto		universalQIdx	= m_context.getUniversalQueueFamilyIndex();
1261 		const deUint32	queueIndices[]	= { universalQIdx, sparseQueueIdx };
1262 
1263 		createInfo.flags |= (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);
1264 
1265 		if (sparseQueueIdx != universalQIdx)
1266 		{
1267 			createInfo.sharingMode				= VK_SHARING_MODE_CONCURRENT;
1268 			createInfo.queueFamilyIndexCount	= static_cast<deUint32>(DE_LENGTH_OF_ARRAY(queueIndices));
1269 			createInfo.pQueueFamilyIndices		= queueIndices;
1270 		}
1271 
1272 		const auto sparseImage = new SparseImage(deviceInterface, device, physicalDevice, vki, createInfo, sparseQueue, allocator, m_format);
1273 		m_waitSemaphores.push_back(sparseImage->getSemaphore());
1274 		imagePtr = de::MovePtr<Image>(sparseImage);
1275 	}
1276 	else
1277 		imagePtr = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, createInfo, MemoryRequirement::Any));
1278 
1279 	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numLayers);
1280 
1281 	imageViewPtr = makeImageView(deviceInterface, device, imagePtr->get(), mapImageViewType(m_imageType), imageFormat, subresourceRange);
1282 }
1283 
1284 void BinaryAtomicInstanceBase::createImageResources (const VkFormat&	imageFormat,
1285 													 const bool			useTransfer)
1286 {
1287 	// Create the image that will store the results of the atomic operations.
1288 	createImageAndView(imageFormat, getLayerSize(m_imageType, m_imageSize), useTransfer, m_resultImage, m_resultImageView);
1289 }
1290 
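// Checks the final texel values after NUM_INVOCATIONS_PER_PIXEL invocations have each
// applied the atomic operation to the same pixel.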
1291 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
1292 {
1293 public:
1294 
1295 						BinaryAtomicEndResultInstance  (Context&					context,
1296 														const string&				name,
1297 														const ImageType				imageType,
1298 														const tcu::UVec3&			imageSize,
1299 														const TextureFormat&		format,
1300 														const AtomicOperation		operation,
1301 														const bool					useTransfer,
1302 														const ShaderReadType		shaderReadType,
1303 														const ImageBackingType		backingType)
1304 							: BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation, useTransfer, shaderReadType, backingType) {}
1305 
1306 	virtual deUint32	getOutputBufferSize			   (void) const;
1307 
1308 	virtual void		prepareResources			   (const bool					useTransfer) { DE_UNREF(useTransfer); }
1309 	virtual void		prepareDescriptors			   (const bool					isTexelBuffer);
1310 
1311 	virtual void		commandsBeforeCompute		   (const VkCommandBuffer) const {}
1312 	virtual void		commandsAfterCompute		   (const VkCommandBuffer		cmdBuffer,
1313 														const VkPipeline			pipeline,
1314 														const VkPipelineLayout		pipelineLayout,
1315 														const VkDescriptorSet		descriptorSet,
1316 														const VkDeviceSize&			range,
1317 														const bool					useTransfer);
1318 
1319 	virtual bool		verifyResult				   (Allocation&					outputBufferAllocation,
1320 														const bool					is64Bit) const;
1321 
1322 protected:
1323 
1324 	template <typename T>
1325 	bool				isValueCorrect				   (const T						resultValue,
1326 														deInt32						x,
1327 														deInt32						y,
1328 														deInt32						z,
1329 														const UVec3&				gridSize,
1330 														const IVec3					extendedGridSize) const;
1331 };
1332 
1333 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
1334 {
1335 	return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1336 }
1337 
1338 void BinaryAtomicEndResultInstance::prepareDescriptors (const bool	isTexelBuffer)
1339 {
1340 	const VkDescriptorType	descriptorType	= isTexelBuffer ?
1341 											VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
1342 											VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1343 	const VkDevice			device			= m_context.getDevice();
1344 	const DeviceInterface&	deviceInterface = m_context.getDeviceInterface();
1345 
1346 	m_descriptorSetLayout =
1347 		DescriptorSetLayoutBuilder()
1348 		.addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1349 		.build(deviceInterface, device);
1350 
1351 	m_descriptorPool =
1352 		DescriptorPoolBuilder()
1353 		.addType(descriptorType)
1354 		.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1355 
1356 	m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1357 
1358 	if (isTexelBuffer)
1359 	{
1360 		m_descResultBufferView = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1361 
1362 		DescriptorSetUpdateBuilder()
1363 			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
1364 			.update(deviceInterface, device);
1365 	}
1366 	else
1367 	{
1368 		const VkDescriptorImageInfo	descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1369 
1370 		DescriptorSetUpdateBuilder()
1371 			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
1372 			.update(deviceInterface, device);
1373 	}
1374 }
1375 
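// Reads back the result image in one of three ways: for texel buffers the input buffer
// already aliases the output buffer, the transfer path copies the image into the output
// buffer, and otherwise a second compute dispatch (bound below) reads the image into
// the output buffer.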
1376 void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer		cmdBuffer,
1377 														  const VkPipeline			pipeline,
1378 														  const VkPipelineLayout	pipelineLayout,
1379 														  const VkDescriptorSet		descriptorSet,
1380 														  const VkDeviceSize&		range,
1381 														  const bool				useTransfer)
1382 {
1383 	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
1384 	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1385 	const UVec3						layerSize			= getLayerSize(m_imageType, m_imageSize);
1386 
1387 	if (m_imageType == IMAGE_TYPE_BUFFER)
1388 	{
1389 		m_outputBuffer = m_inputBuffer;
1390 	}
1391 	else if (useTransfer)
1392 	{
1393 		const VkImageMemoryBarrier	resultImagePostDispatchBarrier =
1394 			makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
1395 									VK_ACCESS_TRANSFER_READ_BIT,
1396 									VK_IMAGE_LAYOUT_GENERAL,
1397 									VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1398 									m_resultImage->get(),
1399 									subresourceRange);
1400 
1401 		deviceInterface.cmdPipelineBarrier(	cmdBuffer,
1402 											VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1403 											VK_PIPELINE_STAGE_TRANSFER_BIT,
1404 											DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1405 											1u, &resultImagePostDispatchBarrier);
1406 
1407 		const VkBufferImageCopy		bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(layerSize), getNumLayers(m_imageType, m_imageSize));
1408 
1409 		deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
1410 	}
1411 	else
1412 	{
1413 		const VkDevice					device					= m_context.getDevice();
1414 		const VkDescriptorImageInfo		descResultImageInfo		= makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1415 		const VkDescriptorBufferInfo	descResultBufferInfo	= makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1416 
1417 		DescriptorSetUpdateBuilder()
1418 			.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1419 			.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1420 			.update(deviceInterface, device);
1421 
1422 		const VkImageMemoryBarrier	resultImagePostDispatchBarrier =
1423 			makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
1424 									VK_ACCESS_SHADER_READ_BIT,
1425 									VK_IMAGE_LAYOUT_GENERAL,
1426 									VK_IMAGE_LAYOUT_GENERAL,
1427 									m_resultImage->get(),
1428 									subresourceRange);
1429 
1430 		deviceInterface.cmdPipelineBarrier(	cmdBuffer,
1431 											VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1432 											VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1433 											DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1434 											1u, &resultImagePostDispatchBarrier);
1435 
1436 		deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1437 		deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
1438 
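		// Arrayed and cube images dispatch one workgroup slice per layer, presumably
		// because the read-back shader addresses layers through the corresponding
		// invocation ID component.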
1439 		switch (m_imageType)
1440 		{
1441 			case IMAGE_TYPE_1D_ARRAY:
1442 				deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), subresourceRange.layerCount, layerSize.z());
1443 				break;
1444 			case IMAGE_TYPE_2D_ARRAY:
1445 			case IMAGE_TYPE_CUBE:
1446 			case IMAGE_TYPE_CUBE_ARRAY:
1447 				deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1448 				break;
1449 			default:
1450 				deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), layerSize.z());
1451 				break;
1452 		}
1453 	}
1454 }
1455 
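// For order-independent (commutative) operations the end result is compared against a
// reference folded over all invocations; for exchange-style operations the result only
// has to match one of the possible atomic arguments, since the winning invocation is
// not defined.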
1456 bool BinaryAtomicEndResultInstance::verifyResult (Allocation&	outputBufferAllocation,
1457 												  const bool	is64Bit) const
1458 {
1459 	const UVec3	gridSize			= getShaderGridSize(m_imageType, m_imageSize);
1460 	const IVec3 extendedGridSize	= IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
1461 
1462 	tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());
1463 
1464 	for (deInt32 z = 0; z < resultBuffer.getDepth();  z++)
1465 	for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1466 	for (deInt32 x = 0; x < resultBuffer.getWidth();  x++)
1467 	{
1468 		const void* resultValue = resultBuffer.getPixelPtr(x, y, z);
1469 		deInt32 floatToIntValue = 0;
1470 		bool isFloatValue = false;
1471 		if (isFloatFormat(mapTextureFormat(m_format)))
1472 		{
1473 			isFloatValue = true;
1474 			floatToIntValue = static_cast<deInt32>(*((float*)resultValue));
1475 		}
1476 
1477 		if (isOrderIndependentAtomicOperation(m_operation))
1478 		{
1479 			if (isUintFormat(mapTextureFormat(m_format)))
1480 			{
1481 				if(is64Bit)
1482 				{
1483 					if (!isValueCorrect<deUint64>(*((deUint64*)resultValue), x, y, z, gridSize, extendedGridSize))
1484 						return false;
1485 				}
1486 				else
1487 				{
1488 					if (!isValueCorrect<deUint32>(*((deUint32*)resultValue), x, y, z, gridSize, extendedGridSize))
1489 						return false;
1490 				}
1491 			}
1492 			else if (isIntFormat(mapTextureFormat(m_format)))
1493 			{
1494 				if (is64Bit)
1495 				{
1496 					if (!isValueCorrect<deInt64>(*((deInt64*)resultValue), x, y, z, gridSize, extendedGridSize))
1497 						return false;
1498 				}
1499 				else
1500 				{
1501 					if (!isValueCorrect<deInt32>(*((deInt32*)resultValue), x, y, z, gridSize, extendedGridSize))
1502 						return false;
1503 				}
1504 			}
1505 			else
1506 			{
1507 				// 32-bit floating point
1508 				if (!isValueCorrect<deInt32>(floatToIntValue, x, y, z, gridSize, extendedGridSize))
1509 					return false;
1510 			}
1511 		}
1512 		else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
1513 		{
1514 			// Check if the end result equals one of the atomic args.
1515 			bool matchFound = false;
1516 
1517 			for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1518 			{
1519 				const IVec3 gid(x + i*gridSize.x(), y, z);
1520 				matchFound = is64Bit ?
1521 					(*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
1522 					isFloatValue ?
1523 					floatToIntValue == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize) :
1524 					(*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
1525 
1526 			}
1527 
1528 			if (!matchFound)
1529 				return false;
1530 		}
1531 		else if (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
1532 		{
1533 			// Check if the end result equals one of the atomic args.
1534 			bool matchFound = false;
1535 
1536 			for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1537 			{
1538 				const IVec3 gid(x + i*gridSize.x(), y, z);
1539 				matchFound = is64Bit ?
1540 					(*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
1541 					isFloatValue ?
1542 					floatToIntValue == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize) :
1543 					(*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
1544 			}
1545 
1546 			if (!matchFound)
1547 				return false;
1548 		}
1549 		else
1550 			DE_ASSERT(false);
1551 	}
1552 	return true;
1553 }
1554 
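// Folds the operation over the arguments of every invocation that touched this pixel
// and compares the accumulated reference against the value read back from the image.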
1555 template <typename T>
1556 bool BinaryAtomicEndResultInstance::isValueCorrect(const T resultValue, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
1557 {
1558 	T reference = getOperationInitialValue<T>(m_operation);
1559 	for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1560 	{
1561 		const IVec3 gid(x + i*gridSize.x(), y, z);
1562 		T			arg = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1563 		reference = computeBinaryAtomicOperationResult(m_operation, reference, arg);
1564 	}
1565 	return (resultValue == reference);
1566 }
1567 
1568 TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
1569 {
1570 	return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation, m_useTransfer, m_readType, m_backingType);
1571 }
1572 
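// Checks the intermediate values returned by each atomic call; they are written to a
// second image widened by NUM_INVOCATIONS_PER_PIXEL so every invocation gets its own texel.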
1573 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
1574 {
1575 public:
1576 
1577 						BinaryAtomicIntermValuesInstance   (Context&				context,
1578 															const string&			name,
1579 															const ImageType			imageType,
1580 															const tcu::UVec3&		imageSize,
1581 															const TextureFormat&	format,
1582 															const AtomicOperation	operation,
1583 															const bool				useTransfer,
1584 															const ShaderReadType	shaderReadType,
1585 															const ImageBackingType	backingType)
1586 							: BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation, useTransfer, shaderReadType, backingType) {}
1587 
1588 	virtual deUint32	getOutputBufferSize				   (void) const;
1589 
1590 	virtual void		prepareResources				   (const bool				useTransfer);
1591 	virtual void		prepareDescriptors				   (const bool				isTexelBuffer);
1592 
1593 	virtual void		commandsBeforeCompute			   (const VkCommandBuffer	cmdBuffer) const;
1594 	virtual void		commandsAfterCompute			   (const VkCommandBuffer	cmdBuffer,
1595 															const VkPipeline		pipeline,
1596 															const VkPipelineLayout	pipelineLayout,
1597 															const VkDescriptorSet	descriptorSet,
1598 															const VkDeviceSize&		range,
1599 															const bool				useTransfer);
1600 
1601 	virtual bool		verifyResult					   (Allocation&				outputBufferAllocation,
1602 															const bool				is64Bit) const;
1603 
1604 protected:
1605 
1606 	template <typename T>
1607 	bool				areValuesCorrect				   (tcu::ConstPixelBufferAccess& resultBuffer,
1608 															const bool isFloatingPoint,
1609 															deInt32 x,
1610 															deInt32 y,
1611 															deInt32 z,
1612 															const UVec3& gridSize,
1613 															const IVec3 extendedGridSize) const;
1614 
1615 	template <typename T>
1616 	bool				verifyRecursive					   (const deInt32			index,
1617 															const T					valueSoFar,
1618 															bool					argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1619 															const T					atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1620 															const T					resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
1621 	de::MovePtr<Image>	m_intermResultsImage;
1622 	Move<VkImageView>	m_intermResultsImageView;
1623 };
1624 
1625 deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
1626 {
1627 	return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1628 }
1629 
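// The intermediate-results image is widened by NUM_INVOCATIONS_PER_PIXEL; cube-based
// images are widened in both X and Y, presumably to keep the faces square as cube
// images require.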
1630 void BinaryAtomicIntermValuesInstance::prepareResources (const bool useTransfer)
1631 {
1632 	const UVec3 layerSize			= getLayerSize(m_imageType, m_imageSize);
1633 	const bool  isCubeBasedImage	= (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
1634 	const UVec3 extendedLayerSize	= isCubeBasedImage	? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
1635 														: UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1636 
1637 	createImageAndView(mapTextureFormat(m_format), extendedLayerSize, useTransfer, m_intermResultsImage, m_intermResultsImageView);
1638 }
1639 
1640 void BinaryAtomicIntermValuesInstance::prepareDescriptors (const bool	isTexelBuffer)
1641 {
1642 	const VkDescriptorType	descriptorType	= isTexelBuffer ?
1643 											VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
1644 											VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1645 
1646 	const VkDevice			device			= m_context.getDevice();
1647 	const DeviceInterface&	deviceInterface = m_context.getDeviceInterface();
1648 
1649 	m_descriptorSetLayout =
1650 		DescriptorSetLayoutBuilder()
1651 		.addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1652 		.addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1653 		.build(deviceInterface, device);
1654 
1655 	m_descriptorPool =
1656 		DescriptorPoolBuilder()
1657 		.addType(descriptorType, 2u)
1658 		.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1659 
1660 	m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1661 
1662 	if (isTexelBuffer)
1663 	{
1664 		m_descResultBufferView			= makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1665 		m_descIntermResultsBufferView	= makeBufferView(deviceInterface, device, *(*m_outputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1666 
1667 		DescriptorSetUpdateBuilder()
1668 			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
1669 			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &(m_descIntermResultsBufferView.get()))
1670 			.update(deviceInterface, device);
1671 	}
1672 	else
1673 	{
1674 		const VkDescriptorImageInfo	descResultImageInfo			= makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1675 		const VkDescriptorImageInfo	descIntermResultsImageInfo	= makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1676 
1677 		DescriptorSetUpdateBuilder()
1678 			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
1679 			.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &descIntermResultsImageInfo)
1680 			.update(deviceInterface, device);
1681 	}
1682 }
1683 
1684 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
1685 {
1686 	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
1687 	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1688 
1689 	const VkImageMemoryBarrier	imagePreDispatchBarrier =
1690 		makeImageMemoryBarrier(	0u,
1691 								VK_ACCESS_SHADER_WRITE_BIT,
1692 								VK_IMAGE_LAYOUT_UNDEFINED,
1693 								VK_IMAGE_LAYOUT_GENERAL,
1694 								m_intermResultsImage->get(),
1695 								subresourceRange);
1696 
1697 	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
1698 }
1699 
1700 void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer		cmdBuffer,
1701 															 const VkPipeline			pipeline,
1702 															 const VkPipelineLayout		pipelineLayout,
1703 															 const VkDescriptorSet		descriptorSet,
1704 															 const VkDeviceSize&		range,
1705 															 const bool					useTransfer)
1706 {
1707 	// Nothing to do for texel buffers: the intermediate results were written directly to the output buffer.
1708 	if (m_imageType == IMAGE_TYPE_BUFFER)
1709 		return;
1710 
1711 	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
1712 	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1713 	const UVec3						layerSize			= getLayerSize(m_imageType, m_imageSize);
1714 
1715 	if (useTransfer)
1716 	{
1717 		const VkImageMemoryBarrier	imagePostDispatchBarrier =
1718 			makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
1719 									VK_ACCESS_TRANSFER_READ_BIT,
1720 									VK_IMAGE_LAYOUT_GENERAL,
1721 									VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1722 									m_intermResultsImage->get(),
1723 									subresourceRange);
1724 
1725 		deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);
1726 
1727 		const UVec3					extendedLayerSize		= UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1728 		const VkBufferImageCopy		bufferImageCopyParams	= makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
1729 
1730 		deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
1731 	}
1732 	else
1733 	{
1734 		const VkDevice					device					= m_context.getDevice();
1735 		const VkDescriptorImageInfo		descResultImageInfo		= makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1736 		const VkDescriptorBufferInfo	descResultBufferInfo	= makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1737 
1738 		DescriptorSetUpdateBuilder()
1739 			.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1740 			.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1741 			.update(deviceInterface, device);
1742 
1743 		const VkImageMemoryBarrier	resultImagePostDispatchBarrier =
1744 		makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
1745 								VK_ACCESS_SHADER_READ_BIT,
1746 								VK_IMAGE_LAYOUT_GENERAL,
1747 								VK_IMAGE_LAYOUT_GENERAL,
1748 								m_intermResultsImage->get(),
1749 								subresourceRange);
1750 
1751 		deviceInterface.cmdPipelineBarrier(	cmdBuffer,
1752 									VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1753 									VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1754 									DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1755 									1u, &resultImagePostDispatchBarrier);
1756 
1757 		deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1758 		deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
1759 
1760 		switch (m_imageType)
1761 		{
1762 			case IMAGE_TYPE_1D_ARRAY:
1763 				deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), subresourceRange.layerCount, layerSize.z());
1764 				break;
1765 			case IMAGE_TYPE_2D_ARRAY:
1766 			case IMAGE_TYPE_CUBE:
1767 			case IMAGE_TYPE_CUBE_ARRAY:
1768 				deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1769 				break;
1770 			default:
1771 				deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1772 				break;
1773 		}
1774 	}
1775 }
1776 
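// Interprets the output buffer as the widened grid and checks, per pixel, that the
// returned intermediate values form a valid serialization of the atomic operations.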
1777 bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation&	outputBufferAllocation,
1778 													 const bool		is64Bit) const
1779 {
1780 	const UVec3	gridSize		 = getShaderGridSize(m_imageType, m_imageSize);
1781 	const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
1782 
1783 	tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());
1784 
1785 	for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
1786 	for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1787 	for (deUint32 x = 0; x < gridSize.x(); x++)
1788 	{
1789 		if (isUintFormat(mapTextureFormat(m_format)))
1790 		{
1791 			if (is64Bit)
1792 			{
1793 				if (!areValuesCorrect<deUint64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1794 					return false;
1795 			}
1796 			else
1797 			{
1798 				if (!areValuesCorrect<deUint32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1799 					return false;
1800 			}
1801 		}
1802 		else if (isIntFormat(mapTextureFormat(m_format)))
1803 		{
1804 			if (is64Bit)
1805 			{
1806 				if (!areValuesCorrect<deInt64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1807 					return false;
1808 			}
1809 			else
1810 			{
1811 				if (!areValuesCorrect<deInt32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1812 					return false;
1813 			}
1814 		}
1815 		else
1816 		{
1817 			// 32-bit floating point
1818 			if (!areValuesCorrect<deInt32>(resultBuffer, true, x, y, z, gridSize, extendedGridSize))
1819 				return false;
1820 		}
1821 	}
1822 
1823 	return true;
1824 }
1825 
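// Gathers the N returned values and the N atomic arguments for one pixel, then asks
// verifyRecursive whether some ordering of the invocations explains the observed values.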
1826 template <typename T>
1827 bool BinaryAtomicIntermValuesInstance::areValuesCorrect(tcu::ConstPixelBufferAccess& resultBuffer, const bool isFloatingPoint, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
1828 {
1829 	T		resultValues[NUM_INVOCATIONS_PER_PIXEL];
1830 	T		atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
1831 	bool	argsUsed[NUM_INVOCATIONS_PER_PIXEL];
1832 
1833 	for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1834 	{
1835 		IVec3 gid(x + i*gridSize.x(), y, z);
1836 		T data = *((T*)resultBuffer.getPixelPtr(gid.x(), gid.y(), gid.z()));
1837 		if (isFloatingPoint)
1838 		{
1839 			float fData;
1840 			deMemcpy(&fData, &data, sizeof(fData));
1841 			data = static_cast<T>(fData);
1842 		}
1843 		resultValues[i] = data;
1844 		atomicArgs[i]	= getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1845 		argsUsed[i]		= false;
1846 	}
1847 
1848 	// Verify that the return values form a valid sequence.
1849 	return verifyRecursive(0, getOperationInitialValue<T>(m_operation), argsUsed, atomicArgs, resultValues);
1850 }
1851 
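// Backtracking search: at each step pick an unused invocation whose returned value
// matches the accumulated value so far, apply its argument, and recurse; succeeds if
// all NUM_INVOCATIONS_PER_PIXEL invocations can be ordered consistently.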
1852 template <typename T>
1853 bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32	index,
1854 														const T			valueSoFar,
1855 														bool			argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1856 														const T			atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1857 														const T			resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
1858 {
1859 	if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
1860 		return true;
1861 
1862 	for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1863 	{
1864 		if (!argsUsed[i] && resultValues[i] == valueSoFar)
1865 		{
1866 			argsUsed[i] = true;
1867 
1868 			if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
1869 			{
1870 				return true;
1871 			}
1872 
1873 			argsUsed[i] = false;
1874 		}
1875 	}
1876 
1877 	return false;
1878 }
1879 
1880 TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
1881 {
1882 	return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation, m_useTransfer, m_readType, m_backingType);
1883 }
1884 
1885 } // anonymous namespace
1886 
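// Builds the test hierarchy: operation / image type / (no)transfer / read type /
// backing type / format, with end-result and intermediate-values variants as leaves.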
1887 tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
1888 {
1889 	de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));
1890 
1891 	struct ImageParams
1892 	{
1893 		ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
1894 			: m_imageType	(imageType)
1895 			, m_imageSize	(imageSize)
1896 		{
1897 		}
1898 		const ImageType		m_imageType;
1899 		const tcu::UVec3	m_imageSize;
1900 	};
1901 
1902 	const ImageParams imageParamsArray[] =
1903 	{
1904 		ImageParams(IMAGE_TYPE_1D,			tcu::UVec3(64u, 1u, 1u)),
1905 		ImageParams(IMAGE_TYPE_1D_ARRAY,	tcu::UVec3(64u, 1u, 8u)),
1906 		ImageParams(IMAGE_TYPE_2D,			tcu::UVec3(64u, 64u, 1u)),
1907 		ImageParams(IMAGE_TYPE_2D_ARRAY,	tcu::UVec3(64u, 64u, 8u)),
1908 		ImageParams(IMAGE_TYPE_3D,			tcu::UVec3(48u, 48u, 8u)),
1909 		ImageParams(IMAGE_TYPE_CUBE,		tcu::UVec3(64u, 64u, 1u)),
1910 		ImageParams(IMAGE_TYPE_CUBE_ARRAY,	tcu::UVec3(64u, 64u, 2u)),
1911 		ImageParams(IMAGE_TYPE_BUFFER,		tcu::UVec3(64u, 1u, 1u))
1912 	};
1913 
1914 	const tcu::TextureFormat formats[] =
1915 	{
1916 		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
1917 		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
1918 		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::FLOAT),
1919 		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT64),
1920 		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT64)
1921 	};
1922 
1923 	const struct
1924 	{
1925 		ShaderReadType		type;
1926 		const char*			name;
1927 	} readTypes[] =
1928 	{
1929 		{	ShaderReadType::NORMAL,	"normal_read"	},
1930 		{	ShaderReadType::SPARSE,	"sparse_read"	},
1931 	};
1932 
1933 	const struct
1934 	{
1935 		ImageBackingType	type;
1936 		const char*			name;
1937 	} backingTypes[] =
1938 	{
1939 		{	ImageBackingType::NORMAL,	"normal_img"	},
1940 		{	ImageBackingType::SPARSE,	"sparse_img"	},
1941 	};
1942 
1943 	for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
1944 	{
1945 		const AtomicOperation operation = (AtomicOperation)operationI;
1946 
1947 		de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));
1948 
1949 		for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
1950 		{
1951 			const ImageType	 imageType = imageParamsArray[imageTypeNdx].m_imageType;
1952 			const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
1953 
1954 			de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
1955 
1956 			for (int useTransferIdx = 0; useTransferIdx < 2; ++useTransferIdx)
1957 			{
1958 				const bool				useTransfer	= (useTransferIdx > 0);
1959 				const string			groupName	= (!useTransfer ? "no" : "") + string("transfer");
1960 
1961 				de::MovePtr<tcu::TestCaseGroup> transferGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str(), ""));
1962 
1963 				for (int readTypeIdx = 0; readTypeIdx < DE_LENGTH_OF_ARRAY(readTypes); ++readTypeIdx)
1964 				{
1965 					const auto& readType = readTypes[readTypeIdx];
1966 
1967 					de::MovePtr<tcu::TestCaseGroup> readTypeGroup(new tcu::TestCaseGroup(testCtx, readType.name, ""));
1968 
1969 					for (int backingTypeIdx = 0; backingTypeIdx < DE_LENGTH_OF_ARRAY(backingTypes); ++backingTypeIdx)
1970 					{
1971 						const auto& backingType = backingTypes[backingTypeIdx];
1972 
1973 						de::MovePtr<tcu::TestCaseGroup> backingTypeGroup(new tcu::TestCaseGroup(testCtx, backingType.name, ""));
1974 
1975 						for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
1976 						{
1977 							const TextureFormat&	format		= formats[formatNdx];
1978 							const std::string		formatName	= getShaderImageFormatQualifier(format);
1979 
1980 							// Texel buffer cases other than float would need dedicated SPIR-V programs in vktImageAtomicSpirvShaders.cpp.
1981 							if (imageType == IMAGE_TYPE_BUFFER && (format.type != tcu::TextureFormat::FLOAT))
1982 							{
1983 								continue;
1984 							}
1985 
1986 							// Only 2D and 3D images may support sparse residency.
1987 							const auto vkImageType = mapImageType(imageType);
1988 							if (backingType.type == ImageBackingType::SPARSE && (vkImageType != VK_IMAGE_TYPE_2D && vkImageType != VK_IMAGE_TYPE_3D))
1989 								continue;
1990 
1991 							// Only some operations are supported on floating-point formats.
1992 							if (format.type == tcu::TextureFormat::FLOAT)
1993 							{
1994 								if (operation != ATOMIC_OPERATION_ADD &&
1995 									operation != ATOMIC_OPERATION_EXCHANGE &&
1996 									operation != ATOMIC_OPERATION_MIN &&
1997 									operation != ATOMIC_OPERATION_MAX)
1998 								{
1999 									continue;
2000 								}
2001 							}
2002 
2003 							if (readType.type == ShaderReadType::SPARSE)
2004 							{
2005 								// When using transfer, shader reads will not be used, so avoid creating two identical cases.
2006 								if (useTransfer)
2007 									continue;
2008 
2009 								// Sparse reads are not supported for all types of images.
2010 								if (imageType == IMAGE_TYPE_1D || imageType == IMAGE_TYPE_1D_ARRAY || imageType == IMAGE_TYPE_BUFFER)
2011 									continue;
2012 							}
2013 
2014 							// End-result case: checks the final value after all atomic operations, not the intermediate return values.
2015 							const string caseEndResult = formatName + "_end_result";
2016 							backingTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2017 
2018 							// Intermediate-values case: checks the values returned by the atomic calls, not the end result.
2019 							const string caseIntermValues = formatName + "_intermediate_values";
2020 							backingTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2021 						}
2022 
2023 						readTypeGroup->addChild(backingTypeGroup.release());
2024 					}
2025 
2026 					transferGroup->addChild(readTypeGroup.release());
2027 				}
2028 
2029 				imageTypeGroup->addChild(transferGroup.release());
2030 			}
2031 
2032 			operationGroup->addChild(imageTypeGroup.release());
2033 		}
2034 
2035 		imageAtomicOperationsTests->addChild(operationGroup.release());
2036 	}
2037 
2038 	return imageAtomicOperationsTests.release();
2039 }
2040 
2041 } // image
2042 } // vkt
2043