/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file  vktImageAtomicOperationTests.cpp
 * \brief Image atomic operation tests
 *//*--------------------------------------------------------------------*/

#include "vktImageAtomicOperationTests.hpp"
#include "vktImageAtomicSpirvShaders.hpp"

#include "deUniquePtr.hpp"
#include "deStringUtil.hpp"
#include "deSTLUtil.hpp"

#include "vktTestCaseUtil.hpp"
#include "vkPrograms.hpp"
#include "vkImageUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vktImageTestsUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"

#include "tcuTextureUtil.hpp"
#include "tcuTexture.hpp"
#include "tcuVectorType.hpp"
#include "tcuStringTemplate.hpp"

namespace vkt
{
namespace image
{
namespace
{

using namespace vk;
using namespace std;
using de::toString;

using tcu::TextureFormat;
using tcu::IVec2;
using tcu::IVec3;
using tcu::UVec3;
using tcu::Vec4;
using tcu::IVec4;
using tcu::UVec4;
using tcu::CubeFace;
using tcu::Texture1D;
using tcu::Texture2D;
using tcu::Texture3D;
using tcu::Texture2DArray;
using tcu::TextureCube;
using tcu::PixelBufferAccess;
using tcu::ConstPixelBufferAccess;
using tcu::Vector;
using tcu::TestContext;

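//! Each pixel is operated on by multiple shader invocations: the compute
//! dispatch multiplies the x dimension of the shader grid by this factor.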
enum
{
    NUM_INVOCATIONS_PER_PIXEL = 5u
};

enum AtomicOperation
{
    ATOMIC_OPERATION_ADD = 0,
    ATOMIC_OPERATION_SUB,
    ATOMIC_OPERATION_INC,
    ATOMIC_OPERATION_DEC,
    ATOMIC_OPERATION_MIN,
    ATOMIC_OPERATION_MAX,
    ATOMIC_OPERATION_AND,
    ATOMIC_OPERATION_OR,
    ATOMIC_OPERATION_XOR,
    ATOMIC_OPERATION_EXCHANGE,
    ATOMIC_OPERATION_COMPARE_EXCHANGE,

    ATOMIC_OPERATION_LAST
};

enum class ShaderReadType
{
    NORMAL = 0,
    SPARSE,
};

enum class ImageBackingType
{
    NORMAL = 0,
    SPARSE,
};

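//! Returns a GLSL coordinate expression (scalar, ivec2 or ivec3) matching the
//! dimensionality of the given image type.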
static string getCoordStr (const ImageType imageType,
                           const std::string& x,
                           const std::string& y,
                           const std::string& z)
{
    switch (imageType)
    {
        case IMAGE_TYPE_1D:
        case IMAGE_TYPE_BUFFER:
            return x;
        case IMAGE_TYPE_1D_ARRAY:
        case IMAGE_TYPE_2D:
            return string("ivec2(" + x + "," + y + ")");
        case IMAGE_TYPE_2D_ARRAY:
        case IMAGE_TYPE_3D:
        case IMAGE_TYPE_CUBE:
        case IMAGE_TYPE_CUBE_ARRAY:
            return string("ivec3(" + x + "," + y + "," + z + ")");
        default:
            DE_ASSERT(false);
            return string();
    }
}

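//! Maps the component width and format class to the matching GLSL scalar type;
//! getVec4TypeStr() below does the same for the 4-component vector types.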
static string getComponentTypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
{
    DE_ASSERT(intFormat || uintFormat || floatFormat);

    const bool is64 = (componentWidth == 64);

    if (intFormat)
        return (is64 ? "int64_t" : "int");
    if (uintFormat)
        return (is64 ? "uint64_t" : "uint");
    if (floatFormat)
        return (is64 ? "double" : "float");

    return "";
}

static string getVec4TypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
{
    DE_ASSERT(intFormat || uintFormat || floatFormat);

    const bool is64 = (componentWidth == 64);

    if (intFormat)
        return (is64 ? "i64vec4" : "ivec4");
    if (uintFormat)
        return (is64 ? "u64vec4" : "uvec4");
    if (floatFormat)
        return (is64 ? "f64vec4" : "vec4");

    return "";
}

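//! Returns the GLSL expression used as the operand of the atomic call; it must
//! mirror getAtomicFuncArgument() below so that the host-side reference
//! computation produces the same values as the shader.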
static string getAtomicFuncArgumentShaderStr (const AtomicOperation op,
                                              const string& x,
                                              const string& y,
                                              const string& z,
                                              const IVec3& gridSize)
{
    switch (op)
    {
        case ATOMIC_OPERATION_ADD:
        case ATOMIC_OPERATION_AND:
        case ATOMIC_OPERATION_OR:
        case ATOMIC_OPERATION_XOR:
            return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
        case ATOMIC_OPERATION_MIN:
        case ATOMIC_OPERATION_MAX:
            // Multiply by (1 - 2*(value % 2)) to make half of the data negative;
            // this will result in generating large numbers for uint formats.
            return string("((1 - 2*(" + x + " % 2)) * (" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + "))");
        case ATOMIC_OPERATION_EXCHANGE:
        case ATOMIC_OPERATION_COMPARE_EXCHANGE:
            return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
        default:
            DE_ASSERT(false);
            return string();
    }
}

static string getAtomicOperationCaseName (const AtomicOperation op)
{
    switch (op)
    {
        case ATOMIC_OPERATION_ADD:              return string("add");
        case ATOMIC_OPERATION_SUB:              return string("sub");
        case ATOMIC_OPERATION_INC:              return string("inc");
        case ATOMIC_OPERATION_DEC:              return string("dec");
        case ATOMIC_OPERATION_MIN:              return string("min");
        case ATOMIC_OPERATION_MAX:              return string("max");
        case ATOMIC_OPERATION_AND:              return string("and");
        case ATOMIC_OPERATION_OR:               return string("or");
        case ATOMIC_OPERATION_XOR:              return string("xor");
        case ATOMIC_OPERATION_EXCHANGE:         return string("exchange");
        case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("compare_exchange");
        default:
            DE_ASSERT(false);
            return string();
    }
}

static string getAtomicOperationShaderFuncName (const AtomicOperation op)
{
    switch (op)
    {
        case ATOMIC_OPERATION_ADD:              return string("imageAtomicAdd");
        case ATOMIC_OPERATION_MIN:              return string("imageAtomicMin");
        case ATOMIC_OPERATION_MAX:              return string("imageAtomicMax");
        case ATOMIC_OPERATION_AND:              return string("imageAtomicAnd");
        case ATOMIC_OPERATION_OR:               return string("imageAtomicOr");
        case ATOMIC_OPERATION_XOR:              return string("imageAtomicXor");
        case ATOMIC_OPERATION_EXCHANGE:         return string("imageAtomicExchange");
        case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("imageAtomicCompSwap");
        default:
            DE_ASSERT(false);
            return string();
    }
}

template <typename T>
T getOperationInitialValue (const AtomicOperation op)
{
    switch (op)
    {
        // \note 18 is just an arbitrary small nonzero value.
        case ATOMIC_OPERATION_ADD:              return 18;
        case ATOMIC_OPERATION_INC:              return 18;
        case ATOMIC_OPERATION_SUB:              return (1 << 24) - 1;
        case ATOMIC_OPERATION_DEC:              return (1 << 24) - 1;
        case ATOMIC_OPERATION_MIN:              return (1 << 15) - 1;
        case ATOMIC_OPERATION_MAX:              return 18;
        case ATOMIC_OPERATION_AND:              return (1 << 15) - 1;
        case ATOMIC_OPERATION_OR:               return 18;
        case ATOMIC_OPERATION_XOR:              return 18;
        case ATOMIC_OPERATION_EXCHANGE:         return 18;
        case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 18;
        default:
            DE_ASSERT(false);
            return 0xFFFFFFFF;
    }
}

template <>
deInt64 getOperationInitialValue<deInt64> (const AtomicOperation op)
{
    switch (op)
    {
        // \note 0x000000BEFFFFFF18 is just an arbitrary nonzero value.
        case ATOMIC_OPERATION_ADD:              return 0x000000BEFFFFFF18;
        case ATOMIC_OPERATION_INC:              return 0x000000BEFFFFFF18;
        case ATOMIC_OPERATION_SUB:              return (1ull << 56) - 1;
        case ATOMIC_OPERATION_DEC:              return (1ull << 56) - 1;
        case ATOMIC_OPERATION_MIN:              return (1ull << 47) - 1;
        case ATOMIC_OPERATION_MAX:              return 0x000000BEFFFFFF18;
        case ATOMIC_OPERATION_AND:              return (1ull << 47) - 1;
        case ATOMIC_OPERATION_OR:               return 0x000000BEFFFFFF18;
        case ATOMIC_OPERATION_XOR:              return 0x000000BEFFFFFF18;
        case ATOMIC_OPERATION_EXCHANGE:         return 0x000000BEFFFFFF18;
        case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 0x000000BEFFFFFF18;
        default:
            DE_ASSERT(false);
            return 0xFFFFFFFFFFFFFFFF;
    }
}

template <>
deUint64 getOperationInitialValue<deUint64> (const AtomicOperation op)
{
    return (deUint64)getOperationInitialValue<deInt64>(op);
}

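//! Host-side counterpart of getAtomicFuncArgumentShaderStr(): computes the
//! value each invocation passes to the atomic function.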
template <typename T>
static T getAtomicFuncArgument (const AtomicOperation op,
                                const IVec3& invocationID,
                                const IVec3& gridSize)
{
    const T x = static_cast<T>(invocationID.x());
    const T y = static_cast<T>(invocationID.y());
    const T z = static_cast<T>(invocationID.z());

    switch (op)
    {
        // \note Fall-throughs.
        case ATOMIC_OPERATION_ADD:
        case ATOMIC_OPERATION_SUB:
        case ATOMIC_OPERATION_AND:
        case ATOMIC_OPERATION_OR:
        case ATOMIC_OPERATION_XOR:
            return x*x + y*y + z*z;
        case ATOMIC_OPERATION_INC:
        case ATOMIC_OPERATION_DEC:
            return 1;
        case ATOMIC_OPERATION_MIN:
        case ATOMIC_OPERATION_MAX:
            // multiply half of the data by -1
            return (1-2*(x % 2))*(x*x + y*y + z*z);
        case ATOMIC_OPERATION_EXCHANGE:
        case ATOMIC_OPERATION_COMPARE_EXCHANGE:
            return (z*static_cast<T>(gridSize.x()) + x)*static_cast<T>(gridSize.y()) + y;
        default:
            DE_ASSERT(false);
            return -1;
    }
}

//! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried out (i.e. is both commutative and associative).
static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
{
    return op == ATOMIC_OPERATION_ADD ||
           op == ATOMIC_OPERATION_SUB ||
           op == ATOMIC_OPERATION_INC ||
           op == ATOMIC_OPERATION_DEC ||
           op == ATOMIC_OPERATION_MIN ||
           op == ATOMIC_OPERATION_MAX ||
           op == ATOMIC_OPERATION_AND ||
           op == ATOMIC_OPERATION_OR  ||
           op == ATOMIC_OPERATION_XOR;
}

//! Checks if the operation needs an SPIR-V shader.
static bool isSpirvAtomicOperation (const AtomicOperation op)
{
    return op == ATOMIC_OPERATION_SUB ||
           op == ATOMIC_OPERATION_INC ||
           op == ATOMIC_OPERATION_DEC;
}

//! Returns the SPIR-V assembler name of the given operation.
static std::string getSpirvAtomicOpName (const AtomicOperation op)
{
    switch (op)
    {
        case ATOMIC_OPERATION_SUB: return "OpAtomicISub";
        case ATOMIC_OPERATION_INC: return "OpAtomicIIncrement";
        case ATOMIC_OPERATION_DEC: return "OpAtomicIDecrement";
        default: break;
    }

    DE_ASSERT(false);
    return "";
}

//! Returns true if the given SPIR-V operation does not need the last argument, compared to OpAtomicIAdd.
static bool isSpirvAtomicNoLastArgOp (const AtomicOperation op)
{
    switch (op)
    {
        case ATOMIC_OPERATION_SUB: return false;
        case ATOMIC_OPERATION_INC: // fallthrough
        case ATOMIC_OPERATION_DEC: return true;
        default: break;
    }

    DE_ASSERT(false);
    return false;
}

//! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
template <typename T>
static T computeBinaryAtomicOperationResult (const AtomicOperation op, const T a, const T b)
{
    switch (op)
    {
        case ATOMIC_OPERATION_INC: // fallthrough.
        case ATOMIC_OPERATION_ADD:              return a + b;
        case ATOMIC_OPERATION_DEC: // fallthrough.
        case ATOMIC_OPERATION_SUB:              return a - b;
        case ATOMIC_OPERATION_MIN:              return de::min(a, b);
        case ATOMIC_OPERATION_MAX:              return de::max(a, b);
        case ATOMIC_OPERATION_AND:              return a & b;
        case ATOMIC_OPERATION_OR:               return a | b;
        case ATOMIC_OPERATION_XOR:              return a ^ b;
        case ATOMIC_OPERATION_EXCHANGE:         return b;
        case ATOMIC_OPERATION_COMPARE_EXCHANGE: return (a == (sizeof(T) == 8 ? 0xBEFFFFFF18 : 18)) ? b : a;
        default:
            DE_ASSERT(false);
            return -1;
    }
}

VkImageUsageFlags getUsageFlags (bool useTransfer)
{
    VkImageUsageFlags usageFlags = VK_IMAGE_USAGE_STORAGE_BIT;

    if (useTransfer)
        usageFlags |= (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);

    return usageFlags;
}

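//! Adds the compute shaders used to initialize the image from a buffer and to
//! read it back into a buffer when transfer (copy) operations are not used.
//! The sparse-residency variant additionally cross-checks imageLoad() against
//! sparseImageLoadARB().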
void AddFillReadShader (SourceCollections&        sourceCollections,
                        const ImageType&          imageType,
                        const tcu::TextureFormat& format,
                        const string&             componentType,
                        const string&             vec4Type)
{
    const string imageInCoord         = getCoordStr(imageType, "gx", "gy", "gz");
    const string shaderImageFormatStr = getShaderImageFormatQualifier(format);
    const string shaderImageTypeStr   = getShaderImageType(format, imageType);
    const auto   componentWidth       = getFormatComponentWidth(mapTextureFormat(format), 0u);
    const string extensions           = ((componentWidth == 64u)
                                         ? "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
                                           "#extension GL_EXT_shader_image_int64 : require\n"
                                         : "");

    const string fillShader = "#version 450\n"
        + extensions +
        "precision highp " + shaderImageTypeStr + ";\n"
        "\n"
        "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
        "\n"
        "layout(std430, binding = 1) buffer inputBuffer\n"
        "{\n"
        "    " + componentType + " data[];\n"
        "} inBuffer;\n"
        "\n"
        "void main(void)\n"
        "{\n"
        "    int gx = int(gl_GlobalInvocationID.x);\n"
        "    int gy = int(gl_GlobalInvocationID.y);\n"
        "    int gz = int(gl_GlobalInvocationID.z);\n"
        "    uint index = gx + (gy * gl_NumWorkGroups.x) + (gz * gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
        "    imageStore(u_resultImage, " + imageInCoord + ", " + vec4Type + "(inBuffer.data[index]));\n"
        "}\n";

    const string readShader = "#version 450\n"
        + extensions +
        "precision highp " + shaderImageTypeStr + ";\n"
        "\n"
        "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
        "\n"
        "layout(std430, binding = 1) buffer outputBuffer\n"
        "{\n"
        "    " + componentType + " data[];\n"
        "} outBuffer;\n"
        "\n"
        "void main(void)\n"
        "{\n"
        "    int gx = int(gl_GlobalInvocationID.x);\n"
        "    int gy = int(gl_GlobalInvocationID.y);\n"
        "    int gz = int(gl_GlobalInvocationID.z);\n"
        "    uint index = gx + (gy * gl_NumWorkGroups.x) + (gz * gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
        "    outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
        "}\n";

    if ((imageType != IMAGE_TYPE_1D) &&
        (imageType != IMAGE_TYPE_1D_ARRAY) &&
        (imageType != IMAGE_TYPE_BUFFER))
    {
        const string readShaderResidency = "#version 450\n"
            "#extension GL_ARB_sparse_texture2 : require\n"
            + extensions +
            "precision highp " + shaderImageTypeStr + ";\n"
            "\n"
            "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
            "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
            "\n"
            "layout(std430, binding = 1) buffer outputBuffer\n"
            "{\n"
            "    " + componentType + " data[];\n"
            "} outBuffer;\n"
            "\n"
            "void main(void)\n"
            "{\n"
            "    int gx = int(gl_GlobalInvocationID.x);\n"
            "    int gy = int(gl_GlobalInvocationID.y);\n"
            "    int gz = int(gl_GlobalInvocationID.z);\n"
            "    uint index = gx + (gy * gl_NumWorkGroups.x) + (gz * gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
            "    outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
            "    " + vec4Type + " sparseValue;\n"
            "    sparseImageLoadARB(u_resultImage, " + imageInCoord + ", sparseValue);\n"
            "    if (outBuffer.data[index] != sparseValue.x)\n"
            "        outBuffer.data[index] = " + vec4Type + "(1234).x;\n"
            "}\n";

        sourceCollections.glslSources.add("readShaderResidency") << glu::ComputeSource(readShaderResidency.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
    }

    sourceCollections.glslSources.add("fillShader") << glu::ComputeSource(fillShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
    sourceCollections.glslSources.add("readShader") << glu::ComputeSource(readShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
}

//! Prepare the initial data for the image
static void initDataForImage (const VkDevice         device,
                              const DeviceInterface& deviceInterface,
                              const TextureFormat&   format,
                              const AtomicOperation  operation,
                              const tcu::UVec3&      gridSize,
                              Buffer&                buffer)
{
    Allocation&            bufferAllocation = buffer.getAllocation();
    const VkFormat         imageFormat      = mapTextureFormat(format);
    tcu::PixelBufferAccess pixelBuffer      (format, gridSize.x(), gridSize.y(), gridSize.z(), bufferAllocation.getHostPtr());

    if (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)
    {
        const deInt64 initialValue (getOperationInitialValue<deInt64>(operation));

        for (deUint32 z = 0; z < gridSize.z(); z++)
        for (deUint32 y = 0; y < gridSize.y(); y++)
        for (deUint32 x = 0; x < gridSize.x(); x++)
        {
            *((deInt64*)pixelBuffer.getPixelPtr(x, y, z)) = initialValue;
        }
    }
    else
    {
        const tcu::IVec4 initialValue (getOperationInitialValue<deInt32>(operation));

        for (deUint32 z = 0; z < gridSize.z(); z++)
        for (deUint32 y = 0; y < gridSize.y(); y++)
        for (deUint32 x = 0; x < gridSize.x(); x++)
        {
            pixelBuffer.setPixel(initialValue, x, y, z);
        }
    }

    flushAlloc(deviceInterface, device, bufferAllocation);
}

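//! Checks the format, sparse-backing and atomic-feature support common to all
//! test cases, throwing NotSupportedError where a capability is missing.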
void commonCheckSupport (Context& context, const tcu::TextureFormat& tcuFormat, ImageType imageType, AtomicOperation operation, bool useTransfer, ShaderReadType readType, ImageBackingType backingType)
{
    const VkFormat             format             = mapTextureFormat(tcuFormat);
    const VkImageType          vkImgType          = mapImageType(imageType);
    const VkFormatFeatureFlags texelBufferSupport = (VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT);
    const VkFormatProperties   formatProperties   = getPhysicalDeviceFormatProperties(context.getInstanceInterface(), context.getPhysicalDevice(), format);

    if ((imageType == IMAGE_TYPE_BUFFER) &&
        ((formatProperties.bufferFeatures & texelBufferSupport) != texelBufferSupport))
        TCU_THROW(NotSupportedError, "Atomic storage texel buffers not supported");

    if (imageType == IMAGE_TYPE_CUBE_ARRAY)
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_IMAGE_CUBE_ARRAY);

    if (backingType == ImageBackingType::SPARSE)
    {
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);

        switch (vkImgType)
        {
            case VK_IMAGE_TYPE_2D: context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE2D); break;
            case VK_IMAGE_TYPE_3D: context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE3D); break;
            default: DE_ASSERT(false); break;
        }

        if (!checkSparseImageFormatSupport(context.getPhysicalDevice(), context.getInstanceInterface(), format, vkImgType, VK_SAMPLE_COUNT_1_BIT, getUsageFlags(useTransfer), VK_IMAGE_TILING_OPTIMAL))
            TCU_THROW(NotSupportedError, "Format does not support sparse images");
    }

    if (isFloatFormat(format))
    {
        context.requireDeviceFunctionality("VK_EXT_shader_atomic_float");

        const VkFormatFeatureFlags requiredFeatures    = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
        const auto&                atomicFloatFeatures = context.getShaderAtomicFloatFeaturesEXT();

        if (!atomicFloatFeatures.shaderImageFloat32Atomics)
            TCU_THROW(NotSupportedError, "shaderImageFloat32Atomics not supported");

        if ((operation == ATOMIC_OPERATION_ADD) && !atomicFloatFeatures.shaderImageFloat32AtomicAdd)
            TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicAdd not supported");

        if (operation == ATOMIC_OPERATION_MIN || operation == ATOMIC_OPERATION_MAX)
        {
            context.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
            if (!context.getShaderAtomicFloat2FeaturesEXT().shaderImageFloat32AtomicMinMax)
            {
                TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicMinMax not supported");
            }
        }

        if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
            TCU_FAIL("Required format feature bits not supported");

        if (backingType == ImageBackingType::SPARSE)
        {
            if (!atomicFloatFeatures.sparseImageFloat32Atomics)
                TCU_THROW(NotSupportedError, "sparseImageFloat32Atomics not supported");

            if (operation == ATOMIC_OPERATION_ADD && !atomicFloatFeatures.sparseImageFloat32AtomicAdd)
                TCU_THROW(NotSupportedError, "sparseImageFloat32AtomicAdd not supported");
        }
    }
    else if (format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
    {
        context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");

        const VkFormatFeatureFlags requiredFeatures    = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
        const auto&                atomicInt64Features = context.getShaderImageAtomicInt64FeaturesEXT();

        if (!atomicInt64Features.shaderImageInt64Atomics)
            TCU_THROW(NotSupportedError, "shaderImageInt64Atomics not supported");

        if (backingType == ImageBackingType::SPARSE && !atomicInt64Features.sparseImageInt64Atomics)
            TCU_THROW(NotSupportedError, "sparseImageInt64Atomics not supported");

        if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
            TCU_FAIL("Mandatory format features not supported");
    }

    if (useTransfer)
    {
        const VkFormatFeatureFlags transferFeatures = (VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT);
        if ((formatProperties.optimalTilingFeatures & transferFeatures) != transferFeatures)
            TCU_THROW(NotSupportedError, "Transfer features not supported for this format");
    }

    if (readType == ShaderReadType::SPARSE)
    {
        DE_ASSERT(imageType != IMAGE_TYPE_1D && imageType != IMAGE_TYPE_1D_ARRAY && imageType != IMAGE_TYPE_BUFFER);
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_RESOURCE_RESIDENCY);
    }
}

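//! Test case that verifies the final contents of the image after all atomic
//! operations have completed.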
class BinaryAtomicEndResultCase : public vkt::TestCase
{
public:
    BinaryAtomicEndResultCase (tcu::TestContext&         testCtx,
                               const string&             name,
                               const string&             description,
                               const ImageType           imageType,
                               const tcu::UVec3&         imageSize,
                               const tcu::TextureFormat& format,
                               const AtomicOperation     operation,
                               const bool                useTransfer,
                               const ShaderReadType      shaderReadType,
                               const ImageBackingType    backingType,
                               const glu::GLSLVersion    glslVersion);

    void          initPrograms   (SourceCollections& sourceCollections) const;
    TestInstance* createInstance (Context& context) const;
    virtual void  checkSupport   (Context& context) const;

private:
    const ImageType          m_imageType;
    const tcu::UVec3         m_imageSize;
    const tcu::TextureFormat m_format;
    const AtomicOperation    m_operation;
    const bool               m_useTransfer;
    const ShaderReadType     m_readType;
    const ImageBackingType   m_backingType;
    const glu::GLSLVersion   m_glslVersion;
};

BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext&         testCtx,
                                                      const string&             name,
                                                      const string&             description,
                                                      const ImageType           imageType,
                                                      const tcu::UVec3&         imageSize,
                                                      const tcu::TextureFormat& format,
                                                      const AtomicOperation     operation,
                                                      const bool                useTransfer,
                                                      const ShaderReadType      shaderReadType,
                                                      const ImageBackingType    backingType,
                                                      const glu::GLSLVersion    glslVersion)
    : TestCase      (testCtx, name, description)
    , m_imageType   (imageType)
    , m_imageSize   (imageSize)
    , m_format      (format)
    , m_operation   (operation)
    , m_useTransfer (useTransfer)
    , m_readType    (shaderReadType)
    , m_backingType (backingType)
    , m_glslVersion (glslVersion)
{
}

void BinaryAtomicEndResultCase::checkSupport (Context& context) const
{
    commonCheckSupport(context, m_format, m_imageType, m_operation, m_useTransfer, m_readType, m_backingType);
}

void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
{
    const VkFormat imageFormat    = mapTextureFormat(m_format);
    const deUint32 componentWidth = getFormatComponentWidth(imageFormat, 0);
    const bool     intFormat      = isIntFormat(imageFormat);
    const bool     uintFormat     = isUintFormat(imageFormat);
    const bool     floatFormat    = isFloatFormat(imageFormat);
    const string   type           = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
    const string   vec4Type       = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);

    AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);

    if (isSpirvAtomicOperation(m_operation))
    {
        const CaseVariant                  caseVariant    {m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_END_RESULTS};
        const tcu::StringTemplate          shaderTemplate {getSpirvAtomicOpShader(caseVariant)};
        std::map<std::string, std::string> specializations;

        specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
        if (isSpirvAtomicNoLastArgOp(m_operation))
            specializations["LASTARG"] = "";

        sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
    }
    else
    {
        const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);

        const UVec3  gridSize    = getShaderGridSize(m_imageType, m_imageSize);
        const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");

        const string atomicArgExpr = type + getAtomicFuncArgumentShaderStr(m_operation,
                                                                           "gx", "gy", "gz",
                                                                           IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));

        const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
                                          ? (componentWidth == 64 ? ", 820338753304" : ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "")
                                          : "";
        const string atomicInvocation     = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
        const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
        const string shaderImageTypeStr   = getShaderImageType(m_format, m_imageType);
        const string extensions           = "#extension GL_EXT_shader_atomic_float : enable\n"
                                            "#extension GL_EXT_shader_atomic_float2 : enable\n"
                                            "#extension GL_KHR_memory_scope_semantics : enable";

        string source = versionDecl + "\n" + extensions + "\n";

        if (64 == componentWidth)
        {
            source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
                      "#extension GL_EXT_shader_image_int64 : require\n";
        }

        source += "precision highp " + shaderImageTypeStr + ";\n"
                  "\n"
                  "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
                  "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
                  "\n"
                  "void main (void)\n"
                  "{\n"
                  "    int gx = int(gl_GlobalInvocationID.x);\n"
                  "    int gy = int(gl_GlobalInvocationID.y);\n"
                  "    int gz = int(gl_GlobalInvocationID.z);\n"
                  "    " + atomicInvocation + ";\n"
                  "}\n";

        sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
    }
}

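//! Test case that verifies the intermediate values returned by each atomic
//! invocation, written out to a second image.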
class BinaryAtomicIntermValuesCase : public vkt::TestCase
{
public:
    BinaryAtomicIntermValuesCase (tcu::TestContext&         testCtx,
                                  const string&             name,
                                  const string&             description,
                                  const ImageType           imageType,
                                  const tcu::UVec3&         imageSize,
                                  const tcu::TextureFormat& format,
                                  const AtomicOperation     operation,
                                  const bool                useTransfer,
                                  const ShaderReadType      shaderReadType,
                                  const ImageBackingType    backingType,
                                  const glu::GLSLVersion    glslVersion);

    void          initPrograms   (SourceCollections& sourceCollections) const;
    TestInstance* createInstance (Context& context) const;
    virtual void  checkSupport   (Context& context) const;

private:
    const ImageType          m_imageType;
    const tcu::UVec3         m_imageSize;
    const tcu::TextureFormat m_format;
    const AtomicOperation    m_operation;
    const bool               m_useTransfer;
    const ShaderReadType     m_readType;
    const ImageBackingType   m_backingType;
    const glu::GLSLVersion   m_glslVersion;
};

BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext&           testCtx,
                                                            const string&          name,
                                                            const string&          description,
                                                            const ImageType        imageType,
                                                            const tcu::UVec3&      imageSize,
                                                            const TextureFormat&   format,
                                                            const AtomicOperation  operation,
                                                            const bool             useTransfer,
                                                            const ShaderReadType   shaderReadType,
                                                            const ImageBackingType backingType,
                                                            const glu::GLSLVersion glslVersion)
    : TestCase      (testCtx, name, description)
    , m_imageType   (imageType)
    , m_imageSize   (imageSize)
    , m_format      (format)
    , m_operation   (operation)
    , m_useTransfer (useTransfer)
    , m_readType    (shaderReadType)
    , m_backingType (backingType)
    , m_glslVersion (glslVersion)
{
}

void BinaryAtomicIntermValuesCase::checkSupport (Context& context) const
{
    commonCheckSupport(context, m_format, m_imageType, m_operation, m_useTransfer, m_readType, m_backingType);
}

void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
{
    const VkFormat imageFormat    = mapTextureFormat(m_format);
    const deUint32 componentWidth = getFormatComponentWidth(imageFormat, 0);
    const bool     intFormat      = isIntFormat(imageFormat);
    const bool     uintFormat     = isUintFormat(imageFormat);
    const bool     floatFormat    = isFloatFormat(imageFormat);
    const string   type           = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
    const string   vec4Type       = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);

    AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);

    if (isSpirvAtomicOperation(m_operation))
    {
        const CaseVariant                  caseVariant    {m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_INTERMEDIATE_RESULTS};
        const tcu::StringTemplate          shaderTemplate {getSpirvAtomicOpShader(caseVariant)};
        std::map<std::string, std::string> specializations;

        specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
        if (isSpirvAtomicNoLastArgOp(m_operation))
            specializations["LASTARG"] = "";

        sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
    }
    else
    {
        const string versionDecl     = glu::getGLSLVersionDeclaration(m_glslVersion);
        const UVec3  gridSize        = getShaderGridSize(m_imageType, m_imageSize);
        const string atomicCoord     = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
        const string invocationCoord = getCoordStr(m_imageType, "gx", "gy", "gz");
        const string atomicArgExpr   = type + getAtomicFuncArgumentShaderStr(m_operation,
                                                                             "gx", "gy", "gz",
                                                                             IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));

        const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
                                          ? (componentWidth == 64 ? ", 820338753304" : ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "")
                                          : "";
        const string atomicInvocation     = getAtomicOperationShaderFuncName(m_operation) +
                                            "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
        const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
        const string shaderImageTypeStr   = getShaderImageType(m_format, m_imageType);
        const string extensions           = "#extension GL_EXT_shader_atomic_float : enable\n"
                                            "#extension GL_EXT_shader_atomic_float2 : enable\n"
                                            "#extension GL_KHR_memory_scope_semantics : enable";

        string source = versionDecl + "\n" + extensions + "\n"
                        "\n";

        if (64 == componentWidth)
        {
            source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
                      "#extension GL_EXT_shader_image_int64 : require\n";
        }

        source += "precision highp " + shaderImageTypeStr + ";\n"
                  "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
                  "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
                  "layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
                  "\n"
                  "void main (void)\n"
                  "{\n"
                  "    int gx = int(gl_GlobalInvocationID.x);\n"
                  "    int gy = int(gl_GlobalInvocationID.y);\n"
                  "    int gz = int(gl_GlobalInvocationID.z);\n"
                  "    imageStore(u_intermValuesImage, " + invocationCoord + ", " + vec4Type + "(" + atomicInvocation + "));\n"
                  "}\n";

        sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
    }
}

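//! Common base for both instance types: owns the image, buffers and
//! descriptors, and drives the record/submit/verify sequence in iterate().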
class BinaryAtomicInstanceBase : public vkt::TestInstance
{
public:

    BinaryAtomicInstanceBase (Context&               context,
                              const string&          name,
                              const ImageType        imageType,
                              const tcu::UVec3&      imageSize,
                              const TextureFormat&   format,
                              const AtomicOperation  operation,
                              const bool             useTransfer,
                              const ShaderReadType   shaderReadType,
                              const ImageBackingType backingType);

    tcu::TestStatus iterate (void);

    virtual deUint32 getOutputBufferSize (void) const = 0;

    virtual void prepareResources   (const bool useTransfer) = 0;
    virtual void prepareDescriptors (const bool isTexelBuffer) = 0;

    virtual void commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const = 0;
    virtual void commandsAfterCompute  (const VkCommandBuffer  cmdBuffer,
                                        const VkPipeline       pipeline,
                                        const VkPipelineLayout pipelineLayout,
                                        const VkDescriptorSet  descriptorSet,
                                        const VkDeviceSize&    range,
                                        const bool             useTransfer) = 0;

    virtual bool verifyResult (Allocation& outputBufferAllocation,
                               const bool  is64Bit) const = 0;

protected:

    void shaderFillImage (const VkCommandBuffer  cmdBuffer,
                          const VkBuffer&        buffer,
                          const VkPipeline       pipeline,
                          const VkPipelineLayout pipelineLayout,
                          const VkDescriptorSet  descriptorSet,
                          const VkDeviceSize&    range,
                          const tcu::UVec3&      gridSize);

    void createImageAndView (VkFormat            imageFormat,
                             const tcu::UVec3&   imageExent,
                             bool                useTransfer,
                             de::MovePtr<Image>& imagePtr,
                             Move<VkImageView>&  imageViewPtr);

    void createImageResources (const VkFormat& imageFormat,
                               const bool      useTransfer);

    const string           m_name;
    const ImageType        m_imageType;
    const tcu::UVec3       m_imageSize;
    const TextureFormat    m_format;
    const AtomicOperation  m_operation;
    const bool             m_useTransfer;
    const ShaderReadType   m_readType;
    const ImageBackingType m_backingType;

    de::MovePtr<Buffer>         m_inputBuffer;
    de::MovePtr<Buffer>         m_outputBuffer;
    Move<VkBufferView>          m_descResultBufferView;
    Move<VkBufferView>          m_descIntermResultsBufferView;
    Move<VkDescriptorPool>      m_descriptorPool;
    Move<VkDescriptorSetLayout> m_descriptorSetLayout;
    Move<VkDescriptorSet>       m_descriptorSet;

    Move<VkDescriptorSetLayout> m_descriptorSetLayoutNoTransfer;
    Move<VkDescriptorPool>      m_descriptorPoolNoTransfer;

    de::MovePtr<Image> m_resultImage;
    Move<VkImageView>  m_resultImageView;

    std::vector<VkSemaphore> m_waitSemaphores;
};

BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context&               context,
                                                    const string&          name,
                                                    const ImageType        imageType,
                                                    const tcu::UVec3&      imageSize,
                                                    const TextureFormat&   format,
                                                    const AtomicOperation  operation,
                                                    const bool             useTransfer,
                                                    const ShaderReadType   shaderReadType,
                                                    const ImageBackingType backingType)
    : vkt::TestInstance (context)
    , m_name            (name)
    , m_imageType       (imageType)
    , m_imageSize       (imageSize)
    , m_format          (format)
    , m_operation       (operation)
    , m_useTransfer     (useTransfer)
    , m_readType        (shaderReadType)
    , m_backingType     (backingType)
{
}

tcu::TestStatus BinaryAtomicInstanceBase::iterate (void)
{
    const VkDevice         device             = m_context.getDevice();
    const DeviceInterface& deviceInterface    = m_context.getDeviceInterface();
    const VkQueue          queue              = m_context.getUniversalQueue();
    const deUint32         queueFamilyIndex   = m_context.getUniversalQueueFamilyIndex();
    Allocator&             allocator          = m_context.getDefaultAllocator();
    const VkDeviceSize     imageSizeInBytes   = tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
    const VkDeviceSize     outBuffSizeInBytes = getOutputBufferSize();
    const VkFormat         imageFormat        = mapTextureFormat(m_format);
    const bool             isTexelBuffer      = (m_imageType == IMAGE_TYPE_BUFFER);

    if (!isTexelBuffer)
    {
        createImageResources(imageFormat, m_useTransfer);
    }

    tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);

    // Prepare the buffer with the initial data for the image.
    m_inputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface,
                                                   device,
                                                   allocator,
                                                   makeBufferCreateInfo(imageSizeInBytes,
                                                                        VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
                                                                        VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                                                        (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
                                                   MemoryRequirement::HostVisible));

    // Fill in buffer with initial data used for image.
    initDataForImage(device, deviceInterface, m_format, m_operation, gridSize, *m_inputBuffer);

    // Create a buffer to store shader output copied from result image.
    m_outputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface,
                                                    device,
                                                    allocator,
                                                    makeBufferCreateInfo(outBuffSizeInBytes,
                                                                         VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                                                                         VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                                                         (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
                                                    MemoryRequirement::HostVisible));

    if (!isTexelBuffer)
    {
        prepareResources(m_useTransfer);
    }

    prepareDescriptors(isTexelBuffer);

    Move<VkDescriptorSet>  descriptorSetFillImage;
    Move<VkShaderModule>   shaderModuleFillImage;
    Move<VkPipelineLayout> pipelineLayoutFillImage;
    Move<VkPipeline>       pipelineFillImage;

    Move<VkDescriptorSet>  descriptorSetReadImage;
    Move<VkShaderModule>   shaderModuleReadImage;
    Move<VkPipelineLayout> pipelineLayoutReadImage;
    Move<VkPipeline>       pipelineReadImage;
    if (!m_useTransfer)
    {
        m_descriptorSetLayoutNoTransfer =
            DescriptorSetLayoutBuilder()
            .addSingleBinding((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), VK_SHADER_STAGE_COMPUTE_BIT)
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(deviceInterface, device);

        m_descriptorPoolNoTransfer =
            DescriptorPoolBuilder()
            .addType((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), 2)
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2)
            .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);

        descriptorSetFillImage = makeDescriptorSet(deviceInterface,
                                                   device,
                                                   *m_descriptorPoolNoTransfer,
                                                   *m_descriptorSetLayoutNoTransfer);

        descriptorSetReadImage = makeDescriptorSet(deviceInterface,
                                                   device,
                                                   *m_descriptorPoolNoTransfer,
                                                   *m_descriptorSetLayoutNoTransfer);

        shaderModuleFillImage   = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("fillShader"), 0);
        pipelineLayoutFillImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
        pipelineFillImage       = makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleFillImage);

        if (m_readType == ShaderReadType::SPARSE)
        {
            shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShaderResidency"), 0);
        }
        else
        {
            shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShader"), 0);
        }
        pipelineLayoutReadImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
        pipelineReadImage       = makeComputePipeline(deviceInterface, device, *pipelineLayoutReadImage, *shaderModuleReadImage);
    }

    // Create pipeline
    const Unique<VkShaderModule>   shaderModule   (createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
    const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
    const Unique<VkPipeline>       pipeline       (makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));

    // Create command buffer
    const Unique<VkCommandPool>   cmdPool   (createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer (allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    beginCommandBuffer(deviceInterface, *cmdBuffer);

    if (!isTexelBuffer)
    {
        if (m_useTransfer)
        {
            const vector<VkBufferImageCopy> bufferImageCopy(1, makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize)));
            copyBufferToImage(deviceInterface,
                              *cmdBuffer,
                              *(*m_inputBuffer),
                              imageSizeInBytes,
                              bufferImageCopy,
                              VK_IMAGE_ASPECT_COLOR_BIT,
                              1,
                              getNumLayers(m_imageType, m_imageSize), m_resultImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
        }
        else
        {
            shaderFillImage(*cmdBuffer, *(*m_inputBuffer), *pipelineFillImage, *pipelineLayoutFillImage, *descriptorSetFillImage, imageSizeInBytes, gridSize);
        }
        commandsBeforeCompute(*cmdBuffer);
    }

    deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
    deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);

    deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());

    commandsAfterCompute(*cmdBuffer,
                         *pipelineReadImage,
                         *pipelineLayoutReadImage,
                         *descriptorSetReadImage,
                         outBuffSizeInBytes,
                         m_useTransfer);

    const VkBufferMemoryBarrier outputBufferPreHostReadBarrier
        = makeBufferMemoryBarrier(((m_useTransfer || isTexelBuffer) ? VK_ACCESS_TRANSFER_WRITE_BIT : VK_ACCESS_SHADER_WRITE_BIT),
                                  VK_ACCESS_HOST_READ_BIT,
                                  m_outputBuffer->get(),
                                  0ull,
                                  outBuffSizeInBytes);

    deviceInterface.cmdPipelineBarrier(*cmdBuffer,
                                       ((m_useTransfer || isTexelBuffer) ? VK_PIPELINE_STAGE_TRANSFER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT),
                                       VK_PIPELINE_STAGE_HOST_BIT,
                                       DE_FALSE, 0u, DE_NULL,
                                       1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);

    endCommandBuffer(deviceInterface, *cmdBuffer);

    std::vector<VkPipelineStageFlags> waitStages(m_waitSemaphores.size(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
    submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer, false, 1u,
                          static_cast<deUint32>(m_waitSemaphores.size()), de::dataOrNull(m_waitSemaphores), de::dataOrNull(waitStages));

    Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();

    invalidateAlloc(deviceInterface, device, outputBufferAllocation);

    if (verifyResult(outputBufferAllocation, (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)))
        return tcu::TestStatus::pass("Comparison succeeded");
    else
        return tcu::TestStatus::fail("Comparison failed");
}

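//! Fills the result image from the given buffer using the fill compute
//! pipeline passed in, transitioning the image to GENERAL layout and making
//! the writes visible to the atomic pass.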
void BinaryAtomicInstanceBase::shaderFillImage (const VkCommandBuffer  cmdBuffer,
                                                const VkBuffer&        buffer,
                                                const VkPipeline       pipeline,
                                                const VkPipelineLayout pipelineLayout,
                                                const VkDescriptorSet  descriptorSet,
                                                const VkDeviceSize&    range,
                                                const tcu::UVec3&      gridSize)
{
    const VkDevice                device               = m_context.getDevice();
    const DeviceInterface&        deviceInterface      = m_context.getDeviceInterface();
    const VkDescriptorImageInfo   descResultImageInfo  = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
    const VkDescriptorBufferInfo  descResultBufferInfo = makeDescriptorBufferInfo(buffer, 0, range);
    const VkImageSubresourceRange subresourceRange     = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

    DescriptorSetUpdateBuilder()
        .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
        .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
        .update(deviceInterface, device);

    const VkImageMemoryBarrier imageBarrierPre = makeImageMemoryBarrier(0,
                                                                        VK_ACCESS_SHADER_WRITE_BIT,
                                                                        VK_IMAGE_LAYOUT_UNDEFINED,
                                                                        VK_IMAGE_LAYOUT_GENERAL,
                                                                        m_resultImage->get(),
                                                                        subresourceRange);

    deviceInterface.cmdPipelineBarrier(cmdBuffer,
                                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                                       VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                       (VkDependencyFlags)0,
                                       0, (const VkMemoryBarrier*)DE_NULL,
                                       0, (const VkBufferMemoryBarrier*)DE_NULL,
                                       1, &imageBarrierPre);

    deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
    deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);

    deviceInterface.cmdDispatch(cmdBuffer, gridSize.x(), gridSize.y(), gridSize.z());

    const VkImageMemoryBarrier imageBarrierPost = makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT,
                                                                         VK_ACCESS_SHADER_READ_BIT,
                                                                         VK_IMAGE_LAYOUT_GENERAL,
                                                                         VK_IMAGE_LAYOUT_GENERAL,
                                                                         m_resultImage->get(),
                                                                         subresourceRange);

    deviceInterface.cmdPipelineBarrier(cmdBuffer,
                                       VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                       VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                       (VkDependencyFlags)0,
                                       0, (const VkMemoryBarrier*)DE_NULL,
                                       0, (const VkBufferMemoryBarrier*)DE_NULL,
                                       1, &imageBarrierPost);
}

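//! Creates the result image and its view; sparse-backed images are bound on
//! the sparse queue and their bind semaphore is collected so the main submit
//! can wait on it.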
void BinaryAtomicInstanceBase::createImageAndView (VkFormat            imageFormat,
                                                   const tcu::UVec3&   imageExent,
                                                   bool                useTransfer,
                                                   de::MovePtr<Image>& imagePtr,
                                                   Move<VkImageView>&  imageViewPtr)
{
    const VkDevice          device          = m_context.getDevice();
    const DeviceInterface&  deviceInterface = m_context.getDeviceInterface();
    Allocator&              allocator       = m_context.getDefaultAllocator();
    const VkImageUsageFlags usageFlags      = getUsageFlags(useTransfer);
    VkImageCreateFlags      createFlags     = 0u;

    if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
        createFlags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;

    const auto numLayers = getNumLayers(m_imageType, m_imageSize);

    VkImageCreateInfo createInfo =
    {
        VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType       sType;
        DE_NULL,                             // const void*           pNext;
        createFlags,                         // VkImageCreateFlags    flags;
        mapImageType(m_imageType),           // VkImageType           imageType;
        imageFormat,                         // VkFormat              format;
        makeExtent3D(imageExent),            // VkExtent3D            extent;
        1u,                                  // deUint32              mipLevels;
        numLayers,                           // deUint32              arrayLayers;
        VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
        VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling         tiling;
        usageFlags,                          // VkImageUsageFlags     usage;
        VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode         sharingMode;
        0u,                                  // deUint32              queueFamilyIndexCount;
        DE_NULL,                             // const deUint32*       pQueueFamilyIndices;
        VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout         initialLayout;
    };

    if (m_backingType == ImageBackingType::SPARSE)
    {
        const auto&    vki            = m_context.getInstanceInterface();
        const auto     physicalDevice = m_context.getPhysicalDevice();
        const auto     sparseQueue    = m_context.getSparseQueue();
        const auto     sparseQueueIdx = m_context.getSparseQueueFamilyIndex();
        const auto     universalQIdx  = m_context.getUniversalQueueFamilyIndex();
        const deUint32 queueIndices[] = { universalQIdx, sparseQueueIdx };

        createInfo.flags |= (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);

        if (sparseQueueIdx != universalQIdx)
        {
            createInfo.sharingMode           = VK_SHARING_MODE_CONCURRENT;
            createInfo.queueFamilyIndexCount = static_cast<deUint32>(DE_LENGTH_OF_ARRAY(queueIndices));
            createInfo.pQueueFamilyIndices   = queueIndices;
        }

        const auto sparseImage = new SparseImage(deviceInterface, device, physicalDevice, vki, createInfo, sparseQueue, allocator, m_format);
        m_waitSemaphores.push_back(sparseImage->getSemaphore());
        imagePtr = de::MovePtr<Image>(sparseImage);
    }
    else
        imagePtr = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, createInfo, MemoryRequirement::Any));

    const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numLayers);

    imageViewPtr = makeImageView(deviceInterface, device, imagePtr->get(), mapImageViewType(m_imageType), imageFormat, subresourceRange);
}

void BinaryAtomicInstanceBase::createImageResources (const VkFormat& imageFormat,
                                                     const bool      useTransfer)
{
    // Create the image that is going to store results of atomic operations.
    createImageAndView(imageFormat, getLayerSize(m_imageType, m_imageSize), useTransfer, m_resultImage, m_resultImageView);
}

1291 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
1292 {
1293 public:
1294
BinaryAtomicEndResultInstance(Context & context,const string & name,const ImageType imageType,const tcu::UVec3 & imageSize,const TextureFormat & format,const AtomicOperation operation,const bool useTransfer,const ShaderReadType shaderReadType,const ImageBackingType backingType)1295 BinaryAtomicEndResultInstance (Context& context,
1296 const string& name,
1297 const ImageType imageType,
1298 const tcu::UVec3& imageSize,
1299 const TextureFormat& format,
1300 const AtomicOperation operation,
1301 const bool useTransfer,
1302 const ShaderReadType shaderReadType,
1303 const ImageBackingType backingType)
1304 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation, useTransfer, shaderReadType, backingType) {}
1305
1306 virtual deUint32 getOutputBufferSize (void) const;
1307
prepareResources(const bool useTransfer)1308 virtual void prepareResources (const bool useTransfer) { DE_UNREF(useTransfer); }
1309 virtual void prepareDescriptors (const bool isTexelBuffer);
1310
commandsBeforeCompute(const VkCommandBuffer) const1311 virtual void commandsBeforeCompute (const VkCommandBuffer) const {}
1312 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1313 const VkPipeline pipeline,
1314 const VkPipelineLayout pipelineLayout,
1315 const VkDescriptorSet descriptorSet,
1316 const VkDeviceSize& range,
1317 const bool useTransfer);
1318
1319 virtual bool verifyResult (Allocation& outputBufferAllocation,
1320 const bool is64Bit) const;
1321
1322 protected:
1323
1324 template <typename T>
1325 bool isValueCorrect (const T resultValue,
1326 deInt32 x,
1327 deInt32 y,
1328 deInt32 z,
1329 const UVec3& gridSize,
1330 const IVec3 extendedGridSize) const;
1331 };
1332
getOutputBufferSize(void) const1333 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
1334 {
1335 return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1336 }
1337
void BinaryAtomicEndResultInstance::prepareDescriptors (const bool isTexelBuffer)
{
    const VkDescriptorType descriptorType = isTexelBuffer ?
                                            VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
                                            VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
    const VkDevice         device          = m_context.getDevice();
    const DeviceInterface& deviceInterface = m_context.getDeviceInterface();

    m_descriptorSetLayout =
        DescriptorSetLayoutBuilder()
        .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
        .build(deviceInterface, device);

    m_descriptorPool =
        DescriptorPoolBuilder()
        .addType(descriptorType)
        .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

    m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);

    if (isTexelBuffer)
    {
        m_descResultBufferView = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);

        DescriptorSetUpdateBuilder()
            .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
            .update(deviceInterface, device);
    }
    else
    {
        const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);

        DescriptorSetUpdateBuilder()
            .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
            .update(deviceInterface, device);
    }
}

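// After the dispatch, the result is read back in one of three ways: texel buffers
// alias the input buffer as the output buffer directly, the transfer path copies
// the result image into the output buffer, and otherwise a second compute pipeline
// copies the image contents into the output buffer using shader reads.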
void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer  cmdBuffer,
                                                          const VkPipeline       pipeline,
                                                          const VkPipelineLayout pipelineLayout,
                                                          const VkDescriptorSet  descriptorSet,
                                                          const VkDeviceSize&    range,
                                                          const bool             useTransfer)
{
    const DeviceInterface&        deviceInterface  = m_context.getDeviceInterface();
    const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
    const UVec3                   layerSize        = getLayerSize(m_imageType, m_imageSize);

    if (m_imageType == IMAGE_TYPE_BUFFER)
    {
        m_outputBuffer = m_inputBuffer;
    }
    else if (useTransfer)
    {
        const VkImageMemoryBarrier resultImagePostDispatchBarrier =
            makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT,
                                   VK_ACCESS_TRANSFER_READ_BIT,
                                   VK_IMAGE_LAYOUT_GENERAL,
                                   VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                                   m_resultImage->get(),
                                   subresourceRange);

        deviceInterface.cmdPipelineBarrier(cmdBuffer,
                                           VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                           VK_PIPELINE_STAGE_TRANSFER_BIT,
                                           DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
                                           1u, &resultImagePostDispatchBarrier);

        const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(layerSize), getNumLayers(m_imageType, m_imageSize));

        deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
    }
    else
    {
        const VkDevice               device               = m_context.getDevice();
        const VkDescriptorImageInfo  descResultImageInfo  = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
        const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);

        DescriptorSetUpdateBuilder()
            .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
            .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
            .update(deviceInterface, device);

        const VkImageMemoryBarrier resultImagePostDispatchBarrier =
            makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT,
                                   VK_ACCESS_SHADER_READ_BIT,
                                   VK_IMAGE_LAYOUT_GENERAL,
                                   VK_IMAGE_LAYOUT_GENERAL,
                                   m_resultImage->get(),
                                   subresourceRange);

        deviceInterface.cmdPipelineBarrier(cmdBuffer,
                                           VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                           VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                           DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
                                           1u, &resultImagePostDispatchBarrier);

        deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
        deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);

        switch (m_imageType)
        {
            case IMAGE_TYPE_1D_ARRAY:
                deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), subresourceRange.layerCount, layerSize.z());
                break;
            case IMAGE_TYPE_2D_ARRAY:
            case IMAGE_TYPE_CUBE:
            case IMAGE_TYPE_CUBE_ARRAY:
                deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), subresourceRange.layerCount);
                break;
            default:
                deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), layerSize.z());
                break;
        }
    }
}

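// Order-independent operations (add, and, or, ...) are verified by recomputing the
// expected end value exactly; for exchange and compare-exchange the end result only
// has to match one of the arguments used by the invocations that hit the pixel.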
bool BinaryAtomicEndResultInstance::verifyResult (Allocation& outputBufferAllocation,
                                                  const bool  is64Bit) const
{
    const UVec3 gridSize         = getShaderGridSize(m_imageType, m_imageSize);
    const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());

    tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());

    for (deInt32 z = 0; z < resultBuffer.getDepth();  z++)
    for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
    for (deInt32 x = 0; x < resultBuffer.getWidth();  x++)
    {
        const void* resultValue     = resultBuffer.getPixelPtr(x, y, z);
        deInt32     floatToIntValue = 0;
        bool        isFloatValue    = false;

        if (isFloatFormat(mapTextureFormat(m_format)))
        {
            isFloatValue    = true;
            floatToIntValue = static_cast<deInt32>(*((float*)resultValue));
        }

        if (isOrderIndependentAtomicOperation(m_operation))
        {
            if (isUintFormat(mapTextureFormat(m_format)))
            {
                if (is64Bit)
                {
                    if (!isValueCorrect<deUint64>(*((deUint64*)resultValue), x, y, z, gridSize, extendedGridSize))
                        return false;
                }
                else
                {
                    if (!isValueCorrect<deUint32>(*((deUint32*)resultValue), x, y, z, gridSize, extendedGridSize))
                        return false;
                }
            }
            else if (isIntFormat(mapTextureFormat(m_format)))
            {
                if (is64Bit)
                {
                    if (!isValueCorrect<deInt64>(*((deInt64*)resultValue), x, y, z, gridSize, extendedGridSize))
                        return false;
                }
                else
                {
                    if (!isValueCorrect<deInt32>(*((deInt32*)resultValue), x, y, z, gridSize, extendedGridSize))
                        return false;
                }
            }
            else
            {
                // 32-bit floating point
                if (!isValueCorrect<deInt32>(floatToIntValue, x, y, z, gridSize, extendedGridSize))
                    return false;
            }
        }
        else if (m_operation == ATOMIC_OPERATION_EXCHANGE || m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
        {
            // Check if the end result equals one of the atomic args.
            bool matchFound = false;

            for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
            {
                const IVec3 gid(x + i*gridSize.x(), y, z);
                matchFound = is64Bit ?
                    (*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
                    isFloatValue ?
                    floatToIntValue == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize) :
                    (*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
            }

            if (!matchFound)
                return false;
        }
        else
            DE_ASSERT(false);
    }
    return true;
}

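// Folds the arguments of all NUM_INVOCATIONS_PER_PIXEL invocations that targeted the
// pixel into a single reference value; valid because these operations are order-independent.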
template <typename T>
bool BinaryAtomicEndResultInstance::isValueCorrect (const T resultValue, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
{
    T reference = getOperationInitialValue<T>(m_operation);
    for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
    {
        const IVec3 gid(x + i*gridSize.x(), y, z);
        const T     arg = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
        reference = computeBinaryAtomicOperationResult(m_operation, reference, arg);
    }
    return (resultValue == reference);
}

TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
{
    return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation, m_useTransfer, m_readType, m_backingType);
}

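// Instance that checks the intermediate values returned by each atomic call; every
// invocation records its return value in its own texel of a widened intermediate image.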
class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
{
public:

    BinaryAtomicIntermValuesInstance (Context&               context,
                                      const string&          name,
                                      const ImageType        imageType,
                                      const tcu::UVec3&      imageSize,
                                      const TextureFormat&   format,
                                      const AtomicOperation  operation,
                                      const bool             useTransfer,
                                      const ShaderReadType   shaderReadType,
                                      const ImageBackingType backingType)
        : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation, useTransfer, shaderReadType, backingType) {}

    virtual deUint32 getOutputBufferSize (void) const;

    virtual void prepareResources   (const bool useTransfer);
    virtual void prepareDescriptors (const bool isTexelBuffer);

    virtual void commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const;
    virtual void commandsAfterCompute  (const VkCommandBuffer  cmdBuffer,
                                        const VkPipeline       pipeline,
                                        const VkPipelineLayout pipelineLayout,
                                        const VkDescriptorSet  descriptorSet,
                                        const VkDeviceSize&    range,
                                        const bool             useTransfer);

    virtual bool verifyResult (Allocation& outputBufferAllocation,
                               const bool  is64Bit) const;

protected:

    template <typename T>
    bool areValuesCorrect (tcu::ConstPixelBufferAccess& resultBuffer,
                           const bool                   isFloatingPoint,
                           deInt32                      x,
                           deInt32                      y,
                           deInt32                      z,
                           const UVec3&                 gridSize,
                           const IVec3                  extendedGridSize) const;

    template <typename T>
    bool verifyRecursive (const deInt32 index,
                          const T       valueSoFar,
                          bool          argsUsed[NUM_INVOCATIONS_PER_PIXEL],
                          const T       atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
                          const T       resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;

    de::MovePtr<Image> m_intermResultsImage;
    Move<VkImageView>  m_intermResultsImageView;
};

deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
{
    return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
}

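// The intermediate image is widened by NUM_INVOCATIONS_PER_PIXEL so each invocation
// gets its own texel for the value returned by its atomic call. Cube-based images are
// widened in Y as well, since cube faces must remain square.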
void BinaryAtomicIntermValuesInstance::prepareResources (const bool useTransfer)
{
    const UVec3 layerSize         = getLayerSize(m_imageType, m_imageSize);
    const bool  isCubeBasedImage  = (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
    const UVec3 extendedLayerSize = isCubeBasedImage ? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
                                                     : UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());

    createImageAndView(mapTextureFormat(m_format), extendedLayerSize, useTransfer, m_intermResultsImage, m_intermResultsImageView);
}

void BinaryAtomicIntermValuesInstance::prepareDescriptors (const bool isTexelBuffer)
{
    const VkDescriptorType descriptorType = isTexelBuffer ?
                                            VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
                                            VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;

    const VkDevice         device          = m_context.getDevice();
    const DeviceInterface& deviceInterface = m_context.getDeviceInterface();

    m_descriptorSetLayout =
        DescriptorSetLayoutBuilder()
        .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
        .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
        .build(deviceInterface, device);

    m_descriptorPool =
        DescriptorPoolBuilder()
        .addType(descriptorType, 2u)
        .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

    m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);

    if (isTexelBuffer)
    {
        m_descResultBufferView        = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
        m_descIntermResultsBufferView = makeBufferView(deviceInterface, device, *(*m_outputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);

        DescriptorSetUpdateBuilder()
            .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
            .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &(m_descIntermResultsBufferView.get()))
            .update(deviceInterface, device);
    }
    else
    {
        const VkDescriptorImageInfo descResultImageInfo        = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
        const VkDescriptorImageInfo descIntermResultsImageInfo = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);

        DescriptorSetUpdateBuilder()
            .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
            .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &descIntermResultsImageInfo)
            .update(deviceInterface, device);
    }
}

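// The intermediate image starts in VK_IMAGE_LAYOUT_UNDEFINED and must be transitioned
// to VK_IMAGE_LAYOUT_GENERAL before the dispatch writes to it.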
void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
{
    const DeviceInterface&        deviceInterface  = m_context.getDeviceInterface();
    const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

    const VkImageMemoryBarrier imagePreDispatchBarrier =
        makeImageMemoryBarrier(0u,
                               VK_ACCESS_SHADER_WRITE_BIT,
                               VK_IMAGE_LAYOUT_UNDEFINED,
                               VK_IMAGE_LAYOUT_GENERAL,
                               m_intermResultsImage->get(),
                               subresourceRange);

    deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
}

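// Read back the widened intermediate image: texel buffers already wrote the return
// values straight to the output buffer, the transfer path copies the image, and
// otherwise a second compute pipeline copies the texels using shader reads.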
void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer  cmdBuffer,
                                                             const VkPipeline       pipeline,
                                                             const VkPipelineLayout pipelineLayout,
                                                             const VkDescriptorSet  descriptorSet,
                                                             const VkDeviceSize&    range,
                                                             const bool             useTransfer)
{
    // Nothing is needed for texel buffers: the intermediate values were written to the output buffer directly.
    if (m_imageType == IMAGE_TYPE_BUFFER)
        return;

    const DeviceInterface&        deviceInterface  = m_context.getDeviceInterface();
    const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
    const UVec3                   layerSize        = getLayerSize(m_imageType, m_imageSize);

    if (useTransfer)
    {
        const VkImageMemoryBarrier imagePostDispatchBarrier =
            makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT,
                                   VK_ACCESS_TRANSFER_READ_BIT,
                                   VK_IMAGE_LAYOUT_GENERAL,
                                   VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                                   m_intermResultsImage->get(),
                                   subresourceRange);

        deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);

        const UVec3             extendedLayerSize     = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
        const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));

        deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
    }
    else
    {
        const VkDevice               device               = m_context.getDevice();
        const VkDescriptorImageInfo  descResultImageInfo  = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
        const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);

        DescriptorSetUpdateBuilder()
            .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
            .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
            .update(deviceInterface, device);

        const VkImageMemoryBarrier resultImagePostDispatchBarrier =
            makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT,
                                   VK_ACCESS_SHADER_READ_BIT,
                                   VK_IMAGE_LAYOUT_GENERAL,
                                   VK_IMAGE_LAYOUT_GENERAL,
                                   m_intermResultsImage->get(),
                                   subresourceRange);

        deviceInterface.cmdPipelineBarrier(cmdBuffer,
                                           VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                           VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                           DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
                                           1u, &resultImagePostDispatchBarrier);

        deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
        deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);

        switch (m_imageType)
        {
            case IMAGE_TYPE_1D_ARRAY:
                deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), subresourceRange.layerCount, layerSize.z());
                break;
            case IMAGE_TYPE_2D_ARRAY:
            case IMAGE_TYPE_CUBE:
            case IMAGE_TYPE_CUBE_ARRAY:
                deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), subresourceRange.layerCount);
                break;
            default:
                deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
                break;
        }
    }
}

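// For each pixel, checks whether the recorded intermediate values form a plausible
// serialization of the atomic operations performed on it.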
bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation& outputBufferAllocation,
                                                     const bool  is64Bit) const
{
    const UVec3 gridSize         = getShaderGridSize(m_imageType, m_imageSize);
    const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());

    tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());

    for (deInt32 z = 0; z < resultBuffer.getDepth();  z++)
    for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
    for (deUint32 x = 0; x < gridSize.x(); x++)
    {
        if (isUintFormat(mapTextureFormat(m_format)))
        {
            if (is64Bit)
            {
                if (!areValuesCorrect<deUint64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
                    return false;
            }
            else
            {
                if (!areValuesCorrect<deUint32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
                    return false;
            }
        }
        else if (isIntFormat(mapTextureFormat(m_format)))
        {
            if (is64Bit)
            {
                if (!areValuesCorrect<deInt64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
                    return false;
            }
            else
            {
                if (!areValuesCorrect<deInt32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
                    return false;
            }
        }
        else
        {
            // 32-bit floating point
            if (!areValuesCorrect<deInt32>(resultBuffer, true, x, y, z, gridSize, extendedGridSize))
                return false;
        }
    }

    return true;
}

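// Collects, for one pixel, the atomic return values of all invocations together with
// the arguments those invocations used, then asks verifyRecursive() whether some
// ordering of the operations explains the observed values.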
template <typename T>
bool BinaryAtomicIntermValuesInstance::areValuesCorrect (tcu::ConstPixelBufferAccess& resultBuffer, const bool isFloatingPoint, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
{
    T    resultValues[NUM_INVOCATIONS_PER_PIXEL];
    T    atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
    bool argsUsed[NUM_INVOCATIONS_PER_PIXEL];

    for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
    {
        const IVec3 gid(x + i*gridSize.x(), y, z);
        T           data = *((T*)resultBuffer.getPixelPtr(gid.x(), gid.y(), gid.z()));

        if (isFloatingPoint)
        {
            float fData;
            deMemcpy(&fData, &data, sizeof(fData));
            data = static_cast<T>(fData);
        }

        resultValues[i] = data;
        atomicArgs[i]   = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
        argsUsed[i]     = false;
    }

    // Verify that the return values form a valid sequence.
    return verifyRecursive(0, getOperationInitialValue<T>(m_operation), argsUsed, atomicArgs, resultValues);
}

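// Backtracking search: tries every not-yet-used invocation as the next one in the
// sequence. An invocation fits if its returned (pre-operation) value equals the running
// value, which is then advanced by applying that invocation's argument.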
template <typename T>
bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32 index,
                                                        const T       valueSoFar,
                                                        bool          argsUsed[NUM_INVOCATIONS_PER_PIXEL],
                                                        const T       atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
                                                        const T       resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
{
    if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
        return true;

    for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
    {
        if (!argsUsed[i] && resultValues[i] == valueSoFar)
        {
            argsUsed[i] = true;

            if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
            {
                return true;
            }

            argsUsed[i] = false;
        }
    }

    return false;
}

TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
{
    return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation, m_useTransfer, m_readType, m_backingType);
}

} // anonymous ns

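// Builds the test hierarchy: operation / image type / (no)transfer / read type /
// backing type / format, with one end-result and one intermediate-values case per leaf.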
tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
{
    de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));

    struct ImageParams
    {
        ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
            : m_imageType (imageType)
            , m_imageSize (imageSize)
        {
        }
        const ImageType  m_imageType;
        const tcu::UVec3 m_imageSize;
    };

    const ImageParams imageParamsArray[] =
    {
        ImageParams(IMAGE_TYPE_1D,         tcu::UVec3(64u, 1u,  1u)),
        ImageParams(IMAGE_TYPE_1D_ARRAY,   tcu::UVec3(64u, 1u,  8u)),
        ImageParams(IMAGE_TYPE_2D,         tcu::UVec3(64u, 64u, 1u)),
        ImageParams(IMAGE_TYPE_2D_ARRAY,   tcu::UVec3(64u, 64u, 8u)),
        ImageParams(IMAGE_TYPE_3D,         tcu::UVec3(48u, 48u, 8u)),
        ImageParams(IMAGE_TYPE_CUBE,       tcu::UVec3(64u, 64u, 1u)),
        ImageParams(IMAGE_TYPE_CUBE_ARRAY, tcu::UVec3(64u, 64u, 2u)),
        ImageParams(IMAGE_TYPE_BUFFER,     tcu::UVec3(64u, 1u,  1u))
    };

    const tcu::TextureFormat formats[] =
    {
        tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
        tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
        tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::FLOAT),
        tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT64),
        tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT64)
    };

    const struct
    {
        ShaderReadType type;
        const char*    name;
    } readTypes[] =
    {
        { ShaderReadType::NORMAL, "normal_read" },
        { ShaderReadType::SPARSE, "sparse_read" },
    };

    const struct
    {
        ImageBackingType type;
        const char*      name;
    } backingTypes[] =
    {
        { ImageBackingType::NORMAL, "normal_img" },
        { ImageBackingType::SPARSE, "sparse_img" },
    };

    for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
    {
        const AtomicOperation operation = (AtomicOperation)operationI;

        de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));

        for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
        {
            const ImageType  imageType = imageParamsArray[imageTypeNdx].m_imageType;
            const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;

            de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));

            for (int useTransferIdx = 0; useTransferIdx < 2; ++useTransferIdx)
            {
                const bool   useTransfer = (useTransferIdx > 0);
                const string groupName   = (!useTransfer ? "no" : "") + string("transfer");

                de::MovePtr<tcu::TestCaseGroup> transferGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str(), ""));

                for (int readTypeIdx = 0; readTypeIdx < DE_LENGTH_OF_ARRAY(readTypes); ++readTypeIdx)
                {
                    const auto& readType = readTypes[readTypeIdx];

                    de::MovePtr<tcu::TestCaseGroup> readTypeGroup(new tcu::TestCaseGroup(testCtx, readType.name, ""));

                    for (int backingTypeIdx = 0; backingTypeIdx < DE_LENGTH_OF_ARRAY(backingTypes); ++backingTypeIdx)
                    {
                        const auto& backingType = backingTypes[backingTypeIdx];

                        de::MovePtr<tcu::TestCaseGroup> backingTypeGroup(new tcu::TestCaseGroup(testCtx, backingType.name, ""));

                        for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
                        {
                            const TextureFormat& format     = formats[formatNdx];
                            const std::string    formatName = getShaderImageFormatQualifier(format);

                            // Needs SPIR-V programs in vktImageAtomicSpirvShaders.cpp.
                            if (imageType == IMAGE_TYPE_BUFFER && (format.type != tcu::TextureFormat::FLOAT))
                                continue;

                            // Only 2D and 3D images may support sparse residency.
                            const auto vkImageType = mapImageType(imageType);
                            if (backingType.type == ImageBackingType::SPARSE && (vkImageType != VK_IMAGE_TYPE_2D && vkImageType != VK_IMAGE_TYPE_3D))
                                continue;

                            // Only some operations are supported on floating-point formats.
                            if (format.type == tcu::TextureFormat::FLOAT)
                            {
                                if (operation != ATOMIC_OPERATION_ADD &&
                                    operation != ATOMIC_OPERATION_EXCHANGE &&
                                    operation != ATOMIC_OPERATION_MIN &&
                                    operation != ATOMIC_OPERATION_MAX)
                                {
                                    continue;
                                }
                            }

                            if (readType.type == ShaderReadType::SPARSE)
                            {
                                // When using transfer, shader reads will not be used, so avoid creating two identical cases.
                                if (useTransfer)
                                    continue;

                                // Sparse reads are not supported for all types of images.
                                if (imageType == IMAGE_TYPE_1D || imageType == IMAGE_TYPE_1D_ARRAY || imageType == IMAGE_TYPE_BUFFER)
                                    continue;
                            }

                            // The end-result case checks the final value of the operations, not the intermediate return values.
                            const string caseEndResult = formatName + "_end_result";
                            backingTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));

                            // The intermediate-values case checks the return values of the atomic function, not the end result.
                            const string caseIntermValues = formatName + "_intermediate_values";
                            backingTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
                        }

                        readTypeGroup->addChild(backingTypeGroup.release());
                    }

                    transferGroup->addChild(readTypeGroup.release());
                }

                imageTypeGroup->addChild(transferGroup.release());
            }

            operationGroup->addChild(imageTypeGroup.release());
        }

        imageAtomicOperationsTests->addChild(operationGroup.release());
    }

    return imageAtomicOperationsTests.release();
}

} // image
} // vkt