/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file  vktImageAtomicOperationTests.cpp
 * \brief Image atomic operation tests
 *//*--------------------------------------------------------------------*/

#include "vktImageAtomicOperationTests.hpp"
#include "vktImageAtomicSpirvShaders.hpp"

#include "deUniquePtr.hpp"
#include "deStringUtil.hpp"
#include "deSTLUtil.hpp"

#include "vktTestCaseUtil.hpp"
#include "vkPrograms.hpp"
#include "vkImageUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vktImageTestsUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBufferWithMemory.hpp"

#include "tcuTextureUtil.hpp"
#include "tcuTexture.hpp"
#include "tcuVectorType.hpp"
#include "tcuStringTemplate.hpp"

namespace vkt
{
namespace image
{
namespace
{

using namespace vk;
using namespace std;
using de::toString;

using tcu::ConstPixelBufferAccess;
using tcu::CubeFace;
using tcu::IVec2;
using tcu::IVec3;
using tcu::IVec4;
using tcu::PixelBufferAccess;
using tcu::TestContext;
using tcu::Texture1D;
using tcu::Texture2D;
using tcu::Texture2DArray;
using tcu::Texture3D;
using tcu::TextureCube;
using tcu::TextureFormat;
using tcu::UVec3;
using tcu::UVec4;
using tcu::Vec4;
using tcu::Vector;

enum
{
    NUM_INVOCATIONS_PER_PIXEL = 5u
};
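
// Each pixel is written by NUM_INVOCATIONS_PER_PIXEL shader invocations: the test
// dispatches NUM_INVOCATIONS_PER_PIXEL * gridSize.x() workgroups in X and the
// generated shaders wrap the X coordinate back onto the image with "gx % width".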

enum AtomicOperation
{
    ATOMIC_OPERATION_ADD = 0,
    ATOMIC_OPERATION_SUB,
    ATOMIC_OPERATION_INC,
    ATOMIC_OPERATION_DEC,
    ATOMIC_OPERATION_MIN,
    ATOMIC_OPERATION_MAX,
    ATOMIC_OPERATION_AND,
    ATOMIC_OPERATION_OR,
    ATOMIC_OPERATION_XOR,
    ATOMIC_OPERATION_EXCHANGE,
    ATOMIC_OPERATION_COMPARE_EXCHANGE,

    ATOMIC_OPERATION_LAST
};

enum class ShaderReadType
{
    NORMAL = 0,
    SPARSE,
};

enum class ImageBackingType
{
    NORMAL = 0,
    SPARSE,
};

static string getCoordStr(const ImageType imageType, const std::string &x, const std::string &y, const std::string &z)
{
    switch (imageType)
    {
    case IMAGE_TYPE_1D:
    case IMAGE_TYPE_BUFFER:
        return x;
    case IMAGE_TYPE_1D_ARRAY:
    case IMAGE_TYPE_2D:
        return string("ivec2(" + x + "," + y + ")");
    case IMAGE_TYPE_2D_ARRAY:
    case IMAGE_TYPE_3D:
    case IMAGE_TYPE_CUBE:
    case IMAGE_TYPE_CUBE_ARRAY:
        return string("ivec3(" + x + "," + y + "," + z + ")");
    default:
        DE_ASSERT(false);
        return "";
    }
}

static string getComponentTypeStr(uint32_t componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
{
    DE_ASSERT(intFormat || uintFormat || floatFormat);

    const bool is64 = (componentWidth == 64);

    if (intFormat)
        return (is64 ? "int64_t" : "int");
    if (uintFormat)
        return (is64 ? "uint64_t" : "uint");
    if (floatFormat)
        return (is64 ? "double" : "float");

    return "";
}

static string getVec4TypeStr(uint32_t componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
{
    DE_ASSERT(intFormat || uintFormat || floatFormat);

    const bool is64 = (componentWidth == 64);

    if (intFormat)
        return (is64 ? "i64vec4" : "ivec4");
    if (uintFormat)
        return (is64 ? "u64vec4" : "uvec4");
    if (floatFormat)
        return (is64 ? "f64vec4" : "vec4");

    return "";
}

static string getAtomicFuncArgumentShaderStr(const AtomicOperation op, const string &x, const string &y,
                                             const string &z, const IVec3 &gridSize)
{
    switch (op)
    {
    case ATOMIC_OPERATION_ADD:
    case ATOMIC_OPERATION_AND:
    case ATOMIC_OPERATION_OR:
    case ATOMIC_OPERATION_XOR:
        return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
    case ATOMIC_OPERATION_MIN:
    case ATOMIC_OPERATION_MAX:
        // multiply by (1 - 2*(value % 2)) to make half of the data negative;
        // for uint formats the negation wraps around and generates large numbers
        return string("((1 - 2*(" + x + " % 2)) * (" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + "))");
    case ATOMIC_OPERATION_EXCHANGE:
    case ATOMIC_OPERATION_COMPARE_EXCHANGE:
        return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y +
                      ")");
    default:
        DE_ASSERT(false);
        return "";
    }
}
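
// Example: for MIN/MAX at invocation (x, y, z) = (3, 1, 2) the generated
// expression evaluates to (1 - 2*(3 % 2)) * (9 + 1 + 4) = -14.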

static string getAtomicOperationCaseName(const AtomicOperation op)
{
    switch (op)
    {
    case ATOMIC_OPERATION_ADD:
        return string("add");
    case ATOMIC_OPERATION_SUB:
        return string("sub");
    case ATOMIC_OPERATION_INC:
        return string("inc");
    case ATOMIC_OPERATION_DEC:
        return string("dec");
    case ATOMIC_OPERATION_MIN:
        return string("min");
    case ATOMIC_OPERATION_MAX:
        return string("max");
    case ATOMIC_OPERATION_AND:
        return string("and");
    case ATOMIC_OPERATION_OR:
        return string("or");
    case ATOMIC_OPERATION_XOR:
        return string("xor");
    case ATOMIC_OPERATION_EXCHANGE:
        return string("exchange");
    case ATOMIC_OPERATION_COMPARE_EXCHANGE:
        return string("compare_exchange");
    default:
        DE_ASSERT(false);
        return "";
    }
}

static string getAtomicOperationShaderFuncName(const AtomicOperation op)
{
    switch (op)
    {
    case ATOMIC_OPERATION_ADD:
        return string("imageAtomicAdd");
    case ATOMIC_OPERATION_MIN:
        return string("imageAtomicMin");
    case ATOMIC_OPERATION_MAX:
        return string("imageAtomicMax");
    case ATOMIC_OPERATION_AND:
        return string("imageAtomicAnd");
    case ATOMIC_OPERATION_OR:
        return string("imageAtomicOr");
    case ATOMIC_OPERATION_XOR:
        return string("imageAtomicXor");
    case ATOMIC_OPERATION_EXCHANGE:
        return string("imageAtomicExchange");
    case ATOMIC_OPERATION_COMPARE_EXCHANGE:
        return string("imageAtomicCompSwap");
    default:
        DE_ASSERT(false);
        return "";
    }
}

template <typename T>
T getOperationInitialValue(const AtomicOperation op)
{
    switch (op)
    {
    // \note 18 is just an arbitrary small nonzero value.
    case ATOMIC_OPERATION_ADD:
        return 18;
    case ATOMIC_OPERATION_INC:
        return 18;
    case ATOMIC_OPERATION_SUB:
        return (1 << 24) - 1;
    case ATOMIC_OPERATION_DEC:
        return (1 << 24) - 1;
    case ATOMIC_OPERATION_MIN:
        return (1 << 15) - 1;
    case ATOMIC_OPERATION_MAX:
        return 18;
    case ATOMIC_OPERATION_AND:
        return (1 << 15) - 1;
    case ATOMIC_OPERATION_OR:
        return 18;
    case ATOMIC_OPERATION_XOR:
        return 18;
    case ATOMIC_OPERATION_EXCHANGE:
        return 18;
    case ATOMIC_OPERATION_COMPARE_EXCHANGE:
        return 18;
    default:
        DE_ASSERT(false);
        return 0xFFFFFFFF;
    }
}
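
// The non-trivial starting values above appear to be chosen so every intermediate
// value stays observable: SUB/DEC start high enough not to underflow across all
// invocations, while MIN and AND start with a block of low bits set so the atomics
// can only lower the value or clear bits.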

template <>
int64_t getOperationInitialValue<int64_t>(const AtomicOperation op)
{
    switch (op)
    {
    // \note 0x000000BEFFFFFF18 is just an arbitrary nonzero value.
    case ATOMIC_OPERATION_ADD:
        return 0x000000BEFFFFFF18;
    case ATOMIC_OPERATION_INC:
        return 0x000000BEFFFFFF18;
    case ATOMIC_OPERATION_SUB:
        return (1ull << 56) - 1;
    case ATOMIC_OPERATION_DEC:
        return (1ull << 56) - 1;
    case ATOMIC_OPERATION_MIN:
        return (1ull << 47) - 1;
    case ATOMIC_OPERATION_MAX:
        return 0x000000BEFFFFFF18;
    case ATOMIC_OPERATION_AND:
        return (1ull << 47) - 1;
    case ATOMIC_OPERATION_OR:
        return 0x000000BEFFFFFF18;
    case ATOMIC_OPERATION_XOR:
        return 0x000000BEFFFFFF18;
    case ATOMIC_OPERATION_EXCHANGE:
        return 0x000000BEFFFFFF18;
    case ATOMIC_OPERATION_COMPARE_EXCHANGE:
        return 0x000000BEFFFFFF18;
    default:
        DE_ASSERT(false);
        return 0xFFFFFFFFFFFFFFFF;
    }
}

template <>
uint64_t getOperationInitialValue<uint64_t>(const AtomicOperation op)
{
    return (uint64_t)getOperationInitialValue<int64_t>(op);
}

template <typename T>
static T getAtomicFuncArgument(const AtomicOperation op, const IVec3 &invocationID, const IVec3 &gridSize)
{
    const T x = static_cast<T>(invocationID.x());
    const T y = static_cast<T>(invocationID.y());
    const T z = static_cast<T>(invocationID.z());

    switch (op)
    {
    // \note Fall-throughs.
    case ATOMIC_OPERATION_ADD:
    case ATOMIC_OPERATION_SUB:
    case ATOMIC_OPERATION_AND:
    case ATOMIC_OPERATION_OR:
    case ATOMIC_OPERATION_XOR:
        return x * x + y * y + z * z;
    case ATOMIC_OPERATION_INC:
    case ATOMIC_OPERATION_DEC:
        return 1;
    case ATOMIC_OPERATION_MIN:
    case ATOMIC_OPERATION_MAX:
        // multiply half of the data by -1
        return (1 - 2 * (x % 2)) * (x * x + y * y + z * z);
    case ATOMIC_OPERATION_EXCHANGE:
    case ATOMIC_OPERATION_COMPARE_EXCHANGE:
        return (z * static_cast<T>(gridSize.x()) + x) * static_cast<T>(gridSize.y()) + y;
    default:
        DE_ASSERT(false);
        return -1;
    }
}
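
// getAtomicFuncArgument() above is the host-side mirror of
// getAtomicFuncArgumentShaderStr(); the CPU reference computation must feed the
// same per-invocation arguments as the generated shader does.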

//! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried out (i.e. the operation is both commutative and associative).
static bool isOrderIndependentAtomicOperation(const AtomicOperation op)
{
    return op == ATOMIC_OPERATION_ADD || op == ATOMIC_OPERATION_SUB || op == ATOMIC_OPERATION_INC ||
           op == ATOMIC_OPERATION_DEC || op == ATOMIC_OPERATION_MIN || op == ATOMIC_OPERATION_MAX ||
           op == ATOMIC_OPERATION_AND || op == ATOMIC_OPERATION_OR || op == ATOMIC_OPERATION_XOR;
}
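
// Only EXCHANGE and COMPARE_EXCHANGE are order-dependent here: their end result
// depends on which invocation happened to write last.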

//! Checks if the operation needs a hand-written SPIR-V shader (GLSL has no imageAtomic* built-in for it).
static bool isSpirvAtomicOperation(const AtomicOperation op)
{
    return op == ATOMIC_OPERATION_SUB || op == ATOMIC_OPERATION_INC || op == ATOMIC_OPERATION_DEC;
}

//! Returns the SPIR-V assembler name of the given operation.
static std::string getSpirvAtomicOpName(const AtomicOperation op)
{
    switch (op)
    {
    case ATOMIC_OPERATION_SUB:
        return "OpAtomicISub";
    case ATOMIC_OPERATION_INC:
        return "OpAtomicIIncrement";
    case ATOMIC_OPERATION_DEC:
        return "OpAtomicIDecrement";
    default:
        break;
    }

    DE_ASSERT(false);
    return "";
}

//! Returns true if the given SPIR-V operation does not need the last argument, compared to OpAtomicIAdd.
static bool isSpirvAtomicNoLastArgOp(const AtomicOperation op)
{
    switch (op)
    {
    case ATOMIC_OPERATION_SUB:
        return false;
    case ATOMIC_OPERATION_INC: // fallthrough
    case ATOMIC_OPERATION_DEC:
        return true;
    default:
        break;
    }

    DE_ASSERT(false);
    return false;
}

//! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
template <typename T>
static T computeBinaryAtomicOperationResult(const AtomicOperation op, const T a, const T b)
{
    switch (op)
    {
    case ATOMIC_OPERATION_INC: // fallthrough.
    case ATOMIC_OPERATION_ADD:
        return a + b;
    case ATOMIC_OPERATION_DEC: // fallthrough.
    case ATOMIC_OPERATION_SUB:
        return a - b;
    case ATOMIC_OPERATION_MIN:
        return de::min(a, b);
    case ATOMIC_OPERATION_MAX:
        return de::max(a, b);
    case ATOMIC_OPERATION_AND:
        return a & b;
    case ATOMIC_OPERATION_OR:
        return a | b;
    case ATOMIC_OPERATION_XOR:
        return a ^ b;
    case ATOMIC_OPERATION_EXCHANGE:
        return b;
    case ATOMIC_OPERATION_COMPARE_EXCHANGE:
        return (a == (sizeof(T) == 8 ? 0xBEFFFFFF18 : 18)) ? b : a;
    default:
        DE_ASSERT(false);
        return -1;
    }
}
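
// Note: the COMPARE_EXCHANGE comparands above (18 and 0xBEFFFFFF18) equal the
// initial values returned by getOperationInitialValue(), so only an invocation
// that still sees the untouched initial texel performs the swap.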

VkImageUsageFlags getUsageFlags(bool useTransfer)
{
    VkImageUsageFlags usageFlags = VK_IMAGE_USAGE_STORAGE_BIT;

    if (useTransfer)
        usageFlags |= (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);

    return usageFlags;
}
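
// VK_IMAGE_USAGE_STORAGE_BIT is always required for imageAtomic* access; the
// transfer bits are only added when initialization and readback use copy commands.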

void AddFillReadShader(SourceCollections &sourceCollections, const ImageType &imageType,
                       const tcu::TextureFormat &format, const string &componentType, const string &vec4Type)
{
    const string imageInCoord         = getCoordStr(imageType, "gx", "gy", "gz");
    const string shaderImageFormatStr = getShaderImageFormatQualifier(format);
    const string shaderImageTypeStr   = getShaderImageType(format, imageType);
    const auto componentWidth         = getFormatComponentWidth(mapTextureFormat(format), 0u);
    const string extensions =
        ((componentWidth == 64u) ? "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
                                   "#extension GL_EXT_shader_image_int64 : require\n" :
                                   "");

    const string fillShader =
        "#version 450\n" + extensions + "precision highp " + shaderImageTypeStr +
        ";\n"
        "\n"
        "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        "layout (" +
        shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
        " u_resultImage;\n"
        "\n"
        "layout(std430, binding = 1) buffer inputBuffer\n"
        "{\n"
        "    " +
        componentType +
        " data[];\n"
        "} inBuffer;\n"
        "\n"
        "void main(void)\n"
        "{\n"
        "    int gx = int(gl_GlobalInvocationID.x);\n"
        "    int gy = int(gl_GlobalInvocationID.y);\n"
        "    int gz = int(gl_GlobalInvocationID.z);\n"
        "    uint index = gx + (gy * gl_NumWorkGroups.x) + (gz * gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
        "    imageStore(u_resultImage, " +
        imageInCoord + ", " + vec4Type +
        "(inBuffer.data[index]));\n"
        "}\n";

    const string readShader =
        "#version 450\n" + extensions + "precision highp " + shaderImageTypeStr +
        ";\n"
        "\n"
        "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        "layout (" +
        shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
        " u_resultImage;\n"
        "\n"
        "layout(std430, binding = 1) buffer outputBuffer\n"
        "{\n"
        "    " +
        componentType +
        " data[];\n"
        "} outBuffer;\n"
        "\n"
        "void main(void)\n"
        "{\n"
        "    int gx = int(gl_GlobalInvocationID.x);\n"
        "    int gy = int(gl_GlobalInvocationID.y);\n"
        "    int gz = int(gl_GlobalInvocationID.z);\n"
        "    uint index = gx + (gy * gl_NumWorkGroups.x) + (gz * gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
        "    outBuffer.data[index] = imageLoad(u_resultImage, " +
        imageInCoord +
        ").x;\n"
        "}\n";

    if ((imageType != IMAGE_TYPE_1D) && (imageType != IMAGE_TYPE_1D_ARRAY) && (imageType != IMAGE_TYPE_BUFFER))
    {
        const string readShaderResidency =
            "#version 450\n"
            "#extension GL_ARB_sparse_texture2 : require\n" +
            extensions + "precision highp " + shaderImageTypeStr +
            ";\n"
            "\n"
            "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
            "layout (" +
            shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
            " u_resultImage;\n"
            "\n"
            "layout(std430, binding = 1) buffer outputBuffer\n"
            "{\n"
            "    " +
            componentType +
            " data[];\n"
            "} outBuffer;\n"
            "\n"
            "void main(void)\n"
            "{\n"
            "    int gx = int(gl_GlobalInvocationID.x);\n"
            "    int gy = int(gl_GlobalInvocationID.y);\n"
            "    int gz = int(gl_GlobalInvocationID.z);\n"
            "    uint index = gx + (gy * gl_NumWorkGroups.x) + (gz * gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
            "    outBuffer.data[index] = imageLoad(u_resultImage, " +
            imageInCoord +
            ").x;\n"
            "    " +
            vec4Type +
            " sparseValue;\n"
            "    sparseImageLoadARB(u_resultImage, " +
            imageInCoord +
            ", sparseValue);\n"
            "    if (outBuffer.data[index] != sparseValue.x)\n"
            "        outBuffer.data[index] = " +
            vec4Type +
            "(1234).x;\n"
            "}\n";

        sourceCollections.glslSources.add("readShaderResidency")
            << glu::ComputeSource(readShaderResidency.c_str())
            << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
    }

    sourceCollections.glslSources.add("fillShader")
        << glu::ComputeSource(fillShader.c_str())
        << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
    sourceCollections.glslSources.add("readShader")
        << glu::ComputeSource(readShader.c_str())
        << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
}
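
// The fill and read shaders registered above serve the useTransfer == false
// paths, where the image is initialized and read back with compute dispatches
// instead of transfer commands.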

//! Prepare the initial data for the image
static void initDataForImage(const VkDevice device, const DeviceInterface &deviceInterface, const TextureFormat &format,
                             const AtomicOperation operation, const tcu::UVec3 &gridSize, BufferWithMemory &buffer)
{
    Allocation &bufferAllocation = buffer.getAllocation();
    const VkFormat imageFormat   = mapTextureFormat(format);
    tcu::PixelBufferAccess pixelBuffer(format, gridSize.x(), gridSize.y(), gridSize.z(), bufferAllocation.getHostPtr());

    if (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)
    {
        const int64_t initialValue(getOperationInitialValue<int64_t>(operation));

        for (uint32_t z = 0; z < gridSize.z(); z++)
            for (uint32_t y = 0; y < gridSize.y(); y++)
                for (uint32_t x = 0; x < gridSize.x(); x++)
                {
                    *((int64_t *)pixelBuffer.getPixelPtr(x, y, z)) = initialValue;
                }
    }
    else
    {
        const tcu::IVec4 initialValue(getOperationInitialValue<int32_t>(operation));

        for (uint32_t z = 0; z < gridSize.z(); z++)
            for (uint32_t y = 0; y < gridSize.y(); y++)
                for (uint32_t x = 0; x < gridSize.x(); x++)
                {
                    pixelBuffer.setPixel(initialValue, x, y, z);
                }
    }

    flushAlloc(deviceInterface, device, bufferAllocation);
}
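
// The R64 branch above writes texels through a raw pointer, presumably because
// PixelBufferAccess::setPixel takes 32-bit component vectors and cannot carry a
// full 64-bit value.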

void commonCheckSupport(Context &context, const tcu::TextureFormat &tcuFormat, VkImageTiling tiling,
                        ImageType imageType, const tcu::UVec3 &imageSize, AtomicOperation operation, bool useTransfer,
                        ShaderReadType readType, ImageBackingType backingType)
{
    const VkFormat format       = mapTextureFormat(tcuFormat);
    const VkImageType vkImgType = mapImageType(imageType);
    const VkFormatFeatureFlags texelBufferSupport =
        (VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT);

    const auto &vki           = context.getInstanceInterface();
    const auto physicalDevice = context.getPhysicalDevice();
    const auto usageFlags     = getUsageFlags(useTransfer);

    VkImageFormatProperties vkImageFormatProperties;
    const auto result = vki.getPhysicalDeviceImageFormatProperties(physicalDevice, format, vkImgType, tiling,
                                                                   usageFlags, 0, &vkImageFormatProperties);
    if (result != VK_SUCCESS)
    {
        if (result == VK_ERROR_FORMAT_NOT_SUPPORTED)
            TCU_THROW(NotSupportedError, "Format unsupported for tiling");
        else
            TCU_FAIL("vkGetPhysicalDeviceImageFormatProperties returned unexpected error");
    }

    if (vkImageFormatProperties.maxArrayLayers < (uint32_t)getNumLayers(imageType, imageSize))
    {
        TCU_THROW(NotSupportedError, "This format and tiling combination does not support this number of array layers");
    }

    const VkFormatProperties formatProperties =
        getPhysicalDeviceFormatProperties(context.getInstanceInterface(), context.getPhysicalDevice(), format);
    if ((imageType == IMAGE_TYPE_BUFFER) &&
        ((formatProperties.bufferFeatures & texelBufferSupport) != texelBufferSupport))
        TCU_THROW(NotSupportedError, "Atomic storage texel buffers not supported");

    const VkFormatFeatureFlags requiredFeaturesLinear =
        (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
    if (tiling == vk::VK_IMAGE_TILING_LINEAR &&
        ((formatProperties.linearTilingFeatures & requiredFeaturesLinear) != requiredFeaturesLinear))
    {
        TCU_THROW(NotSupportedError, "Format doesn't support atomic storage with linear tiling");
    }

    if (imageType == IMAGE_TYPE_CUBE_ARRAY)
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_IMAGE_CUBE_ARRAY);

#ifndef CTS_USES_VULKANSC
    if (backingType == ImageBackingType::SPARSE)
    {
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);

        switch (vkImgType)
        {
        case VK_IMAGE_TYPE_2D:
            context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE2D);
            break;
        case VK_IMAGE_TYPE_3D:
            context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE3D);
            break;
        default:
            DE_ASSERT(false);
            break;
        }

        if (!checkSparseImageFormatSupport(context.getPhysicalDevice(), context.getInstanceInterface(), format,
                                           vkImgType, VK_SAMPLE_COUNT_1_BIT, usageFlags, tiling))
            TCU_THROW(NotSupportedError, "Format does not support sparse images");
    }
#endif // CTS_USES_VULKANSC

    if (isFloatFormat(format))
    {
        context.requireDeviceFunctionality("VK_EXT_shader_atomic_float");

        const VkFormatFeatureFlags requiredFeatures =
            (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
        const auto &atomicFloatFeatures = context.getShaderAtomicFloatFeaturesEXT();

        if (!atomicFloatFeatures.shaderImageFloat32Atomics)
            TCU_THROW(NotSupportedError, "shaderImageFloat32Atomics not supported");

        if ((operation == ATOMIC_OPERATION_ADD) && !atomicFloatFeatures.shaderImageFloat32AtomicAdd)
            TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicAdd not supported");

        if (operation == ATOMIC_OPERATION_MIN || operation == ATOMIC_OPERATION_MAX)
        {
            context.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
            if (!context.getShaderAtomicFloat2FeaturesEXT().shaderImageFloat32AtomicMinMax)
            {
                TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicMinMax not supported");
            }
#endif // CTS_USES_VULKANSC
        }

        if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
            TCU_FAIL("Required format feature bits not supported");

        if (backingType == ImageBackingType::SPARSE)
        {
            if (!atomicFloatFeatures.sparseImageFloat32Atomics)
                TCU_THROW(NotSupportedError, "sparseImageFloat32Atomics not supported");

            if (operation == ATOMIC_OPERATION_ADD && !atomicFloatFeatures.sparseImageFloat32AtomicAdd)
                TCU_THROW(NotSupportedError, "sparseImageFloat32AtomicAdd not supported");
        }
    }
    else if (format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
    {
        context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");

        const VkFormatFeatureFlags requiredFeatures =
            (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
        const auto &atomicInt64Features = context.getShaderImageAtomicInt64FeaturesEXT();

        if (!atomicInt64Features.shaderImageInt64Atomics)
            TCU_THROW(NotSupportedError, "shaderImageInt64Atomics not supported");

        if (backingType == ImageBackingType::SPARSE && !atomicInt64Features.sparseImageInt64Atomics)
            TCU_THROW(NotSupportedError, "sparseImageInt64Atomics not supported");

        if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
            TCU_FAIL("Mandatory format features not supported");
    }

    if (useTransfer)
    {
        const VkFormatFeatureFlags transferFeatures =
            (VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT);
        if ((formatProperties.optimalTilingFeatures & transferFeatures) != transferFeatures)
            TCU_THROW(NotSupportedError, "Transfer features not supported for this format");
    }

    if (readType == ShaderReadType::SPARSE)
    {
        DE_ASSERT(imageType != IMAGE_TYPE_1D && imageType != IMAGE_TYPE_1D_ARRAY && imageType != IMAGE_TYPE_BUFFER);
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_RESOURCE_RESIDENCY);
    }
}

class BinaryAtomicEndResultCase : public vkt::TestCase
{
public:
    BinaryAtomicEndResultCase(tcu::TestContext &testCtx, const string &name, const ImageType imageType,
                              const tcu::UVec3 &imageSize, const tcu::TextureFormat &format, const VkImageTiling tiling,
                              const AtomicOperation operation, const bool useTransfer,
                              const ShaderReadType shaderReadType, const ImageBackingType backingType,
                              const glu::GLSLVersion glslVersion);

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;
    virtual void checkSupport(Context &context) const;

private:
    const ImageType m_imageType;
    const tcu::UVec3 m_imageSize;
    const tcu::TextureFormat m_format;
    const VkImageTiling m_tiling;
    const AtomicOperation m_operation;
    const bool m_useTransfer;
    const ShaderReadType m_readType;
    const ImageBackingType m_backingType;
    const glu::GLSLVersion m_glslVersion;
};

BinaryAtomicEndResultCase::BinaryAtomicEndResultCase(tcu::TestContext &testCtx, const string &name,
                                                     const ImageType imageType, const tcu::UVec3 &imageSize,
                                                     const tcu::TextureFormat &format, const VkImageTiling tiling,
                                                     const AtomicOperation operation, const bool useTransfer,
                                                     const ShaderReadType shaderReadType,
                                                     const ImageBackingType backingType,
                                                     const glu::GLSLVersion glslVersion)
    : TestCase(testCtx, name)
    , m_imageType(imageType)
    , m_imageSize(imageSize)
    , m_format(format)
    , m_tiling(tiling)
    , m_operation(operation)
    , m_useTransfer(useTransfer)
    , m_readType(shaderReadType)
    , m_backingType(backingType)
    , m_glslVersion(glslVersion)
{
}

void BinaryAtomicEndResultCase::checkSupport(Context &context) const
{
    commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType,
                       m_backingType);
}

void BinaryAtomicEndResultCase::initPrograms(SourceCollections &sourceCollections) const
{
    const VkFormat imageFormat    = mapTextureFormat(m_format);
    const uint32_t componentWidth = getFormatComponentWidth(imageFormat, 0);
    const bool intFormat          = isIntFormat(imageFormat);
    const bool uintFormat         = isUintFormat(imageFormat);
    const bool floatFormat        = isFloatFormat(imageFormat);
    const string type             = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
    const string vec4Type         = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);

    AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);

    if (isSpirvAtomicOperation(m_operation))
    {
        const CaseVariant caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_END_RESULTS};
        const tcu::StringTemplate shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
        std::map<std::string, std::string> specializations;

        specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
        if (isSpirvAtomicNoLastArgOp(m_operation))
            specializations["LASTARG"] = "";

        sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
    }
    else
    {
        const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);

        const UVec3 gridSize     = getShaderGridSize(m_imageType, m_imageSize);
        const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");

        const string atomicArgExpr =
            type +
            getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz",
                                           IVec3(NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z()));

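        // 820338753304 is 0xBEFFFFFF18 in decimal, i.e. the 64-bit initial texel
        // value; the "u"/"l" suffixes select the matching GLSL literal type.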
        const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
                                              (componentWidth == 64 ? ", 820338753304" : ", 18") +
                                                  string(uintFormat ? "u" : "") +
                                                  string(componentWidth == 64 ? "l" : "") :
                                              "";
        const string atomicInvocation   = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " +
                                        atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
        const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
        const string shaderImageTypeStr   = getShaderImageType(m_format, m_imageType);
        const string extensions           = "#extension GL_EXT_shader_atomic_float : enable\n"
                                            "#extension GL_EXT_shader_atomic_float2 : enable\n"
                                            "#extension GL_KHR_memory_scope_semantics : enable";

        string source = versionDecl + "\n" + extensions + "\n";

        if (64 == componentWidth)
        {
            source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
                      "#extension GL_EXT_shader_image_int64 : require\n";
        }

        source += "precision highp " + shaderImageTypeStr +
                  ";\n"
                  "\n"
                  "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
                  "layout (" +
                  shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
                  " u_resultImage;\n"
                  "\n"
                  "void main (void)\n"
                  "{\n"
                  "    int gx = int(gl_GlobalInvocationID.x);\n"
                  "    int gy = int(gl_GlobalInvocationID.y);\n"
                  "    int gz = int(gl_GlobalInvocationID.z);\n"
                  "    " +
                  atomicInvocation +
                  ";\n"
                  "}\n";

        sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
    }
}

class BinaryAtomicIntermValuesCase : public vkt::TestCase
{
public:
    BinaryAtomicIntermValuesCase(tcu::TestContext &testCtx, const string &name, const ImageType imageType,
                                 const tcu::UVec3 &imageSize, const tcu::TextureFormat &format,
                                 const VkImageTiling tiling, const AtomicOperation operation, const bool useTransfer,
                                 const ShaderReadType shaderReadType, const ImageBackingType backingType,
                                 const glu::GLSLVersion glslVersion);

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;
    virtual void checkSupport(Context &context) const;

private:
    const ImageType m_imageType;
    const tcu::UVec3 m_imageSize;
    const tcu::TextureFormat m_format;
    const VkImageTiling m_tiling;
    const AtomicOperation m_operation;
    const bool m_useTransfer;
    const ShaderReadType m_readType;
    const ImageBackingType m_backingType;
    const glu::GLSLVersion m_glslVersion;
};

BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase(
    TestContext &testCtx, const string &name, const ImageType imageType, const tcu::UVec3 &imageSize,
    const TextureFormat &format, const VkImageTiling tiling, const AtomicOperation operation, const bool useTransfer,
    const ShaderReadType shaderReadType, const ImageBackingType backingType, const glu::GLSLVersion glslVersion)
    : TestCase(testCtx, name)
    , m_imageType(imageType)
    , m_imageSize(imageSize)
    , m_format(format)
    , m_tiling(tiling)
    , m_operation(operation)
    , m_useTransfer(useTransfer)
    , m_readType(shaderReadType)
    , m_backingType(backingType)
    , m_glslVersion(glslVersion)
{
}

void BinaryAtomicIntermValuesCase::checkSupport(Context &context) const
{
    commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType,
                       m_backingType);
}

void BinaryAtomicIntermValuesCase::initPrograms(SourceCollections &sourceCollections) const
{
    const VkFormat imageFormat    = mapTextureFormat(m_format);
    const uint32_t componentWidth = getFormatComponentWidth(imageFormat, 0);
    const bool intFormat          = isIntFormat(imageFormat);
    const bool uintFormat         = isUintFormat(imageFormat);
    const bool floatFormat        = isFloatFormat(imageFormat);
    const string type             = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
    const string vec4Type         = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);

    AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);

    if (isSpirvAtomicOperation(m_operation))
    {
        const CaseVariant caseVariant{m_imageType, m_format.order, m_format.type,
                                      CaseVariant::CHECK_TYPE_INTERMEDIATE_RESULTS};
        const tcu::StringTemplate shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
        std::map<std::string, std::string> specializations;

        specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
        if (isSpirvAtomicNoLastArgOp(m_operation))
            specializations["LASTARG"] = "";

        sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
    }
    else
    {
        const string versionDecl     = glu::getGLSLVersionDeclaration(m_glslVersion);
        const UVec3 gridSize         = getShaderGridSize(m_imageType, m_imageSize);
        const string atomicCoord     = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
        const string invocationCoord = getCoordStr(m_imageType, "gx", "gy", "gz");
        const string atomicArgExpr =
            type +
            getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz",
                                           IVec3(NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z()));

        const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
                                              (componentWidth == 64 ? ", 820338753304" : ", 18") +
                                                  string(uintFormat ? "u" : "") +
                                                  string(componentWidth == 64 ? "l" : "") :
                                              "";
        const string atomicInvocation   = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " +
                                        atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
        const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
        const string shaderImageTypeStr   = getShaderImageType(m_format, m_imageType);
        const string extensions           = "#extension GL_EXT_shader_atomic_float : enable\n"
                                            "#extension GL_EXT_shader_atomic_float2 : enable\n"
                                            "#extension GL_KHR_memory_scope_semantics : enable";

        string source = versionDecl + "\n" + extensions +
                        "\n"
                        "\n";

        if (64 == componentWidth)
        {
            source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
                      "#extension GL_EXT_shader_image_int64 : require\n";
        }

        source += "precision highp " + shaderImageTypeStr +
                  ";\n"
                  "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
                  "layout (" +
                  shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
                  " u_resultImage;\n"
                  "layout (" +
                  shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr +
                  " u_intermValuesImage;\n"
                  "\n"
                  "void main (void)\n"
                  "{\n"
                  "    int gx = int(gl_GlobalInvocationID.x);\n"
                  "    int gy = int(gl_GlobalInvocationID.y);\n"
                  "    int gz = int(gl_GlobalInvocationID.z);\n"
                  "    imageStore(u_intermValuesImage, " +
                  invocationCoord + ", " + vec4Type + "(" + atomicInvocation +
                  "));\n"
                  "}\n";

        sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
    }
}

class BinaryAtomicInstanceBase : public vkt::TestInstance
{
public:
    BinaryAtomicInstanceBase(Context &context, const string &name, const ImageType imageType,
                             const tcu::UVec3 &imageSize, const TextureFormat &format, const VkImageTiling tiling,
                             const AtomicOperation operation, const bool useTransfer,
                             const ShaderReadType shaderReadType, const ImageBackingType backingType);

    tcu::TestStatus iterate(void);

    virtual uint32_t getOutputBufferSize(void) const = 0;

    virtual void prepareResources(const bool useTransfer)     = 0;
    virtual void prepareDescriptors(const bool isTexelBuffer) = 0;

    virtual void commandsBeforeCompute(const VkCommandBuffer cmdBuffer) const            = 0;
    virtual void commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
                                      const VkPipelineLayout pipelineLayout, const VkDescriptorSet descriptorSet,
                                      const VkDeviceSize &range, const bool useTransfer) = 0;

    virtual bool verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const = 0;

protected:
    void shaderFillImage(const VkCommandBuffer cmdBuffer, const VkBuffer &buffer, const VkPipeline pipeline,
                         const VkPipelineLayout pipelineLayout, const VkDescriptorSet descriptorSet,
                         const VkDeviceSize &range, const tcu::UVec3 &gridSize);

    void createImageAndView(VkFormat imageFormat, const tcu::UVec3 &imageExtent, bool useTransfer,
                            de::MovePtr<Image> &imagePtr, Move<VkImageView> &imageViewPtr);

    void createImageResources(const VkFormat &imageFormat, const bool useTransfer);

    const string m_name;
    const ImageType m_imageType;
    const tcu::UVec3 m_imageSize;
    const TextureFormat m_format;
    const VkImageTiling m_tiling;
    const AtomicOperation m_operation;
    const bool m_useTransfer;
    const ShaderReadType m_readType;
    const ImageBackingType m_backingType;

    de::MovePtr<BufferWithMemory> m_inputBuffer;
    de::MovePtr<BufferWithMemory> m_outputBuffer;
    Move<VkBufferView> m_descResultBufferView;
    Move<VkBufferView> m_descIntermResultsBufferView;
    Move<VkDescriptorPool> m_descriptorPool;
    Move<VkDescriptorSetLayout> m_descriptorSetLayout;
    Move<VkDescriptorSet> m_descriptorSet;

    Move<VkDescriptorSetLayout> m_descriptorSetLayoutNoTransfer;
    Move<VkDescriptorPool> m_descriptorPoolNoTransfer;

    de::MovePtr<Image> m_resultImage;
    Move<VkImageView> m_resultImageView;

    std::vector<VkSemaphore> m_waitSemaphores;
};

BinaryAtomicInstanceBase::BinaryAtomicInstanceBase(Context &context, const string &name, const ImageType imageType,
                                                   const tcu::UVec3 &imageSize, const TextureFormat &format,
                                                   const VkImageTiling tiling, const AtomicOperation operation,
                                                   const bool useTransfer, const ShaderReadType shaderReadType,
                                                   const ImageBackingType backingType)
    : vkt::TestInstance(context)
    , m_name(name)
    , m_imageType(imageType)
    , m_imageSize(imageSize)
    , m_format(format)
    , m_tiling(tiling)
    , m_operation(operation)
    , m_useTransfer(useTransfer)
    , m_readType(shaderReadType)
    , m_backingType(backingType)
{
}

tcu::TestStatus BinaryAtomicInstanceBase::iterate(void)
{
    const VkDevice device                  = m_context.getDevice();
    const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
    const VkQueue queue                    = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
    Allocator &allocator                   = m_context.getDefaultAllocator();
    const VkDeviceSize imageSizeInBytes    = tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
    const VkDeviceSize outBuffSizeInBytes  = getOutputBufferSize();
    const VkFormat imageFormat             = mapTextureFormat(m_format);
    const bool isTexelBuffer               = (m_imageType == IMAGE_TYPE_BUFFER);

    if (!isTexelBuffer)
    {
        createImageResources(imageFormat, m_useTransfer);
    }

    tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);

    // Prepare the buffer with the initial data for the image.
    m_inputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
        deviceInterface, device, allocator,
        makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                                   (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT :
                                                                    static_cast<VkBufferUsageFlagBits>(0u))),
        MemoryRequirement::HostVisible));

    // Fill in buffer with initial data used for image.
    initDataForImage(device, deviceInterface, m_format, m_operation, gridSize, *m_inputBuffer);

    // Create a buffer to store shader output copied from result image.
    m_outputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
        deviceInterface, device, allocator,
        makeBufferCreateInfo(outBuffSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                                     (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT :
                                                                      static_cast<VkBufferUsageFlagBits>(0u))),
        MemoryRequirement::HostVisible));

    if (!isTexelBuffer)
    {
        prepareResources(m_useTransfer);
    }

    prepareDescriptors(isTexelBuffer);

    Move<VkDescriptorSet> descriptorSetFillImage;
    Move<VkShaderModule> shaderModuleFillImage;
    Move<VkPipelineLayout> pipelineLayoutFillImage;
    Move<VkPipeline> pipelineFillImage;

    Move<VkDescriptorSet> descriptorSetReadImage;
    Move<VkShaderModule> shaderModuleReadImage;
    Move<VkPipelineLayout> pipelineLayoutReadImage;
    Move<VkPipeline> pipelineReadImage;

    if (!m_useTransfer)
    {
        m_descriptorSetLayoutNoTransfer =
            DescriptorSetLayoutBuilder()
                .addSingleBinding(
                    (isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE),
                    VK_SHADER_STAGE_COMPUTE_BIT)
                .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
                .build(deviceInterface, device);

        m_descriptorPoolNoTransfer =
            DescriptorPoolBuilder()
                .addType((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE),
                         2)
                .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2)
                .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);

        descriptorSetFillImage =
            makeDescriptorSet(deviceInterface, device, *m_descriptorPoolNoTransfer, *m_descriptorSetLayoutNoTransfer);

        descriptorSetReadImage =
            makeDescriptorSet(deviceInterface, device, *m_descriptorPoolNoTransfer, *m_descriptorSetLayoutNoTransfer);

        shaderModuleFillImage =
            createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("fillShader"), 0);
        pipelineLayoutFillImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
        pipelineFillImage =
            makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleFillImage);

        if (m_readType == ShaderReadType::SPARSE)
        {
            shaderModuleReadImage = createShaderModule(deviceInterface, device,
                                                       m_context.getBinaryCollection().get("readShaderResidency"), 0);
        }
        else
        {
            shaderModuleReadImage =
                createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShader"), 0);
        }
        pipelineLayoutReadImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
        pipelineReadImage =
            makeComputePipeline(deviceInterface, device, *pipelineLayoutReadImage, *shaderModuleReadImage);
    }

    // Create pipeline
    const Unique<VkShaderModule> shaderModule(
        createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
    const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
    const Unique<VkPipeline> pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));

    // Create command buffer
    const Unique<VkCommandPool> cmdPool(
        createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    beginCommandBuffer(deviceInterface, *cmdBuffer);

    if (!isTexelBuffer)
    {
        if (m_useTransfer)
        {
            const vector<VkBufferImageCopy> bufferImageCopy(
                1, makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)),
                                       getNumLayers(m_imageType, m_imageSize)));
            copyBufferToImage(deviceInterface, *cmdBuffer, *(*m_inputBuffer), imageSizeInBytes, bufferImageCopy,
                              VK_IMAGE_ASPECT_COLOR_BIT, 1, getNumLayers(m_imageType, m_imageSize),
                              m_resultImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
        }
        else
        {
            shaderFillImage(*cmdBuffer, *(*m_inputBuffer), *pipelineFillImage, *pipelineLayoutFillImage,
                            *descriptorSetFillImage, imageSizeInBytes, gridSize);
        }
        commandsBeforeCompute(*cmdBuffer);
    }

    deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
    deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
                                          &m_descriptorSet.get(), 0u, DE_NULL);

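    // Launch NUM_INVOCATIONS_PER_PIXEL invocations per pixel along X; the shader
    // folds the extra invocations back onto the same texel via "gx % width".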
1216     deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1217 
1218     commandsAfterCompute(*cmdBuffer, *pipelineReadImage, *pipelineLayoutReadImage, *descriptorSetReadImage,
1219                          outBuffSizeInBytes, m_useTransfer);
1220 
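    // Make the output buffer writes visible to the host before it is mapped and read.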
1221     const VkBufferMemoryBarrier outputBufferPreHostReadBarrier = makeBufferMemoryBarrier(
1222         ((m_useTransfer || isTexelBuffer) ? VK_ACCESS_TRANSFER_WRITE_BIT : VK_ACCESS_SHADER_WRITE_BIT),
1223         VK_ACCESS_HOST_READ_BIT, m_outputBuffer->get(), 0ull, outBuffSizeInBytes);
1224 
1225     deviceInterface.cmdPipelineBarrier(
1226         *cmdBuffer,
1227         ((m_useTransfer || isTexelBuffer) ? VK_PIPELINE_STAGE_TRANSFER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT),
1228         VK_PIPELINE_STAGE_HOST_BIT, false, 0u, DE_NULL, 1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);
1229 
1230     endCommandBuffer(deviceInterface, *cmdBuffer);
1231 
1232     std::vector<VkPipelineStageFlags> waitStages(m_waitSemaphores.size(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
1233     submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer, false, 1u,
1234                           static_cast<uint32_t>(m_waitSemaphores.size()), de::dataOrNull(m_waitSemaphores),
1235                           de::dataOrNull(waitStages));
1236 
1237     Allocation &outputBufferAllocation = m_outputBuffer->getAllocation();
1238 
1239     invalidateAlloc(deviceInterface, device, outputBufferAllocation);
1240 
1241     if (verifyResult(outputBufferAllocation, (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)))
1242         return tcu::TestStatus::pass("Comparison succeeded");
1243     else
1244         return tcu::TestStatus::fail("Comparison failed");
1245 }
1246 
1247 void BinaryAtomicInstanceBase::shaderFillImage(const VkCommandBuffer cmdBuffer, const VkBuffer &buffer,
1248                                                const VkPipeline pipeline, const VkPipelineLayout pipelineLayout,
1249                                                const VkDescriptorSet descriptorSet, const VkDeviceSize &range,
1250                                                const tcu::UVec3 &gridSize)
1251 {
1252     const VkDevice device                  = m_context.getDevice();
1253     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1254     const VkDescriptorImageInfo descResultImageInfo =
1255         makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1256     const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(buffer, 0, range);
1257     const VkImageSubresourceRange subresourceRange =
1258         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1259 
1260     DescriptorSetUpdateBuilder()
1261         .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1262                      &descResultImageInfo)
1263         .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
1264                      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1265         .update(deviceInterface, device);
1266 
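    // Move the image into the general layout before the fill shader writes to it.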
1267     const VkImageMemoryBarrier imageBarrierPre =
1268         makeImageMemoryBarrier(0, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
1269                                m_resultImage->get(), subresourceRange);
1270 
1271     deviceInterface.cmdPipelineBarrier(
1272         cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0,
1273         (const VkMemoryBarrier *)DE_NULL, 0, (const VkBufferMemoryBarrier *)DE_NULL, 1, &imageBarrierPre);
1274 
1275     deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1276     deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u,
1277                                           &descriptorSet, 0u, DE_NULL);
1278 
1279     deviceInterface.cmdDispatch(cmdBuffer, gridSize.x(), gridSize.y(), gridSize.z());
1280 
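    // Make the fill-shader writes visible to the subsequent atomic-operation dispatch.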
1281     const VkImageMemoryBarrier imageBarrierPost =
1282         makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1283                                VK_IMAGE_LAYOUT_GENERAL, m_resultImage->get(), subresourceRange);
1284 
1285     deviceInterface.cmdPipelineBarrier(
1286         cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0,
1287         (const VkMemoryBarrier *)DE_NULL, 0, (const VkBufferMemoryBarrier *)DE_NULL, 1, &imageBarrierPost);
1288 }
1289 
1290 void BinaryAtomicInstanceBase::createImageAndView(VkFormat imageFormat, const tcu::UVec3 &imageExtent, bool useTransfer,
1291                                                   de::MovePtr<Image> &imagePtr, Move<VkImageView> &imageViewPtr)
1292 {
1293     const VkDevice device                  = m_context.getDevice();
1294     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1295     Allocator &allocator                   = m_context.getDefaultAllocator();
1296     const VkImageUsageFlags usageFlags     = getUsageFlags(useTransfer);
1297     VkImageCreateFlags createFlags         = 0u;
1298 
1299     if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
1300         createFlags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
1301 
1302     const auto numLayers = getNumLayers(m_imageType, m_imageSize);
1303 
1304     VkImageCreateInfo createInfo = {
1305         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1306         DE_NULL,                             // const void* pNext;
1307         createFlags,                         // VkImageCreateFlags flags;
1308         mapImageType(m_imageType),           // VkImageType imageType;
1309         imageFormat,                         // VkFormat format;
1310         makeExtent3D(imageExtent),           // VkExtent3D extent;
1311         1u,                                  // uint32_t mipLevels;
1312         numLayers,                           // uint32_t arrayLayers;
1313         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
1314         m_tiling,                            // VkImageTiling tiling;
1315         usageFlags,                          // VkImageUsageFlags usage;
1316         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
1317         0u,                                  // uint32_t queueFamilyIndexCount;
1318         DE_NULL,                             // const uint32_t* pQueueFamilyIndices;
1319         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
1320     };
1321 
1322 #ifndef CTS_USES_VULKANSC
1323     if (m_backingType == ImageBackingType::SPARSE)
1324     {
1325         const auto &vki               = m_context.getInstanceInterface();
1326         const auto physicalDevice     = m_context.getPhysicalDevice();
1327         const auto sparseQueue        = m_context.getSparseQueue();
1328         const auto sparseQueueIdx     = m_context.getSparseQueueFamilyIndex();
1329         const auto universalQIdx      = m_context.getUniversalQueueFamilyIndex();
1330         const uint32_t queueIndices[] = {universalQIdx, sparseQueueIdx};
1331 
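        // Sparse images need the sparse binding/residency flags; share the image between the universal and sparse queue families when they differ.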
1332         createInfo.flags |= (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);
1333 
1334         if (sparseQueueIdx != universalQIdx)
1335         {
1336             createInfo.sharingMode           = VK_SHARING_MODE_CONCURRENT;
1337             createInfo.queueFamilyIndexCount = static_cast<uint32_t>(DE_LENGTH_OF_ARRAY(queueIndices));
1338             createInfo.pQueueFamilyIndices   = queueIndices;
1339         }
1340 
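        // SparseImage performs the sparse memory binding on the sparse queue; its semaphore must be waited on before the image is used.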
1341         const auto sparseImage =
1342             new SparseImage(deviceInterface, device, physicalDevice, vki, createInfo, sparseQueue, allocator, m_format);
1343         m_waitSemaphores.push_back(sparseImage->getSemaphore());
1344         imagePtr = de::MovePtr<Image>(sparseImage);
1345     }
1346     else
1347 #endif // CTS_USES_VULKANSC
1348         imagePtr =
1349             de::MovePtr<Image>(new Image(deviceInterface, device, allocator, createInfo, MemoryRequirement::Any));
1350 
1351     const VkImageSubresourceRange subresourceRange =
1352         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numLayers);
1353 
1354     imageViewPtr = makeImageView(deviceInterface, device, imagePtr->get(), mapImageViewType(m_imageType), imageFormat,
1355                                  subresourceRange);
1356 }
1357 
1358 void BinaryAtomicInstanceBase::createImageResources(const VkFormat &imageFormat, const bool useTransfer)
1359 {
1360     // Create the image that will store the results of the atomic operations.
1361     createImageAndView(imageFormat, getLayerSize(m_imageType, m_imageSize), useTransfer, m_resultImage,
1362                        m_resultImageView);
1363 }
1364 
1365 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
1366 {
1367 public:
1368     BinaryAtomicEndResultInstance(Context &context, const string &name, const ImageType imageType,
1369                                   const tcu::UVec3 &imageSize, const TextureFormat &format, const VkImageTiling tiling,
1370                                   const AtomicOperation operation, const bool useTransfer,
1371                                   const ShaderReadType shaderReadType, const ImageBackingType backingType)
1372         : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer,
1373                                    shaderReadType, backingType)
1374     {
1375     }
1376 
1377     virtual uint32_t getOutputBufferSize(void) const;
1378 
1379     virtual void prepareResources(const bool useTransfer)
1380     {
1381         DE_UNREF(useTransfer);
1382     }
1383     virtual void prepareDescriptors(const bool isTexelBuffer);
1384 
1385     virtual void commandsBeforeCompute(const VkCommandBuffer) const
1386     {
1387     }
1388     virtual void commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1389                                       const VkPipelineLayout pipelineLayout, const VkDescriptorSet descriptorSet,
1390                                       const VkDeviceSize &range, const bool useTransfer);
1391 
1392     virtual bool verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const;
1393 
1394 protected:
1395     template <typename T>
1396     bool isValueCorrect(const T resultValue, int32_t x, int32_t y, int32_t z, const UVec3 &gridSize,
1397                         const IVec3 extendedGridSize) const;
1398 };
1399 
1400 uint32_t BinaryAtomicEndResultInstance::getOutputBufferSize(void) const
1401 {
1402     return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1403 }
1404 
1405 void BinaryAtomicEndResultInstance::prepareDescriptors(const bool isTexelBuffer)
1406 {
1407     const VkDescriptorType descriptorType =
1408         isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1409     const VkDevice device                  = m_context.getDevice();
1410     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1411 
1412     m_descriptorSetLayout = DescriptorSetLayoutBuilder()
1413                                 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1414                                 .build(deviceInterface, device);
1415 
1416     m_descriptorPool = DescriptorPoolBuilder()
1417                            .addType(descriptorType)
1418                            .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1419 
1420     m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1421 
1422     if (isTexelBuffer)
1423     {
1424         m_descResultBufferView =
1425             makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1426 
1427         DescriptorSetUpdateBuilder()
1428             .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType,
1429                          &(m_descResultBufferView.get()))
1430             .update(deviceInterface, device);
1431     }
1432     else
1433     {
1434         const VkDescriptorImageInfo descResultImageInfo =
1435             makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1436 
1437         DescriptorSetUpdateBuilder()
1438             .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType,
1439                          &descResultImageInfo)
1440             .update(deviceInterface, device);
1441     }
1442 }
1443 
1444 void BinaryAtomicEndResultInstance::commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1445                                                          const VkPipelineLayout pipelineLayout,
1446                                                          const VkDescriptorSet descriptorSet, const VkDeviceSize &range,
1447                                                          const bool useTransfer)
1448 {
1449     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1450     const VkImageSubresourceRange subresourceRange =
1451         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1452     const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1453 
1454     if (m_imageType == IMAGE_TYPE_BUFFER)
1455     {
1456         m_outputBuffer = m_inputBuffer;
1457     }
1458     else if (useTransfer)
1459     {
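        // Transition the result image for transfer and copy it into the output buffer.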
1460         const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1461             makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1462                                    VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_resultImage->get(), subresourceRange);
1463 
1464         deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1465                                            VK_PIPELINE_STAGE_TRANSFER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1466                                            &resultImagePostDispatchBarrier);
1467 
1468         const VkBufferImageCopy bufferImageCopyParams =
1469             makeBufferImageCopy(makeExtent3D(layerSize), getNumLayers(m_imageType, m_imageSize));
1470 
1471         deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1472                                              m_outputBuffer->get(), 1u, &bufferImageCopyParams);
1473     }
1474     else
1475     {
1476         const VkDevice device = m_context.getDevice();
1477         const VkDescriptorImageInfo descResultImageInfo =
1478             makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1479         const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1480 
1481         DescriptorSetUpdateBuilder()
1482             .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
1483                          VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1484             .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
1485                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1486             .update(deviceInterface, device);
1487 
1488         const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1489             makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1490                                    VK_IMAGE_LAYOUT_GENERAL, m_resultImage->get(), subresourceRange);
1491 
1492         deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1493                                            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1494                                            &resultImagePostDispatchBarrier);
1495 
1496         deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1497         deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u,
1498                                               &descriptorSet, 0u, DE_NULL);
1499 
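        // For arrayed and cube images, the layer count supplies one dimension of the read-back dispatch.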
1500         switch (m_imageType)
1501         {
1502         case IMAGE_TYPE_1D_ARRAY:
1503             deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), subresourceRange.layerCount, layerSize.z());
1504             break;
1505         case IMAGE_TYPE_2D_ARRAY:
1506         case IMAGE_TYPE_CUBE:
1507         case IMAGE_TYPE_CUBE_ARRAY:
1508             deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1509             break;
1510         default:
1511             deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), layerSize.z());
1512             break;
1513         }
1514     }
1515 }
1516 
1517 bool BinaryAtomicEndResultInstance::verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const
1518 {
1519     const UVec3 gridSize         = getShaderGridSize(m_imageType, m_imageSize);
1520     const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1521 
1522     tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(),
1523                                              outputBufferAllocation.getHostPtr());
1524 
1525     for (int32_t z = 0; z < resultBuffer.getDepth(); z++)
1526         for (int32_t y = 0; y < resultBuffer.getHeight(); y++)
1527             for (int32_t x = 0; x < resultBuffer.getWidth(); x++)
1528             {
1529                 const void *resultValue = resultBuffer.getPixelPtr(x, y, z);
1530                 int32_t floatToIntValue = 0;
1531                 bool isFloatValue       = false;
1532                 if (isFloatFormat(mapTextureFormat(m_format)))
1533                 {
1534                     isFloatValue    = true;
1535                     floatToIntValue = static_cast<int32_t>(*((float *)resultValue));
1536                 }
1537 
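                // Order-independent operations: fold every invocation's argument into the initial value and compare against the stored end result.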
1538                 if (isOrderIndependentAtomicOperation(m_operation))
1539                 {
1540                     if (isUintFormat(mapTextureFormat(m_format)))
1541                     {
1542                         if (is64Bit)
1543                         {
1544                             if (!isValueCorrect<uint64_t>(*((uint64_t *)resultValue), x, y, z, gridSize,
1545                                                           extendedGridSize))
1546                                 return false;
1547                         }
1548                         else
1549                         {
1550                             if (!isValueCorrect<uint32_t>(*((uint32_t *)resultValue), x, y, z, gridSize,
1551                                                           extendedGridSize))
1552                                 return false;
1553                         }
1554                     }
1555                     else if (isIntFormat(mapTextureFormat(m_format)))
1556                     {
1557                         if (is64Bit)
1558                         {
1559                             if (!isValueCorrect<int64_t>(*((int64_t *)resultValue), x, y, z, gridSize,
1560                                                          extendedGridSize))
1561                                 return false;
1562                         }
1563                         else
1564                         {
1565                             if (!isValueCorrect<int32_t>(*((int32_t *)resultValue), x, y, z, gridSize,
1566                                                          extendedGridSize))
1567                                 return false;
1568                         }
1569                     }
1570                     else
1571                     {
1572                         // 32-bit floating point
1573                         if (!isValueCorrect<int32_t>(floatToIntValue, x, y, z, gridSize, extendedGridSize))
1574                             return false;
1575                     }
1576                 }
1577                 else if (m_operation == ATOMIC_OPERATION_EXCHANGE || m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
1578                 {
1579                     // For exchange-style operations, check that the end result equals one of the atomic arguments.
1580                     bool matchFound = false;
1581 
1582                     for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1583                     {
1584                         const IVec3 gid(x + i * gridSize.x(), y, z);
1585                         matchFound = is64Bit ? (*((int64_t *)resultValue) ==
1586                                                 getAtomicFuncArgument<int64_t>(m_operation, gid, extendedGridSize)) :
1587                                      isFloatValue ?
1588                                                floatToIntValue ==
1589                                                    getAtomicFuncArgument<int32_t>(m_operation, gid, extendedGridSize) :
1590                                                (*((int32_t *)resultValue) ==
1591                                                 getAtomicFuncArgument<int32_t>(m_operation, gid, extendedGridSize));
1592                     }
1593 
1594                     if (!matchFound)
1595                         return false;
1596                 }
1617                 else
1618                     DE_ASSERT(false);
1619             }
1620     return true;
1621 }
1622 
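// Computes the expected end result by applying the atomic operation once per invocation, starting from the operation's initial value.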
1623 template <typename T>
1624 bool BinaryAtomicEndResultInstance::isValueCorrect(const T resultValue, int32_t x, int32_t y, int32_t z,
1625                                                    const UVec3 &gridSize, const IVec3 extendedGridSize) const
1626 {
1627     T reference = getOperationInitialValue<T>(m_operation);
1628     for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL); i++)
1629     {
1630         const IVec3 gid(x + i * gridSize.x(), y, z);
1631         T arg     = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1632         reference = computeBinaryAtomicOperationResult(m_operation, reference, arg);
1633     }
1634     return (resultValue == reference);
1635 }
1636 
1637 TestInstance *BinaryAtomicEndResultCase::createInstance(Context &context) const
1638 {
1639     return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling, m_operation,
1640                                              m_useTransfer, m_readType, m_backingType);
1641 }
1642 
1643 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
1644 {
1645 public:
1646     BinaryAtomicIntermValuesInstance(Context &context, const string &name, const ImageType imageType,
1647                                      const tcu::UVec3 &imageSize, const TextureFormat &format,
1648                                      const VkImageTiling tiling, const AtomicOperation operation,
1649                                      const bool useTransfer, const ShaderReadType shaderReadType,
1650                                      const ImageBackingType backingType)
1651         : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer,
1652                                    shaderReadType, backingType)
1653     {
1654     }
1655 
1656     virtual uint32_t getOutputBufferSize(void) const;
1657 
1658     virtual void prepareResources(const bool useTransfer);
1659     virtual void prepareDescriptors(const bool isTexelBuffer);
1660 
1661     virtual void commandsBeforeCompute(const VkCommandBuffer cmdBuffer) const;
1662     virtual void commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1663                                       const VkPipelineLayout pipelineLayout, const VkDescriptorSet descriptorSet,
1664                                       const VkDeviceSize &range, const bool useTransfer);
1665 
1666     virtual bool verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const;
1667 
1668 protected:
1669     template <typename T>
1670     bool areValuesCorrect(tcu::ConstPixelBufferAccess &resultBuffer, const bool isFloatingPoint, int32_t x, int32_t y,
1671                           int32_t z, const UVec3 &gridSize, const IVec3 extendedGridSize) const;
1672 
1673     template <typename T>
1674     bool verifyRecursive(const int32_t index, const T valueSoFar, bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1675                          const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1676                          const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
1677     de::MovePtr<Image> m_intermResultsImage;
1678     Move<VkImageView> m_intermResultsImageView;
1679 };
1680 
1681 uint32_t BinaryAtomicIntermValuesInstance::getOutputBufferSize(void) const
1682 {
1683     return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1684 }
1685 
1686 void BinaryAtomicIntermValuesInstance::prepareResources(const bool useTransfer)
1687 {
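    // Extend the layer size so the intermediate image can hold the per-invocation return values (NUM_INVOCATIONS_PER_PIXEL per pixel).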
1688     const UVec3 layerSize       = getLayerSize(m_imageType, m_imageSize);
1689     const bool isCubeBasedImage = (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
1690     const UVec3 extendedLayerSize =
1691         isCubeBasedImage ?
1692             UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z()) :
1693             UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1694 
1695     createImageAndView(mapTextureFormat(m_format), extendedLayerSize, useTransfer, m_intermResultsImage,
1696                        m_intermResultsImageView);
1697 }
1698 
1699 void BinaryAtomicIntermValuesInstance::prepareDescriptors(const bool isTexelBuffer)
1700 {
1701     const VkDescriptorType descriptorType =
1702         isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1703 
1704     const VkDevice device                  = m_context.getDevice();
1705     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1706 
1707     m_descriptorSetLayout = DescriptorSetLayoutBuilder()
1708                                 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1709                                 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1710                                 .build(deviceInterface, device);
1711 
1712     m_descriptorPool = DescriptorPoolBuilder()
1713                            .addType(descriptorType, 2u)
1714                            .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1715 
1716     m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1717 
1718     if (isTexelBuffer)
1719     {
1720         m_descResultBufferView =
1721             makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1722         m_descIntermResultsBufferView =
1723             makeBufferView(deviceInterface, device, *(*m_outputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1724 
1725         DescriptorSetUpdateBuilder()
1726             .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType,
1727                          &(m_descResultBufferView.get()))
1728             .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType,
1729                          &(m_descIntermResultsBufferView.get()))
1730             .update(deviceInterface, device);
1731     }
1732     else
1733     {
1734         const VkDescriptorImageInfo descResultImageInfo =
1735             makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1736         const VkDescriptorImageInfo descIntermResultsImageInfo =
1737             makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1738 
1739         DescriptorSetUpdateBuilder()
1740             .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType,
1741                          &descResultImageInfo)
1742             .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType,
1743                          &descIntermResultsImageInfo)
1744             .update(deviceInterface, device);
1745     }
1746 }
1747 
1748 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute(const VkCommandBuffer cmdBuffer) const
1749 {
1750     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1751     const VkImageSubresourceRange subresourceRange =
1752         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1753 
1754     const VkImageMemoryBarrier imagePreDispatchBarrier =
1755         makeImageMemoryBarrier(0u, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
1756                                m_intermResultsImage->get(), subresourceRange);
1757 
1758     deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1759                                        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1760                                        &imagePreDispatchBarrier);
1761 }
1762 
1763 void BinaryAtomicIntermValuesInstance::commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1764                                                             const VkPipelineLayout pipelineLayout,
1765                                                             const VkDescriptorSet descriptorSet,
1766                                                             const VkDeviceSize &range, const bool useTransfer)
1767 {
1768     // Nothing to do for texel buffers: the intermediate results are already in the output buffer.
1769     if (m_imageType == IMAGE_TYPE_BUFFER)
1770         return;
1771 
1772     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1773     const VkImageSubresourceRange subresourceRange =
1774         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1775     const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1776 
1777     if (useTransfer)
1778     {
1779         const VkImageMemoryBarrier imagePostDispatchBarrier =
1780             makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1781                                    VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_intermResultsImage->get(), subresourceRange);
1782 
1783         deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1784                                            VK_PIPELINE_STAGE_TRANSFER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1785                                            &imagePostDispatchBarrier);
1786 
1787         const UVec3 extendedLayerSize = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1788         const VkBufferImageCopy bufferImageCopyParams =
1789             makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
1790 
1791         deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(),
1792                                              VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u,
1793                                              &bufferImageCopyParams);
1794     }
1795     else
1796     {
1797         const VkDevice device = m_context.getDevice();
1798         const VkDescriptorImageInfo descResultImageInfo =
1799             makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1800         const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1801 
1802         DescriptorSetUpdateBuilder()
1803             .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
1804                          VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1805             .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
1806                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1807             .update(deviceInterface, device);
1808 
1809         const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1810             makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1811                                    VK_IMAGE_LAYOUT_GENERAL, m_intermResultsImage->get(), subresourceRange);
1812 
1813         deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1814                                            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1815                                            &resultImagePostDispatchBarrier);
1816 
1817         deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1818         deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u,
1819                                               &descriptorSet, 0u, DE_NULL);
1820 
1821         switch (m_imageType)
1822         {
1823         case IMAGE_TYPE_1D_ARRAY:
1824             deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(),
1825                                         subresourceRange.layerCount, layerSize.z());
1826             break;
1827         case IMAGE_TYPE_2D_ARRAY:
1828         case IMAGE_TYPE_CUBE:
1829         case IMAGE_TYPE_CUBE_ARRAY:
1830             deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(),
1831                                         subresourceRange.layerCount);
1832             break;
1833         default:
1834             deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(),
1835                                         layerSize.z());
1836             break;
1837         }
1838     }
1839 }
1840 
1841 bool BinaryAtomicIntermValuesInstance::verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const
1842 {
1843     const UVec3 gridSize         = getShaderGridSize(m_imageType, m_imageSize);
1844     const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1845 
1846     tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(),
1847                                              outputBufferAllocation.getHostPtr());
1848 
1849     for (int32_t z = 0; z < resultBuffer.getDepth(); z++)
1850         for (int32_t y = 0; y < resultBuffer.getHeight(); y++)
1851             for (uint32_t x = 0; x < gridSize.x(); x++)
1852             {
1853                 if (isUintFormat(mapTextureFormat(m_format)))
1854                 {
1855                     if (is64Bit)
1856                     {
1857                         if (!areValuesCorrect<uint64_t>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1858                             return false;
1859                     }
1860                     else
1861                     {
1862                         if (!areValuesCorrect<uint32_t>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1863                             return false;
1864                     }
1865                 }
1866                 else if (isIntFormat(mapTextureFormat(m_format)))
1867                 {
1868                     if (is64Bit)
1869                     {
1870                         if (!areValuesCorrect<int64_t>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1871                             return false;
1872                     }
1873                     else
1874                     {
1875                         if (!areValuesCorrect<int32_t>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1876                             return false;
1877                     }
1878                 }
1879                 else
1880                 {
1881                     // 32-bit floating point
1882                     if (!areValuesCorrect<int32_t>(resultBuffer, true, x, y, z, gridSize, extendedGridSize))
1883                         return false;
1884                 }
1885             }
1886 
1887     return true;
1888 }
1889 
1890 template <typename T>
1891 bool BinaryAtomicIntermValuesInstance::areValuesCorrect(tcu::ConstPixelBufferAccess &resultBuffer,
1892                                                         const bool isFloatingPoint, int32_t x, int32_t y, int32_t z,
1893                                                         const UVec3 &gridSize, const IVec3 extendedGridSize) const
1894 {
1895     T resultValues[NUM_INVOCATIONS_PER_PIXEL];
1896     T atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
1897     bool argsUsed[NUM_INVOCATIONS_PER_PIXEL];
1898 
1899     for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL); i++)
1900     {
1901         IVec3 gid(x + i * gridSize.x(), y, z);
1902         T data = *((T *)resultBuffer.getPixelPtr(gid.x(), gid.y(), gid.z()));
1903         if (isFloatingPoint)
1904         {
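            // Reinterpret the 32-bit texel as float, then convert it to the integer comparison type.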
1905             float fData;
1906             deMemcpy(&fData, &data, sizeof(fData));
1907             data = static_cast<T>(fData);
1908         }
1909         resultValues[i] = data;
1910         atomicArgs[i]   = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1911         argsUsed[i]     = false;
1912     }
1913 
1914     // Verify that the return values form a valid sequence.
1915     return verifyRecursive(0, getOperationInitialValue<T>(m_operation), argsUsed, atomicArgs, resultValues);
1916 }
1917 
1918 template <typename T>
1919 bool BinaryAtomicIntermValuesInstance::verifyRecursive(const int32_t index, const T valueSoFar,
1920                                                        bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1921                                                        const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1922                                                        const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
1923 {
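    // Backtracking search: at each step, pick an unused invocation whose returned value matches the running value, apply its argument, and recurse.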
1924     if (index >= static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL))
1925         return true;
1926 
1927     for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL); i++)
1928     {
1929         if (!argsUsed[i] && resultValues[i] == valueSoFar)
1930         {
1931             argsUsed[i] = true;
1932 
1933             if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]),
1934                                 argsUsed, atomicArgs, resultValues))
1935             {
1936                 return true;
1937             }
1938 
1939             argsUsed[i] = false;
1940         }
1941     }
1942 
1943     return false;
1944 }
1945 
1946 TestInstance *BinaryAtomicIntermValuesCase::createInstance(Context &context) const
1947 {
1948     return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling,
1949                                                 m_operation, m_useTransfer, m_readType, m_backingType);
1950 }
1951 
1952 } // namespace
1953 
1954 tcu::TestCaseGroup *createImageAtomicOperationTests(tcu::TestContext &testCtx)
1955 {
1956     de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations"));
1957 
1958     struct ImageParams
1959     {
1960         ImageParams(const ImageType imageType, const tcu::UVec3 &imageSize)
1961             : m_imageType(imageType)
1962             , m_imageSize(imageSize)
1963         {
1964         }
1965         const ImageType m_imageType;
1966         const tcu::UVec3 m_imageSize;
1967     };
1968 
1969     const ImageParams imageParamsArray[] = {ImageParams(IMAGE_TYPE_1D, tcu::UVec3(64u, 1u, 1u)),
1970                                             ImageParams(IMAGE_TYPE_1D_ARRAY, tcu::UVec3(64u, 1u, 8u)),
1971                                             ImageParams(IMAGE_TYPE_2D, tcu::UVec3(64u, 64u, 1u)),
1972                                             ImageParams(IMAGE_TYPE_2D_ARRAY, tcu::UVec3(64u, 64u, 8u)),
1973                                             ImageParams(IMAGE_TYPE_3D, tcu::UVec3(48u, 48u, 8u)),
1974                                             ImageParams(IMAGE_TYPE_CUBE, tcu::UVec3(64u, 64u, 1u)),
1975                                             ImageParams(IMAGE_TYPE_CUBE_ARRAY, tcu::UVec3(64u, 64u, 2u)),
1976                                             ImageParams(IMAGE_TYPE_BUFFER, tcu::UVec3(64u, 1u, 1u))};
1977 
1978     const tcu::TextureFormat formats[] = {tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
1979                                           tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
1980                                           tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::FLOAT),
1981                                           tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT64),
1982                                           tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT64)};
1983 
1984     static const VkImageTiling s_tilings[] = {
1985         VK_IMAGE_TILING_OPTIMAL,
1986         VK_IMAGE_TILING_LINEAR,
1987     };
1988 
1989     const struct
1990     {
1991         ShaderReadType type;
1992         const char *name;
1993     } readTypes[] = {
1994         {ShaderReadType::NORMAL, "normal_read"},
1995 #ifndef CTS_USES_VULKANSC
1996         {ShaderReadType::SPARSE, "sparse_read"},
1997 #endif // CTS_USES_VULKANSC
1998     };
1999 
2000     const struct
2001     {
2002         ImageBackingType type;
2003         const char *name;
2004     } backingTypes[] = {
2005         {ImageBackingType::NORMAL, "normal_img"},
2006 #ifndef CTS_USES_VULKANSC
2007         {ImageBackingType::SPARSE, "sparse_img"},
2008 #endif // CTS_USES_VULKANSC
2009     };
2010 
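    // Case hierarchy: operation / image type / transfer usage / read type / backing type, with end-result and intermediate-values cases per format and tiling.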
2011     for (uint32_t operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
2012     {
2013         const AtomicOperation operation = (AtomicOperation)operationI;
2014 
2015         de::MovePtr<tcu::TestCaseGroup> operationGroup(
2016             new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str()));
2017 
2018         for (uint32_t imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
2019         {
2020             const ImageType imageType  = imageParamsArray[imageTypeNdx].m_imageType;
2021             const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
2022 
2023             de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(
2024                 new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str()));
2025 
2026             for (int useTransferIdx = 0; useTransferIdx < 2; ++useTransferIdx)
2027             {
2028                 const bool useTransfer = (useTransferIdx > 0);
2029                 const string groupName = (!useTransfer ? "no" : "") + string("transfer");
2030 
2031                 de::MovePtr<tcu::TestCaseGroup> transferGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str()));
2032 
2033                 for (int readTypeIdx = 0; readTypeIdx < DE_LENGTH_OF_ARRAY(readTypes); ++readTypeIdx)
2034                 {
2035                     const auto &readType = readTypes[readTypeIdx];
2036 
2037                     de::MovePtr<tcu::TestCaseGroup> readTypeGroup(new tcu::TestCaseGroup(testCtx, readType.name));
2038 
2039                     for (int backingTypeIdx = 0; backingTypeIdx < DE_LENGTH_OF_ARRAY(backingTypes); ++backingTypeIdx)
2040                     {
2041                         const auto &backingType = backingTypes[backingTypeIdx];
2042 
2043                         de::MovePtr<tcu::TestCaseGroup> backingTypeGroup(
2044                             new tcu::TestCaseGroup(testCtx, backingType.name));
2045 
2046                         for (uint32_t formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
2047                         {
2048                             for (int tilingNdx = 0; tilingNdx < DE_LENGTH_OF_ARRAY(s_tilings); tilingNdx++)
2049                             {
2050                                 const TextureFormat &format  = formats[formatNdx];
2051                                 const std::string formatName = getShaderImageFormatQualifier(format);
2052                                 const char *suffix = (s_tilings[tilingNdx] == VK_IMAGE_TILING_OPTIMAL) ? "" : "_linear";
2053 
2054                                 // Non-float texel buffer cases would need additional SPIR-V programs in vktImageAtomicSpirvShaders.cpp.
2055                                 if (imageType == IMAGE_TYPE_BUFFER && (format.type != tcu::TextureFormat::FLOAT))
2056                                 {
2057                                     continue;
2058                                 }
2059 
2060                                 // Only 2D and 3D images may support sparse residency.
2061                                 // VK_IMAGE_TILING_LINEAR does not support sparse residency
2062                                 const auto vkImageType = mapImageType(imageType);
2063                                 if (backingType.type == ImageBackingType::SPARSE &&
2064                                     ((vkImageType != VK_IMAGE_TYPE_2D && vkImageType != VK_IMAGE_TYPE_3D) ||
2065                                      (s_tilings[tilingNdx] == VK_IMAGE_TILING_LINEAR)))
2066                                     continue;
2067 
2068                                 // Only some operations are supported on floating-point
2069                                 if (format.type == tcu::TextureFormat::FLOAT)
2070                                 {
2071                                     if (operation != ATOMIC_OPERATION_ADD &&
2072 #ifndef CTS_USES_VULKANSC
2073                                         operation != ATOMIC_OPERATION_MIN && operation != ATOMIC_OPERATION_MAX &&
2074 #endif // CTS_USES_VULKANSC
2075                                         operation != ATOMIC_OPERATION_EXCHANGE)
2076                                     {
2077                                         continue;
2078                                     }
2079                                 }
2080 
2081                                 if (readType.type == ShaderReadType::SPARSE)
2082                                 {
2083                                     // When using transfer, shader reads will not be used, so avoid creating two identical cases.
2084                                     if (useTransfer)
2085                                         continue;
2086 
2087                                     // Sparse reads are not supported for all types of images.
2088                                     if (imageType == IMAGE_TYPE_1D || imageType == IMAGE_TYPE_1D_ARRAY ||
2089                                         imageType == IMAGE_TYPE_BUFFER)
2090                                         continue;
2091                                 }
2092 
2093                                 // End-result case: checks the final value of the atomic operations, not the intermediate return values.
2094                                 const string caseEndResult = formatName + "_end_result" + suffix;
2095                                 backingTypeGroup->addChild(new BinaryAtomicEndResultCase(
2096                                     testCtx, caseEndResult, imageType, imageSize, format, s_tilings[tilingNdx],
2097                                     operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2098 
2099                                 // Intermediate-values case: checks the return values of the atomic function, not the end result.
2100                                 const string caseIntermValues = formatName + "_intermediate_values" + suffix;
2101                                 backingTypeGroup->addChild(new BinaryAtomicIntermValuesCase(
2102                                     testCtx, caseIntermValues, imageType, imageSize, format, s_tilings[tilingNdx],
2103                                     operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2104                             }
2105                         }
2106 
2107                         readTypeGroup->addChild(backingTypeGroup.release());
2108                     }
2109 
2110                     transferGroup->addChild(readTypeGroup.release());
2111                 }
2112 
2113                 imageTypeGroup->addChild(transferGroup.release());
2114             }
2115 
2116             operationGroup->addChild(imageTypeGroup.release());
2117         }
2118 
2119         imageAtomicOperationsTests->addChild(operationGroup.release());
2120     }
2121 
2122     return imageAtomicOperationsTests.release();
2123 }
2124 
2125 } // namespace image
2126 } // namespace vkt
2127