1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2016 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file vktImageAtomicOperationTests.cpp
21 * \brief Image atomic operation tests
22 *//*--------------------------------------------------------------------*/
23
24 #include "vktImageAtomicOperationTests.hpp"
25 #include "vktImageAtomicSpirvShaders.hpp"
26
27 #include "deUniquePtr.hpp"
28 #include "deStringUtil.hpp"
29 #include "deSTLUtil.hpp"
30
31 #include "vktTestCaseUtil.hpp"
32 #include "vkPrograms.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkQueryUtil.hpp"
35 #include "vkBarrierUtil.hpp"
36 #include "vktImageTestsUtil.hpp"
37 #include "vkBuilderUtil.hpp"
38 #include "vkRef.hpp"
39 #include "vkRefUtil.hpp"
40 #include "vkTypeUtil.hpp"
41 #include "vkCmdUtil.hpp"
42 #include "vkObjUtil.hpp"
43 #include "vkBufferWithMemory.hpp"
44
45 #include "tcuTextureUtil.hpp"
46 #include "tcuTexture.hpp"
47 #include "tcuVectorType.hpp"
48 #include "tcuStringTemplate.hpp"
49
50 namespace vkt
51 {
52 namespace image
53 {
54 namespace
55 {
56
57 using namespace vk;
58 using namespace std;
59 using de::toString;
60
61 using tcu::ConstPixelBufferAccess;
62 using tcu::CubeFace;
63 using tcu::IVec2;
64 using tcu::IVec3;
65 using tcu::IVec4;
66 using tcu::PixelBufferAccess;
67 using tcu::TestContext;
68 using tcu::Texture1D;
69 using tcu::Texture2D;
70 using tcu::Texture2DArray;
71 using tcu::Texture3D;
72 using tcu::TextureCube;
73 using tcu::TextureFormat;
74 using tcu::UVec3;
75 using tcu::UVec4;
76 using tcu::Vec4;
77 using tcu::Vector;
78
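// Number of shader invocations that target each pixel: the dispatch in iterate() scales the X dimension
// by this factor, and the same multiplier is baked into the grid size passed to the atomic argument helpers.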
79 enum
80 {
81 NUM_INVOCATIONS_PER_PIXEL = 5u
82 };
83
84 enum AtomicOperation
85 {
86 ATOMIC_OPERATION_ADD = 0,
87 ATOMIC_OPERATION_SUB,
88 ATOMIC_OPERATION_INC,
89 ATOMIC_OPERATION_DEC,
90 ATOMIC_OPERATION_MIN,
91 ATOMIC_OPERATION_MAX,
92 ATOMIC_OPERATION_AND,
93 ATOMIC_OPERATION_OR,
94 ATOMIC_OPERATION_XOR,
95 ATOMIC_OPERATION_EXCHANGE,
96 ATOMIC_OPERATION_COMPARE_EXCHANGE,
97
98 ATOMIC_OPERATION_LAST
99 };
100
101 enum class ShaderReadType
102 {
103 NORMAL = 0,
104 SPARSE,
105 };
106
107 enum class ImageBackingType
108 {
109 NORMAL = 0,
110 SPARSE,
111 };
112
113 static string getCoordStr(const ImageType imageType, const std::string &x, const std::string &y, const std::string &z)
114 {
115 switch (imageType)
116 {
117 case IMAGE_TYPE_1D:
118 case IMAGE_TYPE_BUFFER:
119 return x;
120 case IMAGE_TYPE_1D_ARRAY:
121 case IMAGE_TYPE_2D:
122 return string("ivec2(" + x + "," + y + ")");
123 case IMAGE_TYPE_2D_ARRAY:
124 case IMAGE_TYPE_3D:
125 case IMAGE_TYPE_CUBE:
126 case IMAGE_TYPE_CUBE_ARRAY:
127 return string("ivec3(" + x + "," + y + "," + z + ")");
128 default:
129 DE_ASSERT(false);
130 return "";
131 }
132 }
133
134 static string getComponentTypeStr(uint32_t componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
135 {
136 DE_ASSERT(intFormat || uintFormat || floatFormat);
137
138 const bool is64 = (componentWidth == 64);
139
140 if (intFormat)
141 return (is64 ? "int64_t" : "int");
142 if (uintFormat)
143 return (is64 ? "uint64_t" : "uint");
144 if (floatFormat)
145 return (is64 ? "double" : "float");
146
147 return "";
148 }
149
150 static string getVec4TypeStr(uint32_t componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
151 {
152 DE_ASSERT(intFormat || uintFormat || floatFormat);
153
154 const bool is64 = (componentWidth == 64);
155
156 if (intFormat)
157 return (is64 ? "i64vec4" : "ivec4");
158 if (uintFormat)
159 return (is64 ? "u64vec4" : "uvec4");
160 if (floatFormat)
161 return (is64 ? "f64vec4" : "vec4");
162
163 return "";
164 }
165
166 static string getAtomicFuncArgumentShaderStr(const AtomicOperation op, const string &x, const string &y,
167 const string &z, const IVec3 &gridSize)
168 {
169 switch (op)
170 {
171 case ATOMIC_OPERATION_ADD:
172 case ATOMIC_OPERATION_AND:
173 case ATOMIC_OPERATION_OR:
174 case ATOMIC_OPERATION_XOR:
175 return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
176 case ATOMIC_OPERATION_MIN:
177 case ATOMIC_OPERATION_MAX:
178 // multiply by (1 - 2*(value % 2)) to make half of the data negative
179 // this will result in generating large numbers for uint formats
180 return string("((1 - 2*(" + x + " % 2)) * (" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + "))");
181 case ATOMIC_OPERATION_EXCHANGE:
182 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
183 return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y +
184 ")");
185 default:
186 DE_ASSERT(false);
187 return "";
188 }
189 }
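// Note: the GLSL expressions above are the string form of getAtomicFuncArgument() below; the two must stay
// in sync so the host-side reference computation sees the same per-invocation arguments as the shader.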
190
191 static string getAtomicOperationCaseName(const AtomicOperation op)
192 {
193 switch (op)
194 {
195 case ATOMIC_OPERATION_ADD:
196 return string("add");
197 case ATOMIC_OPERATION_SUB:
198 return string("sub");
199 case ATOMIC_OPERATION_INC:
200 return string("inc");
201 case ATOMIC_OPERATION_DEC:
202 return string("dec");
203 case ATOMIC_OPERATION_MIN:
204 return string("min");
205 case ATOMIC_OPERATION_MAX:
206 return string("max");
207 case ATOMIC_OPERATION_AND:
208 return string("and");
209 case ATOMIC_OPERATION_OR:
210 return string("or");
211 case ATOMIC_OPERATION_XOR:
212 return string("xor");
213 case ATOMIC_OPERATION_EXCHANGE:
214 return string("exchange");
215 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
216 return string("compare_exchange");
217 default:
218 DE_ASSERT(false);
219 return "";
220 }
221 }
222
223 static string getAtomicOperationShaderFuncName(const AtomicOperation op)
224 {
225 switch (op)
226 {
227 case ATOMIC_OPERATION_ADD:
228 return string("imageAtomicAdd");
229 case ATOMIC_OPERATION_MIN:
230 return string("imageAtomicMin");
231 case ATOMIC_OPERATION_MAX:
232 return string("imageAtomicMax");
233 case ATOMIC_OPERATION_AND:
234 return string("imageAtomicAnd");
235 case ATOMIC_OPERATION_OR:
236 return string("imageAtomicOr");
237 case ATOMIC_OPERATION_XOR:
238 return string("imageAtomicXor");
239 case ATOMIC_OPERATION_EXCHANGE:
240 return string("imageAtomicExchange");
241 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
242 return string("imageAtomicCompSwap");
243 default:
244 DE_ASSERT(false);
245 return "";
246 }
247 }
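// GLSL has no imageAtomic* built-ins for sub/inc/dec, which is why they are missing above; those cases are
// emitted as hand-written SPIR-V instead (see isSpirvAtomicOperation() and vktImageAtomicSpirvShaders.hpp).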
248
249 template <typename T>
250 T getOperationInitialValue(const AtomicOperation op)
251 {
252 switch (op)
253 {
254 // \note 18 is just an arbitrary small nonzero value.
255 case ATOMIC_OPERATION_ADD:
256 return 18;
257 case ATOMIC_OPERATION_INC:
258 return 18;
259 case ATOMIC_OPERATION_SUB:
260 return (1 << 24) - 1;
261 case ATOMIC_OPERATION_DEC:
262 return (1 << 24) - 1;
263 case ATOMIC_OPERATION_MIN:
264 return (1 << 15) - 1;
265 case ATOMIC_OPERATION_MAX:
266 return 18;
267 case ATOMIC_OPERATION_AND:
268 return (1 << 15) - 1;
269 case ATOMIC_OPERATION_OR:
270 return 18;
271 case ATOMIC_OPERATION_XOR:
272 return 18;
273 case ATOMIC_OPERATION_EXCHANGE:
274 return 18;
275 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
276 return 18;
277 default:
278 DE_ASSERT(false);
279 return 0xFFFFFFFF;
280 }
281 }
282
283 template <>
284 int64_t getOperationInitialValue<int64_t>(const AtomicOperation op)
285 {
286 switch (op)
287 {
288 // \note 0x000000BEFFFFFF18 is just an arbitrary nonzero value.
289 case ATOMIC_OPERATION_ADD:
290 return 0x000000BEFFFFFF18;
291 case ATOMIC_OPERATION_INC:
292 return 0x000000BEFFFFFF18;
293 case ATOMIC_OPERATION_SUB:
294 return (1ull << 56) - 1;
295 case ATOMIC_OPERATION_DEC:
296 return (1ull << 56) - 1;
297 case ATOMIC_OPERATION_MIN:
298 return (1ull << 47) - 1;
299 case ATOMIC_OPERATION_MAX:
300 return 0x000000BEFFFFFF18;
301 case ATOMIC_OPERATION_AND:
302 return (1ull << 47) - 1;
303 case ATOMIC_OPERATION_OR:
304 return 0x000000BEFFFFFF18;
305 case ATOMIC_OPERATION_XOR:
306 return 0x000000BEFFFFFF18;
307 case ATOMIC_OPERATION_EXCHANGE:
308 return 0x000000BEFFFFFF18;
309 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
310 return 0x000000BEFFFFFF18;
311 default:
312 DE_ASSERT(false);
313 return 0xFFFFFFFFFFFFFFFF;
314 }
315 }
316
317 template <>
318 uint64_t getOperationInitialValue<uint64_t>(const AtomicOperation op)
319 {
320 return (uint64_t)getOperationInitialValue<int64_t>(op);
321 }
322
323 template <typename T>
324 static T getAtomicFuncArgument(const AtomicOperation op, const IVec3 &invocationID, const IVec3 &gridSize)
325 {
326 const T x = static_cast<T>(invocationID.x());
327 const T y = static_cast<T>(invocationID.y());
328 const T z = static_cast<T>(invocationID.z());
329
330 switch (op)
331 {
332 // \note Fall-throughs.
333 case ATOMIC_OPERATION_ADD:
334 case ATOMIC_OPERATION_SUB:
335 case ATOMIC_OPERATION_AND:
336 case ATOMIC_OPERATION_OR:
337 case ATOMIC_OPERATION_XOR:
338 return x * x + y * y + z * z;
339 case ATOMIC_OPERATION_INC:
340 case ATOMIC_OPERATION_DEC:
341 return 1;
342 case ATOMIC_OPERATION_MIN:
343 case ATOMIC_OPERATION_MAX:
344 // multiply half of the data by -1
345 return (1 - 2 * (x % 2)) * (x * x + y * y + z * z);
346 case ATOMIC_OPERATION_EXCHANGE:
347 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
348 return (z * static_cast<T>(gridSize.x()) + x) * static_cast<T>(gridSize.y()) + y;
349 default:
350 DE_ASSERT(false);
351 return -1;
352 }
353 }
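// Example: for ATOMIC_OPERATION_ADD the invocation at (2,1,0) contributes 2*2 + 1*1 + 0*0 = 5, while for
// MIN/MAX invocations with odd x contribute negated values, e.g. (1,1,0) contributes -(1*1 + 1*1 + 0*0) = -2.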
354
355 //! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried out (i.e. the operation is both commutative and associative).
356 static bool isOrderIndependentAtomicOperation(const AtomicOperation op)
357 {
358 return op == ATOMIC_OPERATION_ADD || op == ATOMIC_OPERATION_SUB || op == ATOMIC_OPERATION_INC ||
359 op == ATOMIC_OPERATION_DEC || op == ATOMIC_OPERATION_MIN || op == ATOMIC_OPERATION_MAX ||
360 op == ATOMIC_OPERATION_AND || op == ATOMIC_OPERATION_OR || op == ATOMIC_OPERATION_XOR;
361 }
362
363 //! Checks if the operation needs an SPIR-V shader.
364 static bool isSpirvAtomicOperation(const AtomicOperation op)
365 {
366 return op == ATOMIC_OPERATION_SUB || op == ATOMIC_OPERATION_INC || op == ATOMIC_OPERATION_DEC;
367 }
368
369 //! Returns the SPIR-V assembler name of the given operation.
370 static std::string getSpirvAtomicOpName(const AtomicOperation op)
371 {
372 switch (op)
373 {
374 case ATOMIC_OPERATION_SUB:
375 return "OpAtomicISub";
376 case ATOMIC_OPERATION_INC:
377 return "OpAtomicIIncrement";
378 case ATOMIC_OPERATION_DEC:
379 return "OpAtomicIDecrement";
380 default:
381 break;
382 }
383
384 DE_ASSERT(false);
385 return "";
386 }
387
388 //! Returns true if the given SPIR-V operation does not need the last argument, compared to OpAtomicIAdd.
389 static bool isSpirvAtomicNoLastArgOp(const AtomicOperation op)
390 {
391 switch (op)
392 {
393 case ATOMIC_OPERATION_SUB:
394 return false;
395 case ATOMIC_OPERATION_INC: // fallthrough
396 case ATOMIC_OPERATION_DEC:
397 return true;
398 default:
399 break;
400 }
401
402 DE_ASSERT(false);
403 return false;
404 }
405
406 //! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
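//! For compare-exchange the comparator used by the generated shaders is 18 (32-bit) or 820338753304 (= 0xBEFFFFFF18, 64-bit),
//! i.e. exactly the initial pixel value from getOperationInitialValue(), so the swap only happens while the pixel is untouched.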
407 template <typename T>
408 static T computeBinaryAtomicOperationResult(const AtomicOperation op, const T a, const T b)
409 {
410 switch (op)
411 {
412 case ATOMIC_OPERATION_INC: // fallthrough.
413 case ATOMIC_OPERATION_ADD:
414 return a + b;
415 case ATOMIC_OPERATION_DEC: // fallthrough.
416 case ATOMIC_OPERATION_SUB:
417 return a - b;
418 case ATOMIC_OPERATION_MIN:
419 return de::min(a, b);
420 case ATOMIC_OPERATION_MAX:
421 return de::max(a, b);
422 case ATOMIC_OPERATION_AND:
423 return a & b;
424 case ATOMIC_OPERATION_OR:
425 return a | b;
426 case ATOMIC_OPERATION_XOR:
427 return a ^ b;
428 case ATOMIC_OPERATION_EXCHANGE:
429 return b;
430 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
431 return (a == (sizeof(T) == 8 ? 0xBEFFFFFF18 : 18)) ? b : a;
432 default:
433 DE_ASSERT(false);
434 return -1;
435 }
436 }
437
438 VkImageUsageFlags getUsageFlags(bool useTransfer)
439 {
440 VkImageUsageFlags usageFlags = VK_IMAGE_USAGE_STORAGE_BIT;
441
442 if (useTransfer)
443 usageFlags |= (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
444
445 return usageFlags;
446 }
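// Fill/read compute shaders for the no-transfer variants: when useTransfer is false the test initializes the
// image from a storage buffer with "fillShader" and reads results back with "readShader" (or
// "readShaderResidency" for sparse reads) instead of using copy commands.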
447
448 void AddFillReadShader(SourceCollections &sourceCollections, const ImageType &imageType,
449 const tcu::TextureFormat &format, const string &componentType, const string &vec4Type)
450 {
451 const string imageInCoord = getCoordStr(imageType, "gx", "gy", "gz");
452 const string shaderImageFormatStr = getShaderImageFormatQualifier(format);
453 const string shaderImageTypeStr = getShaderImageType(format, imageType);
454 const auto componentWidth = getFormatComponentWidth(mapTextureFormat(format), 0u);
455 const string extensions =
456 ((componentWidth == 64u) ? "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
457 "#extension GL_EXT_shader_image_int64 : require\n" :
458 "");
459
460 const string fillShader =
461 "#version 450\n" + extensions + "precision highp " + shaderImageTypeStr +
462 ";\n"
463 "\n"
464 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
465 "layout (" +
466 shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
467 " u_resultImage;\n"
468 "\n"
469 "layout(std430, binding = 1) buffer inputBuffer\n"
470 "{\n"
471 " " +
472 componentType +
473 " data[];\n"
474 "} inBuffer;\n"
475 "\n"
476 "void main(void)\n"
477 "{\n"
478 " int gx = int(gl_GlobalInvocationID.x);\n"
479 " int gy = int(gl_GlobalInvocationID.y);\n"
480 " int gz = int(gl_GlobalInvocationID.z);\n"
481 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
482 " imageStore(u_resultImage, " +
483 imageInCoord + ", " + vec4Type +
484 "(inBuffer.data[index]));\n"
485 "}\n";
486
487 const string readShader =
488 "#version 450\n" + extensions + "precision highp " + shaderImageTypeStr +
489 ";\n"
490 "\n"
491 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
492 "layout (" +
493 shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
494 " u_resultImage;\n"
495 "\n"
496 "layout(std430, binding = 1) buffer outputBuffer\n"
497 "{\n"
498 " " +
499 componentType +
500 " data[];\n"
501 "} outBuffer;\n"
502 "\n"
503 "void main(void)\n"
504 "{\n"
505 " int gx = int(gl_GlobalInvocationID.x);\n"
506 " int gy = int(gl_GlobalInvocationID.y);\n"
507 " int gz = int(gl_GlobalInvocationID.z);\n"
508 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
509 " outBuffer.data[index] = imageLoad(u_resultImage, " +
510 imageInCoord +
511 ").x;\n"
512 "}\n";
513
514 if ((imageType != IMAGE_TYPE_1D) && (imageType != IMAGE_TYPE_1D_ARRAY) && (imageType != IMAGE_TYPE_BUFFER))
515 {
516 const string readShaderResidency =
517 "#version 450\n"
518 "#extension GL_ARB_sparse_texture2 : require\n" +
519 extensions + "precision highp " + shaderImageTypeStr +
520 ";\n"
521 "\n"
522 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
523 "layout (" +
524 shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
525 " u_resultImage;\n"
526 "\n"
527 "layout(std430, binding = 1) buffer outputBuffer\n"
528 "{\n"
529 " " +
530 componentType +
531 " data[];\n"
532 "} outBuffer;\n"
533 "\n"
534 "void main(void)\n"
535 "{\n"
536 " int gx = int(gl_GlobalInvocationID.x);\n"
537 " int gy = int(gl_GlobalInvocationID.y);\n"
538 " int gz = int(gl_GlobalInvocationID.z);\n"
539 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
540 " outBuffer.data[index] = imageLoad(u_resultImage, " +
541 imageInCoord +
542 ").x;\n"
543 " " +
544 vec4Type +
545 " sparseValue;\n"
546 " sparseImageLoadARB(u_resultImage, " +
547 imageInCoord +
548 ", sparseValue);\n"
549 " if (outBuffer.data[index] != sparseValue.x)\n"
550 " outBuffer.data[index] = " +
551 vec4Type +
552 "(1234).x;\n"
553 "}\n";
554
555 sourceCollections.glslSources.add("readShaderResidency")
556 << glu::ComputeSource(readShaderResidency.c_str())
557 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
558 }
559
560 sourceCollections.glslSources.add("fillShader")
561 << glu::ComputeSource(fillShader.c_str())
562 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
563 sourceCollections.glslSources.add("readShader")
564 << glu::ComputeSource(readShader.c_str())
565 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
566 }
567
568 //! Prepare the initial data for the image
569 static void initDataForImage(const VkDevice device, const DeviceInterface &deviceInterface, const TextureFormat &format,
570 const AtomicOperation operation, const tcu::UVec3 &gridSize, BufferWithMemory &buffer)
571 {
572 Allocation &bufferAllocation = buffer.getAllocation();
573 const VkFormat imageFormat = mapTextureFormat(format);
574 tcu::PixelBufferAccess pixelBuffer(format, gridSize.x(), gridSize.y(), gridSize.z(), bufferAllocation.getHostPtr());
575
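// 64-bit formats are filled through raw pointers below: the IVec4-based setPixel() path only carries
// 32-bit components and cannot represent the 64-bit initial values.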
576 if (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)
577 {
578 const int64_t initialValue(getOperationInitialValue<int64_t>(operation));
579
580 for (uint32_t z = 0; z < gridSize.z(); z++)
581 for (uint32_t y = 0; y < gridSize.y(); y++)
582 for (uint32_t x = 0; x < gridSize.x(); x++)
583 {
584 *((int64_t *)pixelBuffer.getPixelPtr(x, y, z)) = initialValue;
585 }
586 }
587 else
588 {
589 const tcu::IVec4 initialValue(getOperationInitialValue<int32_t>(operation));
590
591 for (uint32_t z = 0; z < gridSize.z(); z++)
592 for (uint32_t y = 0; y < gridSize.y(); y++)
593 for (uint32_t x = 0; x < gridSize.x(); x++)
594 {
595 pixelBuffer.setPixel(initialValue, x, y, z);
596 }
597 }
598
599 flushAlloc(deviceInterface, device, bufferAllocation);
600 }
601
602 void commonCheckSupport(Context &context, const tcu::TextureFormat &tcuFormat, VkImageTiling tiling,
603 ImageType imageType, const tcu::UVec3 &imageSize, AtomicOperation operation, bool useTransfer,
604 ShaderReadType readType, ImageBackingType backingType)
605 {
606 const VkFormat format = mapTextureFormat(tcuFormat);
607 const VkImageType vkImgType = mapImageType(imageType);
608 const VkFormatFeatureFlags texelBufferSupport =
609 (VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT);
610
611 const auto &vki = context.getInstanceInterface();
612 const auto physicalDevice = context.getPhysicalDevice();
613 const auto usageFlags = getUsageFlags(useTransfer);
614
615 VkImageFormatProperties vkImageFormatProperties;
616 const auto result = vki.getPhysicalDeviceImageFormatProperties(physicalDevice, format, vkImgType, tiling,
617 usageFlags, 0, &vkImageFormatProperties);
618 if (result != VK_SUCCESS)
619 {
620 if (result == VK_ERROR_FORMAT_NOT_SUPPORTED)
621 TCU_THROW(NotSupportedError, "Format unsupported for tiling");
622 else
623 TCU_FAIL("vkGetPhysicalDeviceImageFormatProperties returned unexpected error");
624 }
625
626 if (vkImageFormatProperties.maxArrayLayers < (uint32_t)getNumLayers(imageType, imageSize))
627 {
628 TCU_THROW(NotSupportedError, "This format and tiling combination does not support this number of aray layers");
629 }
630
631 const VkFormatProperties formatProperties =
632 getPhysicalDeviceFormatProperties(context.getInstanceInterface(), context.getPhysicalDevice(), format);
633 if ((imageType == IMAGE_TYPE_BUFFER) &&
634 ((formatProperties.bufferFeatures & texelBufferSupport) != texelBufferSupport))
635 TCU_THROW(NotSupportedError, "Atomic storage texel buffers not supported");
636
637 const VkFormatFeatureFlags requiredFeaturesLinear =
638 (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
639 if (tiling == vk::VK_IMAGE_TILING_LINEAR &&
640 ((formatProperties.linearTilingFeatures & requiredFeaturesLinear) != requiredFeaturesLinear))
641 {
642 TCU_THROW(NotSupportedError, "Format doesn't support atomic storage with linear tiling");
643 }
644
645 if (imageType == IMAGE_TYPE_CUBE_ARRAY)
646 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_IMAGE_CUBE_ARRAY);
647
648 #ifndef CTS_USES_VULKANSC
649 if (backingType == ImageBackingType::SPARSE)
650 {
651 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);
652
653 switch (vkImgType)
654 {
655 case VK_IMAGE_TYPE_2D:
656 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE2D);
657 break;
658 case VK_IMAGE_TYPE_3D:
659 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE3D);
660 break;
661 default:
662 DE_ASSERT(false);
663 break;
664 }
665
666 if (!checkSparseImageFormatSupport(context.getPhysicalDevice(), context.getInstanceInterface(), format,
667 vkImgType, VK_SAMPLE_COUNT_1_BIT, usageFlags, tiling))
668 TCU_THROW(NotSupportedError, "Format does not support sparse images");
669 }
670 #endif // CTS_USES_VULKANSC
671
672 if (isFloatFormat(format))
673 {
674 context.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
675
676 const VkFormatFeatureFlags requiredFeatures =
677 (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
678 const auto &atomicFloatFeatures = context.getShaderAtomicFloatFeaturesEXT();
679
680 if (!atomicFloatFeatures.shaderImageFloat32Atomics)
681 TCU_THROW(NotSupportedError, "shaderImageFloat32Atomics not supported");
682
683 if ((operation == ATOMIC_OPERATION_ADD) && !atomicFloatFeatures.shaderImageFloat32AtomicAdd)
684 TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicAdd not supported");
685
686 if (operation == ATOMIC_OPERATION_MIN || operation == ATOMIC_OPERATION_MAX)
687 {
688 context.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
689 #ifndef CTS_USES_VULKANSC
690 if (!context.getShaderAtomicFloat2FeaturesEXT().shaderImageFloat32AtomicMinMax)
691 {
692 TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicMinMax not supported");
693 }
694 #endif // CTS_USES_VULKANSC
695 }
696
697 if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
698 TCU_FAIL("Required format feature bits not supported");
699
700 if (backingType == ImageBackingType::SPARSE)
701 {
702 if (!atomicFloatFeatures.sparseImageFloat32Atomics)
703 TCU_THROW(NotSupportedError, "sparseImageFloat32Atomics not supported");
704
705 if (operation == ATOMIC_OPERATION_ADD && !atomicFloatFeatures.sparseImageFloat32AtomicAdd)
706 TCU_THROW(NotSupportedError, "sparseImageFloat32AtomicAdd not supported");
707 }
708 }
709 else if (format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
710 {
711 context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");
712
713 const VkFormatFeatureFlags requiredFeatures =
714 (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
715 const auto &atomicInt64Features = context.getShaderImageAtomicInt64FeaturesEXT();
716
717 if (!atomicInt64Features.shaderImageInt64Atomics)
718 TCU_THROW(NotSupportedError, "shaderImageInt64Atomics not supported");
719
720 if (backingType == ImageBackingType::SPARSE && !atomicInt64Features.sparseImageInt64Atomics)
721 TCU_THROW(NotSupportedError, "sparseImageInt64Atomics not supported");
722
723 if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
724 TCU_FAIL("Mandatory format features not supported");
725 }
726
727 if (useTransfer)
728 {
729 const VkFormatFeatureFlags transferFeatures =
730 (VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT);
731 if ((formatProperties.optimalTilingFeatures & transferFeatures) != transferFeatures)
732 TCU_THROW(NotSupportedError, "Transfer features not supported for this format");
733 }
734
735 if (readType == ShaderReadType::SPARSE)
736 {
737 DE_ASSERT(imageType != IMAGE_TYPE_1D && imageType != IMAGE_TYPE_1D_ARRAY && imageType != IMAGE_TYPE_BUFFER);
738 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_RESOURCE_RESIDENCY);
739 }
740 }
741
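// Test case that checks only the final value left in each pixel after all atomic invocations have completed.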
742 class BinaryAtomicEndResultCase : public vkt::TestCase
743 {
744 public:
745 BinaryAtomicEndResultCase(tcu::TestContext &testCtx, const string &name, const ImageType imageType,
746 const tcu::UVec3 &imageSize, const tcu::TextureFormat &format, const VkImageTiling tiling,
747 const AtomicOperation operation, const bool useTransfer,
748 const ShaderReadType shaderReadType, const ImageBackingType backingType,
749 const glu::GLSLVersion glslVersion);
750
751 void initPrograms(SourceCollections &sourceCollections) const;
752 TestInstance *createInstance(Context &context) const;
753 virtual void checkSupport(Context &context) const;
754
755 private:
756 const ImageType m_imageType;
757 const tcu::UVec3 m_imageSize;
758 const tcu::TextureFormat m_format;
759 const VkImageTiling m_tiling;
760 const AtomicOperation m_operation;
761 const bool m_useTransfer;
762 const ShaderReadType m_readType;
763 const ImageBackingType m_backingType;
764 const glu::GLSLVersion m_glslVersion;
765 };
766
767 BinaryAtomicEndResultCase::BinaryAtomicEndResultCase(tcu::TestContext &testCtx, const string &name,
768 const ImageType imageType, const tcu::UVec3 &imageSize,
769 const tcu::TextureFormat &format, const VkImageTiling tiling,
770 const AtomicOperation operation, const bool useTransfer,
771 const ShaderReadType shaderReadType,
772 const ImageBackingType backingType,
773 const glu::GLSLVersion glslVersion)
774 : TestCase(testCtx, name)
775 , m_imageType(imageType)
776 , m_imageSize(imageSize)
777 , m_format(format)
778 , m_tiling(tiling)
779 , m_operation(operation)
780 , m_useTransfer(useTransfer)
781 , m_readType(shaderReadType)
782 , m_backingType(backingType)
783 , m_glslVersion(glslVersion)
784 {
785 }
786
787 void BinaryAtomicEndResultCase::checkSupport(Context &context) const
788 {
789 commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType,
790 m_backingType);
791 }
792
793 void BinaryAtomicEndResultCase::initPrograms(SourceCollections &sourceCollections) const
794 {
795 const VkFormat imageFormat = mapTextureFormat(m_format);
796 const uint32_t componentWidth = getFormatComponentWidth(imageFormat, 0);
797 const bool intFormat = isIntFormat(imageFormat);
798 const bool uintFormat = isUintFormat(imageFormat);
799 const bool floatFormat = isFloatFormat(imageFormat);
800 const string type = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
801 const string vec4Type = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
802
803 AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
804
805 if (isSpirvAtomicOperation(m_operation))
806 {
807 const CaseVariant caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_END_RESULTS};
808 const tcu::StringTemplate shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
809 std::map<std::string, std::string> specializations;
810
811 specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
812 if (isSpirvAtomicNoLastArgOp(m_operation))
813 specializations["LASTARG"] = "";
814
815 sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
816 }
817 else
818 {
819 const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
820
821 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
822 const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
823
824 const string atomicArgExpr =
825 type +
826 getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz",
827 IVec3(NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z()));
828
829 const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
830 (componentWidth == 64 ? ", 820338753304" : ", 18") +
831 string(uintFormat ? "u" : "") +
832 string(componentWidth == 64 ? "l" : "") :
833 "";
834 const string atomicInvocation = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " +
835 atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
836 const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
837 const string shaderImageTypeStr = getShaderImageType(m_format, m_imageType);
838 const string extensions = "#extension GL_EXT_shader_atomic_float : enable\n"
839 "#extension GL_EXT_shader_atomic_float2 : enable\n"
840 "#extension GL_KHR_memory_scope_semantics : enable";
841
842 string source = versionDecl + "\n" + extensions + "\n";
843
844 if (64 == componentWidth)
845 {
846 source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
847 "#extension GL_EXT_shader_image_int64 : require\n";
848 }
849
850 source += "precision highp " + shaderImageTypeStr +
851 ";\n"
852 "\n"
853 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
854 "layout (" +
855 shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
856 " u_resultImage;\n"
857 "\n"
858 "void main (void)\n"
859 "{\n"
860 " int gx = int(gl_GlobalInvocationID.x);\n"
861 " int gy = int(gl_GlobalInvocationID.y);\n"
862 " int gz = int(gl_GlobalInvocationID.z);\n"
863 " " +
864 atomicInvocation +
865 ";\n"
866 "}\n";
867
868 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
869 }
870 }
871
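// Test case that additionally stores the value returned by every atomic invocation into a second image,
// so the intermediate results of the read-modify-write sequence can be validated as well.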
872 class BinaryAtomicIntermValuesCase : public vkt::TestCase
873 {
874 public:
875 BinaryAtomicIntermValuesCase(tcu::TestContext &testCtx, const string &name, const ImageType imageType,
876 const tcu::UVec3 &imageSize, const tcu::TextureFormat &format,
877 const VkImageTiling tiling, const AtomicOperation operation, const bool useTransfer,
878 const ShaderReadType shaderReadType, const ImageBackingType backingType,
879 const glu::GLSLVersion glslVersion);
880
881 void initPrograms(SourceCollections &sourceCollections) const;
882 TestInstance *createInstance(Context &context) const;
883 virtual void checkSupport(Context &context) const;
884
885 private:
886 const ImageType m_imageType;
887 const tcu::UVec3 m_imageSize;
888 const tcu::TextureFormat m_format;
889 const VkImageTiling m_tiling;
890 const AtomicOperation m_operation;
891 const bool m_useTransfer;
892 const ShaderReadType m_readType;
893 const ImageBackingType m_backingType;
894 const glu::GLSLVersion m_glslVersion;
895 };
896
897 BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase(
898 TestContext &testCtx, const string &name, const ImageType imageType, const tcu::UVec3 &imageSize,
899 const TextureFormat &format, const VkImageTiling tiling, const AtomicOperation operation, const bool useTransfer,
900 const ShaderReadType shaderReadType, const ImageBackingType backingType, const glu::GLSLVersion glslVersion)
901 : TestCase(testCtx, name)
902 , m_imageType(imageType)
903 , m_imageSize(imageSize)
904 , m_format(format)
905 , m_tiling(tiling)
906 , m_operation(operation)
907 , m_useTransfer(useTransfer)
908 , m_readType(shaderReadType)
909 , m_backingType(backingType)
910 , m_glslVersion(glslVersion)
911 {
912 }
913
914 void BinaryAtomicIntermValuesCase::checkSupport(Context &context) const
915 {
916 commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType,
917 m_backingType);
918 }
919
920 void BinaryAtomicIntermValuesCase::initPrograms(SourceCollections &sourceCollections) const
921 {
922 const VkFormat imageFormat = mapTextureFormat(m_format);
923 const uint32_t componentWidth = getFormatComponentWidth(imageFormat, 0);
924 const bool intFormat = isIntFormat(imageFormat);
925 const bool uintFormat = isUintFormat(imageFormat);
926 const bool floatFormat = isFloatFormat(imageFormat);
927 const string type = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
928 const string vec4Type = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
929
930 AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
931
932 if (isSpirvAtomicOperation(m_operation))
933 {
934 const CaseVariant caseVariant{m_imageType, m_format.order, m_format.type,
935 CaseVariant::CHECK_TYPE_INTERMEDIATE_RESULTS};
936 const tcu::StringTemplate shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
937 std::map<std::string, std::string> specializations;
938
939 specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
940 if (isSpirvAtomicNoLastArgOp(m_operation))
941 specializations["LASTARG"] = "";
942
943 sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
944 }
945 else
946 {
947 const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
948 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
949 const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
950 const string invocationCoord = getCoordStr(m_imageType, "gx", "gy", "gz");
951 const string atomicArgExpr =
952 type +
953 getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz",
954 IVec3(NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z()));
955
956 const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
957 (componentWidth == 64 ? ", 820338753304" : ", 18") +
958 string(uintFormat ? "u" : "") +
959 string(componentWidth == 64 ? "l" : "") :
960 "";
961 const string atomicInvocation = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " +
962 atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
963 const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
964 const string shaderImageTypeStr = getShaderImageType(m_format, m_imageType);
965 const string extensions = "#extension GL_EXT_shader_atomic_float : enable\n"
966 "#extension GL_EXT_shader_atomic_float2 : enable\n"
967 "#extension GL_KHR_memory_scope_semantics : enable";
968
969 string source = versionDecl + "\n" + extensions +
970 "\n"
971 "\n";
972
973 if (64 == componentWidth)
974 {
975 source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
976 "#extension GL_EXT_shader_image_int64 : require\n";
977 }
978
979 source += "precision highp " + shaderImageTypeStr +
980 "; \n"
981 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
982 "layout (" +
983 shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
984 " u_resultImage;\n"
985 "layout (" +
986 shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr +
987 " u_intermValuesImage;\n"
988 "\n"
989 "void main (void)\n"
990 "{\n"
991 " int gx = int(gl_GlobalInvocationID.x);\n"
992 " int gy = int(gl_GlobalInvocationID.y);\n"
993 " int gz = int(gl_GlobalInvocationID.z);\n"
994 " imageStore(u_intermValuesImage, " +
995 invocationCoord + ", " + vec4Type + "(" + atomicInvocation +
996 "));\n"
997 "}\n";
998
999 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
1000 }
1001 }
1002
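// Common instance logic: creates the result image (or texel buffer), uploads the initial data either via a
// buffer-to-image copy or with the fill shader, dispatches NUM_INVOCATIONS_PER_PIXEL invocations per pixel,
// and finally copies or reads the results into a host-visible buffer for verification in verifyResult().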
1003 class BinaryAtomicInstanceBase : public vkt::TestInstance
1004 {
1005 public:
1006 BinaryAtomicInstanceBase(Context &context, const string &name, const ImageType imageType,
1007 const tcu::UVec3 &imageSize, const TextureFormat &format, const VkImageTiling tiling,
1008 const AtomicOperation operation, const bool useTransfer,
1009 const ShaderReadType shaderReadType, const ImageBackingType backingType);
1010
1011 tcu::TestStatus iterate(void);
1012
1013 virtual uint32_t getOutputBufferSize(void) const = 0;
1014
1015 virtual void prepareResources(const bool useTransfer) = 0;
1016 virtual void prepareDescriptors(const bool isTexelBuffer) = 0;
1017
1018 virtual void commandsBeforeCompute(const VkCommandBuffer cmdBuffer) const = 0;
1019 virtual void commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1020 const VkPipelineLayout pipelineLayout, const VkDescriptorSet descriptorSet,
1021 const VkDeviceSize &range, const bool useTransfer) = 0;
1022
1023 virtual bool verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const = 0;
1024
1025 protected:
1026 void shaderFillImage(const VkCommandBuffer cmdBuffer, const VkBuffer &buffer, const VkPipeline pipeline,
1027 const VkPipelineLayout pipelineLayout, const VkDescriptorSet descriptorSet,
1028 const VkDeviceSize &range, const tcu::UVec3 &gridSize);
1029
1030 void createImageAndView(VkFormat imageFormat, const tcu::UVec3 &imageExtent, bool useTransfer,
1031 de::MovePtr<Image> &imagePtr, Move<VkImageView> &imageViewPtr);
1032
1033 void createImageResources(const VkFormat &imageFormat, const bool useTransfer);
1034
1035 const string m_name;
1036 const ImageType m_imageType;
1037 const tcu::UVec3 m_imageSize;
1038 const TextureFormat m_format;
1039 const VkImageTiling m_tiling;
1040 const AtomicOperation m_operation;
1041 const bool m_useTransfer;
1042 const ShaderReadType m_readType;
1043 const ImageBackingType m_backingType;
1044
1045 de::MovePtr<BufferWithMemory> m_inputBuffer;
1046 de::MovePtr<BufferWithMemory> m_outputBuffer;
1047 Move<VkBufferView> m_descResultBufferView;
1048 Move<VkBufferView> m_descIntermResultsBufferView;
1049 Move<VkDescriptorPool> m_descriptorPool;
1050 Move<VkDescriptorSetLayout> m_descriptorSetLayout;
1051 Move<VkDescriptorSet> m_descriptorSet;
1052
1053 Move<VkDescriptorSetLayout> m_descriptorSetLayoutNoTransfer;
1054 Move<VkDescriptorPool> m_descriptorPoolNoTransfer;
1055
1056 de::MovePtr<Image> m_resultImage;
1057 Move<VkImageView> m_resultImageView;
1058
1059 std::vector<VkSemaphore> m_waitSemaphores;
1060 };
1061
1062 BinaryAtomicInstanceBase::BinaryAtomicInstanceBase(Context &context, const string &name, const ImageType imageType,
1063 const tcu::UVec3 &imageSize, const TextureFormat &format,
1064 const VkImageTiling tiling, const AtomicOperation operation,
1065 const bool useTransfer, const ShaderReadType shaderReadType,
1066 const ImageBackingType backingType)
1067 : vkt::TestInstance(context)
1068 , m_name(name)
1069 , m_imageType(imageType)
1070 , m_imageSize(imageSize)
1071 , m_format(format)
1072 , m_tiling(tiling)
1073 , m_operation(operation)
1074 , m_useTransfer(useTransfer)
1075 , m_readType(shaderReadType)
1076 , m_backingType(backingType)
1077 {
1078 }
1079
1080 tcu::TestStatus BinaryAtomicInstanceBase::iterate(void)
1081 {
1082 const VkDevice device = m_context.getDevice();
1083 const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1084 const VkQueue queue = m_context.getUniversalQueue();
1085 const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1086 Allocator &allocator = m_context.getDefaultAllocator();
1087 const VkDeviceSize imageSizeInBytes = tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1088 const VkDeviceSize outBuffSizeInBytes = getOutputBufferSize();
1089 const VkFormat imageFormat = mapTextureFormat(m_format);
1090 const bool isTexelBuffer = (m_imageType == IMAGE_TYPE_BUFFER);
1091
1092 if (!isTexelBuffer)
1093 {
1094 createImageResources(imageFormat, m_useTransfer);
1095 }
1096
1097 tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1098
1099 // Prepare the buffer with the initial data for the image
1100 m_inputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1101 deviceInterface, device, allocator,
1102 makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1103 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT :
1104 static_cast<VkBufferUsageFlagBits>(0u))),
1105 MemoryRequirement::HostVisible));
1106
1107 // Fill in buffer with initial data used for image.
1108 initDataForImage(device, deviceInterface, m_format, m_operation, gridSize, *m_inputBuffer);
1109
1110 // Create a buffer to store shader output copied from result image
1111 m_outputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1112 deviceInterface, device, allocator,
1113 makeBufferCreateInfo(outBuffSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1114 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT :
1115 static_cast<VkBufferUsageFlagBits>(0u))),
1116 MemoryRequirement::HostVisible));
1117
1118 if (!isTexelBuffer)
1119 {
1120 prepareResources(m_useTransfer);
1121 }
1122
1123 prepareDescriptors(isTexelBuffer);
1124
1125 Move<VkDescriptorSet> descriptorSetFillImage;
1126 Move<VkShaderModule> shaderModuleFillImage;
1127 Move<VkPipelineLayout> pipelineLayoutFillImage;
1128 Move<VkPipeline> pipelineFillImage;
1129
1130 Move<VkDescriptorSet> descriptorSetReadImage;
1131 Move<VkShaderModule> shaderModuleReadImage;
1132 Move<VkPipelineLayout> pipelineLayoutReadImage;
1133 Move<VkPipeline> pipelineReadImage;
1134
1135 if (!m_useTransfer)
1136 {
1137 m_descriptorSetLayoutNoTransfer =
1138 DescriptorSetLayoutBuilder()
1139 .addSingleBinding(
1140 (isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE),
1141 VK_SHADER_STAGE_COMPUTE_BIT)
1142 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1143 .build(deviceInterface, device);
1144
1145 m_descriptorPoolNoTransfer =
1146 DescriptorPoolBuilder()
1147 .addType((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE),
1148 2)
1149 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2)
1150 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);
1151
1152 descriptorSetFillImage =
1153 makeDescriptorSet(deviceInterface, device, *m_descriptorPoolNoTransfer, *m_descriptorSetLayoutNoTransfer);
1154
1155 descriptorSetReadImage =
1156 makeDescriptorSet(deviceInterface, device, *m_descriptorPoolNoTransfer, *m_descriptorSetLayoutNoTransfer);
1157
1158 shaderModuleFillImage =
1159 createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("fillShader"), 0);
1160 pipelineLayoutFillImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
1161 pipelineFillImage =
1162 makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleFillImage);
1163
1164 if (m_readType == ShaderReadType::SPARSE)
1165 {
1166 shaderModuleReadImage = createShaderModule(deviceInterface, device,
1167 m_context.getBinaryCollection().get("readShaderResidency"), 0);
1168 }
1169 else
1170 {
1171 shaderModuleReadImage =
1172 createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShader"), 0);
1173 }
1174 pipelineLayoutReadImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
1175 pipelineReadImage =
1176 makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleReadImage);
1177 }
1178
1179 // Create pipeline
1180 const Unique<VkShaderModule> shaderModule(
1181 createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
1182 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
1183 const Unique<VkPipeline> pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));
1184
1185 // Create command buffer
1186 const Unique<VkCommandPool> cmdPool(
1187 createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
1188 const Unique<VkCommandBuffer> cmdBuffer(
1189 allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1190
1191 beginCommandBuffer(deviceInterface, *cmdBuffer);
1192
1193 if (!isTexelBuffer)
1194 {
1195 if (m_useTransfer)
1196 {
1197 const vector<VkBufferImageCopy> bufferImageCopy(
1198 1, makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)),
1199 getNumLayers(m_imageType, m_imageSize)));
1200 copyBufferToImage(deviceInterface, *cmdBuffer, *(*m_inputBuffer), imageSizeInBytes, bufferImageCopy,
1201 VK_IMAGE_ASPECT_COLOR_BIT, 1, getNumLayers(m_imageType, m_imageSize),
1202 m_resultImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
1203 }
1204 else
1205 {
1206 shaderFillImage(*cmdBuffer, *(*m_inputBuffer), *pipelineFillImage, *pipelineLayoutFillImage,
1207 *descriptorSetFillImage, imageSizeInBytes, gridSize);
1208 }
1209 commandsBeforeCompute(*cmdBuffer);
1210 }
1211
1212 deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1213 deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
1214 &m_descriptorSet.get(), 0u, DE_NULL);
1215
1216 deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1217
1218 commandsAfterCompute(*cmdBuffer, *pipelineReadImage, *pipelineLayoutReadImage, *descriptorSetReadImage,
1219 outBuffSizeInBytes, m_useTransfer);
1220
1221 const VkBufferMemoryBarrier outputBufferPreHostReadBarrier = makeBufferMemoryBarrier(
1222 ((m_useTransfer || isTexelBuffer) ? VK_ACCESS_TRANSFER_WRITE_BIT : VK_ACCESS_SHADER_WRITE_BIT),
1223 VK_ACCESS_HOST_READ_BIT, m_outputBuffer->get(), 0ull, outBuffSizeInBytes);
1224
1225 deviceInterface.cmdPipelineBarrier(
1226 *cmdBuffer,
1227 ((m_useTransfer || isTexelBuffer) ? VK_PIPELINE_STAGE_TRANSFER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT),
1228 VK_PIPELINE_STAGE_HOST_BIT, false, 0u, DE_NULL, 1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);
1229
1230 endCommandBuffer(deviceInterface, *cmdBuffer);
1231
1232 std::vector<VkPipelineStageFlags> waitStages(m_waitSemaphores.size(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
1233 submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer, false, 1u,
1234 static_cast<uint32_t>(m_waitSemaphores.size()), de::dataOrNull(m_waitSemaphores),
1235 de::dataOrNull(waitStages));
1236
1237 Allocation &outputBufferAllocation = m_outputBuffer->getAllocation();
1238
1239 invalidateAlloc(deviceInterface, device, outputBufferAllocation);
1240
1241 if (verifyResult(outputBufferAllocation, (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)))
1242 return tcu::TestStatus::pass("Comparison succeeded");
1243 else
1244 return tcu::TestStatus::fail("Comparison failed");
1245 }
1246
1247 void BinaryAtomicInstanceBase::shaderFillImage(const VkCommandBuffer cmdBuffer, const VkBuffer &buffer,
1248 const VkPipeline pipeline, const VkPipelineLayout pipelineLayout,
1249 const VkDescriptorSet descriptorSet, const VkDeviceSize &range,
1250 const tcu::UVec3 &gridSize)
1251 {
1252 const VkDevice device = m_context.getDevice();
1253 const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1254 const VkDescriptorImageInfo descResultImageInfo =
1255 makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1256 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(buffer, 0, range);
1257 const VkImageSubresourceRange subresourceRange =
1258 makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1259
1260 DescriptorSetUpdateBuilder()
1261 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1262 &descResultImageInfo)
1263 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
1264 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1265 .update(deviceInterface, device);
1266
1267 const VkImageMemoryBarrier imageBarrierPre =
1268 makeImageMemoryBarrier(0, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
1269 m_resultImage->get(), subresourceRange);
1270
1271 deviceInterface.cmdPipelineBarrier(
1272 cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0,
1273 (const VkMemoryBarrier *)DE_NULL, 0, (const VkBufferMemoryBarrier *)DE_NULL, 1, &imageBarrierPre);
1274
1275 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1276 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u,
1277 &descriptorSet, 0u, DE_NULL);
1278
1279 deviceInterface.cmdDispatch(cmdBuffer, gridSize.x(), gridSize.y(), gridSize.z());
1280
1281 const VkImageMemoryBarrier imageBarrierPost =
1282 makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1283 VK_IMAGE_LAYOUT_GENERAL, m_resultImage->get(), subresourceRange);
1284
1285 deviceInterface.cmdPipelineBarrier(
1286 cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0,
1287 (const VkMemoryBarrier *)DE_NULL, 0, (const VkBufferMemoryBarrier *)DE_NULL, 1, &imageBarrierPost);
1288 }
1289
1290 void BinaryAtomicInstanceBase::createImageAndView(VkFormat imageFormat, const tcu::UVec3 &imageExtent, bool useTransfer,
1291 de::MovePtr<Image> &imagePtr, Move<VkImageView> &imageViewPtr)
1292 {
1293 const VkDevice device = m_context.getDevice();
1294 const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1295 Allocator &allocator = m_context.getDefaultAllocator();
1296 const VkImageUsageFlags usageFlags = getUsageFlags(useTransfer);
1297 VkImageCreateFlags createFlags = 0u;
1298
1299 if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
1300 createFlags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
1301
1302 const auto numLayers = getNumLayers(m_imageType, m_imageSize);
1303
1304 VkImageCreateInfo createInfo = {
1305 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1306 DE_NULL, // const void* pNext;
1307 createFlags, // VkImageCreateFlags flags;
1308 mapImageType(m_imageType), // VkImageType imageType;
1309 imageFormat, // VkFormat format;
1310 makeExtent3D(imageExtent), // VkExtent3D extent;
1311 1u, // uint32_t mipLevels;
1312 numLayers, // uint32_t arrayLayers;
1313 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
1314 m_tiling, // VkImageTiling tiling;
1315 usageFlags, // VkImageUsageFlags usage;
1316 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1317 0u, // uint32_t queueFamilyIndexCount;
1318 DE_NULL, // const uint32_t* pQueueFamilyIndices;
1319 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
1320 };
1321
1322 #ifndef CTS_USES_VULKANSC
1323 if (m_backingType == ImageBackingType::SPARSE)
1324 {
1325 const auto &vki = m_context.getInstanceInterface();
1326 const auto physicalDevice = m_context.getPhysicalDevice();
1327 const auto sparseQueue = m_context.getSparseQueue();
1328 const auto sparseQueueIdx = m_context.getSparseQueueFamilyIndex();
1329 const auto universalQIdx = m_context.getUniversalQueueFamilyIndex();
1330 const uint32_t queueIndices[] = {universalQIdx, sparseQueueIdx};
1331
1332 createInfo.flags |= (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);
1333
1334 if (sparseQueueIdx != universalQIdx)
1335 {
1336 createInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
1337 createInfo.queueFamilyIndexCount = static_cast<uint32_t>(DE_LENGTH_OF_ARRAY(queueIndices));
1338 createInfo.pQueueFamilyIndices = queueIndices;
1339 }
1340
1341 const auto sparseImage =
1342 new SparseImage(deviceInterface, device, physicalDevice, vki, createInfo, sparseQueue, allocator, m_format);
1343 m_waitSemaphores.push_back(sparseImage->getSemaphore());
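// The sparse image's binding semaphore is collected here and waited on when the compute work is submitted
// (see iterate()), so the image memory is bound before any shader accesses it.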
1344 imagePtr = de::MovePtr<Image>(sparseImage);
1345 }
1346 else
1347 #endif // CTS_USES_VULKANSC
1348 imagePtr =
1349 de::MovePtr<Image>(new Image(deviceInterface, device, allocator, createInfo, MemoryRequirement::Any));
1350
1351 const VkImageSubresourceRange subresourceRange =
1352 makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numLayers);
1353
1354 imageViewPtr = makeImageView(deviceInterface, device, imagePtr->get(), mapImageViewType(m_imageType), imageFormat,
1355 subresourceRange);
1356 }
1357
1358 void BinaryAtomicInstanceBase::createImageResources(const VkFormat &imageFormat, const bool useTransfer)
1359 {
1360 // Create the image that is going to store results of atomic operations
1361 createImageAndView(imageFormat, getLayerSize(m_imageType, m_imageSize), useTransfer, m_resultImage,
1362 m_resultImageView);
1363 }
1364
1365 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
1366 {
1367 public:
1368 BinaryAtomicEndResultInstance(Context &context, const string &name, const ImageType imageType,
1369 const tcu::UVec3 &imageSize, const TextureFormat &format, const VkImageTiling tiling,
1370 const AtomicOperation operation, const bool useTransfer,
1371 const ShaderReadType shaderReadType, const ImageBackingType backingType)
1372 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer,
1373 shaderReadType, backingType)
1374 {
1375 }
1376
1377 virtual uint32_t getOutputBufferSize(void) const;
1378
1379 virtual void prepareResources(const bool useTransfer)
1380 {
1381 DE_UNREF(useTransfer);
1382 }
1383 virtual void prepareDescriptors(const bool isTexelBuffer);
1384
1385 virtual void commandsBeforeCompute(const VkCommandBuffer) const
1386 {
1387 }
1388 virtual void commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1389 const VkPipelineLayout pipelineLayout, const VkDescriptorSet descriptorSet,
1390 const VkDeviceSize &range, const bool useTransfer);
1391
1392 virtual bool verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const;
1393
1394 protected:
1395 template <typename T>
1396 bool isValueCorrect(const T resultValue, int32_t x, int32_t y, int32_t z, const UVec3 &gridSize,
1397 const IVec3 extendedGridSize) const;
1398 };
1399
1400 uint32_t BinaryAtomicEndResultInstance::getOutputBufferSize(void) const
1401 {
1402 return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1403 }
1404
1405 void BinaryAtomicEndResultInstance::prepareDescriptors(const bool isTexelBuffer)
1406 {
1407 const VkDescriptorType descriptorType =
1408 isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1409 const VkDevice device = m_context.getDevice();
1410 const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1411
1412 m_descriptorSetLayout = DescriptorSetLayoutBuilder()
1413 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1414 .build(deviceInterface, device);
1415
1416 m_descriptorPool = DescriptorPoolBuilder()
1417 .addType(descriptorType)
1418 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1419
1420 m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1421
1422 if (isTexelBuffer)
1423 {
1424 m_descResultBufferView =
1425 makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1426
1427 DescriptorSetUpdateBuilder()
1428 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType,
1429 &(m_descResultBufferView.get()))
1430 .update(deviceInterface, device);
1431 }
1432 else
1433 {
1434 const VkDescriptorImageInfo descResultImageInfo =
1435 makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1436
1437 DescriptorSetUpdateBuilder()
1438 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType,
1439 &descResultImageInfo)
1440 .update(deviceInterface, device);
1441 }
1442 }
1443
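// After the dispatch, get the result image contents into the output buffer: either copy them with a
// transfer operation, or run a second compute pass that reads the image and writes them to the buffer.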
1444 void BinaryAtomicEndResultInstance::commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1445 const VkPipelineLayout pipelineLayout,
1446 const VkDescriptorSet descriptorSet, const VkDeviceSize &range,
1447 const bool useTransfer)
1448 {
1449 const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1450 const VkImageSubresourceRange subresourceRange =
1451 makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1452 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1453
1454 if (m_imageType == IMAGE_TYPE_BUFFER)
1455 {
1456 m_outputBuffer = m_inputBuffer;
1457 }
1458 else if (useTransfer)
1459 {
1460 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1461 makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1462 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_resultImage->get(), subresourceRange);
1463
1464 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1465 VK_PIPELINE_STAGE_TRANSFER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1466 &resultImagePostDispatchBarrier);
1467
1468 const VkBufferImageCopy bufferImageCopyParams =
1469 makeBufferImageCopy(makeExtent3D(layerSize), getNumLayers(m_imageType, m_imageSize));
1470
1471 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1472 m_outputBuffer->get(), 1u, &bufferImageCopyParams);
1473 }
1474 else
1475 {
1476 const VkDevice device = m_context.getDevice();
1477 const VkDescriptorImageInfo descResultImageInfo =
1478 makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1479 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1480
1481 DescriptorSetUpdateBuilder()
1482 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
1483 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1484 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
1485 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1486 .update(deviceInterface, device);
1487
1488 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1489 makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1490 VK_IMAGE_LAYOUT_GENERAL, m_resultImage->get(), subresourceRange);
1491
1492 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1493 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1494 &resultImagePostDispatchBarrier);
1495
1496 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1497 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u,
1498 &descriptorSet, 0u, DE_NULL);
1499
1500 switch (m_imageType)
1501 {
1502 case IMAGE_TYPE_1D_ARRAY:
1503 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), subresourceRange.layerCount, layerSize.z());
1504 break;
1505 case IMAGE_TYPE_2D_ARRAY:
1506 case IMAGE_TYPE_CUBE:
1507 case IMAGE_TYPE_CUBE_ARRAY:
1508 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1509 break;
1510 default:
1511 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), layerSize.z());
1512 break;
1513 }
1514 }
1515 }
1516
1517 bool BinaryAtomicEndResultInstance::verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const
1518 {
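// Each pixel was operated on by NUM_INVOCATIONS_PER_PIXEL invocations, laid out along X in the extended grid.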
1519 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1520 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1521
1522 tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(),
1523 outputBufferAllocation.getHostPtr());
1524
1525 for (int32_t z = 0; z < resultBuffer.getDepth(); z++)
1526 for (int32_t y = 0; y < resultBuffer.getHeight(); y++)
1527 for (int32_t x = 0; x < resultBuffer.getWidth(); x++)
1528 {
1529 const void *resultValue = resultBuffer.getPixelPtr(x, y, z);
1530 int32_t floatToIntValue = 0;
1531 bool isFloatValue = false;
1532 if (isFloatFormat(mapTextureFormat(m_format)))
1533 {
1534 isFloatValue = true;
1535 floatToIntValue = static_cast<int32_t>(*((float *)resultValue));
1536 }
1537
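// Order-independent operations have a unique final value: fold every invocation's argument into a reference and compare.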
1538 if (isOrderIndependentAtomicOperation(m_operation))
1539 {
1540 if (isUintFormat(mapTextureFormat(m_format)))
1541 {
1542 if (is64Bit)
1543 {
1544 if (!isValueCorrect<uint64_t>(*((uint64_t *)resultValue), x, y, z, gridSize,
1545 extendedGridSize))
1546 return false;
1547 }
1548 else
1549 {
1550 if (!isValueCorrect<uint32_t>(*((uint32_t *)resultValue), x, y, z, gridSize,
1551 extendedGridSize))
1552 return false;
1553 }
1554 }
1555 else if (isIntFormat(mapTextureFormat(m_format)))
1556 {
1557 if (is64Bit)
1558 {
1559 if (!isValueCorrect<int64_t>(*((int64_t *)resultValue), x, y, z, gridSize,
1560 extendedGridSize))
1561 return false;
1562 }
1563 else
1564 {
1565 if (!isValueCorrect<int32_t>(*((int32_t *)resultValue), x, y, z, gridSize,
1566 extendedGridSize))
1567 return false;
1568 }
1569 }
1570 else
1571 {
1572 // 32-bit floating point
1573 if (!isValueCorrect<int32_t>(floatToIntValue, x, y, z, gridSize, extendedGridSize))
1574 return false;
1575 }
1576 }
1577 else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
1578 {
1579 // Check if the end result equals one of the atomic args.
1580 bool matchFound = false;
1581
1582 for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1583 {
1584 const IVec3 gid(x + i * gridSize.x(), y, z);
1585 matchFound = is64Bit ? (*((int64_t *)resultValue) ==
1586 getAtomicFuncArgument<int64_t>(m_operation, gid, extendedGridSize)) :
1587 isFloatValue ?
1588 floatToIntValue ==
1589 getAtomicFuncArgument<int32_t>(m_operation, gid, extendedGridSize) :
1590 (*((int32_t *)resultValue) ==
1591 getAtomicFuncArgument<int32_t>(m_operation, gid, extendedGridSize));
1592 }
1593
1594 if (!matchFound)
1595 return false;
1596 }
1597 else if (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
1598 {
1599 // Check if the end result equals one of the atomic args.
1600 bool matchFound = false;
1601
1602 for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1603 {
1604 const IVec3 gid(x + i * gridSize.x(), y, z);
1605 matchFound = is64Bit ? (*((int64_t *)resultValue) ==
1606 getAtomicFuncArgument<int64_t>(m_operation, gid, extendedGridSize)) :
1607 isFloatValue ?
1608 floatToIntValue ==
1609 getAtomicFuncArgument<int32_t>(m_operation, gid, extendedGridSize) :
1610 (*((int32_t *)resultValue) ==
1611 getAtomicFuncArgument<int32_t>(m_operation, gid, extendedGridSize));
1612 }
1613
1614 if (!matchFound)
1615 return false;
1616 }
1617 else
1618 DE_ASSERT(false);
1619 }
1620 return true;
1621 }
1622
1623 template <typename T>
1624 bool BinaryAtomicEndResultInstance::isValueCorrect(const T resultValue, int32_t x, int32_t y, int32_t z,
1625 const UVec3 &gridSize, const IVec3 extendedGridSize) const
1626 {
1627 T reference = getOperationInitialValue<T>(m_operation);
1628 for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL); i++)
1629 {
1630 const IVec3 gid(x + i * gridSize.x(), y, z);
1631 T arg = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1632 reference = computeBinaryAtomicOperationResult(m_operation, reference, arg);
1633 }
1634 return (resultValue == reference);
1635 }
1636
1637 TestInstance *BinaryAtomicEndResultCase::createInstance(Context &context) const
1638 {
1639 return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling, m_operation,
1640 m_useTransfer, m_readType, m_backingType);
1641 }
1642
1643 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
1644 {
1645 public:
1646 BinaryAtomicIntermValuesInstance(Context &context, const string &name, const ImageType imageType,
1647 const tcu::UVec3 &imageSize, const TextureFormat &format,
1648 const VkImageTiling tiling, const AtomicOperation operation,
1649 const bool useTransfer, const ShaderReadType shaderReadType,
1650 const ImageBackingType backingType)
1651 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer,
1652 shaderReadType, backingType)
1653 {
1654 }
1655
1656 virtual uint32_t getOutputBufferSize(void) const;
1657
1658 virtual void prepareResources(const bool useTransfer);
1659 virtual void prepareDescriptors(const bool isTexelBuffer);
1660
1661 virtual void commandsBeforeCompute(const VkCommandBuffer cmdBuffer) const;
1662 virtual void commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1663 const VkPipelineLayout pipelineLayout, const VkDescriptorSet descriptorSet,
1664 const VkDeviceSize &range, const bool useTransfer);
1665
1666 virtual bool verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const;
1667
1668 protected:
1669 template <typename T>
1670 bool areValuesCorrect(tcu::ConstPixelBufferAccess &resultBuffer, const bool isFloatingPoint, int32_t x, int32_t y,
1671 int32_t z, const UVec3 &gridSize, const IVec3 extendedGridSize) const;
1672
1673 template <typename T>
1674 bool verifyRecursive(const int32_t index, const T valueSoFar, bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1675 const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1676 const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
1677 de::MovePtr<Image> m_intermResultsImage;
1678 Move<VkImageView> m_intermResultsImageView;
1679 };
1680
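// Every invocation stores the value returned by its atomic call, so the buffer is NUM_INVOCATIONS_PER_PIXEL times larger than the image.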
1681 uint32_t BinaryAtomicIntermValuesInstance::getOutputBufferSize(void) const
1682 {
1683 return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1684 }
1685
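// The intermediate-results image holds one texel per invocation, so each layer is extended
// NUM_INVOCATIONS_PER_PIXEL times along X (and also along Y for cube-based images).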
1686 void BinaryAtomicIntermValuesInstance::prepareResources(const bool useTransfer)
1687 {
1688 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1689 const bool isCubeBasedImage = (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
1690 const UVec3 extendedLayerSize =
1691 isCubeBasedImage ?
1692 UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z()) :
1693 UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1694
1695 createImageAndView(mapTextureFormat(m_format), extendedLayerSize, useTransfer, m_intermResultsImage,
1696 m_intermResultsImageView);
1697 }
1698
1699 void BinaryAtomicIntermValuesInstance::prepareDescriptors(const bool isTexelBuffer)
1700 {
1701 const VkDescriptorType descriptorType =
1702 isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1703
1704 const VkDevice device = m_context.getDevice();
1705 const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1706
1707 m_descriptorSetLayout = DescriptorSetLayoutBuilder()
1708 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1709 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1710 .build(deviceInterface, device);
1711
1712 m_descriptorPool = DescriptorPoolBuilder()
1713 .addType(descriptorType, 2u)
1714 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1715
1716 m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1717
1718 if (isTexelBuffer)
1719 {
1720 m_descResultBufferView =
1721 makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1722 m_descIntermResultsBufferView =
1723 makeBufferView(deviceInterface, device, *(*m_outputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1724
1725 DescriptorSetUpdateBuilder()
1726 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType,
1727 &(m_descResultBufferView.get()))
1728 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType,
1729 &(m_descIntermResultsBufferView.get()))
1730 .update(deviceInterface, device);
1731 }
1732 else
1733 {
1734 const VkDescriptorImageInfo descResultImageInfo =
1735 makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1736 const VkDescriptorImageInfo descIntermResultsImageInfo =
1737 makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1738
1739 DescriptorSetUpdateBuilder()
1740 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType,
1741 &descResultImageInfo)
1742 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType,
1743 &descIntermResultsImageInfo)
1744 .update(deviceInterface, device);
1745 }
1746 }
1747
1748 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute(const VkCommandBuffer cmdBuffer) const
1749 {
1750 const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1751 const VkImageSubresourceRange subresourceRange =
1752 makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1753
1754 const VkImageMemoryBarrier imagePreDispatchBarrier =
1755 makeImageMemoryBarrier(0u, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
1756 m_intermResultsImage->get(), subresourceRange);
1757
1758 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1759 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1760 &imagePreDispatchBarrier);
1761 }
1762
1763 void BinaryAtomicIntermValuesInstance::commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1764 const VkPipelineLayout pipelineLayout,
1765 const VkDescriptorSet descriptorSet,
1766 const VkDeviceSize &range, const bool useTransfer)
1767 {
1768 // Nothing to do for texel buffers: the intermediate values were already written directly to the output buffer
1769 if (m_imageType == IMAGE_TYPE_BUFFER)
1770 return;
1771
1772 const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1773 const VkImageSubresourceRange subresourceRange =
1774 makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1775 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1776
1777 if (useTransfer)
1778 {
1779 const VkImageMemoryBarrier imagePostDispatchBarrier =
1780 makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1781 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_intermResultsImage->get(), subresourceRange);
1782
1783 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1784 VK_PIPELINE_STAGE_TRANSFER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1785 &imagePostDispatchBarrier);
1786
1787 const UVec3 extendedLayerSize = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1788 const VkBufferImageCopy bufferImageCopyParams =
1789 makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
1790
1791 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(),
1792 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u,
1793 &bufferImageCopyParams);
1794 }
1795 else
1796 {
1797 const VkDevice device = m_context.getDevice();
1798 const VkDescriptorImageInfo descResultImageInfo =
1799 makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1800 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1801
1802 DescriptorSetUpdateBuilder()
1803 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
1804 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1805 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
1806 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1807 .update(deviceInterface, device);
1808
1809 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1810 makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1811 VK_IMAGE_LAYOUT_GENERAL, m_intermResultsImage->get(), subresourceRange);
1812
1813 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1814 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1815 &resultImagePostDispatchBarrier);
1816
1817 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1818 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u,
1819 &descriptorSet, 0u, DE_NULL);
1820
1821 switch (m_imageType)
1822 {
1823 case IMAGE_TYPE_1D_ARRAY:
1824 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(),
1825 subresourceRange.layerCount, layerSize.z());
1826 break;
1827 case IMAGE_TYPE_2D_ARRAY:
1828 case IMAGE_TYPE_CUBE:
1829 case IMAGE_TYPE_CUBE_ARRAY:
1830 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(),
1831 subresourceRange.layerCount);
1832 break;
1833 default:
1834 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(),
1835 layerSize.z());
1836 break;
1837 }
1838 }
1839 }
1840
1841 bool BinaryAtomicIntermValuesInstance::verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const
1842 {
1843 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1844 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1845
1846 tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(),
1847 outputBufferAllocation.getHostPtr());
1848
1849 for (int32_t z = 0; z < resultBuffer.getDepth(); z++)
1850 for (int32_t y = 0; y < resultBuffer.getHeight(); y++)
1851 for (uint32_t x = 0; x < gridSize.x(); x++)
1852 {
1853 if (isUintFormat(mapTextureFormat(m_format)))
1854 {
1855 if (is64Bit)
1856 {
1857 if (!areValuesCorrect<uint64_t>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1858 return false;
1859 }
1860 else
1861 {
1862 if (!areValuesCorrect<uint32_t>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1863 return false;
1864 }
1865 }
1866 else if (isIntFormat(mapTextureFormat(m_format)))
1867 {
1868 if (is64Bit)
1869 {
1870 if (!areValuesCorrect<int64_t>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1871 return false;
1872 }
1873 else
1874 {
1875 if (!areValuesCorrect<int32_t>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1876 return false;
1877 }
1878 }
1879 else
1880 {
1881 // 32-bit floating point
1882 if (!areValuesCorrect<int32_t>(resultBuffer, true, x, y, z, gridSize, extendedGridSize))
1883 return false;
1884 }
1885 }
1886
1887 return true;
1888 }
1889
1890 template <typename T>
1891 bool BinaryAtomicIntermValuesInstance::areValuesCorrect(tcu::ConstPixelBufferAccess &resultBuffer,
1892 const bool isFloatingPoint, int32_t x, int32_t y, int32_t z,
1893 const UVec3 &gridSize, const IVec3 extendedGridSize) const
1894 {
1895 T resultValues[NUM_INVOCATIONS_PER_PIXEL];
1896 T atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
1897 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL];
1898
1899 for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL); i++)
1900 {
1901 IVec3 gid(x + i * gridSize.x(), y, z);
1902 T data = *((T *)resultBuffer.getPixelPtr(gid.x(), gid.y(), gid.z()));
1903 if (isFloatingPoint)
1904 {
1905 float fData;
1906 deMemcpy(&fData, &data, sizeof(fData));
1907 data = static_cast<T>(fData);
1908 }
1909 resultValues[i] = data;
1910 atomicArgs[i] = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1911 argsUsed[i] = false;
1912 }
1913
1914 // Verify that the return values form a valid sequence.
1915 return verifyRecursive(0, getOperationInitialValue<T>(m_operation), argsUsed, atomicArgs, resultValues);
1916 }
1917
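// Backtracking search: at each step pick any unused invocation whose observed return value matches the
// value accumulated so far, apply its argument, and recurse; success means the returns form a valid order.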
1918 template <typename T>
1919 bool BinaryAtomicIntermValuesInstance::verifyRecursive(const int32_t index, const T valueSoFar,
1920 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1921 const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1922 const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
1923 {
1924 if (index >= static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL))
1925 return true;
1926
1927 for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL); i++)
1928 {
1929 if (!argsUsed[i] && resultValues[i] == valueSoFar)
1930 {
1931 argsUsed[i] = true;
1932
1933 if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]),
1934 argsUsed, atomicArgs, resultValues))
1935 {
1936 return true;
1937 }
1938
1939 argsUsed[i] = false;
1940 }
1941 }
1942
1943 return false;
1944 }
1945
1946 TestInstance *BinaryAtomicIntermValuesCase::createInstance(Context &context) const
1947 {
1948 return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling,
1949 m_operation, m_useTransfer, m_readType, m_backingType);
1950 }
1951
1952 } // namespace
1953
1954 tcu::TestCaseGroup *createImageAtomicOperationTests(tcu::TestContext &testCtx)
1955 {
1956 de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations"));
1957
1958 struct ImageParams
1959 {
1960 ImageParams(const ImageType imageType, const tcu::UVec3 &imageSize)
1961 : m_imageType(imageType)
1962 , m_imageSize(imageSize)
1963 {
1964 }
1965 const ImageType m_imageType;
1966 const tcu::UVec3 m_imageSize;
1967 };
1968
1969 const ImageParams imageParamsArray[] = {ImageParams(IMAGE_TYPE_1D, tcu::UVec3(64u, 1u, 1u)),
1970 ImageParams(IMAGE_TYPE_1D_ARRAY, tcu::UVec3(64u, 1u, 8u)),
1971 ImageParams(IMAGE_TYPE_2D, tcu::UVec3(64u, 64u, 1u)),
1972 ImageParams(IMAGE_TYPE_2D_ARRAY, tcu::UVec3(64u, 64u, 8u)),
1973 ImageParams(IMAGE_TYPE_3D, tcu::UVec3(48u, 48u, 8u)),
1974 ImageParams(IMAGE_TYPE_CUBE, tcu::UVec3(64u, 64u, 1u)),
1975 ImageParams(IMAGE_TYPE_CUBE_ARRAY, tcu::UVec3(64u, 64u, 2u)),
1976 ImageParams(IMAGE_TYPE_BUFFER, tcu::UVec3(64u, 1u, 1u))};
1977
1978 const tcu::TextureFormat formats[] = {tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
1979 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
1980 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::FLOAT),
1981 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT64),
1982 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT64)};
1983
1984 static const VkImageTiling s_tilings[] = {
1985 VK_IMAGE_TILING_OPTIMAL,
1986 VK_IMAGE_TILING_LINEAR,
1987 };
1988
1989 const struct
1990 {
1991 ShaderReadType type;
1992 const char *name;
1993 } readTypes[] = {
1994 {ShaderReadType::NORMAL, "normal_read"},
1995 #ifndef CTS_USES_VULKANSC
1996 {ShaderReadType::SPARSE, "sparse_read"},
1997 #endif // CTS_USES_VULKANSC
1998 };
1999
2000 const struct
2001 {
2002 ImageBackingType type;
2003 const char *name;
2004 } backingTypes[] = {
2005 {ImageBackingType::NORMAL, "normal_img"},
2006 #ifndef CTS_USES_VULKANSC
2007 {ImageBackingType::SPARSE, "sparse_img"},
2008 #endif // CTS_USES_VULKANSC
2009 };
2010
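// Test hierarchy: operation / image type / (no)transfer / read type / backing type, with one end-result
// and one intermediate-values case per format and tiling.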
2011 for (uint32_t operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
2012 {
2013 const AtomicOperation operation = (AtomicOperation)operationI;
2014
2015 de::MovePtr<tcu::TestCaseGroup> operationGroup(
2016 new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str()));
2017
2018 for (uint32_t imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
2019 {
2020 const ImageType imageType = imageParamsArray[imageTypeNdx].m_imageType;
2021 const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
2022
2023 de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(
2024 new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str()));
2025
2026 for (int useTransferIdx = 0; useTransferIdx < 2; ++useTransferIdx)
2027 {
2028 const bool useTransfer = (useTransferIdx > 0);
2029 const string groupName = (!useTransfer ? "no" : "") + string("transfer");
2030
2031 de::MovePtr<tcu::TestCaseGroup> transferGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str()));
2032
2033 for (int readTypeIdx = 0; readTypeIdx < DE_LENGTH_OF_ARRAY(readTypes); ++readTypeIdx)
2034 {
2035 const auto &readType = readTypes[readTypeIdx];
2036
2037 de::MovePtr<tcu::TestCaseGroup> readTypeGroup(new tcu::TestCaseGroup(testCtx, readType.name));
2038
2039 for (int backingTypeIdx = 0; backingTypeIdx < DE_LENGTH_OF_ARRAY(backingTypes); ++backingTypeIdx)
2040 {
2041 const auto &backingType = backingTypes[backingTypeIdx];
2042
2043 de::MovePtr<tcu::TestCaseGroup> backingTypeGroup(
2044 new tcu::TestCaseGroup(testCtx, backingType.name));
2045
2046 for (uint32_t formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
2047 {
2048 for (int tilingNdx = 0; tilingNdx < DE_LENGTH_OF_ARRAY(s_tilings); tilingNdx++)
2049 {
2050 const TextureFormat &format = formats[formatNdx];
2051 const std::string formatName = getShaderImageFormatQualifier(format);
2052 const char *suffix = (s_tilings[tilingNdx] == VK_IMAGE_TILING_OPTIMAL) ? "" : "_linear";
2053
2054 // Buffer images are only tested with the float format; other formats would need dedicated SPIR-V programs in vktImageAtomicSpirvShaders.cpp
2055 if (imageType == IMAGE_TYPE_BUFFER && (format.type != tcu::TextureFormat::FLOAT))
2056 {
2057 continue;
2058 }
2059
2060 // Only 2D and 3D images may support sparse residency.
2061 // VK_IMAGE_TILING_LINEAR does not support sparse residency.
2062 const auto vkImageType = mapImageType(imageType);
2063 if (backingType.type == ImageBackingType::SPARSE &&
2064 ((vkImageType != VK_IMAGE_TYPE_2D && vkImageType != VK_IMAGE_TYPE_3D) ||
2065 (s_tilings[tilingNdx] == VK_IMAGE_TILING_LINEAR)))
2066 continue;
2067
2068 // Only add, exchange and (outside Vulkan SC) min/max are supported on floating-point formats
2069 if (format.type == tcu::TextureFormat::FLOAT)
2070 {
2071 if (operation != ATOMIC_OPERATION_ADD &&
2072 #ifndef CTS_USES_VULKANSC
2073 operation != ATOMIC_OPERATION_MIN && operation != ATOMIC_OPERATION_MAX &&
2074 #endif // CTS_USES_VULKANSC
2075 operation != ATOMIC_OPERATION_EXCHANGE)
2076 {
2077 continue;
2078 }
2079 }
2080
2081 if (readType.type == ShaderReadType::SPARSE)
2082 {
2083 // When using transfer, shader reads will not be used, so avoid creating two identical cases.
2084 if (useTransfer)
2085 continue;
2086
2087 // Sparse reads are not supported for all types of images.
2088 if (imageType == IMAGE_TYPE_1D || imageType == IMAGE_TYPE_1D_ARRAY ||
2089 imageType == IMAGE_TYPE_BUFFER)
2090 continue;
2091 }
2092
2093 //!< Atomic case that checks the end result of the operations, not the intermediate return values.
2094 const string caseEndResult = formatName + "_end_result" + suffix;
2095 backingTypeGroup->addChild(new BinaryAtomicEndResultCase(
2096 testCtx, caseEndResult, imageType, imageSize, format, s_tilings[tilingNdx],
2097 operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2098
2099 //!< Atomic case that checks the return values of the atomic function, not the end result.
2100 const string caseIntermValues = formatName + "_intermediate_values" + suffix;
2101 backingTypeGroup->addChild(new BinaryAtomicIntermValuesCase(
2102 testCtx, caseIntermValues, imageType, imageSize, format, s_tilings[tilingNdx],
2103 operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2104 }
2105 }
2106
2107 readTypeGroup->addChild(backingTypeGroup.release());
2108 }
2109
2110 transferGroup->addChild(readTypeGroup.release());
2111 }
2112
2113 imageTypeGroup->addChild(transferGroup.release());
2114 }
2115
2116 operationGroup->addChild(imageTypeGroup.release());
2117 }
2118
2119 imageAtomicOperationsTests->addChild(operationGroup.release());
2120 }
2121
2122 return imageAtomicOperationsTests.release();
2123 }
2124
2125 } // namespace image
2126 } // namespace vkt
2127