1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2016 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file vktImageAtomicOperationTests.cpp
21 * \brief Image atomic operation tests
22 *//*--------------------------------------------------------------------*/
23
24 #include "vktImageAtomicOperationTests.hpp"
25 #include "vktImageAtomicSpirvShaders.hpp"
26
27 #include "deUniquePtr.hpp"
28 #include "deStringUtil.hpp"
29 #include "deSTLUtil.hpp"
30
31 #include "vktTestCaseUtil.hpp"
32 #include "vkPrograms.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkQueryUtil.hpp"
35 #include "vkBarrierUtil.hpp"
36 #include "vktImageTestsUtil.hpp"
37 #include "vkBuilderUtil.hpp"
38 #include "vkRef.hpp"
39 #include "vkRefUtil.hpp"
40 #include "vkTypeUtil.hpp"
41 #include "vkCmdUtil.hpp"
42 #include "vkObjUtil.hpp"
43 #include "vkBufferWithMemory.hpp"
44
45 #include "tcuTextureUtil.hpp"
46 #include "tcuTexture.hpp"
47 #include "tcuVectorType.hpp"
48 #include "tcuStringTemplate.hpp"
49
50 namespace vkt
51 {
52 namespace image
53 {
54 namespace
55 {
56
57 using namespace vk;
58 using namespace std;
59 using de::toString;
60
61 using tcu::TextureFormat;
62 using tcu::IVec2;
63 using tcu::IVec3;
64 using tcu::UVec3;
65 using tcu::Vec4;
66 using tcu::IVec4;
67 using tcu::UVec4;
68 using tcu::CubeFace;
69 using tcu::Texture1D;
70 using tcu::Texture2D;
71 using tcu::Texture3D;
72 using tcu::Texture2DArray;
73 using tcu::TextureCube;
74 using tcu::PixelBufferAccess;
75 using tcu::ConstPixelBufferAccess;
76 using tcu::Vector;
77 using tcu::TestContext;
78
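// Each pixel of the result image is targeted by several compute invocations: the dispatch is
// scaled by this factor along X and the shaders wrap the X coordinate back onto the grid.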
79 enum
80 {
81 NUM_INVOCATIONS_PER_PIXEL = 5u
82 };
83
84 enum AtomicOperation
85 {
86 ATOMIC_OPERATION_ADD = 0,
87 ATOMIC_OPERATION_SUB,
88 ATOMIC_OPERATION_INC,
89 ATOMIC_OPERATION_DEC,
90 ATOMIC_OPERATION_MIN,
91 ATOMIC_OPERATION_MAX,
92 ATOMIC_OPERATION_AND,
93 ATOMIC_OPERATION_OR,
94 ATOMIC_OPERATION_XOR,
95 ATOMIC_OPERATION_EXCHANGE,
96 ATOMIC_OPERATION_COMPARE_EXCHANGE,
97
98 ATOMIC_OPERATION_LAST
99 };
100
101 enum class ShaderReadType
102 {
103 NORMAL = 0,
104 SPARSE,
105 };
106
107 enum class ImageBackingType
108 {
109 NORMAL = 0,
110 SPARSE,
111 };
112
113 static string getCoordStr (const ImageType imageType,
114 const std::string& x,
115 const std::string& y,
116 const std::string& z)
117 {
118 switch (imageType)
119 {
120 case IMAGE_TYPE_1D:
121 case IMAGE_TYPE_BUFFER:
122 return x;
123 case IMAGE_TYPE_1D_ARRAY:
124 case IMAGE_TYPE_2D:
125 return string("ivec2(" + x + "," + y + ")");
126 case IMAGE_TYPE_2D_ARRAY:
127 case IMAGE_TYPE_3D:
128 case IMAGE_TYPE_CUBE:
129 case IMAGE_TYPE_CUBE_ARRAY:
130 return string("ivec3(" + x + "," + y + "," + z + ")");
131 default:
132 DE_ASSERT(false);
133 return "";
134 }
135 }
136
137 static string getComponentTypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
138 {
139 DE_ASSERT(intFormat || uintFormat || floatFormat);
140
141 const bool is64 = (componentWidth == 64);
142
143 if (intFormat)
144 return (is64 ? "int64_t" : "int");
145 if (uintFormat)
146 return (is64 ? "uint64_t" : "uint");
147 if (floatFormat)
148 return (is64 ? "double" : "float");
149
150 return "";
151 }
152
153 static string getVec4TypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
154 {
155 DE_ASSERT(intFormat || uintFormat || floatFormat);
156
157 const bool is64 = (componentWidth == 64);
158
159 if (intFormat)
160 return (is64 ? "i64vec4" : "ivec4");
161 if (uintFormat)
162 return (is64 ? "u64vec4" : "uvec4");
163 if (floatFormat)
164 return (is64 ? "f64vec4" : "vec4");
165
166 return "";
167 }
168
169 static string getAtomicFuncArgumentShaderStr (const AtomicOperation op,
170 const string& x,
171 const string& y,
172 const string& z,
173 const IVec3& gridSize)
174 {
175 switch (op)
176 {
177 case ATOMIC_OPERATION_ADD:
178 case ATOMIC_OPERATION_AND:
179 case ATOMIC_OPERATION_OR:
180 case ATOMIC_OPERATION_XOR:
181 return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
182 case ATOMIC_OPERATION_MIN:
183 case ATOMIC_OPERATION_MAX:
184 // multiply by (1 - 2*(value % 2)) to make half of the data negative
185 // this will result in generating large numbers for uint formats
186 return string("((1 - 2*(" + x + " % 2)) * (" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + "))");
187 case ATOMIC_OPERATION_EXCHANGE:
188 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
189 return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
190 default:
191 DE_ASSERT(false);
192 return "";
193 }
194 }
195
196 static string getAtomicOperationCaseName (const AtomicOperation op)
197 {
198 switch (op)
199 {
200 case ATOMIC_OPERATION_ADD: return string("add");
201 case ATOMIC_OPERATION_SUB: return string("sub");
202 case ATOMIC_OPERATION_INC: return string("inc");
203 case ATOMIC_OPERATION_DEC: return string("dec");
204 case ATOMIC_OPERATION_MIN: return string("min");
205 case ATOMIC_OPERATION_MAX: return string("max");
206 case ATOMIC_OPERATION_AND: return string("and");
207 case ATOMIC_OPERATION_OR: return string("or");
208 case ATOMIC_OPERATION_XOR: return string("xor");
209 case ATOMIC_OPERATION_EXCHANGE: return string("exchange");
210 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("compare_exchange");
211 default:
212 DE_ASSERT(false);
213 return "";
214 }
215 }
216
217 static string getAtomicOperationShaderFuncName (const AtomicOperation op)
218 {
219 switch (op)
220 {
221 case ATOMIC_OPERATION_ADD: return string("imageAtomicAdd");
222 case ATOMIC_OPERATION_MIN: return string("imageAtomicMin");
223 case ATOMIC_OPERATION_MAX: return string("imageAtomicMax");
224 case ATOMIC_OPERATION_AND: return string("imageAtomicAnd");
225 case ATOMIC_OPERATION_OR: return string("imageAtomicOr");
226 case ATOMIC_OPERATION_XOR: return string("imageAtomicXor");
227 case ATOMIC_OPERATION_EXCHANGE: return string("imageAtomicExchange");
228 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("imageAtomicCompSwap");
229 default:
230 DE_ASSERT(false);
231 return "";
232 }
233 }
234
235 template <typename T>
236 T getOperationInitialValue (const AtomicOperation op)
237 {
238 switch (op)
239 {
240 // \note 18 is just an arbitrary small nonzero value.
241 case ATOMIC_OPERATION_ADD: return 18;
242 case ATOMIC_OPERATION_INC: return 18;
243 case ATOMIC_OPERATION_SUB: return (1 << 24) - 1;
244 case ATOMIC_OPERATION_DEC: return (1 << 24) - 1;
245 case ATOMIC_OPERATION_MIN: return (1 << 15) - 1;
246 case ATOMIC_OPERATION_MAX: return 18;
247 case ATOMIC_OPERATION_AND: return (1 << 15) - 1;
248 case ATOMIC_OPERATION_OR: return 18;
249 case ATOMIC_OPERATION_XOR: return 18;
250 case ATOMIC_OPERATION_EXCHANGE: return 18;
251 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 18;
252 default:
253 DE_ASSERT(false);
254 return 0xFFFFFFFF;
255 }
256 }
257
258 template <>
259 deInt64 getOperationInitialValue<deInt64>(const AtomicOperation op)
260 {
261 switch (op)
262 {
263 // \note 0x000000BEFFFFFF18 is just an arbitrary nonzero value.
264 case ATOMIC_OPERATION_ADD: return 0x000000BEFFFFFF18;
265 case ATOMIC_OPERATION_INC: return 0x000000BEFFFFFF18;
266 case ATOMIC_OPERATION_SUB: return (1ull << 56) - 1;
267 case ATOMIC_OPERATION_DEC: return (1ull << 56) - 1;
268 case ATOMIC_OPERATION_MIN: return (1ull << 47) - 1;
269 case ATOMIC_OPERATION_MAX: return 0x000000BEFFFFFF18;
270 case ATOMIC_OPERATION_AND: return (1ull << 47) - 1;
271 case ATOMIC_OPERATION_OR: return 0x000000BEFFFFFF18;
272 case ATOMIC_OPERATION_XOR: return 0x000000BEFFFFFF18;
273 case ATOMIC_OPERATION_EXCHANGE: return 0x000000BEFFFFFF18;
274 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 0x000000BEFFFFFF18;
275 default:
276 DE_ASSERT(false);
277 return 0xFFFFFFFFFFFFFFFF;
278 }
279 }
280
281 template <>
282 deUint64 getOperationInitialValue<deUint64>(const AtomicOperation op)
283 {
284 return (deUint64)getOperationInitialValue<deInt64>(op);
285 }
286
287
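// Host-side counterpart of getAtomicFuncArgumentShaderStr: computes the value a given invocation
// passes to the atomic function, used when deriving reference values on the CPU.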
288 template <typename T>
289 static T getAtomicFuncArgument (const AtomicOperation op,
290 const IVec3& invocationID,
291 const IVec3& gridSize)
292 {
293 const T x = static_cast<T>(invocationID.x());
294 const T y = static_cast<T>(invocationID.y());
295 const T z = static_cast<T>(invocationID.z());
296
297 switch (op)
298 {
299 // \note Fall-throughs.
300 case ATOMIC_OPERATION_ADD:
301 case ATOMIC_OPERATION_SUB:
302 case ATOMIC_OPERATION_AND:
303 case ATOMIC_OPERATION_OR:
304 case ATOMIC_OPERATION_XOR:
305 return x*x + y*y + z*z;
306 case ATOMIC_OPERATION_INC:
307 case ATOMIC_OPERATION_DEC:
308 return 1;
309 case ATOMIC_OPERATION_MIN:
310 case ATOMIC_OPERATION_MAX:
311 // multiply half of the data by -1
312 return (1-2*(x % 2))*(x*x + y*y + z*z);
313 case ATOMIC_OPERATION_EXCHANGE:
314 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
315 return (z*static_cast<T>(gridSize.x()) + x)*static_cast<T>(gridSize.y()) + y;
316 default:
317 DE_ASSERT(false);
318 return -1;
319 }
320 }
321
322 //! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried out (i.e. is both commutative and associative).
323 static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
324 {
325 return op == ATOMIC_OPERATION_ADD ||
326 op == ATOMIC_OPERATION_SUB ||
327 op == ATOMIC_OPERATION_INC ||
328 op == ATOMIC_OPERATION_DEC ||
329 op == ATOMIC_OPERATION_MIN ||
330 op == ATOMIC_OPERATION_MAX ||
331 op == ATOMIC_OPERATION_AND ||
332 op == ATOMIC_OPERATION_OR ||
333 op == ATOMIC_OPERATION_XOR;
334 }
335
336 //! Checks if the operation needs a SPIR-V shader.
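// Sub, inc and dec have no imageAtomic* built-in in GLSL, so these cases are emitted as
// hand-written SPIR-V using OpAtomicISub, OpAtomicIIncrement and OpAtomicIDecrement.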
337 static bool isSpirvAtomicOperation (const AtomicOperation op)
338 {
339 return op == ATOMIC_OPERATION_SUB ||
340 op == ATOMIC_OPERATION_INC ||
341 op == ATOMIC_OPERATION_DEC;
342 }
343
344 //! Returns the SPIR-V assembler name of the given operation.
345 static std::string getSpirvAtomicOpName (const AtomicOperation op)
346 {
347 switch (op)
348 {
349 case ATOMIC_OPERATION_SUB: return "OpAtomicISub";
350 case ATOMIC_OPERATION_INC: return "OpAtomicIIncrement";
351 case ATOMIC_OPERATION_DEC: return "OpAtomicIDecrement";
352 default: break;
353 }
354
355 DE_ASSERT(false);
356 return "";
357 }
358
359 //! Returns true if the given SPIR-V operation does not need the last argument, compared to OpAtomicIAdd.
360 static bool isSpirvAtomicNoLastArgOp (const AtomicOperation op)
361 {
362 switch (op)
363 {
364 case ATOMIC_OPERATION_SUB: return false;
365 case ATOMIC_OPERATION_INC: // fallthrough
366 case ATOMIC_OPERATION_DEC: return true;
367 default: break;
368 }
369
370 DE_ASSERT(false);
371 return false;
372 }
373
374 //! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
375 template <typename T>
376 static T computeBinaryAtomicOperationResult (const AtomicOperation op, const T a, const T b)
377 {
378 switch (op)
379 {
380 case ATOMIC_OPERATION_INC: // fallthrough.
381 case ATOMIC_OPERATION_ADD: return a + b;
382 case ATOMIC_OPERATION_DEC: // fallthrough.
383 case ATOMIC_OPERATION_SUB: return a - b;
384 case ATOMIC_OPERATION_MIN: return de::min(a, b);
385 case ATOMIC_OPERATION_MAX: return de::max(a, b);
386 case ATOMIC_OPERATION_AND: return a & b;
387 case ATOMIC_OPERATION_OR: return a | b;
388 case ATOMIC_OPERATION_XOR: return a ^ b;
389 case ATOMIC_OPERATION_EXCHANGE: return b;
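// \note The compare value matches the initial image contents: 18 for 32-bit formats and
//       0xBEFFFFFF18 for 64-bit formats (see getOperationInitialValue).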
390 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return (a == (sizeof(T) == 8 ? 0xBEFFFFFF18 : 18)) ? b : a;
391 default:
392 DE_ASSERT(false);
393 return -1;
394 }
395 }
396
397 VkImageUsageFlags getUsageFlags (bool useTransfer)
398 {
399 VkImageUsageFlags usageFlags = VK_IMAGE_USAGE_STORAGE_BIT;
400
401 if (useTransfer)
402 usageFlags |= (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
403
404 return usageFlags;
405 }
406
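// Adds compute shaders that initialize the image with imageStore and read it back with imageLoad;
// these are used instead of transfer (copy) commands when the test variant does not use transfer.
// The "readShaderResidency" variant additionally cross-checks the value returned by sparseImageLoadARB.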
407 void AddFillReadShader (SourceCollections& sourceCollections,
408 const ImageType& imageType,
409 const tcu::TextureFormat& format,
410 const string& componentType,
411 const string& vec4Type)
412 {
413 const string imageInCoord = getCoordStr(imageType, "gx", "gy", "gz");
414 const string shaderImageFormatStr = getShaderImageFormatQualifier(format);
415 const string shaderImageTypeStr = getShaderImageType(format, imageType);
416 const auto componentWidth = getFormatComponentWidth(mapTextureFormat(format), 0u);
417 const string extensions = ((componentWidth == 64u)
418 ? "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
419 "#extension GL_EXT_shader_image_int64 : require\n"
420 : "");
421
422
423 const string fillShader = "#version 450\n"
424 + extensions +
425 "precision highp " + shaderImageTypeStr + ";\n"
426 "\n"
427 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
428 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
429 "\n"
430 "layout(std430, binding = 1) buffer inputBuffer\n"
431 "{\n"
432 " "+ componentType + " data[];\n"
433 "} inBuffer;\n"
434 "\n"
435 "void main(void)\n"
436 "{\n"
437 " int gx = int(gl_GlobalInvocationID.x);\n"
438 " int gy = int(gl_GlobalInvocationID.y);\n"
439 " int gz = int(gl_GlobalInvocationID.z);\n"
440 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
441 " imageStore(u_resultImage, " + imageInCoord + ", " + vec4Type + "(inBuffer.data[index]));\n"
442 "}\n";
443
444 const string readShader = "#version 450\n"
445 + extensions +
446 "precision highp " + shaderImageTypeStr + ";\n"
447 "\n"
448 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
449 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
450 "\n"
451 "layout(std430, binding = 1) buffer outputBuffer\n"
452 "{\n"
453 " " + componentType + " data[];\n"
454 "} outBuffer;\n"
455 "\n"
456 "void main(void)\n"
457 "{\n"
458 " int gx = int(gl_GlobalInvocationID.x);\n"
459 " int gy = int(gl_GlobalInvocationID.y);\n"
460 " int gz = int(gl_GlobalInvocationID.z);\n"
461 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
462 " outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
463 "}\n";
464
465
466 if ((imageType != IMAGE_TYPE_1D) &&
467 (imageType != IMAGE_TYPE_1D_ARRAY) &&
468 (imageType != IMAGE_TYPE_BUFFER))
469 {
470 const string readShaderResidency = "#version 450\n"
471 "#extension GL_ARB_sparse_texture2 : require\n"
472 + extensions +
473 "precision highp " + shaderImageTypeStr + ";\n"
474 "\n"
475 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
476 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
477 "\n"
478 "layout(std430, binding = 1) buffer outputBuffer\n"
479 "{\n"
480 " " + componentType + " data[];\n"
481 "} outBuffer;\n"
482 "\n"
483 "void main(void)\n"
484 "{\n"
485 " int gx = int(gl_GlobalInvocationID.x);\n"
486 " int gy = int(gl_GlobalInvocationID.y);\n"
487 " int gz = int(gl_GlobalInvocationID.z);\n"
488 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
489 " outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
490 " " + vec4Type + " sparseValue;\n"
491 " sparseImageLoadARB(u_resultImage, " + imageInCoord + ", sparseValue);\n"
492 " if (outBuffer.data[index] != sparseValue.x)\n"
493 " outBuffer.data[index] = " + vec4Type + "(1234).x;\n"
494 "}\n";
495
496 sourceCollections.glslSources.add("readShaderResidency") << glu::ComputeSource(readShaderResidency.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
497 }
498
499 sourceCollections.glslSources.add("fillShader") << glu::ComputeSource(fillShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
500 sourceCollections.glslSources.add("readShader") << glu::ComputeSource(readShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
501 }
502
503 //! Prepare the initial data for the image
504 static void initDataForImage (const VkDevice device,
505 const DeviceInterface& deviceInterface,
506 const TextureFormat& format,
507 const AtomicOperation operation,
508 const tcu::UVec3& gridSize,
509 BufferWithMemory& buffer)
510 {
511 Allocation& bufferAllocation = buffer.getAllocation();
512 const VkFormat imageFormat = mapTextureFormat(format);
513 tcu::PixelBufferAccess pixelBuffer (format, gridSize.x(), gridSize.y(), gridSize.z(), bufferAllocation.getHostPtr());
514
515 if (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)
516 {
517 const deInt64 initialValue(getOperationInitialValue<deInt64>(operation));
518
519 for (deUint32 z = 0; z < gridSize.z(); z++)
520 for (deUint32 y = 0; y < gridSize.y(); y++)
521 for (deUint32 x = 0; x < gridSize.x(); x++)
522 {
523 *((deInt64*)pixelBuffer.getPixelPtr(x, y, z)) = initialValue;
524 }
525 }
526 else
527 {
528 const tcu::IVec4 initialValue(getOperationInitialValue<deInt32>(operation));
529
530 for (deUint32 z = 0; z < gridSize.z(); z++)
531 for (deUint32 y = 0; y < gridSize.y(); y++)
532 for (deUint32 x = 0; x < gridSize.x(); x++)
533 {
534 pixelBuffer.setPixel(initialValue, x, y, z);
535 }
536 }
537
538 flushAlloc(deviceInterface, device, bufferAllocation);
539 }
540
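// Common support checks: image/format/tiling support, texel buffer atomics, sparse features and the
// float/64-bit atomic extensions required by the given test configuration.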
541 void commonCheckSupport (Context& context, const tcu::TextureFormat& tcuFormat, VkImageTiling tiling, ImageType imageType, const tcu::UVec3& imageSize, AtomicOperation operation, bool useTransfer, ShaderReadType readType, ImageBackingType backingType)
542 {
543 const VkFormat format = mapTextureFormat(tcuFormat);
544 const VkImageType vkImgType = mapImageType(imageType);
545 const VkFormatFeatureFlags texelBufferSupport = (VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT);
546
547 const auto& vki = context.getInstanceInterface();
548 const auto physicalDevice = context.getPhysicalDevice();
549 const auto usageFlags = getUsageFlags(useTransfer);
550
551 VkImageFormatProperties vkImageFormatProperties;
552 const auto result = vki.getPhysicalDeviceImageFormatProperties(physicalDevice, format, vkImgType, tiling, usageFlags, 0, &vkImageFormatProperties);
553 if (result != VK_SUCCESS) {
554 if (result == VK_ERROR_FORMAT_NOT_SUPPORTED)
555 TCU_THROW(NotSupportedError, "Format unsupported for tiling");
556 else
557 TCU_FAIL("vkGetPhysicalDeviceImageFormatProperties returned unexpected error");
558 }
559
560 if (vkImageFormatProperties.maxArrayLayers < (uint32_t)getNumLayers(imageType, imageSize)) {
561 TCU_THROW(NotSupportedError, "This format and tiling combination does not support this number of array layers");
562 }
563
564 const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(context.getInstanceInterface(),
565 context.getPhysicalDevice(), format);
566 if ((imageType == IMAGE_TYPE_BUFFER) &&
567 ((formatProperties.bufferFeatures & texelBufferSupport) != texelBufferSupport))
568 TCU_THROW(NotSupportedError, "Atomic storage texel buffers not supported");
569
570 const VkFormatFeatureFlags requiredFeaturesLinear = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
571 if (tiling == vk::VK_IMAGE_TILING_LINEAR &&
572 ((formatProperties.linearTilingFeatures & requiredFeaturesLinear) != requiredFeaturesLinear)
573 ) {
574 TCU_THROW(NotSupportedError, "Format doesn't support atomic storage with linear tiling");
575 }
576
577 if (imageType == IMAGE_TYPE_CUBE_ARRAY)
578 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_IMAGE_CUBE_ARRAY);
579
580 #ifndef CTS_USES_VULKANSC
581 if (backingType == ImageBackingType::SPARSE)
582 {
583 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);
584
585 switch (vkImgType)
586 {
587 case VK_IMAGE_TYPE_2D: context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE2D); break;
588 case VK_IMAGE_TYPE_3D: context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE3D); break;
589 default: DE_ASSERT(false); break;
590 }
591
592 if (!checkSparseImageFormatSupport(context.getPhysicalDevice(), context.getInstanceInterface(), format, vkImgType, VK_SAMPLE_COUNT_1_BIT, usageFlags, tiling))
593 TCU_THROW(NotSupportedError, "Format does not support sparse images");
594 }
595 #endif // CTS_USES_VULKANSC
596
597 if (isFloatFormat(format))
598 {
599 context.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
600
601 const VkFormatFeatureFlags requiredFeatures = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
602 const auto& atomicFloatFeatures = context.getShaderAtomicFloatFeaturesEXT();
603
604 if (!atomicFloatFeatures.shaderImageFloat32Atomics)
605 TCU_THROW(NotSupportedError, "shaderImageFloat32Atomics not supported");
606
607 if ((operation == ATOMIC_OPERATION_ADD) && !atomicFloatFeatures.shaderImageFloat32AtomicAdd)
608 TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicAdd not supported");
609
610 if (operation == ATOMIC_OPERATION_MIN || operation == ATOMIC_OPERATION_MAX)
611 {
612 context.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
613 #ifndef CTS_USES_VULKANSC
614 if (!context.getShaderAtomicFloat2FeaturesEXT().shaderImageFloat32AtomicMinMax)
615 {
616 TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicMinMax not supported");
617 }
618 #endif // CTS_USES_VULKANSC
619 }
620
621 if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
622 TCU_FAIL("Required format feature bits not supported");
623
624 if (backingType == ImageBackingType::SPARSE)
625 {
626 if (!atomicFloatFeatures.sparseImageFloat32Atomics)
627 TCU_THROW(NotSupportedError, "sparseImageFloat32Atomics not supported");
628
629 if (operation == ATOMIC_OPERATION_ADD && !atomicFloatFeatures.sparseImageFloat32AtomicAdd)
630 TCU_THROW(NotSupportedError, "sparseImageFloat32AtomicAdd not supported");
631 }
632
633 }
634 else if (format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
635 {
636 context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");
637
638 const VkFormatFeatureFlags requiredFeatures = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
639 const auto& atomicInt64Features = context.getShaderImageAtomicInt64FeaturesEXT();
640
641 if (!atomicInt64Features.shaderImageInt64Atomics)
642 TCU_THROW(NotSupportedError, "shaderImageInt64Atomics not supported");
643
644 if (backingType == ImageBackingType::SPARSE && !atomicInt64Features.sparseImageInt64Atomics)
645 TCU_THROW(NotSupportedError, "sparseImageInt64Atomics not supported");
646
647 if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
648 TCU_FAIL("Mandatory format features not supported");
649 }
650
651 if (useTransfer)
652 {
653 const VkFormatFeatureFlags transferFeatures = (VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT);
654 if ((formatProperties.optimalTilingFeatures & transferFeatures) != transferFeatures)
655 TCU_THROW(NotSupportedError, "Transfer features not supported for this format");
656 }
657
658 if (readType == ShaderReadType::SPARSE)
659 {
660 DE_ASSERT(imageType != IMAGE_TYPE_1D && imageType != IMAGE_TYPE_1D_ARRAY && imageType != IMAGE_TYPE_BUFFER);
661 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_RESOURCE_RESIDENCY);
662 }
663 }
664
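// Test case that checks the end result left in the image after all atomic invocations have executed.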
665 class BinaryAtomicEndResultCase : public vkt::TestCase
666 {
667 public:
668 BinaryAtomicEndResultCase (tcu::TestContext& testCtx,
669 const string& name,
670 const ImageType imageType,
671 const tcu::UVec3& imageSize,
672 const tcu::TextureFormat& format,
673 const VkImageTiling tiling,
674 const AtomicOperation operation,
675 const bool useTransfer,
676 const ShaderReadType shaderReadType,
677 const ImageBackingType backingType,
678 const glu::GLSLVersion glslVersion);
679
680 void initPrograms (SourceCollections& sourceCollections) const;
681 TestInstance* createInstance (Context& context) const;
682 virtual void checkSupport (Context& context) const;
683
684 private:
685 const ImageType m_imageType;
686 const tcu::UVec3 m_imageSize;
687 const tcu::TextureFormat m_format;
688 const VkImageTiling m_tiling;
689 const AtomicOperation m_operation;
690 const bool m_useTransfer;
691 const ShaderReadType m_readType;
692 const ImageBackingType m_backingType;
693 const glu::GLSLVersion m_glslVersion;
694 };
695
696 BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext& testCtx,
697 const string& name,
698 const ImageType imageType,
699 const tcu::UVec3& imageSize,
700 const tcu::TextureFormat& format,
701 const VkImageTiling tiling,
702 const AtomicOperation operation,
703 const bool useTransfer,
704 const ShaderReadType shaderReadType,
705 const ImageBackingType backingType,
706 const glu::GLSLVersion glslVersion)
707 : TestCase (testCtx, name)
708 , m_imageType (imageType)
709 , m_imageSize (imageSize)
710 , m_format (format)
711 , m_tiling (tiling)
712 , m_operation (operation)
713 , m_useTransfer (useTransfer)
714 , m_readType (shaderReadType)
715 , m_backingType (backingType)
716 , m_glslVersion (glslVersion)
717 {
718 }
719
720 void BinaryAtomicEndResultCase::checkSupport (Context& context) const
721 {
722 commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType, m_backingType);
723 }
724
725 void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
726 {
727 const VkFormat imageFormat = mapTextureFormat(m_format);
728 const deUint32 componentWidth = getFormatComponentWidth(imageFormat, 0);
729 const bool intFormat = isIntFormat(imageFormat);
730 const bool uintFormat = isUintFormat(imageFormat);
731 const bool floatFormat = isFloatFormat(imageFormat);
732 const string type = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
733 const string vec4Type = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
734
735 AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
736
737 if (isSpirvAtomicOperation(m_operation))
738 {
739 const CaseVariant caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_END_RESULTS};
740 const tcu::StringTemplate shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
741 std::map<std::string, std::string> specializations;
742
743 specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
744 if (isSpirvAtomicNoLastArgOp(m_operation))
745 specializations["LASTARG"] = "";
746
747 sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
748 }
749 else
750 {
751 const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
752
753 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
754 const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
755
756 const string atomicArgExpr = type + getAtomicFuncArgumentShaderStr(m_operation,
757 "gx", "gy", "gz",
758 IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
759
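// imageAtomicCompSwap takes an extra compare argument; it must equal the initial image value
// (18, or 820338753304 == 0x000000BEFFFFFF18 for 64-bit formats).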
760 const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
761 (componentWidth == 64 ?", 820338753304": ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "")
762 : "";
763 const string atomicInvocation = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
764 const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
765 const string shaderImageTypeStr = getShaderImageType(m_format, m_imageType);
766 const string extensions = "#extension GL_EXT_shader_atomic_float : enable\n"
767 "#extension GL_EXT_shader_atomic_float2 : enable\n"
768 "#extension GL_KHR_memory_scope_semantics : enable";
769
770 string source = versionDecl + "\n" + extensions + "\n";
771
772 if (64 == componentWidth)
773 {
774 source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
775 "#extension GL_EXT_shader_image_int64 : require\n";
776 }
777
778 source += "precision highp " + shaderImageTypeStr + ";\n"
779 "\n"
780 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
781 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
782 "\n"
783 "void main (void)\n"
784 "{\n"
785 " int gx = int(gl_GlobalInvocationID.x);\n"
786 " int gy = int(gl_GlobalInvocationID.y);\n"
787 " int gz = int(gl_GlobalInvocationID.z);\n"
788 " " + atomicInvocation + ";\n"
789 "}\n";
790
791 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
792 }
793 }
794
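// Test case that stores the value returned by each atomic invocation into a second image so the
// intermediate values can be verified, not just the end result.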
795 class BinaryAtomicIntermValuesCase : public vkt::TestCase
796 {
797 public:
798 BinaryAtomicIntermValuesCase (tcu::TestContext& testCtx,
799 const string& name,
800 const ImageType imageType,
801 const tcu::UVec3& imageSize,
802 const tcu::TextureFormat& format,
803 const VkImageTiling tiling,
804 const AtomicOperation operation,
805 const bool useTransfer,
806 const ShaderReadType shaderReadType,
807 const ImageBackingType backingType,
808 const glu::GLSLVersion glslVersion);
809
810 void initPrograms (SourceCollections& sourceCollections) const;
811 TestInstance* createInstance (Context& context) const;
812 virtual void checkSupport (Context& context) const;
813
814 private:
815 const ImageType m_imageType;
816 const tcu::UVec3 m_imageSize;
817 const tcu::TextureFormat m_format;
818 const VkImageTiling m_tiling;
819 const AtomicOperation m_operation;
820 const bool m_useTransfer;
821 const ShaderReadType m_readType;
822 const ImageBackingType m_backingType;
823 const glu::GLSLVersion m_glslVersion;
824 };
825
826 BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext& testCtx,
827 const string& name,
828 const ImageType imageType,
829 const tcu::UVec3& imageSize,
830 const TextureFormat& format,
831 const VkImageTiling tiling,
832 const AtomicOperation operation,
833 const bool useTransfer,
834 const ShaderReadType shaderReadType,
835 const ImageBackingType backingType,
836 const glu::GLSLVersion glslVersion)
837 : TestCase (testCtx, name)
838 , m_imageType (imageType)
839 , m_imageSize (imageSize)
840 , m_format (format)
841 , m_tiling (tiling)
842 , m_operation (operation)
843 , m_useTransfer (useTransfer)
844 , m_readType (shaderReadType)
845 , m_backingType (backingType)
846 , m_glslVersion (glslVersion)
847 {
848 }
849
850 void BinaryAtomicIntermValuesCase::checkSupport (Context& context) const
851 {
852 commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType, m_backingType);
853 }
854
855 void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
856 {
857 const VkFormat imageFormat = mapTextureFormat(m_format);
858 const deUint32 componentWidth = getFormatComponentWidth(imageFormat, 0);
859 const bool intFormat = isIntFormat(imageFormat);
860 const bool uintFormat = isUintFormat(imageFormat);
861 const bool floatFormat = isFloatFormat(imageFormat);
862 const string type = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
863 const string vec4Type = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
864
865 AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
866
867 if (isSpirvAtomicOperation(m_operation))
868 {
869 const CaseVariant caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_INTERMEDIATE_RESULTS};
870 const tcu::StringTemplate shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
871 std::map<std::string, std::string> specializations;
872
873 specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
874 if (isSpirvAtomicNoLastArgOp(m_operation))
875 specializations["LASTARG"] = "";
876
877 sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
878 }
879 else
880 {
881 const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
882 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
883 const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
884 const string invocationCoord = getCoordStr(m_imageType, "gx", "gy", "gz");
885 const string atomicArgExpr = type + getAtomicFuncArgumentShaderStr(m_operation,
886 "gx", "gy", "gz",
887 IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
888
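// As in the end-result case, the compare argument for imageAtomicCompSwap must equal the initial image value.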
889 const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
890 (componentWidth == 64 ? ", 820338753304" : ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "") :
891 "";
892 const string atomicInvocation = getAtomicOperationShaderFuncName(m_operation) +
893 "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
894 const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
895 const string shaderImageTypeStr = getShaderImageType(m_format, m_imageType);
896 const string extensions = "#extension GL_EXT_shader_atomic_float : enable\n"
897 "#extension GL_EXT_shader_atomic_float2 : enable\n"
898 "#extension GL_KHR_memory_scope_semantics : enable";
899
900 string source = versionDecl + "\n" + extensions + "\n"
901 "\n";
902
903 if (64 == componentWidth)
904 {
905 source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
906 "#extension GL_EXT_shader_image_int64 : require\n";
907 }
908
909 source += "precision highp " + shaderImageTypeStr + "; \n"
910 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
911 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
912 "layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
913 "\n"
914 "void main (void)\n"
915 "{\n"
916 " int gx = int(gl_GlobalInvocationID.x);\n"
917 " int gy = int(gl_GlobalInvocationID.y);\n"
918 " int gz = int(gl_GlobalInvocationID.z);\n"
919 " imageStore(u_intermValuesImage, " + invocationCoord + ", " + vec4Type + "(" + atomicInvocation + "));\n"
920 "}\n";
921
922 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
923 }
924 }
925
926 class BinaryAtomicInstanceBase : public vkt::TestInstance
927 {
928 public:
929
930 BinaryAtomicInstanceBase (Context& context,
931 const string& name,
932 const ImageType imageType,
933 const tcu::UVec3& imageSize,
934 const TextureFormat& format,
935 const VkImageTiling tiling,
936 const AtomicOperation operation,
937 const bool useTransfer,
938 const ShaderReadType shaderReadType,
939 const ImageBackingType backingType);
940
941 tcu::TestStatus iterate (void);
942
943 virtual deUint32 getOutputBufferSize (void) const = 0;
944
945 virtual void prepareResources (const bool useTransfer) = 0;
946 virtual void prepareDescriptors (const bool isTexelBuffer) = 0;
947
948 virtual void commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const = 0;
949 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer,
950 const VkPipeline pipeline,
951 const VkPipelineLayout pipelineLayout,
952 const VkDescriptorSet descriptorSet,
953 const VkDeviceSize& range,
954 const bool useTransfer) = 0;
955
956 virtual bool verifyResult (Allocation& outputBufferAllocation,
957 const bool is64Bit) const = 0;
958
959 protected:
960
961 void shaderFillImage (const VkCommandBuffer cmdBuffer,
962 const VkBuffer& buffer,
963 const VkPipeline pipeline,
964 const VkPipelineLayout pipelineLayout,
965 const VkDescriptorSet descriptorSet,
966 const VkDeviceSize& range,
967 const tcu::UVec3& gridSize);
968
969 void createImageAndView (VkFormat imageFormat,
970 const tcu::UVec3& imageExent,
971 bool useTransfer,
972 de::MovePtr<Image>& imagePtr,
973 Move<VkImageView>& imageViewPtr);
974
975 void createImageResources (const VkFormat& imageFormat,
976 const bool useTransfer);
977
978 const string m_name;
979 const ImageType m_imageType;
980 const tcu::UVec3 m_imageSize;
981 const TextureFormat m_format;
982 const VkImageTiling m_tiling;
983 const AtomicOperation m_operation;
984 const bool m_useTransfer;
985 const ShaderReadType m_readType;
986 const ImageBackingType m_backingType;
987
988 de::MovePtr<BufferWithMemory> m_inputBuffer;
989 de::MovePtr<BufferWithMemory> m_outputBuffer;
990 Move<VkBufferView> m_descResultBufferView;
991 Move<VkBufferView> m_descIntermResultsBufferView;
992 Move<VkDescriptorPool> m_descriptorPool;
993 Move<VkDescriptorSetLayout> m_descriptorSetLayout;
994 Move<VkDescriptorSet> m_descriptorSet;
995
996 Move<VkDescriptorSetLayout> m_descriptorSetLayoutNoTransfer;
997 Move<VkDescriptorPool> m_descriptorPoolNoTransfer;
998
999 de::MovePtr<Image> m_resultImage;
1000 Move<VkImageView> m_resultImageView;
1001
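// Semaphores signaled by the sparse binding operation; the main submission waits on them.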
1002 std::vector<VkSemaphore> m_waitSemaphores;
1003 };
1004
1005 BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context& context,
1006 const string& name,
1007 const ImageType imageType,
1008 const tcu::UVec3& imageSize,
1009 const TextureFormat& format,
1010 const VkImageTiling tiling,
1011 const AtomicOperation operation,
1012 const bool useTransfer,
1013 const ShaderReadType shaderReadType,
1014 const ImageBackingType backingType)
1015 : vkt::TestInstance (context)
1016 , m_name (name)
1017 , m_imageType (imageType)
1018 , m_imageSize (imageSize)
1019 , m_format (format)
1020 , m_tiling (tiling)
1021 , m_operation (operation)
1022 , m_useTransfer (useTransfer)
1023 , m_readType (shaderReadType)
1024 , m_backingType (backingType)
1025 {
1026 }
1027
1028 tcu::TestStatus BinaryAtomicInstanceBase::iterate (void)
1029 {
1030 const VkDevice device = m_context.getDevice();
1031 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1032 const VkQueue queue = m_context.getUniversalQueue();
1033 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1034 Allocator& allocator = m_context.getDefaultAllocator();
1035 const VkDeviceSize imageSizeInBytes = tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1036 const VkDeviceSize outBuffSizeInBytes = getOutputBufferSize();
1037 const VkFormat imageFormat = mapTextureFormat(m_format);
1038 const bool isTexelBuffer = (m_imageType == IMAGE_TYPE_BUFFER);
1039
1040 if (!isTexelBuffer)
1041 {
1042 createImageResources(imageFormat, m_useTransfer);
1043 }
1044
1045 tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1046
1047 //Prepare the buffer with the initial data for the image
1048 m_inputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(deviceInterface,
1049 device,
1050 allocator,
1051 makeBufferCreateInfo(imageSizeInBytes,
1052 VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
1053 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1054 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
1055 MemoryRequirement::HostVisible));
1056
1057 // Fill in buffer with initial data used for image.
1058 initDataForImage(device, deviceInterface, m_format, m_operation, gridSize, *m_inputBuffer);
1059
1060 // Create a buffer to store shader output copied from result image
1061 m_outputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(deviceInterface,
1062 device,
1063 allocator,
1064 makeBufferCreateInfo(outBuffSizeInBytes,
1065 VK_BUFFER_USAGE_TRANSFER_DST_BIT |
1066 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1067 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
1068 MemoryRequirement::HostVisible));
1069
1070 if (!isTexelBuffer)
1071 {
1072 prepareResources(m_useTransfer);
1073 }
1074
1075 prepareDescriptors(isTexelBuffer);
1076
1077 Move<VkDescriptorSet> descriptorSetFillImage;
1078 Move<VkShaderModule> shaderModuleFillImage;
1079 Move<VkPipelineLayout> pipelineLayoutFillImage;
1080 Move<VkPipeline> pipelineFillImage;
1081
1082 Move<VkDescriptorSet> descriptorSetReadImage;
1083 Move<VkShaderModule> shaderModuleReadImage;
1084 Move<VkPipelineLayout> pipelineLayoutReadImage;
1085 Move<VkPipeline> pipelineReadImage;
1086
1087 if (!m_useTransfer)
1088 {
1089 m_descriptorSetLayoutNoTransfer =
1090 DescriptorSetLayoutBuilder()
1091 .addSingleBinding((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), VK_SHADER_STAGE_COMPUTE_BIT)
1092 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1093 .build(deviceInterface, device);
1094
1095 m_descriptorPoolNoTransfer =
1096 DescriptorPoolBuilder()
1097 .addType((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), 2)
1098 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2)
1099 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);
1100
1101 descriptorSetFillImage = makeDescriptorSet(deviceInterface,
1102 device,
1103 *m_descriptorPoolNoTransfer,
1104 *m_descriptorSetLayoutNoTransfer);
1105
1106 descriptorSetReadImage = makeDescriptorSet(deviceInterface,
1107 device,
1108 *m_descriptorPoolNoTransfer,
1109 *m_descriptorSetLayoutNoTransfer);
1110
1111 shaderModuleFillImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("fillShader"), 0);
1112 pipelineLayoutFillImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
1113 pipelineFillImage = makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleFillImage);
1114
1115 if (m_readType == ShaderReadType::SPARSE)
1116 {
1117 shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShaderResidency"), 0);
1118 }
1119 else
1120 {
1121 shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShader"), 0);
1122 }
1123 pipelineLayoutReadImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
1124 pipelineReadImage = makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleReadImage);
1125 }
1126
1127 // Create pipeline
1128 const Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
1129 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
1130 const Unique<VkPipeline> pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));
1131
1132 // Create command buffer
1133 const Unique<VkCommandPool> cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
1134 const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1135
1136 beginCommandBuffer(deviceInterface, *cmdBuffer);
1137
1138 if (!isTexelBuffer)
1139 {
1140 if (m_useTransfer)
1141 {
1142 const vector<VkBufferImageCopy> bufferImageCopy(1, makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize)));
1143 copyBufferToImage(deviceInterface,
1144 *cmdBuffer,
1145 *(*m_inputBuffer),
1146 imageSizeInBytes,
1147 bufferImageCopy,
1148 VK_IMAGE_ASPECT_COLOR_BIT,
1149 1,
1150 getNumLayers(m_imageType, m_imageSize), m_resultImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
1151 }
1152 else
1153 {
1154 shaderFillImage(*cmdBuffer, *(*m_inputBuffer), *pipelineFillImage, *pipelineLayoutFillImage, *descriptorSetFillImage, imageSizeInBytes, gridSize);
1155 }
1156 commandsBeforeCompute(*cmdBuffer);
1157 }
1158
1159 deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1160 deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
1161
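// The X dimension is scaled by NUM_INVOCATIONS_PER_PIXEL so that several invocations operate on each pixel.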
1162 deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1163
1164 commandsAfterCompute(*cmdBuffer,
1165 *pipelineReadImage,
1166 *pipelineLayoutReadImage,
1167 *descriptorSetReadImage,
1168 outBuffSizeInBytes,
1169 m_useTransfer);
1170
1171 const VkBufferMemoryBarrier outputBufferPreHostReadBarrier
1172 = makeBufferMemoryBarrier(((m_useTransfer || isTexelBuffer) ? VK_ACCESS_TRANSFER_WRITE_BIT : VK_ACCESS_SHADER_WRITE_BIT),
1173 VK_ACCESS_HOST_READ_BIT,
1174 m_outputBuffer->get(),
1175 0ull,
1176 outBuffSizeInBytes);
1177
1178 deviceInterface.cmdPipelineBarrier(*cmdBuffer,
1179 ((m_useTransfer || isTexelBuffer) ? VK_PIPELINE_STAGE_TRANSFER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT),
1180 VK_PIPELINE_STAGE_HOST_BIT,
1181 DE_FALSE, 0u, DE_NULL,
1182 1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);
1183
1184 endCommandBuffer(deviceInterface, *cmdBuffer);
1185
1186 std::vector<VkPipelineStageFlags> waitStages(m_waitSemaphores.size(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
1187 submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer, false, 1u,
1188 static_cast<deUint32>(m_waitSemaphores.size()), de::dataOrNull(m_waitSemaphores), de::dataOrNull(waitStages));
1189
1190 Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();
1191
1192 invalidateAlloc(deviceInterface, device, outputBufferAllocation);
1193
1194 if (verifyResult(outputBufferAllocation, (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)))
1195 return tcu::TestStatus::pass("Comparison succeeded");
1196 else
1197 return tcu::TestStatus::fail("Comparison failed");
1198 }
1199
1200 void BinaryAtomicInstanceBase::shaderFillImage (const VkCommandBuffer cmdBuffer,
1201 const VkBuffer& buffer,
1202 const VkPipeline pipeline,
1203 const VkPipelineLayout pipelineLayout,
1204 const VkDescriptorSet descriptorSet,
1205 const VkDeviceSize& range,
1206 const tcu::UVec3& gridSize)
1207 {
1208 const VkDevice device = m_context.getDevice();
1209 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1210 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1211 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(buffer, 0, range);
1212 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1213
1214 DescriptorSetUpdateBuilder()
1215 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1216 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1217 .update(deviceInterface, device);
1218
1219 const VkImageMemoryBarrier imageBarrierPre = makeImageMemoryBarrier(0,
1220 VK_ACCESS_SHADER_WRITE_BIT,
1221 VK_IMAGE_LAYOUT_UNDEFINED,
1222 VK_IMAGE_LAYOUT_GENERAL,
1223 m_resultImage->get(),
1224 subresourceRange);
1225
1226 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1227 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1228 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1229 (VkDependencyFlags)0,
1230 0, (const VkMemoryBarrier*)DE_NULL,
1231 0, (const VkBufferMemoryBarrier*)DE_NULL,
1232 1, &imageBarrierPre);
1233
1234 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1235 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
1236
1237 deviceInterface.cmdDispatch(cmdBuffer, gridSize.x(), gridSize.y(), gridSize.z());
1238
1239 const VkImageMemoryBarrier imageBarrierPost = makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT,
1240 VK_ACCESS_SHADER_READ_BIT,
1241 VK_IMAGE_LAYOUT_GENERAL,
1242 VK_IMAGE_LAYOUT_GENERAL,
1243 m_resultImage->get(),
1244 subresourceRange);
1245
1246 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1247 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1248 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1249 (VkDependencyFlags)0,
1250 0, (const VkMemoryBarrier*)DE_NULL,
1251 0, (const VkBufferMemoryBarrier*)DE_NULL,
1252 1, &imageBarrierPost);
1253 }
1254
1255 void BinaryAtomicInstanceBase::createImageAndView (VkFormat imageFormat,
1256 const tcu::UVec3& imageExent,
1257 bool useTransfer,
1258 de::MovePtr<Image>& imagePtr,
1259 Move<VkImageView>& imageViewPtr)
1260 {
1261 const VkDevice device = m_context.getDevice();
1262 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1263 Allocator& allocator = m_context.getDefaultAllocator();
1264 const VkImageUsageFlags usageFlags = getUsageFlags(useTransfer);
1265 VkImageCreateFlags createFlags = 0u;
1266
1267 if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
1268 createFlags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
1269
1270 const auto numLayers = getNumLayers(m_imageType, m_imageSize);
1271
1272 VkImageCreateInfo createInfo =
1273 {
1274 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1275 DE_NULL, // const void* pNext;
1276 createFlags, // VkImageCreateFlags flags;
1277 mapImageType(m_imageType), // VkImageType imageType;
1278 imageFormat, // VkFormat format;
1279 makeExtent3D(imageExent), // VkExtent3D extent;
1280 1u, // deUint32 mipLevels;
1281 numLayers, // deUint32 arrayLayers;
1282 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
1283 m_tiling, // VkImageTiling tiling;
1284 usageFlags, // VkImageUsageFlags usage;
1285 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1286 0u, // deUint32 queueFamilyIndexCount;
1287 DE_NULL, // const deUint32* pQueueFamilyIndices;
1288 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
1289 };
1290
1291 #ifndef CTS_USES_VULKANSC
1292 if (m_backingType == ImageBackingType::SPARSE)
1293 {
1294 const auto& vki = m_context.getInstanceInterface();
1295 const auto physicalDevice = m_context.getPhysicalDevice();
1296 const auto sparseQueue = m_context.getSparseQueue();
1297 const auto sparseQueueIdx = m_context.getSparseQueueFamilyIndex();
1298 const auto universalQIdx = m_context.getUniversalQueueFamilyIndex();
1299 const deUint32 queueIndices[] = { universalQIdx, sparseQueueIdx };
1300
1301 createInfo.flags |= (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);
1302
1303 if (sparseQueueIdx != universalQIdx)
1304 {
1305 createInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
1306 createInfo.queueFamilyIndexCount = static_cast<deUint32>(DE_LENGTH_OF_ARRAY(queueIndices));
1307 createInfo.pQueueFamilyIndices = queueIndices;
1308 }
1309
1310 const auto sparseImage = new SparseImage(deviceInterface, device, physicalDevice, vki, createInfo, sparseQueue, allocator, m_format);
1311 m_waitSemaphores.push_back(sparseImage->getSemaphore());
1312 imagePtr = de::MovePtr<Image>(sparseImage);
1313 }
1314 else
1315 #endif // CTS_USES_VULKANSC
1316 imagePtr = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, createInfo, MemoryRequirement::Any));
1317
1318 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numLayers);
1319
1320 imageViewPtr = makeImageView(deviceInterface, device, imagePtr->get(), mapImageViewType(m_imageType), imageFormat, subresourceRange);
1321 }
1322
1323 void BinaryAtomicInstanceBase::createImageResources (const VkFormat& imageFormat,
1324 const bool useTransfer)
1325 {
1326 //Create the image that is going to store results of atomic operations
1327 createImageAndView(imageFormat, getLayerSize(m_imageType, m_imageSize), useTransfer, m_resultImage, m_resultImageView);
1328 }
1329
1330 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
1331 {
1332 public:
1333
1334 BinaryAtomicEndResultInstance (Context& context,
1335 const string& name,
1336 const ImageType imageType,
1337 const tcu::UVec3& imageSize,
1338 const TextureFormat& format,
1339 const VkImageTiling tiling,
1340 const AtomicOperation operation,
1341 const bool useTransfer,
1342 const ShaderReadType shaderReadType,
1343 const ImageBackingType backingType)
1344 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer, shaderReadType, backingType) {}
1345
1346 virtual deUint32 getOutputBufferSize (void) const;
1347
1348 virtual void prepareResources (const bool useTransfer) { DE_UNREF(useTransfer); }
1349 virtual void prepareDescriptors (const bool isTexelBuffer);
1350
1351 virtual void commandsBeforeCompute (const VkCommandBuffer) const {}
1352 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1353 const VkPipeline pipeline,
1354 const VkPipelineLayout pipelineLayout,
1355 const VkDescriptorSet descriptorSet,
1356 const VkDeviceSize& range,
1357 const bool useTransfer);
1358
1359 virtual bool verifyResult (Allocation& outputBufferAllocation,
1360 const bool is64Bit) const;
1361
1362 protected:
1363
1364 template <typename T>
1365 bool isValueCorrect (const T resultValue,
1366 deInt32 x,
1367 deInt32 y,
1368 deInt32 z,
1369 const UVec3& gridSize,
1370 const IVec3 extendedGridSize) const;
1371 };
1372
1373 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
1374 {
1375 return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1376 }
1377
1378 void BinaryAtomicEndResultInstance::prepareDescriptors (const bool isTexelBuffer)
1379 {
1380 const VkDescriptorType descriptorType = isTexelBuffer ?
1381 VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
1382 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1383 const VkDevice device = m_context.getDevice();
1384 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1385
1386 m_descriptorSetLayout =
1387 DescriptorSetLayoutBuilder()
1388 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1389 .build(deviceInterface, device);
1390
1391 m_descriptorPool =
1392 DescriptorPoolBuilder()
1393 .addType(descriptorType)
1394 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1395
1396 m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1397
1398 if (isTexelBuffer)
1399 {
1400 m_descResultBufferView = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1401
1402 DescriptorSetUpdateBuilder()
1403 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
1404 .update(deviceInterface, device);
1405 }
1406 else
1407 {
1408 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1409
1410 DescriptorSetUpdateBuilder()
1411 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
1412 .update(deviceInterface, device);
1413 }
1414 }
1415
1416 void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1417 const VkPipeline pipeline,
1418 const VkPipelineLayout pipelineLayout,
1419 const VkDescriptorSet descriptorSet,
1420 const VkDeviceSize& range,
1421 const bool useTransfer)
1422 {
1423 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1424 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1425 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1426
1427 if (m_imageType == IMAGE_TYPE_BUFFER)
1428 {
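		// The atomics were performed directly on the input texel buffer, so reuse it as the output buffer.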
1429 m_outputBuffer = m_inputBuffer;
1430 }
1431 else if (useTransfer)
1432 {
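		// Transition the result image for transfer reads and copy its contents into the output buffer.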
1433 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1434 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1435 VK_ACCESS_TRANSFER_READ_BIT,
1436 VK_IMAGE_LAYOUT_GENERAL,
1437 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1438 m_resultImage->get(),
1439 subresourceRange);
1440
1441 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1442 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1443 VK_PIPELINE_STAGE_TRANSFER_BIT,
1444 DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1445 1u, &resultImagePostDispatchBarrier);
1446
1447 const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(layerSize), getNumLayers(m_imageType, m_imageSize));
1448
1449 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
1450 }
1451 else
1452 {
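		// No transfer path: bind the result image and the output buffer to a second compute dispatch
		// that reads the image contents through the shader and writes them into the output buffer.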
1453 const VkDevice device = m_context.getDevice();
1454 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1455 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1456
1457 DescriptorSetUpdateBuilder()
1458 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1459 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1460 .update(deviceInterface, device);
1461
1462 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1463 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1464 VK_ACCESS_SHADER_READ_BIT,
1465 VK_IMAGE_LAYOUT_GENERAL,
1466 VK_IMAGE_LAYOUT_GENERAL,
1467 m_resultImage->get(),
1468 subresourceRange);
1469
1470 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1471 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1472 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1473 DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1474 1u, &resultImagePostDispatchBarrier);
1475
1476 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1477 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
1478
1479 switch (m_imageType)
1480 {
1481 case IMAGE_TYPE_1D_ARRAY:
1482 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), subresourceRange.layerCount, layerSize.z());
1483 break;
1484 case IMAGE_TYPE_2D_ARRAY:
1485 case IMAGE_TYPE_CUBE:
1486 case IMAGE_TYPE_CUBE_ARRAY:
1487 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1488 break;
1489 default:
1490 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), layerSize.z());
1491 break;
1492 }
1493 }
1494 }
1495
1496 bool BinaryAtomicEndResultInstance::verifyResult (Allocation& outputBufferAllocation,
1497 const bool is64Bit) const
1498 {
1499 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1500 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
1501
1502 tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());
1503
1504 for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
1505 for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1506 for (deInt32 x = 0; x < resultBuffer.getWidth(); x++)
1507 {
1508 const void* resultValue = resultBuffer.getPixelPtr(x, y, z);
1509 deInt32 floatToIntValue = 0;
1510 bool isFloatValue = false;
1511 if (isFloatFormat(mapTextureFormat(m_format)))
1512 {
1513 isFloatValue = true;
1514 floatToIntValue = static_cast<deInt32>(*((float*)resultValue));
1515 }
1516
1517 if (isOrderIndependentAtomicOperation(m_operation))
1518 {
1519 if (isUintFormat(mapTextureFormat(m_format)))
1520 {
1521 if(is64Bit)
1522 {
1523 if (!isValueCorrect<deUint64>(*((deUint64*)resultValue), x, y, z, gridSize, extendedGridSize))
1524 return false;
1525 }
1526 else
1527 {
1528 if (!isValueCorrect<deUint32>(*((deUint32*)resultValue), x, y, z, gridSize, extendedGridSize))
1529 return false;
1530 }
1531 }
1532 else if (isIntFormat(mapTextureFormat(m_format)))
1533 {
1534 if (is64Bit)
1535 {
1536 if (!isValueCorrect<deInt64>(*((deInt64*)resultValue), x, y, z, gridSize, extendedGridSize))
1537 return false;
1538 }
1539 else
1540 {
1541 if (!isValueCorrect<deInt32>(*((deInt32*)resultValue), x, y, z, gridSize, extendedGridSize))
1542 return false;
1543 }
1544 }
1545 else
1546 {
1547 // 32-bit floating point
1548 if (!isValueCorrect<deInt32>(floatToIntValue, x, y, z, gridSize, extendedGridSize))
1549 return false;
1550 }
1551 }
1552 else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
1553 {
1554 // Check if the end result equals one of the atomic args.
1555 bool matchFound = false;
1556
1557 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1558 {
1559 const IVec3 gid(x + i*gridSize.x(), y, z);
1560 matchFound = is64Bit ?
1561 (*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
1562 isFloatValue ?
1563 floatToIntValue == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize) :
1564 (*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
1565
1566 }
1567
1568 if (!matchFound)
1569 return false;
1570 }
1571 else if (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
1572 {
1573 // Check if the end result equals one of the atomic args.
1574 bool matchFound = false;
1575
1576 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1577 {
1578 const IVec3 gid(x + i*gridSize.x(), y, z);
1579 matchFound = is64Bit ?
1580 (*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
1581 isFloatValue ?
1582 floatToIntValue == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize) :
1583 (*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
1584 }
1585
1586 if (!matchFound)
1587 return false;
1588 }
1589 else
1590 DE_ASSERT(false);
1591 }
1592 return true;
1593 }
1594
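// Recompute the expected end result by folding every invocation's atomic argument into the operation's
// initial value. This is only meaningful for order-independent operations.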
1595 template <typename T>
1596 bool BinaryAtomicEndResultInstance::isValueCorrect(const T resultValue, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
1597 {
1598 T reference = getOperationInitialValue<T>(m_operation);
1599 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1600 {
1601 const IVec3 gid(x + i*gridSize.x(), y, z);
1602 T arg = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1603 reference = computeBinaryAtomicOperationResult(m_operation, reference, arg);
1604 }
1605 return (resultValue == reference);
1606 }
1607
1608 TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
1609 {
1610 return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling, m_operation, m_useTransfer, m_readType, m_backingType);
1611 }
1612
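// Test instance that stores the value returned by each atomic invocation in a widened intermediate
// image (NUM_INVOCATIONS_PER_PIXEL slots per pixel) and checks that those returned values are
// consistent with some serial ordering of the atomic operations.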
1613 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
1614 {
1615 public:
1616
1617 	BinaryAtomicIntermValuesInstance (Context& context,
1618 const string& name,
1619 const ImageType imageType,
1620 const tcu::UVec3& imageSize,
1621 const TextureFormat& format,
1622 const VkImageTiling tiling,
1623 const AtomicOperation operation,
1624 const bool useTransfer,
1625 const ShaderReadType shaderReadType,
1626 const ImageBackingType backingType)
1627 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer, shaderReadType, backingType) {}
1628
1629 virtual deUint32 getOutputBufferSize (void) const;
1630
1631 virtual void prepareResources (const bool useTransfer);
1632 virtual void prepareDescriptors (const bool isTexelBuffer);
1633
1634 virtual void commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const;
1635 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1636 const VkPipeline pipeline,
1637 const VkPipelineLayout pipelineLayout,
1638 const VkDescriptorSet descriptorSet,
1639 const VkDeviceSize& range,
1640 const bool useTransfer);
1641
1642 virtual bool verifyResult (Allocation& outputBufferAllocation,
1643 const bool is64Bit) const;
1644
1645 protected:
1646
1647 template <typename T>
1648 bool areValuesCorrect (tcu::ConstPixelBufferAccess& resultBuffer,
1649 const bool isFloatingPoint,
1650 deInt32 x,
1651 deInt32 y,
1652 deInt32 z,
1653 const UVec3& gridSize,
1654 const IVec3 extendedGridSize) const;
1655
1656 template <typename T>
1657 bool verifyRecursive (const deInt32 index,
1658 const T valueSoFar,
1659 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1660 const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1661 const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
1662 de::MovePtr<Image> m_intermResultsImage;
1663 Move<VkImageView> m_intermResultsImageView;
1664 };
1665
1666 deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
1667 {
1668 return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1669 }
1670
1671 void BinaryAtomicIntermValuesInstance::prepareResources (const bool useTransfer)
1672 {
1673 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1674 const bool isCubeBasedImage = (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
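	// Cube-compatible images must keep width == height, so for cube(-array) images the intermediate
	// image is widened in both X and Y rather than only in X.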
1675 const UVec3 extendedLayerSize = isCubeBasedImage ? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
1676 : UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1677
1678 createImageAndView(mapTextureFormat(m_format), extendedLayerSize, useTransfer, m_intermResultsImage, m_intermResultsImageView);
1679 }
1680
1681 void BinaryAtomicIntermValuesInstance::prepareDescriptors (const bool isTexelBuffer)
1682 {
1683 const VkDescriptorType descriptorType = isTexelBuffer ?
1684 VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
1685 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1686
1687 const VkDevice device = m_context.getDevice();
1688 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1689
1690 m_descriptorSetLayout =
1691 DescriptorSetLayoutBuilder()
1692 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1693 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1694 .build(deviceInterface, device);
1695
1696 m_descriptorPool =
1697 DescriptorPoolBuilder()
1698 .addType(descriptorType, 2u)
1699 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1700
1701 m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1702
1703 if (isTexelBuffer)
1704 {
1705 m_descResultBufferView = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1706 m_descIntermResultsBufferView = makeBufferView(deviceInterface, device, *(*m_outputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1707
1708 DescriptorSetUpdateBuilder()
1709 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
1710 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &(m_descIntermResultsBufferView.get()))
1711 .update(deviceInterface, device);
1712 }
1713 else
1714 {
1715 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1716 const VkDescriptorImageInfo descIntermResultsImageInfo = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1717
1718 DescriptorSetUpdateBuilder()
1719 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
1720 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &descIntermResultsImageInfo)
1721 .update(deviceInterface, device);
1722 }
1723 }
1724
1725 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
1726 {
1727 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1728 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1729
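	// Move the intermediate results image to the GENERAL layout before the compute shader writes to it.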
1730 const VkImageMemoryBarrier imagePreDispatchBarrier =
1731 makeImageMemoryBarrier( 0u,
1732 VK_ACCESS_SHADER_WRITE_BIT,
1733 VK_IMAGE_LAYOUT_UNDEFINED,
1734 VK_IMAGE_LAYOUT_GENERAL,
1735 m_intermResultsImage->get(),
1736 subresourceRange);
1737
1738 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
1739 }
1740
1741 void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1742 const VkPipeline pipeline,
1743 const VkPipelineLayout pipelineLayout,
1744 const VkDescriptorSet descriptorSet,
1745 const VkDeviceSize& range,
1746 const bool useTransfer)
1747 {
1748 	// Nothing is needed for texel buffers: the intermediate results were written directly to the output buffer.
1749 if (m_imageType == IMAGE_TYPE_BUFFER)
1750 return;
1751
1752 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1753 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1754 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1755
1756 if (useTransfer)
1757 {
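		// Transition the intermediate results image for transfer reads and copy the widened layers into the output buffer.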
1758 const VkImageMemoryBarrier imagePostDispatchBarrier =
1759 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1760 VK_ACCESS_TRANSFER_READ_BIT,
1761 VK_IMAGE_LAYOUT_GENERAL,
1762 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1763 m_intermResultsImage->get(),
1764 subresourceRange);
1765
1766 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);
1767
1768 const UVec3 extendedLayerSize = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1769 const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
1770
1771 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
1772 }
1773 else
1774 {
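		// No transfer path: bind the intermediate results image and the output buffer to a second compute
		// dispatch that reads the image through the shader and writes it into the output buffer.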
1775 const VkDevice device = m_context.getDevice();
1776 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1777 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1778
1779 DescriptorSetUpdateBuilder()
1780 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1781 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1782 .update(deviceInterface, device);
1783
1784 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1785 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1786 VK_ACCESS_SHADER_READ_BIT,
1787 VK_IMAGE_LAYOUT_GENERAL,
1788 VK_IMAGE_LAYOUT_GENERAL,
1789 m_intermResultsImage->get(),
1790 subresourceRange);
1791
1792 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1793 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1794 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1795 DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1796 1u, &resultImagePostDispatchBarrier);
1797
1798 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1799 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
1800
1801 switch (m_imageType)
1802 {
1803 case IMAGE_TYPE_1D_ARRAY:
1804 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), subresourceRange.layerCount, layerSize.z());
1805 break;
1806 case IMAGE_TYPE_2D_ARRAY:
1807 case IMAGE_TYPE_CUBE:
1808 case IMAGE_TYPE_CUBE_ARRAY:
1809 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1810 break;
1811 default:
1812 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1813 break;
1814 }
1815 }
1816 }
1817
1818 bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation& outputBufferAllocation,
1819 const bool is64Bit) const
1820 {
1821 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1822 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
1823
1824 tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());
1825
1826 for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
1827 for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1828 for (deUint32 x = 0; x < gridSize.x(); x++)
1829 {
1830 if (isUintFormat(mapTextureFormat(m_format)))
1831 {
1832 if (is64Bit)
1833 {
1834 if (!areValuesCorrect<deUint64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1835 return false;
1836 }
1837 else
1838 {
1839 if (!areValuesCorrect<deUint32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1840 return false;
1841 }
1842 }
1843 else if (isIntFormat(mapTextureFormat(m_format)))
1844 {
1845 if (is64Bit)
1846 {
1847 if (!areValuesCorrect<deInt64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1848 return false;
1849 }
1850 else
1851 {
1852 if (!areValuesCorrect<deInt32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1853 return false;
1854 }
1855 }
1856 else
1857 {
1858 // 32-bit floating point
1859 if (!areValuesCorrect<deInt32>(resultBuffer, true, x, y, z, gridSize, extendedGridSize))
1860 return false;
1861 }
1862 }
1863
1864 return true;
1865 }
1866
1867 template <typename T>
1868 bool BinaryAtomicIntermValuesInstance::areValuesCorrect(tcu::ConstPixelBufferAccess& resultBuffer, const bool isFloatingPoint, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
1869 {
1870 T resultValues[NUM_INVOCATIONS_PER_PIXEL];
1871 T atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
1872 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL];
1873
1874 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1875 {
1876 IVec3 gid(x + i*gridSize.x(), y, z);
1877 T data = *((T*)resultBuffer.getPixelPtr(gid.x(), gid.y(), gid.z()));
1878 if (isFloatingPoint)
1879 {
1880 float fData;
1881 deMemcpy(&fData, &data, sizeof(fData));
1882 data = static_cast<T>(fData);
1883 }
1884 resultValues[i] = data;
1885 atomicArgs[i] = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1886 argsUsed[i] = false;
1887 }
1888
1889 // Verify that the return values form a valid sequence.
1890 return verifyRecursive(0, getOperationInitialValue<T>(m_operation), argsUsed, atomicArgs, resultValues);
1891 }
1892
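// Backtracking search: try to find an ordering of the atomic arguments such that each invocation's
// returned (pre-operation) value matches the running value produced by the arguments used before it.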
1893 template <typename T>
1894 bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32 index,
1895 const T valueSoFar,
1896 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1897 const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1898 const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
1899 {
1900 if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
1901 return true;
1902
1903 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1904 {
1905 if (!argsUsed[i] && resultValues[i] == valueSoFar)
1906 {
1907 argsUsed[i] = true;
1908
1909 if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
1910 {
1911 return true;
1912 }
1913
1914 argsUsed[i] = false;
1915 }
1916 }
1917
1918 return false;
1919 }
1920
1921 TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
1922 {
1923 return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling, m_operation, m_useTransfer, m_readType, m_backingType);
1924 }
1925
1926 } // anonymous namespace
1927
1928 tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
1929 {
1930 de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations"));
1931
1932 struct ImageParams
1933 {
1934 ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
1935 : m_imageType (imageType)
1936 , m_imageSize (imageSize)
1937 {
1938 }
1939 const ImageType m_imageType;
1940 const tcu::UVec3 m_imageSize;
1941 };
1942
1943 const ImageParams imageParamsArray[] =
1944 {
1945 ImageParams(IMAGE_TYPE_1D, tcu::UVec3(64u, 1u, 1u)),
1946 ImageParams(IMAGE_TYPE_1D_ARRAY, tcu::UVec3(64u, 1u, 8u)),
1947 ImageParams(IMAGE_TYPE_2D, tcu::UVec3(64u, 64u, 1u)),
1948 ImageParams(IMAGE_TYPE_2D_ARRAY, tcu::UVec3(64u, 64u, 8u)),
1949 ImageParams(IMAGE_TYPE_3D, tcu::UVec3(48u, 48u, 8u)),
1950 ImageParams(IMAGE_TYPE_CUBE, tcu::UVec3(64u, 64u, 1u)),
1951 ImageParams(IMAGE_TYPE_CUBE_ARRAY, tcu::UVec3(64u, 64u, 2u)),
1952 ImageParams(IMAGE_TYPE_BUFFER, tcu::UVec3(64u, 1u, 1u))
1953 };
1954
1955 const tcu::TextureFormat formats[] =
1956 {
1957 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
1958 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
1959 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::FLOAT),
1960 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT64),
1961 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT64)
1962 };
1963
1964 static const VkImageTiling s_tilings[] = {
1965 VK_IMAGE_TILING_OPTIMAL,
1966 VK_IMAGE_TILING_LINEAR,
1967 };
1968
1969 const struct
1970 {
1971 ShaderReadType type;
1972 const char* name;
1973 } readTypes[] =
1974 {
1975 { ShaderReadType::NORMAL, "normal_read" },
1976 #ifndef CTS_USES_VULKANSC
1977 { ShaderReadType::SPARSE, "sparse_read" },
1978 #endif // CTS_USES_VULKANSC
1979 };
1980
1981 const struct
1982 {
1983 ImageBackingType type;
1984 const char* name;
1985 } backingTypes[] =
1986 {
1987 { ImageBackingType::NORMAL, "normal_img" },
1988 #ifndef CTS_USES_VULKANSC
1989 { ImageBackingType::SPARSE, "sparse_img" },
1990 #endif // CTS_USES_VULKANSC
1991 };
1992
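	// Group hierarchy: operation / image type / (no)transfer / read type / backing type, with one
	// end-result case and one intermediate-values case per supported format and tiling combination.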
1993 for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
1994 {
1995 const AtomicOperation operation = (AtomicOperation)operationI;
1996
1997 de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str()));
1998
1999 for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
2000 {
2001 const ImageType imageType = imageParamsArray[imageTypeNdx].m_imageType;
2002 const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
2003
2004 de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str()));
2005
2006 for (int useTransferIdx = 0; useTransferIdx < 2; ++useTransferIdx)
2007 {
2008 const bool useTransfer = (useTransferIdx > 0);
2009 const string groupName = (!useTransfer ? "no" : "") + string("transfer");
2010
2011 de::MovePtr<tcu::TestCaseGroup> transferGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str()));
2012
2013 for (int readTypeIdx = 0; readTypeIdx < DE_LENGTH_OF_ARRAY(readTypes); ++readTypeIdx)
2014 {
2015 const auto& readType = readTypes[readTypeIdx];
2016
2017 de::MovePtr<tcu::TestCaseGroup> readTypeGroup(new tcu::TestCaseGroup(testCtx, readType.name));
2018
2019 for (int backingTypeIdx = 0; backingTypeIdx < DE_LENGTH_OF_ARRAY(backingTypes); ++backingTypeIdx)
2020 {
2021 const auto& backingType = backingTypes[backingTypeIdx];
2022
2023 de::MovePtr<tcu::TestCaseGroup> backingTypeGroup(new tcu::TestCaseGroup(testCtx, backingType.name));
2024
2025 for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
2026 {
2027 for (int tilingNdx = 0; tilingNdx < DE_LENGTH_OF_ARRAY(s_tilings); tilingNdx++)
2028 {
2029 const TextureFormat& format = formats[formatNdx];
2030 const std::string formatName = getShaderImageFormatQualifier(format);
2031 const char* suffix = (s_tilings[tilingNdx] == VK_IMAGE_TILING_OPTIMAL) ? "" : "_linear";
2032
2033 							// These cases would need dedicated SPIR-V programs in vktImageAtomicSpirvShaders.cpp.
2034 if (imageType == IMAGE_TYPE_BUFFER && (format.type != tcu::TextureFormat::FLOAT))
2035 {
2036 continue;
2037 }
2038
2039 // Only 2D and 3D images may support sparse residency.
2040 // VK_IMAGE_TILING_LINEAR does not support sparse residency
2041 const auto vkImageType = mapImageType(imageType);
2042 if (backingType.type == ImageBackingType::SPARSE && ((vkImageType != VK_IMAGE_TYPE_2D && vkImageType != VK_IMAGE_TYPE_3D) || (s_tilings[tilingNdx] == VK_IMAGE_TILING_LINEAR)))
2043 continue;
2044
2045 							// Only some atomic operations are supported on floating-point formats.
2046 if (format.type == tcu::TextureFormat::FLOAT)
2047 {
2048 if (operation != ATOMIC_OPERATION_ADD &&
2049 #ifndef CTS_USES_VULKANSC
2050 operation != ATOMIC_OPERATION_MIN &&
2051 operation != ATOMIC_OPERATION_MAX &&
2052 #endif // CTS_USES_VULKANSC
2053 operation != ATOMIC_OPERATION_EXCHANGE)
2054 {
2055 continue;
2056 }
2057 }
2058
2059 if (readType.type == ShaderReadType::SPARSE)
2060 {
2061 // When using transfer, shader reads will not be used, so avoid creating two identical cases.
2062 if (useTransfer)
2063 continue;
2064
2065 // Sparse reads are not supported for all types of images.
2066 if (imageType == IMAGE_TYPE_1D || imageType == IMAGE_TYPE_1D_ARRAY || imageType == IMAGE_TYPE_BUFFER)
2067 continue;
2068 }
2069
2070 //!< Atomic case checks the end result of the operations, and not the intermediate return values
2071 const string caseEndResult = formatName + "_end_result" + suffix;
2072 backingTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, imageType, imageSize, format, s_tilings[tilingNdx], operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2073
2074 //!< Atomic case checks the return values of the atomic function and not the end result.
2075 const string caseIntermValues = formatName + "_intermediate_values" + suffix;
2076 backingTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, imageType, imageSize, format, s_tilings[tilingNdx], operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2077 }
2078 }
2079
2080 readTypeGroup->addChild(backingTypeGroup.release());
2081 }
2082
2083 transferGroup->addChild(readTypeGroup.release());
2084 }
2085
2086 imageTypeGroup->addChild(transferGroup.release());
2087 }
2088
2089 operationGroup->addChild(imageTypeGroup.release());
2090 }
2091
2092 imageAtomicOperationsTests->addChild(operationGroup.release());
2093 }
2094
2095 return imageAtomicOperationsTests.release();
2096 }
2097
2098 } // image
2099 } // vkt
2100