1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2016 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file vktImageAtomicOperationTests.cpp
21 * \brief Image atomic operation tests
22 *//*--------------------------------------------------------------------*/
23
24 #include "vktImageAtomicOperationTests.hpp"
25 #include "vktImageAtomicSpirvShaders.hpp"
26
27 #include "deUniquePtr.hpp"
28 #include "deStringUtil.hpp"
29 #include "deSTLUtil.hpp"
30
31 #include "vktTestCaseUtil.hpp"
32 #include "vkPrograms.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkQueryUtil.hpp"
35 #include "vkBarrierUtil.hpp"
36 #include "vktImageTestsUtil.hpp"
37 #include "vkBuilderUtil.hpp"
38 #include "vkRef.hpp"
39 #include "vkRefUtil.hpp"
40 #include "vkTypeUtil.hpp"
41 #include "vkCmdUtil.hpp"
42 #include "vkObjUtil.hpp"
43 #include "vkBufferWithMemory.hpp"
44
45 #include "tcuTextureUtil.hpp"
46 #include "tcuTexture.hpp"
47 #include "tcuVectorType.hpp"
48 #include "tcuStringTemplate.hpp"
49
50 namespace vkt
51 {
52 namespace image
53 {
54 namespace
55 {
56
57 using namespace vk;
58 using namespace std;
59 using de::toString;
60
61 using tcu::TextureFormat;
62 using tcu::IVec2;
63 using tcu::IVec3;
64 using tcu::UVec3;
65 using tcu::Vec4;
66 using tcu::IVec4;
67 using tcu::UVec4;
68 using tcu::CubeFace;
69 using tcu::Texture1D;
70 using tcu::Texture2D;
71 using tcu::Texture3D;
72 using tcu::Texture2DArray;
73 using tcu::TextureCube;
74 using tcu::PixelBufferAccess;
75 using tcu::ConstPixelBufferAccess;
76 using tcu::Vector;
77 using tcu::TestContext;
78
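//! Each texel of the result image is targeted by NUM_INVOCATIONS_PER_PIXEL invocations: the dispatch is that many
//! times wider in X and the shaders wrap the X coordinate, so the atomic operations contend on the same texel.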
79 enum
80 {
81 NUM_INVOCATIONS_PER_PIXEL = 5u
82 };
83
84 enum AtomicOperation
85 {
86 ATOMIC_OPERATION_ADD = 0,
87 ATOMIC_OPERATION_SUB,
88 ATOMIC_OPERATION_INC,
89 ATOMIC_OPERATION_DEC,
90 ATOMIC_OPERATION_MIN,
91 ATOMIC_OPERATION_MAX,
92 ATOMIC_OPERATION_AND,
93 ATOMIC_OPERATION_OR,
94 ATOMIC_OPERATION_XOR,
95 ATOMIC_OPERATION_EXCHANGE,
96 ATOMIC_OPERATION_COMPARE_EXCHANGE,
97
98 ATOMIC_OPERATION_LAST
99 };
100
101 enum class ShaderReadType
102 {
103 NORMAL = 0,
104 SPARSE,
105 };
106
107 enum class ImageBackingType
108 {
109 NORMAL = 0,
110 SPARSE,
111 };
112
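//! Returns the GLSL coordinate expression for the given image type, e.g. plain x for 1D/buffer images or "ivec3(x,y,z)" for 3D images.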
113 static string getCoordStr (const ImageType imageType,
114 const std::string& x,
115 const std::string& y,
116 const std::string& z)
117 {
118 switch (imageType)
119 {
120 case IMAGE_TYPE_1D:
121 case IMAGE_TYPE_BUFFER:
122 return x;
123 case IMAGE_TYPE_1D_ARRAY:
124 case IMAGE_TYPE_2D:
125 return string("ivec2(" + x + "," + y + ")");
126 case IMAGE_TYPE_2D_ARRAY:
127 case IMAGE_TYPE_3D:
128 case IMAGE_TYPE_CUBE:
129 case IMAGE_TYPE_CUBE_ARRAY:
130 return string("ivec3(" + x + "," + y + "," + z + ")");
131 default:
132 DE_ASSERT(false);
133 return "";
134 }
135 }
136
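//! Returns the GLSL scalar type for the texel component: (u)int / float for 32-bit formats, (u)int64_t / double for 64-bit formats.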
137 static string getComponentTypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
138 {
139 DE_ASSERT(intFormat || uintFormat || floatFormat);
140
141 const bool is64 = (componentWidth == 64);
142
143 if (intFormat)
144 return (is64 ? "int64_t" : "int");
145 if (uintFormat)
146 return (is64 ? "uint64_t" : "uint");
147 if (floatFormat)
148 return (is64 ? "double" : "float");
149
150 return "";
151 }
152
153 static string getVec4TypeStr (deUint32 componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
154 {
155 DE_ASSERT(intFormat || uintFormat || floatFormat);
156
157 const bool is64 = (componentWidth == 64);
158
159 if (intFormat)
160 return (is64 ? "i64vec4" : "ivec4");
161 if (uintFormat)
162 return (is64 ? "u64vec4" : "uvec4");
163 if (floatFormat)
164 return (is64 ? "f64vec4" : "vec4");
165
166 return "";
167 }
168
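//! Returns the GLSL expression used as the atomic operand for the given operation; it mirrors getAtomicFuncArgument() below so the host can recompute the same values.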
169 static string getAtomicFuncArgumentShaderStr (const AtomicOperation op,
170 const string& x,
171 const string& y,
172 const string& z,
173 const IVec3& gridSize)
174 {
175 switch (op)
176 {
177 case ATOMIC_OPERATION_ADD:
178 case ATOMIC_OPERATION_AND:
179 case ATOMIC_OPERATION_OR:
180 case ATOMIC_OPERATION_XOR:
181 return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
182 case ATOMIC_OPERATION_MIN:
183 case ATOMIC_OPERATION_MAX:
184 // multiply by (1 - 2*(value % 2)) to make half of the data negative
185 // this will result in generating large numbers for uint formats
186 return string("((1 - 2*(" + x + " % 2)) * (" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + "))");
187 case ATOMIC_OPERATION_EXCHANGE:
188 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
189 return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
190 default:
191 DE_ASSERT(false);
192 return "";
193 }
194 }
195
196 static string getAtomicOperationCaseName (const AtomicOperation op)
197 {
198 switch (op)
199 {
200 case ATOMIC_OPERATION_ADD: return string("add");
201 case ATOMIC_OPERATION_SUB: return string("sub");
202 case ATOMIC_OPERATION_INC: return string("inc");
203 case ATOMIC_OPERATION_DEC: return string("dec");
204 case ATOMIC_OPERATION_MIN: return string("min");
205 case ATOMIC_OPERATION_MAX: return string("max");
206 case ATOMIC_OPERATION_AND: return string("and");
207 case ATOMIC_OPERATION_OR: return string("or");
208 case ATOMIC_OPERATION_XOR: return string("xor");
209 case ATOMIC_OPERATION_EXCHANGE: return string("exchange");
210 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("compare_exchange");
211 default:
212 DE_ASSERT(false);
213 return "";
214 }
215 }
216
217 static string getAtomicOperationShaderFuncName (const AtomicOperation op)
218 {
219 switch (op)
220 {
221 case ATOMIC_OPERATION_ADD: return string("imageAtomicAdd");
222 case ATOMIC_OPERATION_MIN: return string("imageAtomicMin");
223 case ATOMIC_OPERATION_MAX: return string("imageAtomicMax");
224 case ATOMIC_OPERATION_AND: return string("imageAtomicAnd");
225 case ATOMIC_OPERATION_OR: return string("imageAtomicOr");
226 case ATOMIC_OPERATION_XOR: return string("imageAtomicXor");
227 case ATOMIC_OPERATION_EXCHANGE: return string("imageAtomicExchange");
228 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return string("imageAtomicCompSwap");
229 default:
230 DE_ASSERT(false);
231 return "";
232 }
233 }
234
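//! Initial value stored in every texel before the atomic pass (a 64-bit specialization follows for wide formats).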
235 template <typename T>
236 T getOperationInitialValue (const AtomicOperation op)
237 {
238 switch (op)
239 {
240 // \note 18 is just an arbitrary small nonzero value.
241 case ATOMIC_OPERATION_ADD: return 18;
242 case ATOMIC_OPERATION_INC: return 18;
243 case ATOMIC_OPERATION_SUB: return (1 << 24) - 1;
244 case ATOMIC_OPERATION_DEC: return (1 << 24) - 1;
245 case ATOMIC_OPERATION_MIN: return (1 << 15) - 1;
246 case ATOMIC_OPERATION_MAX: return 18;
247 case ATOMIC_OPERATION_AND: return (1 << 15) - 1;
248 case ATOMIC_OPERATION_OR: return 18;
249 case ATOMIC_OPERATION_XOR: return 18;
250 case ATOMIC_OPERATION_EXCHANGE: return 18;
251 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 18;
252 default:
253 DE_ASSERT(false);
254 return 0xFFFFFFFF;
255 }
256 }
257
258 template <>
259 deInt64 getOperationInitialValue<deInt64>(const AtomicOperation op)
260 {
261 switch (op)
262 {
263 // \note 0x000000BEFFFFFF18 is just an arbitrary nonzero value.
264 case ATOMIC_OPERATION_ADD: return 0x000000BEFFFFFF18;
265 case ATOMIC_OPERATION_INC: return 0x000000BEFFFFFF18;
266 case ATOMIC_OPERATION_SUB: return (1ull << 56) - 1;
267 case ATOMIC_OPERATION_DEC: return (1ull << 56) - 1;
268 case ATOMIC_OPERATION_MIN: return (1ull << 47) - 1;
269 case ATOMIC_OPERATION_MAX: return 0x000000BEFFFFFF18;
270 case ATOMIC_OPERATION_AND: return (1ull << 47) - 1;
271 case ATOMIC_OPERATION_OR: return 0x000000BEFFFFFF18;
272 case ATOMIC_OPERATION_XOR: return 0x000000BEFFFFFF18;
273 case ATOMIC_OPERATION_EXCHANGE: return 0x000000BEFFFFFF18;
274 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return 0x000000BEFFFFFF18;
275 default:
276 DE_ASSERT(false);
277 return 0xFFFFFFFFFFFFFFFF;
278 }
279 }
280
281 template <>
282 deUint64 getOperationInitialValue<deUint64>(const AtomicOperation op)
283 {
284 return (deUint64)getOperationInitialValue<deInt64>(op);
285 }
286
287
288 template <typename T>
289 static T getAtomicFuncArgument (const AtomicOperation op,
290 const IVec3& invocationID,
291 const IVec3& gridSize)
292 {
293 const T x = static_cast<T>(invocationID.x());
294 const T y = static_cast<T>(invocationID.y());
295 const T z = static_cast<T>(invocationID.z());
296
297 switch (op)
298 {
299 // \note Fall-throughs.
300 case ATOMIC_OPERATION_ADD:
301 case ATOMIC_OPERATION_SUB:
302 case ATOMIC_OPERATION_AND:
303 case ATOMIC_OPERATION_OR:
304 case ATOMIC_OPERATION_XOR:
305 return x*x + y*y + z*z;
306 case ATOMIC_OPERATION_INC:
307 case ATOMIC_OPERATION_DEC:
308 return 1;
309 case ATOMIC_OPERATION_MIN:
310 case ATOMIC_OPERATION_MAX:
311 // multiply half of the data by -1
312 return (1-2*(x % 2))*(x*x + y*y + z*z);
313 case ATOMIC_OPERATION_EXCHANGE:
314 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
315 return (z*static_cast<T>(gridSize.x()) + x)*static_cast<T>(gridSize.y()) + y;
316 default:
317 DE_ASSERT(false);
318 return -1;
319 }
320 }
321
322 //! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried (i.e. is both commutative and associative).
323 static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
324 {
325 return op == ATOMIC_OPERATION_ADD ||
326 op == ATOMIC_OPERATION_SUB ||
327 op == ATOMIC_OPERATION_INC ||
328 op == ATOMIC_OPERATION_DEC ||
329 op == ATOMIC_OPERATION_MIN ||
330 op == ATOMIC_OPERATION_MAX ||
331 op == ATOMIC_OPERATION_AND ||
332 op == ATOMIC_OPERATION_OR ||
333 op == ATOMIC_OPERATION_XOR;
334 }
335
336 //! Checks if the operation needs an SPIR-V shader.
337 static bool isSpirvAtomicOperation (const AtomicOperation op)
338 {
339 return op == ATOMIC_OPERATION_SUB ||
340 op == ATOMIC_OPERATION_INC ||
341 op == ATOMIC_OPERATION_DEC;
342 }
343
344 //! Returns the SPIR-V assembler name of the given operation.
345 static std::string getSpirvAtomicOpName (const AtomicOperation op)
346 {
347 switch (op)
348 {
349 case ATOMIC_OPERATION_SUB: return "OpAtomicISub";
350 case ATOMIC_OPERATION_INC: return "OpAtomicIIncrement";
351 case ATOMIC_OPERATION_DEC: return "OpAtomicIDecrement";
352 default: break;
353 }
354
355 DE_ASSERT(false);
356 return "";
357 }
358
359 //! Returns true if the given SPIR-V operation does not need the last argument, compared to OpAtomicIAdd.
360 static bool isSpirvAtomicNoLastArgOp (const AtomicOperation op)
361 {
362 switch (op)
363 {
364 case ATOMIC_OPERATION_SUB: return false;
365 case ATOMIC_OPERATION_INC: // fallthrough
366 case ATOMIC_OPERATION_DEC: return true;
367 default: break;
368 }
369
370 DE_ASSERT(false);
371 return false;
372 }
373
374 //! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
375 template <typename T>
376 static T computeBinaryAtomicOperationResult (const AtomicOperation op, const T a, const T b)
377 {
378 switch (op)
379 {
380 case ATOMIC_OPERATION_INC: // fallthrough.
381 case ATOMIC_OPERATION_ADD: return a + b;
382 case ATOMIC_OPERATION_DEC: // fallthrough.
383 case ATOMIC_OPERATION_SUB: return a - b;
384 case ATOMIC_OPERATION_MIN: return de::min(a, b);
385 case ATOMIC_OPERATION_MAX: return de::max(a, b);
386 case ATOMIC_OPERATION_AND: return a & b;
387 case ATOMIC_OPERATION_OR: return a | b;
388 case ATOMIC_OPERATION_XOR: return a ^ b;
389 case ATOMIC_OPERATION_EXCHANGE: return b;
390 case ATOMIC_OPERATION_COMPARE_EXCHANGE: return (a == (sizeof(T) == 8 ? 0xBEFFFFFF18 : 18)) ? b : a;
391 default:
392 DE_ASSERT(false);
393 return -1;
394 }
395 }
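/* Illustrative sketch only (not part of the test): for a single texel, the expected end result of an
 * order-independent operation can be reconstructed on the host by folding every invocation's argument
 * into the initial value, e.g.
 *
 *   T expected = getOperationInitialValue<T>(op);
 *   for (const IVec3& id : invocationIDsForPixel) // hypothetical list of the NUM_INVOCATIONS_PER_PIXEL invocation IDs for this texel
 *       expected = computeBinaryAtomicOperationResult(op, expected, getAtomicFuncArgument<T>(op, id, extendedGridSize));
 *
 * The verifyResult() implementations of the instance classes below perform the actual checks.
 */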
396
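//! The test image is always used as a storage image; transfer usage is added only when initialization and readback go through copy commands.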
397 VkImageUsageFlags getUsageFlags (bool useTransfer)
398 {
399 VkImageUsageFlags usageFlags = VK_IMAGE_USAGE_STORAGE_BIT;
400
401 if (useTransfer)
402 usageFlags |= (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
403
404 return usageFlags;
405 }
406
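//! Adds the compute shaders used when transfer is not available: a fill shader that initializes the image from a buffer
//! and a read shader that copies it back to a buffer. A sparse-residency read variant is added for image types that can be sparse.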
407 void AddFillReadShader (SourceCollections& sourceCollections,
408 const ImageType& imageType,
409 const tcu::TextureFormat& format,
410 const string& componentType,
411 const string& vec4Type)
412 {
413 const string imageInCoord = getCoordStr(imageType, "gx", "gy", "gz");
414 const string shaderImageFormatStr = getShaderImageFormatQualifier(format);
415 const string shaderImageTypeStr = getShaderImageType(format, imageType);
416 const auto componentWidth = getFormatComponentWidth(mapTextureFormat(format), 0u);
417 const string extensions = ((componentWidth == 64u)
418 ? "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
419 "#extension GL_EXT_shader_image_int64 : require\n"
420 : "");
421
422
423 const string fillShader = "#version 450\n"
424 + extensions +
425 "precision highp " + shaderImageTypeStr + ";\n"
426 "\n"
427 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
428 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
429 "\n"
430 "layout(std430, binding = 1) buffer inputBuffer\n"
431 "{\n"
432 " "+ componentType + " data[];\n"
433 "} inBuffer;\n"
434 "\n"
435 "void main(void)\n"
436 "{\n"
437 " int gx = int(gl_GlobalInvocationID.x);\n"
438 " int gy = int(gl_GlobalInvocationID.y);\n"
439 " int gz = int(gl_GlobalInvocationID.z);\n"
440 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
441 " imageStore(u_resultImage, " + imageInCoord + ", " + vec4Type + "(inBuffer.data[index]));\n"
442 "}\n";
443
444 const string readShader = "#version 450\n"
445 + extensions +
446 "precision highp " + shaderImageTypeStr + ";\n"
447 "\n"
448 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
449 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
450 "\n"
451 "layout(std430, binding = 1) buffer outputBuffer\n"
452 "{\n"
453 " " + componentType + " data[];\n"
454 "} outBuffer;\n"
455 "\n"
456 "void main(void)\n"
457 "{\n"
458 " int gx = int(gl_GlobalInvocationID.x);\n"
459 " int gy = int(gl_GlobalInvocationID.y);\n"
460 " int gz = int(gl_GlobalInvocationID.z);\n"
461 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
462 " outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
463 "}\n";
464
465
466 if ((imageType != IMAGE_TYPE_1D) &&
467 (imageType != IMAGE_TYPE_1D_ARRAY) &&
468 (imageType != IMAGE_TYPE_BUFFER))
469 {
470 const string readShaderResidency = "#version 450\n"
471 "#extension GL_ARB_sparse_texture2 : require\n"
472 + extensions +
473 "precision highp " + shaderImageTypeStr + ";\n"
474 "\n"
475 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
476 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
477 "\n"
478 "layout(std430, binding = 1) buffer outputBuffer\n"
479 "{\n"
480 " " + componentType + " data[];\n"
481 "} outBuffer;\n"
482 "\n"
483 "void main(void)\n"
484 "{\n"
485 " int gx = int(gl_GlobalInvocationID.x);\n"
486 " int gy = int(gl_GlobalInvocationID.y);\n"
487 " int gz = int(gl_GlobalInvocationID.z);\n"
488 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
489 " outBuffer.data[index] = imageLoad(u_resultImage, " + imageInCoord + ").x;\n"
490 " " + vec4Type + " sparseValue;\n"
491 " sparseImageLoadARB(u_resultImage, " + imageInCoord + ", sparseValue);\n"
492 " if (outBuffer.data[index] != sparseValue.x)\n"
493 " outBuffer.data[index] = " + vec4Type + "(1234).x;\n"
494 "}\n";
495
496 sourceCollections.glslSources.add("readShaderResidency") << glu::ComputeSource(readShaderResidency.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
497 }
498
499 sourceCollections.glslSources.add("fillShader") << glu::ComputeSource(fillShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
500 sourceCollections.glslSources.add("readShader") << glu::ComputeSource(readShader.c_str()) << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
501 }
502
503 //! Prepare the initial data for the image
504 static void initDataForImage (const VkDevice device,
505 const DeviceInterface& deviceInterface,
506 const TextureFormat& format,
507 const AtomicOperation operation,
508 const tcu::UVec3& gridSize,
509 BufferWithMemory& buffer)
510 {
511 Allocation& bufferAllocation = buffer.getAllocation();
512 const VkFormat imageFormat = mapTextureFormat(format);
513 tcu::PixelBufferAccess pixelBuffer (format, gridSize.x(), gridSize.y(), gridSize.z(), bufferAllocation.getHostPtr());
514
515 if (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)
516 {
517 const deInt64 initialValue(getOperationInitialValue<deInt64>(operation));
518
519 for (deUint32 z = 0; z < gridSize.z(); z++)
520 for (deUint32 y = 0; y < gridSize.y(); y++)
521 for (deUint32 x = 0; x < gridSize.x(); x++)
522 {
523 *((deInt64*)pixelBuffer.getPixelPtr(x, y, z)) = initialValue;
524 }
525 }
526 else
527 {
528 const tcu::IVec4 initialValue(getOperationInitialValue<deInt32>(operation));
529
530 for (deUint32 z = 0; z < gridSize.z(); z++)
531 for (deUint32 y = 0; y < gridSize.y(); y++)
532 for (deUint32 x = 0; x < gridSize.x(); x++)
533 {
534 pixelBuffer.setPixel(initialValue, x, y, z);
535 }
536 }
537
538 flushAlloc(deviceInterface, device, bufferAllocation);
539 }
540
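//! Shared capability checks: format/tiling support for storage-image (or texel-buffer) atomics, optional transfer features,
//! sparse binding/residency, and the float or 64-bit integer atomic features required by the format and operation.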
541 void commonCheckSupport (Context& context, const tcu::TextureFormat& tcuFormat, VkImageTiling tiling, ImageType imageType, const tcu::UVec3& imageSize, AtomicOperation operation, bool useTransfer, ShaderReadType readType, ImageBackingType backingType)
542 {
543 const VkFormat format = mapTextureFormat(tcuFormat);
544 const VkImageType vkImgType = mapImageType(imageType);
545 const VkFormatFeatureFlags texelBufferSupport = (VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT);
546
547 const auto& vki = context.getInstanceInterface();
548 const auto physicalDevice = context.getPhysicalDevice();
549 const auto usageFlags = getUsageFlags(useTransfer);
550
551 VkImageFormatProperties vkImageFormatProperties;
552 const auto result = vki.getPhysicalDeviceImageFormatProperties(physicalDevice, format, vkImgType, tiling, usageFlags, 0, &vkImageFormatProperties);
553 if (result != VK_SUCCESS) {
554 if (result == VK_ERROR_FORMAT_NOT_SUPPORTED)
555 TCU_THROW(NotSupportedError, "Format unsupported for tiling");
556 else
557 TCU_FAIL("vkGetPhysicalDeviceImageFormatProperties returned unexpected error");
558 }
559
560 if (vkImageFormatProperties.maxArrayLayers < (uint32_t)getNumLayers(imageType, imageSize)) {
561 TCU_THROW(NotSupportedError, "This format and tiling combination does not support this number of array layers");
562 }
563
564 const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(context.getInstanceInterface(),
565 context.getPhysicalDevice(), format);
566 if ((imageType == IMAGE_TYPE_BUFFER) &&
567 ((formatProperties.bufferFeatures & texelBufferSupport) != texelBufferSupport))
568 TCU_THROW(NotSupportedError, "Atomic storage texel buffers not supported");
569
570 const VkFormatFeatureFlags requiredFeaturesLinear = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
571 if (tiling == vk::VK_IMAGE_TILING_LINEAR &&
572 ((formatProperties.linearTilingFeatures & requiredFeaturesLinear) != requiredFeaturesLinear)
573 ) {
574 TCU_THROW(NotSupportedError, "Format doesn't support atomic storage with linear tiling");
575 }
576
577 if (imageType == IMAGE_TYPE_CUBE_ARRAY)
578 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_IMAGE_CUBE_ARRAY);
579
580 #ifndef CTS_USES_VULKANSC
581 if (backingType == ImageBackingType::SPARSE)
582 {
583 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);
584
585 switch (vkImgType)
586 {
587 case VK_IMAGE_TYPE_2D: context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE2D); break;
588 case VK_IMAGE_TYPE_3D: context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE3D); break;
589 default: DE_ASSERT(false); break;
590 }
591
592 if (!checkSparseImageFormatSupport(context.getPhysicalDevice(), context.getInstanceInterface(), format, vkImgType, VK_SAMPLE_COUNT_1_BIT, usageFlags, tiling))
593 TCU_THROW(NotSupportedError, "Format does not support sparse images");
594 }
595 #endif // CTS_USES_VULKANSC
596
597 if (isFloatFormat(format))
598 {
599 context.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
600
601 const VkFormatFeatureFlags requiredFeatures = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
602 const auto& atomicFloatFeatures = context.getShaderAtomicFloatFeaturesEXT();
603
604 if (!atomicFloatFeatures.shaderImageFloat32Atomics)
605 TCU_THROW(NotSupportedError, "shaderImageFloat32Atomics not supported");
606
607 if ((operation == ATOMIC_OPERATION_ADD) && !atomicFloatFeatures.shaderImageFloat32AtomicAdd)
608 TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicAdd not supported");
609
610 if (operation == ATOMIC_OPERATION_MIN || operation == ATOMIC_OPERATION_MAX)
611 {
612 context.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
613 #ifndef CTS_USES_VULKANSC
614 if (!context.getShaderAtomicFloat2FeaturesEXT().shaderImageFloat32AtomicMinMax)
615 {
616 TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicMinMax not supported");
617 }
618 #endif // CTS_USES_VULKANSC
619 }
620
621 if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
622 TCU_FAIL("Required format feature bits not supported");
623
624 if (backingType == ImageBackingType::SPARSE)
625 {
626 if (!atomicFloatFeatures.sparseImageFloat32Atomics)
627 TCU_THROW(NotSupportedError, "sparseImageFloat32Atomics not supported");
628
629 if (operation == ATOMIC_OPERATION_ADD && !atomicFloatFeatures.sparseImageFloat32AtomicAdd)
630 TCU_THROW(NotSupportedError, "sparseImageFloat32AtomicAdd not supported");
631 }
632
633 }
634 else if (format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
635 {
636 context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");
637
638 const VkFormatFeatureFlags requiredFeatures = (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
639 const auto& atomicInt64Features = context.getShaderImageAtomicInt64FeaturesEXT();
640
641 if (!atomicInt64Features.shaderImageInt64Atomics)
642 TCU_THROW(NotSupportedError, "shaderImageInt64Atomics not supported");
643
644 if (backingType == ImageBackingType::SPARSE && !atomicInt64Features.sparseImageInt64Atomics)
645 TCU_THROW(NotSupportedError, "sparseImageInt64Atomics not supported");
646
647 if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
648 TCU_FAIL("Mandatory format features not supported");
649 }
650
651 if (useTransfer)
652 {
653 const VkFormatFeatureFlags transferFeatures = (VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT);
654 if ((formatProperties.optimalTilingFeatures & transferFeatures) != transferFeatures)
655 TCU_THROW(NotSupportedError, "Transfer features not supported for this format");
656 }
657
658 if (readType == ShaderReadType::SPARSE)
659 {
660 DE_ASSERT(imageType != IMAGE_TYPE_1D && imageType != IMAGE_TYPE_1D_ARRAY && imageType != IMAGE_TYPE_BUFFER);
661 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_RESOURCE_RESIDENCY);
662 }
663 }
664
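//! Test case that verifies the final contents of the image after all atomic invocations have finished.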
665 class BinaryAtomicEndResultCase : public vkt::TestCase
666 {
667 public:
668 BinaryAtomicEndResultCase (tcu::TestContext& testCtx,
669 const string& name,
670 const string& description,
671 const ImageType imageType,
672 const tcu::UVec3& imageSize,
673 const tcu::TextureFormat& format,
674 const VkImageTiling tiling,
675 const AtomicOperation operation,
676 const bool useTransfer,
677 const ShaderReadType shaderReadType,
678 const ImageBackingType backingType,
679 const glu::GLSLVersion glslVersion);
680
681 void initPrograms (SourceCollections& sourceCollections) const;
682 TestInstance* createInstance (Context& context) const;
683 virtual void checkSupport (Context& context) const;
684
685 private:
686 const ImageType m_imageType;
687 const tcu::UVec3 m_imageSize;
688 const tcu::TextureFormat m_format;
689 const VkImageTiling m_tiling;
690 const AtomicOperation m_operation;
691 const bool m_useTransfer;
692 const ShaderReadType m_readType;
693 const ImageBackingType m_backingType;
694 const glu::GLSLVersion m_glslVersion;
695 };
696
697 BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext& testCtx,
698 const string& name,
699 const string& description,
700 const ImageType imageType,
701 const tcu::UVec3& imageSize,
702 const tcu::TextureFormat& format,
703 const VkImageTiling tiling,
704 const AtomicOperation operation,
705 const bool useTransfer,
706 const ShaderReadType shaderReadType,
707 const ImageBackingType backingType,
708 const glu::GLSLVersion glslVersion)
709 : TestCase (testCtx, name, description)
710 , m_imageType (imageType)
711 , m_imageSize (imageSize)
712 , m_format (format)
713 , m_tiling (tiling)
714 , m_operation (operation)
715 , m_useTransfer (useTransfer)
716 , m_readType (shaderReadType)
717 , m_backingType (backingType)
718 , m_glslVersion (glslVersion)
719 {
720 }
721
722 void BinaryAtomicEndResultCase::checkSupport (Context& context) const
723 {
724 commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType, m_backingType);
725 }
726
727 void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
728 {
729 const VkFormat imageFormat = mapTextureFormat(m_format);
730 const deUint32 componentWidth = getFormatComponentWidth(imageFormat, 0);
731 const bool intFormat = isIntFormat(imageFormat);
732 const bool uintFormat = isUintFormat(imageFormat);
733 const bool floatFormat = isFloatFormat(imageFormat);
734 const string type = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
735 const string vec4Type = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
736
737 AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
738
739 if (isSpirvAtomicOperation(m_operation))
740 {
741 const CaseVariant caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_END_RESULTS};
742 const tcu::StringTemplate shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
743 std::map<std::string, std::string> specializations;
744
745 specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
746 if (isSpirvAtomicNoLastArgOp(m_operation))
747 specializations["LASTARG"] = "";
748
749 sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
750 }
751 else
752 {
753 const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
754
755 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
756 const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
757
758 const string atomicArgExpr = type + getAtomicFuncArgumentShaderStr(m_operation,
759 "gx", "gy", "gz",
760 IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
761
762 const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
763 (componentWidth == 64 ? ", 820338753304" : ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "")
764 : "";
765 const string atomicInvocation = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
766 const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
767 const string shaderImageTypeStr = getShaderImageType(m_format, m_imageType);
768 const string extensions = "#extension GL_EXT_shader_atomic_float : enable\n"
769 "#extension GL_EXT_shader_atomic_float2 : enable\n"
770 "#extension GL_KHR_memory_scope_semantics : enable";
771
772 string source = versionDecl + "\n" + extensions + "\n";
773
774 if (64 == componentWidth)
775 {
776 source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
777 "#extension GL_EXT_shader_image_int64 : require\n";
778 }
779
780 source += "precision highp " + shaderImageTypeStr + ";\n"
781 "\n"
782 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
783 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
784 "\n"
785 "void main (void)\n"
786 "{\n"
787 " int gx = int(gl_GlobalInvocationID.x);\n"
788 " int gy = int(gl_GlobalInvocationID.y);\n"
789 " int gz = int(gl_GlobalInvocationID.z);\n"
790 " " + atomicInvocation + ";\n"
791 "}\n";
792
793 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
794 }
795 }
796
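//! Test case that verifies the intermediate values returned by each atomic call (stored to a separate image by the shader).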
797 class BinaryAtomicIntermValuesCase : public vkt::TestCase
798 {
799 public:
800 BinaryAtomicIntermValuesCase (tcu::TestContext& testCtx,
801 const string& name,
802 const string& description,
803 const ImageType imageType,
804 const tcu::UVec3& imageSize,
805 const tcu::TextureFormat& format,
806 const VkImageTiling tiling,
807 const AtomicOperation operation,
808 const bool useTransfer,
809 const ShaderReadType shaderReadType,
810 const ImageBackingType backingType,
811 const glu::GLSLVersion glslVersion);
812
813 void initPrograms (SourceCollections& sourceCollections) const;
814 TestInstance* createInstance (Context& context) const;
815 virtual void checkSupport (Context& context) const;
816
817 private:
818 const ImageType m_imageType;
819 const tcu::UVec3 m_imageSize;
820 const tcu::TextureFormat m_format;
821 const VkImageTiling m_tiling;
822 const AtomicOperation m_operation;
823 const bool m_useTransfer;
824 const ShaderReadType m_readType;
825 const ImageBackingType m_backingType;
826 const glu::GLSLVersion m_glslVersion;
827 };
828
829 BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext& testCtx,
830 const string& name,
831 const string& description,
832 const ImageType imageType,
833 const tcu::UVec3& imageSize,
834 const TextureFormat& format,
835 const VkImageTiling tiling,
836 const AtomicOperation operation,
837 const bool useTransfer,
838 const ShaderReadType shaderReadType,
839 const ImageBackingType backingType,
840 const glu::GLSLVersion glslVersion)
841 : TestCase (testCtx, name, description)
842 , m_imageType (imageType)
843 , m_imageSize (imageSize)
844 , m_format (format)
845 , m_tiling (tiling)
846 , m_operation (operation)
847 , m_useTransfer (useTransfer)
848 , m_readType (shaderReadType)
849 , m_backingType (backingType)
850 , m_glslVersion (glslVersion)
851 {
852 }
853
854 void BinaryAtomicIntermValuesCase::checkSupport (Context& context) const
855 {
856 commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType, m_backingType);
857 }
858
859 void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
860 {
861 const VkFormat imageFormat = mapTextureFormat(m_format);
862 const deUint32 componentWidth = getFormatComponentWidth(imageFormat, 0);
863 const bool intFormat = isIntFormat(imageFormat);
864 const bool uintFormat = isUintFormat(imageFormat);
865 const bool floatFormat = isFloatFormat(imageFormat);
866 const string type = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
867 const string vec4Type = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
868
869 AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
870
871 if (isSpirvAtomicOperation(m_operation))
872 {
873 const CaseVariant caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_INTERMEDIATE_RESULTS};
874 const tcu::StringTemplate shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
875 std::map<std::string, std::string> specializations;
876
877 specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
878 if (isSpirvAtomicNoLastArgOp(m_operation))
879 specializations["LASTARG"] = "";
880
881 sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
882 }
883 else
884 {
885 const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
886 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
887 const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
888 const string invocationCoord = getCoordStr(m_imageType, "gx", "gy", "gz");
889 const string atomicArgExpr = type + getAtomicFuncArgumentShaderStr(m_operation,
890 "gx", "gy", "gz",
891 IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));
892
893 const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
894 (componentWidth == 64 ? ", 820338753304" : ", 18") + string(uintFormat ? "u" : "") + string(componentWidth == 64 ? "l" : "") :
895 "";
896 const string atomicInvocation = getAtomicOperationShaderFuncName(m_operation) +
897 "(u_resultImage, " + atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
898 const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
899 const string shaderImageTypeStr = getShaderImageType(m_format, m_imageType);
900 const string extensions = "#extension GL_EXT_shader_atomic_float : enable\n"
901 "#extension GL_EXT_shader_atomic_float2 : enable\n"
902 "#extension GL_KHR_memory_scope_semantics : enable";
903
904 string source = versionDecl + "\n" + extensions + "\n"
905 "\n";
906
907 if (64 == componentWidth)
908 {
909 source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
910 "#extension GL_EXT_shader_image_int64 : require\n";
911 }
912
913 source += "precision highp " + shaderImageTypeStr + "; \n"
914 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
915 "layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
916 "layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
917 "\n"
918 "void main (void)\n"
919 "{\n"
920 " int gx = int(gl_GlobalInvocationID.x);\n"
921 " int gy = int(gl_GlobalInvocationID.y);\n"
922 " int gz = int(gl_GlobalInvocationID.z);\n"
923 " imageStore(u_intermValuesImage, " + invocationCoord + ", " + vec4Type + "(" + atomicInvocation + "));\n"
924 "}\n";
925
926 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
927 }
928 }
929
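//! Base test instance: creates the image and buffers, initializes the data (via transfer or a fill shader),
//! dispatches the atomic compute pass and copies the results back to a host-visible buffer for verification.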
930 class BinaryAtomicInstanceBase : public vkt::TestInstance
931 {
932 public:
933
934 BinaryAtomicInstanceBase (Context& context,
935 const string& name,
936 const ImageType imageType,
937 const tcu::UVec3& imageSize,
938 const TextureFormat& format,
939 const VkImageTiling tiling,
940 const AtomicOperation operation,
941 const bool useTransfer,
942 const ShaderReadType shaderReadType,
943 const ImageBackingType backingType);
944
945 tcu::TestStatus iterate (void);
946
947 virtual deUint32 getOutputBufferSize (void) const = 0;
948
949 virtual void prepareResources (const bool useTransfer) = 0;
950 virtual void prepareDescriptors (const bool isTexelBuffer) = 0;
951
952 virtual void commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const = 0;
953 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer,
954 const VkPipeline pipeline,
955 const VkPipelineLayout pipelineLayout,
956 const VkDescriptorSet descriptorSet,
957 const VkDeviceSize& range,
958 const bool useTransfer) = 0;
959
960 virtual bool verifyResult (Allocation& outputBufferAllocation,
961 const bool is64Bit) const = 0;
962
963 protected:
964
965 void shaderFillImage (const VkCommandBuffer cmdBuffer,
966 const VkBuffer& buffer,
967 const VkPipeline pipeline,
968 const VkPipelineLayout pipelineLayout,
969 const VkDescriptorSet descriptorSet,
970 const VkDeviceSize& range,
971 const tcu::UVec3& gridSize);
972
973 void createImageAndView (VkFormat imageFormat,
974 const tcu::UVec3& imageExtent,
975 bool useTransfer,
976 de::MovePtr<Image>& imagePtr,
977 Move<VkImageView>& imageViewPtr);
978
979 void createImageResources (const VkFormat& imageFormat,
980 const bool useTransfer);
981
982 const string m_name;
983 const ImageType m_imageType;
984 const tcu::UVec3 m_imageSize;
985 const TextureFormat m_format;
986 const VkImageTiling m_tiling;
987 const AtomicOperation m_operation;
988 const bool m_useTransfer;
989 const ShaderReadType m_readType;
990 const ImageBackingType m_backingType;
991
992 de::MovePtr<BufferWithMemory> m_inputBuffer;
993 de::MovePtr<BufferWithMemory> m_outputBuffer;
994 Move<VkBufferView> m_descResultBufferView;
995 Move<VkBufferView> m_descIntermResultsBufferView;
996 Move<VkDescriptorPool> m_descriptorPool;
997 Move<VkDescriptorSetLayout> m_descriptorSetLayout;
998 Move<VkDescriptorSet> m_descriptorSet;
999
1000 Move<VkDescriptorSetLayout> m_descriptorSetLayoutNoTransfer;
1001 Move<VkDescriptorPool> m_descriptorPoolNoTransfer;
1002
1003 de::MovePtr<Image> m_resultImage;
1004 Move<VkImageView> m_resultImageView;
1005
1006 std::vector<VkSemaphore> m_waitSemaphores;
1007 };
1008
1009 BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context& context,
1010 const string& name,
1011 const ImageType imageType,
1012 const tcu::UVec3& imageSize,
1013 const TextureFormat& format,
1014 const VkImageTiling tiling,
1015 const AtomicOperation operation,
1016 const bool useTransfer,
1017 const ShaderReadType shaderReadType,
1018 const ImageBackingType backingType)
1019 : vkt::TestInstance (context)
1020 , m_name (name)
1021 , m_imageType (imageType)
1022 , m_imageSize (imageSize)
1023 , m_format (format)
1024 , m_tiling (tiling)
1025 , m_operation (operation)
1026 , m_useTransfer (useTransfer)
1027 , m_readType (shaderReadType)
1028 , m_backingType (backingType)
1029 {
1030 }
1031
1032 tcu::TestStatus BinaryAtomicInstanceBase::iterate (void)
1033 {
1034 const VkDevice device = m_context.getDevice();
1035 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1036 const VkQueue queue = m_context.getUniversalQueue();
1037 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1038 Allocator& allocator = m_context.getDefaultAllocator();
1039 const VkDeviceSize imageSizeInBytes = tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1040 const VkDeviceSize outBuffSizeInBytes = getOutputBufferSize();
1041 const VkFormat imageFormat = mapTextureFormat(m_format);
1042 const bool isTexelBuffer = (m_imageType == IMAGE_TYPE_BUFFER);
1043
1044 if (!isTexelBuffer)
1045 {
1046 createImageResources(imageFormat, m_useTransfer);
1047 }
1048
1049 tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1050
1051 // Prepare the buffer with the initial data for the image
1052 m_inputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(deviceInterface,
1053 device,
1054 allocator,
1055 makeBufferCreateInfo(imageSizeInBytes,
1056 VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
1057 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1058 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
1059 MemoryRequirement::HostVisible));
1060
1061 // Fill in buffer with initial data used for image.
1062 initDataForImage(device, deviceInterface, m_format, m_operation, gridSize, *m_inputBuffer);
1063
1064 // Create a buffer to store shader output copied from result image
1065 m_outputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(deviceInterface,
1066 device,
1067 allocator,
1068 makeBufferCreateInfo(outBuffSizeInBytes,
1069 VK_BUFFER_USAGE_TRANSFER_DST_BIT |
1070 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1071 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT : static_cast<VkBufferUsageFlagBits>(0u))),
1072 MemoryRequirement::HostVisible));
1073
1074 if (!isTexelBuffer)
1075 {
1076 prepareResources(m_useTransfer);
1077 }
1078
1079 prepareDescriptors(isTexelBuffer);
1080
1081 Move<VkDescriptorSet> descriptorSetFillImage;
1082 Move<VkShaderModule> shaderModuleFillImage;
1083 Move<VkPipelineLayout> pipelineLayoutFillImage;
1084 Move<VkPipeline> pipelineFillImage;
1085
1086 Move<VkDescriptorSet> descriptorSetReadImage;
1087 Move<VkShaderModule> shaderModuleReadImage;
1088 Move<VkPipelineLayout> pipelineLayoutReadImage;
1089 Move<VkPipeline> pipelineReadImage;
1090
1091 if (!m_useTransfer)
1092 {
1093 m_descriptorSetLayoutNoTransfer =
1094 DescriptorSetLayoutBuilder()
1095 .addSingleBinding((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), VK_SHADER_STAGE_COMPUTE_BIT)
1096 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1097 .build(deviceInterface, device);
1098
1099 m_descriptorPoolNoTransfer =
1100 DescriptorPoolBuilder()
1101 .addType((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), 2)
1102 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2)
1103 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);
1104
1105 descriptorSetFillImage = makeDescriptorSet(deviceInterface,
1106 device,
1107 *m_descriptorPoolNoTransfer,
1108 *m_descriptorSetLayoutNoTransfer);
1109
1110 descriptorSetReadImage = makeDescriptorSet(deviceInterface,
1111 device,
1112 *m_descriptorPoolNoTransfer,
1113 *m_descriptorSetLayoutNoTransfer);
1114
1115 shaderModuleFillImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("fillShader"), 0);
1116 pipelineLayoutFillImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
1117 pipelineFillImage = makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleFillImage);
1118
1119 if (m_readType == ShaderReadType::SPARSE)
1120 {
1121 shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShaderResidency"), 0);
1122 }
1123 else
1124 {
1125 shaderModuleReadImage = createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShader"), 0);
1126 }
1127 pipelineLayoutReadImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
1128 pipelineReadImage = makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleReadImage);
1129 }
1130
1131 // Create pipeline
1132 const Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
1133 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
1134 const Unique<VkPipeline> pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));
1135
1136 // Create command buffer
1137 const Unique<VkCommandPool> cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
1138 const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1139
1140 beginCommandBuffer(deviceInterface, *cmdBuffer);
1141
1142 if (!isTexelBuffer)
1143 {
1144 if (m_useTransfer)
1145 {
1146 const vector<VkBufferImageCopy> bufferImageCopy(1, makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize)));
1147 copyBufferToImage(deviceInterface,
1148 *cmdBuffer,
1149 *(*m_inputBuffer),
1150 imageSizeInBytes,
1151 bufferImageCopy,
1152 VK_IMAGE_ASPECT_COLOR_BIT,
1153 1,
1154 getNumLayers(m_imageType, m_imageSize), m_resultImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
1155 }
1156 else
1157 {
1158 shaderFillImage(*cmdBuffer, *(*m_inputBuffer), *pipelineFillImage, *pipelineLayoutFillImage, *descriptorSetFillImage, imageSizeInBytes, gridSize);
1159 }
1160 commandsBeforeCompute(*cmdBuffer);
1161 }
1162
1163 deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1164 deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
1165
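// The dispatch is NUM_INVOCATIONS_PER_PIXEL times wider in X than the image, so several invocations hit each texel.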
1166 deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1167
1168 commandsAfterCompute(*cmdBuffer,
1169 *pipelineReadImage,
1170 *pipelineLayoutReadImage,
1171 *descriptorSetReadImage,
1172 outBuffSizeInBytes,
1173 m_useTransfer);
1174
1175 const VkBufferMemoryBarrier outputBufferPreHostReadBarrier
1176 = makeBufferMemoryBarrier(((m_useTransfer || isTexelBuffer) ? VK_ACCESS_TRANSFER_WRITE_BIT : VK_ACCESS_SHADER_WRITE_BIT),
1177 VK_ACCESS_HOST_READ_BIT,
1178 m_outputBuffer->get(),
1179 0ull,
1180 outBuffSizeInBytes);
1181
1182 deviceInterface.cmdPipelineBarrier(*cmdBuffer,
1183 ((m_useTransfer || isTexelBuffer) ? VK_PIPELINE_STAGE_TRANSFER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT),
1184 VK_PIPELINE_STAGE_HOST_BIT,
1185 DE_FALSE, 0u, DE_NULL,
1186 1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);
1187
1188 endCommandBuffer(deviceInterface, *cmdBuffer);
1189
1190 std::vector<VkPipelineStageFlags> waitStages(m_waitSemaphores.size(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
1191 submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer, false, 1u,
1192 static_cast<deUint32>(m_waitSemaphores.size()), de::dataOrNull(m_waitSemaphores), de::dataOrNull(waitStages));
1193
1194 Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();
1195
1196 invalidateAlloc(deviceInterface, device, outputBufferAllocation);
1197
1198 if (verifyResult(outputBufferAllocation, (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)))
1199 return tcu::TestStatus::pass("Comparison succeeded");
1200 else
1201 return tcu::TestStatus::fail("Comparison failed");
1202 }
1203
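//! Initializes the result image from the input buffer using the fill compute shader and inserts the barriers needed before the atomic pass.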
1204 void BinaryAtomicInstanceBase::shaderFillImage (const VkCommandBuffer cmdBuffer,
1205 const VkBuffer& buffer,
1206 const VkPipeline pipeline,
1207 const VkPipelineLayout pipelineLayout,
1208 const VkDescriptorSet descriptorSet,
1209 const VkDeviceSize& range,
1210 const tcu::UVec3& gridSize)
1211 {
1212 const VkDevice device = m_context.getDevice();
1213 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1214 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1215 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(buffer, 0, range);
1216 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1217
1218 DescriptorSetUpdateBuilder()
1219 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1220 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1221 .update(deviceInterface, device);
1222
1223 const VkImageMemoryBarrier imageBarrierPre = makeImageMemoryBarrier(0,
1224 VK_ACCESS_SHADER_WRITE_BIT,
1225 VK_IMAGE_LAYOUT_UNDEFINED,
1226 VK_IMAGE_LAYOUT_GENERAL,
1227 m_resultImage->get(),
1228 subresourceRange);
1229
1230 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1231 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1232 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1233 (VkDependencyFlags)0,
1234 0, (const VkMemoryBarrier*)DE_NULL,
1235 0, (const VkBufferMemoryBarrier*)DE_NULL,
1236 1, &imageBarrierPre);
1237
1238 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1239 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
1240
1241 deviceInterface.cmdDispatch(cmdBuffer, gridSize.x(), gridSize.y(), gridSize.z());
1242
1243 const VkImageMemoryBarrier imageBarrierPost = makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT,
1244 VK_ACCESS_SHADER_READ_BIT,
1245 VK_IMAGE_LAYOUT_GENERAL,
1246 VK_IMAGE_LAYOUT_GENERAL,
1247 m_resultImage->get(),
1248 subresourceRange);
1249
1250 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1251 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1252 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1253 (VkDependencyFlags)0,
1254 0, (const VkMemoryBarrier*)DE_NULL,
1255 0, (const VkBufferMemoryBarrier*)DE_NULL,
1256 1, &imageBarrierPost);
1257 }
1258
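//! Creates the result image (optionally sparse-backed) and its view; for sparse images the binding semaphore is collected so the final submit waits for it.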
1259 void BinaryAtomicInstanceBase::createImageAndView (VkFormat imageFormat,
1260 const tcu::UVec3& imageExtent,
1261 bool useTransfer,
1262 de::MovePtr<Image>& imagePtr,
1263 Move<VkImageView>& imageViewPtr)
1264 {
1265 const VkDevice device = m_context.getDevice();
1266 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1267 Allocator& allocator = m_context.getDefaultAllocator();
1268 const VkImageUsageFlags usageFlags = getUsageFlags(useTransfer);
1269 VkImageCreateFlags createFlags = 0u;
1270
1271 if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
1272 createFlags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
1273
1274 const auto numLayers = getNumLayers(m_imageType, m_imageSize);
1275
1276 VkImageCreateInfo createInfo =
1277 {
1278 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1279 DE_NULL, // const void* pNext;
1280 createFlags, // VkImageCreateFlags flags;
1281 mapImageType(m_imageType), // VkImageType imageType;
1282 imageFormat, // VkFormat format;
1283 makeExtent3D(imageExtent), // VkExtent3D extent;
1284 1u, // deUint32 mipLevels;
1285 numLayers, // deUint32 arrayLayers;
1286 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
1287 m_tiling, // VkImageTiling tiling;
1288 usageFlags, // VkImageUsageFlags usage;
1289 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1290 0u, // deUint32 queueFamilyIndexCount;
1291 DE_NULL, // const deUint32* pQueueFamilyIndices;
1292 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
1293 };
1294
1295 #ifndef CTS_USES_VULKANSC
1296 if (m_backingType == ImageBackingType::SPARSE)
1297 {
1298 const auto& vki = m_context.getInstanceInterface();
1299 const auto physicalDevice = m_context.getPhysicalDevice();
1300 const auto sparseQueue = m_context.getSparseQueue();
1301 const auto sparseQueueIdx = m_context.getSparseQueueFamilyIndex();
1302 const auto universalQIdx = m_context.getUniversalQueueFamilyIndex();
1303 const deUint32 queueIndices[] = { universalQIdx, sparseQueueIdx };
1304
1305 createInfo.flags |= (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);
1306
1307 if (sparseQueueIdx != universalQIdx)
1308 {
1309 createInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
1310 createInfo.queueFamilyIndexCount = static_cast<deUint32>(DE_LENGTH_OF_ARRAY(queueIndices));
1311 createInfo.pQueueFamilyIndices = queueIndices;
1312 }
1313
1314 const auto sparseImage = new SparseImage(deviceInterface, device, physicalDevice, vki, createInfo, sparseQueue, allocator, m_format);
1315 m_waitSemaphores.push_back(sparseImage->getSemaphore());
1316 imagePtr = de::MovePtr<Image>(sparseImage);
1317 }
1318 else
1319 #endif // CTS_USES_VULKANSC
1320 imagePtr = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, createInfo, MemoryRequirement::Any));
1321
1322 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numLayers);
1323
1324 imageViewPtr = makeImageView(deviceInterface, device, imagePtr->get(), mapImageViewType(m_imageType), imageFormat, subresourceRange);
1325 }
1326
1327 void BinaryAtomicInstanceBase::createImageResources (const VkFormat& imageFormat,
1328 const bool useTransfer)
1329 {
1330 // Create the image that is going to store results of atomic operations
1331 createImageAndView(imageFormat, getLayerSize(m_imageType, m_imageSize), useTransfer, m_resultImage, m_resultImageView);
1332 }
1333
1334 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
1335 {
1336 public:
1337
1338 BinaryAtomicEndResultInstance (Context& context,
1339 const string& name,
1340 const ImageType imageType,
1341 const tcu::UVec3& imageSize,
1342 const TextureFormat& format,
1343 const VkImageTiling tiling,
1344 const AtomicOperation operation,
1345 const bool useTransfer,
1346 const ShaderReadType shaderReadType,
1347 const ImageBackingType backingType)
1348 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer, shaderReadType, backingType) {}
1349
1350 virtual deUint32 getOutputBufferSize (void) const;
1351
1352 virtual void prepareResources (const bool useTransfer) { DE_UNREF(useTransfer); }
1353 virtual void prepareDescriptors (const bool isTexelBuffer);
1354
1355 virtual void commandsBeforeCompute (const VkCommandBuffer) const {}
1356 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1357 const VkPipeline pipeline,
1358 const VkPipelineLayout pipelineLayout,
1359 const VkDescriptorSet descriptorSet,
1360 const VkDeviceSize& range,
1361 const bool useTransfer);
1362
1363 virtual bool verifyResult (Allocation& outputBufferAllocation,
1364 const bool is64Bit) const;
1365
1366 protected:
1367
1368 template <typename T>
1369 bool isValueCorrect (const T resultValue,
1370 deInt32 x,
1371 deInt32 y,
1372 deInt32 z,
1373 const UVec3& gridSize,
1374 const IVec3 extendedGridSize) const;
1375 };
1376
1377 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
1378 {
1379 return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1380 }
1381
1382 void BinaryAtomicEndResultInstance::prepareDescriptors (const bool isTexelBuffer)
1383 {
1384 const VkDescriptorType descriptorType = isTexelBuffer ?
1385 VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
1386 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1387 const VkDevice device = m_context.getDevice();
1388 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1389
1390 m_descriptorSetLayout =
1391 DescriptorSetLayoutBuilder()
1392 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1393 .build(deviceInterface, device);
1394
1395 m_descriptorPool =
1396 DescriptorPoolBuilder()
1397 .addType(descriptorType)
1398 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1399
1400 m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1401
1402 if (isTexelBuffer)
1403 {
1404 m_descResultBufferView = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1405
1406 DescriptorSetUpdateBuilder()
1407 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
1408 .update(deviceInterface, device);
1409 }
1410 else
1411 {
1412 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1413
1414 DescriptorSetUpdateBuilder()
1415 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
1416 .update(deviceInterface, device);
1417 }
1418 }
1419
1420 void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1421 const VkPipeline pipeline,
1422 const VkPipelineLayout pipelineLayout,
1423 const VkDescriptorSet descriptorSet,
1424 const VkDeviceSize& range,
1425 const bool useTransfer)
1426 {
1427 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1428 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1429 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1430
1431 if (m_imageType == IMAGE_TYPE_BUFFER)
1432 {
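		// For buffer-backed images the atomics operated directly on the texel buffer, so it is reused as the output buffer as-is.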
1433 m_outputBuffer = m_inputBuffer;
1434 }
1435 else if (useTransfer)
1436 {
1437 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1438 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1439 VK_ACCESS_TRANSFER_READ_BIT,
1440 VK_IMAGE_LAYOUT_GENERAL,
1441 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1442 m_resultImage->get(),
1443 subresourceRange);
1444
1445 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1446 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1447 VK_PIPELINE_STAGE_TRANSFER_BIT,
1448 DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1449 1u, &resultImagePostDispatchBarrier);
1450
1451 const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(layerSize), getNumLayers(m_imageType, m_imageSize));
1452
1453 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
1454 }
1455 else
1456 {
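		// Without transfer usage the result image cannot be copied with vkCmdCopyImageToBuffer. Instead the
		// descriptor set of the separate read pipeline is updated to point at the result image and the output
		// buffer, and a compute dispatch copies the texels into the buffer after a shader-to-shader barrier.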
1457 const VkDevice device = m_context.getDevice();
1458 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1459 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1460
1461 DescriptorSetUpdateBuilder()
1462 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1463 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1464 .update(deviceInterface, device);
1465
1466 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1467 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1468 VK_ACCESS_SHADER_READ_BIT,
1469 VK_IMAGE_LAYOUT_GENERAL,
1470 VK_IMAGE_LAYOUT_GENERAL,
1471 m_resultImage->get(),
1472 subresourceRange);
1473
1474 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1475 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1476 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1477 DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1478 1u, &resultImagePostDispatchBarrier);
1479
1480 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1481 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
1482
1483 switch (m_imageType)
1484 {
1485 case IMAGE_TYPE_1D_ARRAY:
1486 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), subresourceRange.layerCount, layerSize.z());
1487 break;
1488 case IMAGE_TYPE_2D_ARRAY:
1489 case IMAGE_TYPE_CUBE:
1490 case IMAGE_TYPE_CUBE_ARRAY:
1491 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1492 break;
1493 default:
1494 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), layerSize.z());
1495 break;
1496 }
1497 }
1498 }
1499
1500 bool BinaryAtomicEndResultInstance::verifyResult (Allocation& outputBufferAllocation,
1501 const bool is64Bit) const
1502 {
1503 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1504 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
1505
1506 tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());
1507
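	// For commutative and associative ("order independent") operations the final value must match the reference
	// obtained by folding all NUM_INVOCATIONS_PER_PIXEL arguments into the initial value. For (compare-)exchange
	// the final value only has to equal one of the arguments used by the invocations that touched this texel.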
1508 for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
1509 for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1510 for (deInt32 x = 0; x < resultBuffer.getWidth(); x++)
1511 {
1512 const void* resultValue = resultBuffer.getPixelPtr(x, y, z);
1513 deInt32 floatToIntValue = 0;
1514 bool isFloatValue = false;
1515 if (isFloatFormat(mapTextureFormat(m_format)))
1516 {
1517 isFloatValue = true;
1518 floatToIntValue = static_cast<deInt32>(*((float*)resultValue));
1519 }
1520
1521 if (isOrderIndependentAtomicOperation(m_operation))
1522 {
1523 if (isUintFormat(mapTextureFormat(m_format)))
1524 {
1525 if (is64Bit)
1526 {
1527 if (!isValueCorrect<deUint64>(*((deUint64*)resultValue), x, y, z, gridSize, extendedGridSize))
1528 return false;
1529 }
1530 else
1531 {
1532 if (!isValueCorrect<deUint32>(*((deUint32*)resultValue), x, y, z, gridSize, extendedGridSize))
1533 return false;
1534 }
1535 }
1536 else if (isIntFormat(mapTextureFormat(m_format)))
1537 {
1538 if (is64Bit)
1539 {
1540 if (!isValueCorrect<deInt64>(*((deInt64*)resultValue), x, y, z, gridSize, extendedGridSize))
1541 return false;
1542 }
1543 else
1544 {
1545 if (!isValueCorrect<deInt32>(*((deInt32*)resultValue), x, y, z, gridSize, extendedGridSize))
1546 return false;
1547 }
1548 }
1549 else
1550 {
1551 // 32-bit floating point
1552 if (!isValueCorrect<deInt32>(floatToIntValue, x, y, z, gridSize, extendedGridSize))
1553 return false;
1554 }
1555 }
1556 else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
1557 {
1558 // Check if the end result equals one of the atomic args.
1559 bool matchFound = false;
1560
1561 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1562 {
1563 const IVec3 gid(x + i*gridSize.x(), y, z);
1564 matchFound = is64Bit ?
1565 (*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
1566 isFloatValue ?
1567 floatToIntValue == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize) :
1568 (*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
1569
1570 }
1571
1572 if (!matchFound)
1573 return false;
1574 }
1575 else if (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
1576 {
1577 // Check if the end result equals one of the atomic args.
1578 bool matchFound = false;
1579
1580 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1581 {
1582 const IVec3 gid(x + i*gridSize.x(), y, z);
1583 matchFound = is64Bit ?
1584 (*((deInt64*)resultValue) == getAtomicFuncArgument<deInt64>(m_operation, gid, extendedGridSize)) :
1585 isFloatValue ?
1586 floatToIntValue == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize) :
1587 (*((deInt32*)resultValue) == getAtomicFuncArgument<deInt32>(m_operation, gid, extendedGridSize));
1588 }
1589
1590 if (!matchFound)
1591 return false;
1592 }
1593 else
1594 DE_ASSERT(false);
1595 }
1596 return true;
1597 }
1598
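// Reference computation for order-independent operations: the per-invocation arguments for this texel are folded
// into the operation's initial value. For example, with ATOMIC_OPERATION_ADD the reference is the initial value
// plus the sum of the NUM_INVOCATIONS_PER_PIXEL arguments.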
1599 template <typename T>
1600 bool BinaryAtomicEndResultInstance::isValueCorrect(const T resultValue, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
1601 {
1602 T reference = getOperationInitialValue<T>(m_operation);
1603 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1604 {
1605 const IVec3 gid(x + i*gridSize.x(), y, z);
1606 T arg = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1607 reference = computeBinaryAtomicOperationResult(m_operation, reference, arg);
1608 }
1609 return (resultValue == reference);
1610 }
1611
1612 TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
1613 {
1614 return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling, m_operation, m_useTransfer, m_readType, m_backingType);
1615 }
1616
1617 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
1618 {
1619 public:
1620
1621 BinaryAtomicIntermValuesInstance (Context& context,
1622 const string& name,
1623 const ImageType imageType,
1624 const tcu::UVec3& imageSize,
1625 const TextureFormat& format,
1626 const VkImageTiling tiling,
1627 const AtomicOperation operation,
1628 const bool useTransfer,
1629 const ShaderReadType shaderReadType,
1630 const ImageBackingType backingType)
1631 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer, shaderReadType, backingType) {}
1632
1633 virtual deUint32 getOutputBufferSize (void) const;
1634
1635 virtual void prepareResources (const bool useTransfer);
1636 virtual void prepareDescriptors (const bool isTexelBuffer);
1637
1638 virtual void commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const;
1639 virtual void commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1640 const VkPipeline pipeline,
1641 const VkPipelineLayout pipelineLayout,
1642 const VkDescriptorSet descriptorSet,
1643 const VkDeviceSize& range,
1644 const bool useTransfer);
1645
1646 virtual bool verifyResult (Allocation& outputBufferAllocation,
1647 const bool is64Bit) const;
1648
1649 protected:
1650
1651 template <typename T>
1652 bool areValuesCorrect (tcu::ConstPixelBufferAccess& resultBuffer,
1653 const bool isFloatingPoint,
1654 deInt32 x,
1655 deInt32 y,
1656 deInt32 z,
1657 const UVec3& gridSize,
1658 const IVec3 extendedGridSize) const;
1659
1660 template <typename T>
1661 bool verifyRecursive (const deInt32 index,
1662 const T valueSoFar,
1663 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1664 const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1665 const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
1666 de::MovePtr<Image> m_intermResultsImage;
1667 Move<VkImageView> m_intermResultsImageView;
1668 };
1669
1670 deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
1671 {
1672 return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1673 }
1674
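// The intermediate-results image stores the value returned by each atomic invocation in its own texel, so the
// layer size is extended by NUM_INVOCATIONS_PER_PIXEL in width (and, for cube-based images, also in height,
// which keeps the faces square).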
1675 void BinaryAtomicIntermValuesInstance::prepareResources (const bool useTransfer)
1676 {
1677 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1678 const bool isCubeBasedImage = (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
1679 const UVec3 extendedLayerSize = isCubeBasedImage ? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
1680 : UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1681
1682 createImageAndView(mapTextureFormat(m_format), extendedLayerSize, useTransfer, m_intermResultsImage, m_intermResultsImageView);
1683 }
1684
1685 void BinaryAtomicIntermValuesInstance::prepareDescriptors (const bool isTexelBuffer)
1686 {
1687 const VkDescriptorType descriptorType = isTexelBuffer ?
1688 VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER :
1689 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1690
1691 const VkDevice device = m_context.getDevice();
1692 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1693
1694 m_descriptorSetLayout =
1695 DescriptorSetLayoutBuilder()
1696 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1697 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1698 .build(deviceInterface, device);
1699
1700 m_descriptorPool =
1701 DescriptorPoolBuilder()
1702 .addType(descriptorType, 2u)
1703 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1704
1705 m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1706
1707 if (isTexelBuffer)
1708 {
1709 m_descResultBufferView = makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1710 m_descIntermResultsBufferView = makeBufferView(deviceInterface, device, *(*m_outputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1711
1712 DescriptorSetUpdateBuilder()
1713 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &(m_descResultBufferView.get()))
1714 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &(m_descIntermResultsBufferView.get()))
1715 .update(deviceInterface, device);
1716 }
1717 else
1718 {
1719 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1720 const VkDescriptorImageInfo descIntermResultsImageInfo = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1721
1722 DescriptorSetUpdateBuilder()
1723 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType, &descResultImageInfo)
1724 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType, &descIntermResultsImageInfo)
1725 .update(deviceInterface, device);
1726 }
1727 }
1728
1729 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
1730 {
1731 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1732 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1733
1734 const VkImageMemoryBarrier imagePreDispatchBarrier =
1735 makeImageMemoryBarrier( 0u,
1736 VK_ACCESS_SHADER_WRITE_BIT,
1737 VK_IMAGE_LAYOUT_UNDEFINED,
1738 VK_IMAGE_LAYOUT_GENERAL,
1739 m_intermResultsImage->get(),
1740 subresourceRange);
1741
1742 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
1743 }
1744
1745 void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer,
1746 const VkPipeline pipeline,
1747 const VkPipelineLayout pipelineLayout,
1748 const VkDescriptorSet descriptorSet,
1749 const VkDeviceSize& range,
1750 const bool useTransfer)
1751 {
1752 // Nothing is needed for texel buffer images.
1753 if (m_imageType == IMAGE_TYPE_BUFFER)
1754 return;
1755
1756 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
1757 const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1758 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1759
1760 if (useTransfer)
1761 {
1762 const VkImageMemoryBarrier imagePostDispatchBarrier =
1763 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1764 VK_ACCESS_TRANSFER_READ_BIT,
1765 VK_IMAGE_LAYOUT_GENERAL,
1766 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1767 m_intermResultsImage->get(),
1768 subresourceRange);
1769
1770 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);
1771
1772 const UVec3 extendedLayerSize = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1773 const VkBufferImageCopy bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
1774
1775 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
1776 }
1777 else
1778 {
1779 const VkDevice device = m_context.getDevice();
1780 const VkDescriptorImageInfo descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1781 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1782
1783 DescriptorSetUpdateBuilder()
1784 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1785 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1786 .update(deviceInterface, device);
1787
1788 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1789 makeImageMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
1790 VK_ACCESS_SHADER_READ_BIT,
1791 VK_IMAGE_LAYOUT_GENERAL,
1792 VK_IMAGE_LAYOUT_GENERAL,
1793 m_intermResultsImage->get(),
1794 subresourceRange);
1795
1796 deviceInterface.cmdPipelineBarrier( cmdBuffer,
1797 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1798 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1799 DE_FALSE, 0u, DE_NULL, 0u, DE_NULL,
1800 1u, &resultImagePostDispatchBarrier);
1801
1802 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1803 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);
1804
1805 switch (m_imageType)
1806 {
1807 case IMAGE_TYPE_1D_ARRAY:
1808 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), subresourceRange.layerCount, layerSize.z());
1809 break;
1810 case IMAGE_TYPE_2D_ARRAY:
1811 case IMAGE_TYPE_CUBE:
1812 case IMAGE_TYPE_CUBE_ARRAY:
1813 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1814 break;
1815 default:
1816 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1817 break;
1818 }
1819 }
1820 }
1821
1822 bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation& outputBufferAllocation,
1823 const bool is64Bit) const
1824 {
1825 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1826 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
1827
1828 tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());
1829
1830 for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
1831 for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
1832 for (deUint32 x = 0; x < gridSize.x(); x++)
1833 {
1834 if (isUintFormat(mapTextureFormat(m_format)))
1835 {
1836 if (is64Bit)
1837 {
1838 if (!areValuesCorrect<deUint64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1839 return false;
1840 }
1841 else
1842 {
1843 if (!areValuesCorrect<deUint32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1844 return false;
1845 }
1846 }
1847 else if (isIntFormat(mapTextureFormat(m_format)))
1848 {
1849 if (is64Bit)
1850 {
1851 if (!areValuesCorrect<deInt64>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1852 return false;
1853 }
1854 else
1855 {
1856 if (!areValuesCorrect<deInt32>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1857 return false;
1858 }
1859 }
1860 else
1861 {
1862 // 32-bit floating point
1863 if (!areValuesCorrect<deInt32>(resultBuffer, true, x, y, z, gridSize, extendedGridSize))
1864 return false;
1865 }
1866 }
1867
1868 return true;
1869 }
1870
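// Gathers the NUM_INVOCATIONS_PER_PIXEL returned values and the corresponding atomic arguments for one texel of
// the original grid. For floating-point formats the stored bits are reinterpreted as float and converted to the
// integer type T, matching the conversion used by the end-result check.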
1871 template <typename T>
1872 bool BinaryAtomicIntermValuesInstance::areValuesCorrect(tcu::ConstPixelBufferAccess& resultBuffer, const bool isFloatingPoint, deInt32 x, deInt32 y, deInt32 z, const UVec3& gridSize, const IVec3 extendedGridSize) const
1873 {
1874 T resultValues[NUM_INVOCATIONS_PER_PIXEL];
1875 T atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
1876 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL];
1877
1878 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1879 {
1880 IVec3 gid(x + i*gridSize.x(), y, z);
1881 T data = *((T*)resultBuffer.getPixelPtr(gid.x(), gid.y(), gid.z()));
1882 if (isFloatingPoint)
1883 {
1884 float fData;
1885 deMemcpy(&fData, &data, sizeof(fData));
1886 data = static_cast<T>(fData);
1887 }
1888 resultValues[i] = data;
1889 atomicArgs[i] = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1890 argsUsed[i] = false;
1891 }
1892
1893 // Verify that the return values form a valid sequence.
1894 return verifyRecursive(0, getOperationInitialValue<T>(m_operation), argsUsed, atomicArgs, resultValues);
1895 }
1896
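// Backtracking search over all serialization orders: a set of returned values is accepted if the invocations can
// be ordered so that each one observed exactly the value produced by those scheduled before it. Illustrated with
// two invocations for brevity: for atomicAdd with initial value 0 and arguments {1, 2}, returned values {0, 1}
// (the +1 invocation ran first) and {2, 0} (the +2 invocation ran first) are valid, while {1, 1} is not, since no
// invocation could have observed 1 before anything was added.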
1897 template <typename T>
1898 bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32 index,
1899 const T valueSoFar,
1900 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1901 const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1902 const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
1903 {
1904 if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
1905 return true;
1906
1907 for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
1908 {
1909 if (!argsUsed[i] && resultValues[i] == valueSoFar)
1910 {
1911 argsUsed[i] = true;
1912
1913 if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
1914 {
1915 return true;
1916 }
1917
1918 argsUsed[i] = false;
1919 }
1920 }
1921
1922 return false;
1923 }
1924
1925 TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
1926 {
1927 return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling, m_operation, m_useTransfer, m_readType, m_backingType);
1928 }
1929
1930 } // anonymous ns
1931
1932 tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
1933 {
1934 de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));
1935
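	// The resulting test hierarchy is operation / image type / (no)transfer / read type / backing type, with leaf
	// cases named after the shader image format qualifier, e.g. a case name along the lines of
	// "add.2d.notransfer.normal_read.normal_img.r32ui_end_result" (exact group names come from the helpers above).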
1936 struct ImageParams
1937 {
1938 ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
1939 : m_imageType (imageType)
1940 , m_imageSize (imageSize)
1941 {
1942 }
1943 const ImageType m_imageType;
1944 const tcu::UVec3 m_imageSize;
1945 };
1946
1947 const ImageParams imageParamsArray[] =
1948 {
1949 ImageParams(IMAGE_TYPE_1D, tcu::UVec3(64u, 1u, 1u)),
1950 ImageParams(IMAGE_TYPE_1D_ARRAY, tcu::UVec3(64u, 1u, 8u)),
1951 ImageParams(IMAGE_TYPE_2D, tcu::UVec3(64u, 64u, 1u)),
1952 ImageParams(IMAGE_TYPE_2D_ARRAY, tcu::UVec3(64u, 64u, 8u)),
1953 ImageParams(IMAGE_TYPE_3D, tcu::UVec3(48u, 48u, 8u)),
1954 ImageParams(IMAGE_TYPE_CUBE, tcu::UVec3(64u, 64u, 1u)),
1955 ImageParams(IMAGE_TYPE_CUBE_ARRAY, tcu::UVec3(64u, 64u, 2u)),
1956 ImageParams(IMAGE_TYPE_BUFFER, tcu::UVec3(64u, 1u, 1u))
1957 };
1958
1959 const tcu::TextureFormat formats[] =
1960 {
1961 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
1962 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
1963 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::FLOAT),
1964 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT64),
1965 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT64)
1966 };
1967
1968 static const VkImageTiling s_tilings[] = {
1969 VK_IMAGE_TILING_OPTIMAL,
1970 VK_IMAGE_TILING_LINEAR,
1971 };
1972
1973 const struct
1974 {
1975 ShaderReadType type;
1976 const char* name;
1977 } readTypes[] =
1978 {
1979 { ShaderReadType::NORMAL, "normal_read" },
1980 #ifndef CTS_USES_VULKANSC
1981 { ShaderReadType::SPARSE, "sparse_read" },
1982 #endif // CTS_USES_VULKANSC
1983 };
1984
1985 const struct
1986 {
1987 ImageBackingType type;
1988 const char* name;
1989 } backingTypes[] =
1990 {
1991 { ImageBackingType::NORMAL, "normal_img" },
1992 #ifndef CTS_USES_VULKANSC
1993 { ImageBackingType::SPARSE, "sparse_img" },
1994 #endif // CTS_USES_VULKANSC
1995 };
1996
1997 for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
1998 {
1999 const AtomicOperation operation = (AtomicOperation)operationI;
2000
2001 de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));
2002
2003 for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
2004 {
2005 const ImageType imageType = imageParamsArray[imageTypeNdx].m_imageType;
2006 const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
2007
2008 de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
2009
2010 for (int useTransferIdx = 0; useTransferIdx < 2; ++useTransferIdx)
2011 {
2012 const bool useTransfer = (useTransferIdx > 0);
2013 const string groupName = (!useTransfer ? "no" : "") + string("transfer");
2014
2015 de::MovePtr<tcu::TestCaseGroup> transferGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str(), ""));
2016
2017 for (int readTypeIdx = 0; readTypeIdx < DE_LENGTH_OF_ARRAY(readTypes); ++readTypeIdx)
2018 {
2019 const auto& readType = readTypes[readTypeIdx];
2020
2021 de::MovePtr<tcu::TestCaseGroup> readTypeGroup(new tcu::TestCaseGroup(testCtx, readType.name, ""));
2022
2023 for (int backingTypeIdx = 0; backingTypeIdx < DE_LENGTH_OF_ARRAY(backingTypes); ++backingTypeIdx)
2024 {
2025 const auto& backingType = backingTypes[backingTypeIdx];
2026
2027 de::MovePtr<tcu::TestCaseGroup> backingTypeGroup(new tcu::TestCaseGroup(testCtx, backingType.name, ""));
2028
2029 for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
2030 {
2031 for (int tilingNdx = 0; tilingNdx < DE_LENGTH_OF_ARRAY(s_tilings); tilingNdx++)
2032 {
2033 const TextureFormat& format = formats[formatNdx];
2034 const std::string formatName = getShaderImageFormatQualifier(format);
2035 const char* suffix = (s_tilings[tilingNdx] == VK_IMAGE_TILING_OPTIMAL) ? "" : "_linear";
2036
2037 // Need SPIRV programs in vktImageAtomicSpirvShaders.cpp
2038 if (imageType == IMAGE_TYPE_BUFFER && (format.type != tcu::TextureFormat::FLOAT))
2039 {
2040 continue;
2041 }
2042
2043 // Only 2D and 3D images may support sparse residency.
2044 // VK_IMAGE_TILING_LINEAR does not support sparse residency either.
2045 const auto vkImageType = mapImageType(imageType);
2046 if (backingType.type == ImageBackingType::SPARSE && ((vkImageType != VK_IMAGE_TYPE_2D && vkImageType != VK_IMAGE_TYPE_3D) || (s_tilings[tilingNdx] == VK_IMAGE_TILING_LINEAR)))
2047 continue;
2048
2049 // Only some operations are supported on floating-point
2050 if (format.type == tcu::TextureFormat::FLOAT)
2051 {
2052 if (operation != ATOMIC_OPERATION_ADD &&
2053 #ifndef CTS_USES_VULKANSC
2054 operation != ATOMIC_OPERATION_MIN &&
2055 operation != ATOMIC_OPERATION_MAX &&
2056 #endif // CTS_USES_VULKANSC
2057 operation != ATOMIC_OPERATION_EXCHANGE)
2058 {
2059 continue;
2060 }
2061 }
2062
2063 if (readType.type == ShaderReadType::SPARSE)
2064 {
2065 // When using transfer, shader reads will not be used, so avoid creating two identical cases.
2066 if (useTransfer)
2067 continue;
2068
2069 // Sparse reads are not supported for all types of images.
2070 if (imageType == IMAGE_TYPE_1D || imageType == IMAGE_TYPE_1D_ARRAY || imageType == IMAGE_TYPE_BUFFER)
2071 continue;
2072 }
2073
2074 //!< Atomic case checks the end result of the operations, and not the intermediate return values.
2075 const string caseEndResult = formatName + "_end_result" + suffix;
2076 backingTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, s_tilings[tilingNdx], operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2077
2078 //!< Atomic case checks the return values of the atomic function and not the end result.
2079 const string caseIntermValues = formatName + "_intermediate_values" + suffix;
2080 backingTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, s_tilings[tilingNdx], operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2081 }
2082 }
2083
2084 readTypeGroup->addChild(backingTypeGroup.release());
2085 }
2086
2087 transferGroup->addChild(readTypeGroup.release());
2088 }
2089
2090 imageTypeGroup->addChild(transferGroup.release());
2091 }
2092
2093 operationGroup->addChild(imageTypeGroup.release());
2094 }
2095
2096 imageAtomicOperationsTests->addChild(operationGroup.release());
2097 }
2098
2099 return imageAtomicOperationsTests.release();
2100 }
2101
2102 } // image
2103 } // vkt
2104