/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2019 The Khronos Group Inc.
 * Copyright (c) 2019 The Android Open Source Project
 * Copyright (c) 2023 LunarG, Inc.
 * Copyright (c) 2023 Nintendo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Compute Shader Tests
 *//*--------------------------------------------------------------------*/

#include "vktComputeBasicComputeShaderTests.hpp"
#include "vktTestCase.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktComputeTestsUtil.hpp"
#include "vktCustomInstancesDevices.hpp"
#include "vktAmberTestCase.hpp"

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkDeviceUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBufferWithMemory.hpp"
#include "vkSafetyCriticalUtil.hpp"
#include "vkImageWithMemory.hpp"

#include "tcuCommandLine.hpp"
#include "tcuTestLog.hpp"
#include "tcuMaybe.hpp"

#include "deMath.h"
#include "deRandom.hpp"
#include "deStringUtil.hpp"
#include "deUniquePtr.hpp"

#include <vector>
#include <memory>

using namespace vk;

namespace vkt
{
namespace compute
{
namespace
{

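// Flattens a vector into a single count, e.g. multiplyComponents(tcu::IVec3(2, 3, 4)) == 24;
// used throughout to turn a 3D local/work size into a flat invocation or group count.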
template <typename T, int size>
T multiplyComponents(const tcu::Vector<T, size> &v)
{
    T accum = 1;
    for (int i = 0; i < size; ++i)
        accum *= v[i];
    return accum;
}

template <typename T>
inline T squared(const T &a)
{
    return a * a;
}

inline VkImageCreateInfo make2DImageCreateInfo(const tcu::IVec2 &imageSize, const VkImageUsageFlags usage)
{
    const VkImageCreateInfo imageParams = {
        VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,               // VkStructureType sType;
        nullptr,                                           // const void* pNext;
        0u,                                                // VkImageCreateFlags flags;
        VK_IMAGE_TYPE_2D,                                  // VkImageType imageType;
        VK_FORMAT_R32_UINT,                                // VkFormat format;
        vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), // VkExtent3D extent;
        1u,                                                // uint32_t mipLevels;
        1u,                                                // uint32_t arrayLayers;
        VK_SAMPLE_COUNT_1_BIT,                             // VkSampleCountFlagBits samples;
        VK_IMAGE_TILING_OPTIMAL,                           // VkImageTiling tiling;
        usage,                                             // VkImageUsageFlags usage;
        VK_SHARING_MODE_EXCLUSIVE,                         // VkSharingMode sharingMode;
        0u,                                                // uint32_t queueFamilyIndexCount;
        nullptr,                                           // const uint32_t* pQueueFamilyIndices;
        VK_IMAGE_LAYOUT_UNDEFINED,                         // VkImageLayout initialLayout;
    };
    return imageParams;
}

inline VkBufferImageCopy makeBufferImageCopy(const tcu::IVec2 &imageSize)
{
    return compute::makeBufferImageCopy(vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), 1u);
}

enum BufferType
{
    BUFFER_TYPE_UNIFORM,
    BUFFER_TYPE_SSBO,
};

class SharedVarTest : public vkt::TestCase
{
public:
    SharedVarTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec3 &localSize,
                  const tcu::IVec3 &workSize,
                  const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class SharedVarTestInstance : public vkt::TestInstance
{
public:
    SharedVarTestInstance(Context &context, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
                          const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

SharedVarTest::SharedVarTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec3 &localSize,
                             const tcu::IVec3 &workSize,
                             const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

void SharedVarTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void SharedVarTest::initPrograms(SourceCollections &sourceCollections) const
{
    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);
    const int numValues = workGroupSize * workGroupCount;

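    // Each invocation stores (globalOffs + localOffs^2) at the mirrored shared index
    // (localSize - localOffs - 1), synchronizes the work group, then reads back its own
    // slot, so values[globalOffs + localOffs] ends up as
    // globalOffs + (localSize - localOffs - 1)^2; iterate() checks exactly that.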
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
        << ", local_size_z = " << m_localSize.z() << ") in;\n"
        << "layout(binding = 0) writeonly buffer Output {\n"
        << "    uint values[" << numValues << "];\n"
        << "} sb_out;\n\n"
        << "shared uint offsets[" << workGroupSize << "];\n\n"
        << "void main (void) {\n"
        << "    uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
        << "    uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
           "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
        << "    uint globalOffs = localSize*globalNdx;\n"
        << "    uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + "
           "gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
        << "\n"
        << "    offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
        << "    memoryBarrierShared();\n"
        << "    barrier();\n"
        << "    sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance *SharedVarTest::createInstance(Context &context) const
{
    return new SharedVarTestInstance(context, m_localSize, m_workSize, m_computePipelineConstructionType);
}

SharedVarTestInstance::SharedVarTestInstance(Context &context, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
                                             const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus SharedVarTestInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &allocator = m_context.getDefaultAllocator();

    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);

    // Create a buffer and host-visible memory for it

    const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * workGroupSize * workGroupCount;
    const BufferWithMemory buffer(vk, device, allocator,
                                  makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                  MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
                                    m_context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.buildPipeline();

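    // Make the shader writes visible to the host before the result buffer is mapped and read.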
    const VkBufferMemoryBarrier computeFinishBarrier =
        makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
                             &descriptorSet.get(), 0u, nullptr);

    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                          (VkDependencyFlags)0, 0, nullptr, 1, &computeFinishBarrier, 0, nullptr);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results

    const Allocation &bufferAllocation = buffer.getAllocation();
    invalidateAlloc(vk, device, bufferAllocation);

    const uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());

    for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
    {
        const int globalOffset = groupNdx * workGroupSize;
        for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
        {
            const uint32_t res = bufferPtr[globalOffset + localOffset];
            const uint32_t ref = globalOffset + squared(workGroupSize - localOffset - 1);

            if (res != ref)
            {
                std::ostringstream msg;
                msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
                return tcu::TestStatus::fail(msg.str());
            }
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class SharedVarAtomicOpTest : public vkt::TestCase
{
public:
    SharedVarAtomicOpTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec3 &localSize,
                          const tcu::IVec3 &workSize,
                          const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class SharedVarAtomicOpTestInstance : public vkt::TestInstance
{
public:
    SharedVarAtomicOpTestInstance(Context &context, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
                                  const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

SharedVarAtomicOpTest::SharedVarAtomicOpTest(tcu::TestContext &testCtx, const std::string &name,
                                             const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
                                             const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

void SharedVarAtomicOpTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void SharedVarAtomicOpTest::initPrograms(SourceCollections &sourceCollections) const
{
    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);
    const int numValues = workGroupSize * workGroupCount;

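    // Every invocation atomically bumps a zero-initialized shared counter and writes
    // oldVal+1 at the slot it claimed, so each group's slice of the output holds
    // 1..localSize in order regardless of invocation scheduling.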
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
        << ", local_size_z = " << m_localSize.z() << ") in;\n"
        << "layout(binding = 0) writeonly buffer Output {\n"
        << "    uint values[" << numValues << "];\n"
        << "} sb_out;\n\n"
        << "shared uint count;\n\n"
        << "void main (void) {\n"
        << "    uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
        << "    uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
           "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
        << "    uint globalOffs = localSize*globalNdx;\n"
        << "\n"
        << "    count = 0u;\n"
        << "    memoryBarrierShared();\n"
        << "    barrier();\n"
        << "    uint oldVal = atomicAdd(count, 1u);\n"
        << "    sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance *SharedVarAtomicOpTest::createInstance(Context &context) const
{
    return new SharedVarAtomicOpTestInstance(context, m_localSize, m_workSize, m_computePipelineConstructionType);
}

SharedVarAtomicOpTestInstance::SharedVarAtomicOpTestInstance(
    Context &context, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus SharedVarAtomicOpTestInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &allocator = m_context.getDefaultAllocator();

    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);

    // Create a buffer and host-visible memory for it

    const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * workGroupSize * workGroupCount;
    const BufferWithMemory buffer(vk, device, allocator,
                                  makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                  MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
                                    m_context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.buildPipeline();

    const VkBufferMemoryBarrier computeFinishBarrier =
        makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
                             &descriptorSet.get(), 0u, nullptr);

    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                          (VkDependencyFlags)0, 0, nullptr, 1u, &computeFinishBarrier, 0, nullptr);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results

    const Allocation &bufferAllocation = buffer.getAllocation();
    invalidateAlloc(vk, device, bufferAllocation);

    const uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());

    for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
    {
        const int globalOffset = groupNdx * workGroupSize;
        for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
        {
            const uint32_t res = bufferPtr[globalOffset + localOffset];
            const uint32_t ref = localOffset + 1;

            if (res != ref)
            {
                std::ostringstream msg;
                msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
                return tcu::TestStatus::fail(msg.str());
            }
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class SSBOLocalBarrierTest : public vkt::TestCase
{
public:
    SSBOLocalBarrierTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec3 &localSize,
                         const tcu::IVec3 &workSize,
                         const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class SSBOLocalBarrierTestInstance : public vkt::TestInstance
{
public:
    SSBOLocalBarrierTestInstance(Context &context, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
                                 const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

SSBOLocalBarrierTest::SSBOLocalBarrierTest(tcu::TestContext &testCtx, const std::string &name,
                                           const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
                                           const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

void SSBOLocalBarrierTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void SSBOLocalBarrierTest::initPrograms(SourceCollections &sourceCollections) const
{
    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);
    const int numValues = workGroupSize * workGroupCount;

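    // Three phases separated by memoryBarrierBuffer()+barrier(): initialize your own slot
    // to globalOffs, then read-modify-write the slot one to the right, then the slot two
    // to the right (both wrapped mod localSize). The final value of slot i is therefore
    // globalOffs + ((i-1) mod localSize) + ((i-2) mod localSize).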
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
        << ", local_size_z = " << m_localSize.z() << ") in;\n"
        << "layout(binding = 0) coherent buffer Output {\n"
        << "    uint values[" << numValues << "];\n"
        << "} sb_out;\n\n"
        << "void main (void) {\n"
        << "    uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
        << "    uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
           "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
        << "    uint globalOffs = localSize*globalNdx;\n"
        << "    uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + "
           "gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
        << "\n"
        << "    sb_out.values[globalOffs + localOffs] = globalOffs;\n"
        << "    memoryBarrierBuffer();\n"
        << "    barrier();\n"
        << "    sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n" // += so we read and write
        << "    memoryBarrierBuffer();\n"
        << "    barrier();\n"
        << "    sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance *SSBOLocalBarrierTest::createInstance(Context &context) const
{
    return new SSBOLocalBarrierTestInstance(context, m_localSize, m_workSize, m_computePipelineConstructionType);
}

SSBOLocalBarrierTestInstance::SSBOLocalBarrierTestInstance(
    Context &context, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus SSBOLocalBarrierTestInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &allocator = m_context.getDefaultAllocator();

    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);

    // Create a buffer and host-visible memory for it

    const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * workGroupSize * workGroupCount;
    const BufferWithMemory buffer(vk, device, allocator,
                                  makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                  MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
                                    m_context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.buildPipeline();

    const VkBufferMemoryBarrier computeFinishBarrier =
        makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
                             &descriptorSet.get(), 0u, nullptr);

    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                          (VkDependencyFlags)0, 0, nullptr, 1, &computeFinishBarrier, 0, nullptr);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results

    const Allocation &bufferAllocation = buffer.getAllocation();
    invalidateAlloc(vk, device, bufferAllocation);

    const uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());

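    // Slot i started at globalOffset and was incremented by its two "left" neighbours,
    // i.e. the invocations at indices (i-1) and (i-2) wrapped to the work group size.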
    for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
    {
        const int globalOffset = groupNdx * workGroupSize;
        for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
        {
            const uint32_t res = bufferPtr[globalOffset + localOffset];
            const int offs0 = localOffset - 1 < 0 ? ((localOffset + workGroupSize - 1) % workGroupSize) :
                                                    ((localOffset - 1) % workGroupSize);
            const int offs1 = localOffset - 2 < 0 ? ((localOffset + workGroupSize - 2) % workGroupSize) :
                                                    ((localOffset - 2) % workGroupSize);
            const uint32_t ref = static_cast<uint32_t>(globalOffset + offs0 + offs1);

            if (res != ref)
            {
                std::ostringstream msg;
                msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
                return tcu::TestStatus::fail(msg.str());
            }
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class CopyImageToSSBOTest : public vkt::TestCase
{
public:
    CopyImageToSSBOTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec2 &localSize,
                        const tcu::IVec2 &imageSize,
                        const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

private:
    const tcu::IVec2 m_localSize;
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class CopyImageToSSBOTestInstance : public vkt::TestInstance
{
public:
    CopyImageToSSBOTestInstance(Context &context, const tcu::IVec2 &localSize, const tcu::IVec2 &imageSize,
                                const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const tcu::IVec2 m_localSize;
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

CopyImageToSSBOTest::CopyImageToSSBOTest(tcu::TestContext &testCtx, const std::string &name,
                                         const tcu::IVec2 &localSize, const tcu::IVec2 &imageSize,
                                         const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_localSize(localSize)
    , m_imageSize(imageSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
    DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
    DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
}

void CopyImageToSSBOTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void CopyImageToSSBOTest::initPrograms(SourceCollections &sourceCollections) const
{
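    // One invocation per texel: load from the r32ui storage image and store the value
    // at the matching linear offset in the SSBO.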
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
        << "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_srcImg;\n"
        << "layout(binding = 0) writeonly buffer Output {\n"
        << "    uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
        << "} sb_out;\n\n"
        << "void main (void) {\n"
        << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
        << "    uint value = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
        << "    sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance *CopyImageToSSBOTest::createInstance(Context &context) const
{
    return new CopyImageToSSBOTestInstance(context, m_localSize, m_imageSize, m_computePipelineConstructionType);
}

CopyImageToSSBOTestInstance::CopyImageToSSBOTestInstance(
    Context &context, const tcu::IVec2 &localSize, const tcu::IVec2 &imageSize,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_localSize(localSize)
    , m_imageSize(imageSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus CopyImageToSSBOTestInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &allocator = m_context.getDefaultAllocator();

    // Create an image

    const VkImageCreateInfo imageParams =
        make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
    const ImageWithMemory image(vk, device, allocator, imageParams, MemoryRequirement::Any);

    const VkImageSubresourceRange subresourceRange =
        makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
    const Unique<VkImageView> imageView(
        makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));

    // Staging buffer (source data for image)

    const uint32_t imageArea = multiplyComponents(m_imageSize);
    const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * imageArea;

    const BufferWithMemory stagingBuffer(vk, device, allocator,
                                         makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
                                         MemoryRequirement::HostVisible);

    // Populate the staging buffer with test data
    {
        de::Random rnd(0xab2c7);
        const Allocation &stagingBufferAllocation = stagingBuffer.getAllocation();
        uint32_t *bufferPtr = static_cast<uint32_t *>(stagingBufferAllocation.getHostPtr());
        for (uint32_t i = 0; i < imageArea; ++i)
            *bufferPtr++ = rnd.getUint32();

        flushAlloc(vk, device, stagingBufferAllocation);
    }

    // Create a buffer to store shader output

    const BufferWithMemory outputBuffer(vk, device, allocator,
                                        makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                        MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    // Set the bindings

    const VkDescriptorImageInfo imageDescriptorInfo =
        makeDescriptorImageInfo(VK_NULL_HANDLE, *imageView, VK_IMAGE_LAYOUT_GENERAL);
    const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);

    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
                     VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
        .update(vk, device);

    // Perform the computation
    {
        ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
                                        m_context.getBinaryCollection().get("comp"));
        pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
        pipeline.buildPipeline();

        const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(
            VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
        const tcu::IVec2 workSize = m_imageSize / m_localSize;

        // Prepare the command buffer

        const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
        const Unique<VkCommandBuffer> cmdBuffer(
            allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

        // Start recording commands

        beginCommandBuffer(vk, *cmdBuffer);

        pipeline.bind(*cmdBuffer);
        vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
                                 &descriptorSet.get(), 0u, nullptr);

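        // The copy helper below is expected to record the image layout transitions and a
        // barrier that makes the uploaded texels visible to the compute stage before the
        // dispatch samples them.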
        const std::vector<VkBufferImageCopy> bufferImageCopy(1, makeBufferImageCopy(m_imageSize));
        copyBufferToImage(vk, *cmdBuffer, *stagingBuffer, bufferSizeBytes, bufferImageCopy, VK_IMAGE_ASPECT_COLOR_BIT,
                          1, 1, *image, VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);

        vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
        vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                              (VkDependencyFlags)0, 0, nullptr, 1, &computeFinishBarrier, 0, nullptr);

        endCommandBuffer(vk, *cmdBuffer);

        // Wait for completion

        submitCommandsAndWait(vk, device, queue, *cmdBuffer);
    }

    // Validate the results

    const Allocation &outputBufferAllocation = outputBuffer.getAllocation();
    invalidateAlloc(vk, device, outputBufferAllocation);

    const uint32_t *bufferPtr = static_cast<uint32_t *>(outputBufferAllocation.getHostPtr());
    const uint32_t *refBufferPtr = static_cast<uint32_t *>(stagingBuffer.getAllocation().getHostPtr());

    for (uint32_t ndx = 0; ndx < imageArea; ++ndx)
    {
        const uint32_t res = *(bufferPtr + ndx);
        const uint32_t ref = *(refBufferPtr + ndx);

        if (res != ref)
        {
            std::ostringstream msg;
            msg << "Comparison failed for Output.values[" << ndx << "]";
            return tcu::TestStatus::fail(msg.str());
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class CopySSBOToImageTest : public vkt::TestCase
{
public:
    CopySSBOToImageTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec2 &localSize,
                        const tcu::IVec2 &imageSize,
                        const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

private:
    const tcu::IVec2 m_localSize;
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class CopySSBOToImageTestInstance : public vkt::TestInstance
{
public:
    CopySSBOToImageTestInstance(Context &context, const tcu::IVec2 &localSize, const tcu::IVec2 &imageSize,
                                const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const tcu::IVec2 m_localSize;
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

CopySSBOToImageTest::CopySSBOToImageTest(tcu::TestContext &testCtx, const std::string &name,
                                         const tcu::IVec2 &localSize, const tcu::IVec2 &imageSize,
                                         const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_localSize(localSize)
    , m_imageSize(imageSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
    DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
    DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
}

void CopySSBOToImageTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void CopySSBOToImageTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
        << "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_dstImg;\n"
        << "layout(binding = 0) readonly buffer Input {\n"
        << "    uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
        << "} sb_in;\n\n"
        << "void main (void) {\n"
        << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
        << "    uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
        << "    imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance *CopySSBOToImageTest::createInstance(Context &context) const
{
    return new CopySSBOToImageTestInstance(context, m_localSize, m_imageSize, m_computePipelineConstructionType);
}

CopySSBOToImageTestInstance::CopySSBOToImageTestInstance(
    Context &context, const tcu::IVec2 &localSize, const tcu::IVec2 &imageSize,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_localSize(localSize)
    , m_imageSize(imageSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus CopySSBOToImageTestInstance::iterate(void)
{
    ContextCommonData data = m_context.getContextCommonData();
    const DeviceInterface &vkd = data.vkd;

    // Create an image, a view, and the output buffer
    const VkImageSubresourceRange subresourceRange =
        makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
    ImageWithBuffer imageWithBuffer(
        vkd, data.device, data.allocator, vk::makeExtent3D(m_imageSize.x(), m_imageSize.y(), 1), VK_FORMAT_R32_UINT,
        VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT, vk::VK_IMAGE_TYPE_2D, subresourceRange);
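    // ImageWithBuffer pairs the storage image with a host-visible buffer that the image
    // contents are copied into for verification.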

    const uint32_t imageArea = multiplyComponents(m_imageSize);
    const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * imageArea;

    const BufferWithMemory inputBuffer(vkd, data.device, data.allocator,
                                       makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                       MemoryRequirement::HostVisible);

    // Populate the buffer with test data
    {
        de::Random rnd(0x77238ac2);
        const Allocation &inputBufferAllocation = inputBuffer.getAllocation();
        uint32_t *bufferPtr = static_cast<uint32_t *>(inputBufferAllocation.getHostPtr());
        for (uint32_t i = 0; i < imageArea; ++i)
            *bufferPtr++ = rnd.getUint32();

        flushAlloc(vkd, data.device, inputBufferAllocation);
    }

    // Create descriptor set
    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vkd, data.device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
            .build(vkd, data.device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(
        makeDescriptorSet(vkd, data.device, *descriptorPool, *descriptorSetLayout));

    // Set the bindings

    const VkDescriptorImageInfo imageDescriptorInfo =
        makeDescriptorImageInfo(VK_NULL_HANDLE, imageWithBuffer.getImageView(), VK_IMAGE_LAYOUT_GENERAL);
    const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);

    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
                     VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
        .update(vkd, data.device);

    // Perform the computation
    {
        ComputePipelineWrapper pipeline(vkd, data.device, m_computePipelineConstructionType,
                                        m_context.getBinaryCollection().get("comp"));
        pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
        pipeline.buildPipeline();

        const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(
            VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);

        const VkImageMemoryBarrier imageLayoutBarrier =
            makeImageMemoryBarrier(0u, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
                                   imageWithBuffer.getImage(), subresourceRange);

        const tcu::IVec2 workSize = m_imageSize / m_localSize;

        // Prepare the command buffer

        const Unique<VkCommandPool> cmdPool(makeCommandPool(vkd, data.device, data.qfIndex));
        const Unique<VkCommandBuffer> cmdBuffer(
            allocateCommandBuffer(vkd, data.device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

        // Start recording commands

        beginCommandBuffer(vkd, *cmdBuffer);

        pipeline.bind(*cmdBuffer);
        vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
                                  &descriptorSet.get(), 0u, nullptr);

        vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                               (VkDependencyFlags)0, 0, nullptr, 1, &inputBufferPostHostWriteBarrier, 1,
                               &imageLayoutBarrier);
        vkd.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);

        copyImageToBuffer(vkd, *cmdBuffer, imageWithBuffer.getImage(), imageWithBuffer.getBuffer(), m_imageSize,
                          VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);

        endCommandBuffer(vkd, *cmdBuffer);

        // Wait for completion

        submitCommandsAndWait(vkd, data.device, data.queue, *cmdBuffer);
    }

    // Validate the results

    const Allocation &outputBufferAllocation = imageWithBuffer.getBufferAllocation();
    invalidateAlloc(vkd, data.device, outputBufferAllocation);

    const uint32_t *bufferPtr = static_cast<uint32_t *>(outputBufferAllocation.getHostPtr());
    const uint32_t *refBufferPtr = static_cast<uint32_t *>(inputBuffer.getAllocation().getHostPtr());

    for (uint32_t ndx = 0; ndx < imageArea; ++ndx)
    {
        const uint32_t res = *(bufferPtr + ndx);
        const uint32_t ref = *(refBufferPtr + ndx);

        if (res != ref)
        {
            std::ostringstream msg;
            msg << "Comparison failed for pixel " << ndx;
            return tcu::TestStatus::fail(msg.str());
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class BufferToBufferInvertTest : public vkt::TestCase
{
public:
    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

    static BufferToBufferInvertTest *UBOToSSBOInvertCase(
        tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues, const tcu::IVec3 &localSize,
        const tcu::IVec3 &workSize, const vk::ComputePipelineConstructionType computePipelineConstructionType);

    static BufferToBufferInvertTest *CopyInvertSSBOCase(
        tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues, const tcu::IVec3 &localSize,
        const tcu::IVec3 &workSize, const vk::ComputePipelineConstructionType computePipelineConstructionType);

private:
    BufferToBufferInvertTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues,
                             const tcu::IVec3 &localSize, const tcu::IVec3 &workSize, const BufferType bufferType,
                             const vk::ComputePipelineConstructionType computePipelineConstructionType);

    const BufferType m_bufferType;
    const uint32_t m_numValues;
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class BufferToBufferInvertTestInstance : public vkt::TestInstance
{
public:
    BufferToBufferInvertTestInstance(Context &context, const uint32_t numValues, const tcu::IVec3 &localSize,
                                     const tcu::IVec3 &workSize, const BufferType bufferType,
                                     const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const BufferType m_bufferType;
    const uint32_t m_numValues;
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

BufferToBufferInvertTest::BufferToBufferInvertTest(
    tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues, const tcu::IVec3 &localSize,
    const tcu::IVec3 &workSize, const BufferType bufferType,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_bufferType(bufferType)
    , m_numValues(numValues)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
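    // The shader divides the values evenly between invocations, so the total invocation
    // count must divide numValues exactly.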
    DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
    DE_ASSERT(m_bufferType == BUFFER_TYPE_UNIFORM || m_bufferType == BUFFER_TYPE_SSBO);
}

BufferToBufferInvertTest *BufferToBufferInvertTest::UBOToSSBOInvertCase(
    tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues, const tcu::IVec3 &localSize,
    const tcu::IVec3 &workSize, const vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    return new BufferToBufferInvertTest(testCtx, name, numValues, localSize, workSize, BUFFER_TYPE_UNIFORM,
                                        computePipelineConstructionType);
}

BufferToBufferInvertTest *BufferToBufferInvertTest::CopyInvertSSBOCase(
    tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues, const tcu::IVec3 &localSize,
    const tcu::IVec3 &workSize, const vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    return new BufferToBufferInvertTest(testCtx, name, numValues, localSize, workSize, BUFFER_TYPE_SSBO,
                                        computePipelineConstructionType);
}

void BufferToBufferInvertTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void BufferToBufferInvertTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    if (m_bufferType == BUFFER_TYPE_UNIFORM)
    {
        src << "#version 310 es\n"
            << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
            << ", local_size_z = " << m_localSize.z() << ") in;\n"
            << "layout(binding = 0) readonly uniform Input {\n"
            << "    uint values[" << m_numValues << "];\n"
            << "} ub_in;\n"
            << "layout(binding = 1, std140) writeonly buffer Output {\n"
            << "    uint values[" << m_numValues << "];\n"
            << "} sb_out;\n"
            << "void main (void) {\n"
            << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
            << "    uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
            << "    uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
               "gl_GlobalInvocationID.x;\n"
            << "    uint offset = numValuesPerInv*groupNdx;\n"
            << "\n"
            << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
            << "        sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
            << "}\n";
    }
    else if (m_bufferType == BUFFER_TYPE_SSBO)
    {
        src << "#version 310 es\n"
            << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
            << ", local_size_z = " << m_localSize.z() << ") in;\n"
            << "layout(binding = 0, std140) readonly buffer Input {\n"
            << "    uint values[" << m_numValues << "];\n"
            << "} sb_in;\n"
            << "layout (binding = 1, std140) writeonly buffer Output {\n"
            << "    uint values[" << m_numValues << "];\n"
            << "} sb_out;\n"
            << "void main (void) {\n"
            << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
            << "    uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
            << "    uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
               "gl_GlobalInvocationID.x;\n"
            << "    uint offset = numValuesPerInv*groupNdx;\n"
            << "\n"
            << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
            << "        sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
            << "}\n";
    }

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance *BufferToBufferInvertTest::createInstance(Context &context) const
{
    return new BufferToBufferInvertTestInstance(context, m_numValues, m_localSize, m_workSize, m_bufferType,
                                                m_computePipelineConstructionType);
}

BufferToBufferInvertTestInstance::BufferToBufferInvertTestInstance(
    Context &context, const uint32_t numValues, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
    const BufferType bufferType, const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_bufferType(bufferType)
    , m_numValues(numValues)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus BufferToBufferInvertTestInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &allocator = m_context.getDefaultAllocator();

    // Customize the test based on buffer type

    const VkBufferUsageFlags inputBufferUsageFlags =
        (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT :
                                               VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
    const VkDescriptorType inputBufferDescriptorType =
        (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
    const uint32_t randomSeed = (m_bufferType == BUFFER_TYPE_UNIFORM ? 0x111223f : 0x124fef);

    // Create an input buffer

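    // std140 gives a uint array a 16-byte stride, so each logical value occupies a full
    // tcu::UVec4 on the host side; only the .x component carries data.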
    const VkDeviceSize bufferSizeBytes = sizeof(tcu::UVec4) * m_numValues;
    const BufferWithMemory inputBuffer(vk, device, allocator,
                                       makeBufferCreateInfo(bufferSizeBytes, inputBufferUsageFlags),
                                       MemoryRequirement::HostVisible);

    // Fill the input buffer with data
    {
        de::Random rnd(randomSeed);
        const Allocation &inputBufferAllocation = inputBuffer.getAllocation();
        tcu::UVec4 *bufferPtr = static_cast<tcu::UVec4 *>(inputBufferAllocation.getHostPtr());
        for (uint32_t i = 0; i < m_numValues; ++i)
            bufferPtr[i].x() = rnd.getUint32();

        flushAlloc(vk, device, inputBufferAllocation);
    }

    // Create an output buffer

    const BufferWithMemory outputBuffer(vk, device, allocator,
                                        makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                        MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(inputBufferDescriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(inputBufferDescriptorType)
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo inputBufferDescriptorInfo =
        makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
    const VkDescriptorBufferInfo outputBufferDescriptorInfo =
        makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), inputBufferDescriptorType,
                     &inputBufferDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
                                    m_context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.buildPipeline();

    const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(
        VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);

    const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(
        VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
                             &descriptorSet.get(), 0u, nullptr);

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                          (VkDependencyFlags)0, 0, nullptr, 1, &hostWriteBarrier, 0, nullptr);
    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                          (VkDependencyFlags)0, 0, nullptr, 1, &shaderWriteBarrier, 0, nullptr);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results

    const Allocation &outputBufferAllocation = outputBuffer.getAllocation();
    invalidateAlloc(vk, device, outputBufferAllocation);

    const tcu::UVec4 *bufferPtr = static_cast<tcu::UVec4 *>(outputBufferAllocation.getHostPtr());
    const tcu::UVec4 *refBufferPtr = static_cast<tcu::UVec4 *>(inputBuffer.getAllocation().getHostPtr());
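    // Only the .x lane of each uvec4 slot is meaningful; the remaining components are stride padding.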

    for (uint32_t ndx = 0; ndx < m_numValues; ++ndx)
    {
        const uint32_t res = bufferPtr[ndx].x();
        const uint32_t ref = ~refBufferPtr[ndx].x();

        if (res != ref)
        {
            std::ostringstream msg;
            msg << "Comparison failed for Output.values[" << ndx << "]";
            return tcu::TestStatus::fail(msg.str());
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class InvertSSBOInPlaceTest : public vkt::TestCase
{
public:
    InvertSSBOInPlaceTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues,
                          const bool sized, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
                          const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

private:
    const uint32_t m_numValues;
    const bool m_sized;
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class InvertSSBOInPlaceTestInstance : public vkt::TestInstance
{
public:
    InvertSSBOInPlaceTestInstance(Context &context, const uint32_t numValues, const tcu::IVec3 &localSize,
                                  const tcu::IVec3 &workSize,
                                  const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const uint32_t m_numValues;
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

InvertSSBOInPlaceTest::InvertSSBOInPlaceTest(tcu::TestContext &testCtx, const std::string &name,
                                             const uint32_t numValues, const bool sized, const tcu::IVec3 &localSize,
                                             const tcu::IVec3 &workSize,
                                             const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_numValues(numValues)
    , m_sized(sized)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
    DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
}

void InvertSSBOInPlaceTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void InvertSSBOInPlaceTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
        << ", local_size_z = " << m_localSize.z() << ") in;\n"
        << "layout(binding = 0) buffer InOut {\n"
        << "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
        << "} sb_inout;\n"
        << "void main (void) {\n"
        << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
        << "    uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
        << "    uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
           "gl_GlobalInvocationID.x;\n"
        << "    uint offset = numValuesPerInv*groupNdx;\n"
        << "\n"
        << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
        << "        sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance *InvertSSBOInPlaceTest::createInstance(Context &context) const
{
    return new InvertSSBOInPlaceTestInstance(context, m_numValues, m_localSize, m_workSize,
                                             m_computePipelineConstructionType);
}

InvertSSBOInPlaceTestInstance::InvertSSBOInPlaceTestInstance(
    Context &context, const uint32_t numValues, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_numValues(numValues)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus InvertSSBOInPlaceTestInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &allocator = m_context.getDefaultAllocator();

    // Create an input/output buffer

    const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * m_numValues;
    const BufferWithMemory buffer(vk, device, allocator,
                                  makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                  MemoryRequirement::HostVisible);

    // Fill the buffer with data

    typedef std::vector<uint32_t> data_vector_t;
    data_vector_t inputData(m_numValues);
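    // Keep a host-side copy of the input values: the shader overwrites the buffer in place,
    // so the reference data would otherwise be lost.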

    {
        de::Random rnd(0x82ce7f);
        const Allocation &bufferAllocation = buffer.getAllocation();
        uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());
        for (uint32_t i = 0; i < m_numValues; ++i)
            inputData[i] = *bufferPtr++ = rnd.getUint32();

        flushAlloc(vk, device, bufferAllocation);
    }

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
                                    m_context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.buildPipeline();

    const VkBufferMemoryBarrier hostWriteBarrier =
        makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);

    const VkBufferMemoryBarrier shaderWriteBarrier =
        makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
                             &descriptorSet.get(), 0u, nullptr);

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                          (VkDependencyFlags)0, 0, nullptr, 1, &hostWriteBarrier, 0, nullptr);
    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                          (VkDependencyFlags)0, 0, nullptr, 1, &shaderWriteBarrier, 0, nullptr);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results

    const Allocation &bufferAllocation = buffer.getAllocation();
    invalidateAlloc(vk, device, bufferAllocation);

    const uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());

    for (uint32_t ndx = 0; ndx < m_numValues; ++ndx)
    {
        const uint32_t res = bufferPtr[ndx];
        const uint32_t ref = ~inputData[ndx];

        if (res != ref)
        {
            std::ostringstream msg;
            msg << "Comparison failed for InOut.values[" << ndx << "]";
            return tcu::TestStatus::fail(msg.str());
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class WriteToMultipleSSBOTest : public vkt::TestCase
{
public:
    WriteToMultipleSSBOTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues,
                            const bool sized, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
                            const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

private:
    const uint32_t m_numValues;
    const bool m_sized;
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class WriteToMultipleSSBOTestInstance : public vkt::TestInstance
{
public:
    WriteToMultipleSSBOTestInstance(Context &context, const uint32_t numValues, const tcu::IVec3 &localSize,
                                    const tcu::IVec3 &workSize,
                                    const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const uint32_t m_numValues;
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

WriteToMultipleSSBOTest::WriteToMultipleSSBOTest(
    tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues, const bool sized,
    const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_numValues(numValues)
    , m_sized(sized)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
    DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
}

void WriteToMultipleSSBOTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void WriteToMultipleSSBOTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
        << ", local_size_z = " << m_localSize.z() << ") in;\n"
        << "layout(binding = 0) writeonly buffer Out0 {\n"
        << "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
        << "} sb_out0;\n"
        << "layout(binding = 1) writeonly buffer Out1 {\n"
        << "    uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
        << "} sb_out1;\n"
        << "void main (void) {\n"
        << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
        << "    uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
           "gl_GlobalInvocationID.x;\n"
        << "\n"
        << "    {\n"
        << "        uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
        << "        uint offset = numValuesPerInv*groupNdx;\n"
        << "\n"
        << "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
        << "            sb_out0.values[offset + ndx] = offset + ndx;\n"
        << "    }\n"
        << "    {\n"
        << "        uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
        << "        uint offset = numValuesPerInv*groupNdx;\n"
        << "\n"
        << "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
        << "            sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
        << "    }\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance *WriteToMultipleSSBOTest::createInstance(Context &context) const
{
    return new WriteToMultipleSSBOTestInstance(context, m_numValues, m_localSize, m_workSize,
                                               m_computePipelineConstructionType);
}

WriteToMultipleSSBOTestInstance::WriteToMultipleSSBOTestInstance(
    Context &context, const uint32_t numValues, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_numValues(numValues)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus WriteToMultipleSSBOTestInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &allocator = m_context.getDefaultAllocator();

    // Create two output buffers

    const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * m_numValues;
    const BufferWithMemory buffer0(vk, device, allocator,
                                   makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                   MemoryRequirement::HostVisible);
    const BufferWithMemory buffer1(vk, device, allocator,
                                   makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                   MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo buffer0DescriptorInfo = makeDescriptorBufferInfo(*buffer0, 0ull, bufferSizeBytes);
    const VkDescriptorBufferInfo buffer1DescriptorInfo = makeDescriptorBufferInfo(*buffer1, 0ull, bufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer0DescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer1DescriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
                                    m_context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.buildPipeline();

    const VkBufferMemoryBarrier shaderWriteBarriers[] = {
        makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer0, 0ull, bufferSizeBytes),
        makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer1, 0ull, bufferSizeBytes)};

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
                             &descriptorSet.get(), 0u, nullptr);

    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                          (VkDependencyFlags)0, 0, nullptr, DE_LENGTH_OF_ARRAY(shaderWriteBarriers),
                          shaderWriteBarriers, 0, nullptr);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results
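    // Expected contents: sb_out0.values[i] == i and sb_out1.values[i] == m_numValues - i,
    // since every element is written exactly once across all invocations.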
    {
        const Allocation &buffer0Allocation = buffer0.getAllocation();
        invalidateAlloc(vk, device, buffer0Allocation);
        const uint32_t *buffer0Ptr = static_cast<uint32_t *>(buffer0Allocation.getHostPtr());

        for (uint32_t ndx = 0; ndx < m_numValues; ++ndx)
        {
            const uint32_t res = buffer0Ptr[ndx];
            const uint32_t ref = ndx;

            if (res != ref)
            {
                std::ostringstream msg;
                msg << "Comparison failed for Out0.values[" << ndx << "] res=" << res << " ref=" << ref;
                return tcu::TestStatus::fail(msg.str());
            }
        }
    }
    {
        const Allocation &buffer1Allocation = buffer1.getAllocation();
        invalidateAlloc(vk, device, buffer1Allocation);
        const uint32_t *buffer1Ptr = static_cast<uint32_t *>(buffer1Allocation.getHostPtr());

        for (uint32_t ndx = 0; ndx < m_numValues; ++ndx)
        {
            const uint32_t res = buffer1Ptr[ndx];
            const uint32_t ref = m_numValues - ndx;

            if (res != ref)
            {
                std::ostringstream msg;
                msg << "Comparison failed for Out1.values[" << ndx << "] res=" << res << " ref=" << ref;
                return tcu::TestStatus::fail(msg.str());
            }
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class SSBOBarrierTest : public vkt::TestCase
{
public:
    SSBOBarrierTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec3 &workSize,
                    const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

private:
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class SSBOBarrierTestInstance : public vkt::TestInstance
{
public:
    SSBOBarrierTestInstance(Context &context, const tcu::IVec3 &workSize,
                            const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

SSBOBarrierTest::SSBOBarrierTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec3 &workSize,
                                 const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

void SSBOBarrierTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void SSBOBarrierTest::initPrograms(SourceCollections &sourceCollections) const
{
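    // Two-pass test: comp0 writes one value per workgroup into the work buffer, and comp1 then
    // reads those values back and accumulates them into a single counter with atomicAdd. The
    // buffer memory barrier recorded between the two dispatches is what the test exercises.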
    sourceCollections.glslSources.add("comp0")
        << glu::ComputeSource("#version 310 es\n"
                              "layout (local_size_x = 1) in;\n"
                              "layout(binding = 2) readonly uniform Constants {\n"
                              "    uint u_baseVal;\n"
                              "};\n"
                              "layout(binding = 1) writeonly buffer Output {\n"
                              "    uint values[];\n"
                              "};\n"
                              "void main (void) {\n"
                              "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
                              "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
                              "    values[offset] = u_baseVal + offset;\n"
                              "}\n");

    sourceCollections.glslSources.add("comp1")
        << glu::ComputeSource("#version 310 es\n"
                              "layout (local_size_x = 1) in;\n"
                              "layout(binding = 1) readonly buffer Input {\n"
                              "    uint values[];\n"
                              "};\n"
                              "layout(binding = 0) coherent buffer Output {\n"
                              "    uint sum;\n"
                              "};\n"
                              "void main (void) {\n"
                              "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
                              "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
                              "    uint value = values[offset];\n"
                              "    atomicAdd(sum, value);\n"
                              "}\n");
}

TestInstance *SSBOBarrierTest::createInstance(Context &context) const
{
    return new SSBOBarrierTestInstance(context, m_workSize, m_computePipelineConstructionType);
}

SSBOBarrierTestInstance::SSBOBarrierTestInstance(
    Context &context, const tcu::IVec3 &workSize,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus SSBOBarrierTestInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &allocator = m_context.getDefaultAllocator();

    // Create a work buffer used by both shaders

    const int workGroupCount = multiplyComponents(m_workSize);
    const VkDeviceSize workBufferSizeBytes = sizeof(uint32_t) * workGroupCount;
    const BufferWithMemory workBuffer(vk, device, allocator,
                                      makeBufferCreateInfo(workBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                      MemoryRequirement::Any);

    // Create an output buffer

    const VkDeviceSize outputBufferSizeBytes = sizeof(uint32_t);
    const BufferWithMemory outputBuffer(vk, device, allocator,
                                        makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                        MemoryRequirement::HostVisible);

    // Initialize atomic counter value to zero
    {
        const Allocation &outputBufferAllocation = outputBuffer.getAllocation();
        uint32_t *outputBufferPtr = static_cast<uint32_t *>(outputBufferAllocation.getHostPtr());
        *outputBufferPtr = 0;
        flushAlloc(vk, device, outputBufferAllocation);
    }

    // Create a uniform buffer (to pass uniform constants)

    const VkDeviceSize uniformBufferSizeBytes = sizeof(uint32_t);
    const BufferWithMemory uniformBuffer(
        vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT),
        MemoryRequirement::HostVisible);

    // Set the constants in the uniform buffer

    const uint32_t baseValue = 127;
    {
        const Allocation &uniformBufferAllocation = uniformBuffer.getAllocation();
        uint32_t *uniformBufferPtr = static_cast<uint32_t *>(uniformBufferAllocation.getHostPtr());
        uniformBufferPtr[0] = baseValue;

        flushAlloc(vk, device, uniformBufferAllocation);
    }

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
            .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo workBufferDescriptorInfo =
        makeDescriptorBufferInfo(*workBuffer, 0ull, workBufferSizeBytes);
    const VkDescriptorBufferInfo outputBufferDescriptorInfo =
        makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
    const VkDescriptorBufferInfo uniformBufferDescriptorInfo =
        makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &workBufferDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u),
                     VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline0(vk, device, m_computePipelineConstructionType,
                                     m_context.getBinaryCollection().get("comp0"));
    pipeline0.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline0.buildPipeline();

    ComputePipelineWrapper pipeline1(vk, device, m_computePipelineConstructionType,
                                     m_context.getBinaryCollection().get("comp1"));
    pipeline1.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline1.buildPipeline();

    const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(
        VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);

    const VkBufferMemoryBarrier betweenShadersBarrier = makeBufferMemoryBarrier(
        VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *workBuffer, 0ull, workBufferSizeBytes);

    const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(
        VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline0.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline0.getPipelineLayout(), 0u, 1u,
                             &descriptorSet.get(), 0u, nullptr);

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                          (VkDependencyFlags)0, 0, nullptr, 1, &writeUniformConstantsBarrier, 0, nullptr);

    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                          (VkDependencyFlags)0, 0, nullptr, 1, &betweenShadersBarrier, 0, nullptr);

    // Switch to the second shader program
    pipeline1.bind(*cmdBuffer);

    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                          (VkDependencyFlags)0, 0, nullptr, 1, &afterComputeBarrier, 0, nullptr);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results

    const Allocation &outputBufferAllocation = outputBuffer.getAllocation();
    invalidateAlloc(vk, device, outputBufferAllocation);

    const uint32_t *bufferPtr = static_cast<uint32_t *>(outputBufferAllocation.getHostPtr());
    const uint32_t res = *bufferPtr;
    uint32_t ref = 0;

    for (int ndx = 0; ndx < workGroupCount; ++ndx)
        ref += baseValue + ndx;

    if (res != ref)
    {
        std::ostringstream msg;
        msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
        return tcu::TestStatus::fail(msg.str());
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class ImageAtomicOpTest : public vkt::TestCase
{
public:
    ImageAtomicOpTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t localSize,
                      const tcu::IVec2 &imageSize,
                      const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

private:
    const uint32_t m_localSize;
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class ImageAtomicOpTestInstance : public vkt::TestInstance
{
public:
    ImageAtomicOpTestInstance(Context &context, const uint32_t localSize, const tcu::IVec2 &imageSize,
                              const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const uint32_t m_localSize;
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

ImageAtomicOpTest::ImageAtomicOpTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t localSize,
                                     const tcu::IVec2 &imageSize,
                                     const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_localSize(localSize)
    , m_imageSize(imageSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

void ImageAtomicOpTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void ImageAtomicOpTest::initPrograms(SourceCollections &sourceCollections) const
{
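    // One workgroup per output pixel: invocation 0 clears the texel, the group synchronizes via
    // memoryBarrierImage() and barrier(), and then every invocation adds its input value to the
    // texel with imageAtomicAdd.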
    std::ostringstream src;
    src << "#version 310 es\n"
        << "#extension GL_OES_shader_image_atomic : require\n"
        << "layout (local_size_x = " << m_localSize << ") in;\n"
        << "layout(binding = 1, r32ui) coherent uniform highp uimage2D u_dstImg;\n"
        << "layout(binding = 0) readonly buffer Input {\n"
        << "    uint values[" << (multiplyComponents(m_imageSize) * m_localSize) << "];\n"
        << "} sb_in;\n\n"
        << "void main (void) {\n"
        << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
        << "    uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
        << "\n"
        << "    if (gl_LocalInvocationIndex == 0u)\n"
        << "        imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
        << "    memoryBarrierImage();\n"
        << "    barrier();\n"
        << "    imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance *ImageAtomicOpTest::createInstance(Context &context) const
{
    return new ImageAtomicOpTestInstance(context, m_localSize, m_imageSize, m_computePipelineConstructionType);
}

ImageAtomicOpTestInstance::ImageAtomicOpTestInstance(
    Context &context, const uint32_t localSize, const tcu::IVec2 &imageSize,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_localSize(localSize)
    , m_imageSize(imageSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus ImageAtomicOpTestInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &allocator = m_context.getDefaultAllocator();

    // Create an image

    const VkImageCreateInfo imageParams =
        make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
    const ImageWithMemory image(vk, device, allocator, imageParams, MemoryRequirement::Any);

    const VkImageSubresourceRange subresourceRange =
        makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
    const Unique<VkImageView> imageView(
        makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));

    // Input buffer

    const uint32_t numInputValues = multiplyComponents(m_imageSize) * m_localSize;
    const VkDeviceSize inputBufferSizeBytes = sizeof(uint32_t) * numInputValues;

    const BufferWithMemory inputBuffer(vk, device, allocator,
                                       makeBufferCreateInfo(inputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                       MemoryRequirement::HostVisible);

    // Populate the input buffer with test data
    {
        de::Random rnd(0x77238ac2);
        const Allocation &inputBufferAllocation = inputBuffer.getAllocation();
        uint32_t *bufferPtr = static_cast<uint32_t *>(inputBufferAllocation.getHostPtr());
        for (uint32_t i = 0; i < numInputValues; ++i)
            *bufferPtr++ = rnd.getUint32();

        flushAlloc(vk, device, inputBufferAllocation);
    }

    // Create a buffer to store shader output (copied from image data)

    const uint32_t imageArea = multiplyComponents(m_imageSize);
    const VkDeviceSize outputBufferSizeBytes = sizeof(uint32_t) * imageArea;
    const BufferWithMemory outputBuffer(vk, device, allocator,
                                        makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT),
                                        MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    // Set the bindings

    const VkDescriptorImageInfo imageDescriptorInfo =
        makeDescriptorImageInfo(VK_NULL_HANDLE, *imageView, VK_IMAGE_LAYOUT_GENERAL);
    const VkDescriptorBufferInfo bufferDescriptorInfo =
        makeDescriptorBufferInfo(*inputBuffer, 0ull, inputBufferSizeBytes);

    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
                     VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
        .update(vk, device);

    // Perform the computation
    {
        ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
                                        m_context.getBinaryCollection().get("comp"));
        pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
        pipeline.buildPipeline();

        const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(
            VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, inputBufferSizeBytes);

        const VkImageMemoryBarrier imageLayoutBarrier =
            makeImageMemoryBarrier((VkAccessFlags)0, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
                                   VK_IMAGE_LAYOUT_GENERAL, *image, subresourceRange);

        // Prepare the command buffer

        const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
        const Unique<VkCommandBuffer> cmdBuffer(
            allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

        // Start recording commands

        beginCommandBuffer(vk, *cmdBuffer);

        pipeline.bind(*cmdBuffer);
        vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
                                 &descriptorSet.get(), 0u, nullptr);

        vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                              (VkDependencyFlags)0, 0, nullptr, 1, &inputBufferPostHostWriteBarrier, 1,
                              &imageLayoutBarrier);
        vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);

        copyImageToBuffer(vk, *cmdBuffer, *image, *outputBuffer, m_imageSize, VK_ACCESS_SHADER_WRITE_BIT,
                          VK_IMAGE_LAYOUT_GENERAL);

        endCommandBuffer(vk, *cmdBuffer);

        // Wait for completion

        submitCommandsAndWait(vk, device, queue, *cmdBuffer);
    }

    // Validate the results

    const Allocation &outputBufferAllocation = outputBuffer.getAllocation();
    invalidateAlloc(vk, device, outputBufferAllocation);

    const uint32_t *bufferPtr = static_cast<uint32_t *>(outputBufferAllocation.getHostPtr());
    const uint32_t *refBufferPtr = static_cast<uint32_t *>(inputBuffer.getAllocation().getHostPtr());

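    // Each pixel must equal the sum of the m_localSize input values assigned to its workgroup.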
    for (uint32_t pixelNdx = 0; pixelNdx < imageArea; ++pixelNdx)
    {
        const uint32_t res = bufferPtr[pixelNdx];
        uint32_t ref = 0;

        for (uint32_t offs = 0; offs < m_localSize; ++offs)
            ref += refBufferPtr[pixelNdx * m_localSize + offs];

        if (res != ref)
        {
            std::ostringstream msg;
            msg << "Comparison failed for pixel " << pixelNdx;
            return tcu::TestStatus::fail(msg.str());
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class ImageBarrierTest : public vkt::TestCase
{
public:
    ImageBarrierTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec2 &imageSize,
                     const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

private:
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class ImageBarrierTestInstance : public vkt::TestInstance
{
public:
    ImageBarrierTestInstance(Context &context, const tcu::IVec2 &imageSize,
                             const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

ImageBarrierTest::ImageBarrierTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec2 &imageSize,
                                   const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_imageSize(imageSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

void ImageBarrierTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void ImageBarrierTest::initPrograms(SourceCollections &sourceCollections) const
{
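    // Image flavor of SSBOBarrierTest above: comp0 stores per-workgroup values to a storage image,
    // comp1 reads them back and accumulates into the output SSBO; an image memory barrier
    // separates the two dispatches.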
    sourceCollections.glslSources.add("comp0")
        << glu::ComputeSource("#version 310 es\n"
                              "layout (local_size_x = 1) in;\n"
                              "layout(binding = 2) readonly uniform Constants {\n"
                              "    uint u_baseVal;\n"
                              "};\n"
                              "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_img;\n"
                              "void main (void) {\n"
                              "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
                              "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
                              "    imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset + u_baseVal, 0, 0, 0));\n"
                              "}\n");

    sourceCollections.glslSources.add("comp1")
        << glu::ComputeSource("#version 310 es\n"
                              "layout (local_size_x = 1) in;\n"
                              "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_img;\n"
                              "layout(binding = 0) coherent buffer Output {\n"
                              "    uint sum;\n"
                              "};\n"
                              "void main (void) {\n"
                              "    uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
                              "    atomicAdd(sum, value);\n"
                              "}\n");
}

TestInstance *ImageBarrierTest::createInstance(Context &context) const
{
    return new ImageBarrierTestInstance(context, m_imageSize, m_computePipelineConstructionType);
}

ImageBarrierTestInstance::ImageBarrierTestInstance(
    Context &context, const tcu::IVec2 &imageSize,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_imageSize(imageSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus ImageBarrierTestInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &allocator = m_context.getDefaultAllocator();

    // Create an image used by both shaders

    const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_STORAGE_BIT);
    const ImageWithMemory image(vk, device, allocator, imageParams, MemoryRequirement::Any);

    const VkImageSubresourceRange subresourceRange =
        makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
    const Unique<VkImageView> imageView(
        makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));

    // Create an output buffer

    const VkDeviceSize outputBufferSizeBytes = sizeof(uint32_t);
    const BufferWithMemory outputBuffer(vk, device, allocator,
                                        makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                        MemoryRequirement::HostVisible);

    // Initialize atomic counter value to zero
    {
        const Allocation &outputBufferAllocation = outputBuffer.getAllocation();
        uint32_t *outputBufferPtr = static_cast<uint32_t *>(outputBufferAllocation.getHostPtr());
        *outputBufferPtr = 0;
        flushAlloc(vk, device, outputBufferAllocation);
    }

    // Create a uniform buffer (to pass uniform constants)

    const VkDeviceSize uniformBufferSizeBytes = sizeof(uint32_t);
    const BufferWithMemory uniformBuffer(
        vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT),
        MemoryRequirement::HostVisible);

    // Set the constants in the uniform buffer

    const uint32_t baseValue = 127;
    {
        const Allocation &uniformBufferAllocation = uniformBuffer.getAllocation();
        uint32_t *uniformBufferPtr = static_cast<uint32_t *>(uniformBufferAllocation.getHostPtr());
        uniformBufferPtr[0] = baseValue;

        flushAlloc(vk, device, uniformBufferAllocation);
    }

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
            .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
            .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorImageInfo imageDescriptorInfo =
        makeDescriptorImageInfo(VK_NULL_HANDLE, *imageView, VK_IMAGE_LAYOUT_GENERAL);
    const VkDescriptorBufferInfo outputBufferDescriptorInfo =
        makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
    const VkDescriptorBufferInfo uniformBufferDescriptorInfo =
        makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
                     VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u),
                     VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline0(vk, device, m_computePipelineConstructionType,
                                     m_context.getBinaryCollection().get("comp0"));
    pipeline0.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline0.buildPipeline();
    ComputePipelineWrapper pipeline1(vk, device, m_computePipelineConstructionType,
                                     m_context.getBinaryCollection().get("comp1"));
    pipeline1.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline1.buildPipeline();

    const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(
        VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);

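    // Transition the image from UNDEFINED to GENERAL before the first dispatch touches it; the
    // transition is recorded in the same pipeline-barrier call as the uniform-buffer host barrier.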
    const VkImageMemoryBarrier imageLayoutBarrier =
        makeImageMemoryBarrier(0u, 0u, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *image, subresourceRange);

    const VkImageMemoryBarrier imageBarrierBetweenShaders =
        makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
                               VK_IMAGE_LAYOUT_GENERAL, *image, subresourceRange);

    const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(
        VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline0.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline0.getPipelineLayout(), 0u, 1u,
                             &descriptorSet.get(), 0u, nullptr);

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                          (VkDependencyFlags)0, 0, nullptr, 1, &writeUniformConstantsBarrier, 1, &imageLayoutBarrier);

    vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                          (VkDependencyFlags)0, 0, nullptr, 0, nullptr, 1, &imageBarrierBetweenShaders);

    // Switch to the second shader program
    pipeline1.bind(*cmdBuffer);

    vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                          (VkDependencyFlags)0, 0, nullptr, 1, &afterComputeBarrier, 0, nullptr);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results

    const Allocation &outputBufferAllocation = outputBuffer.getAllocation();
    invalidateAlloc(vk, device, outputBufferAllocation);

    const int numValues = multiplyComponents(m_imageSize);
    const uint32_t *bufferPtr = static_cast<uint32_t *>(outputBufferAllocation.getHostPtr());
    const uint32_t res = *bufferPtr;
    uint32_t ref = 0;

    for (int ndx = 0; ndx < numValues; ++ndx)
        ref += baseValue + ndx;

    if (res != ref)
    {
        std::ostringstream msg;
        msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
        return tcu::TestStatus::fail(msg.str());
    }
    return tcu::TestStatus::pass("Compute succeeded");
}
2550
2551 class ComputeTestInstance : public vkt::TestInstance
2552 {
2553 public:
ComputeTestInstance(Context & context,vk::ComputePipelineConstructionType computePipelineConstructionType,bool useMaintenance5)2554 ComputeTestInstance(Context &context, vk::ComputePipelineConstructionType computePipelineConstructionType,
2555 bool useMaintenance5)
2556 : TestInstance(context)
2557 , m_numPhysDevices(1)
2558 , m_queueFamilyIndex(0)
2559 , m_computePipelineConstructionType(computePipelineConstructionType)
2560 , m_maintenance5(useMaintenance5)
2561 {
2562 createDeviceGroup();
2563 }
2564
~ComputeTestInstance()2565 ~ComputeTestInstance()
2566 {
2567 }
2568
2569 void createDeviceGroup(void);
getDeviceInterface(void)2570 const vk::DeviceInterface &getDeviceInterface(void)
2571 {
2572 return *m_deviceDriver;
2573 }
2574 vk::VkInstance getInstance(void)
2575 {
2576 return m_deviceGroupInstance;
2577 }
2578 vk::VkDevice getDevice(void)
2579 {
2580 return *m_logicalDevice;
2581 }
2582 vk::VkPhysicalDevice getPhysicalDevice(uint32_t i = 0)
2583 {
2584 return m_physicalDevices[i];
2585 }
2586
2587 protected:
2588 uint32_t m_numPhysDevices;
2589 uint32_t m_queueFamilyIndex;
2590 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
2591 bool m_maintenance5;
2592
2593 private:
2594 CustomInstance m_deviceGroupInstance;
2595 vk::Move<vk::VkDevice> m_logicalDevice;
2596 std::vector<vk::VkPhysicalDevice> m_physicalDevices;
2597 #ifndef CTS_USES_VULKANSC
2598 de::MovePtr<vk::DeviceDriver> m_deviceDriver;
2599 #else
2600 de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter> m_deviceDriver;
2601 #endif // CTS_USES_VULKANSC
2602 };
2603
2604 void ComputeTestInstance::createDeviceGroup(void)
2605 {
2606 const tcu::CommandLine &cmdLine = m_context.getTestContext().getCommandLine();
2607 const uint32_t devGroupIdx = cmdLine.getVKDeviceGroupId() - 1;
2608 const uint32_t physDeviceIdx = cmdLine.getVKDeviceId() - 1;
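// The command-line device-group and device IDs are 1-based, hence the -1 adjustment
// to turn them into 0-based indices.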
2609 const float queuePriority = 1.0f;
2610 const std::vector<std::string> requiredExtensions(1, "VK_KHR_device_group_creation");
2611 m_deviceGroupInstance = createCustomInstanceWithExtensions(m_context, requiredExtensions);
2612 std::vector<VkPhysicalDeviceGroupProperties> devGroupProperties =
2613 enumeratePhysicalDeviceGroups(m_context.getInstanceInterface(), m_deviceGroupInstance);
2614 m_numPhysDevices = devGroupProperties[devGroupIdx].physicalDeviceCount;
2615 std::vector<const char *> deviceExtensions;
2616
2617 if (!isCoreDeviceExtension(m_context.getUsedApiVersion(), "VK_KHR_device_group"))
2618 deviceExtensions.push_back("VK_KHR_device_group");
2619
2620 if (m_maintenance5)
2621 deviceExtensions.push_back("VK_KHR_maintenance5");
2622
2625 VkDeviceGroupDeviceCreateInfo deviceGroupInfo = {
2626 VK_STRUCTURE_TYPE_DEVICE_GROUP_DEVICE_CREATE_INFO, //stype
2627 nullptr, //pNext
2628 devGroupProperties[devGroupIdx].physicalDeviceCount, //physicalDeviceCount
2629 devGroupProperties[devGroupIdx].physicalDevices //physicalDevices
2630 };
2631 const InstanceDriver &instance(m_deviceGroupInstance.getDriver());
2632 VkPhysicalDeviceFeatures2 deviceFeatures2 = initVulkanStructure();
2633 const VkPhysicalDeviceFeatures deviceFeatures =
2634 getPhysicalDeviceFeatures(instance, deviceGroupInfo.pPhysicalDevices[physDeviceIdx]);
2635 const std::vector<VkQueueFamilyProperties> queueProps = getPhysicalDeviceQueueFamilyProperties(
2636 instance, devGroupProperties[devGroupIdx].physicalDevices[physDeviceIdx]);
2637
2638 deviceFeatures2.features = deviceFeatures;
2639
2640 #ifndef CTS_USES_VULKANSC
2641 VkPhysicalDeviceDynamicRenderingFeaturesKHR dynamicRenderingFeatures = initVulkanStructure();
2642 dynamicRenderingFeatures.dynamicRendering = VK_TRUE;
2643 VkPhysicalDeviceShaderObjectFeaturesEXT shaderObjectFeatures = initVulkanStructure(&dynamicRenderingFeatures);
2644 shaderObjectFeatures.shaderObject = VK_TRUE;
2645 if (m_computePipelineConstructionType != COMPUTE_PIPELINE_CONSTRUCTION_TYPE_PIPELINE)
2646 {
2647 deviceExtensions.push_back("VK_EXT_shader_object");
2648 deviceFeatures2.pNext = &shaderObjectFeatures;
2649 }
2650 #endif
2651
2652 m_physicalDevices.resize(m_numPhysDevices);
2653 for (uint32_t physDevIdx = 0; physDevIdx < m_numPhysDevices; physDevIdx++)
2654 m_physicalDevices[physDevIdx] = devGroupProperties[devGroupIdx].physicalDevices[physDevIdx];
2655
2656 for (size_t queueNdx = 0; queueNdx < queueProps.size(); queueNdx++)
2657 {
2658 if (queueProps[queueNdx].queueFlags & VK_QUEUE_COMPUTE_BIT)
2659 m_queueFamilyIndex = (uint32_t)queueNdx;
2660 }
2661
2662 VkDeviceQueueCreateInfo queueInfo = {
2663 VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, // VkStructureType sType;
2664 nullptr, // const void* pNext;
2665 (VkDeviceQueueCreateFlags)0u, // VkDeviceQueueCreateFlags flags;
2666 m_queueFamilyIndex, // uint32_t queueFamilyIndex;
2667 1u, // uint32_t queueCount;
2668 &queuePriority // const float* pQueuePriorities;
2669 };
2670
2671 void *pNext = &deviceGroupInfo;
2672 if (deviceFeatures2.pNext != nullptr)
2673 deviceGroupInfo.pNext = &deviceFeatures2;
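// Resulting pNext chain: VkDeviceCreateInfo -> deviceGroupInfo -> deviceFeatures2 -> feature
// structs (when extra features are enabled). When deviceFeatures2 is on the chain,
// pEnabledFeatures below must stay null and the features are taken from the chain instead.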
2674
2675 #ifdef CTS_USES_VULKANSC
2676 VkDeviceObjectReservationCreateInfo memReservationInfo = cmdLine.isSubProcess() ?
2677 m_context.getResourceInterface()->getStatMax() :
2678 resetDeviceObjectReservationCreateInfo();
2679 memReservationInfo.pNext = pNext;
2680 pNext = &memReservationInfo;
2681
2682 VkPhysicalDeviceVulkanSC10Features sc10Features = createDefaultSC10Features();
2683 sc10Features.pNext = pNext;
2684 pNext = &sc10Features;
2685 VkPipelineCacheCreateInfo pcCI;
2686 std::vector<VkPipelinePoolSize> poolSizes;
2687 if (cmdLine.isSubProcess())
2688 {
2689 if (m_context.getResourceInterface()->getCacheDataSize() > 0)
2690 {
2691 pcCI = {
2692 VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType;
2693 nullptr, // const void* pNext;
2694 VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
2695 VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT, // VkPipelineCacheCreateFlags flags;
2696 m_context.getResourceInterface()->getCacheDataSize(), // uintptr_t initialDataSize;
2697 m_context.getResourceInterface()->getCacheData() // const void* pInitialData;
2698 };
2699 memReservationInfo.pipelineCacheCreateInfoCount = 1;
2700 memReservationInfo.pPipelineCacheCreateInfos = &pcCI;
2701 }
2702
2703 poolSizes = m_context.getResourceInterface()->getPipelinePoolSizes();
2704 if (!poolSizes.empty())
2705 {
2706 memReservationInfo.pipelinePoolSizeCount = uint32_t(poolSizes.size());
2707 memReservationInfo.pPipelinePoolSizes = poolSizes.data();
2708 }
2709 }
2710
2711 #endif // CTS_USES_VULKANSC
2712
2713 const VkDeviceCreateInfo deviceInfo = {
2714 VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, // VkStructureType sType;
2715 pNext, // const void* pNext;
2716 (VkDeviceCreateFlags)0, // VkDeviceCreateFlags flags;
2717 1u, // uint32_t queueCreateInfoCount;
2718 &queueInfo, // const VkDeviceQueueCreateInfo* pQueueCreateInfos;
2719 0u, // uint32_t enabledLayerCount;
2720 nullptr, // const char* const* ppEnabledLayerNames;
2721 uint32_t(deviceExtensions.size()), // uint32_t enabledExtensionCount;
2722 (deviceExtensions.empty() ? nullptr : &deviceExtensions[0]), // const char* const* ppEnabledExtensionNames;
2723 deviceFeatures2.pNext == nullptr ? &deviceFeatures :
2724 nullptr, // const VkPhysicalDeviceFeatures* pEnabledFeatures;
2725 };
2726
2727 m_logicalDevice = createCustomDevice(m_context.getTestContext().getCommandLine().isValidationEnabled(),
2728 m_context.getPlatformInterface(), m_deviceGroupInstance, instance,
2729 deviceGroupInfo.pPhysicalDevices[physDeviceIdx], &deviceInfo);
2730 #ifndef CTS_USES_VULKANSC
2731 m_deviceDriver = de::MovePtr<DeviceDriver>(new DeviceDriver(m_context.getPlatformInterface(), m_deviceGroupInstance,
2732 *m_logicalDevice, m_context.getUsedApiVersion(),
2733 m_context.getTestContext().getCommandLine()));
2734 #else
2735 m_deviceDriver = de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter>(
2736 new DeviceDriverSC(m_context.getPlatformInterface(), m_context.getInstance(), *m_logicalDevice,
2737 m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(),
2738 m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties(),
2739 m_context.getUsedApiVersion()),
2740 vk::DeinitDeviceDeleter(m_context.getResourceInterface().get(), *m_logicalDevice));
2741 #endif // CTS_USES_VULKANSC
2742 }
2743
2744 class DispatchBaseTest : public vkt::TestCase
2745 {
2746 public:
2747 DispatchBaseTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues,
2748 const tcu::IVec3 &localsize, const tcu::IVec3 &worksize, const tcu::IVec3 &splitsize,
2749 const vk::ComputePipelineConstructionType computePipelineConstructionType,
2750 const bool useMaintenance5);
2751
2752 virtual void checkSupport(Context &context) const;
2753 void initPrograms(SourceCollections &sourceCollections) const;
2754 TestInstance *createInstance(Context &context) const;
2755
2756 private:
2757 const uint32_t m_numValues;
2758 const tcu::IVec3 m_localSize;
2759 const tcu::IVec3 m_workSize;
2760 const tcu::IVec3 m_splitSize;
2761 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
2762 const bool m_useMaintenance5;
2763 };
2764
2765 class DispatchBaseTestInstance : public ComputeTestInstance
2766 {
2767 public:
2768 DispatchBaseTestInstance(Context &context, const uint32_t numValues, const tcu::IVec3 &localsize,
2769 const tcu::IVec3 &worksize, const tcu::IVec3 &splitsize,
2770 const vk::ComputePipelineConstructionType computePipelineConstructionType,
2771 const bool useMaintenance5);
2772
2773 bool isInputVectorValid(const tcu::IVec3 &small, const tcu::IVec3 &big);
2774 tcu::TestStatus iterate(void);
2775
2776 private:
2777 const uint32_t m_numValues;
2778 const tcu::IVec3 m_localSize;
2779 const tcu::IVec3 m_workSize;
2780 const tcu::IVec3 m_splitWorkSize;
2781 const bool m_useMaintenance5;
2782 };
2783
2784 DispatchBaseTest::DispatchBaseTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues,
2785 const tcu::IVec3 &localsize, const tcu::IVec3 &worksize, const tcu::IVec3 &splitsize,
2786 const vk::ComputePipelineConstructionType computePipelineConstructionType,
2787 const bool useMaintenance5)
2788 : TestCase(testCtx, name)
2789 , m_numValues(numValues)
2790 , m_localSize(localsize)
2791 , m_workSize(worksize)
2792 , m_splitSize(splitsize)
2793 , m_computePipelineConstructionType(computePipelineConstructionType)
2794 , m_useMaintenance5(useMaintenance5)
2795 {
2796 }
2797
2798 void DispatchBaseTest::checkSupport(Context &context) const
2799 {
2800 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
2801 m_computePipelineConstructionType);
2802 if (m_useMaintenance5)
2803 context.requireDeviceFunctionality("VK_KHR_maintenance5");
2804 }
2805
2806 void DispatchBaseTest::initPrograms(SourceCollections &sourceCollections) const
2807 {
2808 std::ostringstream src;
2809 src << "#version 310 es\n"
2810 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
2811 << ", local_size_z = " << m_localSize.z() << ") in;\n"
2812
2813 << "layout(binding = 0) buffer InOut {\n"
2814 << " uint values[" << de::toString(m_numValues) << "];\n"
2815 << "} sb_inout;\n"
2816
2817 << "layout(binding = 1) readonly uniform uniformInput {\n"
2818 << " uvec3 gridSize;\n"
2819 << "} ubo_in;\n"
2820
2821 << "void main (void) {\n"
2822 << " uvec3 size = ubo_in.gridSize * gl_WorkGroupSize;\n"
2823 << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
2824 << " uint index = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
2825 "gl_GlobalInvocationID.x;\n"
2826 << " uint offset = numValuesPerInv*index;\n"
2827 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
2828 << " sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
2829 << "}\n";
2830
2831 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
2832 }
2833
2834 TestInstance *DispatchBaseTest::createInstance(Context &context) const
2835 {
2836 return new DispatchBaseTestInstance(context, m_numValues, m_localSize, m_workSize, m_splitSize,
2837 m_computePipelineConstructionType, m_useMaintenance5);
2838 }
2839
2840 DispatchBaseTestInstance::DispatchBaseTestInstance(
2841 Context &context, const uint32_t numValues, const tcu::IVec3 &localsize, const tcu::IVec3 &worksize,
2842 const tcu::IVec3 &splitsize, const vk::ComputePipelineConstructionType computePipelineConstructionType,
2843 const bool useMaintenance5)
2844
2845 : ComputeTestInstance(context, computePipelineConstructionType, useMaintenance5)
2846 , m_numValues(numValues)
2847 , m_localSize(localsize)
2848 , m_workSize(worksize)
2849 , m_splitWorkSize(splitsize)
2850 , m_useMaintenance5(useMaintenance5)
2851 {
2852 // For easy work distribution across physical devices:
2853 // WorkSize must be a component-wise multiple of SplitWorkSize, and may exceed it only in the X component
2854 if ((!isInputVectorValid(m_splitWorkSize, m_workSize)) || (m_workSize.x() <= m_splitWorkSize.x()) ||
2855 (m_workSize.y() != m_splitWorkSize.y()) || (m_workSize.z() != m_splitWorkSize.z()))
2856 TCU_THROW(TestError, "Invalid Input.");
2857
2858 // For easy work distribution within the same physical device:
2859 // SplitWorkSize must be a component-wise multiple of localSize, matching it in X and exceeding it in both Y and Z
2860 if ((!isInputVectorValid(m_localSize, m_splitWorkSize)) || (m_localSize.x() != m_splitWorkSize.x()) ||
2861 (m_localSize.y() >= m_splitWorkSize.y()) || (m_localSize.z() >= m_splitWorkSize.z()))
2862 TCU_THROW(TestError, "Invalid Input.");
2863
2864 if ((multiplyComponents(m_workSize) / multiplyComponents(m_splitWorkSize)) < (int32_t)m_numPhysDevices)
2865 TCU_THROW(TestError, "Not enough work to distribute across all physical devices.");
2866
2867 uint32_t totalWork = multiplyComponents(m_workSize) * multiplyComponents(m_localSize);
2868 if ((totalWork > numValues) || (numValues % totalWork != 0))
2869 TCU_THROW(TestError, "Buffer too small/not aligned to cover all values.");
2870 }
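// Purely illustrative example of inputs that satisfy every check above: with
// localSize = (2,2,2), splitWorkSize = (2,4,4) and workSize = (8,4,4), the split size
// matches the local size in X and is a multiple of it in Y and Z, while the work size
// is split only along X into 8 / 2 = 4 per-device chunks.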
2871
2872 bool DispatchBaseTestInstance::isInputVectorValid(const tcu::IVec3 &small, const tcu::IVec3 &big)
2873 {
2874 if (((big.x() < small.x()) || (big.y() < small.y()) || (big.z() < small.z())) ||
2875 ((big.x() % small.x() != 0) || (big.y() % small.y() != 0) || (big.z() % small.z() != 0)))
2876 return false;
2877 return true;
2878 }
2879
2880 tcu::TestStatus DispatchBaseTestInstance::iterate(void)
2881 {
2882 const DeviceInterface &vk = getDeviceInterface();
2883 const VkDevice device = getDevice();
2884 const VkQueue queue = getDeviceQueue(vk, device, m_queueFamilyIndex, 0);
2885 SimpleAllocator allocator(vk, device,
2886 getPhysicalDeviceMemoryProperties(m_context.getInstanceInterface(), getPhysicalDevice()));
2887 uint32_t totalWorkloadSize = 0;
2888
2889 // Create a uniform buffer and an input/output buffer
2890 const uint32_t uniformBufSize = 3; // Pass the compute grid size
2891 const VkDeviceSize uniformBufferSizeBytes = sizeof(uint32_t) * uniformBufSize;
2892 const BufferWithMemory uniformBuffer(
2893 vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT),
2894 MemoryRequirement::HostVisible);
2895
2896 const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * m_numValues;
2897 const BufferWithMemory buffer(vk, device, allocator,
2898 makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
2899 MemoryRequirement::HostVisible);
2900
2901 // Fill the buffers with data
2902 typedef std::vector<uint32_t> data_vector_t;
2903 data_vector_t uniformInputData(uniformBufSize);
2904 data_vector_t inputData(m_numValues);
2905
2906 {
2907 const Allocation &bufferAllocation = uniformBuffer.getAllocation();
2908 uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());
2909 uniformInputData[0] = *bufferPtr++ = m_workSize.x();
2910 uniformInputData[1] = *bufferPtr++ = m_workSize.y();
2911 uniformInputData[2] = *bufferPtr++ = m_workSize.z();
2912 flushAlloc(vk, device, bufferAllocation);
2913 }
2914
2915 {
2916 de::Random rnd(0x82ce7f);
2917 const Allocation &bufferAllocation = buffer.getAllocation();
2918 uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());
2919 for (uint32_t i = 0; i < m_numValues; ++i)
2920 inputData[i] = *bufferPtr++ = rnd.getUint32();
2921
2922 flushAlloc(vk, device, bufferAllocation);
2923 }
2924
2925 // Create descriptor set
2926 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2927 DescriptorSetLayoutBuilder()
2928 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2929 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2930 .build(vk, device));
2931
2932 const Unique<VkDescriptorPool> descriptorPool(
2933 DescriptorPoolBuilder()
2934 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2935 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
2936 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2937
2938 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2939
2940 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
2941 const VkDescriptorBufferInfo uniformBufferDescriptorInfo =
2942 makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
2943
2944 DescriptorSetUpdateBuilder()
2945 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
2946 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
2947 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
2948 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
2949 .update(vk, device);
2950
2951 ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
2952 m_context.getBinaryCollection().get("comp"));
2953 pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
2954 pipeline.setPipelineCreateFlags(VK_PIPELINE_CREATE_DISPATCH_BASE);
2955
2956 #ifndef CTS_USES_VULKANSC
2957 VkPipelineCreateFlags2CreateInfoKHR pipelineFlags2CreateInfo = initVulkanStructure();
2958 if (m_useMaintenance5)
2959 {
2960 pipelineFlags2CreateInfo.flags = VK_PIPELINE_CREATE_2_DISPATCH_BASE_BIT_KHR;
2961 pipeline.setPipelineCreatePNext(&pipelineFlags2CreateInfo);
2962 pipeline.setPipelineCreateFlags(0);
2963 }
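// With VK_KHR_maintenance5 the dispatch-base flag is supplied through
// VkPipelineCreateFlags2CreateInfoKHR instead; per the spec, flags in that structure
// take precedence over the legacy VkPipelineCreateFlags, which is therefore cleared.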
2964 #else
2965 DE_UNREF(m_useMaintenance5);
2966 #endif // CTS_USES_VULKANSC
2967
2968 pipeline.buildPipeline();
2969
2970 const VkBufferMemoryBarrier hostWriteBarrier =
2971 makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
2972 const VkBufferMemoryBarrier hostUniformWriteBarrier = makeBufferMemoryBarrier(
2973 VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
2974
2975 const VkBufferMemoryBarrier shaderWriteBarrier =
2976 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
2977
2978 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, m_queueFamilyIndex));
2979 const Unique<VkCommandBuffer> cmdBuffer(
2980 allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
2981
2982 // Start recording commands
2983 beginCommandBuffer(vk, *cmdBuffer);
2984
2985 pipeline.bind(*cmdBuffer);
2986 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
2987 &descriptorSet.get(), 0u, nullptr);
2988
2989 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
2990 (VkDependencyFlags)0, 0, nullptr, 1, &hostUniformWriteBarrier, 0, nullptr);
2991
2992 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
2993 (VkDependencyFlags)0, 0, nullptr, 1, &hostWriteBarrier, 0, nullptr);
2994
2995 // Split the workload across all physical devices based on m_splitWorkSize.x()
2996 for (uint32_t physDevIdx = 0; physDevIdx < m_numPhysDevices; physDevIdx++)
2997 {
2998 uint32_t baseGroupX = physDevIdx * m_splitWorkSize.x();
2999 uint32_t baseGroupY = 0;
3000 uint32_t baseGroupZ = 0;
3001
3002 // Split the workload within the physical device based on m_localSize.y() and m_localSize.z()
3003 for (int32_t localIdxY = 0; localIdxY < (m_splitWorkSize.y() / m_localSize.y()); localIdxY++)
3004 {
3005 for (int32_t localIdxZ = 0; localIdxZ < (m_splitWorkSize.z() / m_localSize.z()); localIdxZ++)
3006 {
3007 uint32_t offsetX = baseGroupX;
3008 uint32_t offsetY = baseGroupY + localIdxY * m_localSize.y();
3009 uint32_t offsetZ = baseGroupZ + localIdxZ * m_localSize.z();
3010
3011 uint32_t localSizeX =
3012 (physDevIdx == (m_numPhysDevices - 1)) ? m_workSize.x() - baseGroupX : m_localSize.x();
3013 uint32_t localSizeY = m_localSize.y();
3014 uint32_t localSizeZ = m_localSize.z();
3015
3016 totalWorkloadSize += (localSizeX * localSizeY * localSizeZ);
3017 vk.cmdDispatchBase(*cmdBuffer, offsetX, offsetY, offsetZ, localSizeX, localSizeY, localSizeZ);
3018 }
3019 }
3020 }
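// Each physical device's chunk starts at baseGroupX = physDevIdx * splitWorkSize.x(); the
// last device's X group count is widened to reach workSize.x(). The check below verifies
// that the vkCmdDispatchBase calls cover exactly multiplyComponents(workSize) workgroups.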
3021
3022 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
3023 (VkDependencyFlags)0, 0, nullptr, 1, &shaderWriteBarrier, 0, nullptr);
3024
3025 endCommandBuffer(vk, *cmdBuffer);
3026 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3027
3028 if (totalWorkloadSize != uint32_t(multiplyComponents(m_workSize)))
3029 TCU_THROW(TestError, "Not covering the entire workload.");
3030
3031 // Validate the results
3032 const Allocation &bufferAllocation = buffer.getAllocation();
3033 invalidateAlloc(vk, device, bufferAllocation);
3034 const uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());
3035
3036 for (uint32_t ndx = 0; ndx < m_numValues; ++ndx)
3037 {
3038 const uint32_t res = bufferPtr[ndx];
3039 const uint32_t ref = ~inputData[ndx];
3040
3041 if (res != ref)
3042 {
3043 std::ostringstream msg;
3044 msg << "Comparison failed for InOut.values[" << ndx << "]";
3045 return tcu::TestStatus::fail(msg.str());
3046 }
3047 }
3048 return tcu::TestStatus::pass("Compute succeeded");
3049 }
3050
3051 class DeviceIndexTest : public vkt::TestCase
3052 {
3053 public:
3054 DeviceIndexTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues,
3055 const tcu::IVec3 &localsize, const tcu::IVec3 &worksize,
3056 const vk::ComputePipelineConstructionType computePipelineConstructionType);
3057
3058 virtual void checkSupport(Context &context) const;
3059 void initPrograms(SourceCollections &sourceCollections) const;
3060 TestInstance *createInstance(Context &context) const;
3061
3062 private:
3063 const uint32_t m_numValues;
3064 const tcu::IVec3 m_localSize;
3065 const tcu::IVec3 m_workSize;
3067 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
3068 };
3069
3070 class DeviceIndexTestInstance : public ComputeTestInstance
3071 {
3072 public:
3073 DeviceIndexTestInstance(Context &context, const uint32_t numValues, const tcu::IVec3 &localsize,
3074 const tcu::IVec3 &worksize,
3075 const vk::ComputePipelineConstructionType computePipelineConstructionType);
3076 tcu::TestStatus iterate(void);
3077
3078 private:
3079 const uint32_t m_numValues;
3080 const tcu::IVec3 m_localSize;
3081 tcu::IVec3 m_workSize;
3082 };
3083
3084 DeviceIndexTest::DeviceIndexTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues,
3085 const tcu::IVec3 &localsize, const tcu::IVec3 &worksize,
3086 const vk::ComputePipelineConstructionType computePipelineConstructionType)
3087 : TestCase(testCtx, name)
3088 , m_numValues(numValues)
3089 , m_localSize(localsize)
3090 , m_workSize(worksize)
3091 , m_computePipelineConstructionType(computePipelineConstructionType)
3092 {
3093 }
3094
3095 void DeviceIndexTest::checkSupport(Context &context) const
3096 {
3097 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
3098 m_computePipelineConstructionType);
3099 }
3100
3101 void DeviceIndexTest::initPrograms(SourceCollections &sourceCollections) const
3102 {
3103 std::ostringstream src;
3104 src << "#version 310 es\n"
3105 << "#extension GL_EXT_device_group : require\n"
3106 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
3107 << ", local_size_z = " << m_localSize.z() << ") in;\n"
3108
3109 << "layout(binding = 0) buffer InOut {\n"
3110 << " uint values[" << de::toString(m_numValues) << "];\n"
3111 << "} sb_inout;\n"
3112
3113 << "layout(binding = 1) readonly uniform uniformInput {\n"
3114 << " uint baseOffset[1+" << VK_MAX_DEVICE_GROUP_SIZE << "];\n"
3115 << "} ubo_in;\n"
3116
3117 << "void main (void) {\n"
3118 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
3119 << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
3120 << " uint index = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
3121 "gl_GlobalInvocationID.x;\n"
3122 << " uint offset = numValuesPerInv*index;\n"
3123 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
3124 << " sb_inout.values[offset + ndx] = ubo_in.baseOffset[0] + ubo_in.baseOffset[gl_DeviceIndex + 1];\n"
3125 << "}\n";
3126
3127 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
3128 }
3129
3130 TestInstance *DeviceIndexTest::createInstance(Context &context) const
3131 {
3132 return new DeviceIndexTestInstance(context, m_numValues, m_localSize, m_workSize,
3133 m_computePipelineConstructionType);
3134 }
3135
3136 DeviceIndexTestInstance::DeviceIndexTestInstance(
3137 Context &context, const uint32_t numValues, const tcu::IVec3 &localsize, const tcu::IVec3 &worksize,
3138 const vk::ComputePipelineConstructionType computePipelineConstructionType)
3139
3140 : ComputeTestInstance(context, computePipelineConstructionType, false)
3141 , m_numValues(numValues)
3142 , m_localSize(localsize)
3143 , m_workSize(worksize)
3144 {
3145 }
3146
3147 tcu::TestStatus DeviceIndexTestInstance::iterate(void)
3148 {
3149 const DeviceInterface &vk = getDeviceInterface();
3150 const VkDevice device = getDevice();
3151 const VkQueue queue = getDeviceQueue(vk, device, m_queueFamilyIndex, 0);
3152 SimpleAllocator allocator(vk, device,
3153 getPhysicalDeviceMemoryProperties(m_context.getInstanceInterface(), getPhysicalDevice()));
3154 const uint32_t allocDeviceMask = (1 << m_numPhysDevices) - 1;
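// One bit per physical device in the group, e.g. three devices -> 0b111.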
3155 de::Random rnd(0x82ce7f);
3156 Move<VkBuffer> sboBuffer;
3157 vk::Move<vk::VkDeviceMemory> sboBufferMemory;
3158
3159 // Create a uniform buffer and an output (check) buffer
3160 const uint32_t uniformBufSize = 4 * (1 + VK_MAX_DEVICE_GROUP_SIZE);
3161 const VkDeviceSize uniformBufferSizeBytes = sizeof(uint32_t) * uniformBufSize;
3162 const BufferWithMemory uniformBuffer(
3163 vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT),
3164 MemoryRequirement::HostVisible);
3165
3166 const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * m_numValues;
3167 const BufferWithMemory checkBuffer(vk, device, allocator,
3168 makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT),
3169 MemoryRequirement::HostVisible);
3170
3171 // Create the storage (SSBO) buffer
3172 {
3173 const VkBufferCreateInfo sboBufferParams = {
3174 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // sType
3175 nullptr, // pNext
3176 0u, // flags
3177 (VkDeviceSize)bufferSizeBytes, // size
3178 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, // usage
3179 VK_SHARING_MODE_EXCLUSIVE, // sharingMode
3180 1u, // queueFamilyIndexCount
3181 &m_queueFamilyIndex, // pQueueFamilyIndices
3182 };
3183 sboBuffer = createBuffer(vk, device, &sboBufferParams);
3184
3185 VkMemoryRequirements memReqs = getBufferMemoryRequirements(vk, device, sboBuffer.get());
3186 uint32_t memoryTypeNdx = 0;
3187 const VkPhysicalDeviceMemoryProperties deviceMemProps =
3188 getPhysicalDeviceMemoryProperties(m_context.getInstanceInterface(), getPhysicalDevice());
3189 for (memoryTypeNdx = 0; memoryTypeNdx < deviceMemProps.memoryTypeCount; memoryTypeNdx++)
3190 {
3191 if ((memReqs.memoryTypeBits & (1u << memoryTypeNdx)) != 0 &&
3192 (deviceMemProps.memoryTypes[memoryTypeNdx].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) ==
3193 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
3194 break;
3195 }
3196 if (memoryTypeNdx == deviceMemProps.memoryTypeCount)
3197 TCU_THROW(NotSupportedError, "No compatible memory type found");
3198
3199 const VkMemoryAllocateFlagsInfo allocDeviceMaskInfo = {
3200 VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, // sType
3201 nullptr, // pNext
3202 VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT, // flags
3203 allocDeviceMask, // deviceMask
3204 };
3205
3206 VkMemoryAllocateInfo allocInfo = {
3207 VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType
3208 &allocDeviceMaskInfo, // pNext
3209 memReqs.size, // allocationSize
3210 memoryTypeNdx, // memoryTypeIndex
3211 };
3212
3213 sboBufferMemory = allocateMemory(vk, device, &allocInfo);
3214 VK_CHECK(vk.bindBufferMemory(device, *sboBuffer, sboBufferMemory.get(), 0));
3215 }
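// The allocation above uses VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT with every device bit set,
// so each physical device in the group gets its own replica of the SSBO memory to write.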
3216
3217 // Fill the buffers with data
3218 typedef std::vector<uint32_t> data_vector_t;
3219 data_vector_t uniformInputData(uniformBufSize, 0);
3220
3221 {
3222 const Allocation &bufferAllocation = uniformBuffer.getAllocation();
3223 uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());
3224 for (uint32_t i = 0; i < uniformBufSize; ++i)
3225 uniformInputData[i] = *bufferPtr++ = rnd.getUint32() / 10; // divide to prevent overflow in addition
3226
3227 flushAlloc(vk, device, bufferAllocation);
3228 }
3229
3230 // Create descriptor set
3231 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3232 DescriptorSetLayoutBuilder()
3233 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
3234 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
3235 .build(vk, device));
3236
3237 const Unique<VkDescriptorPool> descriptorPool(
3238 DescriptorPoolBuilder()
3239 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
3240 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
3241 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3242
3243 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
3244
3245 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*sboBuffer, 0ull, bufferSizeBytes);
3246 const VkDescriptorBufferInfo uniformBufferDescriptorInfo =
3247 makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
3248
3249 DescriptorSetUpdateBuilder()
3250 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
3251 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
3252 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
3253 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
3254 .update(vk, device);
3255
3256 ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
3257 m_context.getBinaryCollection().get("comp"));
3258 pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
3259 pipeline.buildPipeline();
3260
3261 const VkBufferMemoryBarrier hostUniformWriteBarrier = makeBufferMemoryBarrier(
3262 VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
3263 const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(
3264 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *sboBuffer, 0ull, bufferSizeBytes);
3265
3266 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, m_queueFamilyIndex));
3267 const Unique<VkCommandBuffer> cmdBuffer(
3268 allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
3269
3270 // Verify multiple device masks
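// e.g. with two physical devices this iterates masks 0b01, 0b10 and 0b11, exercising
// each device alone and both devices at once.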
3271 for (uint32_t physDevMask = 1; physDevMask < (1u << m_numPhysDevices); physDevMask++)
3272 {
3273 uint32_t constantValPerLoop = 0;
3274 {
3275 const Allocation &bufferAllocation = uniformBuffer.getAllocation();
3276 uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());
3277 constantValPerLoop = *bufferPtr = rnd.getUint32() / 10; // divide to prevent overflow in addition
3278 flushAlloc(vk, device, bufferAllocation);
3279 }
3280 beginCommandBuffer(vk, *cmdBuffer);
3281
3282 pipeline.bind(*cmdBuffer);
3283 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
3284 &descriptorSet.get(), 0u, nullptr);
3285 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
3286 (VkDependencyFlags)0, 0, nullptr, 1, &hostUniformWriteBarrier, 0, nullptr);
3287
3288 vk.cmdSetDeviceMask(*cmdBuffer, physDevMask);
3289 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
3290
3291 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
3292 (VkDependencyFlags)0, 0, nullptr, 1, &shaderWriteBarrier, 0, nullptr);
3293
3294 endCommandBuffer(vk, *cmdBuffer);
3295 submitCommandsAndWait(vk, device, queue, *cmdBuffer, true, physDevMask);
3296 m_context.resetCommandPoolForVKSC(device, *cmdPool);
3297
3298 // Validate the results on all physical devices where compute shader was launched
3299 const VkBufferMemoryBarrier srcBufferBarrier = makeBufferMemoryBarrier(
3300 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *sboBuffer, 0ull, bufferSizeBytes);
3301 const VkBufferMemoryBarrier dstBufferBarrier = makeBufferMemoryBarrier(
3302 VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *checkBuffer, 0ull, bufferSizeBytes);
3303 const VkBufferCopy copyParams = {
3304 (VkDeviceSize)0u, // srcOffset
3305 (VkDeviceSize)0u, // dstOffset
3306 bufferSizeBytes // size
3307 };
3308
3309 for (uint32_t physDevIdx = 0; physDevIdx < m_numPhysDevices; physDevIdx++)
3310 {
3311 if (!((1u << physDevIdx) & physDevMask))
3312 continue;
3313
3314 const uint32_t deviceMask = 1 << physDevIdx;
3315
3316 beginCommandBuffer(vk, *cmdBuffer);
3317 vk.cmdSetDeviceMask(*cmdBuffer, deviceMask);
3318 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
3319 (VkDependencyFlags)0, 0, nullptr, 1, &srcBufferBarrier, 0, nullptr);
3320 vk.cmdCopyBuffer(*cmdBuffer, *sboBuffer, *checkBuffer, 1, &copyParams);
3321 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
3322 (VkDependencyFlags)0, 0, nullptr, 1, &dstBufferBarrier, 0, nullptr);
3323
3324 endCommandBuffer(vk, *cmdBuffer);
3325 submitCommandsAndWait(vk, device, queue, *cmdBuffer, true, deviceMask);
3326
3327 const Allocation &bufferAllocation = checkBuffer.getAllocation();
3328 invalidateAlloc(vk, device, bufferAllocation);
3329 const uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());
3330
3331 for (uint32_t ndx = 0; ndx < m_numValues; ++ndx)
3332 {
3333 const uint32_t res = bufferPtr[ndx];
3334 const uint32_t ref = constantValPerLoop + uniformInputData[4 * (physDevIdx + 1)];
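// baseOffset is a uint array in a std140 uniform block, so each element has a 16-byte
// stride; element i therefore begins at 32-bit word 4 * i, hence the 4 * (physDevIdx + 1)
// index into the host-side copy of the data.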
3335
3336 if (res != ref)
3337 {
3338 std::ostringstream msg;
3339 msg << "Comparison failed on physical device " << getPhysicalDevice(physDevIdx) << " ( deviceMask "
3340 << deviceMask << " ) for InOut.values[" << ndx << "]";
3341 return tcu::TestStatus::fail(msg.str());
3342 }
3343 }
3344 }
3345 }
3346
3347 return tcu::TestStatus::pass("Compute succeeded");
3348 }
3349
3350 class ConcurrentCompute : public vkt::TestCase
3351 {
3352 public:
3353 ConcurrentCompute(tcu::TestContext &testCtx, const std::string &name,
3354 const vk::ComputePipelineConstructionType computePipelineConstructionType);
3355
3356 virtual void checkSupport(Context &context) const;
3357 void initPrograms(SourceCollections &sourceCollections) const;
3358 TestInstance *createInstance(Context &context) const;
3359
3360 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
3361 };
3362
3363 class ConcurrentComputeInstance : public vkt::TestInstance
3364 {
3365 public:
3366 ConcurrentComputeInstance(Context &context,
3367 const vk::ComputePipelineConstructionType computePipelineConstructionType);
3368
3369 tcu::TestStatus iterate(void);
3370
3371 private:
3372 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
3373 };
3374
3375 ConcurrentCompute::ConcurrentCompute(tcu::TestContext &testCtx, const std::string &name,
3376 const vk::ComputePipelineConstructionType computePipelineConstructionType)
3377 : TestCase(testCtx, name)
3378 , m_computePipelineConstructionType(computePipelineConstructionType)
3379 {
3380 }
3381
3382 void ConcurrentCompute::checkSupport(Context &context) const
3383 {
3384 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
3385 m_computePipelineConstructionType);
3386 }
3387
3388 void ConcurrentCompute::initPrograms(SourceCollections &sourceCollections) const
3389 {
3390 std::ostringstream src;
3391 src << "#version 310 es\n"
3392 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3393 << "layout(binding = 0) buffer InOut {\n"
3394 << " uint values[1024];\n"
3395 << "} sb_inout;\n"
3396 << "void main (void) {\n"
3397 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
3398 << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
3399 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
3400 "gl_GlobalInvocationID.x;\n"
3401 << " uint offset = numValuesPerInv*groupNdx;\n"
3402 << "\n"
3403 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
3404 << " sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
3405 << "}\n";
3406
3407 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
3408 }
3409
3410 TestInstance *ConcurrentCompute::createInstance(Context &context) const
3411 {
3412 return new ConcurrentComputeInstance(context, m_computePipelineConstructionType);
3413 }
3414
3415 ConcurrentComputeInstance::ConcurrentComputeInstance(
3416 Context &context, const vk::ComputePipelineConstructionType computePipelineConstructionType)
3417 : TestInstance(context)
3418 , m_computePipelineConstructionType(computePipelineConstructionType)
3419 {
3420 }
3421
3422 tcu::TestStatus ConcurrentComputeInstance::iterate(void)
3423 {
3424 enum
3425 {
3426 NO_MATCH_FOUND = ~((uint32_t)0),
3427 ERROR_NONE = 0,
3428 ERROR_WAIT = 1,
3429 ERROR_ORDER = 2
3430 };
3431
3432 struct Queues
3433 {
3434 VkQueue queue;
3435 uint32_t queueFamilyIndex;
3436 };
3437
3439 const uint32_t numValues = 1024;
3440 const CustomInstance instance(createCustomInstanceFromContext(m_context));
3441 const InstanceDriver &instanceDriver(instance.getDriver());
3442 const VkPhysicalDevice physicalDevice =
3443 chooseDevice(instanceDriver, instance, m_context.getTestContext().getCommandLine());
3444 tcu::TestLog &log = m_context.getTestContext().getLog();
3445 vk::Move<vk::VkDevice> logicalDevice;
3446 std::vector<VkQueueFamilyProperties> queueFamilyProperties;
3447 VkDeviceCreateInfo deviceInfo;
3448 VkPhysicalDeviceFeatures2 deviceFeatures2 = initVulkanStructure();
3449 VkPhysicalDeviceFeatures deviceFeatures;
3450 const float queuePriorities[2] = {1.0f, 0.0f};
3451 VkDeviceQueueCreateInfo queueInfos[2];
3452 Queues queues[2] = {{nullptr, (uint32_t)NO_MATCH_FOUND}, {nullptr, (uint32_t)NO_MATCH_FOUND}};
3453
3454 queueFamilyProperties = getPhysicalDeviceQueueFamilyProperties(instanceDriver, physicalDevice);
3455
3456 for (uint32_t queueNdx = 0; queueNdx < queueFamilyProperties.size(); ++queueNdx)
3457 {
3458 if (queueFamilyProperties[queueNdx].queueFlags & VK_QUEUE_COMPUTE_BIT)
3459 {
3460 if (NO_MATCH_FOUND == queues[0].queueFamilyIndex)
3461 queues[0].queueFamilyIndex = queueNdx;
3462
3463 if (queues[0].queueFamilyIndex != queueNdx || queueFamilyProperties[queueNdx].queueCount > 1u)
3464 {
3465 queues[1].queueFamilyIndex = queueNdx;
3466 break;
3467 }
3468 }
3469 }
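// Queue 1 ends up either in a different compute-capable family, or as a second queue in
// queue 0's family when that family exposes more than one queue.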
3470
3471 if (queues[0].queueFamilyIndex == NO_MATCH_FOUND || queues[1].queueFamilyIndex == NO_MATCH_FOUND)
3472 TCU_THROW(NotSupportedError, "Two compute queues are not available");
3473
3474 for (int queueNdx = 0; queueNdx < 2; ++queueNdx)
3475 {
3476 VkDeviceQueueCreateInfo queueInfo;
3477 deMemset(&queueInfo, 0, sizeof(queueInfo));
3478
3479 queueInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
3480 queueInfo.pNext = nullptr;
3481 queueInfo.flags = (VkDeviceQueueCreateFlags)0u;
3482 queueInfo.queueFamilyIndex = queues[queueNdx].queueFamilyIndex;
3483 queueInfo.queueCount = (queues[0].queueFamilyIndex == queues[1].queueFamilyIndex) ? 2 : 1;
3484 queueInfo.pQueuePriorities = (queueInfo.queueCount == 2) ? queuePriorities : &queuePriorities[queueNdx];
3485
3486 queueInfos[queueNdx] = queueInfo;
3487
3488 if (queues[0].queueFamilyIndex == queues[1].queueFamilyIndex)
3489 break;
3490 }
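// Queue 0 is requested at priority 1.0 (high) and queue 1 at 0.0 (low). When both queues
// come from the same family, a single create info requests two queues with both priorities;
// otherwise each family gets one queue with its own priority.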
3491
3492 void *pNext = nullptr;
3493
3494 deMemset(&deviceInfo, 0, sizeof(deviceInfo));
3495 instanceDriver.getPhysicalDeviceFeatures(physicalDevice, &deviceFeatures);
3496
3497 deviceFeatures2.features = deviceFeatures;
3498
3499 std::vector<const char *> deviceExtensions;
3500
3501 #ifndef CTS_USES_VULKANSC
3502 VkPhysicalDeviceDynamicRenderingFeaturesKHR dynamicRenderingFeatures = initVulkanStructure();
3503 dynamicRenderingFeatures.dynamicRendering = VK_TRUE;
3504 VkPhysicalDeviceShaderObjectFeaturesEXT shaderObjectFeatures = initVulkanStructure(&dynamicRenderingFeatures);
3505 shaderObjectFeatures.shaderObject = VK_TRUE;
3506
3507 if (m_computePipelineConstructionType != COMPUTE_PIPELINE_CONSTRUCTION_TYPE_PIPELINE)
3508 {
3509 deviceExtensions.push_back("VK_EXT_shader_object");
3510 deviceFeatures2.pNext = &shaderObjectFeatures;
3511 pNext = &deviceFeatures2;
3512 }
3513 #endif
3514
3515 #ifdef CTS_USES_VULKANSC
3516 VkDeviceObjectReservationCreateInfo memReservationInfo =
3517 m_context.getTestContext().getCommandLine().isSubProcess() ? m_context.getResourceInterface()->getStatMax() :
3518 resetDeviceObjectReservationCreateInfo();
3519 memReservationInfo.pNext = pNext;
3520 pNext = &memReservationInfo;
3521
3522 VkPhysicalDeviceVulkanSC10Features sc10Features = createDefaultSC10Features();
3523 sc10Features.pNext = pNext;
3524 pNext = &sc10Features;
3525
3526 VkPipelineCacheCreateInfo pcCI;
3527 std::vector<VkPipelinePoolSize> poolSizes;
3528 if (m_context.getTestContext().getCommandLine().isSubProcess())
3529 {
3530 if (m_context.getResourceInterface()->getCacheDataSize() > 0)
3531 {
3532 pcCI = {
3533 VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType;
3534 nullptr, // const void* pNext;
3535 VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
3536 VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT, // VkPipelineCacheCreateFlags flags;
3537 m_context.getResourceInterface()->getCacheDataSize(), // uintptr_t initialDataSize;
3538 m_context.getResourceInterface()->getCacheData() // const void* pInitialData;
3539 };
3540 memReservationInfo.pipelineCacheCreateInfoCount = 1;
3541 memReservationInfo.pPipelineCacheCreateInfos = &pcCI;
3542 }
3543
3544 poolSizes = m_context.getResourceInterface()->getPipelinePoolSizes();
3545 if (!poolSizes.empty())
3546 {
3547 memReservationInfo.pipelinePoolSizeCount = uint32_t(poolSizes.size());
3548 memReservationInfo.pPipelinePoolSizes = poolSizes.data();
3549 }
3550 }
3551 #endif // CTS_USES_VULKANSC
3552
3553 deviceInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
3554 deviceInfo.pNext = pNext;
3555 deviceInfo.enabledExtensionCount = (uint32_t)deviceExtensions.size();
3556 deviceInfo.ppEnabledExtensionNames = deviceExtensions.data();
3557 deviceInfo.enabledLayerCount = 0u;
3558 deviceInfo.ppEnabledLayerNames = nullptr;
3559 deviceInfo.pEnabledFeatures = (deviceFeatures2.pNext == nullptr) ? &deviceFeatures : nullptr;
3560 deviceInfo.queueCreateInfoCount = (queues[0].queueFamilyIndex == queues[1].queueFamilyIndex) ? 1 : 2;
3561 deviceInfo.pQueueCreateInfos = queueInfos;
3562
3563 logicalDevice =
3564 createCustomDevice(m_context.getTestContext().getCommandLine().isValidationEnabled(),
3565 m_context.getPlatformInterface(), instance, instanceDriver, physicalDevice, &deviceInfo);
3566
3567 #ifndef CTS_USES_VULKANSC
3568 de::MovePtr<vk::DeviceDriver> deviceDriver = de::MovePtr<DeviceDriver>(
3569 new DeviceDriver(m_context.getPlatformInterface(), instance, *logicalDevice, m_context.getUsedApiVersion(),
3570 m_context.getTestContext().getCommandLine()));
3571 #else
3572 de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter> deviceDriver =
3573 de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter>(
3574 new DeviceDriverSC(m_context.getPlatformInterface(), instance, *logicalDevice,
3575 m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(),
3576 m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties(),
3577 m_context.getUsedApiVersion()),
3578 vk::DeinitDeviceDeleter(m_context.getResourceInterface().get(), *logicalDevice));
3579 #endif // CTS_USES_VULKANSC
3580 vk::DeviceInterface &vk = *deviceDriver;
3581
3582 for (uint32_t queueReqNdx = 0; queueReqNdx < 2; ++queueReqNdx)
3583 {
3584 if (queues[0].queueFamilyIndex == queues[1].queueFamilyIndex)
3585 vk.getDeviceQueue(*logicalDevice, queues[queueReqNdx].queueFamilyIndex, queueReqNdx,
3586 &queues[queueReqNdx].queue);
3587 else
3588 vk.getDeviceQueue(*logicalDevice, queues[queueReqNdx].queueFamilyIndex, 0u, &queues[queueReqNdx].queue);
3589 }
3590
3591 // Create the input/output buffers
3592 const VkPhysicalDeviceMemoryProperties memoryProperties =
3593 vk::getPhysicalDeviceMemoryProperties(instanceDriver, physicalDevice);
3594
3595 de::MovePtr<SimpleAllocator> allocator =
3596 de::MovePtr<SimpleAllocator>(new SimpleAllocator(vk, *logicalDevice, memoryProperties));
3597 const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * numValues;
3598 const BufferWithMemory buffer1(vk, *logicalDevice, *allocator,
3599 makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
3600 MemoryRequirement::HostVisible);
3601 const BufferWithMemory buffer2(vk, *logicalDevice, *allocator,
3602 makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
3603 MemoryRequirement::HostVisible);
3604
3605 // Fill the buffers with data
3606
3607 typedef std::vector<uint32_t> data_vector_t;
3608 data_vector_t inputData(numValues);
3609
3610 {
3611 de::Random rnd(0x82ce7f);
3612 const Allocation &bufferAllocation1 = buffer1.getAllocation();
3613 const Allocation &bufferAllocation2 = buffer2.getAllocation();
3614 uint32_t *bufferPtr1 = static_cast<uint32_t *>(bufferAllocation1.getHostPtr());
3615 uint32_t *bufferPtr2 = static_cast<uint32_t *>(bufferAllocation2.getHostPtr());
3616
3617 for (uint32_t i = 0; i < numValues; ++i)
3618 {
3619 uint32_t val = rnd.getUint32();
3620 inputData[i] = val;
3621 *bufferPtr1++ = val;
3622 *bufferPtr2++ = val;
3623 }
3624
3625 flushAlloc(vk, *logicalDevice, bufferAllocation1);
3626 flushAlloc(vk, *logicalDevice, bufferAllocation2);
3627 }
3628
3629 // Create descriptor sets
3630
3631 const Unique<VkDescriptorSetLayout> descriptorSetLayout1(
3632 DescriptorSetLayoutBuilder()
3633 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
3634 .build(vk, *logicalDevice));
3635
3636 const Unique<VkDescriptorPool> descriptorPool1(
3637 DescriptorPoolBuilder()
3638 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
3639 .build(vk, *logicalDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3640
3641 const Unique<VkDescriptorSet> descriptorSet1(
3642 makeDescriptorSet(vk, *logicalDevice, *descriptorPool1, *descriptorSetLayout1));
3643
3644 const VkDescriptorBufferInfo bufferDescriptorInfo1 = makeDescriptorBufferInfo(*buffer1, 0ull, bufferSizeBytes);
3645 DescriptorSetUpdateBuilder()
3646 .writeSingle(*descriptorSet1, DescriptorSetUpdateBuilder::Location::binding(0u),
3647 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo1)
3648 .update(vk, *logicalDevice);
3649
3650 const Unique<VkDescriptorSetLayout> descriptorSetLayout2(
3651 DescriptorSetLayoutBuilder()
3652 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
3653 .build(vk, *logicalDevice));
3654
3655 const Unique<VkDescriptorPool> descriptorPool2(
3656 DescriptorPoolBuilder()
3657 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
3658 .build(vk, *logicalDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3659
3660 const Unique<VkDescriptorSet> descriptorSet2(
3661 makeDescriptorSet(vk, *logicalDevice, *descriptorPool2, *descriptorSetLayout2));
3662
3663 const VkDescriptorBufferInfo bufferDescriptorInfo2 = makeDescriptorBufferInfo(*buffer2, 0ull, bufferSizeBytes);
3664 DescriptorSetUpdateBuilder()
3665 .writeSingle(*descriptorSet2, DescriptorSetUpdateBuilder::Location::binding(0u),
3666 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo2)
3667 .update(vk, *logicalDevice);
3668
3669 // Perform the computation
3670
3674 ComputePipelineWrapper pipeline1(vk, *logicalDevice, m_computePipelineConstructionType,
3675 m_context.getBinaryCollection().get("comp"));
3676 pipeline1.setDescriptorSetLayout(*descriptorSetLayout1);
3677 pipeline1.buildPipeline();
3678 const VkBufferMemoryBarrier hostWriteBarrier1 =
3679 makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer1, 0ull, bufferSizeBytes);
3680 const VkBufferMemoryBarrier shaderWriteBarrier1 =
3681 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer1, 0ull, bufferSizeBytes);
3682 const Unique<VkCommandPool> cmdPool1(makeCommandPool(vk, *logicalDevice, queues[0].queueFamilyIndex));
3683 const Unique<VkCommandBuffer> cmdBuffer1(
3684 allocateCommandBuffer(vk, *logicalDevice, *cmdPool1, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
3685
3686 ComputePipelineWrapper pipeline2(vk, *logicalDevice, m_computePipelineConstructionType,
3687 m_context.getBinaryCollection().get("comp"));
3688 pipeline2.setDescriptorSetLayout(*descriptorSetLayout2);
3689 pipeline2.buildPipeline();
3690 const VkBufferMemoryBarrier hostWriteBarrier2 =
3691 makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer2, 0ull, bufferSizeBytes);
3692 const VkBufferMemoryBarrier shaderWriteBarrier2 =
3693 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer2, 0ull, bufferSizeBytes);
3694 const Unique<VkCommandPool> cmdPool2(makeCommandPool(vk, *logicalDevice, queues[1].queueFamilyIndex));
3695 const Unique<VkCommandBuffer> cmdBuffer2(
3696 allocateCommandBuffer(vk, *logicalDevice, *cmdPool2, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
3697
3698 // Command buffer 1
3699
3700 beginCommandBuffer(vk, *cmdBuffer1);
3701 pipeline1.bind(*cmdBuffer1);
3702 vk.cmdBindDescriptorSets(*cmdBuffer1, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline1.getPipelineLayout(), 0u, 1u,
3703 &descriptorSet1.get(), 0u, nullptr);
3704 vk.cmdPipelineBarrier(*cmdBuffer1, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
3705 (VkDependencyFlags)0, 0, nullptr, 1, &hostWriteBarrier1, 0, nullptr);
3706 vk.cmdDispatch(*cmdBuffer1, 1, 1, 1);
3707 vk.cmdPipelineBarrier(*cmdBuffer1, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
3708 (VkDependencyFlags)0, 0, nullptr, 1, &shaderWriteBarrier1, 0, nullptr);
3709 endCommandBuffer(vk, *cmdBuffer1);
3710
3711 // Command buffer 2
3712
3713 beginCommandBuffer(vk, *cmdBuffer2);
3714 pipeline2.bind(*cmdBuffer2);
3715 vk.cmdBindDescriptorSets(*cmdBuffer2, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline2.getPipelineLayout(), 0u, 1u,
3716 &descriptorSet2.get(), 0u, nullptr);
3717 vk.cmdPipelineBarrier(*cmdBuffer2, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
3718 (VkDependencyFlags)0, 0, nullptr, 1, &hostWriteBarrier2, 0, nullptr);
3719 vk.cmdDispatch(*cmdBuffer2, 1, 1, 1);
3720 vk.cmdPipelineBarrier(*cmdBuffer2, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
3721 (VkDependencyFlags)0, 0, nullptr, 1, &shaderWriteBarrier2, 0, nullptr);
3722 endCommandBuffer(vk, *cmdBuffer2);
3723
3724 VkSubmitInfo submitInfo1 = {
3725 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
3726 nullptr, // pNext
3727 0u, // waitSemaphoreCount
3728 nullptr, // pWaitSemaphores
3729 nullptr, // pWaitDstStageMask
3730 1u, // commandBufferCount
3731 &cmdBuffer1.get(), // pCommandBuffers
3732 0u, // signalSemaphoreCount
3733 nullptr // pSignalSemaphores
3734 };
3735
3736 VkSubmitInfo submitInfo2 = {
3737 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
3738 nullptr, // pNext
3739 0u, // waitSemaphoreCount
3740 nullptr, // pWaitSemaphores
3741 nullptr, // pWaitDstStageMask
3742 1u, // commandBufferCount
3743 &cmdBuffer2.get(), // pCommandBuffers
3744 0u, // signalSemaphoreCount
3745 nullptr // pSignalSemaphores
3746 };
3747
3748 // Wait for completion
3749 const Unique<VkFence> fence1(createFence(vk, *logicalDevice));
3750 const Unique<VkFence> fence2(createFence(vk, *logicalDevice));
3751
3752 VK_CHECK(vk.queueSubmit(queues[0].queue, 1u, &submitInfo1, *fence1));
3753 VK_CHECK(vk.queueSubmit(queues[1].queue, 1u, &submitInfo2, *fence2));
3754
3755 int err = ERROR_NONE;
3756
3757 // First wait for the low-priority queue
3758 if (VK_SUCCESS != vk.waitForFences(*logicalDevice, 1u, &fence2.get(), true, ~0ull))
3759 err = ERROR_WAIT;
3760
3761 // If the high-priority queue hasn't finished, we have a problem.
3762 if (VK_SUCCESS != vk.getFenceStatus(*logicalDevice, fence1.get()))
3763 if (err == ERROR_NONE)
3764 err = ERROR_ORDER;
3765
3766 // Wait for the high-priority fence so we don't get errors on teardown.
3767 vk.waitForFences(*logicalDevice, 1u, &fence1.get(), true, ~0ull);
3768
3769 // If we fail() before waiting for all of the fences, the error will come from
3770 // teardown instead of the failure we actually want to report.
3771
3772 if (err == ERROR_WAIT)
3773 {
3774 return tcu::TestStatus::fail("Failed waiting for low-priority queue fence.");
3775 }
3776
3777 // Validate the results
3778
3779 const Allocation &bufferAllocation1 = buffer1.getAllocation();
3780 invalidateAlloc(vk, *logicalDevice, bufferAllocation1);
3781 const uint32_t *bufferPtr1 = static_cast<uint32_t *>(bufferAllocation1.getHostPtr());
3782
3783 const Allocation &bufferAllocation2 = buffer2.getAllocation();
3784 invalidateAlloc(vk, *logicalDevice, bufferAllocation2);
3785 const uint32_t *bufferPtr2 = static_cast<uint32_t *>(bufferAllocation2.getHostPtr());
3786
3787 for (uint32_t ndx = 0; ndx < numValues; ++ndx)
3788 {
3789 const uint32_t res1 = bufferPtr1[ndx];
3790 const uint32_t res2 = bufferPtr2[ndx];
3791 const uint32_t inp = inputData[ndx];
3792 const uint32_t ref = ~inp;
3793
3794 if (res1 != ref || res1 != res2)
3795 {
3796 std::ostringstream msg;
3797 msg << "Comparison failed for InOut.values[" << ndx << "] ref:" << ref << " res1:" << res1
3798 << " res2:" << res2 << " inp:" << inp;
3799 return tcu::TestStatus::fail(msg.str());
3800 }
3801 }
3802
3803 if (err == ERROR_ORDER)
3804 {
3805 log << tcu::TestLog::Message
3806 << "Note: Low-priority queue was faster than high-priority one. This is not an error, but priorities may "
3807 "be inverted."
3808 << tcu::TestLog::EndMessage;
3809 }
3810
3811 return tcu::TestStatus::pass("Test passed");
3812 }
3813
3814 class EmptyWorkGroupCase : public vkt::TestCase
3815 {
3816 public:
3817 EmptyWorkGroupCase(tcu::TestContext &testCtx, const std::string &name, const tcu::UVec3 &dispatchSize,
3818 const vk::ComputePipelineConstructionType computePipelineConstructionType);
3819 virtual ~EmptyWorkGroupCase(void)
3820 {
3821 }
3822
3823 virtual void checkSupport(Context &context) const override;
3824 TestInstance *createInstance(Context &context) const override;
3825 void initPrograms(vk::SourceCollections &programCollection) const override;
3826
3827 protected:
3828 const tcu::UVec3 m_dispatchSize;
3829 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
3830 };
3831
3832 class EmptyWorkGroupInstance : public vkt::TestInstance
3833 {
3834 public:
3835     EmptyWorkGroupInstance(Context &context, const tcu::UVec3 &dispatchSize,
3836 const vk::ComputePipelineConstructionType computePipelineConstructionType)
3837 : vkt::TestInstance(context)
3838 , m_dispatchSize(dispatchSize)
3839 , m_computePipelineConstructionType(computePipelineConstructionType)
3840 {
3841 }
3842     virtual ~EmptyWorkGroupInstance(void)
3843 {
3844 }
3845
3846 tcu::TestStatus iterate(void) override;
3847
3848 protected:
3849 const tcu::UVec3 m_dispatchSize;
3850 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
3851 };
3852
3853 EmptyWorkGroupCase::EmptyWorkGroupCase(tcu::TestContext &testCtx, const std::string &name,
3854 const tcu::UVec3 &dispatchSize,
3855 const vk::ComputePipelineConstructionType computePipelineConstructionType)
3856 : vkt::TestCase(testCtx, name)
3857 , m_dispatchSize(dispatchSize)
3858 , m_computePipelineConstructionType(computePipelineConstructionType)
3859 {
3860 DE_ASSERT(m_dispatchSize.x() == 0u || m_dispatchSize.y() == 0u || m_dispatchSize.z() == 0u);
3861 }
3862
3863 void EmptyWorkGroupCase::checkSupport(Context &context) const
3864 {
3865 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
3866 m_computePipelineConstructionType);
3867 }
3868
3869 TestInstance *EmptyWorkGroupCase::createInstance(Context &context) const
3870 {
3871 return new EmptyWorkGroupInstance(context, m_dispatchSize, m_computePipelineConstructionType);
3872 }
3873
3874 void EmptyWorkGroupCase::initPrograms(vk::SourceCollections &programCollection) const
3875 {
3876 std::ostringstream comp;
3877 comp << "#version 450\n"
3878 << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
3879 << "layout (set=0, binding=0) buffer VerificationBlock { uint value; } verif;\n"
3880 << "void main () { atomicAdd(verif.value, 1u); }\n";
3881 programCollection.glslSources.add("comp") << glu::ComputeSource(comp.str());
3882 }
3883
3884 tcu::TestStatus EmptyWorkGroupInstance::iterate(void)
3885 {
3886 const auto &vkd = m_context.getDeviceInterface();
3887 const auto device = m_context.getDevice();
3888 auto &alloc = m_context.getDefaultAllocator();
3889 const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
3890 const auto queue = m_context.getUniversalQueue();
3891
3892 const auto verifBufferSize = static_cast<VkDeviceSize>(sizeof(uint32_t));
3893 const auto verifBufferInfo = makeBufferCreateInfo(verifBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
3894 BufferWithMemory verifBuffer(vkd, device, alloc, verifBufferInfo, MemoryRequirement::HostVisible);
3895 auto &verifBufferAlloc = verifBuffer.getAllocation();
3896 void *verifBufferPtr = verifBufferAlloc.getHostPtr();
3897
3898 deMemset(verifBufferPtr, 0, static_cast<size_t>(verifBufferSize));
3899 flushAlloc(vkd, device, verifBufferAlloc);
3900
3901 DescriptorSetLayoutBuilder layoutBuilder;
3902 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
3903 const auto descriptorSetLayout = layoutBuilder.build(vkd, device);
3904
3905 ComputePipelineWrapper pipeline(vkd, device, m_computePipelineConstructionType,
3906 m_context.getBinaryCollection().get("comp"));
3907 pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
3908 pipeline.buildPipeline();
3909
3910 DescriptorPoolBuilder poolBuilder;
3911 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3912 const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3913 const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), descriptorSetLayout.get());
3914
3915 DescriptorSetUpdateBuilder updateBuilder;
3916 const auto verifBufferDescInfo = makeDescriptorBufferInfo(verifBuffer.get(), 0ull, verifBufferSize);
3917 updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
3918 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &verifBufferDescInfo);
3919 updateBuilder.update(vkd, device);
3920
3921 const auto cmdPool = makeCommandPool(vkd, device, queueIndex);
3922 const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3923 const auto cmdBuffer = cmdBufferPtr.get();
3924
3925 beginCommandBuffer(vkd, cmdBuffer);
3926 pipeline.bind(cmdBuffer);
3927 vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
3928 &descriptorSet.get(), 0u, nullptr);
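    // This dispatch uses the group count with at least one zero dimension; it must
    // not launch any invocations.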
3929 vkd.cmdDispatch(cmdBuffer, m_dispatchSize.x(), m_dispatchSize.y(), m_dispatchSize.z());
3930
3931 const auto readWriteAccess = (VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT);
3932 const auto computeToCompute = makeMemoryBarrier(readWriteAccess, readWriteAccess);
3933 vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0U,
3934 1u, &computeToCompute, 0u, nullptr, 0u, nullptr);
3935
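    // A single real work group runs afterwards, so the counter must end up at exactly 1.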
3936 vkd.cmdDispatch(cmdBuffer, 1u, 1u, 1u);
3937
3938 const auto computeToHost = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
3939 vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u,
3940 &computeToHost, 0u, nullptr, 0u, nullptr);
3941
3942 endCommandBuffer(vkd, cmdBuffer);
3943 submitCommandsAndWait(vkd, device, queue, cmdBuffer);
3944
3945 uint32_t value;
3946 invalidateAlloc(vkd, device, verifBufferAlloc);
3947 deMemcpy(&value, verifBufferPtr, sizeof(value));
3948
3949 if (value != 1u)
3950 {
3951 std::ostringstream msg;
3952 msg << "Unexpected value found in buffer: " << value << " while expecting 1";
3953 TCU_FAIL(msg.str());
3954 }
3955
3956 return tcu::TestStatus::pass("Pass");
3957 }
3958
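// Dispatches one work group sized to the maximum reported limit along a single axis,
// with every invocation writing a 1 to its own SSBO element.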
3959 class MaxWorkGroupSizeTest : public vkt::TestCase
3960 {
3961 public:
3962 enum class Axis
3963 {
3964 X = 0,
3965 Y = 1,
3966 Z = 2
3967 };
3968
3969 struct Params
3970 {
3971 // Which axis to maximize.
3972 Axis axis;
3973 };
3974
3975     MaxWorkGroupSizeTest(tcu::TestContext &testCtx, const std::string &name, const Params &params,
3976 const vk::ComputePipelineConstructionType computePipelineConstructionType);
3977     virtual ~MaxWorkGroupSizeTest(void)
3978 {
3979 }
3980
3981 virtual void initPrograms(vk::SourceCollections &programCollection) const;
3982 virtual TestInstance *createInstance(Context &context) const;
3983 virtual void checkSupport(Context &context) const;
3984
3985 // Helper to transform the axis value to an index.
3986 static int getIndex(Axis axis);
3987
3988 // Helper returning the number of invocations according to the test parameters.
3989     static uint32_t getInvocations(const Params &params, const vk::InstanceInterface &vki,
3990 vk::VkPhysicalDevice physicalDevice,
3991 const vk::VkPhysicalDeviceProperties *devProperties = nullptr);
3992
3993     // Helper returning the buffer size needed for this test.
3994 static uint32_t getSSBOSize(uint32_t invocations);
3995
3996 private:
3997 Params m_params;
3998 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
3999 };
4000
4001 class MaxWorkGroupSizeInstance : public vkt::TestInstance
4002 {
4003 public:
4004     MaxWorkGroupSizeInstance(Context &context, const MaxWorkGroupSizeTest::Params &params,
4005 const vk::ComputePipelineConstructionType computePipelineConstructionType);
4006     virtual ~MaxWorkGroupSizeInstance(void)
4007 {
4008 }
4009
4010 virtual tcu::TestStatus iterate(void);
4011
4012 private:
4013 MaxWorkGroupSizeTest::Params m_params;
4014 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
4015 };
4016
4017 int MaxWorkGroupSizeTest::getIndex(Axis axis)
4018 {
4019 const int ret = static_cast<int>(axis);
4020 DE_ASSERT(ret >= static_cast<int>(Axis::X) && ret <= static_cast<int>(Axis::Z));
4021 return ret;
4022 }
4023
4024 uint32_t MaxWorkGroupSizeTest::getInvocations(const Params &params, const vk::InstanceInterface &vki,
4025 vk::VkPhysicalDevice physicalDevice,
4026 const vk::VkPhysicalDeviceProperties *devProperties)
4027 {
4028 const auto axis = getIndex(params.axis);
4029
4030 if (devProperties)
4031 return devProperties->limits.maxComputeWorkGroupSize[axis];
4032 return vk::getPhysicalDeviceProperties(vki, physicalDevice).limits.maxComputeWorkGroupSize[axis];
4033 }
4034
4035 uint32_t MaxWorkGroupSizeTest::getSSBOSize(uint32_t invocations)
4036 {
4037 return invocations * static_cast<uint32_t>(sizeof(uint32_t));
4038 }
4039
4040 MaxWorkGroupSizeTest::MaxWorkGroupSizeTest(tcu::TestContext &testCtx, const std::string &name, const Params &params,
4041 const vk::ComputePipelineConstructionType computePipelineConstructionType)
4042 : vkt::TestCase(testCtx, name)
4043 , m_params(params)
4044 , m_computePipelineConstructionType(computePipelineConstructionType)
4045 {
4046 }
4047
4048 void MaxWorkGroupSizeTest::initPrograms(vk::SourceCollections &programCollection) const
4049 {
4050 std::ostringstream shader;
4051
4052 // The actual local sizes will be set using spec constants when running the test instance.
4053 shader << "#version 450\n"
4054 << "\n"
4055 << "layout(constant_id=0) const int local_size_x_val = 1;\n"
4056 << "layout(constant_id=1) const int local_size_y_val = 1;\n"
4057 << "layout(constant_id=2) const int local_size_z_val = 1;\n"
4058 << "\n"
4059 << "layout(local_size_x_id=0, local_size_y_id=1, local_size_z_id=2) in;\n"
4060 << "\n"
4061 << "layout(set=0, binding=0) buffer StorageBuffer {\n"
4062 << " uint values[];\n"
4063 << "} ssbo;\n"
4064 << "\n"
4065 << "void main() {\n"
4066 << " ssbo.values[gl_LocalInvocationIndex] = 1u;\n"
4067 << "}\n";
4068
4069 programCollection.glslSources.add("comp") << glu::ComputeSource(shader.str());
4070 }
4071
4072 TestInstance *MaxWorkGroupSizeTest::createInstance(Context &context) const
4073 {
4074 return new MaxWorkGroupSizeInstance(context, m_params, m_computePipelineConstructionType);
4075 }
4076
4077 void MaxWorkGroupSizeTest::checkSupport(Context &context) const
4078 {
4079 const auto &vki = context.getInstanceInterface();
4080 const auto physicalDevice = context.getPhysicalDevice();
4081
4082 const auto properties = vk::getPhysicalDeviceProperties(vki, physicalDevice);
4083 const auto invocations = getInvocations(m_params, vki, physicalDevice, &properties);
4084
4085 if (invocations > properties.limits.maxComputeWorkGroupInvocations)
4086 TCU_FAIL("Reported workgroup size limit in the axis is greater than the global invocation limit");
4087
4088 if (properties.limits.maxStorageBufferRange / static_cast<uint32_t>(sizeof(uint32_t)) < invocations)
4089 TCU_THROW(NotSupportedError, "Maximum supported storage buffer range too small");
4090
4091 checkShaderObjectRequirements(vki, physicalDevice, m_computePipelineConstructionType);
4092 }
4093
4094 MaxWorkGroupSizeInstance::MaxWorkGroupSizeInstance(
4095     Context &context, const MaxWorkGroupSizeTest::Params &params,
4096 const vk::ComputePipelineConstructionType computePipelineConstructionType)
4097 : vkt::TestInstance(context)
4098 , m_params(params)
4099 , m_computePipelineConstructionType(computePipelineConstructionType)
4100 {
4101 }
4102
4103 tcu::TestStatus MaxWorkGroupSizeInstance::iterate(void)
4104 {
4105 const auto &vki = m_context.getInstanceInterface();
4106 const auto &vkd = m_context.getDeviceInterface();
4107 const auto physicalDevice = m_context.getPhysicalDevice();
4108 const auto device = m_context.getDevice();
4109 auto &alloc = m_context.getDefaultAllocator();
4110 const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
4111 const auto queue = m_context.getUniversalQueue();
4112 auto &log = m_context.getTestContext().getLog();
4113
4114 const auto axis = MaxWorkGroupSizeTest::getIndex(m_params.axis);
4115 const auto invocations = MaxWorkGroupSizeTest::getInvocations(m_params, vki, physicalDevice);
4116 const auto ssboSize = static_cast<vk::VkDeviceSize>(MaxWorkGroupSizeTest::getSSBOSize(invocations));
4117
4118 log << tcu::TestLog::Message << "Running test with " << invocations << " invocations on axis " << axis
4119 << " using a storage buffer size of " << ssboSize << " bytes" << tcu::TestLog::EndMessage;
4120
4121 // Main SSBO buffer.
4122 const auto ssboInfo = vk::makeBufferCreateInfo(ssboSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4123 vk::BufferWithMemory ssbo(vkd, device, alloc, ssboInfo, vk::MemoryRequirement::HostVisible);
4124
4125 // Descriptor set layouts.
4126 vk::DescriptorSetLayoutBuilder layoutBuilder;
4127 layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
4128 const auto descriptorSetLayout = layoutBuilder.build(vkd, device);
4129
4130 // Specialization constants: set the number of invocations in the appropriate local size id.
4131 const auto entrySize = static_cast<uintptr_t>(sizeof(int32_t));
4132 int32_t specializationData[3] = {1, 1, 1};
4133 specializationData[axis] = static_cast<int32_t>(invocations);
4134
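    // Each map entry feeds one of the local_size_*_id constants from a consecutive
    // 32-bit word of the specializationData array declared above.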
4135 const vk::VkSpecializationMapEntry specializationMaps[3] = {
4136 {
4137 0u, // uint32_t constantID;
4138 0u, // uint32_t offset;
4139 entrySize, // uintptr_t size;
4140 },
4141 {
4142 1u, // uint32_t constantID;
4143 static_cast<uint32_t>(entrySize), // uint32_t offset;
4144 entrySize, // uintptr_t size;
4145 },
4146 {
4147 2u, // uint32_t constantID;
4148 static_cast<uint32_t>(entrySize * 2u), // uint32_t offset;
4149 entrySize, // uintptr_t size;
4150 },
4151 };
4152
4153 const vk::VkSpecializationInfo specializationInfo = {
4154 3u, // uint32_t mapEntryCount;
4155 specializationMaps, // const VkSpecializationMapEntry* pMapEntries;
4156 static_cast<uintptr_t>(sizeof(specializationData)), // uintptr_t dataSize;
4157 specializationData, // const void* pData;
4158 };
4159
4160 ComputePipelineWrapper testPipeline(vkd, device, m_computePipelineConstructionType,
4161 m_context.getBinaryCollection().get("comp"));
4162 testPipeline.setDescriptorSetLayout(descriptorSetLayout.get());
4163 testPipeline.setSpecializationInfo(specializationInfo);
4164 testPipeline.buildPipeline();
4165
4166 // Create descriptor pool and set.
4167 vk::DescriptorPoolBuilder poolBuilder;
4168 poolBuilder.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
4169 const auto descriptorPool =
4170 poolBuilder.build(vkd, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
4171 const auto descriptorSet = vk::makeDescriptorSet(vkd, device, descriptorPool.get(), descriptorSetLayout.get());
4172
4173 // Update descriptor set.
4174 const vk::VkDescriptorBufferInfo ssboBufferInfo = {
4175 ssbo.get(), // VkBuffer buffer;
4176 0u, // VkDeviceSize offset;
4177 VK_WHOLE_SIZE, // VkDeviceSize range;
4178 };
4179
4180 vk::DescriptorSetUpdateBuilder updateBuilder;
4181 updateBuilder.writeSingle(descriptorSet.get(), vk::DescriptorSetUpdateBuilder::Location::binding(0u),
4182 vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &ssboBufferInfo);
4183 updateBuilder.update(vkd, device);
4184
4185 // Clear buffer.
4186 auto &ssboAlloc = ssbo.getAllocation();
4187 void *ssboPtr = ssboAlloc.getHostPtr();
4188 deMemset(ssboPtr, 0, static_cast<size_t>(ssboSize));
4189 vk::flushAlloc(vkd, device, ssboAlloc);
4190
4191 // Run pipelines.
4192 const auto cmdPool = vk::makeCommandPool(vkd, device, queueIndex);
4193     const auto cmdBufferPtr =
4194         vk::allocateCommandBuffer(vkd, device, cmdPool.get(), vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4195     const auto cmdBuffer = cmdBufferPtr.get();
4196
4197 vk::beginCommandBuffer(vkd, cmdBuffer);
4198
4199 // Run the main test shader.
4200 const auto hostToComputeBarrier = vk::makeBufferMemoryBarrier(
4201 vk::VK_ACCESS_HOST_WRITE_BIT, vk::VK_ACCESS_SHADER_WRITE_BIT, ssbo.get(), 0ull, VK_WHOLE_SIZE);
4202 vkd.cmdPipelineBarrier(cmdBuffer, vk::VK_PIPELINE_STAGE_HOST_BIT, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u,
4203 nullptr, 1u, &hostToComputeBarrier, 0u, nullptr);
4204
4205 testPipeline.bind(cmdBuffer);
4206 vkd.cmdBindDescriptorSets(cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, testPipeline.getPipelineLayout(), 0u, 1u,
4207 &descriptorSet.get(), 0u, nullptr);
4208 vkd.cmdDispatch(cmdBuffer, 1u, 1u, 1u);
4209
4210 const auto computeToHostBarrier = vk::makeBufferMemoryBarrier(
4211 vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, ssbo.get(), 0ull, VK_WHOLE_SIZE);
4212 vkd.cmdPipelineBarrier(cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u,
4213 nullptr, 1u, &computeToHostBarrier, 0u, nullptr);
4214
4215 vk::endCommandBuffer(vkd, cmdBuffer);
4216 vk::submitCommandsAndWait(vkd, device, queue, cmdBuffer);
4217
4218 // Verify buffer contents.
4219 vk::invalidateAlloc(vkd, device, ssboAlloc);
4220 std::unique_ptr<uint32_t[]> valuesArray(new uint32_t[invocations]);
4221 uint32_t *valuesPtr = valuesArray.get();
4222 deMemcpy(valuesPtr, ssboPtr, static_cast<size_t>(ssboSize));
4223
4224 std::string errorMsg;
4225 bool ok = true;
4226
4227 for (size_t i = 0; i < invocations; ++i)
4228 {
4229 if (valuesPtr[i] != 1u)
4230 {
4231 ok = false;
4232 errorMsg = "Found invalid value for invocation index " + de::toString(i) + ": expected 1u and found " +
4233 de::toString(valuesPtr[i]);
4234 break;
4235 }
4236 }
4237
4238 if (!ok)
4239 return tcu::TestStatus::fail(errorMsg);
4240 return tcu::TestStatus::pass("Pass");
4241 }
4242
4243 namespace EmptyShaderTest
4244 {
4245
4246 void checkSupport(Context &context, vk::ComputePipelineConstructionType computePipelineConstructionType)
4247 {
4248 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
4249 computePipelineConstructionType);
4250 }
4251
4252 void createProgram(SourceCollections &dst, vk::ComputePipelineConstructionType)
4253 {
4254 dst.glslSources.add("comp") << glu::ComputeSource("#version 310 es\n"
4255 "layout (local_size_x = 1) in;\n"
4256 "void main (void) {}\n");
4257 }
4258
4259 tcu::TestStatus createTest(Context &context, vk::ComputePipelineConstructionType computePipelineConstructionType)
4260 {
4261 const DeviceInterface &vk = context.getDeviceInterface();
4262 const VkDevice device = context.getDevice();
4263 const VkQueue queue = context.getUniversalQueue();
4264 const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
4265
4266 ComputePipelineWrapper pipeline(vk, device, computePipelineConstructionType,
4267 context.getBinaryCollection().get("comp"));
4268 pipeline.buildPipeline();
4269
4270 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
4271 const Unique<VkCommandBuffer> cmdBuffer(
4272 allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
4273
4274 // Start recording commands
4275
4276 beginCommandBuffer(vk, *cmdBuffer);
4277
4278 pipeline.bind(*cmdBuffer);
4279
4280 const tcu::IVec3 workGroups(1, 1, 1);
4281 vk.cmdDispatch(*cmdBuffer, workGroups.x(), workGroups.y(), workGroups.z());
4282
4283 endCommandBuffer(vk, *cmdBuffer);
4284
4285 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4286
4287 return tcu::TestStatus::pass("Compute succeeded");
4288 }
4289
4290 } // namespace EmptyShaderTest
4291
4292 namespace ComputeOnlyQueueTests
4293 {
4294
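// Returns the index of the first queue family that advertises compute but not
// graphics capabilities, or nothing if no such family exists.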
4295 tcu::Maybe<uint32_t> getComputeOnlyQueueFamily(Context &context)
4296 {
4297 bool foundQueue = false;
4298 uint32_t index = 0;
4299
4300 auto queueFamilies =
4301 getPhysicalDeviceQueueFamilyProperties(context.getInstanceInterface(), context.getPhysicalDevice());
4302
4303 for (const auto &queueFamily : queueFamilies)
4304 {
4305 if ((queueFamily.queueFlags & VK_QUEUE_COMPUTE_BIT) && !(queueFamily.queueFlags & VK_QUEUE_GRAPHICS_BIT))
4306 {
4307 foundQueue = true;
4308 break;
4309 }
4310 else
4311 {
4312 index++;
4313 }
4314 }
4315 if (!foundQueue)
4316 {
4317 return tcu::Maybe<uint32_t>();
4318 }
4319 else
4320 {
4321 return index;
4322 }
4323 }
4324
4325 // Creates a device that has a queue for compute capabilities without graphics.
4326 Move<VkDevice> createComputeOnlyDevice(vk::VkInstance instance, const InstanceInterface &instanceDriver,
4327 const VkPhysicalDevice physicalDevice, Context &context,
4328 uint32_t &queueFamilyIndex)
4329 {
4330 const auto queueFamilies = getPhysicalDeviceQueueFamilyProperties(instanceDriver, physicalDevice);
4331
4332 // One queue family without a graphics bit should be found, since this is checked in checkSupport.
4333 queueFamilyIndex = getComputeOnlyQueueFamily(context).get();
4334
4335 const float queuePriority = 1.0f;
4336 const VkDeviceQueueCreateInfo deviceQueueCreateInfos = {
4337 VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, // VkStructureType sType;
4338 nullptr, // const void* pNext;
4339 (VkDeviceQueueCreateFlags)0u, // VkDeviceQueueCreateFlags flags;
4340 queueFamilyIndex, // uint32_t queueFamilyIndex;
4341 1u, // uint32_t queueCount;
4342 &queuePriority, // const float* pQueuePriorities;
4343 };
4344
4345 void *pNext = nullptr;
4346 #ifdef CTS_USES_VULKANSC
4347 VkDeviceObjectReservationCreateInfo memReservationInfo = context.getTestContext().getCommandLine().isSubProcess() ?
4348 context.getResourceInterface()->getStatMax() :
4349 resetDeviceObjectReservationCreateInfo();
4350 pNext = &memReservationInfo;
4351
4352 VkPhysicalDeviceVulkanSC10Features sc10Features = createDefaultSC10Features();
4353 sc10Features.pNext = pNext;
4354 pNext = &sc10Features;
4355
4356 VkPipelineCacheCreateInfo pcCI;
4357 std::vector<VkPipelinePoolSize> poolSizes;
4358 if (context.getTestContext().getCommandLine().isSubProcess())
4359 {
4360 if (context.getResourceInterface()->getCacheDataSize() > 0)
4361 {
4362 pcCI = {
4363 VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType;
4364 nullptr, // const void* pNext;
4365 VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
4366 VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT, // VkPipelineCacheCreateFlags flags;
4367 context.getResourceInterface()->getCacheDataSize(), // uintptr_t initialDataSize;
4368 context.getResourceInterface()->getCacheData() // const void* pInitialData;
4369 };
4370 memReservationInfo.pipelineCacheCreateInfoCount = 1;
4371 memReservationInfo.pPipelineCacheCreateInfos = &pcCI;
4372 }
4373 poolSizes = context.getResourceInterface()->getPipelinePoolSizes();
4374 if (!poolSizes.empty())
4375 {
4376 memReservationInfo.pipelinePoolSizeCount = uint32_t(poolSizes.size());
4377 memReservationInfo.pPipelinePoolSizes = poolSizes.data();
4378 }
4379 }
4380 #endif // CTS_USES_VULKANSC
4381 const VkDeviceCreateInfo deviceCreateInfo = {
4382 VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, // VkStructureType sType;
4383 pNext, // const void* pNext;
4384 (VkDeviceCreateFlags)0u, // VkDeviceCreateFlags flags;
4385 1, // uint32_t queueCreateInfoCount;
4386 &deviceQueueCreateInfos, // const VkDeviceQueueCreateInfo* pQueueCreateInfos;
4387 0u, // uint32_t enabledLayerCount;
4388 nullptr, // const char* const* ppEnabledLayerNames;
4389 0, // uint32_t enabledExtensionCount;
4390 nullptr, // const char* const* ppEnabledExtensionNames;
4391 nullptr, // const VkPhysicalDeviceFeatures* pEnabledFeatures;
4392 };
4393
4394 return vkt::createCustomDevice(context.getTestContext().getCommandLine().isValidationEnabled(),
4395 context.getPlatformInterface(), instance, instanceDriver, physicalDevice,
4396 &deviceCreateInfo);
4397 }
4398
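// Checks that a secondary command buffer recording only compute work can be executed
// from a primary command buffer on a compute-only queue.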
4399 class SecondaryCommandBufferComputeOnlyTest : public vkt::TestCase
4400 {
4401 public:
4402     SecondaryCommandBufferComputeOnlyTest(tcu::TestContext &context, const std::string &name)
4403 : vkt::TestCase(context, name){};
4404
4405 void initPrograms(SourceCollections &programCollection) const override;
4406 TestInstance *createInstance(Context &context) const override;
4407 void checkSupport(Context &context) const override;
4408 };
4409
4410 class SecondaryCommandBufferComputeOnlyTestInstance : public vkt::TestInstance
4411 {
4412 public:
4413     SecondaryCommandBufferComputeOnlyTestInstance(Context &context)
4414 : vkt::TestInstance(context)
4415 #ifdef CTS_USES_VULKANSC
4416 , m_customInstance(createCustomInstanceFromContext(context))
4417 #endif // CTS_USES_VULKANSC
4418 {};
4419 virtual tcu::TestStatus iterate(void);
4420
4421 protected:
4422 #ifdef CTS_USES_VULKANSC
4423 const CustomInstance m_customInstance;
4424 #endif // CTS_USES_VULKANSC
4425 };
4426
4427 void SecondaryCommandBufferComputeOnlyTest::initPrograms(SourceCollections &collection) const
4428 {
4429 {
4430 std::ostringstream src;
4431 src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
4432 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
4433 << "layout(set = 0, binding = 0, std430) buffer Out\n"
4434 << "{\n"
4435 << " uint data[];\n"
4436 << "};\n"
4437 << "void main (void)\n"
4438 << "{\n"
4439 << "data[0] = 1;"
4440 << "}\n";
4441 collection.glslSources.add("comp") << glu::ComputeSource(src.str());
4442 }
4443 }
4444
4445 TestInstance *SecondaryCommandBufferComputeOnlyTest::createInstance(Context &context) const
4446 {
4447 return new SecondaryCommandBufferComputeOnlyTestInstance(context);
4448 }
4449
4450 void SecondaryCommandBufferComputeOnlyTest::checkSupport(Context &context) const
4451 {
4452     // Find at least one queue family that supports compute but does NOT support graphics.
4453     if (!getComputeOnlyQueueFamily(context))
4454         TCU_THROW(NotSupportedError, "No queue family found that supports compute but not graphics.");
4455 }
4456
4457 tcu::TestStatus SecondaryCommandBufferComputeOnlyTestInstance::iterate()
4458 {
4459 VkDevice device;
4460 uint32_t queueFamilyIndex;
4461 #ifdef CTS_USES_VULKANSC
4462 const vk::InstanceInterface &vki = m_customInstance.getDriver();
4463 const VkPhysicalDevice physDevice =
4464 chooseDevice(vki, m_customInstance, m_context.getTestContext().getCommandLine());
4465 auto customDevice = createComputeOnlyDevice(m_customInstance, vki, physDevice, m_context, queueFamilyIndex);
4466 de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter> deviceDriver;
4467 #else
4468 const InstanceInterface &vki = m_context.getInstanceInterface();
4469 const VkPhysicalDevice physDevice = m_context.getPhysicalDevice();
4470 auto customDevice = createComputeOnlyDevice(m_context.getInstance(), vki, physDevice, m_context, queueFamilyIndex);
4471 de::MovePtr<DeviceDriver> deviceDriver;
4472 #endif // CTS_USES_VULKANSC
4473
4474 device = customDevice.get();
4475
4476 #ifndef CTS_USES_VULKANSC
4477 deviceDriver = de::MovePtr<DeviceDriver>(new DeviceDriver(m_context.getPlatformInterface(), m_context.getInstance(),
4478 device, m_context.getUsedApiVersion(),
4479 m_context.getTestContext().getCommandLine()));
4480 #else
4481 deviceDriver = de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter>(
4482 new DeviceDriverSC(m_context.getPlatformInterface(), m_customInstance, device,
4483 m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(),
4484 m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties(),
4485 m_context.getUsedApiVersion()),
4486 DeinitDeviceDeleter(m_context.getResourceInterface().get(), device));
4487 #endif // CTS_USES_VULKANSC
4488
4489 const DeviceInterface &vkdi = *deviceDriver;
4490
4491 auto queue = getDeviceQueue(vkdi, device, queueFamilyIndex, 0u);
4492 auto allocator =
4493 de::MovePtr<Allocator>(new SimpleAllocator(vkdi, device, getPhysicalDeviceMemoryProperties(vki, physDevice)));
4494
4495 const auto bufferSize = static_cast<VkDeviceSize>(sizeof(uint32_t));
4496 BufferWithMemory buffer(vkdi, device, *allocator.get(),
4497 makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
4498 MemoryRequirement::HostVisible);
4499 auto &bufferAlloc = buffer.getAllocation();
4500 void *bufferData = bufferAlloc.getHostPtr();
4501 deMemset(bufferData, 0, sizeof(uint32_t));
4502 flushAlloc(vkdi, device, bufferAlloc);
4503
4504 DescriptorSetLayoutBuilder layoutBuilder;
4505 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
4506 Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vkdi, device));
4507
4508 DescriptorPoolBuilder poolBuilder;
4509 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
4510 const auto descriptorPool = poolBuilder.build(vkdi, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1);
4511 const auto descriptorSetBuffer = makeDescriptorSet(vkdi, device, descriptorPool.get(), descriptorSetLayout.get());
4512
4513 // Update descriptor sets.
4514 DescriptorSetUpdateBuilder updater;
4515
4516 const auto bufferInfo = makeDescriptorBufferInfo(buffer.get(), 0ull, bufferSize);
4517 updater.writeSingle(descriptorSetBuffer.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
4518 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferInfo);
4519
4520 updater.update(vkdi, device);
4521
4522 auto shader = createShaderModule(vkdi, device, m_context.getBinaryCollection().get("comp"));
4523 // Create compute pipeline
4524 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vkdi, device, *descriptorSetLayout));
4525 const Unique<VkPipeline> computePipeline(makeComputePipeline(vkdi, device, *pipelineLayout, *shader));
4526
4527 // Create command buffer
4528 const Unique<VkCommandPool> cmdPool(makeCommandPool(vkdi, device, queueFamilyIndex));
4529 const Unique<VkCommandBuffer> cmdBuffer(
4530 allocateCommandBuffer(vkdi, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
4531 const Unique<VkCommandBuffer> cmdBuffer2(
4532 allocateCommandBuffer(vkdi, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_SECONDARY));
4533
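    // The inheritance info references no render pass or framebuffer: the secondary
    // command buffer records compute work only and is executed outside of any render
    // pass instance, where those fields are ignored.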
4534 const VkCommandBufferInheritanceInfo bufferInheritanceInfo{
4535 VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, // VkStructureType sType;
4536 nullptr, // const void* pNext;
4537 VK_NULL_HANDLE, // VkRenderPass renderPass;
4538 0u, // uint32_t subpass;
4539 VK_NULL_HANDLE, // VkFramebuffer framebuffer;
4540 VK_FALSE, // VkBool32 occlusionQueryEnable;
4541 (VkQueryControlFlags)0u, // VkQueryControlFlags queryFlags;
4542 (VkQueryPipelineStatisticFlags)0u // VkQueryPipelineStatisticFlags pipelineStatistics;
4543 };
4544
4545 VkCommandBufferUsageFlags usageFlags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
4546 const VkCommandBufferBeginInfo commandBufBeginParams{
4547 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // VkStructureType sType;
4548 nullptr, // const void* pNext;
4549 usageFlags, // VkCommandBufferUsageFlags flags;
4550 &bufferInheritanceInfo};
4551
4552 beginCommandBuffer(vkdi, cmdBuffer.get());
4553 vkdi.beginCommandBuffer(cmdBuffer2.get(), &commandBufBeginParams);
4554 vkdi.cmdBindPipeline(cmdBuffer2.get(), VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.get());
4555 vkdi.cmdBindDescriptorSets(cmdBuffer2.get(), VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout.get(), 0u, 1,
4556 &descriptorSetBuffer.get(), 0u, nullptr);
4557 vkdi.cmdDispatch(cmdBuffer2.get(), 1, 1, 1);
4558 endCommandBuffer(vkdi, cmdBuffer2.get());
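    // Replay the recorded secondary command buffer from the primary one.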
4559 vkdi.cmdExecuteCommands(cmdBuffer.get(), 1, &cmdBuffer2.get());
4560 const VkBufferMemoryBarrier renderBufferBarrier =
4561 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, buffer.get(), 0ull, bufferSize);
4562 cmdPipelineBufferMemoryBarrier(vkdi, cmdBuffer.get(), VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
4563 VK_PIPELINE_STAGE_HOST_BIT, &renderBufferBarrier);
4564 endCommandBuffer(vkdi, cmdBuffer.get());
4565 submitCommandsAndWait(vkdi, device, queue, cmdBuffer.get());
4566
4567 invalidateAlloc(vkdi, device, bufferAlloc);
4568
4569 uint32_t result = 0;
4570 deMemcpy(&result, bufferData, sizeof(uint32_t));
4571     if (result != 1)
4572     {
4573         return tcu::TestStatus::fail("Unexpected value in output buffer");
4574     }
4575
4576 return tcu::TestStatus::pass("passed");
4577 }
4578
4579 } // namespace ComputeOnlyQueueTests
4580
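// Composite shapes exercised by the replicated-composites tests below.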
4581 enum CompositeType
4582 {
4583 VECTOR,
4584 MATRIX,
4585 ARRAY,
4586 ARRAY_ARRAY,
4587 STRUCT,
4588 STRUCT_STRUCT,
4589 COOPMAT,
4590 };
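// How the composite is built: from a run-time value, a plain constant, or a
// specialization constant.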
4591 enum InstType
4592 {
4593 VALUE,
4594 CONSTANT,
4595 SPECCONSTANT,
4596 };
4597
4598 #ifndef CTS_USES_VULKANSC
4599
4600 class ReplicatedCompositesTest : public vkt::TestCase
4601 {
4602 public:
4603 ReplicatedCompositesTest(tcu::TestContext &testCtx, const std::string &name, const CompositeType compositeType,
4604 const InstType instType,
4605 const vk::ComputePipelineConstructionType computePipelineConstructionType);
4606
4607 virtual void checkSupport(Context &context) const;
4608 void initPrograms(SourceCollections &sourceCollections) const;
4609 TestInstance *createInstance(Context &context) const;
4610
4611 private:
4612 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
4613 CompositeType m_compositeType;
4614 InstType m_instType;
4615 };
4616
4617 class ReplicatedCompositesTestInstance : public vkt::TestInstance
4618 {
4619 public:
4620 ReplicatedCompositesTestInstance(Context &context, const CompositeType compositeType, const InstType instType,
4621 const vk::ComputePipelineConstructionType computePipelineConstructionType);
4622
4623 tcu::TestStatus iterate(void);
4624
4625 private:
4626 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
4627 CompositeType m_compositeType;
4628 InstType m_instType;
4629 };
4630
4631 ReplicatedCompositesTest::ReplicatedCompositesTest(
4632 tcu::TestContext &testCtx, const std::string &name, const CompositeType compositeType, const InstType instType,
4633 const vk::ComputePipelineConstructionType computePipelineConstructionType)
4634 : TestCase(testCtx, name)
4635 , m_computePipelineConstructionType(computePipelineConstructionType)
4636 , m_compositeType(compositeType)
4637 , m_instType(instType)
4638 {
4639 }
4640
4641 void ReplicatedCompositesTest::checkSupport(Context &context) const
4642 {
4643 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
4644 m_computePipelineConstructionType);
4645
4646 #ifndef CTS_USES_VULKANSC
4647 if (!context.getShaderReplicatedCompositesFeaturesEXT().shaderReplicatedComposites)
4648 {
4649 TCU_THROW(NotSupportedError, "shaderReplicatedComposites not supported");
4650 }
4651
4652 if (m_compositeType == COOPMAT)
4653 {
4654 const InstanceInterface &vki = context.getInstanceInterface();
4655 if (!context.getCooperativeMatrixFeatures().cooperativeMatrix)
4656 {
4657 TCU_THROW(NotSupportedError,
4658 "VkPhysicalDeviceCooperativeMatrixFeaturesKHR::cooperativeMatrix not supported");
4659 }
4660
4661 uint32_t propertyCount = 0;
4662
4663 VK_CHECK(
4664 vki.getPhysicalDeviceCooperativeMatrixPropertiesKHR(context.getPhysicalDevice(), &propertyCount, nullptr));
4665
4666 const VkCooperativeMatrixPropertiesKHR initStruct = initVulkanStructureConst();
4667
4668 std::vector<VkCooperativeMatrixPropertiesKHR> properties(propertyCount, initStruct);
4669
4670 VK_CHECK(vki.getPhysicalDeviceCooperativeMatrixPropertiesKHR(context.getPhysicalDevice(), &propertyCount,
4671 properties.data()));
4672
4673 bool foundFp16 = false;
4674 for (size_t i = 0; i < properties.size(); ++i)
4675 {
4676 const VkCooperativeMatrixPropertiesKHR *p = &properties[i];
4677
4678 if (p->scope != VK_SCOPE_SUBGROUP_KHR)
4679 continue;
4680
4681 if (p->AType == VK_COMPONENT_TYPE_FLOAT16_KHR)
4682 foundFp16 = true;
4683 }
4684 if (!foundFp16)
4685 {
4686 TCU_THROW(NotSupportedError, "cooperativeMatrix float16 not supported");
4687 }
4688 }
4689 #endif // CTS_USES_VULKANSC
4690 }
4691
4692 void ReplicatedCompositesTest::initPrograms(SourceCollections &sourceCollections) const
4693 {
4694 std::ostringstream src;
4695 src << "#version 460 core\n"
4696 << "#extension GL_EXT_scalar_block_layout : enable\n"
4697 << "#extension GL_KHR_cooperative_matrix : enable\n"
4698 << "#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable\n"
4699 << "#extension GL_KHR_memory_scope_semantics : enable\n"
4700 << "#extension GL_EXT_spec_constant_composites : enable\n"
4701 << "#pragma use_replicated_composites\n"
4702 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
4703 << "layout(binding = 0, scalar) buffer Output {\n";
4704
4705 switch (m_compositeType)
4706 {
4707 case VECTOR:
4708 src << "float vec[4];\n";
4709 break;
4710 case MATRIX:
4711 src << "float mat[4*4];\n";
4712 break;
4713 case ARRAY:
4714 src << "uint arr[3];\n";
4715 break;
4716 case ARRAY_ARRAY:
4717 src << "uint arrarr[6];\n";
4718 break;
4719 case STRUCT:
4720 src << "uint str[3];\n";
4721 break;
4722 case STRUCT_STRUCT:
4723 src << "uint str[6];\n";
4724 break;
4725 case COOPMAT:
4726 src << "float mat[2];\n";
4727 break;
4728 default:
4729 DE_ASSERT(0);
4730 break;
4731 }
4732 src << "} sb_out;\n\n";
4733
4734 if (m_compositeType == COOPMAT)
4735 {
4736 src << "layout(constant_id = 1) const uint rows = 1;\n"
4737 << "layout(constant_id = 2) const uint cols = 1;\n";
4738 }
4739
4740 if (m_instType != VALUE)
4741 {
4742 if (m_instType == SPECCONSTANT)
4743 {
4744 src << "layout(constant_id = 0) ";
4745 }
4746 switch (m_compositeType)
4747 {
4748 case VECTOR:
4749 src << "const float one = 1.0;\n"
4750 << "const vec4 vec = vec4(one);\n";
4751 break;
4752 case MATRIX:
4753 src << "const float one = 1.0;\n"
4754 << "const vec4 vec = vec4(one);\n"
4755 << "const mat4 mat = mat4(vec, vec, vec, vec);\n";
4756 break;
4757 case ARRAY:
4758 src << "const uint three = 3;\n"
4759 << "const uint arr[3] = {three, three, three};\n";
4760 break;
4761 case ARRAY_ARRAY:
4762 src << "const uint three = 3;\n"
4763 << "const uint arr[3] = {three, three, three};\n"
4764 << "const uint arrarr[2][3] = {arr, arr};\n";
4765 break;
4766 case STRUCT:
4767 src << "const uint six = 6;\n"
4768 << "struct S { uint a; uint b; uint c; };\n"
4769 << "const S str = S(six, six, six);\n\n";
4770 break;
4771 case STRUCT_STRUCT:
4772 src << "const uint six = 6;\n"
4773 << "struct S { uint a; uint b; uint c; };\n"
4774 << "struct SS { S a; S b; };\n"
4775 << "const S str = S(six, six, six);\n"
4776 << "const SS str2 = SS(str, str);\n\n";
4777 break;
4778 case COOPMAT:
4779 src << "const float one = 1.0;\n"
4780 << "const coopmat<float16_t, gl_ScopeSubgroup, rows, cols, gl_MatrixUseA> mat = coopmat<float16_t, "
4781 "gl_ScopeSubgroup, rows, cols, gl_MatrixUseA>(one);\n";
4782 break;
4783 default:
4784 DE_ASSERT(0);
4785 break;
4786 }
4787 }
4788 src << "void main (void) {\n";
4789
4790 if (m_instType == VALUE)
4791 {
4792 switch (m_compositeType)
4793 {
4794 case VECTOR:
4795 src << " float one = 1.0;\n"
4796 << " vec4 vec = vec4(one);\n";
4797 break;
4798 case MATRIX:
4799 src << " float one = 1.0;\n"
4800 << " vec4 vec = vec4(one);\n"
4801 << " mat4 mat = mat4(vec, vec, vec, vec);\n";
4802 break;
4803 case ARRAY:
4804 src << " uint three = 3;\n"
4805 << " uint arr[3] = {three, three, three};\n";
4806 break;
4807 case ARRAY_ARRAY:
4808 src << " uint three = 3;\n"
4809 << " uint arr[3] = {three, three, three};\n"
4810 << " uint arrarr[2][3] = {arr, arr};\n";
4811 break;
4812 case STRUCT:
4813 src << " uint six = 6;\n"
4814 << " struct S { uint a; uint b; uint c; };\n"
4815 << " S str = S(six, six, six);\n\n";
4816 break;
4817 case STRUCT_STRUCT:
4818 src << " uint six = 6;\n"
4819 << " struct S { uint a; uint b; uint c; };\n"
4820 << " struct SS { S a; S b; };\n"
4821 << " S str = S(six, six, six);\n"
4822 << " SS str2 = SS(str, str);\n\n";
4823 break;
4824 case COOPMAT:
4825 src << " float one = 1.0;\n"
4826 << " coopmat<float16_t, gl_ScopeSubgroup, rows, cols, gl_MatrixUseA> mat = coopmat<float16_t, "
4827 "gl_ScopeSubgroup, rows, cols, gl_MatrixUseA>(one);\n";
4828 break;
4829 default:
4830 DE_ASSERT(0);
4831 break;
4832 }
4833 }
4834 switch (m_compositeType)
4835 {
4836 case VECTOR:
4837 src << " sb_out.vec[0] = vec[0];\n"
4838 << " sb_out.vec[1] = vec[1];\n"
4839 << " sb_out.vec[2] = vec[2];\n"
4840 << " sb_out.vec[3] = vec[3];\n";
4841 break;
4842 case MATRIX:
4843 src << " sb_out.mat[0] = mat[0][0];\n"
4844 << " sb_out.mat[1] = mat[0][1];\n"
4845 << " sb_out.mat[2] = mat[0][2];\n"
4846 << " sb_out.mat[3] = mat[0][3];\n"
4847 << " sb_out.mat[4] = mat[1][0];\n"
4848 << " sb_out.mat[5] = mat[1][1];\n"
4849 << " sb_out.mat[6] = mat[1][2];\n"
4850 << " sb_out.mat[7] = mat[1][3];\n"
4851 << " sb_out.mat[8] = mat[2][0];\n"
4852 << " sb_out.mat[9] = mat[2][1];\n"
4853 << " sb_out.mat[10] = mat[2][2];\n"
4854 << " sb_out.mat[11] = mat[2][3];\n"
4855 << " sb_out.mat[12] = mat[3][0];\n"
4856 << " sb_out.mat[13] = mat[3][1];\n"
4857 << " sb_out.mat[14] = mat[3][2];\n"
4858 << " sb_out.mat[15] = mat[3][3];\n";
4859 break;
4860 case ARRAY:
4861 src << " sb_out.arr[0] = arr[0];\n"
4862 << " sb_out.arr[1] = arr[1];\n"
4863 << " sb_out.arr[2] = arr[2];\n";
4864 break;
4865 case ARRAY_ARRAY:
4866 src << " sb_out.arrarr[0] = arrarr[0][0];\n"
4867 << " sb_out.arrarr[1] = arrarr[0][1];\n"
4868 << " sb_out.arrarr[2] = arrarr[0][2];\n"
4869 << " sb_out.arrarr[3] = arrarr[1][0];\n"
4870 << " sb_out.arrarr[4] = arrarr[1][1];\n"
4871 << " sb_out.arrarr[5] = arrarr[1][2];\n";
4872 break;
4873 case STRUCT:
4874 src << " sb_out.str[0] = str.a;\n"
4875 << " sb_out.str[1] = str.b;\n"
4876 << " sb_out.str[2] = str.c;\n";
4877 break;
4878 case STRUCT_STRUCT:
4879 src << " sb_out.str[0] = str2.a.a;\n"
4880 << " sb_out.str[1] = str2.a.b;\n"
4881 << " sb_out.str[2] = str2.a.c;\n"
4882 << " sb_out.str[3] = str2.b.a;\n"
4883 << " sb_out.str[4] = str2.b.b;\n"
4884 << " sb_out.str[5] = str2.b.c;\n";
4885 break;
4886 case COOPMAT:
4887 src << " sb_out.mat[0] = float(mat[0]);\n"
4888 << " sb_out.mat[1] = (mat.length() > 1) ? float(mat[1]) : float(mat[0]);\n";
4889 break;
4890 default:
4891 DE_ASSERT(0);
4892 break;
4893 }
4894 src << "}\n";
4895
4896 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
4897 }
4898
4899 TestInstance *ReplicatedCompositesTest::createInstance(Context &context) const
4900 {
4901 return new ReplicatedCompositesTestInstance(context, m_compositeType, m_instType,
4902 m_computePipelineConstructionType);
4903 }
4904
4905 ReplicatedCompositesTestInstance::ReplicatedCompositesTestInstance(
4906 Context &context, const CompositeType compositeType, const InstType instType,
4907 const vk::ComputePipelineConstructionType computePipelineConstructionType)
4908 : TestInstance(context)
4909 , m_computePipelineConstructionType(computePipelineConstructionType)
4910 , m_compositeType(compositeType)
4911 , m_instType(instType)
4912 {
4913 }
4914
4915 tcu::TestStatus ReplicatedCompositesTestInstance::iterate(void)
4916 {
4917 const DeviceInterface &vk = m_context.getDeviceInterface();
4918 const VkDevice device = m_context.getDevice();
4919 const VkQueue queue = m_context.getUniversalQueue();
4920 const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
4921 Allocator &allocator = m_context.getDefaultAllocator();
4922
4923 // Create a buffer and host-visible memory for it
4924
4925 const VkDeviceSize bufferSizeBytes = 256;
4926 const BufferWithMemory buffer(vk, device, allocator,
4927 makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
4928 MemoryRequirement::HostVisible);
4929
4930 const Allocation &bufferAllocation = buffer.getAllocation();
4931 deMemset(bufferAllocation.getHostPtr(), 0, bufferSizeBytes);
4932
4933 flushAlloc(vk, device, bufferAllocation);
4934 // Create descriptor set
4935
4936 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
4937 DescriptorSetLayoutBuilder()
4938 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
4939 .build(vk, device));
4940
4941 const Unique<VkDescriptorPool> descriptorPool(
4942 DescriptorPoolBuilder()
4943 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
4944 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
4945
4946 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
4947
4948 const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
4949 DescriptorSetUpdateBuilder()
4950 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
4951 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
4952 .update(vk, device);
4953
4954 // Perform the computation
4955 ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
4956 m_context.getBinaryCollection().get("comp"));
4957 pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
4958
4959 uint32_t coopmatRows = 0, coopmatCols = 0;
4960 #ifndef CTS_USES_VULKANSC
4961 if (m_compositeType == COOPMAT)
4962 {
4963 const InstanceInterface &vki = m_context.getInstanceInterface();
4964 uint32_t propertyCount = 0;
4965
4966 VK_CHECK(vki.getPhysicalDeviceCooperativeMatrixPropertiesKHR(m_context.getPhysicalDevice(), &propertyCount,
4967 nullptr));
4968
4969 const VkCooperativeMatrixPropertiesKHR initStruct = initVulkanStructureConst();
4970
4971 std::vector<VkCooperativeMatrixPropertiesKHR> properties(propertyCount, initStruct);
4972
4973 VK_CHECK(vki.getPhysicalDeviceCooperativeMatrixPropertiesKHR(m_context.getPhysicalDevice(), &propertyCount,
4974 properties.data()));
4975
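        // Pick the largest advertised subgroup-scope FP16 A-type matrix so the
        // rows/cols spec constants match a configuration the implementation supports.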
4976 for (size_t i = 0; i < properties.size(); ++i)
4977 {
4978 const VkCooperativeMatrixPropertiesKHR *p = &properties[i];
4979
4980 if (p->scope != VK_SCOPE_SUBGROUP_KHR)
4981 continue;
4982
4983 if (p->AType == VK_COMPONENT_TYPE_FLOAT16_KHR)
4984 {
4985 if (p->MSize * p->KSize > coopmatRows * coopmatCols)
4986 {
4987 coopmatRows = p->MSize;
4988 coopmatCols = p->KSize;
4989 }
4990 }
4991 }
4992 DE_ASSERT(coopmatRows * coopmatCols > 0);
4993 }
4994 #endif // CTS_USES_VULKANSC
4995
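    // Constant 0 overrides the scalar seed in the shader; the same 32-bit pattern
    // (the bits of 2.0f) is reinterpreted as float or uint depending on the
    // composite type under test, which is why the reference values below branch on
    // m_instType.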
4996 uint32_t specializationData[3] = {deFloatBitsToUint32(2.0f), coopmatRows, coopmatCols};
4997 const vk::VkSpecializationMapEntry specializationMaps[3] = {
4998 {
4999 0u, // uint32_t constantID;
5000 0u, // uint32_t offset;
5001 sizeof(uint32_t), // uintptr_t size;
5002 },
5003 {
5004 1u, // uint32_t constantID;
5005 4u, // uint32_t offset;
5006 sizeof(uint32_t), // uintptr_t size;
5007 },
5008 {
5009 2u, // uint32_t constantID;
5010 8u, // uint32_t offset;
5011 sizeof(uint32_t), // uintptr_t size;
5012 },
5013 };
5014 const vk::VkSpecializationInfo specializationInfo = {
5015 3u, // uint32_t mapEntryCount;
5016 specializationMaps, // const VkSpecializationMapEntry* pMapEntries;
5017 static_cast<uintptr_t>(sizeof(specializationData)), // uintptr_t dataSize;
5018 specializationData, // const void* pData;
5019 };
5020 pipeline.setSpecializationInfo(specializationInfo);
5021 pipeline.buildPipeline();
5022
5023 const VkBufferMemoryBarrier computeFinishBarrier =
5024 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
5025
5026 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
5027 const Unique<VkCommandBuffer> cmdBuffer(
5028 allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
5029
5030 // Start recording commands
5031
5032 beginCommandBuffer(vk, *cmdBuffer);
5033
5034 pipeline.bind(*cmdBuffer);
5035 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
5036 &descriptorSet.get(), 0u, nullptr);
5037
5038 vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
5039
5040 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
5041 (VkDependencyFlags)0, 0, nullptr, 1, &computeFinishBarrier, 0, nullptr);
5042
5043 endCommandBuffer(vk, *cmdBuffer);
5044
5045 // Wait for completion
5046
5047 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
5048
5049 // Validate the results
5050
5051 invalidateAlloc(vk, device, bufferAllocation);
5052
5053 const void *outStruct = bufferAllocation.getHostPtr();
5054
5055 {
5056 float vecelem = m_instType == SPECCONSTANT ? 2.0f : 1.0f;
5057 float vecRef[4] = {vecelem, vecelem, vecelem, vecelem};
5058 float matRef[4 * 4] = {vecelem, vecelem, vecelem, vecelem, vecelem, vecelem, vecelem, vecelem,
5059 vecelem, vecelem, vecelem, vecelem, vecelem, vecelem, vecelem, vecelem};
5060 float coopmatRef[2] = {vecelem, vecelem};
5061
5062 uint32_t arrElem = m_instType == SPECCONSTANT ? deFloatBitsToUint32(2.0f) : 3;
5063 uint32_t arrRef[3] = {arrElem, arrElem, arrElem};
5064 uint32_t arrarrRef[6] = {arrElem, arrElem, arrElem, arrElem, arrElem, arrElem};
5065
5066 uint32_t strElem = m_instType == SPECCONSTANT ? deFloatBitsToUint32(2.0f) : 6;
5067 uint32_t strRef[3] = {strElem, strElem, strElem};
5068 uint32_t strstrRef[6] = {strElem, strElem, strElem, strElem, strElem, strElem};
5069
5070 const void *ref = nullptr;
5071 size_t sizeofref = 0;
5072
5073 switch (m_compositeType)
5074 {
5075 case VECTOR:
5076 ref = vecRef;
5077 sizeofref = sizeof(vecRef);
5078 break;
5079 case MATRIX:
5080 ref = matRef;
5081 sizeofref = sizeof(matRef);
5082 break;
5083 case ARRAY:
5084 ref = arrRef;
5085 sizeofref = sizeof(arrRef);
5086 break;
5087 case ARRAY_ARRAY:
5088 ref = arrarrRef;
5089 sizeofref = sizeof(arrarrRef);
5090 break;
5091 case STRUCT:
5092 ref = strRef;
5093 sizeofref = sizeof(strRef);
5094 break;
5095 case STRUCT_STRUCT:
5096 ref = strstrRef;
5097 sizeofref = sizeof(strstrRef);
5098 break;
5099 case COOPMAT:
5100 ref = coopmatRef;
5101 sizeofref = sizeof(coopmatRef);
5102 break;
5103 default:
5104 DE_ASSERT(0);
5105 break;
5106 }
5107 DE_ASSERT(sizeofref <= bufferSizeBytes);
5108
5109 if (deMemCmp(outStruct, ref, sizeofref) != 0)
5110 {
5111 return tcu::TestStatus::fail("Comparison failed");
5112 }
5113 }
5114 return tcu::TestStatus::pass("Compute succeeded");
5115 }
5116 #endif // ifndef CTS_USES_VULKANSC
5117
5118 } // namespace
5119
5120 tcu::TestCaseGroup *createBasicComputeShaderTests(tcu::TestContext &testCtx,
5121 vk::ComputePipelineConstructionType computePipelineConstructionType)
5122 {
5123 // Basic compute tests
5124 de::MovePtr<tcu::TestCaseGroup> basicComputeTests(new tcu::TestCaseGroup(testCtx, "basic"));
5125
5126 // Shader that does nothing
5127 addFunctionCaseWithPrograms(basicComputeTests.get(), "empty_shader", EmptyShaderTest::checkSupport,
5128 EmptyShaderTest::createProgram, EmptyShaderTest::createTest,
5129 computePipelineConstructionType);
5130
5131 // Concurrent compute test
5132 basicComputeTests->addChild(new ConcurrentCompute(testCtx, "concurrent_compute", computePipelineConstructionType));
5133
5134 // Use an empty workgroup with size 0 on the X axis
5135 basicComputeTests->addChild(
5136 new EmptyWorkGroupCase(testCtx, "empty_workgroup_x", tcu::UVec3(0u, 2u, 3u), computePipelineConstructionType));
5137 // Use an empty workgroup with size 0 on the Y axis
5138 basicComputeTests->addChild(
5139 new EmptyWorkGroupCase(testCtx, "empty_workgroup_y", tcu::UVec3(2u, 0u, 3u), computePipelineConstructionType));
5140 // Use an empty workgroup with size 0 on the Z axis
5141 basicComputeTests->addChild(
5142 new EmptyWorkGroupCase(testCtx, "empty_workgroup_z", tcu::UVec3(2u, 3u, 0u), computePipelineConstructionType));
5143 // Use an empty workgroup with size 0 on the X, Y and Z axes
5144 basicComputeTests->addChild(new EmptyWorkGroupCase(testCtx, "empty_workgroup_all", tcu::UVec3(0u, 0u, 0u),
5145 computePipelineConstructionType));
5146
5147 // Use the maximum work group size on the X axis
5148 basicComputeTests->addChild(new MaxWorkGroupSizeTest(testCtx, "max_local_size_x",
5149 MaxWorkGroupSizeTest::Params{MaxWorkGroupSizeTest::Axis::X},
5150 computePipelineConstructionType));
5151 // Use the maximum work group size on the Y axis
5152 basicComputeTests->addChild(new MaxWorkGroupSizeTest(testCtx, "max_local_size_y",
5153 MaxWorkGroupSizeTest::Params{MaxWorkGroupSizeTest::Axis::Y},
5154 computePipelineConstructionType));
5155 // Use the maximum work group size on the Z axis
5156 basicComputeTests->addChild(new MaxWorkGroupSizeTest(testCtx, "max_local_size_z",
5157 MaxWorkGroupSizeTest::Params{MaxWorkGroupSizeTest::Axis::Z},
5158 computePipelineConstructionType));
5159
5160     // UBO to SSBO invert tests
5161 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(
5162 testCtx, "ubo_to_ssbo_single_invocation", 256, tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1),
5163 computePipelineConstructionType));
5164 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_single_group", 1024,
5165 tcu::IVec3(2, 1, 4), tcu::IVec3(1, 1, 1),
5166 computePipelineConstructionType));
5167 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(
5168 testCtx, "ubo_to_ssbo_multiple_invocations", 1024, tcu::IVec3(1, 1, 1), tcu::IVec3(2, 4, 1),
5169 computePipelineConstructionType));
5170 basicComputeTests->addChild(
5171 BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_multiple_groups", 1024, tcu::IVec3(1, 4, 2),
5172 tcu::IVec3(2, 2, 4), computePipelineConstructionType));
5173
5174     // Copy and invert SSBO tests
5175 basicComputeTests->addChild(
5176 BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_single_invocation", 256, tcu::IVec3(1, 1, 1),
5177 tcu::IVec3(1, 1, 1), computePipelineConstructionType));
5178 basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(
5179 testCtx, "copy_ssbo_multiple_invocations", 1024, tcu::IVec3(1, 1, 1), tcu::IVec3(2, 4, 1),
5180 computePipelineConstructionType));
5181 basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_multiple_groups", 1024,
5182 tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4),
5183 computePipelineConstructionType));
5184
    // Read and write same SSBO
    basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_rw_single_invocation", 256, true,
                                                          tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1),
                                                          computePipelineConstructionType));
    basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_rw_multiple_groups", 1024, true,
                                                          tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4),
                                                          computePipelineConstructionType));
    basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_unsized_arr_single_invocation", 256, false,
                                                          tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1),
                                                          computePipelineConstructionType));
    basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_unsized_arr_multiple_groups", 1024, false,
                                                          tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4),
                                                          computePipelineConstructionType));

    // Write to multiple SSBOs
    basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_arr_single_invocation", 256, true,
                                                            tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1),
                                                            computePipelineConstructionType));
    basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_arr_multiple_groups", 1024, true,
                                                            tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4),
                                                            computePipelineConstructionType));
    basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_unsized_arr_single_invocation",
                                                            256, false, tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1),
                                                            computePipelineConstructionType));
    basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_unsized_arr_multiple_groups", 1024,
                                                            false, tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4),
                                                            computePipelineConstructionType));

    // SSBO local barrier usage
    basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_single_invocation",
                                                         tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1),
                                                         computePipelineConstructionType));
    basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_single_group",
                                                         tcu::IVec3(3, 2, 5), tcu::IVec3(1, 1, 1),
                                                         computePipelineConstructionType));
    basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_multiple_groups",
                                                         tcu::IVec3(3, 4, 1), tcu::IVec3(2, 7, 3),
                                                         computePipelineConstructionType));

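    // The local-barrier cases rely on shared memory plus barrier() for intra-workgroup
    // ordering; roughly (a sketch with placeholder names, not the generated shader):
    //
    //   shared uint sData[LOCAL_SIZE]; // LOCAL_SIZE stands in for the actual workgroup size
    //   void main(void)
    //   {
    //       sData[gl_LocalInvocationIndex] = gl_GlobalInvocationID.x;
    //       barrier(); // the stores above are now visible to every invocation in the group
    //       uint neighbor = (gl_LocalInvocationIndex + 1u) % LOCAL_SIZE;
    //       results[gl_GlobalInvocationID.x] = sData[neighbor]; // read another invocation's store
    //   }
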
    // SSBO memory barrier usage
    basicComputeTests->addChild(
        new SSBOBarrierTest(testCtx, "ssbo_cmd_barrier_single", tcu::IVec3(1, 1, 1), computePipelineConstructionType));
    basicComputeTests->addChild(new SSBOBarrierTest(testCtx, "ssbo_cmd_barrier_multiple", tcu::IVec3(11, 5, 7),
                                                    computePipelineConstructionType));

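    // The cmd_barrier cases split the work into two dispatches with an execution/memory
    // barrier between them; a sketch of the expected barrier, using the helpers from
    // vkBarrierUtil.hpp (the buffer and size names are illustrative):
    //
    //   const VkBufferMemoryBarrier barrier = makeBufferMemoryBarrier(
    //       VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
    //   vk.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
    //                         VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0u, nullptr,
    //                         1u, &barrier, 0u, nullptr);
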
    // Basic shared variable usage
    basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_single_invocation", tcu::IVec3(1, 1, 1),
                                                  tcu::IVec3(1, 1, 1), computePipelineConstructionType));
    basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_single_group", tcu::IVec3(3, 2, 5),
                                                  tcu::IVec3(1, 1, 1), computePipelineConstructionType));
    basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_multiple_invocations", tcu::IVec3(1, 1, 1),
                                                  tcu::IVec3(2, 5, 4), computePipelineConstructionType));
    basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_multiple_groups", tcu::IVec3(3, 4, 1),
                                                  tcu::IVec3(2, 7, 3), computePipelineConstructionType));

    // Atomic operation with shared var
    basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_single_invocation",
                                                          tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1),
                                                          computePipelineConstructionType));
    basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_single_group", tcu::IVec3(3, 2, 5),
                                                          tcu::IVec3(1, 1, 1), computePipelineConstructionType));
    basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_multiple_invocations",
                                                          tcu::IVec3(1, 1, 1), tcu::IVec3(2, 5, 4),
                                                          computePipelineConstructionType));
    basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_multiple_groups",
                                                          tcu::IVec3(3, 4, 1), tcu::IVec3(2, 7, 3),
                                                          computePipelineConstructionType));

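    // The shared_atomic_op cases combine a shared variable with atomic built-ins; roughly
    // (a sketch, not the generated shader; 'results' stands in for an output SSBO):
    //
    //   shared uint sCounter;
    //   void main(void)
    //   {
    //       if (gl_LocalInvocationIndex == 0u)
    //           sCounter = 0u;
    //       barrier();                            // the counter is initialized for the whole group
    //       uint prev = atomicAdd(sCounter, 1u);  // every invocation receives a distinct value
    //       results[gl_GlobalInvocationID.x] = prev;
    //   }
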
    // Image to SSBO copy
    basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx, "copy_image_to_ssbo_small", tcu::IVec2(1, 1),
                                                        tcu::IVec2(64, 64), computePipelineConstructionType));
    basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx, "copy_image_to_ssbo_large", tcu::IVec2(2, 4),
                                                        tcu::IVec2(512, 512), computePipelineConstructionType));

    // SSBO to image copy
    basicComputeTests->addChild(new CopySSBOToImageTest(testCtx, "copy_ssbo_to_image_small", tcu::IVec2(1, 1),
                                                        tcu::IVec2(64, 64), computePipelineConstructionType));
    basicComputeTests->addChild(new CopySSBOToImageTest(testCtx, "copy_ssbo_to_image_large", tcu::IVec2(2, 4),
                                                        tcu::IVec2(512, 512), computePipelineConstructionType));

    // Atomic operation with image
    basicComputeTests->addChild(new ImageAtomicOpTest(testCtx, "image_atomic_op_local_size_1", 1, tcu::IVec2(64, 64),
                                                      computePipelineConstructionType));
    basicComputeTests->addChild(new ImageAtomicOpTest(testCtx, "image_atomic_op_local_size_8", 8, tcu::IVec2(64, 64),
                                                      computePipelineConstructionType));

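    // The image_atomic_op cases make all invocations of one workgroup target the same texel of
    // an r32ui image, so the result is only correct if the accesses really are atomic; roughly
    // (a sketch, not the generated shader):
    //
    //   layout(r32ui, binding = 0) coherent uniform uimage2D u_image;
    //   void main(void)
    //   {
    //       imageAtomicAdd(u_image, ivec2(gl_WorkGroupID.xy), gl_LocalInvocationIndex + 1u);
    //   }
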
    // Image barrier
    basicComputeTests->addChild(
        new ImageBarrierTest(testCtx, "image_barrier_single", tcu::IVec2(1, 1), computePipelineConstructionType));
    basicComputeTests->addChild(
        new ImageBarrierTest(testCtx, "image_barrier_multiple", tcu::IVec2(64, 64), computePipelineConstructionType));

    // Test secondary command buffers in compute only queues
    basicComputeTests->addChild(
        new ComputeOnlyQueueTests::SecondaryCommandBufferComputeOnlyTest(testCtx, "secondary_compute_only_queue"));

#ifndef CTS_USES_VULKANSC
    for (uint32_t i = 0; i < 3; ++i)
    {
        const char *instStr[3] = {"value", "constant", "specconstant"};
        std::string name;
        name = std::string("replicated_composites_vector_") + instStr[i];
        basicComputeTests->addChild(
            new ReplicatedCompositesTest(testCtx, name.c_str(), VECTOR, (InstType)i, computePipelineConstructionType));
        name = std::string("replicated_composites_matrix_") + instStr[i];
        basicComputeTests->addChild(
            new ReplicatedCompositesTest(testCtx, name.c_str(), MATRIX, (InstType)i, computePipelineConstructionType));
        name = std::string("replicated_composites_array_") + instStr[i];
        basicComputeTests->addChild(
            new ReplicatedCompositesTest(testCtx, name.c_str(), ARRAY, (InstType)i, computePipelineConstructionType));
        name = std::string("replicated_composites_array_array_") + instStr[i];
        basicComputeTests->addChild(new ReplicatedCompositesTest(testCtx, name.c_str(), ARRAY_ARRAY, (InstType)i,
                                                                 computePipelineConstructionType));
        name = std::string("replicated_composites_struct_") + instStr[i];
        basicComputeTests->addChild(
            new ReplicatedCompositesTest(testCtx, name.c_str(), STRUCT, (InstType)i, computePipelineConstructionType));
        name = std::string("replicated_composites_struct_struct_") + instStr[i];
        basicComputeTests->addChild(new ReplicatedCompositesTest(testCtx, name.c_str(), STRUCT_STRUCT, (InstType)i,
                                                                 computePipelineConstructionType));
        name = std::string("replicated_composites_coopmat_") + instStr[i];
        basicComputeTests->addChild(
            new ReplicatedCompositesTest(testCtx, name.c_str(), COOPMAT, (InstType)i, computePipelineConstructionType));
    }

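    // The three instStr variants presumably select how the replicated composite is built in
    // SPIR-V, matching the three instructions of SPV_EXT_replicated_composites: "value" ->
    // OpCompositeConstructReplicateEXT, "constant" -> OpConstantCompositeReplicateEXT, and
    // "specconstant" -> OpSpecConstantCompositeReplicateEXT (a mapping inferred from the
    // names, not taken from the test implementation).
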
    if (!isComputePipelineConstructionTypeShaderObject(computePipelineConstructionType))
    {
        basicComputeTests->addChild(
            cts_amber::createAmberTestCase(testCtx, "write_ssbo_array", "", "compute", "write_ssbo_array.amber"));
        basicComputeTests->addChild(cts_amber::createAmberTestCase(testCtx, "atomic_barrier_sum_small", "", "compute",
                                                                   "atomic_barrier_sum_small.amber"));
        basicComputeTests->addChild(
            cts_amber::createAmberTestCase(testCtx, "branch_past_barrier", "", "compute", "branch_past_barrier.amber"));
        basicComputeTests->addChild(cts_amber::createAmberTestCase(
            testCtx, "webgl_spirv_loop",
            "Simple SPIR-V loop from a WebGL example that caused problems in some implementations", "compute",
            "webgl_spirv_loop.amber"));

        {
            cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(
                testCtx, "pk_immediate", "Immediate/inline arguments to packed 16-bit operations", "compute",
                "pk-immediate.amber");
            testCase->addRequirement("Storage16BitFeatures.storageBuffer16BitAccess");
            testCase->addRequirement("Float16Int8Features.shaderFloat16");
            testCase->addRequirement("Features.shaderInt16");
            testCase->addPropertyRequirement("FloatControlsProperties.shaderDenormPreserveFloat16");
            basicComputeTests->addChild(testCase);
        }

        {
            cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(
                testCtx, "pkadd_immediate", "Immediate/inline arguments to packed 16-bit operations", "compute",
                "pkadd-immediate.amber");
            testCase->addRequirement("Features.shaderInt16");
            testCase->addRequirement("Storage16BitFeatures.storageBuffer16BitAccess");
            basicComputeTests->addChild(testCase);
        }
    }
#endif // ifndef CTS_USES_VULKANSC

    return basicComputeTests.release();
}

tcu::TestCaseGroup *createBasicDeviceGroupComputeShaderTests(
    tcu::TestContext &testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    de::MovePtr<tcu::TestCaseGroup> deviceGroupComputeTests(new tcu::TestCaseGroup(testCtx, "device_group"));

    deviceGroupComputeTests->addChild(new DispatchBaseTest(testCtx, "dispatch_base", 32768, tcu::IVec3(4, 2, 4),
                                                           tcu::IVec3(16, 8, 8), tcu::IVec3(4, 8, 8),
                                                           computePipelineConstructionType, false));
#ifndef CTS_USES_VULKANSC
    deviceGroupComputeTests->addChild(new DispatchBaseTest(testCtx, "dispatch_base_maintenance5", 32768,
                                                           tcu::IVec3(4, 2, 4), tcu::IVec3(16, 8, 8),
                                                           tcu::IVec3(4, 8, 8), computePipelineConstructionType, true));
#endif
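    // The dispatch_base cases above exercise vkCmdDispatchBase(), which starts gl_WorkGroupID
    // at a caller-supplied base instead of (0,0,0); the pipeline must be created with the
    // DISPATCH_BASE pipeline create flag. Sketch of the call (base/count names illustrative):
    //
    //   vk.cmdDispatchBase(cmdBuffer, baseX, baseY, baseZ, // ID of the first workgroup
    //                      countX, countY, countZ);        // number of workgroups to launch
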
    deviceGroupComputeTests->addChild(new DeviceIndexTest(testCtx, "device_index", 96, tcu::IVec3(3, 2, 1),
                                                          tcu::IVec3(2, 4, 1), computePipelineConstructionType));

    return deviceGroupComputeTests.release();
}
} // namespace compute
} // namespace vkt