/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2019 The Khronos Group Inc.
 * Copyright (c) 2019 The Android Open Source Project
 * Copyright (c) 2023 LunarG, Inc.
 * Copyright (c) 2023 Nintendo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Compute Shader Tests
 *//*--------------------------------------------------------------------*/

#include "vktComputeBasicComputeShaderTests.hpp"
#include "vktTestCase.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktComputeTestsUtil.hpp"
#include "vktCustomInstancesDevices.hpp"
#include "vktAmberTestCase.hpp"

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkDeviceUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBufferWithMemory.hpp"
#include "vkSafetyCriticalUtil.hpp"
#include "vkImageWithMemory.hpp"

#include "tcuCommandLine.hpp"
#include "tcuTestLog.hpp"
#include "tcuMaybe.hpp"

#include "deStringUtil.hpp"
#include "deUniquePtr.hpp"
#include "deRandom.hpp"

#include <vector>
#include <memory>

using namespace vk;

namespace vkt
{
namespace compute
{
namespace
{

template<typename T, int size>
T multiplyComponents (const tcu::Vector<T, size>& v)
{
    T accum = 1;
    for (int i = 0; i < size; ++i)
        accum *= v[i];
    return accum;
}

template<typename T>
inline T squared (const T& a)
{
    return a * a;
}

inline VkImageCreateInfo make2DImageCreateInfo (const tcu::IVec2& imageSize, const VkImageUsageFlags usage)
{
    const VkImageCreateInfo imageParams =
    {
        VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,                // VkStructureType          sType;
        DE_NULL,                                            // const void*              pNext;
        0u,                                                 // VkImageCreateFlags       flags;
        VK_IMAGE_TYPE_2D,                                   // VkImageType              imageType;
        VK_FORMAT_R32_UINT,                                 // VkFormat                 format;
        vk::makeExtent3D(imageSize.x(), imageSize.y(), 1),  // VkExtent3D               extent;
        1u,                                                 // deUint32                 mipLevels;
        1u,                                                 // deUint32                 arrayLayers;
        VK_SAMPLE_COUNT_1_BIT,                              // VkSampleCountFlagBits    samples;
        VK_IMAGE_TILING_OPTIMAL,                            // VkImageTiling            tiling;
        usage,                                              // VkImageUsageFlags        usage;
        VK_SHARING_MODE_EXCLUSIVE,                          // VkSharingMode            sharingMode;
        0u,                                                 // deUint32                 queueFamilyIndexCount;
        DE_NULL,                                            // const deUint32*          pQueueFamilyIndices;
        VK_IMAGE_LAYOUT_UNDEFINED,                          // VkImageLayout            initialLayout;
    };
    return imageParams;
}

inline VkBufferImageCopy makeBufferImageCopy (const tcu::IVec2& imageSize)
{
    return compute::makeBufferImageCopy(vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), 1u);
}

enum BufferType
{
    BUFFER_TYPE_UNIFORM,
    BUFFER_TYPE_SSBO,
};

class SharedVarTest : public vkt::TestCase
{
public:
    SharedVarTest (tcu::TestContext& testCtx,
                   const std::string& name,
                   const tcu::IVec3& localSize,
                   const tcu::IVec3& workSize,
                   const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport (Context& context) const;
    void initPrograms (SourceCollections& sourceCollections) const;
    TestInstance* createInstance (Context& context) const;

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class SharedVarTestInstance : public vkt::TestInstance
{
public:
    SharedVarTestInstance (Context& context,
                           const tcu::IVec3& localSize,
                           const tcu::IVec3& workSize,
                           const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate (void);

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

SharedVarTest::SharedVarTest (tcu::TestContext& testCtx,
                              const std::string& name,
                              const tcu::IVec3& localSize,
                              const tcu::IVec3& workSize,
                              const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

void SharedVarTest::checkSupport (Context& context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}

void SharedVarTest::initPrograms (SourceCollections& sourceCollections) const
{
    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);
    const int numValues = workGroupSize * workGroupCount;

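    // Each invocation writes (globalOffs + localOffs^2) into the shared array at the mirrored
    // slot (localSize - localOffs - 1), then reads its own slot back after a shared-memory
    // barrier, so every result has crossed between invocations through shared memory.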
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
        << "layout(binding = 0) writeonly buffer Output {\n"
        << "    uint values[" << numValues << "];\n"
        << "} sb_out;\n\n"
        << "shared uint offsets[" << workGroupSize << "];\n\n"
        << "void main (void) {\n"
        << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
        << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
        << "    uint globalOffs = localSize*globalNdx;\n"
        << "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
        << "\n"
        << "    offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
        << "    memoryBarrierShared();\n"
        << "    barrier();\n"
        << "    sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance* SharedVarTest::createInstance (Context& context) const
{
    return new SharedVarTestInstance(context, m_localSize, m_workSize, m_computePipelineConstructionType);
}

SharedVarTestInstance::SharedVarTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize, const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus SharedVarTestInstance::iterate (void)
{
    const DeviceInterface& vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator& allocator = m_context.getDefaultAllocator();

    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);

    // Create a buffer and host-visible memory for it

    const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
    const BufferWithMemory buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
        .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
        .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
        .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.buildPipeline();

    const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());

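    // Make the shader writes available and visible to the host before mapping the results.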
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results

    const Allocation& bufferAllocation = buffer.getAllocation();
    invalidateAlloc(vk, device, bufferAllocation);

    const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());

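    // Slot 'localOffset' was written by invocation (workGroupSize - localOffset - 1), so it is
    // expected to hold globalOffset plus the square of that invocation's index.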
    for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
    {
        const int globalOffset = groupNdx * workGroupSize;
        for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
        {
            const deUint32 res = bufferPtr[globalOffset + localOffset];
            const deUint32 ref = globalOffset + squared(workGroupSize - localOffset - 1);

            if (res != ref)
            {
                std::ostringstream msg;
                msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
                return tcu::TestStatus::fail(msg.str());
            }
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class SharedVarAtomicOpTest : public vkt::TestCase
{
public:
    SharedVarAtomicOpTest (tcu::TestContext& testCtx,
                           const std::string& name,
                           const tcu::IVec3& localSize,
                           const tcu::IVec3& workSize,
                           const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport (Context& context) const;
    void initPrograms (SourceCollections& sourceCollections) const;
    TestInstance* createInstance (Context& context) const;

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class SharedVarAtomicOpTestInstance : public vkt::TestInstance
{
public:
    SharedVarAtomicOpTestInstance (Context& context,
                                   const tcu::IVec3& localSize,
                                   const tcu::IVec3& workSize,
                                   const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate (void);

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

SharedVarAtomicOpTest::SharedVarAtomicOpTest (tcu::TestContext& testCtx,
                                              const std::string& name,
                                              const tcu::IVec3& localSize,
                                              const tcu::IVec3& workSize,
                                              const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

void SharedVarAtomicOpTest::checkSupport (Context& context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}

void SharedVarAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
{
    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);
    const int numValues = workGroupSize * workGroupCount;

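    // Every invocation atomically increments a zero-initialized shared counter and uses the
    // returned pre-increment value as a unique output slot, storing (slot + 1) there; the
    // barrier keeps the counter reset from racing against the atomics.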
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
        << "layout(binding = 0) writeonly buffer Output {\n"
        << "    uint values[" << numValues << "];\n"
        << "} sb_out;\n\n"
        << "shared uint count;\n\n"
        << "void main (void) {\n"
        << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
        << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
        << "    uint globalOffs = localSize*globalNdx;\n"
        << "\n"
        << "    count = 0u;\n"
        << "    memoryBarrierShared();\n"
        << "    barrier();\n"
        << "    uint oldVal = atomicAdd(count, 1u);\n"
        << "    sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance* SharedVarAtomicOpTest::createInstance (Context& context) const
{
    return new SharedVarAtomicOpTestInstance(context, m_localSize, m_workSize, m_computePipelineConstructionType);
}

SharedVarAtomicOpTestInstance::SharedVarAtomicOpTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize, const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus SharedVarAtomicOpTestInstance::iterate (void)
{
    const DeviceInterface& vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator& allocator = m_context.getDefaultAllocator();

    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);

    // Create a buffer and host-visible memory for it

    const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
    const BufferWithMemory buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
        .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
        .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
        .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.buildPipeline();

    const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1u, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results

    const Allocation& bufferAllocation = buffer.getAllocation();
    invalidateAlloc(vk, device, bufferAllocation);

    const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());

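    // Each slot in a group was claimed by exactly one atomicAdd() result, so element
    // 'localOffset' must contain localOffset + 1.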
    for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
    {
        const int globalOffset = groupNdx * workGroupSize;
        for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
        {
            const deUint32 res = bufferPtr[globalOffset + localOffset];
            const deUint32 ref = localOffset + 1;

            if (res != ref)
            {
                std::ostringstream msg;
                msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
                return tcu::TestStatus::fail(msg.str());
            }
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class SSBOLocalBarrierTest : public vkt::TestCase
{
public:
    SSBOLocalBarrierTest (tcu::TestContext& testCtx,
                          const std::string& name,
                          const tcu::IVec3& localSize,
                          const tcu::IVec3& workSize,
                          const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport (Context& context) const;
    void initPrograms (SourceCollections& sourceCollections) const;
    TestInstance* createInstance (Context& context) const;

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class SSBOLocalBarrierTestInstance : public vkt::TestInstance
{
public:
    SSBOLocalBarrierTestInstance (Context& context,
                                  const tcu::IVec3& localSize,
                                  const tcu::IVec3& workSize,
                                  const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate (void);

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

SSBOLocalBarrierTest::SSBOLocalBarrierTest (tcu::TestContext& testCtx,
                                            const std::string& name,
                                            const tcu::IVec3& localSize,
                                            const tcu::IVec3& workSize,
                                            const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

void SSBOLocalBarrierTest::checkSupport (Context& context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}

void SSBOLocalBarrierTest::initPrograms (SourceCollections& sourceCollections) const
{
    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);
    const int numValues = workGroupSize * workGroupCount;

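    // The shader does cross-invocation read-modify-writes on a coherent SSBO: each invocation
    // initializes its own slot, then adds its index into the slots of the next and the
    // next-but-one invocation (mod localSize), with memoryBarrierBuffer() + barrier() between
    // the passes.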
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
        << "layout(binding = 0) coherent buffer Output {\n"
        << "    uint values[" << numValues << "];\n"
        << "} sb_out;\n\n"
        << "void main (void) {\n"
        << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
        << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
        << "    uint globalOffs = localSize*globalNdx;\n"
        << "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
        << "\n"
        << "    sb_out.values[globalOffs + localOffs] = globalOffs;\n"
        << "    memoryBarrierBuffer();\n"
        << "    barrier();\n"
        << "    sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n" // += so we read and write
        << "    memoryBarrierBuffer();\n"
        << "    barrier();\n"
        << "    sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance* SSBOLocalBarrierTest::createInstance (Context& context) const
{
    return new SSBOLocalBarrierTestInstance(context, m_localSize, m_workSize, m_computePipelineConstructionType);
}

SSBOLocalBarrierTestInstance::SSBOLocalBarrierTestInstance (Context& context, const tcu::IVec3& localSize, const tcu::IVec3& workSize, const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus SSBOLocalBarrierTestInstance::iterate (void)
{
    const DeviceInterface& vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator& allocator = m_context.getDefaultAllocator();

    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);

    // Create a buffer and host-visible memory for it

    const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * workGroupSize * workGroupCount;
    const BufferWithMemory buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
        .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
        .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
        .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.buildPipeline();

    const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results

    const Allocation& bufferAllocation = buffer.getAllocation();
    invalidateAlloc(vk, device, bufferAllocation);

    const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());

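    // Slot 'localOffset' starts at globalOffset and is incremented by the invocations one and
    // two places before it (wrapping within the work group), hence the offs0 + offs1 reference.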
    for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
    {
        const int globalOffset = groupNdx * workGroupSize;
        for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
        {
            const deUint32 res = bufferPtr[globalOffset + localOffset];
            const int offs0 = localOffset - 1 < 0 ? ((localOffset + workGroupSize - 1) % workGroupSize) : ((localOffset - 1) % workGroupSize);
            const int offs1 = localOffset - 2 < 0 ? ((localOffset + workGroupSize - 2) % workGroupSize) : ((localOffset - 2) % workGroupSize);
            const deUint32 ref = static_cast<deUint32>(globalOffset + offs0 + offs1);

            if (res != ref)
            {
                std::ostringstream msg;
                msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
                return tcu::TestStatus::fail(msg.str());
            }
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class CopyImageToSSBOTest : public vkt::TestCase
{
public:
    CopyImageToSSBOTest (tcu::TestContext& testCtx,
                         const std::string& name,
                         const tcu::IVec2& localSize,
                         const tcu::IVec2& imageSize,
                         const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport (Context& context) const;
    void initPrograms (SourceCollections& sourceCollections) const;
    TestInstance* createInstance (Context& context) const;

private:
    const tcu::IVec2 m_localSize;
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class CopyImageToSSBOTestInstance : public vkt::TestInstance
{
public:
    CopyImageToSSBOTestInstance (Context& context,
                                 const tcu::IVec2& localSize,
                                 const tcu::IVec2& imageSize,
                                 const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate (void);

private:
    const tcu::IVec2 m_localSize;
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

CopyImageToSSBOTest::CopyImageToSSBOTest (tcu::TestContext& testCtx,
                                          const std::string& name,
                                          const tcu::IVec2& localSize,
                                          const tcu::IVec2& imageSize,
                                          const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_localSize(localSize)
    , m_imageSize(imageSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
    DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
    DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
}

void CopyImageToSSBOTest::checkSupport (Context& context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}

void CopyImageToSSBOTest::initPrograms (SourceCollections& sourceCollections) const
{
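    // One invocation copies one texel; since the dispatch is sized to imageSize / localSize,
    // the row stride (gl_NumWorkGroups.x * gl_WorkGroupSize.x) equals the image width.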
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
        << "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_srcImg;\n"
        << "layout(binding = 0) writeonly buffer Output {\n"
        << "    uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
        << "} sb_out;\n\n"
        << "void main (void) {\n"
        << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
        << "    uint value  = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
        << "    sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance* CopyImageToSSBOTest::createInstance (Context& context) const
{
    return new CopyImageToSSBOTestInstance(context, m_localSize, m_imageSize, m_computePipelineConstructionType);
}

CopyImageToSSBOTestInstance::CopyImageToSSBOTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize, const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_localSize(localSize)
    , m_imageSize(imageSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus CopyImageToSSBOTestInstance::iterate (void)
{
    const DeviceInterface& vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator& allocator = m_context.getDefaultAllocator();

    // Create an image

    const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
    const ImageWithMemory image(vk, device, allocator, imageParams, MemoryRequirement::Any);

    const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
    const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));

    // Staging buffer (source data for image)

    const deUint32 imageArea = multiplyComponents(m_imageSize);
    const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;

    const BufferWithMemory stagingBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);

    // Populate the staging buffer with test data
    {
        de::Random rnd(0xab2c7);
        const Allocation& stagingBufferAllocation = stagingBuffer.getAllocation();
        deUint32* bufferPtr = static_cast<deUint32*>(stagingBufferAllocation.getHostPtr());
        for (deUint32 i = 0; i < imageArea; ++i)
            *bufferPtr++ = rnd.getUint32();

        flushAlloc(vk, device, stagingBufferAllocation);
    }

    // Create a buffer to store shader output

    const BufferWithMemory outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
        .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
        .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
        .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
        .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    // Set the bindings

    const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
    const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);

    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
        .update(vk, device);

    // Perform the computation
    {
        ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp"));
        pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
        pipeline.buildPipeline();

        const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
        const tcu::IVec2 workSize = m_imageSize / m_localSize;

        // Prepare the command buffer

        const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
        const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

        // Start recording commands

        beginCommandBuffer(vk, *cmdBuffer);

        pipeline.bind(*cmdBuffer);
        vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

        const std::vector<VkBufferImageCopy> bufferImageCopy(1, makeBufferImageCopy(m_imageSize));
        copyBufferToImage(vk, *cmdBuffer, *stagingBuffer, bufferSizeBytes, bufferImageCopy, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, *image, VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);

        vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
        vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);

        endCommandBuffer(vk, *cmdBuffer);

        // Wait for completion

        submitCommandsAndWait(vk, device, queue, *cmdBuffer);
    }

    // Validate the results

    const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
    invalidateAlloc(vk, device, outputBufferAllocation);

    const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
    const deUint32* refBufferPtr = static_cast<deUint32*>(stagingBuffer.getAllocation().getHostPtr());

    for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
    {
        const deUint32 res = *(bufferPtr + ndx);
        const deUint32 ref = *(refBufferPtr + ndx);

        if (res != ref)
        {
            std::ostringstream msg;
            msg << "Comparison failed for Output.values[" << ndx << "]";
            return tcu::TestStatus::fail(msg.str());
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class CopySSBOToImageTest : public vkt::TestCase
{
public:
    CopySSBOToImageTest (tcu::TestContext& testCtx,
                         const std::string& name,
                         const tcu::IVec2& localSize,
                         const tcu::IVec2& imageSize,
                         const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport (Context& context) const;
    void initPrograms (SourceCollections& sourceCollections) const;
    TestInstance* createInstance (Context& context) const;

private:
    const tcu::IVec2 m_localSize;
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class CopySSBOToImageTestInstance : public vkt::TestInstance
{
public:
    CopySSBOToImageTestInstance (Context& context,
                                 const tcu::IVec2& localSize,
                                 const tcu::IVec2& imageSize,
                                 const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate (void);

private:
    const tcu::IVec2 m_localSize;
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

CopySSBOToImageTest::CopySSBOToImageTest (tcu::TestContext& testCtx,
                                          const std::string& name,
                                          const tcu::IVec2& localSize,
                                          const tcu::IVec2& imageSize,
                                          const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_localSize(localSize)
    , m_imageSize(imageSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
    DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
    DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
}

void CopySSBOToImageTest::checkSupport (Context& context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}

void CopySSBOToImageTest::initPrograms (SourceCollections& sourceCollections) const
{
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
        << "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_dstImg;\n"
        << "layout(binding = 0) readonly buffer Input {\n"
        << "    uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
        << "} sb_in;\n\n"
        << "void main (void) {\n"
        << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
        << "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
        << "    imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance* CopySSBOToImageTest::createInstance (Context& context) const
{
    return new CopySSBOToImageTestInstance(context, m_localSize, m_imageSize, m_computePipelineConstructionType);
}

CopySSBOToImageTestInstance::CopySSBOToImageTestInstance (Context& context, const tcu::IVec2& localSize, const tcu::IVec2& imageSize, const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_localSize(localSize)
    , m_imageSize(imageSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus CopySSBOToImageTestInstance::iterate (void)
{
    ContextCommonData data = m_context.getContextCommonData();
    const DeviceInterface& vkd = data.vkd;

    // Create an image, a view, and the output buffer
    const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
    ImageWithBuffer imageWithBuffer(vkd, data.device, data.allocator, vk::makeExtent3D(m_imageSize.x(), m_imageSize.y(), 1),
                                    VK_FORMAT_R32_UINT, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT, vk::VK_IMAGE_TYPE_2D,
                                    subresourceRange);

    const deUint32 imageArea = multiplyComponents(m_imageSize);
    const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * imageArea;

    const BufferWithMemory inputBuffer(vkd, data.device, data.allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);

    // Populate the buffer with test data
    {
        de::Random rnd(0x77238ac2);
        const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
        deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
        for (deUint32 i = 0; i < imageArea; ++i)
            *bufferPtr++ = rnd.getUint32();

        flushAlloc(vkd, data.device, inputBufferAllocation);
    }

    // Create descriptor set
    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
        .build(vkd, data.device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
        .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
        .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
        .build(vkd, data.device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vkd, data.device, *descriptorPool, *descriptorSetLayout));

    // Set the bindings

    const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, imageWithBuffer.getImageView(), VK_IMAGE_LAYOUT_GENERAL);
    const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);

    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
        .update(vkd, data.device);

    // Perform the computation
    {
        ComputePipelineWrapper pipeline(vkd, data.device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp"));
        pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
        pipeline.buildPipeline();

        const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);

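        // Transition the image straight from UNDEFINED to GENERAL for shader stores; no prior
        // contents need to be preserved or made visible.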
        const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
            0u, VK_ACCESS_SHADER_WRITE_BIT,
            VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
            imageWithBuffer.getImage(), subresourceRange);

        const tcu::IVec2 workSize = m_imageSize / m_localSize;

        // Prepare the command buffer

        const Unique<VkCommandPool> cmdPool(makeCommandPool(vkd, data.device, data.qfIndex));
        const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vkd, data.device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

        // Start recording commands

        beginCommandBuffer(vkd, *cmdBuffer);

        pipeline.bind(*cmdBuffer);
        vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

        vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &inputBufferPostHostWriteBarrier, 1, &imageLayoutBarrier);
        vkd.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);

        copyImageToBuffer(vkd, *cmdBuffer, imageWithBuffer.getImage(), imageWithBuffer.getBuffer(), m_imageSize, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);

        endCommandBuffer(vkd, *cmdBuffer);

        // Wait for completion

        submitCommandsAndWait(vkd, data.device, data.queue, *cmdBuffer);
    }

    // Validate the results

    const Allocation& outputBufferAllocation = imageWithBuffer.getBufferAllocation();
    invalidateAlloc(vkd, data.device, outputBufferAllocation);

    const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
    const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());

    for (deUint32 ndx = 0; ndx < imageArea; ++ndx)
    {
        const deUint32 res = *(bufferPtr + ndx);
        const deUint32 ref = *(refBufferPtr + ndx);

        if (res != ref)
        {
            std::ostringstream msg;
            msg << "Comparison failed for pixel " << ndx;
            return tcu::TestStatus::fail(msg.str());
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class BufferToBufferInvertTest : public vkt::TestCase
{
public:
    virtual void checkSupport (Context& context) const;
    void initPrograms (SourceCollections& sourceCollections) const;
    TestInstance* createInstance (Context& context) const;

    static BufferToBufferInvertTest* UBOToSSBOInvertCase (tcu::TestContext& testCtx,
                                                          const std::string& name,
                                                          const deUint32 numValues,
                                                          const tcu::IVec3& localSize,
                                                          const tcu::IVec3& workSize,
                                                          const vk::ComputePipelineConstructionType computePipelineConstructionType);

    static BufferToBufferInvertTest* CopyInvertSSBOCase (tcu::TestContext& testCtx,
                                                         const std::string& name,
                                                         const deUint32 numValues,
                                                         const tcu::IVec3& localSize,
                                                         const tcu::IVec3& workSize,
                                                         const vk::ComputePipelineConstructionType computePipelineConstructionType);

private:
    BufferToBufferInvertTest (tcu::TestContext& testCtx,
                              const std::string& name,
                              const deUint32 numValues,
                              const tcu::IVec3& localSize,
                              const tcu::IVec3& workSize,
                              const BufferType bufferType,
                              const vk::ComputePipelineConstructionType computePipelineConstructionType);

    const BufferType m_bufferType;
    const deUint32 m_numValues;
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class BufferToBufferInvertTestInstance : public vkt::TestInstance
{
public:
    BufferToBufferInvertTestInstance (Context& context,
                                      const deUint32 numValues,
                                      const tcu::IVec3& localSize,
                                      const tcu::IVec3& workSize,
                                      const BufferType bufferType,
                                      const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate (void);

private:
    const BufferType m_bufferType;
    const deUint32 m_numValues;
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

BufferToBufferInvertTest::BufferToBufferInvertTest (tcu::TestContext& testCtx,
                                                    const std::string& name,
                                                    const deUint32 numValues,
                                                    const tcu::IVec3& localSize,
                                                    const tcu::IVec3& workSize,
                                                    const BufferType bufferType,
                                                    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_bufferType(bufferType)
    , m_numValues(numValues)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
    DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
    DE_ASSERT(m_bufferType == BUFFER_TYPE_UNIFORM || m_bufferType == BUFFER_TYPE_SSBO);
}

BufferToBufferInvertTest* BufferToBufferInvertTest::UBOToSSBOInvertCase (tcu::TestContext& testCtx,
                                                                         const std::string& name,
                                                                         const deUint32 numValues,
                                                                         const tcu::IVec3& localSize,
                                                                         const tcu::IVec3& workSize,
                                                                         const vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    return new BufferToBufferInvertTest(testCtx, name, numValues, localSize, workSize, BUFFER_TYPE_UNIFORM, computePipelineConstructionType);
}

BufferToBufferInvertTest* BufferToBufferInvertTest::CopyInvertSSBOCase (tcu::TestContext& testCtx,
                                                                        const std::string& name,
                                                                        const deUint32 numValues,
                                                                        const tcu::IVec3& localSize,
                                                                        const tcu::IVec3& workSize,
                                                                        const vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    return new BufferToBufferInvertTest(testCtx, name, numValues, localSize, workSize, BUFFER_TYPE_SSBO, computePipelineConstructionType);
}

void BufferToBufferInvertTest::checkSupport (Context& context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}

void BufferToBufferInvertTest::initPrograms (SourceCollections& sourceCollections) const
{
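    // The two variants differ only in the input interface block (a readonly UBO vs a readonly
    // std140 SSBO); both write the bitwise complement of every input value to the output SSBO.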
1179 std::ostringstream src;
1180 if (m_bufferType == BUFFER_TYPE_UNIFORM)
1181 {
1182 src << "#version 310 es\n"
1183 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
1184 << "layout(binding = 0) readonly uniform Input {\n"
1185 << " uint values[" << m_numValues << "];\n"
1186 << "} ub_in;\n"
1187 << "layout(binding = 1, std140) writeonly buffer Output {\n"
1188 << " uint values[" << m_numValues << "];\n"
1189 << "} sb_out;\n"
1190 << "void main (void) {\n"
1191 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1192 << " uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
1193 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
1194 << " uint offset = numValuesPerInv*groupNdx;\n"
            << "\n"
            << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
            << " sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
            << "}\n";
    }
    else if (m_bufferType == BUFFER_TYPE_SSBO)
    {
        src << "#version 310 es\n"
            << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
            << "layout(binding = 0, std140) readonly buffer Input {\n"
            << " uint values[" << m_numValues << "];\n"
            << "} sb_in;\n"
            << "layout (binding = 1, std140) writeonly buffer Output {\n"
            << " uint values[" << m_numValues << "];\n"
            << "} sb_out;\n"
            << "void main (void) {\n"
            << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
            << " uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
            << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
            << " uint offset = numValuesPerInv*groupNdx;\n"
            << "\n"
            << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
            << " sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
            << "}\n";
    }

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance* BufferToBufferInvertTest::createInstance (Context& context) const
{
    return new BufferToBufferInvertTestInstance(context, m_numValues, m_localSize, m_workSize, m_bufferType, m_computePipelineConstructionType);
}

BufferToBufferInvertTestInstance::BufferToBufferInvertTestInstance (Context& context,
                                                                    const deUint32 numValues,
                                                                    const tcu::IVec3& localSize,
                                                                    const tcu::IVec3& workSize,
                                                                    const BufferType bufferType,
                                                                    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance (context)
    , m_bufferType (bufferType)
    , m_numValues (numValues)
    , m_localSize (localSize)
    , m_workSize (workSize)
    , m_computePipelineConstructionType (computePipelineConstructionType)
{
}

tcu::TestStatus BufferToBufferInvertTestInstance::iterate (void)
{
    const DeviceInterface& vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator& allocator = m_context.getDefaultAllocator();

    // Customize the test based on buffer type

    const VkBufferUsageFlags inputBufferUsageFlags = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
    const VkDescriptorType inputBufferDescriptorType = (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
    const deUint32 randomSeed = (m_bufferType == BUFFER_TYPE_UNIFORM ? 0x111223f : 0x124fef);

    // Create an input buffer

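    // Each value is stored in the .x component of a uvec4: std140 layout, used
    // in the UBO variant, gives uint arrays a 16-byte element stride.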
    const VkDeviceSize bufferSizeBytes = sizeof(tcu::UVec4) * m_numValues;
    const BufferWithMemory inputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, inputBufferUsageFlags), MemoryRequirement::HostVisible);

    // Fill the input buffer with data
    {
        de::Random rnd(randomSeed);
        const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
        tcu::UVec4* bufferPtr = static_cast<tcu::UVec4*>(inputBufferAllocation.getHostPtr());
        for (deUint32 i = 0; i < m_numValues; ++i)
            bufferPtr[i].x() = rnd.getUint32();

        flushAlloc(vk, device, inputBufferAllocation);
    }

    // Create an output buffer

    const BufferWithMemory outputBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
        .addSingleBinding(inputBufferDescriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
        .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
        .addType(inputBufferDescriptorType)
        .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
        .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo inputBufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
    const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), inputBufferDescriptorType, &inputBufferDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.buildPipeline();

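    // Two barriers order host writes against shader reads of the input, and
    // shader writes against host reads of the output.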
    const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);

    const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results

    const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
    invalidateAlloc(vk, device, outputBufferAllocation);

    const tcu::UVec4* bufferPtr = static_cast<tcu::UVec4*>(outputBufferAllocation.getHostPtr());
    const tcu::UVec4* refBufferPtr = static_cast<tcu::UVec4*>(inputBuffer.getAllocation().getHostPtr());

    for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
    {
        const deUint32 res = bufferPtr[ndx].x();
        const deUint32 ref = ~refBufferPtr[ndx].x();

        if (res != ref)
        {
            std::ostringstream msg;
            msg << "Comparison failed for Output.values[" << ndx << "]";
            return tcu::TestStatus::fail(msg.str());
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class InvertSSBOInPlaceTest : public vkt::TestCase
{
public:
    InvertSSBOInPlaceTest (tcu::TestContext& testCtx,
                           const std::string& name,
                           const deUint32 numValues,
                           const bool sized,
                           const tcu::IVec3& localSize,
                           const tcu::IVec3& workSize,
                           const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport (Context& context) const;
    void initPrograms (SourceCollections& sourceCollections) const;
    TestInstance* createInstance (Context& context) const;

private:
    const deUint32 m_numValues;
    const bool m_sized;
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class InvertSSBOInPlaceTestInstance : public vkt::TestInstance
{
public:
    InvertSSBOInPlaceTestInstance (Context& context,
                                   const deUint32 numValues,
                                   const tcu::IVec3& localSize,
                                   const tcu::IVec3& workSize,
                                   const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate (void);

private:
    const deUint32 m_numValues;
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

InvertSSBOInPlaceTest::InvertSSBOInPlaceTest (tcu::TestContext& testCtx,
                                              const std::string& name,
                                              const deUint32 numValues,
                                              const bool sized,
                                              const tcu::IVec3& localSize,
                                              const tcu::IVec3& workSize,
                                              const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase (testCtx, name)
    , m_numValues (numValues)
    , m_sized (sized)
    , m_localSize (localSize)
    , m_workSize (workSize)
    , m_computePipelineConstructionType (computePipelineConstructionType)
{
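    // The total value count must divide evenly among all invocations
    // (workgroups times local workgroup size).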
    DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
}

void InvertSSBOInPlaceTest::checkSupport (Context& context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}

void InvertSSBOInPlaceTest::initPrograms (SourceCollections& sourceCollections) const
{
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
        << "layout(binding = 0) buffer InOut {\n"
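        // With m_sized == false the array is declared unsized, and
        // values.length() reflects the buffer range bound at run time.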
        << " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
        << "} sb_inout;\n"
        << "void main (void) {\n"
        << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
        << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
        << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
        << " uint offset = numValuesPerInv*groupNdx;\n"
        << "\n"
        << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
        << " sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance* InvertSSBOInPlaceTest::createInstance (Context& context) const
{
    return new InvertSSBOInPlaceTestInstance(context, m_numValues, m_localSize, m_workSize, m_computePipelineConstructionType);
}

InvertSSBOInPlaceTestInstance::InvertSSBOInPlaceTestInstance (Context& context,
                                                              const deUint32 numValues,
                                                              const tcu::IVec3& localSize,
                                                              const tcu::IVec3& workSize,
                                                              const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance (context)
    , m_numValues (numValues)
    , m_localSize (localSize)
    , m_workSize (workSize)
    , m_computePipelineConstructionType (computePipelineConstructionType)
{
}

tcu::TestStatus InvertSSBOInPlaceTestInstance::iterate (void)
{
    const DeviceInterface& vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator& allocator = m_context.getDefaultAllocator();

    // Create an input/output buffer

    const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
    const BufferWithMemory buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);

    // Fill the buffer with data

    typedef std::vector<deUint32> data_vector_t;
    data_vector_t inputData(m_numValues);
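    // Keep a host-side copy of the random input, since the buffer itself is
    // inverted in place by the shader.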

    {
        de::Random rnd(0x82ce7f);
        const Allocation& bufferAllocation = buffer.getAllocation();
        deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
        for (deUint32 i = 0; i < m_numValues; ++i)
            inputData[i] = *bufferPtr++ = rnd.getUint32();

        flushAlloc(vk, device, bufferAllocation);
    }

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
        .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
        .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
        .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.buildPipeline();

    const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);

    const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results

    const Allocation& bufferAllocation = buffer.getAllocation();
    invalidateAlloc(vk, device, bufferAllocation);

    const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());

    for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
    {
        const deUint32 res = bufferPtr[ndx];
        const deUint32 ref = ~inputData[ndx];

        if (res != ref)
        {
            std::ostringstream msg;
            msg << "Comparison failed for InOut.values[" << ndx << "]";
            return tcu::TestStatus::fail(msg.str());
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class WriteToMultipleSSBOTest : public vkt::TestCase
{
public:
    WriteToMultipleSSBOTest (tcu::TestContext& testCtx,
                             const std::string& name,
                             const deUint32 numValues,
                             const bool sized,
                             const tcu::IVec3& localSize,
                             const tcu::IVec3& workSize,
                             const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport (Context& context) const;
    void initPrograms (SourceCollections& sourceCollections) const;
    TestInstance* createInstance (Context& context) const;

private:
    const deUint32 m_numValues;
    const bool m_sized;
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class WriteToMultipleSSBOTestInstance : public vkt::TestInstance
{
public:
    WriteToMultipleSSBOTestInstance (Context& context,
                                     const deUint32 numValues,
                                     const tcu::IVec3& localSize,
                                     const tcu::IVec3& workSize,
                                     const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate (void);

private:
    const deUint32 m_numValues;
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

WriteToMultipleSSBOTest::WriteToMultipleSSBOTest (tcu::TestContext& testCtx,
                                                  const std::string& name,
                                                  const deUint32 numValues,
                                                  const bool sized,
                                                  const tcu::IVec3& localSize,
                                                  const tcu::IVec3& workSize,
                                                  const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase (testCtx, name)
    , m_numValues (numValues)
    , m_sized (sized)
    , m_localSize (localSize)
    , m_workSize (workSize)
    , m_computePipelineConstructionType (computePipelineConstructionType)
{
    DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
}

void WriteToMultipleSSBOTest::checkSupport (Context& context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}

void WriteToMultipleSSBOTest::initPrograms (SourceCollections& sourceCollections) const
{
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
        << "layout(binding = 0) writeonly buffer Out0 {\n"
        << " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
        << "} sb_out0;\n"
        << "layout(binding = 1) writeonly buffer Out1 {\n"
        << " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
        << "} sb_out1;\n"
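        // sb_out0 receives ascending indices and sb_out1 a descending sequence,
        // so the two buffers can be validated independently.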
        << "void main (void) {\n"
        << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
        << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
        << "\n"
        << " {\n"
        << " uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
        << " uint offset = numValuesPerInv*groupNdx;\n"
        << "\n"
        << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
        << " sb_out0.values[offset + ndx] = offset + ndx;\n"
        << " }\n"
        << " {\n"
        << " uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
        << " uint offset = numValuesPerInv*groupNdx;\n"
        << "\n"
        << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
        << " sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
        << " }\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance* WriteToMultipleSSBOTest::createInstance (Context& context) const
{
    return new WriteToMultipleSSBOTestInstance(context, m_numValues, m_localSize, m_workSize, m_computePipelineConstructionType);
}

WriteToMultipleSSBOTestInstance::WriteToMultipleSSBOTestInstance (Context& context,
                                                                  const deUint32 numValues,
                                                                  const tcu::IVec3& localSize,
                                                                  const tcu::IVec3& workSize,
                                                                  const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance (context)
    , m_numValues (numValues)
    , m_localSize (localSize)
    , m_workSize (workSize)
    , m_computePipelineConstructionType (computePipelineConstructionType)
{
}

tcu::TestStatus WriteToMultipleSSBOTestInstance::iterate (void)
{
    const DeviceInterface& vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator& allocator = m_context.getDefaultAllocator();

    // Create two output buffers

    const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
    const BufferWithMemory buffer0(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
    const BufferWithMemory buffer1(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
        .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
        .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
        .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo buffer0DescriptorInfo = makeDescriptorBufferInfo(*buffer0, 0ull, bufferSizeBytes);
    const VkDescriptorBufferInfo buffer1DescriptorInfo = makeDescriptorBufferInfo(*buffer1, 0ull, bufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer0DescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer1DescriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.buildPipeline();

    const VkBufferMemoryBarrier shaderWriteBarriers[] =
    {
        makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer0, 0ull, bufferSizeBytes),
        makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer1, 0ull, bufferSizeBytes)
    };

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

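    // No host-write barrier is needed before the dispatch: the shader only
    // writes these buffers and reads no host-initialized data.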
    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, DE_LENGTH_OF_ARRAY(shaderWriteBarriers), shaderWriteBarriers, 0, (const VkImageMemoryBarrier*)DE_NULL);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results
    {
        const Allocation& buffer0Allocation = buffer0.getAllocation();
        invalidateAlloc(vk, device, buffer0Allocation);
        const deUint32* buffer0Ptr = static_cast<deUint32*>(buffer0Allocation.getHostPtr());

        for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
        {
            const deUint32 res = buffer0Ptr[ndx];
            const deUint32 ref = ndx;

            if (res != ref)
            {
                std::ostringstream msg;
                msg << "Comparison failed for Out0.values[" << ndx << "] res=" << res << " ref=" << ref;
                return tcu::TestStatus::fail(msg.str());
            }
        }
    }
    {
        const Allocation& buffer1Allocation = buffer1.getAllocation();
        invalidateAlloc(vk, device, buffer1Allocation);
        const deUint32* buffer1Ptr = static_cast<deUint32*>(buffer1Allocation.getHostPtr());

        for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
        {
            const deUint32 res = buffer1Ptr[ndx];
            const deUint32 ref = m_numValues - ndx;

            if (res != ref)
            {
                std::ostringstream msg;
                msg << "Comparison failed for Out1.values[" << ndx << "] res=" << res << " ref=" << ref;
                return tcu::TestStatus::fail(msg.str());
            }
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class SSBOBarrierTest : public vkt::TestCase
{
public:
    SSBOBarrierTest (tcu::TestContext& testCtx,
                     const std::string& name,
                     const tcu::IVec3& workSize,
                     const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport (Context& context) const;
    void initPrograms (SourceCollections& sourceCollections) const;
    TestInstance* createInstance (Context& context) const;

private:
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class SSBOBarrierTestInstance : public vkt::TestInstance
{
public:
    SSBOBarrierTestInstance (Context& context,
                             const tcu::IVec3& workSize,
                             const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate (void);

private:
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

SSBOBarrierTest::SSBOBarrierTest (tcu::TestContext& testCtx,
                                  const std::string& name,
                                  const tcu::IVec3& workSize,
                                  const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase (testCtx, name)
    , m_workSize (workSize)
    , m_computePipelineConstructionType (computePipelineConstructionType)
{
}

void SSBOBarrierTest::checkSupport (Context& context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}

void SSBOBarrierTest::initPrograms (SourceCollections& sourceCollections) const
{
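    // First pass: every workgroup writes u_baseVal plus its flattened workgroup
    // index into the work buffer.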
    sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
        "#version 310 es\n"
        "layout (local_size_x = 1) in;\n"
        "layout(binding = 2) readonly uniform Constants {\n"
        " uint u_baseVal;\n"
        "};\n"
        "layout(binding = 1) writeonly buffer Output {\n"
        " uint values[];\n"
        "};\n"
        "void main (void) {\n"
        " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
        " values[offset] = u_baseVal + offset;\n"
        "}\n");

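    // Second pass: each workgroup reads one value back and accumulates it into
    // a single atomic counter.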
    sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
        "#version 310 es\n"
        "layout (local_size_x = 1) in;\n"
        "layout(binding = 1) readonly buffer Input {\n"
        " uint values[];\n"
        "};\n"
        "layout(binding = 0) coherent buffer Output {\n"
        " uint sum;\n"
        "};\n"
        "void main (void) {\n"
        " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
        " uint value = values[offset];\n"
        " atomicAdd(sum, value);\n"
        "}\n");
}

TestInstance* SSBOBarrierTest::createInstance (Context& context) const
{
    return new SSBOBarrierTestInstance(context, m_workSize, m_computePipelineConstructionType);
}

SSBOBarrierTestInstance::SSBOBarrierTestInstance (Context& context, const tcu::IVec3& workSize, const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance (context)
    , m_workSize (workSize)
    , m_computePipelineConstructionType (computePipelineConstructionType)
{
}

tcu::TestStatus SSBOBarrierTestInstance::iterate (void)
{
    const DeviceInterface& vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator& allocator = m_context.getDefaultAllocator();

    // Create a work buffer used by both shaders

    const int workGroupCount = multiplyComponents(m_workSize);
    const VkDeviceSize workBufferSizeBytes = sizeof(deUint32) * workGroupCount;
    const BufferWithMemory workBuffer(vk, device, allocator, makeBufferCreateInfo(workBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::Any);

    // Create an output buffer

    const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
    const BufferWithMemory outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);

    // Initialize atomic counter value to zero
    {
        const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
        deUint32* outputBufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
        *outputBufferPtr = 0;
        flushAlloc(vk, device, outputBufferAllocation);
    }

    // Create a uniform buffer (to pass uniform constants)

    const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
    const BufferWithMemory uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);

    // Set the constants in the uniform buffer

    const deUint32 baseValue = 127;
    {
        const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
        deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
        uniformBufferPtr[0] = baseValue;

        flushAlloc(vk, device, uniformBufferAllocation);
    }

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
        .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
        .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
        .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
        .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
        .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo workBufferDescriptorInfo = makeDescriptorBufferInfo(*workBuffer, 0ull, workBufferSizeBytes);
    const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
    const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &workBufferDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline0(vk, device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp0"));
    pipeline0.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline0.buildPipeline();

    ComputePipelineWrapper pipeline1(vk, device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp1"));
    pipeline1.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline1.buildPipeline();

    const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);

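    // Compute-to-compute dependency: the first dispatch's writes to the work
    // buffer must be made visible to the second dispatch's reads.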
    const VkBufferMemoryBarrier betweenShadersBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *workBuffer, 0ull, workBufferSizeBytes);

    const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline0.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline0.getPipelineLayout(), 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &writeUniformConstantsBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);

    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &betweenShadersBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);

    // Switch to the second shader program
    pipeline1.bind(*cmdBuffer);

    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &afterComputeBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results

    const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
    invalidateAlloc(vk, device, outputBufferAllocation);

    const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
    const deUint32 res = *bufferPtr;
    deUint32 ref = 0;

    for (int ndx = 0; ndx < workGroupCount; ++ndx)
        ref += baseValue + ndx;

    if (res != ref)
    {
        std::ostringstream msg;
        msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
        return tcu::TestStatus::fail(msg.str());
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class ImageAtomicOpTest : public vkt::TestCase
{
public:
    ImageAtomicOpTest (tcu::TestContext& testCtx,
                       const std::string& name,
                       const deUint32 localSize,
                       const tcu::IVec2& imageSize,
                       const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport (Context& context) const;
    void initPrograms (SourceCollections& sourceCollections) const;
    TestInstance* createInstance (Context& context) const;

private:
    const deUint32 m_localSize;
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class ImageAtomicOpTestInstance : public vkt::TestInstance
{
public:
    ImageAtomicOpTestInstance (Context& context,
                               const deUint32 localSize,
                               const tcu::IVec2& imageSize,
                               const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate (void);

private:
    const deUint32 m_localSize;
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

ImageAtomicOpTest::ImageAtomicOpTest (tcu::TestContext& testCtx,
                                      const std::string& name,
                                      const deUint32 localSize,
                                      const tcu::IVec2& imageSize,
                                      const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase (testCtx, name)
    , m_localSize (localSize)
    , m_imageSize (imageSize)
    , m_computePipelineConstructionType (computePipelineConstructionType)
{
}

void ImageAtomicOpTest::checkSupport (Context& context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}

void ImageAtomicOpTest::initPrograms (SourceCollections& sourceCollections) const
{
    std::ostringstream src;
    src << "#version 310 es\n"
        << "#extension GL_OES_shader_image_atomic : require\n"
        << "layout (local_size_x = " << m_localSize << ") in;\n"
        << "layout(binding = 1, r32ui) coherent uniform highp uimage2D u_dstImg;\n"
        << "layout(binding = 0) readonly buffer Input {\n"
        << " uint values[" << (multiplyComponents(m_imageSize) * m_localSize) << "];\n"
        << "} sb_in;\n\n"
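        // One workgroup per pixel: invocation 0 clears the texel, the control
        // barrier (with its image memory barrier) orders the clear against the
        // subsequent atomics, and every invocation then adds its input value.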
        << "void main (void) {\n"
        << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
        << " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
        << "\n"
        << " if (gl_LocalInvocationIndex == 0u)\n"
        << " imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
        << " memoryBarrierImage();\n"
        << " barrier();\n"
        << " imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance* ImageAtomicOpTest::createInstance (Context& context) const
{
    return new ImageAtomicOpTestInstance(context, m_localSize, m_imageSize, m_computePipelineConstructionType);
}

ImageAtomicOpTestInstance::ImageAtomicOpTestInstance (Context& context, const deUint32 localSize, const tcu::IVec2& imageSize, const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance (context)
    , m_localSize (localSize)
    , m_imageSize (imageSize)
    , m_computePipelineConstructionType (computePipelineConstructionType)
{
}

tcu::TestStatus ImageAtomicOpTestInstance::iterate (void)
{
    const DeviceInterface& vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator& allocator = m_context.getDefaultAllocator();

    // Create an image

    const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
    const ImageWithMemory image(vk, device, allocator, imageParams, MemoryRequirement::Any);

    const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
    const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));

    // Input buffer

    const deUint32 numInputValues = multiplyComponents(m_imageSize) * m_localSize;
    const VkDeviceSize inputBufferSizeBytes = sizeof(deUint32) * numInputValues;

    const BufferWithMemory inputBuffer(vk, device, allocator, makeBufferCreateInfo(inputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);

    // Populate the input buffer with test data
    {
        de::Random rnd(0x77238ac2);
        const Allocation& inputBufferAllocation = inputBuffer.getAllocation();
        deUint32* bufferPtr = static_cast<deUint32*>(inputBufferAllocation.getHostPtr());
        for (deUint32 i = 0; i < numInputValues; ++i)
            *bufferPtr++ = rnd.getUint32();

        flushAlloc(vk, device, inputBufferAllocation);
    }

    // Create a buffer to store shader output (copied from image data)

    const deUint32 imageArea = multiplyComponents(m_imageSize);
    const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32) * imageArea;
    const BufferWithMemory outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
        .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
        .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
        .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
        .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    // Set the bindings

    const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
    const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, inputBufferSizeBytes);

    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
        .update(vk, device);

    // Perform the computation
    {
        ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp"));
        pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
        pipeline.buildPipeline();

        const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, inputBufferSizeBytes);

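        // Transition the image from UNDEFINED (contents discarded) to GENERAL
        // for storage image access; the shader clears each texel before use.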
        const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
            (VkAccessFlags)0, VK_ACCESS_SHADER_WRITE_BIT,
            VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
            *image, subresourceRange);

        // Prepare the command buffer

        const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
        const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

        // Start recording commands

        beginCommandBuffer(vk, *cmdBuffer);

        pipeline.bind(*cmdBuffer);
        vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

        vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &inputBufferPostHostWriteBarrier, 1, &imageLayoutBarrier);
        vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);

        copyImageToBuffer(vk, *cmdBuffer, *image, *outputBuffer, m_imageSize, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);

        endCommandBuffer(vk, *cmdBuffer);

        // Wait for completion

        submitCommandsAndWait(vk, device, queue, *cmdBuffer);
    }

    // Validate the results

    const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
    invalidateAlloc(vk, device, outputBufferAllocation);

    const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
    const deUint32* refBufferPtr = static_cast<deUint32*>(inputBuffer.getAllocation().getHostPtr());

    for (deUint32 pixelNdx = 0; pixelNdx < imageArea; ++pixelNdx)
    {
        const deUint32 res = bufferPtr[pixelNdx];
        deUint32 ref = 0;

        for (deUint32 offs = 0; offs < m_localSize; ++offs)
            ref += refBufferPtr[pixelNdx * m_localSize + offs];

        if (res != ref)
        {
            std::ostringstream msg;
            msg << "Comparison failed for pixel " << pixelNdx;
            return tcu::TestStatus::fail(msg.str());
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class ImageBarrierTest : public vkt::TestCase
{
public:
    ImageBarrierTest (tcu::TestContext& testCtx,
                      const std::string& name,
                      const tcu::IVec2& imageSize,
                      const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport (Context& context) const;
    void initPrograms (SourceCollections& sourceCollections) const;
    TestInstance* createInstance (Context& context) const;

private:
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class ImageBarrierTestInstance : public vkt::TestInstance
{
public:
    ImageBarrierTestInstance (Context& context,
                              const tcu::IVec2& imageSize,
                              const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate (void);

private:
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

ImageBarrierTest::ImageBarrierTest (tcu::TestContext& testCtx,
                                    const std::string& name,
                                    const tcu::IVec2& imageSize,
                                    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase (testCtx, name)
    , m_imageSize (imageSize)
    , m_computePipelineConstructionType (computePipelineConstructionType)
{
}

void ImageBarrierTest::checkSupport (Context& context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}

void ImageBarrierTest::initPrograms (SourceCollections& sourceCollections) const
{
    sourceCollections.glslSources.add("comp0") << glu::ComputeSource(
        "#version 310 es\n"
        "layout (local_size_x = 1) in;\n"
        "layout(binding = 2) readonly uniform Constants {\n"
        " uint u_baseVal;\n"
        "};\n"
        "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_img;\n"
        "void main (void) {\n"
        " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
        " imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset + u_baseVal, 0, 0, 0));\n"
        "}\n");

    sourceCollections.glslSources.add("comp1") << glu::ComputeSource(
        "#version 310 es\n"
        "layout (local_size_x = 1) in;\n"
        "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_img;\n"
        "layout(binding = 0) coherent buffer Output {\n"
        " uint sum;\n"
        "};\n"
        "void main (void) {\n"
        " uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
        " atomicAdd(sum, value);\n"
        "}\n");
}

TestInstance* ImageBarrierTest::createInstance (Context& context) const
{
    return new ImageBarrierTestInstance(context, m_imageSize, m_computePipelineConstructionType);
}

ImageBarrierTestInstance::ImageBarrierTestInstance (Context& context, const tcu::IVec2& imageSize, const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance (context)
    , m_imageSize (imageSize)
    , m_computePipelineConstructionType (computePipelineConstructionType)
{
}

tcu::TestStatus ImageBarrierTestInstance::iterate (void)
{
    const DeviceInterface& vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator& allocator = m_context.getDefaultAllocator();

    // Create an image used by both shaders

    const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_STORAGE_BIT);
    const ImageWithMemory image(vk, device, allocator, imageParams, MemoryRequirement::Any);

    const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
    const Unique<VkImageView> imageView(makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));

    // Create an output buffer

    const VkDeviceSize outputBufferSizeBytes = sizeof(deUint32);
    const BufferWithMemory outputBuffer(vk, device, allocator, makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);

    // Initialize atomic counter value to zero
    {
        const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
        deUint32* outputBufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
        *outputBufferPtr = 0;
        flushAlloc(vk, device, outputBufferAllocation);
    }

    // Create a uniform buffer (to pass uniform constants)

    const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32);
    const BufferWithMemory uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);

    // Set the constants in the uniform buffer

    const deUint32 baseValue = 127;
    {
        const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
        deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
        uniformBufferPtr[0] = baseValue;

        flushAlloc(vk, device, uniformBufferAllocation);
    }

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
        .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
        .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
        .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
        .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
        .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
        .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
        .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorImageInfo imageDescriptorInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
    const VkDescriptorBufferInfo outputBufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
    const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline0(vk, device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp0"));
    pipeline0.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline0.buildPipeline();
    ComputePipelineWrapper pipeline1(vk, device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp1"));
    pipeline1.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline1.buildPipeline();

    const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);

    const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
        0u, 0u,
        VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
        *image, subresourceRange);

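    // Same-layout image barrier ordering the first shader's imageStore against
    // the second shader's imageLoad.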
2391 const VkImageMemoryBarrier imageBarrierBetweenShaders = makeImageMemoryBarrier(
2392 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
2393 VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL,
2394 *image, subresourceRange);
2395
2396 const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2397
2398 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2399 const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
2400
2401 // Start recording commands
2402
2403 beginCommandBuffer(vk, *cmdBuffer);
2404
2405 pipeline0.bind(*cmdBuffer);
2406 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline0.getPipelineLayout(), 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2407
2408 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &writeUniformConstantsBarrier, 1, &imageLayoutBarrier);
2409
2410 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2411 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &imageBarrierBetweenShaders);
2412
2413 // Switch to the second shader program
2414 pipeline1.bind(*cmdBuffer);
2415
2416 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2417 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &afterComputeBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2418
2419 endCommandBuffer(vk, *cmdBuffer);
2420
2421 // Wait for completion
2422
2423 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2424
2425 // Validate the results
2426
2427 const Allocation& outputBufferAllocation = outputBuffer.getAllocation();
2428 invalidateAlloc(vk, device, outputBufferAllocation);
2429
2430 const int numValues = multiplyComponents(m_imageSize);
2431 const deUint32* bufferPtr = static_cast<deUint32*>(outputBufferAllocation.getHostPtr());
2432 const deUint32 res = *bufferPtr;
2433 deUint32 ref = 0;
2434
2435 for (int ndx = 0; ndx < numValues; ++ndx)
2436 ref += baseValue + ndx;
2437
2438 if (res != ref)
2439 {
2440 std::ostringstream msg;
2441 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
2442 return tcu::TestStatus::fail(msg.str());
2443 }
2444 return tcu::TestStatus::pass("Compute succeeded");
2445 }
2446
2447 class ComputeTestInstance : public vkt::TestInstance
2448 {
2449 public:
ComputeTestInstance(Context & context,vk::ComputePipelineConstructionType computePipelineConstructionType)2450 ComputeTestInstance (Context& context, vk::ComputePipelineConstructionType computePipelineConstructionType)
2451 : TestInstance (context)
2452 , m_numPhysDevices (1)
2453 , m_queueFamilyIndex (0)
2454 , m_computePipelineConstructionType (computePipelineConstructionType)
2455 {
2456 createDeviceGroup();
2457 }
2458
2459 							~ComputeTestInstance ()
2460 {
2461 }
2462
2463 void createDeviceGroup (void);
2464 	const vk::DeviceInterface&	getDeviceInterface	(void) { return *m_deviceDriver; }
2465 	vk::VkInstance				getInstance			(void) { return m_deviceGroupInstance; }
2466 	vk::VkDevice				getDevice			(void) { return *m_logicalDevice; }
2467 	vk::VkPhysicalDevice		getPhysicalDevice	(deUint32 i = 0) { return m_physicalDevices[i]; }
2468
2469 protected:
2470 deUint32 m_numPhysDevices;
2471 deUint32 m_queueFamilyIndex;
2472 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
2473
2474 private:
2475 CustomInstance m_deviceGroupInstance;
2476 vk::Move<vk::VkDevice> m_logicalDevice;
2477 std::vector<vk::VkPhysicalDevice> m_physicalDevices;
2478 #ifndef CTS_USES_VULKANSC
2479 de::MovePtr<vk::DeviceDriver> m_deviceDriver;
2480 #else
2481 de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter> m_deviceDriver;
2482 #endif // CTS_USES_VULKANSC
2483 };
2484
2485 void ComputeTestInstance::createDeviceGroup (void)
2486 {
2487 const tcu::CommandLine& cmdLine = m_context.getTestContext().getCommandLine();
2488 const deUint32 devGroupIdx = cmdLine.getVKDeviceGroupId() - 1;
2489 const deUint32 physDeviceIdx = cmdLine.getVKDeviceId() - 1;
2490 const float queuePriority = 1.0f;
2491 const std::vector<std::string> requiredExtensions (1, "VK_KHR_device_group_creation");
2492 m_deviceGroupInstance = createCustomInstanceWithExtensions(m_context, requiredExtensions);
2493 std::vector<VkPhysicalDeviceGroupProperties> devGroupProperties = enumeratePhysicalDeviceGroups(m_context.getInstanceInterface(), m_deviceGroupInstance);
2494 m_numPhysDevices = devGroupProperties[devGroupIdx].physicalDeviceCount;
2495 std::vector<const char*> deviceExtensions;
2496
2497 if (!isCoreDeviceExtension(m_context.getUsedApiVersion(), "VK_KHR_device_group"))
2498 deviceExtensions.push_back("VK_KHR_device_group");
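	// VK_KHR_device_group was promoted to core in Vulkan 1.1, so the extension only needs to be
	// enabled explicitly on older API versions.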
2499
2500 VkDeviceGroupDeviceCreateInfo deviceGroupInfo =
2501 {
2502 		VK_STRUCTURE_TYPE_DEVICE_GROUP_DEVICE_CREATE_INFO,		// VkStructureType sType;
2503 		DE_NULL,												// const void* pNext;
2504 		devGroupProperties[devGroupIdx].physicalDeviceCount,	// deUint32 physicalDeviceCount;
2505 		devGroupProperties[devGroupIdx].physicalDevices			// const VkPhysicalDevice* pPhysicalDevices;
2506 };
2507 const InstanceDriver& instance (m_deviceGroupInstance.getDriver());
2508 VkPhysicalDeviceFeatures2 deviceFeatures2 = initVulkanStructure();
2509 const VkPhysicalDeviceFeatures deviceFeatures = getPhysicalDeviceFeatures(instance, deviceGroupInfo.pPhysicalDevices[physDeviceIdx]);
2510 const std::vector<VkQueueFamilyProperties> queueProps = getPhysicalDeviceQueueFamilyProperties(instance, devGroupProperties[devGroupIdx].physicalDevices[physDeviceIdx]);
2511
2512 deviceFeatures2.features = deviceFeatures;
2513
2514 #ifndef CTS_USES_VULKANSC
2515 VkPhysicalDeviceDynamicRenderingFeaturesKHR dynamicRenderingFeatures = initVulkanStructure();
2516 dynamicRenderingFeatures.dynamicRendering = VK_TRUE;
2517 VkPhysicalDeviceShaderObjectFeaturesEXT shaderObjectFeatures = initVulkanStructure(&dynamicRenderingFeatures);
2518 shaderObjectFeatures.shaderObject = VK_TRUE;
2519 	if (m_computePipelineConstructionType != COMPUTE_PIPELINE_CONSTRUCTION_TYPE_PIPELINE)
2520 {
2521 deviceExtensions.push_back("VK_EXT_shader_object");
2522 deviceFeatures2.pNext = &shaderObjectFeatures;
2523 }
2524 #endif
2525
2526 m_physicalDevices.resize(m_numPhysDevices);
2527 for (deUint32 physDevIdx = 0; physDevIdx < m_numPhysDevices; physDevIdx++)
2528 m_physicalDevices[physDevIdx] = devGroupProperties[devGroupIdx].physicalDevices[physDevIdx];
2529
2530 for (size_t queueNdx = 0; queueNdx < queueProps.size(); queueNdx++)
2531 {
2532 if (queueProps[queueNdx].queueFlags & VK_QUEUE_COMPUTE_BIT)
2533 m_queueFamilyIndex = (deUint32)queueNdx;
2534 }
2535
2536 VkDeviceQueueCreateInfo queueInfo =
2537 {
2538 VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, // VkStructureType sType;
2539 DE_NULL, // const void* pNext;
2540 (VkDeviceQueueCreateFlags)0u, // VkDeviceQueueCreateFlags flags;
2541 m_queueFamilyIndex, // deUint32 queueFamilyIndex;
2542 1u, // deUint32 queueCount;
2543 &queuePriority // const float* pQueuePriorities;
2544 };
2545
2546 void* pNext = &deviceGroupInfo;
2547 if (deviceFeatures2.pNext != DE_NULL)
2548 deviceGroupInfo.pNext = &deviceFeatures2;
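	// When shader-object features are requested, VkPhysicalDeviceFeatures2 is chained behind the
	// device group info; pEnabledFeatures in VkDeviceCreateInfo must then be left NULL (see below).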
2549
2550 #ifdef CTS_USES_VULKANSC
2551 VkDeviceObjectReservationCreateInfo memReservationInfo = cmdLine.isSubProcess() ? m_context.getResourceInterface()->getStatMax() : resetDeviceObjectReservationCreateInfo();
2552 memReservationInfo.pNext = pNext;
2553 pNext = &memReservationInfo;
2554
2555 VkPhysicalDeviceVulkanSC10Features sc10Features = createDefaultSC10Features();
2556 sc10Features.pNext = pNext;
2557 pNext = &sc10Features;
2558 VkPipelineCacheCreateInfo pcCI;
2559 std::vector<VkPipelinePoolSize> poolSizes;
2560 if (cmdLine.isSubProcess())
2561 {
2562 if (m_context.getResourceInterface()->getCacheDataSize() > 0)
2563 {
2564 pcCI =
2565 {
2566 VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType;
2567 DE_NULL, // const void* pNext;
2568 VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
2569 VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT, // VkPipelineCacheCreateFlags flags;
2570 m_context.getResourceInterface()->getCacheDataSize(), // deUintptr initialDataSize;
2571 m_context.getResourceInterface()->getCacheData() // const void* pInitialData;
2572 };
2573 memReservationInfo.pipelineCacheCreateInfoCount = 1;
2574 memReservationInfo.pPipelineCacheCreateInfos = &pcCI;
2575 }
2576
2577 poolSizes = m_context.getResourceInterface()->getPipelinePoolSizes();
2578 if (!poolSizes.empty())
2579 {
2580 memReservationInfo.pipelinePoolSizeCount = deUint32(poolSizes.size());
2581 memReservationInfo.pPipelinePoolSizes = poolSizes.data();
2582 }
2583 }
2584
2585 #endif // CTS_USES_VULKANSC
2586
2587 const VkDeviceCreateInfo deviceInfo =
2588 {
2589 VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, // VkStructureType sType;
2590 pNext, // const void* pNext;
2591 (VkDeviceCreateFlags)0, // VkDeviceCreateFlags flags;
2592 		1u,												// uint32_t queueCreateInfoCount;
2593 &queueInfo, // const VkDeviceQueueCreateInfo* pQueueCreateInfos;
2594 0u, // uint32_t enabledLayerCount;
2595 DE_NULL, // const char* const* ppEnabledLayerNames;
2596 deUint32(deviceExtensions.size()), // uint32_t enabledExtensionCount;
2597 (deviceExtensions.empty() ? DE_NULL : &deviceExtensions[0]), // const char* const* ppEnabledExtensionNames;
2598 deviceFeatures2.pNext == DE_NULL ? &deviceFeatures : DE_NULL, // const VkPhysicalDeviceFeatures* pEnabledFeatures;
2599 };
2600
2601 m_logicalDevice = createCustomDevice(m_context.getTestContext().getCommandLine().isValidationEnabled(), m_context.getPlatformInterface(), m_deviceGroupInstance, instance, deviceGroupInfo.pPhysicalDevices[physDeviceIdx], &deviceInfo);
2602 #ifndef CTS_USES_VULKANSC
2603 m_deviceDriver = de::MovePtr<DeviceDriver>(new DeviceDriver(m_context.getPlatformInterface(), m_deviceGroupInstance, *m_logicalDevice, m_context.getUsedApiVersion()));
2604 #else
2605 m_deviceDriver = de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter>(new DeviceDriverSC(m_context.getPlatformInterface(), m_context.getInstance(), *m_logicalDevice, m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(), m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties(), m_context.getUsedApiVersion()), vk::DeinitDeviceDeleter(m_context.getResourceInterface().get(), *m_logicalDevice));
2606 #endif // CTS_USES_VULKANSC
2607 }
2608
2609 class DispatchBaseTest : public vkt::TestCase
2610 {
2611 public:
2612 DispatchBaseTest (tcu::TestContext& testCtx,
2613 const std::string& name,
2614 const deUint32 numValues,
2615 const tcu::IVec3& localsize,
2616 const tcu::IVec3& worksize,
2617 const tcu::IVec3& splitsize,
2618 const vk::ComputePipelineConstructionType computePipelineConstructionType,
2619 const bool useMaintenance5);
2620
2621 virtual void checkSupport (Context& context) const;
2622 void initPrograms (SourceCollections& sourceCollections) const;
2623 TestInstance* createInstance (Context& context) const;
2624
2625 private:
2626 const deUint32 m_numValues;
2627 const tcu::IVec3 m_localSize;
2628 const tcu::IVec3 m_workSize;
2629 const tcu::IVec3 m_splitSize;
2630 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
2631 const bool m_useMaintenance5;
2632 };
2633
2634 class DispatchBaseTestInstance : public ComputeTestInstance
2635 {
2636 public:
2637 DispatchBaseTestInstance (Context& context,
2638 const deUint32 numValues,
2639 const tcu::IVec3& localsize,
2640 const tcu::IVec3& worksize,
2641 const tcu::IVec3& splitsize,
2642 const vk::ComputePipelineConstructionType computePipelineConstructionType,
2643 const bool useMaintenance5);
2644
2645 bool isInputVectorValid (const tcu::IVec3& small, const tcu::IVec3& big);
2646 tcu::TestStatus iterate (void);
2647
2648 private:
2649 const deUint32 m_numValues;
2650 const tcu::IVec3 m_localSize;
2651 const tcu::IVec3 m_workSize;
2652 const tcu::IVec3 m_splitWorkSize;
2653 const bool m_useMaintenance5;
2654 };
2655
2656 DispatchBaseTest::DispatchBaseTest (tcu::TestContext&	testCtx,
2657 const std::string& name,
2658 const deUint32 numValues,
2659 const tcu::IVec3& localsize,
2660 const tcu::IVec3& worksize,
2661 const tcu::IVec3& splitsize,
2662 const vk::ComputePipelineConstructionType computePipelineConstructionType,
2663 const bool useMaintenance5)
2664 : TestCase (testCtx, name)
2665 , m_numValues (numValues)
2666 , m_localSize (localsize)
2667 , m_workSize (worksize)
2668 , m_splitSize (splitsize)
2669 , m_computePipelineConstructionType(computePipelineConstructionType)
2670 , m_useMaintenance5 (useMaintenance5)
2671 {
2672 }
2673
2674 void DispatchBaseTest::checkSupport (Context& context) const
2675 {
2676 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
2677 if (m_useMaintenance5)
2678 context.requireDeviceFunctionality("VK_KHR_maintenance5");
2679 }
2680
2681 void DispatchBaseTest::initPrograms (SourceCollections& sourceCollections) const
2682 {
2683 std::ostringstream src;
2684 src << "#version 310 es\n"
2685 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
2686
2687 << "layout(binding = 0) buffer InOut {\n"
2688 << " uint values[" << de::toString(m_numValues) << "];\n"
2689 << "} sb_inout;\n"
2690
2691 << "layout(binding = 1) readonly uniform uniformInput {\n"
2692 << " uvec3 gridSize;\n"
2693 << "} ubo_in;\n"
2694
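		// The full grid size comes from the UBO: with vkCmdDispatchBase each partial dispatch
		// sees only its own group counts in gl_NumWorkGroups.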
2695 << "void main (void) {\n"
2696 << " uvec3 size = ubo_in.gridSize * gl_WorkGroupSize;\n"
2697 << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
2698 << " uint index = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
2699 << " uint offset = numValuesPerInv*index;\n"
2700 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
2701 << " sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
2702 << "}\n";
2703
2704 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
2705 }
2706
2707 TestInstance* DispatchBaseTest::createInstance (Context& context) const
2708 {
2709 return new DispatchBaseTestInstance(context, m_numValues, m_localSize, m_workSize, m_splitSize, m_computePipelineConstructionType, m_useMaintenance5);
2710 }
2711
2712 DispatchBaseTestInstance::DispatchBaseTestInstance (Context& context,
2713 const deUint32 numValues,
2714 const tcu::IVec3& localsize,
2715 const tcu::IVec3& worksize,
2716 const tcu::IVec3& splitsize,
2717 const vk::ComputePipelineConstructionType computePipelineConstructionType,
2718 const bool useMaintenance5)
2719
2720 : ComputeTestInstance (context, computePipelineConstructionType)
2721 , m_numValues (numValues)
2722 , m_localSize (localsize)
2723 , m_workSize (worksize)
2724 , m_splitWorkSize (splitsize)
2725 , m_useMaintenance5 (useMaintenance5)
2726 {
2727 // For easy work distribution across physical devices:
2728 	// the work size must be a whole multiple of the split size, and larger only in the X component.
2729 if ((!isInputVectorValid(m_splitWorkSize, m_workSize)) ||
2730 (m_workSize.x() <= m_splitWorkSize.x()) ||
2731 (m_workSize.y() != m_splitWorkSize.y()) ||
2732 (m_workSize.z() != m_splitWorkSize.z()))
2733 TCU_THROW(TestError, "Invalid Input.");
2734
2735 // For easy work distribution within the same physical device:
2736 	// the split size must be a whole multiple of the local size, matching in X and strictly larger in Y and Z.
2737 if ((!isInputVectorValid(m_localSize, m_splitWorkSize)) ||
2738 (m_localSize.x() != m_splitWorkSize.x()) ||
2739 (m_localSize.y() >= m_splitWorkSize.y()) ||
2740 (m_localSize.z() >= m_splitWorkSize.z()))
2741 TCU_THROW(TestError, "Invalid Input.");
2742
2743 if ((multiplyComponents(m_workSize) / multiplyComponents(m_splitWorkSize)) < (deInt32) m_numPhysDevices)
2744 TCU_THROW(TestError, "Not enough work to distribute across all physical devices.");
2745
2746 deUint32 totalWork = multiplyComponents(m_workSize) * multiplyComponents(m_localSize);
2747 if ((totalWork > numValues) || (numValues % totalWork != 0))
2748 TCU_THROW(TestError, "Buffer too small/not aligned to cover all values.");
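	// Worked example (hypothetical values): workSize = (8,2,2), splitWorkSize = (2,2,2),
	// localSize = (2,1,1). X is the only axis split across devices (8/2 = 4 chunks), Y and Z
	// match the split size, and each chunk subdivides into 2*2 = 4 per-device sub-dispatches.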
2749 }
2750
2751 bool DispatchBaseTestInstance::isInputVectorValid (const tcu::IVec3& small, const tcu::IVec3& big)
2752 {
2753 if (((big.x() < small.x()) || (big.y() < small.y()) || (big.z() < small.z())) ||
2754 ((big.x() % small.x() != 0) || (big.y() % small.y() != 0) || (big.z() % small.z() != 0)))
2755 return false;
2756 return true;
2757 }
2758
2759 tcu::TestStatus DispatchBaseTestInstance::iterate (void)
2760 {
2761 const DeviceInterface& vk = getDeviceInterface();
2762 const VkDevice device = getDevice();
2763 const VkQueue queue = getDeviceQueue(vk, device, m_queueFamilyIndex, 0);
2764 SimpleAllocator allocator (vk, device, getPhysicalDeviceMemoryProperties(m_context.getInstanceInterface(), getPhysicalDevice()));
2765 deUint32 totalWorkloadSize = 0;
2766
2767 	// Create a uniform buffer and an input/output buffer
2768 const deUint32 uniformBufSize = 3; // Pass the compute grid size
2769 const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32) * uniformBufSize;
2770 const BufferWithMemory uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
2771
2772 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
2773 const BufferWithMemory buffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
2774
2775 // Fill the buffers with data
2776 typedef std::vector<deUint32> data_vector_t;
2777 data_vector_t uniformInputData(uniformBufSize);
2778 data_vector_t inputData(m_numValues);
2779
2780 {
2781 const Allocation& bufferAllocation = uniformBuffer.getAllocation();
2782 deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
2783 uniformInputData[0] = *bufferPtr++ = m_workSize.x();
2784 uniformInputData[1] = *bufferPtr++ = m_workSize.y();
2785 uniformInputData[2] = *bufferPtr++ = m_workSize.z();
2786 flushAlloc(vk, device, bufferAllocation);
2787 }
2788
2789 {
2790 de::Random rnd(0x82ce7f);
2791 const Allocation& bufferAllocation = buffer.getAllocation();
2792 deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
2793 for (deUint32 i = 0; i < m_numValues; ++i)
2794 inputData[i] = *bufferPtr++ = rnd.getUint32();
2795
2796 flushAlloc(vk, device, bufferAllocation);
2797 }
2798
2799 // Create descriptor set
2800 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2801 DescriptorSetLayoutBuilder()
2802 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2803 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2804 .build(vk, device));
2805
2806 const Unique<VkDescriptorPool> descriptorPool(
2807 DescriptorPoolBuilder()
2808 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2809 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
2810 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2811
2812 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2813
2814 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
2815 const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
2816
2817 DescriptorSetUpdateBuilder()
2818 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
2819 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
2820 .update(vk, device);
2821
2822 ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp"));
2823 pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
2824 pipeline.setPipelineCreateFlags(VK_PIPELINE_CREATE_DISPATCH_BASE);
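	// vkCmdDispatchBase requires the pipeline to have been created with VK_PIPELINE_CREATE_DISPATCH_BASE
	// (or the equivalent VK_PIPELINE_CREATE_2_DISPATCH_BASE_BIT_KHR flag used below for maintenance5).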
2825
2826 #ifndef CTS_USES_VULKANSC
2827 if (m_useMaintenance5)
2828 {
2829 VkPipelineCreateFlags2CreateInfoKHR pipelineFlags2CreateInfo = initVulkanStructure();
2830 pipelineFlags2CreateInfo.flags = VK_PIPELINE_CREATE_2_DISPATCH_BASE_BIT_KHR;
2831 pipeline.setPipelineCreatePNext(&pipelineFlags2CreateInfo);
2832 pipeline.setPipelineCreateFlags(0);
2833 }
2834 #else
2835 DE_UNREF(m_useMaintenance5);
2836 #endif // CTS_USES_VULKANSC
2837
2838 pipeline.buildPipeline();
2839
2840 const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
2841 const VkBufferMemoryBarrier hostUniformWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
2842
2843 const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
2844
2845 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, m_queueFamilyIndex));
2846 const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
2847
2848 // Start recording commands
2849 beginCommandBuffer(vk, *cmdBuffer);
2850
2851 pipeline.bind(*cmdBuffer);
2852 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2853
2854 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostUniformWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2855
2856 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2857
2858 // Split the workload across all physical devices based on m_splitWorkSize.x()
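	// vkCmdDispatchBase offsets gl_WorkGroupID by (baseGroupX, baseGroupY, baseGroupZ), so every
	// sub-dispatch below covers a disjoint region of the full dispatch grid.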
2859 for (deUint32 physDevIdx = 0; physDevIdx < m_numPhysDevices; physDevIdx++)
2860 {
2861 deUint32 baseGroupX = physDevIdx * m_splitWorkSize.x();
2862 deUint32 baseGroupY = 0;
2863 deUint32 baseGroupZ = 0;
2864
2865 // Split the workload within the physical device based on m_localSize.y() and m_localSize.z()
2866 for (deInt32 localIdxY = 0; localIdxY < (m_splitWorkSize.y() / m_localSize.y()); localIdxY++)
2867 {
2868 for (deInt32 localIdxZ = 0; localIdxZ < (m_splitWorkSize.z() / m_localSize.z()); localIdxZ++)
2869 {
2870 deUint32 offsetX = baseGroupX;
2871 deUint32 offsetY = baseGroupY + localIdxY * m_localSize.y();
2872 deUint32 offsetZ = baseGroupZ + localIdxZ * m_localSize.z();
2873
2874 deUint32 localSizeX = (physDevIdx == (m_numPhysDevices - 1)) ? m_workSize.x() - baseGroupX : m_localSize.x();
2875 deUint32 localSizeY = m_localSize.y();
2876 deUint32 localSizeZ = m_localSize.z();
2877
2878 totalWorkloadSize += (localSizeX * localSizeY * localSizeZ);
2879 vk.cmdDispatchBase(*cmdBuffer, offsetX, offsetY, offsetZ, localSizeX, localSizeY, localSizeZ);
2880 }
2881 }
2882 }
2883
2884 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
2885
2886 endCommandBuffer(vk, *cmdBuffer);
2887 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2888
2889 if (totalWorkloadSize != deUint32(multiplyComponents(m_workSize)))
2890 TCU_THROW(TestError, "Not covering the entire workload.");
2891
2892 // Validate the results
2893 const Allocation& bufferAllocation = buffer.getAllocation();
2894 invalidateAlloc(vk, device, bufferAllocation);
2895 const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
2896
2897 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
2898 {
2899 const deUint32 res = bufferPtr[ndx];
2900 const deUint32 ref = ~inputData[ndx];
2901
2902 if (res != ref)
2903 {
2904 std::ostringstream msg;
2905 msg << "Comparison failed for InOut.values[" << ndx << "]";
2906 return tcu::TestStatus::fail(msg.str());
2907 }
2908 }
2909 return tcu::TestStatus::pass("Compute succeeded");
2910 }
2911
2912 class DeviceIndexTest : public vkt::TestCase
2913 {
2914 public:
2915 DeviceIndexTest (tcu::TestContext& testCtx,
2916 const std::string& name,
2917 const deUint32 numValues,
2918 const tcu::IVec3& localsize,
2919 const tcu::IVec3& splitsize,
2920 const vk::ComputePipelineConstructionType computePipelineConstructionType);
2921
2922 virtual void checkSupport (Context& context) const;
2923 void initPrograms (SourceCollections& sourceCollections) const;
2924 TestInstance* createInstance (Context& context) const;
2925
2926 private:
2927 const deUint32 m_numValues;
2928 const tcu::IVec3 m_localSize;
2929 const tcu::IVec3 m_workSize;
2930 const tcu::IVec3 m_splitSize;
2931 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
2932 };
2933
2934 class DeviceIndexTestInstance : public ComputeTestInstance
2935 {
2936 public:
2937 DeviceIndexTestInstance (Context& context,
2938 const deUint32 numValues,
2939 const tcu::IVec3& localsize,
2940 const tcu::IVec3& worksize,
2941 const vk::ComputePipelineConstructionType computePipelineConstructionType);
2942 tcu::TestStatus iterate (void);
2943 private:
2944 const deUint32 m_numValues;
2945 const tcu::IVec3 m_localSize;
2946 tcu::IVec3 m_workSize;
2947 };
2948
2949 DeviceIndexTest::DeviceIndexTest (tcu::TestContext&	testCtx,
2950 const std::string& name,
2951 const deUint32 numValues,
2952 const tcu::IVec3& localsize,
2953 const tcu::IVec3& worksize,
2954 const vk::ComputePipelineConstructionType computePipelineConstructionType)
2955 : TestCase (testCtx, name)
2956 , m_numValues (numValues)
2957 , m_localSize (localsize)
2958 , m_workSize (worksize)
2959 , m_computePipelineConstructionType (computePipelineConstructionType)
2960 {
2961 }
2962
2963 void DeviceIndexTest::checkSupport (Context& context) const
2964 {
2965 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
2966 }
2967
2968 void DeviceIndexTest::initPrograms (SourceCollections& sourceCollections) const
2969 {
2970 std::ostringstream src;
2971 src << "#version 310 es\n"
2972 << "#extension GL_EXT_device_group : require\n"
2973 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ", local_size_z = " << m_localSize.z() << ") in;\n"
2974
2975 << "layout(binding = 0) buffer InOut {\n"
2976 << " uint values[" << de::toString(m_numValues) << "];\n"
2977 << "} sb_inout;\n"
2978
2979 << "layout(binding = 1) readonly uniform uniformInput {\n"
2980 << " uint baseOffset[1+" << VK_MAX_DEVICE_GROUP_SIZE << "];\n"
2981 << "} ubo_in;\n"
2982
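		// gl_DeviceIndex (GL_EXT_device_group) identifies the physical device within the device
		// group that executes this invocation, starting at 0.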
2983 << "void main (void) {\n"
2984 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2985 << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
2986 << " uint index = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
2987 << " uint offset = numValuesPerInv*index;\n"
2988 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
2989 << " sb_inout.values[offset + ndx] = ubo_in.baseOffset[0] + ubo_in.baseOffset[gl_DeviceIndex + 1];\n"
2990 << "}\n";
2991
2992 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
2993 }
2994
2995 TestInstance* DeviceIndexTest::createInstance (Context& context) const
2996 {
2997 return new DeviceIndexTestInstance(context, m_numValues, m_localSize, m_workSize, m_computePipelineConstructionType);
2998 }
2999
3000 DeviceIndexTestInstance::DeviceIndexTestInstance (Context& context,
3001 const deUint32 numValues,
3002 const tcu::IVec3& localsize,
3003 const tcu::IVec3& worksize,
3004 const vk::ComputePipelineConstructionType computePipelineConstructionType)
3005
3006 : ComputeTestInstance (context, computePipelineConstructionType)
3007 , m_numValues (numValues)
3008 , m_localSize (localsize)
3009 , m_workSize (worksize)
3010 {}
3011
3012 tcu::TestStatus DeviceIndexTestInstance::iterate (void)
3013 {
3014 const DeviceInterface& vk = getDeviceInterface();
3015 const VkDevice device = getDevice();
3016 const VkQueue queue = getDeviceQueue(vk, device, m_queueFamilyIndex, 0);
3017 SimpleAllocator allocator (vk, device, getPhysicalDeviceMemoryProperties(m_context.getInstanceInterface(), getPhysicalDevice()));
3018 const deUint32 allocDeviceMask = (1 << m_numPhysDevices) - 1;
3019 de::Random rnd (0x82ce7f);
3020 Move<VkBuffer> sboBuffer;
3021 vk::Move<vk::VkDeviceMemory> sboBufferMemory;
3022
3023 	// Create a uniform buffer and an output buffer
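	// The factor of 4 accounts for the std140 layout of the uint array: each element is padded
	// to a 16-byte stride, i.e. four 32-bit words (the validation below indexes with the same stride).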
3024 const deUint32 uniformBufSize = 4 * (1 + VK_MAX_DEVICE_GROUP_SIZE);
3025 const VkDeviceSize uniformBufferSizeBytes = sizeof(deUint32) * uniformBufSize;
3026 const BufferWithMemory uniformBuffer(vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), MemoryRequirement::HostVisible);
3027
3028 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * m_numValues;
3029 const BufferWithMemory checkBuffer(vk, device, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible);
3030
3031 	// Create the SSBO buffer
3032 {
3033 const VkBufferCreateInfo sboBufferParams =
3034 {
3035 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // sType
3036 DE_NULL, // pNext
3037 0u, // flags
3038 (VkDeviceSize)bufferSizeBytes, // size
3039 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, // usage
3040 VK_SHARING_MODE_EXCLUSIVE, // sharingMode
3041 1u, // queueFamilyIndexCount
3042 &m_queueFamilyIndex, // pQueueFamilyIndices
3043 };
3044 sboBuffer = createBuffer(vk, device, &sboBufferParams);
3045
3046 VkMemoryRequirements memReqs = getBufferMemoryRequirements(vk, device, sboBuffer.get());
3047 deUint32 memoryTypeNdx = 0;
3048 const VkPhysicalDeviceMemoryProperties deviceMemProps = getPhysicalDeviceMemoryProperties(m_context.getInstanceInterface(), getPhysicalDevice());
3049 		for (memoryTypeNdx = 0; memoryTypeNdx < deviceMemProps.memoryTypeCount; memoryTypeNdx++)
3050 {
3051 if ((memReqs.memoryTypeBits & (1u << memoryTypeNdx)) != 0 &&
3052 (deviceMemProps.memoryTypes[memoryTypeNdx].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
3053 break;
3054 }
3055 if (memoryTypeNdx == deviceMemProps.memoryTypeCount)
3056 TCU_THROW(NotSupportedError, "No compatible memory type found");
3057
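		// Allocating with VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT and all device bits set gives every
		// physical device in the group its own replica of this device-local allocation.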
3058 const VkMemoryAllocateFlagsInfo allocDeviceMaskInfo =
3059 {
3060 VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, // sType
3061 DE_NULL, // pNext
3062 VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT, // flags
3063 allocDeviceMask, // deviceMask
3064 };
3065
3066 VkMemoryAllocateInfo allocInfo =
3067 {
3068 VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType
3069 &allocDeviceMaskInfo, // pNext
3070 memReqs.size, // allocationSize
3071 memoryTypeNdx, // memoryTypeIndex
3072 };
3073
3074 sboBufferMemory = allocateMemory(vk, device, &allocInfo);
3075 VK_CHECK(vk.bindBufferMemory(device, *sboBuffer, sboBufferMemory.get(), 0));
3076 }
3077
3078 // Fill the buffers with data
3079 typedef std::vector<deUint32> data_vector_t;
3080 data_vector_t uniformInputData(uniformBufSize, 0);
3081
3082 {
3083 const Allocation& bufferAllocation = uniformBuffer.getAllocation();
3084 deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
3085 for (deUint32 i = 0; i < uniformBufSize; ++i)
3086 uniformInputData[i] = *bufferPtr++ = rnd.getUint32() / 10; // divide to prevent overflow in addition
3087
3088 flushAlloc(vk, device, bufferAllocation);
3089 }
3090
3091 // Create descriptor set
3092 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3093 DescriptorSetLayoutBuilder()
3094 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
3095 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
3096 .build(vk, device));
3097
3098 const Unique<VkDescriptorPool> descriptorPool(
3099 DescriptorPoolBuilder()
3100 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
3101 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
3102 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3103
3104 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
3105
3106 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*sboBuffer, 0ull, bufferSizeBytes);
3107 const VkDescriptorBufferInfo uniformBufferDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
3108
3109 DescriptorSetUpdateBuilder()
3110 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
3111 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
3112 .update(vk, device);
3113
3114 ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp"));
3115 pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
3116 pipeline.buildPipeline();
3117
3118 const VkBufferMemoryBarrier hostUniformWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
3119 	const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *sboBuffer, 0ull, bufferSizeBytes);
3120
3121 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, m_queueFamilyIndex));
3122 const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
3123
3124 // Verify multiple device masks
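	// Every non-empty subset of the device group is exercised; vkCmdSetDeviceMask below limits
	// the recorded dispatch to the physical devices whose bits are set in physDevMask.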
3125 for (deUint32 physDevMask = 1; physDevMask < (1u << m_numPhysDevices); physDevMask++)
3126 {
3127 deUint32 constantValPerLoop = 0;
3128 {
3129 const Allocation& bufferAllocation = uniformBuffer.getAllocation();
3130 deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
3131 constantValPerLoop = *bufferPtr = rnd.getUint32() / 10; // divide to prevent overflow in addition
3132 flushAlloc(vk, device, bufferAllocation);
3133 }
3134 beginCommandBuffer(vk, *cmdBuffer);
3135
3136 pipeline.bind(*cmdBuffer);
3137 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
3138 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostUniformWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
3139
3140 vk.cmdSetDeviceMask(*cmdBuffer, physDevMask);
3141 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
3142
3143 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
3144
3145 endCommandBuffer(vk, *cmdBuffer);
3146 submitCommandsAndWait(vk, device, queue, *cmdBuffer, true, physDevMask);
3147 m_context.resetCommandPoolForVKSC(device, *cmdPool);
3148
3149 // Validate the results on all physical devices where compute shader was launched
3150 		const VkBufferMemoryBarrier srcBufferBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *sboBuffer, 0ull, bufferSizeBytes);
3151 const VkBufferMemoryBarrier dstBufferBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *checkBuffer, 0ull, bufferSizeBytes);
3152 const VkBufferCopy copyParams =
3153 {
3154 (VkDeviceSize)0u, // srcOffset
3155 (VkDeviceSize)0u, // dstOffset
3156 bufferSizeBytes // size
3157 };
3158
3159 for (deUint32 physDevIdx = 0; physDevIdx < m_numPhysDevices; physDevIdx++)
3160 {
3161 if (!(1<<physDevIdx & physDevMask))
3162 continue;
3163
3164 const deUint32 deviceMask = 1 << physDevIdx;
3165
3166 beginCommandBuffer(vk, *cmdBuffer);
3167 vk.cmdSetDeviceMask(*cmdBuffer, deviceMask);
3168 			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &srcBufferBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
3169 			vk.cmdCopyBuffer(*cmdBuffer, *sboBuffer, *checkBuffer, 1, &copyParams);
3170 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &dstBufferBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
3171
3172 endCommandBuffer(vk, *cmdBuffer);
3173 submitCommandsAndWait(vk, device, queue, *cmdBuffer, true, deviceMask);
3174
3175 const Allocation& bufferAllocation = checkBuffer.getAllocation();
3176 invalidateAlloc(vk, device, bufferAllocation);
3177 const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
3178
3179 for (deUint32 ndx = 0; ndx < m_numValues; ++ndx)
3180 {
3181 const deUint32 res = bufferPtr[ndx];
3182 const deUint32 ref = constantValPerLoop + uniformInputData[4 * (physDevIdx + 1)];
3183
3184 if (res != ref)
3185 {
3186 std::ostringstream msg;
3187 msg << "Comparison failed on physical device "<< getPhysicalDevice(physDevIdx) <<" ( deviceMask "<< deviceMask <<" ) for InOut.values[" << ndx << "]";
3188 return tcu::TestStatus::fail(msg.str());
3189 }
3190 }
3191 }
3192 }
3193
3194 return tcu::TestStatus::pass("Compute succeeded");
3195 }
3196
3197 class ConcurrentCompute : public vkt::TestCase
3198 {
3199 public:
3200 ConcurrentCompute (tcu::TestContext& testCtx,
3201 const std::string& name,
3202 const vk::ComputePipelineConstructionType computePipelineConstructionType);
3203
3204
3205 virtual void checkSupport (Context& context) const;
3206 void initPrograms (SourceCollections& sourceCollections) const;
3207 TestInstance* createInstance (Context& context) const;
3208
3209 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
3210 };
3211
3212 class ConcurrentComputeInstance : public vkt::TestInstance
3213 {
3214 public:
3215 ConcurrentComputeInstance (Context& context, const vk::ComputePipelineConstructionType computePipelineConstructionType);
3216
3217 tcu::TestStatus iterate (void);
3218 private:
3219 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
3220 };
3221
3222 ConcurrentCompute::ConcurrentCompute (tcu::TestContext&	testCtx,
3223 const std::string& name,
3224 const vk::ComputePipelineConstructionType computePipelineConstructionType)
3225 : TestCase (testCtx, name)
3226 , m_computePipelineConstructionType (computePipelineConstructionType)
3227 {
3228 }
3229
3230 void ConcurrentCompute::checkSupport (Context& context) const
3231 {
3232 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
3233 }
3234
3235 void ConcurrentCompute::initPrograms (SourceCollections& sourceCollections) const
3236 {
3237 std::ostringstream src;
3238 src << "#version 310 es\n"
3239 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3240 << "layout(binding = 0) buffer InOut {\n"
3241 << " uint values[1024];\n"
3242 << "} sb_inout;\n"
3243 << "void main (void) {\n"
3244 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
3245 << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
3246 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
3247 << " uint offset = numValuesPerInv*groupNdx;\n"
3248 << "\n"
3249 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
3250 << " sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
3251 << "}\n";
3252
3253 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
3254 }
3255
3256 TestInstance* ConcurrentCompute::createInstance (Context& context) const
3257 {
3258 return new ConcurrentComputeInstance(context, m_computePipelineConstructionType);
3259 }
3260
3261 ConcurrentComputeInstance::ConcurrentComputeInstance (Context& context, const vk::ComputePipelineConstructionType computePipelineConstructionType)
3262 : TestInstance (context)
3263 , m_computePipelineConstructionType (computePipelineConstructionType)
3264 {
3265 }
3266
3267 tcu::TestStatus ConcurrentComputeInstance::iterate (void)
3268 {
3269 enum {
3270 NO_MATCH_FOUND = ~((deUint32)0),
3271 ERROR_NONE = 0,
3272 ERROR_WAIT = 1,
3273 ERROR_ORDER = 2
3274 };
3275
3276 struct Queues
3277 {
3278 VkQueue queue;
3279 deUint32 queueFamilyIndex;
3280 };
3281
3283 const deUint32 numValues = 1024;
3284 const CustomInstance instance (createCustomInstanceFromContext(m_context));
3285 const InstanceDriver& instanceDriver (instance.getDriver());
3286 const VkPhysicalDevice physicalDevice = chooseDevice(instanceDriver, instance, m_context.getTestContext().getCommandLine());
3287 tcu::TestLog& log = m_context.getTestContext().getLog();
3288 vk::Move<vk::VkDevice> logicalDevice;
3289 std::vector<VkQueueFamilyProperties> queueFamilyProperties;
3290 VkDeviceCreateInfo deviceInfo;
3291 VkPhysicalDeviceFeatures2 deviceFeatures2 = initVulkanStructure();
3292 VkPhysicalDeviceFeatures deviceFeatures;
3293 const float queuePriorities[2] = {1.0f, 0.0f};
3294 VkDeviceQueueCreateInfo queueInfos[2];
3295 Queues queues[2] =
3296 {
3297 {DE_NULL, (deUint32)NO_MATCH_FOUND},
3298 {DE_NULL, (deUint32)NO_MATCH_FOUND}
3299 };
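	// Queue 0 is requested at priority 1.0 (high) and queue 1 at 0.0 (low); the test submits the
	// same workload to both and later reports if the low-priority queue finishes first.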
3300
3301 queueFamilyProperties = getPhysicalDeviceQueueFamilyProperties(instanceDriver, physicalDevice);
3302
3303 for (deUint32 queueNdx = 0; queueNdx < queueFamilyProperties.size(); ++queueNdx)
3304 {
3305 if (queueFamilyProperties[queueNdx].queueFlags & VK_QUEUE_COMPUTE_BIT)
3306 {
3307 if (NO_MATCH_FOUND == queues[0].queueFamilyIndex)
3308 queues[0].queueFamilyIndex = queueNdx;
3309
3310 if (queues[0].queueFamilyIndex != queueNdx || queueFamilyProperties[queueNdx].queueCount > 1u)
3311 {
3312 queues[1].queueFamilyIndex = queueNdx;
3313 break;
3314 }
3315 }
3316 }
3317
3318 if (queues[0].queueFamilyIndex == NO_MATCH_FOUND || queues[1].queueFamilyIndex == NO_MATCH_FOUND)
3319 		TCU_THROW(NotSupportedError, "Two compute-capable queues are not available");
3320
3321 for (int queueNdx = 0; queueNdx < 2; ++queueNdx)
3322 {
3323 VkDeviceQueueCreateInfo queueInfo;
3324 deMemset(&queueInfo, 0, sizeof(queueInfo));
3325
3326 queueInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
3327 queueInfo.pNext = DE_NULL;
3328 queueInfo.flags = (VkDeviceQueueCreateFlags)0u;
3329 queueInfo.queueFamilyIndex = queues[queueNdx].queueFamilyIndex;
3330 queueInfo.queueCount = (queues[0].queueFamilyIndex == queues[1].queueFamilyIndex) ? 2 : 1;
3331 queueInfo.pQueuePriorities = (queueInfo.queueCount == 2) ? queuePriorities : &queuePriorities[queueNdx];
3332
3333 queueInfos[queueNdx] = queueInfo;
3334
3335 if (queues[0].queueFamilyIndex == queues[1].queueFamilyIndex)
3336 break;
3337 }
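	// When both queues share a family, a single create info with queueCount = 2 is used (hence the
	// early break above); otherwise each family gets its own create info with a single queue.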
3338
3339 void* pNext = DE_NULL;
3340
3341 deMemset(&deviceInfo, 0, sizeof(deviceInfo));
3342 instanceDriver.getPhysicalDeviceFeatures(physicalDevice, &deviceFeatures);
3343
3344 deviceFeatures2.features = deviceFeatures;
3345
3346 std::vector<const char*> deviceExtensions;
3347
3348 #ifndef CTS_USES_VULKANSC
3349 VkPhysicalDeviceDynamicRenderingFeaturesKHR dynamicRenderingFeatures = initVulkanStructure();
3350 dynamicRenderingFeatures.dynamicRendering = VK_TRUE;
3351 VkPhysicalDeviceShaderObjectFeaturesEXT shaderObjectFeatures = initVulkanStructure(&dynamicRenderingFeatures);
3352 shaderObjectFeatures.shaderObject = VK_TRUE;
3353
3354 if (m_computePipelineConstructionType != COMPUTE_PIPELINE_CONSTRUCTION_TYPE_PIPELINE)
3355 {
3356 deviceExtensions.push_back("VK_EXT_shader_object");
3357 deviceFeatures2.pNext = &shaderObjectFeatures;
3358 pNext = &deviceFeatures2;
3359 }
3360 #endif
3361
3362 #ifdef CTS_USES_VULKANSC
3363 VkDeviceObjectReservationCreateInfo memReservationInfo = m_context.getTestContext().getCommandLine().isSubProcess() ? m_context.getResourceInterface()->getStatMax() : resetDeviceObjectReservationCreateInfo();
3364 memReservationInfo.pNext = pNext;
3365 pNext = &memReservationInfo;
3366
3367 VkPhysicalDeviceVulkanSC10Features sc10Features = createDefaultSC10Features();
3368 sc10Features.pNext = pNext;
3369 pNext = &sc10Features;
3370
3371 VkPipelineCacheCreateInfo pcCI;
3372 std::vector<VkPipelinePoolSize> poolSizes;
3373 if (m_context.getTestContext().getCommandLine().isSubProcess())
3374 {
3375 if (m_context.getResourceInterface()->getCacheDataSize() > 0)
3376 {
3377 pcCI =
3378 {
3379 VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType;
3380 DE_NULL, // const void* pNext;
3381 VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
3382 VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT, // VkPipelineCacheCreateFlags flags;
3383 m_context.getResourceInterface()->getCacheDataSize(), // deUintptr initialDataSize;
3384 m_context.getResourceInterface()->getCacheData() // const void* pInitialData;
3385 };
3386 memReservationInfo.pipelineCacheCreateInfoCount = 1;
3387 memReservationInfo.pPipelineCacheCreateInfos = &pcCI;
3388 }
3389
3390 poolSizes = m_context.getResourceInterface()->getPipelinePoolSizes();
3391 if (!poolSizes.empty())
3392 {
3393 memReservationInfo.pipelinePoolSizeCount = deUint32(poolSizes.size());
3394 memReservationInfo.pPipelinePoolSizes = poolSizes.data();
3395 }
3396 }
3397 #endif // CTS_USES_VULKANSC
3398
3399 deviceInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
3400 deviceInfo.pNext = pNext;
3401 deviceInfo.enabledExtensionCount = (deUint32)deviceExtensions.size();
3402 deviceInfo.ppEnabledExtensionNames = deviceExtensions.data();
3403 deviceInfo.enabledLayerCount = 0u;
3404 deviceInfo.ppEnabledLayerNames = DE_NULL;
3405 deviceInfo.pEnabledFeatures = (deviceFeatures2.pNext == DE_NULL) ? &deviceFeatures : DE_NULL;
3406 deviceInfo.queueCreateInfoCount = (queues[0].queueFamilyIndex == queues[1].queueFamilyIndex) ? 1 : 2;
3407 deviceInfo.pQueueCreateInfos = queueInfos;
3408
3409 logicalDevice = createCustomDevice (m_context.getTestContext().getCommandLine().isValidationEnabled(), m_context.getPlatformInterface(), instance, instanceDriver, physicalDevice, &deviceInfo);
3410
3411 #ifndef CTS_USES_VULKANSC
3412 de::MovePtr<vk::DeviceDriver> deviceDriver = de::MovePtr<DeviceDriver>(new DeviceDriver(m_context.getPlatformInterface(), instance, *logicalDevice, m_context.getUsedApiVersion()));
3413 #else
3414 de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter> deviceDriver = de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter>(new DeviceDriverSC(m_context.getPlatformInterface(), instance, *logicalDevice, m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(), m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties(), m_context.getUsedApiVersion()), vk::DeinitDeviceDeleter(m_context.getResourceInterface().get(), *logicalDevice));
3415 #endif // CTS_USES_VULKANSC
3416 vk::DeviceInterface& vk = *deviceDriver;
3417
3418 for (deUint32 queueReqNdx = 0; queueReqNdx < 2; ++queueReqNdx)
3419 {
3420 if (queues[0].queueFamilyIndex == queues[1].queueFamilyIndex)
3421 vk.getDeviceQueue(*logicalDevice, queues[queueReqNdx].queueFamilyIndex, queueReqNdx, &queues[queueReqNdx].queue);
3422 else
3423 vk.getDeviceQueue(*logicalDevice, queues[queueReqNdx].queueFamilyIndex, 0u, &queues[queueReqNdx].queue);
3424 }
3425
3426 	// Create input/output buffers
3427 const VkPhysicalDeviceMemoryProperties memoryProperties = vk::getPhysicalDeviceMemoryProperties(instanceDriver, physicalDevice);
3428
3429 de::MovePtr<SimpleAllocator> allocator = de::MovePtr<SimpleAllocator>(new SimpleAllocator(vk, *logicalDevice, memoryProperties));
3430 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * numValues;
3431 const BufferWithMemory buffer1(vk, *logicalDevice, *allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
3432 const BufferWithMemory buffer2(vk, *logicalDevice, *allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
3433
3434 // Fill the buffers with data
3435
3436 typedef std::vector<deUint32> data_vector_t;
3437 data_vector_t inputData(numValues);
3438
3439 {
3440 de::Random rnd(0x82ce7f);
3441 const Allocation& bufferAllocation1 = buffer1.getAllocation();
3442 const Allocation& bufferAllocation2 = buffer2.getAllocation();
3443 deUint32* bufferPtr1 = static_cast<deUint32*>(bufferAllocation1.getHostPtr());
3444 deUint32* bufferPtr2 = static_cast<deUint32*>(bufferAllocation2.getHostPtr());
3445
3446 for (deUint32 i = 0; i < numValues; ++i)
3447 {
3448 deUint32 val = rnd.getUint32();
3449 inputData[i] = val;
3450 *bufferPtr1++ = val;
3451 *bufferPtr2++ = val;
3452 }
3453
3454 flushAlloc(vk, *logicalDevice, bufferAllocation1);
3455 flushAlloc(vk, *logicalDevice, bufferAllocation2);
3456 }
3457
3458 // Create descriptor sets
3459
3460 const Unique<VkDescriptorSetLayout> descriptorSetLayout1(
3461 DescriptorSetLayoutBuilder()
3462 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
3463 .build(vk, *logicalDevice));
3464
3465 const Unique<VkDescriptorPool> descriptorPool1(
3466 DescriptorPoolBuilder()
3467 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
3468 .build(vk, *logicalDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3469
3470 const Unique<VkDescriptorSet> descriptorSet1(makeDescriptorSet(vk, *logicalDevice, *descriptorPool1, *descriptorSetLayout1));
3471
3472 const VkDescriptorBufferInfo bufferDescriptorInfo1 = makeDescriptorBufferInfo(*buffer1, 0ull, bufferSizeBytes);
3473 DescriptorSetUpdateBuilder()
3474 .writeSingle(*descriptorSet1, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo1)
3475 .update(vk, *logicalDevice);
3476
3477 const Unique<VkDescriptorSetLayout> descriptorSetLayout2(
3478 DescriptorSetLayoutBuilder()
3479 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
3480 .build(vk, *logicalDevice));
3481
3482 const Unique<VkDescriptorPool> descriptorPool2(
3483 DescriptorPoolBuilder()
3484 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
3485 .build(vk, *logicalDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3486
3487 const Unique<VkDescriptorSet> descriptorSet2(makeDescriptorSet(vk, *logicalDevice, *descriptorPool2, *descriptorSetLayout2));
3488
3489 const VkDescriptorBufferInfo bufferDescriptorInfo2 = makeDescriptorBufferInfo(*buffer2, 0ull, bufferSizeBytes);
3490 DescriptorSetUpdateBuilder()
3491 .writeSingle(*descriptorSet2, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo2)
3492 .update(vk, *logicalDevice);
3493
3494 // Perform the computation
3495
3496 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, *logicalDevice, m_context.getBinaryCollection().get("comp"), 0u));
3497
3498 ComputePipelineWrapper pipeline1(vk, *logicalDevice, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp"));
3499 pipeline1.setDescriptorSetLayout(*descriptorSetLayout1);
3500 pipeline1.buildPipeline();
3501 const VkBufferMemoryBarrier hostWriteBarrier1 = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer1, 0ull, bufferSizeBytes);
3502 const VkBufferMemoryBarrier shaderWriteBarrier1 = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer1, 0ull, bufferSizeBytes);
3503 const Unique<VkCommandPool> cmdPool1(makeCommandPool(vk, *logicalDevice, queues[0].queueFamilyIndex));
3504 const Unique<VkCommandBuffer> cmdBuffer1(allocateCommandBuffer(vk, *logicalDevice, *cmdPool1, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
3505
3506 ComputePipelineWrapper pipeline2(vk, *logicalDevice, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp"));
3507 pipeline2.setDescriptorSetLayout(*descriptorSetLayout2);
3508 pipeline2.buildPipeline();
3509 const VkBufferMemoryBarrier hostWriteBarrier2 = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer2, 0ull, bufferSizeBytes);
3510 const VkBufferMemoryBarrier shaderWriteBarrier2 = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer2, 0ull, bufferSizeBytes);
3511 const Unique<VkCommandPool> cmdPool2(makeCommandPool(vk, *logicalDevice, queues[1].queueFamilyIndex));
3512 const Unique<VkCommandBuffer> cmdBuffer2(allocateCommandBuffer(vk, *logicalDevice, *cmdPool2, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
3513
3514 // Command buffer 1
3515
3516 beginCommandBuffer(vk, *cmdBuffer1);
3517 pipeline1.bind(*cmdBuffer1);
3518 vk.cmdBindDescriptorSets(*cmdBuffer1, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline1.getPipelineLayout(), 0u, 1u, &descriptorSet1.get(), 0u, DE_NULL);
3519 vk.cmdPipelineBarrier(*cmdBuffer1, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier1, 0, (const VkImageMemoryBarrier*)DE_NULL);
3520 vk.cmdDispatch(*cmdBuffer1, 1, 1, 1);
3521 vk.cmdPipelineBarrier(*cmdBuffer1, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier1, 0, (const VkImageMemoryBarrier*)DE_NULL);
3522 endCommandBuffer(vk, *cmdBuffer1);
3523
3524 // Command buffer 2
3525
3526 beginCommandBuffer(vk, *cmdBuffer2);
3527 pipeline2.bind(*cmdBuffer2);
3528 vk.cmdBindDescriptorSets(*cmdBuffer2, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline2.getPipelineLayout(), 0u, 1u, &descriptorSet2.get(), 0u, DE_NULL);
3529 vk.cmdPipelineBarrier(*cmdBuffer2, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier2, 0, (const VkImageMemoryBarrier*)DE_NULL);
3530 vk.cmdDispatch(*cmdBuffer2, 1, 1, 1);
3531 vk.cmdPipelineBarrier(*cmdBuffer2, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier2, 0, (const VkImageMemoryBarrier*)DE_NULL);
3532 endCommandBuffer(vk, *cmdBuffer2);
3533
3534 VkSubmitInfo submitInfo1 =
3535 {
3536 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
3537 DE_NULL, // pNext
3538 0u, // waitSemaphoreCount
3539 DE_NULL, // pWaitSemaphores
3540 (const VkPipelineStageFlags*)DE_NULL, // pWaitDstStageMask
3541 1u, // commandBufferCount
3542 &cmdBuffer1.get(), // pCommandBuffers
3543 0u, // signalSemaphoreCount
3544 DE_NULL // pSignalSemaphores
3545 };
3546
3547 VkSubmitInfo submitInfo2 =
3548 {
3549 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
3550 DE_NULL, // pNext
3551 0u, // waitSemaphoreCount
3552 DE_NULL, // pWaitSemaphores
3553 (const VkPipelineStageFlags*)DE_NULL, // pWaitDstStageMask
3554 1u, // commandBufferCount
3555 &cmdBuffer2.get(), // pCommandBuffers
3556 0u, // signalSemaphoreCount
3557 DE_NULL // pSignalSemaphores
3558 };
3559
3560 // Wait for completion
3561 const Unique<VkFence> fence1(createFence(vk, *logicalDevice));
3562 const Unique<VkFence> fence2(createFence(vk, *logicalDevice));
3563
3564 VK_CHECK(vk.queueSubmit(queues[0].queue, 1u, &submitInfo1, *fence1));
3565 VK_CHECK(vk.queueSubmit(queues[1].queue, 1u, &submitInfo2, *fence2));
3566
3567 int err = ERROR_NONE;
3568
3569 // First wait for the low-priority queue
3570 if (VK_SUCCESS != vk.waitForFences(*logicalDevice, 1u, &fence2.get(), DE_TRUE, ~0ull))
3571 err = ERROR_WAIT;
3572
3573 // If the high-priority queue hasn't finished, we have a problem.
3574 if (VK_SUCCESS != vk.getFenceStatus(*logicalDevice, fence1.get()))
3575 if (err == ERROR_NONE)
3576 err = ERROR_ORDER;
3577
3578 // Wait for the high-priority fence so we don't get errors on teardown.
3579 vk.waitForFences(*logicalDevice, 1u, &fence1.get(), DE_TRUE, ~0ull);
3580
3581 	// If we fail() before waiting for all of the fences, the reported error will come
3582 	// from teardown instead of the failure we actually want to surface.
3583
3584 if (err == ERROR_WAIT)
3585 {
3586 return tcu::TestStatus::fail("Failed waiting for low-priority queue fence.");
3587 }
3588
3589 // Validate the results
3590
3591 const Allocation& bufferAllocation1 = buffer1.getAllocation();
3592 invalidateAlloc(vk, *logicalDevice, bufferAllocation1);
3593 const deUint32* bufferPtr1 = static_cast<deUint32*>(bufferAllocation1.getHostPtr());
3594
3595 const Allocation& bufferAllocation2 = buffer2.getAllocation();
3596 invalidateAlloc(vk, *logicalDevice, bufferAllocation2);
3597 const deUint32* bufferPtr2 = static_cast<deUint32*>(bufferAllocation2.getHostPtr());
3598
3599 for (deUint32 ndx = 0; ndx < numValues; ++ndx)
3600 {
3601 const deUint32 res1 = bufferPtr1[ndx];
3602 const deUint32 res2 = bufferPtr2[ndx];
3603 const deUint32 inp = inputData[ndx];
3604 const deUint32 ref = ~inp;
3605
3606 if (res1 != ref || res1 != res2)
3607 {
3608 std::ostringstream msg;
3609 msg << "Comparison failed for InOut.values[" << ndx << "] ref:" << ref <<" res1:" << res1 << " res2:" << res2 << " inp:" << inp;
3610 return tcu::TestStatus::fail(msg.str());
3611 }
3612 }
3613
3614 if (err == ERROR_ORDER)
3615 log << tcu::TestLog::Message << "Note: Low-priority queue was faster than high-priority one. This is not an error, but priorities may be inverted." << tcu::TestLog::EndMessage;
3616
3617 return tcu::TestStatus::pass("Test passed");
3618 }

class EmptyWorkGroupCase : public vkt::TestCase
{
public:
    EmptyWorkGroupCase (tcu::TestContext& testCtx, const std::string& name, const tcu::UVec3& dispatchSize, const vk::ComputePipelineConstructionType computePipelineConstructionType);
    virtual ~EmptyWorkGroupCase (void) {}

    virtual void checkSupport (Context& context) const override;
    TestInstance* createInstance (Context& context) const override;
    void initPrograms (vk::SourceCollections& programCollection) const override;

protected:
    const tcu::UVec3 m_dispatchSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class EmptyWorkGroupInstance : public vkt::TestInstance
{
public:
    EmptyWorkGroupInstance (Context& context, const tcu::UVec3& dispatchSize, const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : vkt::TestInstance                  (context)
        , m_dispatchSize                     (dispatchSize)
        , m_computePipelineConstructionType  (computePipelineConstructionType)
    {}
    virtual ~EmptyWorkGroupInstance (void) {}

    tcu::TestStatus iterate (void) override;

protected:
    const tcu::UVec3 m_dispatchSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

EmptyWorkGroupCase::EmptyWorkGroupCase (tcu::TestContext& testCtx, const std::string& name, const tcu::UVec3& dispatchSize, const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : vkt::TestCase                      (testCtx, name)
    , m_dispatchSize                     (dispatchSize)
    , m_computePipelineConstructionType  (computePipelineConstructionType)
{
    DE_ASSERT(m_dispatchSize.x() == 0u || m_dispatchSize.y() == 0u || m_dispatchSize.z() == 0u);
}

void EmptyWorkGroupCase::checkSupport (Context& context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}

TestInstance* EmptyWorkGroupCase::createInstance (Context& context) const
{
    return new EmptyWorkGroupInstance(context, m_dispatchSize, m_computePipelineConstructionType);
}

void EmptyWorkGroupCase::initPrograms (vk::SourceCollections& programCollection) const
{
    std::ostringstream comp;
    comp
        << "#version 450\n"
        << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
        << "layout (set=0, binding=0) buffer VerificationBlock { uint value; } verif;\n"
        << "void main () { atomicAdd(verif.value, 1u); }\n"
        ;
    programCollection.glslSources.add("comp") << glu::ComputeSource(comp.str());
}

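// The instance records a dispatch of m_dispatchSize (which has at least one zero component,
// asserted in the case constructor, and therefore launches no workgroups) followed by a single
// 1x1x1 dispatch; the atomic counter in the verification buffer must end up at exactly 1.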
tcu::TestStatus EmptyWorkGroupInstance::iterate (void)
{
    const auto& vkd        = m_context.getDeviceInterface();
    const auto  device     = m_context.getDevice();
    auto&       alloc      = m_context.getDefaultAllocator();
    const auto  queueIndex = m_context.getUniversalQueueFamilyIndex();
    const auto  queue      = m_context.getUniversalQueue();

    const auto verifBufferSize = static_cast<VkDeviceSize>(sizeof(uint32_t));
    const auto verifBufferInfo = makeBufferCreateInfo(verifBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
    BufferWithMemory verifBuffer (vkd, device, alloc, verifBufferInfo, MemoryRequirement::HostVisible);
    auto& verifBufferAlloc = verifBuffer.getAllocation();
    void* verifBufferPtr   = verifBufferAlloc.getHostPtr();

    deMemset(verifBufferPtr, 0, static_cast<size_t>(verifBufferSize));
    flushAlloc(vkd, device, verifBufferAlloc);

    DescriptorSetLayoutBuilder layoutBuilder;
    layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
    const auto descriptorSetLayout = layoutBuilder.build(vkd, device);

    ComputePipelineWrapper pipeline (vkd, device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.buildPipeline();

    DescriptorPoolBuilder poolBuilder;
    poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
    const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
    const auto descriptorSet  = makeDescriptorSet(vkd, device, descriptorPool.get(), descriptorSetLayout.get());

    DescriptorSetUpdateBuilder updateBuilder;
    const auto verifBufferDescInfo = makeDescriptorBufferInfo(verifBuffer.get(), 0ull, verifBufferSize);
    updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &verifBufferDescInfo);
    updateBuilder.update(vkd, device);

    const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
    const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
    const auto cmdBuffer    = cmdBufferPtr.get();

    beginCommandBuffer(vkd, cmdBuffer);
    pipeline.bind(cmdBuffer);
    vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
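    // A dispatch where any group count is zero is valid and launches no workgroups,
    // so this first dispatch must leave the verification buffer untouched.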
    vkd.cmdDispatch(cmdBuffer, m_dispatchSize.x(), m_dispatchSize.y(), m_dispatchSize.z());

    const auto readWriteAccess  = (VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT);
    const auto computeToCompute = makeMemoryBarrier(readWriteAccess, readWriteAccess);
    vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 1u, &computeToCompute, 0u, nullptr, 0u, nullptr);

    vkd.cmdDispatch(cmdBuffer, 1u, 1u, 1u);

    const auto computeToHost = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
    vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &computeToHost, 0u, nullptr, 0u, nullptr);

    endCommandBuffer(vkd, cmdBuffer);
    submitCommandsAndWait(vkd, device, queue, cmdBuffer);

    uint32_t value;
    invalidateAlloc(vkd, device, verifBufferAlloc);
    deMemcpy(&value, verifBufferPtr, sizeof(value));

    if (value != 1u)
    {
        std::ostringstream msg;
        msg << "Unexpected value found in buffer: " << value << " while expecting 1";
        TCU_FAIL(msg.str());
    }

    return tcu::TestStatus::pass("Pass");
}

class MaxWorkGroupSizeTest : public vkt::TestCase
{
public:
    enum class Axis { X = 0, Y = 1, Z = 2 };

    struct Params
    {
        // Which axis to maximize.
        Axis axis;
    };

    MaxWorkGroupSizeTest (tcu::TestContext& testCtx, const std::string& name, const Params& params, const vk::ComputePipelineConstructionType computePipelineConstructionType);
    virtual ~MaxWorkGroupSizeTest (void) {}

    virtual void initPrograms (vk::SourceCollections& programCollection) const;
    virtual TestInstance* createInstance (Context& context) const;
    virtual void checkSupport (Context& context) const;

    // Helper to transform the axis value to an index.
    static int getIndex (Axis axis);

    // Helper returning the number of invocations according to the test parameters.
    static deUint32 getInvocations (const Params& params, const vk::InstanceInterface& vki, vk::VkPhysicalDevice physicalDevice, const vk::VkPhysicalDeviceProperties* devProperties = nullptr);

    // Helper returning the buffer size needed by this test.
    static deUint32 getSSBOSize (deUint32 invocations);

private:
    Params m_params;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class MaxWorkGroupSizeInstance : public vkt::TestInstance
{
public:
    MaxWorkGroupSizeInstance (Context& context, const MaxWorkGroupSizeTest::Params& params, const vk::ComputePipelineConstructionType computePipelineConstructionType);
    virtual ~MaxWorkGroupSizeInstance (void) {}

    virtual tcu::TestStatus iterate (void);

private:
    MaxWorkGroupSizeTest::Params m_params;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

int MaxWorkGroupSizeTest::getIndex (Axis axis)
{
    const int ret = static_cast<int>(axis);
    DE_ASSERT(ret >= static_cast<int>(Axis::X) && ret <= static_cast<int>(Axis::Z));
    return ret;
}

deUint32 MaxWorkGroupSizeTest::getInvocations (const Params& params, const vk::InstanceInterface& vki, vk::VkPhysicalDevice physicalDevice, const vk::VkPhysicalDeviceProperties* devProperties)
{
    const auto axis = getIndex(params.axis);

    if (devProperties)
        return devProperties->limits.maxComputeWorkGroupSize[axis];
    return vk::getPhysicalDeviceProperties(vki, physicalDevice).limits.maxComputeWorkGroupSize[axis];
}

deUint32 MaxWorkGroupSizeTest::getSSBOSize (deUint32 invocations)
{
    return invocations * static_cast<deUint32>(sizeof(deUint32));
}

MaxWorkGroupSizeTest::MaxWorkGroupSizeTest (tcu::TestContext& testCtx, const std::string& name, const Params& params, const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : vkt::TestCase                      (testCtx, name)
    , m_params                           (params)
    , m_computePipelineConstructionType  (computePipelineConstructionType)
{}

void MaxWorkGroupSizeTest::initPrograms (vk::SourceCollections& programCollection) const
{
    std::ostringstream shader;

    // The actual local sizes will be set using spec constants when running the test instance.
    shader
        << "#version 450\n"
        << "\n"
        << "layout(constant_id=0) const int local_size_x_val = 1;\n"
        << "layout(constant_id=1) const int local_size_y_val = 1;\n"
        << "layout(constant_id=2) const int local_size_z_val = 1;\n"
        << "\n"
        << "layout(local_size_x_id=0, local_size_y_id=1, local_size_z_id=2) in;\n"
        << "\n"
        << "layout(set=0, binding=0) buffer StorageBuffer {\n"
        << "    uint values[];\n"
        << "} ssbo;\n"
        << "\n"
        << "void main() {\n"
        << "    ssbo.values[gl_LocalInvocationIndex] = 1u;\n"
        << "}\n"
        ;

    programCollection.glslSources.add("comp") << glu::ComputeSource(shader.str());
}

TestInstance* MaxWorkGroupSizeTest::createInstance (Context& context) const
{
    return new MaxWorkGroupSizeInstance(context, m_params, m_computePipelineConstructionType);
}

void MaxWorkGroupSizeTest::checkSupport (Context& context) const
{
    const auto& vki            = context.getInstanceInterface();
    const auto  physicalDevice = context.getPhysicalDevice();

    const auto properties  = vk::getPhysicalDeviceProperties(vki, physicalDevice);
    const auto invocations = getInvocations(m_params, vki, physicalDevice, &properties);

    if (invocations > properties.limits.maxComputeWorkGroupInvocations)
        TCU_FAIL("Reported workgroup size limit in the axis is greater than the global invocation limit");

    if (properties.limits.maxStorageBufferRange / static_cast<deUint32>(sizeof(deUint32)) < invocations)
        TCU_THROW(NotSupportedError, "Maximum supported storage buffer range too small");

    checkShaderObjectRequirements(vki, physicalDevice, m_computePipelineConstructionType);
}

MaxWorkGroupSizeInstance::MaxWorkGroupSizeInstance (Context& context, const MaxWorkGroupSizeTest::Params& params, const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : vkt::TestInstance                  (context)
    , m_params                           (params)
    , m_computePipelineConstructionType  (computePipelineConstructionType)
{}

tcu::TestStatus MaxWorkGroupSizeInstance::iterate (void)
{
    const auto& vki            = m_context.getInstanceInterface();
    const auto& vkd            = m_context.getDeviceInterface();
    const auto  physicalDevice = m_context.getPhysicalDevice();
    const auto  device         = m_context.getDevice();
    auto&       alloc          = m_context.getDefaultAllocator();
    const auto  queueIndex     = m_context.getUniversalQueueFamilyIndex();
    const auto  queue          = m_context.getUniversalQueue();
    auto&       log            = m_context.getTestContext().getLog();

    const auto axis        = MaxWorkGroupSizeTest::getIndex(m_params.axis);
    const auto invocations = MaxWorkGroupSizeTest::getInvocations(m_params, vki, physicalDevice);
    const auto ssboSize    = static_cast<vk::VkDeviceSize>(MaxWorkGroupSizeTest::getSSBOSize(invocations));

    log
        << tcu::TestLog::Message
        << "Running test with " << invocations << " invocations on axis " << axis << " using a storage buffer size of " << ssboSize << " bytes"
        << tcu::TestLog::EndMessage
        ;

    // Main SSBO buffer.
    const auto ssboInfo = vk::makeBufferCreateInfo(ssboSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
    vk::BufferWithMemory ssbo (vkd, device, alloc, ssboInfo, vk::MemoryRequirement::HostVisible);

    // Descriptor set layouts.
    vk::DescriptorSetLayoutBuilder layoutBuilder;
    layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
    const auto descriptorSetLayout = layoutBuilder.build(vkd, device);

    // Specialization constants: set the number of invocations in the appropriate local size id.
    const auto entrySize = static_cast<deUintptr>(sizeof(deInt32));
    deInt32 specializationData[3] = { 1, 1, 1 };
    specializationData[axis] = static_cast<deInt32>(invocations);

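    // Three map entries, one per local_size_*_id constant, each taking sizeof(deInt32) bytes
    // at consecutive offsets in specializationData.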
    const vk::VkSpecializationMapEntry specializationMaps[3] =
    {
        {
            0u,                                       // deUint32    constantID;
            0u,                                       // deUint32    offset;
            entrySize,                                // deUintptr   size;
        },
        {
            1u,                                       // deUint32    constantID;
            static_cast<deUint32>(entrySize),         // deUint32    offset;
            entrySize,                                // deUintptr   size;
        },
        {
            2u,                                       // deUint32    constantID;
            static_cast<deUint32>(entrySize * 2u),    // deUint32    offset;
            entrySize,                                // deUintptr   size;
        },
    };

    const vk::VkSpecializationInfo specializationInfo =
    {
        3u,                                                     // deUint32                          mapEntryCount;
        specializationMaps,                                     // const VkSpecializationMapEntry*   pMapEntries;
        static_cast<deUintptr>(sizeof(specializationData)),     // deUintptr                         dataSize;
        specializationData,                                     // const void*                       pData;
    };

    ComputePipelineWrapper testPipeline (vkd, device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("comp"));
    testPipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    testPipeline.setSpecializationInfo(specializationInfo);
    testPipeline.buildPipeline();

    // Create descriptor pool and set.
    vk::DescriptorPoolBuilder poolBuilder;
    poolBuilder.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
    const auto descriptorPool = poolBuilder.build(vkd, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
    const auto descriptorSet  = vk::makeDescriptorSet(vkd, device, descriptorPool.get(), descriptorSetLayout.get());

    // Update descriptor set.
    const vk::VkDescriptorBufferInfo ssboBufferInfo =
    {
        ssbo.get(),       // VkBuffer        buffer;
        0u,               // VkDeviceSize    offset;
        VK_WHOLE_SIZE,    // VkDeviceSize    range;
    };

    vk::DescriptorSetUpdateBuilder updateBuilder;
    updateBuilder.writeSingle(descriptorSet.get(), vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &ssboBufferInfo);
    updateBuilder.update(vkd, device);

    // Clear buffer.
    auto& ssboAlloc = ssbo.getAllocation();
    void* ssboPtr   = ssboAlloc.getHostPtr();
    deMemset(ssboPtr, 0, static_cast<size_t>(ssboSize));
    vk::flushAlloc(vkd, device, ssboAlloc);

    // Run pipelines.
    const auto cmdPool = vk::makeCommandPool(vkd, device, queueIndex);
    const auto cmdBufferPtr = vk::allocateCommandBuffer(vkd, device, cmdPool.get(), vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY);
    const auto cmdBuffer    = cmdBufferPtr.get();

    vk::beginCommandBuffer(vkd, cmdBuffer);

    // Run the main test shader.
    const auto hostToComputeBarrier = vk::makeBufferMemoryBarrier(vk::VK_ACCESS_HOST_WRITE_BIT, vk::VK_ACCESS_SHADER_WRITE_BIT, ssbo.get(), 0ull, VK_WHOLE_SIZE);
    vkd.cmdPipelineBarrier(cmdBuffer, vk::VK_PIPELINE_STAGE_HOST_BIT, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 1u, &hostToComputeBarrier, 0u, nullptr);

    testPipeline.bind(cmdBuffer);
    vkd.cmdBindDescriptorSets(cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, testPipeline.getPipelineLayout(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
    vkd.cmdDispatch(cmdBuffer, 1u, 1u, 1u);

    const auto computeToHostBarrier = vk::makeBufferMemoryBarrier(vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, ssbo.get(), 0ull, VK_WHOLE_SIZE);
    vkd.cmdPipelineBarrier(cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr, 1u, &computeToHostBarrier, 0u, nullptr);

    vk::endCommandBuffer(vkd, cmdBuffer);
    vk::submitCommandsAndWait(vkd, device, queue, cmdBuffer);

    // Verify buffer contents.
    vk::invalidateAlloc(vkd, device, ssboAlloc);
    std::unique_ptr<deUint32[]> valuesArray (new deUint32[invocations]);
    deUint32* valuesPtr = valuesArray.get();
    deMemcpy(valuesPtr, ssboPtr, static_cast<size_t>(ssboSize));

    std::string errorMsg;
    bool ok = true;

    for (size_t i = 0; i < invocations; ++i)
    {
        if (valuesPtr[i] != 1u)
        {
            ok = false;
            errorMsg = "Found invalid value for invocation index " + de::toString(i) + ": expected 1u and found " + de::toString(valuesPtr[i]);
            break;
        }
    }

    if (!ok)
        return tcu::TestStatus::fail(errorMsg);
    return tcu::TestStatus::pass("Pass");
}

namespace EmptyShaderTest
{

void checkSupport (Context& context, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), computePipelineConstructionType);
}

void createProgram (SourceCollections& dst, vk::ComputePipelineConstructionType)
{
    dst.glslSources.add("comp") << glu::ComputeSource(
        "#version 310 es\n"
        "layout (local_size_x = 1) in;\n"
        "void main (void) {}\n"
    );
}

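// Records a single 1x1x1 dispatch of the empty shader and submits it; the test passes if the
// submission simply completes without errors.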
tcu::TestStatus createTest (Context& context, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    const DeviceInterface& vk               = context.getDeviceInterface();
    const VkDevice         device           = context.getDevice();
    const VkQueue          queue            = context.getUniversalQueue();
    const deUint32         queueFamilyIndex = context.getUniversalQueueFamilyIndex();

    ComputePipelineWrapper pipeline (vk, device, computePipelineConstructionType, context.getBinaryCollection().get("comp"));
    pipeline.buildPipeline();

    const Unique<VkCommandPool>   cmdPool   (makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline.bind(*cmdBuffer);

    const tcu::IVec3 workGroups (1, 1, 1);
    vk.cmdDispatch(*cmdBuffer, workGroups.x(), workGroups.y(), workGroups.z());

    endCommandBuffer(vk, *cmdBuffer);

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    return tcu::TestStatus::pass("Compute succeeded");
}

} // EmptyShaderTest

namespace ComputeOnlyQueueTests
{

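// Returns the index of the first queue family that supports compute but not graphics, if any.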
tcu::Maybe<uint32_t> getComputeOnlyQueueFamily (Context& context)
{
    bool     foundQueue = false;
    uint32_t index      = 0;

    auto queueFamilies = getPhysicalDeviceQueueFamilyProperties(context.getInstanceInterface(), context.getPhysicalDevice());

    for (const auto& queueFamily : queueFamilies)
    {
        if ((queueFamily.queueFlags & VK_QUEUE_COMPUTE_BIT) &&
            !(queueFamily.queueFlags & VK_QUEUE_GRAPHICS_BIT))
        {
            foundQueue = true;
            break;
        }
        else
        {
            index++;
        }
    }
    if (!foundQueue)
    {
        return tcu::Maybe<uint32_t>();
    }
    else
    {
        return index;
    }
}

// Creates a device that has a queue for compute capabilities without graphics.
Move<VkDevice> createComputeOnlyDevice (Context& context, uint32_t& queueFamilyIndex)
{
    const auto& instanceDriver = context.getInstanceInterface();
    const auto  physicalDevice = context.getPhysicalDevice();
    const auto  queueFamilies  = getPhysicalDeviceQueueFamilyProperties(instanceDriver, physicalDevice);

    // One queue family without a graphics bit should be found, since this is checked in checkSupport.
    queueFamilyIndex = getComputeOnlyQueueFamily(context).get();

    const float queuePriority = 1.0f;
    const VkDeviceQueueCreateInfo deviceQueueCreateInfos =
    {
        VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,    // VkStructureType             sType;
        nullptr,                                       // const void*                 pNext;
        (VkDeviceQueueCreateFlags)0u,                  // VkDeviceQueueCreateFlags    flags;
        queueFamilyIndex,                              // uint32_t                    queueFamilyIndex;
        1u,                                            // uint32_t                    queueCount;
        &queuePriority,                                // const float*                pQueuePriorities;
    };

    void* pNext = nullptr;
#ifdef CTS_USES_VULKANSC
    VkDeviceObjectReservationCreateInfo memReservationInfo =
        context.getTestContext().getCommandLine().isSubProcess() ? context.getResourceInterface()->getStatMax() : resetDeviceObjectReservationCreateInfo();
    pNext = &memReservationInfo;

    VkPipelineCacheCreateInfo pcCI;
    std::vector<VkPipelinePoolSize> poolSizes;
    if (context.getTestContext().getCommandLine().isSubProcess())
    {
        if (context.getResourceInterface()->getCacheDataSize() > 0)
        {
            pcCI =
            {
                VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,                // VkStructureType               sType;
                nullptr,                                                     // const void*                   pNext;
                VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
                    VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT,    // VkPipelineCacheCreateFlags    flags;
                context.getResourceInterface()->getCacheDataSize(),          // deUintptr                     initialDataSize;
                context.getResourceInterface()->getCacheData()               // const void*                   pInitialData;
            };
            memReservationInfo.pipelineCacheCreateInfoCount = 1;
            memReservationInfo.pPipelineCacheCreateInfos    = &pcCI;
        }
        poolSizes = context.getResourceInterface()->getPipelinePoolSizes();
        if (!poolSizes.empty())
        {
            memReservationInfo.pipelinePoolSizeCount = deUint32(poolSizes.size());
            memReservationInfo.pPipelinePoolSizes    = poolSizes.data();
        }
    }
#endif // CTS_USES_VULKANSC
    const VkDeviceCreateInfo deviceCreateInfo =
    {
        VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,    // VkStructureType                    sType;
        pNext,                                   // const void*                        pNext;
        (VkDeviceCreateFlags)0u,                 // VkDeviceCreateFlags                flags;
        1u,                                      // uint32_t                           queueCreateInfoCount;
        &deviceQueueCreateInfos,                 // const VkDeviceQueueCreateInfo*     pQueueCreateInfos;
        0u,                                      // uint32_t                           enabledLayerCount;
        nullptr,                                 // const char* const*                 ppEnabledLayerNames;
        0u,                                      // uint32_t                           enabledExtensionCount;
        nullptr,                                 // const char* const*                 ppEnabledExtensionNames;
        nullptr,                                 // const VkPhysicalDeviceFeatures*    pEnabledFeatures;
    };

    return vkt::createCustomDevice(context.getTestContext().getCommandLine().isValidationEnabled(),
                                   context.getPlatformInterface(),
                                   context.getInstance(),
                                   instanceDriver, physicalDevice, &deviceCreateInfo);
}

class SecondaryCommandBufferComputeOnlyTest : public vkt::TestCase
{
public:
    SecondaryCommandBufferComputeOnlyTest (tcu::TestContext& context, const std::string& name)
        : vkt::TestCase(context, name)
    {}

    void initPrograms (SourceCollections& programCollection) const override;
    TestInstance* createInstance (Context& context) const override;
    void checkSupport (Context& context) const override;
};

class SecondaryCommandBufferComputeOnlyTestInstance : public vkt::TestInstance
{
public:
    SecondaryCommandBufferComputeOnlyTestInstance (Context& context)
        : vkt::TestInstance(context)
    {}
    virtual tcu::TestStatus iterate (void);
};

void SecondaryCommandBufferComputeOnlyTest::initPrograms (SourceCollections& collection) const
{
    std::ostringstream src;
    src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
        << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        << "layout(set = 0, binding = 0, std430) buffer Out\n"
        << "{\n"
        << "    uint data[];\n"
        << "};\n"
        << "void main (void)\n"
        << "{\n"
        << "    data[0] = 1;\n"
        << "}\n";
    collection.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance* SecondaryCommandBufferComputeOnlyTest::createInstance (Context& context) const
{
    return new SecondaryCommandBufferComputeOnlyTestInstance(context);
}

void SecondaryCommandBufferComputeOnlyTest::checkSupport (Context& context) const
{
    // Find at least one queue family that supports compute but does not support graphics.
    if (!getComputeOnlyQueueFamily(context))
        TCU_THROW(NotSupportedError, "No queue family found that supports compute but not graphics");
}

tcu::TestStatus SecondaryCommandBufferComputeOnlyTestInstance::iterate (void)
{
    const InstanceInterface& vki = m_context.getInstanceInterface();
#ifdef CTS_USES_VULKANSC
    de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter> deviceDriver;
#else
    de::MovePtr<DeviceDriver> deviceDriver;
#endif // CTS_USES_VULKANSC
    VkDevice device;
    uint32_t queueFamilyIndex;
    auto customDevice = createComputeOnlyDevice(m_context, queueFamilyIndex);
    device = customDevice.get();
#ifndef CTS_USES_VULKANSC
    deviceDriver = de::MovePtr<DeviceDriver>(new DeviceDriver(m_context.getPlatformInterface(), m_context.getInstance(), device, m_context.getUsedApiVersion()));
#else
    deviceDriver = de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter>(new DeviceDriverSC(m_context.getPlatformInterface(), m_context.getInstance(), device,
        m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(), m_context.getDeviceVulkanSC10Properties(),
        m_context.getDeviceProperties(), m_context.getUsedApiVersion()), DeinitDeviceDeleter(m_context.getResourceInterface().get(), device));
#endif // CTS_USES_VULKANSC

    const DeviceInterface& vkdi = *deviceDriver;

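    // The context's default queue and allocator belong to the default device, so the
    // compute-only queue and a fresh allocator have to be obtained for the custom device.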
    auto queue     = getDeviceQueue(vkdi, device, queueFamilyIndex, 0u);
    auto allocator = de::MovePtr<Allocator>(new SimpleAllocator(vkdi, device, getPhysicalDeviceMemoryProperties(vki, m_context.getPhysicalDevice())));

    const auto bufferSize = static_cast<VkDeviceSize>(sizeof(uint32_t));
    BufferWithMemory buffer (vkdi, device, *allocator.get(), makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
    auto& bufferAlloc = buffer.getAllocation();
    void* bufferData  = bufferAlloc.getHostPtr();
    deMemset(bufferData, 0, sizeof(uint32_t));
    flushAlloc(vkdi, device, bufferAlloc);

    DescriptorSetLayoutBuilder layoutBuilder;
    layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
    Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vkdi, device));

    DescriptorPoolBuilder poolBuilder;
    poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
    const auto descriptorPool      = poolBuilder.build(vkdi, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
    const auto descriptorSetBuffer = makeDescriptorSet(vkdi, device, descriptorPool.get(), descriptorSetLayout.get());

    // Update descriptor sets.
    DescriptorSetUpdateBuilder updater;

    const auto bufferInfo = makeDescriptorBufferInfo(buffer.get(), 0ull, bufferSize);
    updater.writeSingle(descriptorSetBuffer.get(), DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferInfo);

    updater.update(vkdi, device);

    auto shader = createShaderModule(vkdi, device, m_context.getBinaryCollection().get("comp"));
    // Create compute pipeline
    const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vkdi, device, *descriptorSetLayout));
    const Unique<VkPipeline> computePipeline (makeComputePipeline(vkdi, device, *pipelineLayout, *shader));

    // Create command buffers
    const Unique<VkCommandPool>   cmdPool    (makeCommandPool(vkdi, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer  (allocateCommandBuffer(vkdi, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
    const Unique<VkCommandBuffer> cmdBuffer2 (allocateCommandBuffer(vkdi, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_SECONDARY));

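    // The secondary command buffer is recorded outside a render pass, so the inheritance info
    // leaves renderPass and framebuffer as null handles.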
    const VkCommandBufferInheritanceInfo bufferInheritanceInfo =
    {
        VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO,    // VkStructureType                  sType;
        nullptr,                                              // const void*                      pNext;
        VK_NULL_HANDLE,                                       // VkRenderPass                     renderPass;
        0u,                                                   // deUint32                         subpass;
        VK_NULL_HANDLE,                                       // VkFramebuffer                    framebuffer;
        VK_FALSE,                                             // VkBool32                         occlusionQueryEnable;
        (VkQueryControlFlags)0u,                              // VkQueryControlFlags              queryFlags;
        (VkQueryPipelineStatisticFlags)0u                     // VkQueryPipelineStatisticFlags    pipelineStatistics;
    };

    VkCommandBufferUsageFlags usageFlags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
    const VkCommandBufferBeginInfo commandBufBeginParams =
    {
        VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,          // VkStructureType                          sType;
        nullptr,                                              // const void*                              pNext;
        usageFlags,                                           // VkCommandBufferUsageFlags                flags;
        &bufferInheritanceInfo                                // const VkCommandBufferInheritanceInfo*    pInheritanceInfo;
    };

    beginCommandBuffer(vkdi, cmdBuffer.get());
    vkdi.beginCommandBuffer(cmdBuffer2.get(), &commandBufBeginParams);
    vkdi.cmdBindPipeline(cmdBuffer2.get(), VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.get());
    vkdi.cmdBindDescriptorSets(cmdBuffer2.get(), VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout.get(), 0u, 1u, &descriptorSetBuffer.get(), 0u, nullptr);
    vkdi.cmdDispatch(cmdBuffer2.get(), 1u, 1u, 1u);
    endCommandBuffer(vkdi, cmdBuffer2.get());
    vkdi.cmdExecuteCommands(cmdBuffer.get(), 1u, &cmdBuffer2.get());
    const VkBufferMemoryBarrier renderBufferBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, buffer.get(), 0ull, bufferSize);
    cmdPipelineBufferMemoryBarrier(vkdi, cmdBuffer.get(), VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &renderBufferBarrier);
    endCommandBuffer(vkdi, cmdBuffer.get());
    submitCommandsAndWait(vkdi, device, queue, cmdBuffer.get());

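    // Read back the buffer: the secondary command buffer's dispatch should have written 1 to data[0].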
    invalidateAlloc(vkdi, device, bufferAlloc);

    uint32_t result = 0;
    deMemcpy(&result, bufferData, sizeof(uint32_t));
    if (result != 1)
    {
        return tcu::TestStatus::fail("Unexpected value in buffer: expected 1, got " + de::toString(result));
    }

    return tcu::TestStatus::pass("passed");
}

} // ComputeOnlyQueueTests

} // anonymous

inline TestCase* createFunctionCaseWithPrograms2 (tcu::TestContext& testCtx,
                                                  tcu::TestNodeType type,
                                                  const std::string& name,
                                                  FunctionSupport0::Function checkSupport,
                                                  FunctionPrograms0::Function initPrograms,
                                                  FunctionInstance0::Function testFunction)
{
    return new InstanceFactory1WithSupport<FunctionInstance0, FunctionInstance0::Function, FunctionSupport0, FunctionPrograms0>(
        testCtx, type, name, FunctionPrograms0(initPrograms), testFunction, checkSupport);
}

tcu::TestCaseGroup* createBasicComputeShaderTests (tcu::TestContext& testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    // Basic compute tests
    de::MovePtr<tcu::TestCaseGroup> basicComputeTests (new tcu::TestCaseGroup(testCtx, "basic"));

    // Shader that does nothing
    addFunctionCaseWithPrograms(basicComputeTests.get(), "empty_shader", EmptyShaderTest::checkSupport, EmptyShaderTest::createProgram, EmptyShaderTest::createTest, computePipelineConstructionType);

    // Concurrent compute test
    basicComputeTests->addChild(new ConcurrentCompute(testCtx, "concurrent_compute", computePipelineConstructionType));

    // Use an empty workgroup with size 0 on the X axis
    basicComputeTests->addChild(new EmptyWorkGroupCase(testCtx, "empty_workgroup_x", tcu::UVec3(0u, 2u, 3u), computePipelineConstructionType));
    // Use an empty workgroup with size 0 on the Y axis
    basicComputeTests->addChild(new EmptyWorkGroupCase(testCtx, "empty_workgroup_y", tcu::UVec3(2u, 0u, 3u), computePipelineConstructionType));
    // Use an empty workgroup with size 0 on the Z axis
    basicComputeTests->addChild(new EmptyWorkGroupCase(testCtx, "empty_workgroup_z", tcu::UVec3(2u, 3u, 0u), computePipelineConstructionType));
    // Use an empty workgroup with size 0 on the X, Y and Z axes
    basicComputeTests->addChild(new EmptyWorkGroupCase(testCtx, "empty_workgroup_all", tcu::UVec3(0u, 0u, 0u), computePipelineConstructionType));

    // Use the maximum work group size on the X axis
    basicComputeTests->addChild(new MaxWorkGroupSizeTest(testCtx, "max_local_size_x", MaxWorkGroupSizeTest::Params{MaxWorkGroupSizeTest::Axis::X}, computePipelineConstructionType));
    // Use the maximum work group size on the Y axis
    basicComputeTests->addChild(new MaxWorkGroupSizeTest(testCtx, "max_local_size_y", MaxWorkGroupSizeTest::Params{MaxWorkGroupSizeTest::Axis::Y}, computePipelineConstructionType));
    // Use the maximum work group size on the Z axis
    basicComputeTests->addChild(new MaxWorkGroupSizeTest(testCtx, "max_local_size_z", MaxWorkGroupSizeTest::Params{MaxWorkGroupSizeTest::Axis::Z}, computePipelineConstructionType));
    // Copy from UBO to SSBO, inverting bits
    basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_single_invocation", 256, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1), computePipelineConstructionType));
    basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_single_group", 1024, tcu::IVec3(2,1,4), tcu::IVec3(1,1,1), computePipelineConstructionType));
    basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_multiple_invocations", 1024, tcu::IVec3(1,1,1), tcu::IVec3(2,4,1), computePipelineConstructionType));
    basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_multiple_groups", 1024, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4), computePipelineConstructionType));

    // Copy between SSBOs, inverting bits
    basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_single_invocation", 256, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1), computePipelineConstructionType));
    basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_multiple_invocations", 1024, tcu::IVec3(1,1,1), tcu::IVec3(2,4,1), computePipelineConstructionType));
    basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_multiple_groups", 1024, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4), computePipelineConstructionType));

    // Read and write same SSBO
    basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_rw_single_invocation", 256, true, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1), computePipelineConstructionType));
    basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_rw_multiple_groups", 1024, true, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4), computePipelineConstructionType));
    basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_unsized_arr_single_invocation", 256, false, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1), computePipelineConstructionType));
    basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_unsized_arr_multiple_groups", 1024, false, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4), computePipelineConstructionType));

    // Write to multiple SSBOs
    basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_arr_single_invocation", 256, true, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1), computePipelineConstructionType));
    basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_arr_multiple_groups", 1024, true, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4), computePipelineConstructionType));
    basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_unsized_arr_single_invocation", 256, false, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1), computePipelineConstructionType));
    basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_unsized_arr_multiple_groups", 1024, false, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4), computePipelineConstructionType));

    // SSBO local barrier usage
    basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_single_invocation", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1), computePipelineConstructionType));
    basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_single_group", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1), computePipelineConstructionType));
    basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_multiple_groups", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3), computePipelineConstructionType));

    // SSBO memory barrier usage
    basicComputeTests->addChild(new SSBOBarrierTest(testCtx, "ssbo_cmd_barrier_single", tcu::IVec3(1,1,1), computePipelineConstructionType));
    basicComputeTests->addChild(new SSBOBarrierTest(testCtx, "ssbo_cmd_barrier_multiple", tcu::IVec3(11,5,7), computePipelineConstructionType));

    // Basic shared variable usage
    basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_single_invocation", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1), computePipelineConstructionType));
    basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_single_group", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1), computePipelineConstructionType));
    basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_multiple_invocations", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4), computePipelineConstructionType));
    basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_multiple_groups", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3), computePipelineConstructionType));

    // Atomic operation with shared var
    basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_single_invocation", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1), computePipelineConstructionType));
    basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_single_group", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1), computePipelineConstructionType));
    basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_multiple_invocations", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4), computePipelineConstructionType));
    basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_multiple_groups", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3), computePipelineConstructionType));

    // Image to SSBO copy
    basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx, "copy_image_to_ssbo_small", tcu::IVec2(1,1), tcu::IVec2(64,64), computePipelineConstructionType));
    basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx, "copy_image_to_ssbo_large", tcu::IVec2(2,4), tcu::IVec2(512,512), computePipelineConstructionType));

    // SSBO to image copy
    basicComputeTests->addChild(new CopySSBOToImageTest(testCtx, "copy_ssbo_to_image_small", tcu::IVec2(1, 1), tcu::IVec2(64, 64), computePipelineConstructionType));
    basicComputeTests->addChild(new CopySSBOToImageTest(testCtx, "copy_ssbo_to_image_large", tcu::IVec2(2, 4), tcu::IVec2(512, 512), computePipelineConstructionType));

    // Atomic operation with image
    basicComputeTests->addChild(new ImageAtomicOpTest(testCtx, "image_atomic_op_local_size_1", 1, tcu::IVec2(64,64), computePipelineConstructionType));
    basicComputeTests->addChild(new ImageAtomicOpTest(testCtx, "image_atomic_op_local_size_8", 8, tcu::IVec2(64,64), computePipelineConstructionType));

    // Image barrier
    basicComputeTests->addChild(new ImageBarrierTest(testCtx, "image_barrier_single", tcu::IVec2(1,1), computePipelineConstructionType));
    basicComputeTests->addChild(new ImageBarrierTest(testCtx, "image_barrier_multiple", tcu::IVec2(64,64), computePipelineConstructionType));

    // Test secondary command buffers in compute only queues
    basicComputeTests->addChild(new ComputeOnlyQueueTests::SecondaryCommandBufferComputeOnlyTest(testCtx, "secondary_compute_only_queue"));

#ifndef CTS_USES_VULKANSC
    basicComputeTests->addChild(cts_amber::createAmberTestCase(testCtx, "write_ssbo_array", "", "compute", "write_ssbo_array.amber"));
    basicComputeTests->addChild(cts_amber::createAmberTestCase(testCtx, "branch_past_barrier", "", "compute", "branch_past_barrier.amber"));
    basicComputeTests->addChild(cts_amber::createAmberTestCase(testCtx, "webgl_spirv_loop", "Simple SPIR-V loop from a WebGL example that caused problems in some implementations", "compute", "webgl_spirv_loop.amber"));
#endif

    return basicComputeTests.release();
}

tcu::TestCaseGroup* createBasicDeviceGroupComputeShaderTests (tcu::TestContext& testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    de::MovePtr<tcu::TestCaseGroup> deviceGroupComputeTests (new tcu::TestCaseGroup(testCtx, "device_group"));

    deviceGroupComputeTests->addChild(new DispatchBaseTest(testCtx, "dispatch_base", 32768, tcu::IVec3(4,2,4), tcu::IVec3(16,8,8), tcu::IVec3(4,8,8), computePipelineConstructionType, false));
#ifndef CTS_USES_VULKANSC
    deviceGroupComputeTests->addChild(new DispatchBaseTest(testCtx, "dispatch_base_maintenance5", 32768, tcu::IVec3(4, 2, 4), tcu::IVec3(16, 8, 8), tcu::IVec3(4, 8, 8), computePipelineConstructionType, true));
#endif
    deviceGroupComputeTests->addChild(new DeviceIndexTest(testCtx, "device_index", 96, tcu::IVec3(3,2,1), tcu::IVec3(2,4,1), computePipelineConstructionType));

    return deviceGroupComputeTests.release();
}
} // compute
} // vkt
