1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2021 Google LLC.
6 *
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Tests that compute shaders have a subgroup size that is uniform in
23 * command scope.
24 *//*--------------------------------------------------------------------*/
25
26 #include "deUniquePtr.hpp"
27
28 #include "vkRef.hpp"
29 #include "vkRefUtil.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkMemUtil.hpp"
32 #include "vkBuilderUtil.hpp"
33 #include "vkCmdUtil.hpp"
34 #include "vkObjUtil.hpp"
35 #include "vkTypeUtil.hpp"
36 #include "vkImageWithMemory.hpp"
37 #include "vkBarrierUtil.hpp"
38
39 #include "vktTestCaseUtil.hpp"
40
41 using namespace vk;
42
43 namespace vkt
44 {
45 namespace subgroups
46 {
47 namespace
48 {
49 using std::vector;
50 using de::MovePtr;
51
52 class MultipleDispatchesUniformSubgroupSizeInstance : public TestInstance
53 {
54 public:
55 MultipleDispatchesUniformSubgroupSizeInstance (Context& context);
56 tcu::TestStatus iterate (void);
57 };
58
MultipleDispatchesUniformSubgroupSizeInstance(Context & context)59 MultipleDispatchesUniformSubgroupSizeInstance::MultipleDispatchesUniformSubgroupSizeInstance (Context& context)
60 :TestInstance (context)
61 {
62 }
63
iterate(void)64 tcu::TestStatus MultipleDispatchesUniformSubgroupSizeInstance::iterate (void)
65 {
66 const DeviceInterface& vk = m_context.getDeviceInterface();
67 const VkDevice device = m_context.getDevice();
68 Allocator& allocator = m_context.getDefaultAllocator();
69 const VkQueue queue = m_context.getUniversalQueue();
70 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
71
72 const Move<VkCommandPool> cmdPool = createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
73 const Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
74
75 Move<VkShaderModule> computeShader = createShaderModule (vk, device, m_context.getBinaryCollection().get("comp"), 0u);
76
77 // The number of invocations in a workgroup.
78 const deUint32 maxLocalSize = m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
79
80 // Create a storage buffer to hold the sizes of subgroups.
81 const VkDeviceSize bufferSize = maxLocalSize * 2 * sizeof(deUint32);
82
83 const VkBufferCreateInfo resultBufferCreateInfo = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
84 Move<VkBuffer> resultBuffer = createBuffer(vk, device, &resultBufferCreateInfo);
85 MovePtr<Allocation> resultBufferMemory = allocator.allocate(getBufferMemoryRequirements(vk, device, *resultBuffer), MemoryRequirement::HostVisible);
86
87 VK_CHECK(vk.bindBufferMemory(device, *resultBuffer, resultBufferMemory->getMemory(), resultBufferMemory->getOffset()));
88
89 // Build descriptors for the storage buffer
90 const Unique<VkDescriptorPool> descriptorPool (DescriptorPoolBuilder().addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
91 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
92 const auto descriptorSetLayout1 (DescriptorSetLayoutBuilder().addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, VK_SHADER_STAGE_COMPUTE_BIT)
93 .build(vk, device));
94 const VkDescriptorBufferInfo resultInfo = makeDescriptorBufferInfo(*resultBuffer, 0u,
95 (VkDeviceSize) bufferSize - maxLocalSize * sizeof(deUint32));
96
97 const VkDescriptorSetAllocateInfo allocInfo =
98 {
99 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // sType
100 DE_NULL, // pNext
101 *descriptorPool, // descriptorPool
102 1u, // descriptorSetCount
103 &(*descriptorSetLayout1) // pSetLayouts
104 };
105
106 Move<VkDescriptorSet> descriptorSet = allocateDescriptorSet(vk, device, &allocInfo);
107 DescriptorSetUpdateBuilder builder;
108
109 builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, &resultInfo);
110 builder.update(vk, device);
111
112 // Compute pipeline
113 const Move<VkPipelineLayout> computePipelineLayout = makePipelineLayout (vk, device, *descriptorSetLayout1);
114
115 for (deUint32 localSize1 = 8; localSize1 < maxLocalSize + 1; localSize1 *= 2)
116 {
117 for (deUint32 localSize2 = 8; localSize2 < maxLocalSize + 1; localSize2 *= 2)
118 {
119 // On each iteration, change the number of invocations which might affect
120 // the subgroup size if the driver doesn't behave as expected.
121 const VkSpecializationMapEntry entries =
122 {
123 0u, // deUint32 constantID;
124 0u, // deUint32 offset;
125 sizeof(localSize1) // size_t size;
126 };
127 const VkSpecializationInfo specInfo =
128 {
129 1, // mapEntryCount
130 &entries, // pMapEntries
131 sizeof(localSize1), // dataSize
132 &localSize1 // pData
133 };
134 const VkSpecializationInfo specInfo2 =
135 {
136 1, // mapEntryCount
137 &entries, // pMapEntries
138 sizeof(localSize2), // dataSize
139 &localSize2 // pData
140 };
141
142 const VkPipelineShaderStageCreateInfo shaderStageCreateInfo =
143 {
144 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // sType
145 DE_NULL, // pNext
146 VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT, // flags
147 VK_SHADER_STAGE_COMPUTE_BIT, // stage
148 *computeShader, // module
149 "main", // pName
150 &specInfo, // pSpecializationInfo
151 };
152
153 const VkPipelineShaderStageCreateInfo shaderStageCreateInfo2 =
154 {
155 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // sType
156 DE_NULL, // pNext
157 VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT, // flags
158 VK_SHADER_STAGE_COMPUTE_BIT, // stage
159 *computeShader, // module
160 "main", // pName
161 &specInfo2, // pSpecializationInfo
162 };
163
164 const VkComputePipelineCreateInfo pipelineCreateInfo =
165 {
166 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // sType
167 DE_NULL, // pNext
168 0u, // flags
169 shaderStageCreateInfo, // stage
170 *computePipelineLayout, // layout
171 (VkPipeline) 0, // basePipelineHandle
172 0u, // basePipelineIndex
173 };
174
175 const VkComputePipelineCreateInfo pipelineCreateInfo2 =
176 {
177 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // sType
178 DE_NULL, // pNext
179 0u, // flags
180 shaderStageCreateInfo2, // stage
181 *computePipelineLayout, // layout
182 (VkPipeline) 0, // basePipelineHandle
183 0u, // basePipelineIndex
184 };
185
186 Move<VkPipeline> computePipeline = createComputePipeline(vk, device, (VkPipelineCache) 0u, &pipelineCreateInfo);
187 Move<VkPipeline> computePipeline2 = createComputePipeline(vk, device, (VkPipelineCache) 0u, &pipelineCreateInfo2);
188
189 beginCommandBuffer(vk, *cmdBuffer);
190
191 // Clears the values written on the previous iteration.
192 vk.cmdFillBuffer(*cmdBuffer, *resultBuffer, 0u, VK_WHOLE_SIZE, 0);
193
194 const deUint32 zero = 0u;
195 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipelineLayout, 0u, 1u, &descriptorSet.get(), 1, &zero);
196 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
197 vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
198
199 const auto barrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT, *resultBuffer, 0ull, bufferSize);
200 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags) 0,
201 0, (const VkMemoryBarrier *) DE_NULL, 1, &barrier, 0, (const VkImageMemoryBarrier *) DE_NULL);
202
203 const deUint32 offset = static_cast<deUint32>(maxLocalSize * sizeof(deUint32));
204 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipelineLayout, 0u, 1u, &descriptorSet.get(), 1u, &offset);
205 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline2);
206 vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
207
208 endCommandBuffer(vk, *cmdBuffer);
209 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
210
211 invalidateAlloc(vk, device, *resultBufferMemory);
212
213 const deUint32 *res = static_cast<const deUint32 *>(resultBufferMemory->getHostPtr());
214 deUint32 size = 0;
215
216 // Search for the first nonzero size. Then go through the data of both pipelines and check that
217 // the first nonzero size matches with other nonzero values.
218 for (deUint32 i = 0; i < maxLocalSize; i++)
219 {
220 if (res[i] != 0)
221 {
222 size = res[i];
223 break;
224 }
225 }
226
227 // Subgroup size is guaranteed to be at least 1.
228 DE_ASSERT(size > 0);
229
230 for (deUint32 i = 0; i < maxLocalSize * 2; i++)
231 {
232 if (size != res[i] && res[i] != 0)
233 return tcu::TestStatus::fail("Subgroup size not uniform in command scope. " + std::to_string(res[i]) + " != " + std::to_string(size));
234 }
235 }
236 }
237
238 return tcu::TestStatus::pass("pass");
239 }
240
241 class MultipleDispatchesUniformSubgroupSize : public TestCase
242 {
243 public:
244 MultipleDispatchesUniformSubgroupSize (tcu::TestContext& testCtx,
245 const std::string& name,
246 const std::string& description);
247
248 void initPrograms (SourceCollections& programCollection) const;
249 TestInstance* createInstance (Context& context) const;
250 virtual void checkSupport (Context& context) const;
251
252 };
253
MultipleDispatchesUniformSubgroupSize(tcu::TestContext & testCtx,const std::string & name,const std::string & description)254 MultipleDispatchesUniformSubgroupSize::MultipleDispatchesUniformSubgroupSize (tcu::TestContext& testCtx,
255 const std::string& name,
256 const std::string& description)
257 : TestCase (testCtx, name, description)
258 {
259 }
260
checkSupport(Context & context) const261 void MultipleDispatchesUniformSubgroupSize::checkSupport (Context& context) const
262 {
263 const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT& subgroupSizeControlFeatures = context.getSubgroupSizeControlFeaturesEXT();
264
265 if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
266 TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes");
267 }
268
initPrograms(SourceCollections & programCollection) const269 void MultipleDispatchesUniformSubgroupSize::initPrograms (SourceCollections& programCollection) const
270 {
271 std::ostringstream computeSrc;
272 computeSrc
273 << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
274 << "#extension GL_KHR_shader_subgroup_basic : enable\n"
275 << "#extension GL_KHR_shader_subgroup_vote : enable\n"
276 << "#extension GL_KHR_shader_subgroup_ballot : enable\n"
277 << "layout(std430, binding = 0) buffer Outputs { uint sizes[]; };\n"
278
279 << "layout(local_size_x_id = 0) in;\n"
280
281 << "void main()\n"
282 << "{\n"
283 << " if (subgroupElect())\n"
284 << " {\n"
285 << " sizes[gl_WorkGroupID.x * gl_NumSubgroups + gl_SubgroupID] = gl_SubgroupSize;\n"
286 << " }\n"
287 << "}\n";
288
289 programCollection.glslSources.add("comp") << glu::ComputeSource(computeSrc.str())
290 << ShaderBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
291 }
292
createInstance(Context & context) const293 TestInstance* MultipleDispatchesUniformSubgroupSize::createInstance (Context& context) const
294 {
295 return new MultipleDispatchesUniformSubgroupSizeInstance(context);
296 }
297
298 } // anonymous ns
299
createMultipleDispatchesUniformSubgroupSizeTests(tcu::TestContext & testCtx)300 tcu::TestCaseGroup* createMultipleDispatchesUniformSubgroupSizeTests (tcu::TestContext& testCtx)
301 {
302 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "multiple_dispatches", "Multiple dispatches uniform subgroup size tests"));
303
304 testGroup->addChild(new MultipleDispatchesUniformSubgroupSize(testCtx, "uniform_subgroup_size", ""));
305 return testGroup.release();
306 }
307
} // subgroups
309 } // vkt
310