1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2021 Google LLC.
6 *
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Tests that compute shaders have a subgroup size that is uniform in
23 * command scope.
24 *//*--------------------------------------------------------------------*/
25
26 #include "deUniquePtr.hpp"
27
28 #include "vkRef.hpp"
29 #include "vkRefUtil.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkMemUtil.hpp"
32 #include "vkBuilderUtil.hpp"
33 #include "vkCmdUtil.hpp"
34 #include "vkObjUtil.hpp"
35 #include "vkTypeUtil.hpp"
36 #include "vkBufferWithMemory.hpp"
37 #include "vkBarrierUtil.hpp"
38 #include "vktTestCaseUtil.hpp"
39 #include "tcuTestLog.hpp"
40 #include <sstream>
41
42 using namespace vk;
43
44 namespace vkt
45 {
46 namespace subgroups
47 {
48 namespace
49 {
50
51 class MultipleDispatchesUniformSubgroupSizeInstance : public TestInstance
52 {
53 public:
54 MultipleDispatchesUniformSubgroupSizeInstance(Context &context);
55 tcu::TestStatus iterate(void);
56 };
57
// Constructor: only forwards the context to the TestInstance base; all work
// happens in iterate().
MultipleDispatchesUniformSubgroupSizeInstance::MultipleDispatchesUniformSubgroupSizeInstance(Context &context)
    : TestInstance(context)
{
}
62
// For each power-of-two local workgroup size up to the device maximum, runs a
// single compute dispatch whose shader records gl_SubgroupSize once per
// subgroup, then checks on the host that (a) every recorded size is identical
// and (b) the number of recorded subgroups matches the count implied by the
// local size and the observed subgroup size.
tcu::TestStatus MultipleDispatchesUniformSubgroupSizeInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    Allocator &allocator = m_context.getDefaultAllocator();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();

    // RESET_COMMAND_BUFFER_BIT allows re-recording the same command buffer on
    // every loop iteration below.
    const Move<VkCommandPool> cmdPool =
        createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
    const Move<VkCommandBuffer> cmdBuffer =
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

    Move<VkShaderModule> computeShader =
        createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u);

    // The maximum number of invocations in a workgroup.
    const uint32_t maxLocalSize = m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
    const uint32_t minSubgroupSize = m_context.getSubgroupSizeControlProperties().minSubgroupSize;

    // Create a storage buffer to hold the sizes of subgroups.
    // Worst case is one entry per subgroup at the minimum subgroup size; the
    // +1 covers a partial subgroup when maxLocalSize is not an exact multiple.
    const VkDeviceSize bufferSize = (maxLocalSize / minSubgroupSize + 1u) * sizeof(uint32_t);

    // TRANSFER_DST is needed for the vkCmdFillBuffer clear each iteration.
    const VkBufferCreateInfo resultBufferCreateInfo =
        makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
    BufferWithMemory resultBuffer(vk, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible);
    auto &resultBufferAlloc = resultBuffer.getAllocation();

    // Build descriptors for the storage buffer
    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    const auto descriptorSetLayout1(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));
    const VkDescriptorBufferInfo resultInfo = makeDescriptorBufferInfo(*resultBuffer, 0u, bufferSize);

    const VkDescriptorSetAllocateInfo allocInfo = {
        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // sType
        nullptr, // pNext
        *descriptorPool, // descriptorPool
        1u, // descriptorSetCount
        &(*descriptorSetLayout1) // pSetLayouts
    };

    Move<VkDescriptorSet> descriptorSet = allocateDescriptorSet(vk, device, &allocInfo);
    DescriptorSetUpdateBuilder builder;

    builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultInfo);
    builder.update(vk, device);

    // Compute pipeline
    const Move<VkPipelineLayout> computePipelineLayout = makePipelineLayout(vk, device, *descriptorSetLayout1);

    for (uint32_t localSize = 1u; localSize <= maxLocalSize; localSize *= 2u)
    {
        // On each iteration, change the number of invocations which might affect
        // the subgroup size. The local size is fed to the shader through
        // specialization constant 0 (local_size_x_id = 0 in the GLSL source).
        const VkSpecializationMapEntry entries = {
            0u, // uint32_t constantID;
            0u, // uint32_t offset;
            sizeof(localSize) // size_t size;
        };

        const VkSpecializationInfo specInfo = {
            1, // mapEntryCount
            &entries, // pMapEntries
            sizeof(localSize), // dataSize
            &localSize // pData
        };

        // ALLOW_VARYING_SUBGROUP_SIZE lets the implementation pick any
        // subgroup size per pipeline; the test checks it is still uniform
        // within one dispatch.
        const VkPipelineShaderStageCreateInfo shaderStageCreateInfo = {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // sType
            nullptr, // pNext
            VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT, // flags
            VK_SHADER_STAGE_COMPUTE_BIT, // stage
            *computeShader, // module
            "main", // pName
            &specInfo, // pSpecializationInfo
        };

        const VkComputePipelineCreateInfo pipelineCreateInfo = {
            VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // sType
            nullptr, // pNext
            0u, // flags
            shaderStageCreateInfo, // stage
            *computePipelineLayout, // layout
            VK_NULL_HANDLE, // basePipelineHandle
            0u, // basePipelineIndex
        };

        Move<VkPipeline> computePipeline = createComputePipeline(vk, device, VK_NULL_HANDLE, &pipelineCreateInfo);

        beginCommandBuffer(vk, *cmdBuffer);

        // Clears the values in the buffer.
        vk.cmdFillBuffer(*cmdBuffer, *resultBuffer, 0u, VK_WHOLE_SIZE, 0);

        // Make the transfer clear visible to the compute shader's writes.
        const auto fillBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT,
                                                         *resultBuffer, 0ull, bufferSize);
        cmdPipelineBufferMemoryBarrier(vk, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                       VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, &fillBarrier);

        // Runs pipeline.
        vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipelineLayout, 0u, 1u,
                                 &descriptorSet.get(), 0u, nullptr);
        vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
        vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

        // Make the shader writes visible to the host read-back below.
        const auto computeToHostBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
        cmdPipelineMemoryBarrier(vk, *cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                                 &computeToHostBarrier);

        endCommandBuffer(vk, *cmdBuffer);
        submitCommandsAndWait(vk, device, queue, *cmdBuffer);

        invalidateAlloc(vk, device, resultBufferAlloc);

        // Validate results: all non-zero subgroup sizes must be the same.
        // The shader writes one entry per subgroup; untouched slots stay 0.
        const uint32_t *res = static_cast<const uint32_t *>(resultBufferAlloc.getHostPtr());
        const uint32_t maxIters = static_cast<uint32_t>(bufferSize / sizeof(uint32_t));
        uint32_t size = 0u;
        uint32_t subgroupCount = 0u;
        auto &log = m_context.getTestContext().getLog();

        for (uint32_t sizeIdx = 0u; sizeIdx < maxIters; ++sizeIdx)
        {
            if (res[sizeIdx] != 0u)
            {
                if (size == 0u)
                {
                    // First non-zero entry establishes the expected size.
                    size = res[sizeIdx];
                }
                else if (res[sizeIdx] != size)
                {
                    std::ostringstream msg;
                    msg << "Subgroup size not uniform in command scope: " << res[sizeIdx] << " != " << size
                        << " at position " << sizeIdx;
                    TCU_FAIL(msg.str());
                }
                ++subgroupCount;
            }
        }

        // Subgroup size is guaranteed to be at least 1.
        if (size == 0u)
            TCU_FAIL("Subgroup size must be at least 1");

        // The number of reported sizes must match.
        // ceil(localSize / size) subgroups are expected for one workgroup.
        const auto expectedSubgroupCount = (localSize / size + ((localSize % size != 0u) ? 1u : 0u));
        if (subgroupCount != expectedSubgroupCount)
        {
            std::ostringstream msg;
            msg << "Local size " << localSize << " with subgroup size " << size << " resulted in subgroup count "
                << subgroupCount << " != " << expectedSubgroupCount;
            TCU_FAIL(msg.str());
        }

        {
            std::ostringstream msg;
            msg << "Subgroup size " << size << " with local size " << localSize;
            log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
        }
    }

    return tcu::TestStatus::pass("Pass");
}
233
234 class MultipleDispatchesUniformSubgroupSize : public TestCase
235 {
236 public:
237 MultipleDispatchesUniformSubgroupSize(tcu::TestContext &testCtx, const std::string &name);
238
239 void initPrograms(SourceCollections &programCollection) const;
240 TestInstance *createInstance(Context &context) const;
241 virtual void checkSupport(Context &context) const;
242 };
243
// Constructor: only forwards the test context and name to the TestCase base.
MultipleDispatchesUniformSubgroupSize::MultipleDispatchesUniformSubgroupSize(tcu::TestContext &testCtx,
                                                                             const std::string &name)
    : TestCase(testCtx, name)
{
}
249
checkSupport(Context & context) const250 void MultipleDispatchesUniformSubgroupSize::checkSupport(Context &context) const
251 {
252 const auto &subgroupSizeControlFeatures = context.getSubgroupSizeControlFeatures();
253
254 if (subgroupSizeControlFeatures.subgroupSizeControl == false)
255 TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes");
256 }
257
initPrograms(SourceCollections & programCollection) const258 void MultipleDispatchesUniformSubgroupSize::initPrograms(SourceCollections &programCollection) const
259 {
260 std::ostringstream computeSrc;
261 computeSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
262 << "#extension GL_KHR_shader_subgroup_basic : enable\n"
263 << "#extension GL_KHR_shader_subgroup_vote : enable\n"
264 << "#extension GL_KHR_shader_subgroup_ballot : enable\n"
265 << "layout(std430, binding = 0) buffer Outputs { uint sizes[]; };\n"
266
267 << "layout(local_size_x_id = 0) in;\n"
268
269 << "void main()\n"
270 << "{\n"
271 << " if (subgroupElect())\n"
272 << " {\n"
273 << " sizes[gl_WorkGroupID.x * gl_NumSubgroups + gl_SubgroupID] = gl_SubgroupSize;\n"
274 << " }\n"
275 << "}\n";
276
277 programCollection.glslSources.add("comp")
278 << glu::ComputeSource(computeSrc.str())
279 << ShaderBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
280 }
281
// Factory for the per-execution instance; ownership passes to the framework.
TestInstance *MultipleDispatchesUniformSubgroupSize::createInstance(Context &context) const
{
    return new MultipleDispatchesUniformSubgroupSizeInstance(context);
}
286
287 } // namespace
288
createMultipleDispatchesUniformSubgroupSizeTests(tcu::TestContext & testCtx)289 tcu::TestCaseGroup *createMultipleDispatchesUniformSubgroupSizeTests(tcu::TestContext &testCtx)
290 {
291 // Multiple dispatches uniform subgroup size tests
292 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "multiple_dispatches"));
293
294 testGroup->addChild(new MultipleDispatchesUniformSubgroupSize(testCtx, "uniform_subgroup_size"));
295 return testGroup.release();
296 }
297
298 } // namespace subgroups
299 } // namespace vkt
300