/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2021 Google LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Tests that compute shaders have a subgroup size that is uniform in
 * command scope.
 *//*--------------------------------------------------------------------*/

#include "deUniquePtr.hpp"

#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkBufferWithMemory.hpp"
#include "vkBarrierUtil.hpp"
#include "vktTestCaseUtil.hpp"
#include "tcuTestLog.hpp"
#include <sstream>

using namespace vk;

namespace vkt
{
namespace subgroups
{
namespace
{

class MultipleDispatchesUniformSubgroupSizeInstance : public TestInstance
{
public:
    MultipleDispatchesUniformSubgroupSizeInstance (Context& context);
    tcu::TestStatus iterate (void);
};

MultipleDispatchesUniformSubgroupSizeInstance::MultipleDispatchesUniformSubgroupSizeInstance (Context& context)
    : TestInstance (context)
{
}

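// Runs one dispatch per power-of-two local size and checks that every subgroup
// launched by a dispatch reports the same gl_SubgroupSize.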
tcu::TestStatus MultipleDispatchesUniformSubgroupSizeInstance::iterate (void)
{
    const DeviceInterface& vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    Allocator& allocator = m_context.getDefaultAllocator();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();

    const Move<VkCommandPool> cmdPool = createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
    const Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

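    // The compute shader is compiled once; each iteration of the loop below picks a
    // different workgroup size for it through a specialization constant (local_size_x_id = 0).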
    Move<VkShaderModule> computeShader = createShaderModule (vk, device, m_context.getBinaryCollection().get("comp"), 0u);

    // The maximum number of invocations in a workgroup.
    const uint32_t maxLocalSize = m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
#ifndef CTS_USES_VULKANSC
    const uint32_t minSubgroupSize = m_context.getSubgroupSizeControlProperties().minSubgroupSize;
#else
    const uint32_t minSubgroupSize = m_context.getSubgroupSizeControlPropertiesEXT().minSubgroupSize;
#endif // CTS_USES_VULKANSC

    // Create a storage buffer to hold the sizes of subgroups. The worst case is the largest
    // workgroup size divided by the smallest subgroup size, rounded up, so reserving
    // (maxLocalSize / minSubgroupSize + 1) elements is always enough.
    const VkDeviceSize bufferSize = (maxLocalSize / minSubgroupSize + 1u) * sizeof(uint32_t);

    const VkBufferCreateInfo resultBufferCreateInfo = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
    BufferWithMemory resultBuffer (vk, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible);
    auto& resultBufferAlloc = resultBuffer.getAllocation();

    // Build descriptors for the storage buffer
    const Unique<VkDescriptorPool> descriptorPool (DescriptorPoolBuilder().addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
                                                   .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    const auto descriptorSetLayout1 (DescriptorSetLayoutBuilder().addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
                                     .build(vk, device));
    const VkDescriptorBufferInfo resultInfo = makeDescriptorBufferInfo(*resultBuffer, 0u, bufferSize);

    const VkDescriptorSetAllocateInfo allocInfo =
    {
        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // sType
        DE_NULL,                                        // pNext
        *descriptorPool,                                // descriptorPool
        1u,                                             // descriptorSetCount
        &(*descriptorSetLayout1)                        // pSetLayouts
    };

    Move<VkDescriptorSet> descriptorSet = allocateDescriptorSet(vk, device, &allocInfo);
    DescriptorSetUpdateBuilder builder;

    builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultInfo);
    builder.update(vk, device);

    // Compute pipeline
    const Move<VkPipelineLayout> computePipelineLayout = makePipelineLayout (vk, device, *descriptorSetLayout1);

    for (uint32_t localSize = 1u; localSize <= maxLocalSize; localSize *= 2u)
    {
        // On each iteration, change the number of invocations which might affect
        // the subgroup size.
        const VkSpecializationMapEntry entries =
        {
            0u,                 // uint32_t constantID;
            0u,                 // uint32_t offset;
            sizeof(localSize)   // size_t size;
        };

        const VkSpecializationInfo specInfo =
        {
            1,                  // mapEntryCount
            &entries,           // pMapEntries
            sizeof(localSize),  // dataSize
            &localSize          // pData
        };

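        // ALLOW_VARYING_SUBGROUP_SIZE lets the implementation choose any supported subgroup
        // size for this stage; the test only requires that whatever size is chosen is the
        // same for every subgroup launched by the dispatch.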
        const VkPipelineShaderStageCreateInfo shaderStageCreateInfo =
        {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,                 // sType
            DE_NULL,                                                             // pNext
            VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT, // flags
            VK_SHADER_STAGE_COMPUTE_BIT,                                         // stage
            *computeShader,                                                      // module
            "main",                                                              // pName
            &specInfo,                                                           // pSpecializationInfo
        };

        const VkComputePipelineCreateInfo pipelineCreateInfo =
        {
            VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // sType
            DE_NULL,                                        // pNext
            0u,                                             // flags
            shaderStageCreateInfo,                          // stage
            *computePipelineLayout,                         // layout
            (VkPipeline) 0,                                 // basePipelineHandle
            0u,                                             // basePipelineIndex
        };

        Move<VkPipeline> computePipeline = createComputePipeline(vk, device, (VkPipelineCache) 0u, &pipelineCreateInfo);

        beginCommandBuffer(vk, *cmdBuffer);

        // Clears the values in the buffer.
        vk.cmdFillBuffer(*cmdBuffer, *resultBuffer, 0u, VK_WHOLE_SIZE, 0);

        const auto fillBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT, *resultBuffer, 0ull, bufferSize);
        cmdPipelineBufferMemoryBarrier(vk, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, &fillBarrier);

        // Bind the descriptor set and pipeline, then dispatch a single workgroup.
        vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, nullptr);
        vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
        vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

        const auto computeToHostBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
        cmdPipelineMemoryBarrier(vk, *cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &computeToHostBarrier);

        endCommandBuffer(vk, *cmdBuffer);
        submitCommandsAndWait(vk, device, queue, *cmdBuffer);

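        // Invalidate the host-visible allocation so host reads observe the shader writes
        // even when the memory is not host-coherent.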
        invalidateAlloc(vk, device, resultBufferAlloc);

        // Validate results: all non-zero subgroup sizes must be the same.
        const uint32_t *res = static_cast<const uint32_t *>(resultBufferAlloc.getHostPtr());
        const uint32_t maxIters = static_cast<uint32_t>(bufferSize / sizeof(uint32_t));
        uint32_t size = 0u;
        uint32_t subgroupCount = 0u;
        auto& log = m_context.getTestContext().getLog();

        for (uint32_t sizeIdx = 0u; sizeIdx < maxIters; ++sizeIdx)
        {
            if (res[sizeIdx] != 0u)
            {
                if (size == 0u)
                {
                    size = res[sizeIdx];
                }
                else if (res[sizeIdx] != size)
                {
                    std::ostringstream msg;
                    msg << "Subgroup size not uniform in command scope: " << res[sizeIdx] << " != " << size << " at position " << sizeIdx;
                    TCU_FAIL(msg.str());
                }
                ++subgroupCount;
            }
        }

        // Subgroup size is guaranteed to be at least 1.
        if (size == 0u)
            TCU_FAIL("Subgroup size must be at least 1");

        // Each subgroup writes exactly one entry, so the number of non-zero entries must equal
        // the number of subgroups needed to cover the workgroup: ceil(localSize / size).
        const auto expectedSubgroupCount = (localSize / size + ((localSize % size != 0u) ? 1u : 0u));
        if (subgroupCount != expectedSubgroupCount)
        {
            std::ostringstream msg;
            msg << "Local size " << localSize << " with subgroup size " << size << " resulted in subgroup count " << subgroupCount << " != " << expectedSubgroupCount;
            TCU_FAIL(msg.str());
        }

        {
            std::ostringstream msg;
            msg << "Subgroup size " << size << " with local size " << localSize;
            log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
        }
    }

    return tcu::TestStatus::pass("Pass");
}

class MultipleDispatchesUniformSubgroupSize : public TestCase
{
public:
    MultipleDispatchesUniformSubgroupSize (tcu::TestContext& testCtx,
                                           const std::string& name,
                                           const std::string& description);

    void initPrograms (SourceCollections& programCollection) const;
    TestInstance* createInstance (Context& context) const;
    virtual void checkSupport (Context& context) const;
};

MultipleDispatchesUniformSubgroupSize::MultipleDispatchesUniformSubgroupSize (tcu::TestContext& testCtx,
                                                                              const std::string& name,
                                                                              const std::string& description)
    : TestCase (testCtx, name, description)
{
}

void MultipleDispatchesUniformSubgroupSize::checkSupport (Context& context) const
{
#ifndef CTS_USES_VULKANSC
    const VkPhysicalDeviceSubgroupSizeControlFeatures& subgroupSizeControlFeatures = context.getSubgroupSizeControlFeatures();
#else
    const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT& subgroupSizeControlFeatures = context.getSubgroupSizeControlFeaturesEXT();
#endif // CTS_USES_VULKANSC

    if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
        TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes");
}

void MultipleDispatchesUniformSubgroupSize::initPrograms (SourceCollections& programCollection) const
{
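    // Each subgroup elects one invocation to record gl_SubgroupSize into the slot for that
    // subgroup; slots that no subgroup touches stay zero after the command-buffer fill.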
    std::ostringstream computeSrc;
    computeSrc
        << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
        << "#extension GL_KHR_shader_subgroup_basic : enable\n"
        << "#extension GL_KHR_shader_subgroup_vote : enable\n"
        << "#extension GL_KHR_shader_subgroup_ballot : enable\n"
        << "layout(std430, binding = 0) buffer Outputs { uint sizes[]; };\n"

        << "layout(local_size_x_id = 0) in;\n"

        << "void main()\n"
        << "{\n"
        << "    if (subgroupElect())\n"
        << "    {\n"
        << "        sizes[gl_WorkGroupID.x * gl_NumSubgroups + gl_SubgroupID] = gl_SubgroupSize;\n"
        << "    }\n"
        << "}\n";

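    // SPIR-V 1.3 is the minimum version exposing the GL_KHR_shader_subgroup built-ins
    // and subgroupElect() used above.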
    programCollection.glslSources.add("comp") << glu::ComputeSource(computeSrc.str())
        << ShaderBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
}

TestInstance* MultipleDispatchesUniformSubgroupSize::createInstance (Context& context) const
{
    return new MultipleDispatchesUniformSubgroupSizeInstance(context);
}

} // anonymous namespace

tcu::TestCaseGroup* createMultipleDispatchesUniformSubgroupSizeTests (tcu::TestContext& testCtx)
{
    de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "multiple_dispatches", "Multiple dispatches uniform subgroup size tests"));

    testGroup->addChild(new MultipleDispatchesUniformSubgroupSize(testCtx, "uniform_subgroup_size", ""));
    return testGroup.release();
}

} // subgroups
} // vkt