/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2021 Google LLC.
 *
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Tests that compute shaders have a subgroup size that is uniform in
 * command scope.
 *//*--------------------------------------------------------------------*/

#include "deUniquePtr.hpp"

#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkBufferWithMemory.hpp"
#include "vkBarrierUtil.hpp"
#include "vktTestCaseUtil.hpp"
#include "tcuTestLog.hpp"
#include <sstream>

using namespace vk;

namespace vkt
{
namespace subgroups
{
namespace
{

class MultipleDispatchesUniformSubgroupSizeInstance : public TestInstance
{
public:
    MultipleDispatchesUniformSubgroupSizeInstance(Context &context);
    tcu::TestStatus iterate(void);
};

MultipleDispatchesUniformSubgroupSizeInstance::MultipleDispatchesUniformSubgroupSizeInstance(Context &context)
    : TestInstance(context)
{
}

tcu::TestStatus MultipleDispatchesUniformSubgroupSizeInstance::iterate(void)
{
    const DeviceInterface &vk       = m_context.getDeviceInterface();
    const VkDevice device           = m_context.getDevice();
    Allocator &allocator            = m_context.getDefaultAllocator();
    const VkQueue queue             = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();

    const Move<VkCommandPool> cmdPool =
        createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
    const Move<VkCommandBuffer> cmdBuffer =
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

    Move<VkShaderModule> computeShader =
        createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u);

    // The maximum workgroup size in the X dimension; the test shader only uses local_size_x.
    const uint32_t maxLocalSize    = m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
    const uint32_t minSubgroupSize = m_context.getSubgroupSizeControlProperties().minSubgroupSize;

    // Create a storage buffer to hold the sizes of subgroups: one slot per possible subgroup in
    // the largest dispatch, plus one extra to cover rounding.
    const VkDeviceSize bufferSize = (maxLocalSize / minSubgroupSize + 1u) * sizeof(uint32_t);

    const VkBufferCreateInfo resultBufferCreateInfo =
        makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
    BufferWithMemory resultBuffer(vk, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible);
    auto &resultBufferAlloc = resultBuffer.getAllocation();

    // Build descriptors for the storage buffer
    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    const auto descriptorSetLayout1(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));
    const VkDescriptorBufferInfo resultInfo = makeDescriptorBufferInfo(*resultBuffer, 0u, bufferSize);

    const VkDescriptorSetAllocateInfo allocInfo = {
        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // sType
        nullptr,                                        // pNext
        *descriptorPool,                                // descriptorPool
        1u,                                             // descriptorSetCount
        &(*descriptorSetLayout1)                        // pSetLayouts
    };

    Move<VkDescriptorSet> descriptorSet = allocateDescriptorSet(vk, device, &allocInfo);
    DescriptorSetUpdateBuilder builder;

    builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultInfo);
    builder.update(vk, device);

    // Compute pipeline
    const Move<VkPipelineLayout> computePipelineLayout = makePipelineLayout(vk, device, *descriptorSetLayout1);

    for (uint32_t localSize = 1u; localSize <= maxLocalSize; localSize *= 2u)
    {
        // On each iteration, change the number of invocations which might affect
        // the subgroup size.
        const VkSpecializationMapEntry entries = {
            0u,               // uint32_t constantID;
            0u,               // uint32_t offset;
            sizeof(localSize) // size_t size;
        };

        const VkSpecializationInfo specInfo = {
            1,                 // mapEntryCount
            &entries,          // pMapEntries
            sizeof(localSize), // dataSize
            &localSize         // pData
        };

        const VkPipelineShaderStageCreateInfo shaderStageCreateInfo = {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,                 // sType
            nullptr,                                                             // pNext
            VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT, // flags
            VK_SHADER_STAGE_COMPUTE_BIT,                                         // stage
            *computeShader,                                                      // module
            "main",                                                              // pName
            &specInfo,                                                           // pSpecializationInfo
        };

        const VkComputePipelineCreateInfo pipelineCreateInfo = {
            VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // sType
            nullptr,                                        // pNext
            0u,                                             // flags
            shaderStageCreateInfo,                          // stage
            *computePipelineLayout,                         // layout
            VK_NULL_HANDLE,                                 // basePipelineHandle
            0u,                                             // basePipelineIndex
        };

        Move<VkPipeline> computePipeline = createComputePipeline(vk, device, VK_NULL_HANDLE, &pipelineCreateInfo);

        beginCommandBuffer(vk, *cmdBuffer);

        // Clear the values in the buffer.
        vk.cmdFillBuffer(*cmdBuffer, *resultBuffer, 0u, VK_WHOLE_SIZE, 0);

        const auto fillBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT,
                                                         *resultBuffer, 0ull, bufferSize);
        cmdPipelineBufferMemoryBarrier(vk, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                       VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, &fillBarrier);

        // Run the pipeline.
        vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipelineLayout, 0u, 1u,
                                 &descriptorSet.get(), 0u, nullptr);
        vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
        vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

        const auto computeToHostBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
        cmdPipelineMemoryBarrier(vk, *cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                                 &computeToHostBarrier);

        endCommandBuffer(vk, *cmdBuffer);
        submitCommandsAndWait(vk, device, queue, *cmdBuffer);

        invalidateAlloc(vk, device, resultBufferAlloc);

        // Validate results. The shader writes gl_SubgroupSize exactly once per subgroup
        // (guarded by subgroupElect()), so all non-zero entries must hold the same value.
        const uint32_t *res     = static_cast<const uint32_t *>(resultBufferAlloc.getHostPtr());
        const uint32_t maxIters = static_cast<uint32_t>(bufferSize / sizeof(uint32_t));
        uint32_t size           = 0u;
        uint32_t subgroupCount  = 0u;
        auto &log               = m_context.getTestContext().getLog();

        for (uint32_t sizeIdx = 0u; sizeIdx < maxIters; ++sizeIdx)
        {
            if (res[sizeIdx] != 0u)
            {
                if (size == 0u)
                {
                    size = res[sizeIdx];
                }
                else if (res[sizeIdx] != size)
                {
                    std::ostringstream msg;
                    msg << "Subgroup size not uniform in command scope: " << res[sizeIdx] << " != " << size
                        << " at position " << sizeIdx;
                    TCU_FAIL(msg.str());
                }
                ++subgroupCount;
            }
        }

        // Subgroup size is guaranteed to be at least 1.
        if (size == 0u)
            TCU_FAIL("Subgroup size must be at least 1");

        // The number of reported sizes must match the number of subgroups: ceil(localSize / size).
        const auto expectedSubgroupCount = (localSize / size + ((localSize % size != 0u) ? 1u : 0u));
        if (subgroupCount != expectedSubgroupCount)
        {
            std::ostringstream msg;
            msg << "Local size " << localSize << " with subgroup size " << size << " resulted in subgroup count "
                << subgroupCount << " != " << expectedSubgroupCount;
            TCU_FAIL(msg.str());
        }

        {
            std::ostringstream msg;
            msg << "Subgroup size " << size << " with local size " << localSize;
            log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
        }
    }

    return tcu::TestStatus::pass("Pass");
}

class MultipleDispatchesUniformSubgroupSize : public TestCase
{
public:
    MultipleDispatchesUniformSubgroupSize(tcu::TestContext &testCtx, const std::string &name);

    void initPrograms(SourceCollections &programCollection) const;
    TestInstance *createInstance(Context &context) const;
    virtual void checkSupport(Context &context) const;
};

MultipleDispatchesUniformSubgroupSize::MultipleDispatchesUniformSubgroupSize(tcu::TestContext &testCtx,
                                                                             const std::string &name)
    : TestCase(testCtx, name)
{
}

void MultipleDispatchesUniformSubgroupSize::checkSupport(Context &context) const
{
    const auto &subgroupSizeControlFeatures = context.getSubgroupSizeControlFeatures();

    if (subgroupSizeControlFeatures.subgroupSizeControl == false)
        TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes");
}

void MultipleDispatchesUniformSubgroupSize::initPrograms(SourceCollections &programCollection) const
{
    std::ostringstream computeSrc;
    computeSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
               << "#extension GL_KHR_shader_subgroup_basic : enable\n"
               << "#extension GL_KHR_shader_subgroup_vote : enable\n"
               << "#extension GL_KHR_shader_subgroup_ballot : enable\n"
               << "layout(std430, binding = 0) buffer Outputs { uint sizes[]; };\n"

               << "layout(local_size_x_id = 0) in;\n"

               << "void main()\n"
               << "{\n"
               << "    if (subgroupElect())\n"
               << "    {\n"
               << "        sizes[gl_WorkGroupID.x * gl_NumSubgroups + gl_SubgroupID] = gl_SubgroupSize;\n"
               << "    }\n"
               << "}\n";

    programCollection.glslSources.add("comp")
        << glu::ComputeSource(computeSrc.str())
        << ShaderBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
}

TestInstance *MultipleDispatchesUniformSubgroupSize::createInstance(Context &context) const
{
    return new MultipleDispatchesUniformSubgroupSizeInstance(context);
}

} // namespace

tcu::TestCaseGroup *createMultipleDispatchesUniformSubgroupSizeTests(tcu::TestContext &testCtx)
{
    // Multiple dispatches uniform subgroup size tests
    de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "multiple_dispatches"));

    testGroup->addChild(new MultipleDispatchesUniformSubgroupSize(testCtx, "uniform_subgroup_size"));
    return testGroup.release();
}

} // namespace subgroups
} // namespace vkt