/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2024 The Khronos Group Inc.
 * Copyright (c) 2024 Valve Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Device Generated Commands EXT Compute Subgroup Tests
 *//*--------------------------------------------------------------------*/

#include "vktDGCComputeSubgroupTestsExt.hpp"
#include "vkBarrierUtil.hpp"
#include "vkBufferWithMemory.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vktDGCUtilExt.hpp"
#include "vktTestCaseUtil.hpp"

#include <sstream>
#include <vector>
#include <memory>

namespace vkt
{
namespace DGC
{

using namespace vk;

namespace
{

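// Test parameters: workgroup size (total invocations in X), required subgroup size, whether
// the pipeline is provided through a DGC compute pipeline token (indirect execution set) or
// bound normally, and whether to run on a dedicated compute queue.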
struct BuiltinParams
{
    uint32_t totalInvocations;
    uint32_t subgroupSize;
    bool pipelineToken;
    bool computeQueue;

    uint32_t getNumSubgroups(void) const
    {
        DE_ASSERT(totalInvocations % subgroupSize == 0u);
        return (totalInvocations / subgroupSize);
    }
};

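// Checks DGC compute support (including pipeline binding when using a pipeline token),
// Vulkan 1.3, the ability to require the requested subgroup size in compute shaders, and
// the availability of a compute queue when needed.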
void checkSubgroupSupport(Context &context, BuiltinParams params)
{
    const auto supportType =
        (params.pipelineToken ? DGCComputeSupportType::BIND_PIPELINE : DGCComputeSupportType::BASIC);
    checkDGCExtComputeSupport(context, supportType);

    if (context.getUsedApiVersion() < VK_API_VERSION_1_3)
        TCU_THROW(NotSupportedError, "Vulkan 1.3 not supported");

    const auto &vk13Properties = context.getDeviceVulkan13Properties();

    DE_ASSERT(deIsPowerOfTwo64(params.subgroupSize));

    if (params.subgroupSize < vk13Properties.minSubgroupSize || params.subgroupSize > vk13Properties.maxSubgroupSize)
        TCU_THROW(NotSupportedError, "Unsupported subgroup size");

    if ((vk13Properties.requiredSubgroupSizeStages & VK_SHADER_STAGE_COMPUTE_BIT) == 0u)
        TCU_THROW(NotSupportedError, "Compute stage does not support a required subgroup size");

    if (params.computeQueue)
        context.getComputeQueue(); // Throws NotSupportedError if not available.
}

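// Generates a compute shader that checks the subgroup-related builtins (gl_NumSubgroups,
// gl_SubgroupID, gl_SubgroupSize, gl_SubgroupInvocationID and the Eq/Ge/Gt/Le/Lt ballot
// masks) and stores a per-invocation 1/0 verification result in one buffer per builtin.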
void builtinVerificationProgram(SourceCollections &dst, BuiltinParams params)
{
    ShaderBuildOptions buildOptions(dst.usedVulkanVersion, SPIRV_VERSION_1_6, 0u);

    std::ostringstream comp;
    comp << "#version 460\n"
         << "#extension GL_KHR_shader_subgroup_basic  : require\n"
         << "#extension GL_KHR_shader_subgroup_ballot : require\n"
         << "\n"
         << "layout (local_size_x=" << params.totalInvocations << ", local_size_y=1, local_size_z=1) in;\n"
         << "\n"
         << "layout (set=0, binding=0) buffer NumSubgroupsBlock { uint verification[]; } numSubgroupsBuffer;\n"
         << "layout (set=0, binding=1) buffer SubgroupIdBlock   { uint verification[]; } subgroupIdBuffer;\n"
         << "layout (set=0, binding=2) buffer SubgroupSizeBlock { uint verification[]; } subgroupSizeBuffer;\n"
         << "layout (set=0, binding=3) buffer invocationIdBlock { uint verification[]; } invocationIdBuffer;\n"
         << "layout (set=0, binding=4) buffer eqMaskBlock       { uint verification[]; } eqMaskBuffer;\n"
         << "layout (set=0, binding=5) buffer geMaskBlock       { uint verification[]; } geMaskBuffer;\n"
         << "layout (set=0, binding=6) buffer gtMaskBlock       { uint verification[]; } gtMaskBuffer;\n"
         << "layout (set=0, binding=7) buffer leMaskBlock       { uint verification[]; } leMaskBuffer;\n"
         << "layout (set=0, binding=8) buffer ltMaskBlock       { uint verification[]; } ltMaskBuffer;\n"
         << "\n"
         << "uint boolToUint (bool value)\n"
         << "{\n"
         << "   return (value ? 1 : 0);\n"
         << "}\n"
         << "\n"
         << "bool checkMaskComponent (uint mask, uint offset, uint validBits, uint bitIndex, uint expectedLess, uint "
            "expectedEqual, uint expectedGreater)\n"
         << "{\n"
         << "    bool ok = true;\n"
         << "    for (uint i = 0; i < 32; ++i)\n"
         << "    {\n"
         << "        const uint bit = ((mask >> i) & 1);\n"
         << "        const uint idx = offset + i;\n"
         << "\n"
         << "        if (idx < validBits) {\n"
         << "            if (idx < bitIndex && bit != expectedLess)\n"
         << "                ok = false;\n"
         << "            else if (idx == bitIndex && bit != expectedEqual)\n"
         << "                ok = false;\n"
         << "            else if (idx > bitIndex && bit != expectedGreater)\n"
         << "                ok = false;\n"
         << "        }\n"
         << "        else if (bit != 0)\n"
         << "            ok = false;\n"
         << "    }\n"
         << "    return ok;\n"
         << "}\n"
         << "\n"
         << "bool checkMask (uvec4 mask, uint validBits, uint bitIndex, uint expectedLess, uint expectedEqual, uint "
            "expectedGreater)\n"
         << "{\n"
         << "   return (checkMaskComponent(mask.x,  0, validBits, bitIndex, expectedLess, expectedEqual, "
            "expectedGreater) &&\n"
         << "           checkMaskComponent(mask.y, 32, validBits, bitIndex, expectedLess, expectedEqual, "
            "expectedGreater) &&\n"
         << "           checkMaskComponent(mask.z, 64, validBits, bitIndex, expectedLess, expectedEqual, "
            "expectedGreater) &&\n"
         << "           checkMaskComponent(mask.w, 96, validBits, bitIndex, expectedLess, expectedEqual, "
            "expectedGreater));\n"
         << "}\n"
         << "\n"
         << "void main (void)\n"
         << "{\n"
         << "    const uint index = gl_SubgroupInvocationID + gl_SubgroupID * gl_SubgroupSize;\n"
         << "\n"
         << "    numSubgroupsBuffer.verification[index] = boolToUint(gl_NumSubgroups == " << params.getNumSubgroups()
         << ");\n"
         << "    subgroupIdBuffer.verification  [index] = boolToUint(gl_SubgroupID >= 0 && gl_SubgroupID < "
            "gl_NumSubgroups);\n"
         << "    subgroupSizeBuffer.verification[index] = boolToUint(gl_SubgroupSize == " << params.subgroupSize
         << ");\n"
         << "    invocationIdBuffer.verification[index] = boolToUint(gl_SubgroupInvocationID >= 0 && "
            "gl_SubgroupInvocationID < gl_SubgroupSize);\n"
         << "\n"
         << "    eqMaskBuffer.verification[index] = boolToUint(checkMask(gl_SubgroupEqMask, gl_SubgroupSize, "
            "gl_SubgroupInvocationID, 0, 1, 0));\n"
         << "    geMaskBuffer.verification[index] = boolToUint(checkMask(gl_SubgroupGeMask, gl_SubgroupSize, "
            "gl_SubgroupInvocationID, 0, 1, 1));\n"
         << "    gtMaskBuffer.verification[index] = boolToUint(checkMask(gl_SubgroupGtMask, gl_SubgroupSize, "
            "gl_SubgroupInvocationID, 0, 0, 1));\n"
         << "    leMaskBuffer.verification[index] = boolToUint(checkMask(gl_SubgroupLeMask, gl_SubgroupSize, "
            "gl_SubgroupInvocationID, 1, 1, 0));\n"
         << "    ltMaskBuffer.verification[index] = boolToUint(checkMask(gl_SubgroupLtMask, gl_SubgroupSize, "
            "gl_SubgroupInvocationID, 1, 0, 0));\n"
         << "}\n";

    dst.glslSources.add("comp") << glu::ComputeSource(comp.str()) << buildOptions;
}

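// Runs the verification shader with a single device-generated dispatch and checks that
// every invocation reported 1 in all output buffers.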
tcu::TestStatus verifyBuiltins(Context &context, BuiltinParams params)
{
    const auto &ctx                = context.getContextCommonData();
    const auto descType            = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    const auto stageFlags          = static_cast<VkShaderStageFlags>(VK_SHADER_STAGE_COMPUTE_BIT);
    const auto bindPoint           = VK_PIPELINE_BIND_POINT_COMPUTE;
    auto &log                      = context.getTestContext().getLog();
    const auto queue               = (params.computeQueue ? context.getComputeQueue() : ctx.queue);
    const auto qfIndex             = (params.computeQueue ? context.getComputeQueueFamilyIndex() : ctx.qfIndex);
    const size_t outputBufferCount = 9u; // This must match the shader.

    // Output buffers.
    std::vector<uint32_t> outputValues(params.totalInvocations, 0u);
    const auto outputBufferSize       = static_cast<VkDeviceSize>(de::dataSize(outputValues));
    const auto outputBufferCreateInfo = makeBufferCreateInfo(outputBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);

    using BufferWithMemoryPtr = std::unique_ptr<BufferWithMemory>;
    std::vector<BufferWithMemoryPtr> outputBuffers;
    outputBuffers.reserve(outputBufferCount);

    for (size_t i = 0; i < outputBufferCount; ++i)
    {
        outputBuffers.emplace_back(new BufferWithMemory(ctx.vkd, ctx.device, ctx.allocator, outputBufferCreateInfo,
                                                        MemoryRequirement::HostVisible));
        auto &bufferAlloc = outputBuffers.back()->getAllocation();
        void *bufferData  = bufferAlloc.getHostPtr();

        deMemcpy(bufferData, de::dataOrNull(outputValues), de::dataSize(outputValues));
        flushAlloc(ctx.vkd, ctx.device, bufferAlloc);
    }

    // Descriptor set layout, pool and set preparation.
    DescriptorSetLayoutBuilder setLayoutBuilder;
    for (size_t i = 0; i < outputBuffers.size(); ++i)
        setLayoutBuilder.addSingleBinding(descType, stageFlags);
    const auto setLayout = setLayoutBuilder.build(ctx.vkd, ctx.device);

    DescriptorPoolBuilder poolBuilder;
    poolBuilder.addType(descType, de::sizeU32(outputBuffers));
    const auto descriptorPool =
        poolBuilder.build(ctx.vkd, ctx.device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
    const auto descriptorSet = makeDescriptorSet(ctx.vkd, ctx.device, *descriptorPool, *setLayout);

    DescriptorSetUpdateBuilder setUpdateBuilder;
    for (size_t i = 0; i < outputBuffers.size(); ++i)
    {
        const auto descInfo = makeDescriptorBufferInfo(outputBuffers.at(i)->get(), 0ull, outputBufferSize);
        setUpdateBuilder.writeSingle(*descriptorSet,
                                     DescriptorSetUpdateBuilder::Location::binding(static_cast<uint32_t>(i)), descType,
                                     &descInfo);
    }
    setUpdateBuilder.update(ctx.vkd, ctx.device);

    // Pipeline layout.
    const auto pipelineLayout = makePipelineLayout(ctx.vkd, ctx.device, *setLayout);

    // Shader.
    const auto &binaries  = context.getBinaryCollection();
    const auto compModule = createShaderModule(ctx.vkd, ctx.device, binaries.get("comp"));

    // Pipeline: either a normal compute pipeline or a DGC compute pipeline.
    using DGCComputePipelinePtr = std::unique_ptr<DGCComputePipelineExt>;
    ExecutionSetManagerPtr executionSetManager;
    DGCComputePipelinePtr dgcPipeline;
    Move<VkPipeline> normalPipeline;
    VkIndirectExecutionSetEXT executionSetHandle = VK_NULL_HANDLE;

    if (params.pipelineToken)
    {
        dgcPipeline.reset(new DGCComputePipelineExt(ctx.vkd, ctx.device, 0u, *pipelineLayout, 0u, *compModule, nullptr,
                                                    VK_NULL_HANDLE, -1, params.subgroupSize));

        executionSetManager = makeExecutionSetManagerPipeline(ctx.vkd, ctx.device, dgcPipeline->get(), 1u);
        executionSetManager->addPipeline(0u, dgcPipeline->get());
        executionSetManager->update();
        executionSetHandle = executionSetManager->get();
    }
    else
    {
        normalPipeline = makeComputePipeline(ctx.vkd, ctx.device, *pipelineLayout, 0u, nullptr, *compModule, 0u,
                                             nullptr, VK_NULL_HANDLE, params.subgroupSize);
    }

    // Indirect commands layout. Note the dispatch token is last, but its offset in the sequence is 0.
    IndirectCommandsLayoutBuilderExt cmdsLayoutBuilder(0u, stageFlags, *pipelineLayout);
    if (params.pipelineToken)
        cmdsLayoutBuilder.addComputePipelineToken(0u);
    cmdsLayoutBuilder.addDispatchToken(cmdsLayoutBuilder.getStreamRange());
    const auto cmdsLayout = cmdsLayoutBuilder.build(ctx.vkd, ctx.device);

    // Generated indirect commands buffer contents.
    std::vector<uint32_t> genCmdsData;
    genCmdsData.reserve(4u /*1 for the pipeline index and 3 for the indirect dispatch command*/);
    if (params.pipelineToken)
        genCmdsData.push_back(0u);
    genCmdsData.push_back(1u); // Dispatch token data.
    genCmdsData.push_back(1u);
    genCmdsData.push_back(1u);

    // Generated indirect commands buffer.
    const auto genCmdsBufferSize = de::dataSize(genCmdsData);
    DGCBuffer genCmdsBuffer(ctx.vkd, ctx.device, ctx.allocator, genCmdsBufferSize);
    auto &genCmdsBufferAlloc = genCmdsBuffer.getAllocation();
    void *genCmdsBufferData  = genCmdsBufferAlloc.getHostPtr();

    deMemcpy(genCmdsBufferData, de::dataOrNull(genCmdsData), de::dataSize(genCmdsData));
    flushAlloc(ctx.vkd, ctx.device, genCmdsBufferAlloc);

    // Preprocess buffer for 1 sequence. Note normalPipeline will be VK_NULL_HANDLE when using a DGC pipeline, which is what we want.
    PreprocessBufferExt preprocessBuffer(ctx.vkd, ctx.device, ctx.allocator, executionSetHandle, *cmdsLayout, 1u, 0u,
                                         *normalPipeline);

    // Command pool and buffer.
    CommandPoolWithBuffer cmd(ctx.vkd, ctx.device, qfIndex);
    const auto cmdBuffer = *cmd.cmdBuffer;

    beginCommandBuffer(ctx.vkd, cmdBuffer);

    ctx.vkd.cmdBindDescriptorSets(cmdBuffer, bindPoint, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, nullptr);
    if (!params.pipelineToken)
        ctx.vkd.cmdBindPipeline(cmdBuffer, bindPoint, *normalPipeline);
    else
    {
        DE_ASSERT(dgcPipeline);
        ctx.vkd.cmdBindPipeline(cmdBuffer, bindPoint, dgcPipeline->get());
    }

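    // Execute the single generated sequence directly (no separate preprocessing step).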
    {
        DGCGenCmdsInfo cmdsInfo(stageFlags,                       // VkShaderStageFlags shaderStages;
                                executionSetHandle,               // VkIndirectExecutionSetEXT indirectExecutionSet;
                                *cmdsLayout,                      // VkIndirectCommandsLayoutEXT indirectCommandsLayout;
                                genCmdsBuffer.getDeviceAddress(), // VkDeviceAddress indirectAddress;
                                genCmdsBufferSize,                // VkDeviceSize indirectAddressSize;
                                preprocessBuffer.getDeviceAddress(), // VkDeviceAddress preprocessAddress;
                                preprocessBuffer.getSize(),          // VkDeviceSize preprocessSize;
                                1u,                                  // uint32_t maxSequenceCount;
                                0ull,                                // VkDeviceAddress sequenceCountAddress;
                                0u,                                  // uint32_t maxDrawCount;
                                *normalPipeline);
        ctx.vkd.cmdExecuteGeneratedCommandsEXT(cmdBuffer, VK_FALSE, &cmdsInfo.get());
    }
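    // Make shader writes available to host reads after submission.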
    {
        const auto barrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
        cmdPipelineMemoryBarrier(ctx.vkd, cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                                 &barrier);
    }
    endCommandBuffer(ctx.vkd, cmdBuffer);
    submitCommandsAndWait(ctx.vkd, ctx.device, queue, cmdBuffer);

    // Verify results.
    bool testFail = false;
    for (size_t i = 0; i < outputBuffers.size(); ++i)
    {
        auto &outputBuffer = *outputBuffers.at(i);
        auto &bufferAlloc  = outputBuffer.getAllocation();
        void *bufferData   = bufferAlloc.getHostPtr();

        // Invalidate before reading back, in case the allocation is not host-coherent.
        invalidateAlloc(ctx.vkd, ctx.device, bufferAlloc);
        deMemcpy(de::dataOrNull(outputValues), bufferData, de::dataSize(outputValues));

        for (size_t j = 0; j < outputValues.size(); ++j)
        {
            const auto reference = 1u;
            const auto result    = outputValues.at(j);

            if (result != reference)
            {
                testFail = true;
                log << tcu::TestLog::Message << "Unexpected value at binding " << i << " position " << j
                    << ": expected " << reference << " but found " << result << tcu::TestLog::EndMessage;
            }
        }
    }

    if (testFail)
        return tcu::TestStatus::fail("Unexpected value found in output buffers; check log for details");
    return tcu::TestStatus::pass("Pass");
}

} // namespace

tcu::TestCaseGroup *createDGCComputeSubgroupTestsExt(tcu::TestContext &testCtx)
{
    using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;

    GroupPtr mainGroup(new tcu::TestCaseGroup(testCtx, "subgroups"));
    GroupPtr builtinsGroup(new tcu::TestCaseGroup(testCtx, "builtins"));

    const std::vector<uint32_t> invocationCounts{16u, 32u, 64u, 128u};

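    // One case per combination of queue type, pipeline binding method, workgroup size and
    // subgroup size (subgroup sizes larger than the workgroup are skipped).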
    for (const auto computeQueue : {false, true})
        for (const auto dgcPipeline : {false, true})
            for (const auto workgroupSize : invocationCounts)
                for (const auto subgroupSize : invocationCounts)
                {
                    if (subgroupSize > workgroupSize)
                        break;

                    const auto testName = "workgroup_size_" + std::to_string(workgroupSize) + "_subgroup_size_" +
                                          std::to_string(subgroupSize) +
                                          (dgcPipeline ? "_dgc_pipeline" : "_normal_pipeline") +
                                          (computeQueue ? "_cq" : "");

                    const BuiltinParams params{workgroupSize, subgroupSize, dgcPipeline, computeQueue};
                    addFunctionCaseWithPrograms(builtinsGroup.get(), testName, checkSubgroupSupport,
                                                builtinVerificationProgram, verifyBuiltins, params);
                }

    mainGroup->addChild(builtinsGroup.release());
    return mainGroup.release();
}

} // namespace DGC
} // namespace vkt