1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2024 The Khronos Group Inc.
6 * Copyright (c) 2024 Valve Corporation.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Device Generated Commands EXT Compute Subgroup Tests
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktDGCComputeSubgroupTestsExt.hpp"
26 #include "vkBarrierUtil.hpp"
27 #include "vkBufferWithMemory.hpp"
28 #include "vkBuilderUtil.hpp"
29 #include "vkCmdUtil.hpp"
30 #include "vkObjUtil.hpp"
31 #include "vkTypeUtil.hpp"
32 #include "vktDGCUtilExt.hpp"
33 #include "vktTestCaseUtil.hpp"
34
35 #include <sstream>
36 #include <vector>
37 #include <memory>
38
39 namespace vkt
40 {
41 namespace DGC
42 {
43
44 using namespace vk;
45
46 namespace
47 {
48
49 struct BuiltinParams
50 {
51 uint32_t totalInvocations;
52 uint32_t subgroupSize;
53 bool pipelineToken;
54 bool computeQueue;
55
getNumSubgroupsvkt::DGC::__anon8103e0ae0111::BuiltinParams56 uint32_t getNumSubgroups(void) const
57 {
58 DE_ASSERT(totalInvocations % subgroupSize == 0u);
59 return (totalInvocations / subgroupSize);
60 }
61 };
62
checkSubgroupSupport(Context & context,BuiltinParams params)63 void checkSubgroupSupport(Context &context, BuiltinParams params)
64 {
65 const auto supportType =
66 (params.pipelineToken ? DGCComputeSupportType::BIND_PIPELINE : DGCComputeSupportType::BASIC);
67 checkDGCExtComputeSupport(context, supportType);
68
69 if (context.getUsedApiVersion() < VK_API_VERSION_1_3)
70 TCU_THROW(NotSupportedError, "Vulkan 1.3 not supported");
71
72 const auto &vk13Properties = context.getDeviceVulkan13Properties();
73
74 DE_ASSERT(deIsPowerOfTwo64(params.subgroupSize));
75
76 if (params.subgroupSize < vk13Properties.minSubgroupSize || params.subgroupSize > vk13Properties.maxSubgroupSize)
77 TCU_THROW(NotSupportedError, "Unsupported subgroup size");
78
79 if ((vk13Properties.requiredSubgroupSizeStages & VK_SHADER_STAGE_COMPUTE_BIT) == 0u)
80 TCU_THROW(NotSupportedError, "Compute stage does not support a required subgroup size");
81
82 if (params.computeQueue)
83 context.getComputeQueue(); // Throws NotSupportedError if not available.
84 }
85
// Builds the GLSL compute shader that verifies subgroup built-in variables.
// A single workgroup of params.totalInvocations invocations runs; each invocation
// checks gl_NumSubgroups, gl_SubgroupID, gl_SubgroupSize, gl_SubgroupInvocationID
// and the five ballot masks (Eq/Ge/Gt/Le/Lt), storing 1 (pass) or 0 (fail) for each
// check in a dedicated storage buffer (bindings 0..8, one slot per invocation).
void builtinVerificationProgram(SourceCollections &dst, BuiltinParams params)
{
    // Build targeting SPIR-V 1.6 (these cases already require Vulkan 1.3).
    ShaderBuildOptions buildOptions(dst.usedVulkanVersion, SPIRV_VERSION_1_6, 0u);

    std::ostringstream comp;
    comp << "#version 460\n"
         << "#extension GL_KHR_shader_subgroup_basic : require\n"
         << "#extension GL_KHR_shader_subgroup_ballot : require\n"
         << "\n"
         << "layout (local_size_x=" << params.totalInvocations << ", local_size_y=1, local_size_z=1) in;\n"
         << "\n"
         // One verification buffer per built-in being checked. The binding count must
         // match outputBufferCount on the host side.
         << "layout (set=0, binding=0) buffer NumSubgroupsBlock { uint verification[]; } numSubgroupsBuffer;\n"
         << "layout (set=0, binding=1) buffer SubgroupIdBlock { uint verification[]; } subgroupIdBuffer;\n"
         << "layout (set=0, binding=2) buffer SubgroupSizeBlock { uint verification[]; } subgroupSizeBuffer;\n"
         << "layout (set=0, binding=3) buffer invocationIdBlock { uint verification[]; } invocationIdBuffer;\n"
         << "layout (set=0, binding=4) buffer eqMaskBlock { uint verification[]; } eqMaskBuffer;\n"
         << "layout (set=0, binding=5) buffer geMaskBlock { uint verification[]; } geMaskBuffer;\n"
         << "layout (set=0, binding=6) buffer gtMaskBlock { uint verification[]; } gtMaskBuffer;\n"
         << "layout (set=0, binding=7) buffer leMaskBlock { uint verification[]; } leMaskBuffer;\n"
         << "layout (set=0, binding=8) buffer ltMaskBlock { uint verification[]; } ltMaskBuffer;\n"
         << "\n"
         << "uint boolToUint (bool value)\n"
         << "{\n"
         << "    return (value ? 1 : 0);\n"
         << "}\n"
         << "\n"
         // checkMaskComponent verifies one 32-bit word of a ballot mask: bits whose
         // global index falls below/at/above the invocation's own bit must match
         // expectedLess/expectedEqual/expectedGreater, and any bit at or beyond
         // validBits (the subgroup size) must be zero.
         << "bool checkMaskComponent (uint mask, uint offset, uint validBits, uint bitIndex, uint expectedLess, uint "
            "expectedEqual, uint expectedGreater)\n"
         << "{\n"
         << "    bool ok = true;\n"
         << "    for (uint i = 0; i < 32; ++i)\n"
         << "    {\n"
         << "        const uint bit = ((mask >> i) & 1);\n"
         << "        const uint idx = offset + i;\n"
         << "\n"
         << "        if (idx < validBits) {\n"
         << "            if (idx < bitIndex && bit != expectedLess)\n"
         << "                ok = false;\n"
         << "            else if (idx == bitIndex && bit != expectedEqual)\n"
         << "                ok = false;\n"
         << "            else if (idx > bitIndex && bit != expectedGreater)\n"
         << "                ok = false;\n"
         << "        }\n"
         << "        else if (bit != 0)\n"
         << "            ok = false;\n"
         << "    }\n"
         << "    return ok;\n"
         << "}\n"
         << "\n"
         // checkMask applies the component check to each 32-bit word of a uvec4
         // ballot mask, offset by that word's bit position within the mask.
         << "bool checkMask (uvec4 mask, uint validBits, uint bitIndex, uint expectedLess, uint expectedEqual, uint "
            "expectedGreater)\n"
         << "{\n"
         << "    return (checkMaskComponent(mask.x, 0, validBits, bitIndex, expectedLess, expectedEqual, "
            "expectedGreater) &&\n"
         << "            checkMaskComponent(mask.y, 32, validBits, bitIndex, expectedLess, expectedEqual, "
            "expectedGreater) &&\n"
         << "            checkMaskComponent(mask.z, 64, validBits, bitIndex, expectedLess, expectedEqual, "
            "expectedGreater) &&\n"
         << "            checkMaskComponent(mask.w, 96, validBits, bitIndex, expectedLess, expectedEqual, "
            "expectedGreater));\n"
         << "}\n"
         << "\n"
         << "void main (void)\n"
         << "{\n"
         // Each invocation writes to a unique slot derived from its subgroup id and
         // its position within the subgroup.
         << "    const uint index = gl_SubgroupInvocationID + gl_SubgroupID * gl_SubgroupSize;\n"
         << "\n"
         << "    numSubgroupsBuffer.verification[index] = boolToUint(gl_NumSubgroups == " << params.getNumSubgroups()
         << ");\n"
         << "    subgroupIdBuffer.verification  [index] = boolToUint(gl_SubgroupID >= 0 && gl_SubgroupID < "
            "gl_NumSubgroups);\n"
         << "    subgroupSizeBuffer.verification[index] = boolToUint(gl_SubgroupSize == " << params.subgroupSize
         << ");\n"
         << "    invocationIdBuffer.verification[index] = boolToUint(gl_SubgroupInvocationID >= 0 && "
            "gl_SubgroupInvocationID < gl_SubgroupSize);\n"
         << "\n"
         // Expected less/equal/greater bit values for each relational mask, relative
         // to the invocation's own bit index.
         << "    eqMaskBuffer.verification[index] = boolToUint(checkMask(gl_SubgroupEqMask, gl_SubgroupSize, "
            "gl_SubgroupInvocationID, 0, 1, 0));\n"
         << "    geMaskBuffer.verification[index] = boolToUint(checkMask(gl_SubgroupGeMask, gl_SubgroupSize, "
            "gl_SubgroupInvocationID, 0, 1, 1));\n"
         << "    gtMaskBuffer.verification[index] = boolToUint(checkMask(gl_SubgroupGtMask, gl_SubgroupSize, "
            "gl_SubgroupInvocationID, 0, 0, 1));\n"
         << "    leMaskBuffer.verification[index] = boolToUint(checkMask(gl_SubgroupLeMask, gl_SubgroupSize, "
            "gl_SubgroupInvocationID, 1, 1, 0));\n"
         << "    ltMaskBuffer.verification[index] = boolToUint(checkMask(gl_SubgroupLtMask, gl_SubgroupSize, "
            "gl_SubgroupInvocationID, 1, 0, 0));\n"
         << "}\n";

    dst.glslSources.add("comp") << glu::ComputeSource(comp.str()) << buildOptions;
}
175
// Runs the subgroup-builtins verification shader through device generated commands
// and checks that every invocation reported success (1) in all output buffers.
// Depending on params, the dispatch uses either a normal compute pipeline with only
// a dispatch token, or a DGC pipeline selected via an indirect execution set with a
// pipeline token followed by the dispatch token.
tcu::TestStatus verifyBuiltins(Context &context, BuiltinParams params)
{
    const auto &ctx = context.getContextCommonData();
    const auto descType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    const auto stageFlags = static_cast<VkShaderStageFlags>(VK_SHADER_STAGE_COMPUTE_BIT);
    const auto bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
    auto &log = context.getTestContext().getLog();
    // Optionally submit on the dedicated compute queue (availability was checked in support).
    const auto queue = (params.computeQueue ? context.getComputeQueue() : ctx.queue);
    const auto qfIndex = (params.computeQueue ? context.getComputeQueueFamilyIndex() : ctx.qfIndex);
    const size_t outputBufferCount = 9u; // This must match the shader.

    // Output buffers: one uint flag per invocation, zero-initialized from the host so
    // any invocation that fails its check (or never runs) shows up as 0 afterwards.
    std::vector<uint32_t> outputValues(params.totalInvocations, 0u);
    const auto outputBufferSize = static_cast<VkDeviceSize>(de::dataSize(outputValues));
    const auto outputBufferCreateInfo = makeBufferCreateInfo(outputBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);

    using BufferWithMemoryPtr = std::unique_ptr<BufferWithMemory>;
    std::vector<BufferWithMemoryPtr> outputBuffers;
    outputBuffers.reserve(outputBufferCount);

    for (size_t i = 0; i < outputBufferCount; ++i)
    {
        outputBuffers.emplace_back(new BufferWithMemory(ctx.vkd, ctx.device, ctx.allocator, outputBufferCreateInfo,
                                                        MemoryRequirement::HostVisible));
        auto &bufferAlloc = outputBuffers.back()->getAllocation();
        void *bufferData = bufferAlloc.getHostPtr();

        deMemcpy(bufferData, de::dataOrNull(outputValues), de::dataSize(outputValues));
        flushAlloc(ctx.vkd, ctx.device, bufferAlloc);
    }

    // Descriptor set layout, pool and set preparation: one storage buffer binding per
    // output buffer, all in set 0, matching the shader's binding numbers.
    DescriptorSetLayoutBuilder setLayoutBuilder;
    for (size_t i = 0; i < outputBuffers.size(); ++i)
        setLayoutBuilder.addSingleBinding(descType, stageFlags);
    const auto setLayout = setLayoutBuilder.build(ctx.vkd, ctx.device);

    DescriptorPoolBuilder poolBuilder;
    poolBuilder.addType(descType, de::sizeU32(outputBuffers));
    const auto descriptorPool =
        poolBuilder.build(ctx.vkd, ctx.device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
    const auto descriptorSet = makeDescriptorSet(ctx.vkd, ctx.device, *descriptorPool, *setLayout);

    DescriptorSetUpdateBuilder setUpdateBuilder;
    for (size_t i = 0; i < outputBuffers.size(); ++i)
    {
        const auto descInfo = makeDescriptorBufferInfo(outputBuffers.at(i)->get(), 0ull, outputBufferSize);
        setUpdateBuilder.writeSingle(*descriptorSet,
                                     DescriptorSetUpdateBuilder::Location::binding(static_cast<uint32_t>(i)), descType,
                                     &descInfo);
    }
    setUpdateBuilder.update(ctx.vkd, ctx.device);

    // Pipeline layout.
    const auto pipelineLayout = makePipelineLayout(ctx.vkd, ctx.device, *setLayout);

    // Shader.
    const auto &binaries = context.getBinaryCollection();
    const auto compModule = createShaderModule(ctx.vkd, ctx.device, binaries.get("comp"));

    // Pipeline: either a normal compute pipeline or a DGC compute pipeline. Both are
    // created with the required subgroup size from the test parameters.
    using DGCComputePipelinePtr = std::unique_ptr<DGCComputePipelineExt>;
    ExecutionSetManagerPtr executionSetManager;
    DGCComputePipelinePtr dgcPipeline;
    Move<VkPipeline> normalPipeline;
    VkIndirectExecutionSetEXT executionSetHandle = VK_NULL_HANDLE;

    if (params.pipelineToken)
    {
        dgcPipeline.reset(new DGCComputePipelineExt(ctx.vkd, ctx.device, 0u, *pipelineLayout, 0u, *compModule, nullptr,
                                                    VK_NULL_HANDLE, -1, params.subgroupSize));

        // Indirect execution set with a single entry (index 0) holding the DGC pipeline.
        executionSetManager = makeExecutionSetManagerPipeline(ctx.vkd, ctx.device, dgcPipeline->get(), 1u);
        executionSetManager->addPipeline(0u, dgcPipeline->get());
        executionSetManager->update();
        executionSetHandle = executionSetManager->get();
    }
    else
    {
        normalPipeline = makeComputePipeline(ctx.vkd, ctx.device, *pipelineLayout, 0u, nullptr, *compModule, 0u,
                                             nullptr, VK_NULL_HANDLE, params.subgroupSize);
    }

    // Indirect commands layout. Note the dispatch token is last, but its offset in the sequence is 0.
    IndirectCommandsLayoutBuilderExt cmdsLayoutBuilder(0u, stageFlags, *pipelineLayout);
    if (params.pipelineToken)
        cmdsLayoutBuilder.addComputePipelineToken(0u);
    cmdsLayoutBuilder.addDispatchToken(cmdsLayoutBuilder.getStreamRange());
    const auto cmdsLayout = cmdsLayoutBuilder.build(ctx.vkd, ctx.device);

    // Generated indirect commands buffer contents: optional pipeline index, then a
    // single-workgroup (1, 1, 1) dispatch.
    std::vector<uint32_t> genCmdsData;
    genCmdsData.reserve(4u /*1 for the pipeline index and 3 for the indirect dispatch command*/);
    if (params.pipelineToken)
        genCmdsData.push_back(0u);
    genCmdsData.push_back(1u); // Dispatch token data.
    genCmdsData.push_back(1u);
    genCmdsData.push_back(1u);

    // Generated indirect commands buffer.
    const auto genCmdsBufferSize = de::dataSize(genCmdsData);
    DGCBuffer genCmdsBuffer(ctx.vkd, ctx.device, ctx.allocator, genCmdsBufferSize);
    auto &genCmdsBufferAlloc = genCmdsBuffer.getAllocation();
    void *genCmdsBufferData = genCmdsBufferAlloc.getHostPtr();

    deMemcpy(genCmdsBufferData, de::dataOrNull(genCmdsData), de::dataSize(genCmdsData));
    flushAlloc(ctx.vkd, ctx.device, genCmdsBufferAlloc);

    // Preprocess buffer for 1 sequence. Note normalPipeline will be VK_NULL_HANDLE when using a DGC pipeline, which is what we want.
    PreprocessBufferExt preprocessBuffer(ctx.vkd, ctx.device, ctx.allocator, executionSetHandle, *cmdsLayout, 1u, 0u,
                                         *normalPipeline);

    // Command pool and buffer.
    CommandPoolWithBuffer cmd(ctx.vkd, ctx.device, qfIndex);
    const auto cmdBuffer = *cmd.cmdBuffer;

    beginCommandBuffer(ctx.vkd, cmdBuffer);

    ctx.vkd.cmdBindDescriptorSets(cmdBuffer, bindPoint, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, nullptr);
    // A pipeline is bound in both cases; with a pipeline token the execution set
    // selection during execution determines the pipeline actually dispatched.
    if (!params.pipelineToken)
        ctx.vkd.cmdBindPipeline(cmdBuffer, bindPoint, *normalPipeline);
    else
    {
        DE_ASSERT(dgcPipeline);
        ctx.vkd.cmdBindPipeline(cmdBuffer, bindPoint, dgcPipeline->get());
    }

    {
        DGCGenCmdsInfo cmdsInfo(stageFlags,                       //	VkShaderStageFlags			shaderStages;
                                executionSetHandle,               //	VkIndirectExecutionSetEXT	indirectExecutionSet;
                                *cmdsLayout,                      //	VkIndirectCommandsLayoutEXT	indirectCommandsLayout;
                                genCmdsBuffer.getDeviceAddress(), //	VkDeviceAddress				indirectAddress;
                                genCmdsBufferSize,                //	VkDeviceSize				indirectAddressSize;
                                preprocessBuffer.getDeviceAddress(), //	VkDeviceAddress				preprocessAddress;
                                preprocessBuffer.getSize(),          //	VkDeviceSize				preprocessSize;
                                1u,                                  //	uint32_t					maxSequenceCount;
                                0ull,                                //	VkDeviceAddress				sequenceCountAddress;
                                0u,                                  //	uint32_t					maxDrawCount;
                                *normalPipeline);
        ctx.vkd.cmdExecuteGeneratedCommandsEXT(cmdBuffer, VK_FALSE, &cmdsInfo.get());
    }
    {
        // Make shader writes visible to the host before reading results back.
        const auto barrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
        cmdPipelineMemoryBarrier(ctx.vkd, cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                                 &barrier);
    }
    endCommandBuffer(ctx.vkd, cmdBuffer);
    submitCommandsAndWait(ctx.vkd, ctx.device, queue, cmdBuffer);

    // Verify results: every slot of every output buffer must contain 1.
    bool testFail = false;
    for (size_t i = 0; i < outputBuffers.size(); ++i)
    {
        auto &outputBuffer = *outputBuffers.at(i);
        auto &bufferAlloc = outputBuffer.getAllocation();
        void *bufferData = bufferAlloc.getHostPtr();

        deMemcpy(de::dataOrNull(outputValues), bufferData, de::dataSize(outputValues));

        for (size_t j = 0; j < outputValues.size(); ++j)
        {
            const auto reference = 1u;
            const auto result = outputValues.at(j);

            if (result != reference)
            {
                testFail = true;
                log << tcu::TestLog::Message << "Unexpected value at binding " << i << " position " << j
                    << ": expected " << reference << " but found " << result << tcu::TestLog::EndMessage;
            }
        }
    }

    if (testFail)
        return tcu::TestStatus::fail("Unexpected value found in output buffers; check log for details");
    return tcu::TestStatus::pass("Pass");
}
353
354 } // namespace
355
createDGCComputeSubgroupTestsExt(tcu::TestContext & testCtx)356 tcu::TestCaseGroup *createDGCComputeSubgroupTestsExt(tcu::TestContext &testCtx)
357 {
358 using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
359
360 GroupPtr mainGroup(new tcu::TestCaseGroup(testCtx, "subgroups"));
361 GroupPtr builtinsGroup(new tcu::TestCaseGroup(testCtx, "builtins"));
362
363 const std::vector<uint32_t> invocationCounts{16u, 32u, 64u, 128u};
364
365 for (const auto computeQueue : {false, true})
366 for (const auto dgcPipeline : {false, true})
367 for (const auto workgroupSize : invocationCounts)
368 for (const auto subgroupSize : invocationCounts)
369 {
370 if (subgroupSize > workgroupSize)
371 break;
372
373 const auto testName = "workgroup_size_" + std::to_string(workgroupSize) + "_subgroup_size_" +
374 std::to_string(subgroupSize) +
375 (dgcPipeline ? "_dgc_pipeline" : "_normal_pipeline") +
376 (computeQueue ? "_cq" : "");
377
378 const BuiltinParams params{workgroupSize, subgroupSize, dgcPipeline, computeQueue};
379 addFunctionCaseWithPrograms(builtinsGroup.get(), testName, checkSubgroupSupport,
380 builtinVerificationProgram, verifyBuiltins, params);
381 }
382
383 mainGroup->addChild(builtinsGroup.release());
384 return mainGroup.release();
385 }
386
387 } // namespace DGC
388 } // namespace vkt
389