1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2021 The Khronos Group Inc.
6 * Copyright (c) 2021 Valve Corporation.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Mesh Shader API Tests for VK_EXT_mesh_shader
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktMeshShaderApiTestsEXT.hpp"
26 #include "vktMeshShaderUtil.hpp"
27 #include "vktTestCase.hpp"
28
29 #include "vkTypeUtil.hpp"
30 #include "vkImageWithMemory.hpp"
31 #include "vkBufferWithMemory.hpp"
32 #include "vkObjUtil.hpp"
33 #include "vkBuilderUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkImageUtil.hpp"
36
37 #include "tcuMaybe.hpp"
38 #include "tcuTestLog.hpp"
39 #include "tcuImageCompare.hpp"
40
41 #include "deRandom.hpp"
42
43 #include <iostream>
44 #include <sstream>
45 #include <vector>
46 #include <algorithm>
47 #include <iterator>
48 #include <limits>
49
50 namespace vkt
51 {
52 namespace MeshShader
53 {
54
55 namespace
56 {
57
58 using namespace vk;
59
60 using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
61 using ImageWithMemoryPtr = de::MovePtr<ImageWithMemory>;
62 using BufferWithMemoryPtr = de::MovePtr<BufferWithMemory>;
63
// Draw command flavor exercised by a test case. The enumerators map, in order, to the direct,
// indirect and indirect-count mesh task draw commands recorded by the test instance.
enum class DrawType
{
	DRAW,					// Workgroup counts are passed directly as command arguments.
	DRAW_INDIRECT,			// Workgroup counts are read from an indirect buffer.
	DRAW_INDIRECT_COUNT,	// Like DRAW_INDIRECT, with the number of draws read from a count buffer.
};
70
operator <<(std::ostream & stream,DrawType drawType)71 std::ostream& operator<< (std::ostream& stream, DrawType drawType)
72 {
73 switch (drawType)
74 {
75 case DrawType::DRAW: stream << "draw"; break;
76 case DrawType::DRAW_INDIRECT: stream << "draw_indirect"; break;
77 case DrawType::DRAW_INDIRECT_COUNT: stream << "draw_indirect_count"; break;
78 default: DE_ASSERT(false); break;
79 }
80 return stream;
81 }
82
83
// Used to exercise the maxDrawCount rule in the DRAW_INDIRECT_COUNT case: it selects which of the
// two values (count buffer contents or maxDrawCount argument) ends up limiting the draw count.
enum class IndirectCountLimitType
{
	BUFFER_VALUE,	// The actual count will be given by the count buffer.
	MAX_COUNT,		// The actual count will be given by the maxDrawCount argument passed to the draw command.
};
90
// Placement of the indirect command structures inside the indirect buffer.
struct IndirectArgs
{
	uint32_t offset;	// Byte offset of the first command structure in the buffer.
	uint32_t stride;	// Byte distance between consecutive command structures (0 is allowed in some cases).
};
96
97 struct TestParams
98 {
99 DrawType drawType;
100 uint32_t seed;
101 uint32_t drawCount; // Equivalent to taskCount or drawCount.
102 tcu::Maybe<IndirectArgs> indirectArgs; // Only used for DRAW_INDIRECT*.
103 tcu::Maybe<IndirectCountLimitType> indirectCountLimit; // Only used for DRAW_INDIRECT_COUNT.
104 tcu::Maybe<uint32_t> indirectCountOffset; // Only used for DRAW_INDIRECT_COUNT.
105 bool useTask;
106 bool useSecondaryCmdBuffer;
107 };
108
109 // The framebuffer will have a number of rows and 32 columns. Each mesh shader workgroup will generate geometry to fill a single
110 // framebuffer row, using a triangle list with 32 triangles of different colors, each covering a framebuffer pixel.
111 //
112 // Note: the total framebuffer rows is called "full" below (e.g. 64). When using a task shader to generate work, each workgroup will
113 // generate a single mesh workgroup using a push constant instead of a compile-time constant.
114 //
115 // When using DRAW, the task count will tell us how many rows of pixels will be filled in the framebuffer.
116 //
117 // When using indirect draws, the full framebuffer will always be drawn into by using multiple draw command structures, except in
118 // the case of drawCount==0. Each draw will spawn the needed number of tasks to fill the whole framebuffer. In addition, in order to
119 // make all argument structures different, the number of tasks in each draw count will be slightly different and assigned
120 // pseudorandomly.
121 //
122 // DRAW: taskCount=0, taskCount=1, taskCount=2, taskCount=half, taskCount=full
123 //
124 // DRAW_INDIRECT: drawCount=0, drawCount=1, drawCount=2, drawCount=half, drawCount=full.
//   * With offset 0 and with a fixed nonzero offset (a multiple of 4).
//   * With stride 0, a stride equal to the command size, and a larger padded stride (multiples of 4).
127 //
128 // DRAW_INDIRECT_COUNT: same as indirect in two variants:
129 // 1. Passing the count in a buffer with a large maximum.
130 // 2. Passing a large value in the buffer and limiting it with the maximum.
131
132 class MeshApiCase : public vkt::TestCase
133 {
134 public:
MeshApiCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const TestParams & params)135 MeshApiCase (tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParams& params)
136 : vkt::TestCase (testCtx, name, description)
137 , m_params (params)
138 {}
~MeshApiCase(void)139 virtual ~MeshApiCase (void) {}
140
141 void initPrograms (vk::SourceCollections& programCollection) const override;
142 void checkSupport (Context& context) const override;
143 TestInstance* createInstance (Context& context) const override;
144
145 protected:
146 TestParams m_params;
147 };
148
149 class MeshApiInstance : public vkt::TestInstance
150 {
151 public:
MeshApiInstance(Context & context,const TestParams & params)152 MeshApiInstance (Context& context, const TestParams& params)
153 : vkt::TestInstance (context)
154 , m_params (params)
155 {}
~MeshApiInstance(void)156 virtual ~MeshApiInstance (void) {}
157
158 tcu::TestStatus iterate (void) override;
159
160 protected:
161 TestParams m_params;
162 };
163
createInstance(Context & context) const164 TestInstance* MeshApiCase::createInstance (Context& context) const
165 {
166 return new MeshApiInstance(context, m_params);
167 }
168
169 struct PushConstantData
170 {
171 uint32_t width;
172 uint32_t height;
173 uint32_t dimMesh; // Set work group size in the X, Y or Z dimension depending on value (0, 1, 2).
174 uint32_t one;
175 uint32_t dimTask; // Same as dimMesh.
176
getRangesvkt::MeshShader::__anon4d5a22460111::PushConstantData177 std::vector<VkPushConstantRange> getRanges (bool includeTask) const
178 {
179 constexpr uint32_t offsetMesh = 0u;
180 constexpr uint32_t offsetTask = static_cast<uint32_t>(offsetof(PushConstantData, one));
181 constexpr uint32_t sizeMesh = offsetTask;
182 constexpr uint32_t sizeTask = static_cast<uint32_t>(sizeof(PushConstantData)) - offsetTask;
183
184 const VkPushConstantRange meshRange =
185 {
186 VK_SHADER_STAGE_MESH_BIT_EXT, // VkShaderStageFlags stageFlags;
187 offsetMesh, // uint32_t offset;
188 sizeMesh, // uint32_t size;
189 };
190 const VkPushConstantRange taskRange =
191 {
192 VK_SHADER_STAGE_TASK_BIT_EXT, // VkShaderStageFlags stageFlags;
193 offsetTask, // uint32_t offset;
194 sizeTask, // uint32_t size;
195 };
196
197 std::vector<VkPushConstantRange> ranges (1u, meshRange);
198 if (includeTask)
199 ranges.push_back(taskRange);
200 return ranges;
201 }
202 };
203
initPrograms(vk::SourceCollections & programCollection) const204 void MeshApiCase::initPrograms (vk::SourceCollections& programCollection) const
205 {
206 const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
207
208 const std::string taskDataDecl =
209 "struct TaskData {\n"
210 " uint blockNumber;\n"
211 " uint blockRow;\n"
212 "};\n"
213 "taskPayloadSharedEXT TaskData td;\n"
214 ;
215
216 // Task shader if needed.
217 if (m_params.useTask)
218 {
219 std::ostringstream task;
220 task
221 << "#version 460\n"
222 << "#extension GL_EXT_mesh_shader : enable\n"
223 << "\n"
224 << "layout (local_size_x=1) in;\n"
225 << "\n"
226 << "layout (push_constant, std430) uniform TaskPushConstantBlock {\n"
227 << " layout (offset=12) uint one;\n"
228 << " layout (offset=16) uint dimCoord;\n"
229 << "} pc;\n"
230 << "\n"
231 << taskDataDecl
232 << "\n"
233 << "void main ()\n"
234 << "{\n"
235 << " const uint workGroupID = ((pc.dimCoord == 2) ? gl_WorkGroupID.z : ((pc.dimCoord == 1) ? gl_WorkGroupID.y : gl_WorkGroupID.x));\n"
236 << " td.blockNumber = uint(gl_DrawID);\n"
237 << " td.blockRow = workGroupID;\n"
238 << " EmitMeshTasksEXT(pc.one, pc.one, pc.one);"
239 << "}\n"
240 ;
241 programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
242 }
243
244 // Mesh shader.
245 {
246 std::ostringstream mesh;
247 mesh
248 << "#version 460\n"
249 << "#extension GL_EXT_mesh_shader : enable\n"
250 << "\n"
251 << "// 32 local invocations in total.\n"
252 << "layout (local_size_x=4, local_size_y=2, local_size_z=4) in;\n"
253 << "layout (triangles) out;\n"
254 << "layout (max_vertices=96, max_primitives=32) out;\n"
255 << "\n"
256 << "layout (push_constant, std430) uniform MeshPushConstantBlock {\n"
257 << " uint width;\n"
258 << " uint height;\n"
259 << " uint dimCoord;\n"
260 << "} pc;\n"
261 << "\n"
262 << "layout (location=0) perprimitiveEXT out vec4 primitiveColor[];\n"
263 << "\n"
264 << (m_params.useTask ? taskDataDecl : "")
265 << "\n"
266 << "layout (set=0, binding=0, std430) readonly buffer BlockSizes {\n"
267 << " uint blockSize[];\n"
268 << "} bsz;\n"
269 << "\n"
270 << "uint startOfBlock (uint blockNumber)\n"
271 << "{\n"
272 << " uint start = 0;\n"
273 << " for (uint i = 0; i < blockNumber; i++)\n"
274 << " start += bsz.blockSize[i];\n"
275 << " return start;\n"
276 << "}\n"
277 << "\n"
278 << "void main ()\n"
279 << "{\n"
280 << " const uint workGroupID = ((pc.dimCoord == 2) ? gl_WorkGroupID.z : ((pc.dimCoord == 1) ? gl_WorkGroupID.y : gl_WorkGroupID.x));\n"
281 << " const uint blockNumber = " << (m_params.useTask ? "td.blockNumber" : "uint(gl_DrawID)") << ";\n"
282 << " const uint blockRow = " << (m_params.useTask ? "td.blockRow" : "workGroupID") << ";\n"
283 << "\n"
284 << " // Each workgroup will fill one row, and each invocation will generate a\n"
285 << " // triangle around the pixel center in each column.\n"
286 << " const uint row = startOfBlock(blockNumber) + blockRow;\n"
287 << " const uint col = gl_LocalInvocationIndex;\n"
288 << "\n"
289 << " const float fHeight = float(pc.height);\n"
290 << " const float fWidth = float(pc.width);\n"
291 << "\n"
292 << " // Pixel coordinates, normalized.\n"
293 << " const float rowNorm = (float(row) + 0.5) / fHeight;\n"
294 << " const float colNorm = (float(col) + 0.5) / fWidth;\n"
295 << "\n"
296 << " // Framebuffer coordinates.\n"
297 << " const float coordX = (colNorm * 2.0) - 1.0;\n"
298 << " const float coordY = (rowNorm * 2.0) - 1.0;\n"
299 << "\n"
300 << " const float pixelWidth = 2.0 / fWidth;\n"
301 << " const float pixelHeight = 2.0 / fHeight;\n"
302 << "\n"
303 << " const float offsetX = pixelWidth / 2.0;\n"
304 << " const float offsetY = pixelHeight / 2.0;\n"
305 << "\n"
306 << " const uint baseIndex = col*3;\n"
307 << " const uvec3 indices = uvec3(baseIndex, baseIndex + 1, baseIndex + 2);\n"
308 << "\n"
309 << " SetMeshOutputsEXT(96u, 32u);\n"
310 << " primitiveColor[col] = vec4(rowNorm, colNorm, 0.0, 1.0);\n"
311 << " gl_PrimitiveTriangleIndicesEXT[col] = uvec3(indices.x, indices.y, indices.z);\n"
312 << "\n"
313 << " gl_MeshVerticesEXT[indices.x].gl_Position = vec4(coordX - offsetX, coordY + offsetY, 0.0, 1.0);\n"
314 << " gl_MeshVerticesEXT[indices.y].gl_Position = vec4(coordX + offsetX, coordY + offsetY, 0.0, 1.0);\n"
315 << " gl_MeshVerticesEXT[indices.z].gl_Position = vec4(coordX, coordY - offsetY, 0.0, 1.0);\n"
316 << "}\n"
317 ;
318 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
319 }
320
321 // Frag shader.
322 {
323 std::ostringstream frag;
324 frag
325 << "#version 460\n"
326 << "#extension GL_EXT_mesh_shader : enable\n"
327 << "\n"
328 << "layout (location=0) perprimitiveEXT in vec4 primitiveColor;\n"
329 << "layout (location=0) out vec4 outColor;\n"
330 << "\n"
331 << "void main ()\n"
332 << "{\n"
333 << " outColor = primitiveColor;\n"
334 << "}\n"
335 ;
336 programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
337 }
338 }
339
checkSupport(Context & context) const340 void MeshApiCase::checkSupport (Context& context) const
341 {
342 checkTaskMeshShaderSupportEXT(context, m_params.useTask, true);
343
344 // VUID-vkCmdDrawMeshTasksIndirectEXT-drawCount-02718
345 if (m_params.drawType == DrawType::DRAW_INDIRECT && m_params.drawCount > 1u)
346 {
347 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_MULTI_DRAW_INDIRECT);
348 }
349
350 // VUID-vkCmdDrawMeshTasksIndirectCountEXT-None-04445
351 if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT)
352 context.requireDeviceFunctionality("VK_KHR_draw_indirect_count");
353 }
354
355 template <typename T>
makeStridedBuffer(const DeviceInterface & vkd,VkDevice device,Allocator & alloc,const std::vector<T> & elements,uint32_t offset,uint32_t stride,VkBufferUsageFlags usage,uint32_t endPadding)356 BufferWithMemoryPtr makeStridedBuffer(const DeviceInterface& vkd, VkDevice device, Allocator& alloc, const std::vector<T>& elements, uint32_t offset, uint32_t stride, VkBufferUsageFlags usage, uint32_t endPadding)
357 {
358 const auto elementSize = static_cast<uint32_t>(sizeof(T));
359 const auto actualStride = std::max(elementSize, stride);
360 const auto bufferSize = static_cast<size_t>(offset) + static_cast<size_t>(actualStride) * elements.size() + static_cast<size_t>(endPadding);
361 const auto bufferInfo = makeBufferCreateInfo(static_cast<VkDeviceSize>(bufferSize), usage);
362
363 BufferWithMemoryPtr buffer(new BufferWithMemory(vkd, device, alloc, bufferInfo, MemoryRequirement::HostVisible));
364 auto& bufferAlloc = buffer->getAllocation();
365 char* bufferDataPtr = reinterpret_cast<char*>(bufferAlloc.getHostPtr());
366
367 char* itr = bufferDataPtr + offset;
368 for (const auto& elem : elements)
369 {
370 deMemcpy(itr, &elem, sizeof(elem));
371 itr += actualStride;
372 }
373 if (endPadding > 0u)
374 deMemset(itr, 0xFF, endPadding);
375
376 flushAlloc(vkd, device, bufferAlloc);
377
378 return buffer;
379 }
380
getExtent()381 VkExtent3D getExtent ()
382 {
383 return makeExtent3D(32u, 64u, 1u);
384 }
385
getIndirectCommand(uint32_t blockSize,uint32_t dimCoord)386 VkDrawMeshTasksIndirectCommandEXT getIndirectCommand (uint32_t blockSize, uint32_t dimCoord)
387 {
388 VkDrawMeshTasksIndirectCommandEXT indirectCmd{1u, 1u, 1u};
389
390 switch (dimCoord)
391 {
392 case 0u: indirectCmd.groupCountX = blockSize; break;
393 case 1u: indirectCmd.groupCountY = blockSize; break;
394 case 2u: indirectCmd.groupCountZ = blockSize; break;
395 default:
396 DE_ASSERT(false); break;
397 }
398
399 return indirectCmd;
400 }
401
iterate(void)402 tcu::TestStatus MeshApiInstance::iterate (void)
403 {
404 const auto& vkd = m_context.getDeviceInterface();
405 const auto device = m_context.getDevice();
406 auto& alloc = m_context.getDefaultAllocator();
407 const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
408 const auto queue = m_context.getUniversalQueue();
409
410 const auto extent = getExtent();
411 const auto iExtent3D = tcu::IVec3(static_cast<int>(extent.width), static_cast<int>(extent.height), static_cast<int>(extent.depth));
412 const auto iExtent2D = tcu::IVec2(iExtent3D.x(), iExtent3D.y());
413 const auto format = VK_FORMAT_R8G8B8A8_UNORM;
414 const auto tcuFormat = mapVkFormat(format);
415 const auto colorUsage = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
416 const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
417 const tcu::Vec4 clearColor (0.0f, 0.0f, 0.0f, 1.0f);
418 const float colorThres = 0.005f; // 1/255 < 0.005 < 2/255
419 const tcu::Vec4 threshold (colorThres, colorThres, 0.0f, 0.0f);
420
421 ImageWithMemoryPtr colorBuffer;
422 Move<VkImageView> colorBufferView;
423 {
424 const VkImageCreateInfo colorBufferInfo =
425 {
426 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
427 nullptr, // const void* pNext;
428 0u, // VkImageCreateFlags flags;
429 VK_IMAGE_TYPE_2D, // VkImageType imageType;
430 format, // VkFormat format;
431 extent, // VkExtent3D extent;
432 1u, // uint32_t mipLevels;
433 1u, // uint32_t arrayLayers;
434 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
435 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
436 colorUsage, // VkImageUsageFlags usage;
437 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
438 0u, // uint32_t queueFamilyIndexCount;
439 nullptr, // const uint32_t* pQueueFamilyIndices;
440 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
441 };
442 colorBuffer = ImageWithMemoryPtr(new ImageWithMemory(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any));
443 colorBufferView = makeImageView(vkd, device, colorBuffer->get(), VK_IMAGE_VIEW_TYPE_2D, format, colorSRR);
444 }
445
446 // Prepare buffer containing the array of block sizes.
447 de::Random rnd (m_params.seed);
448 std::vector<uint32_t> blockSizes;
449
450 const uint32_t vectorSize = std::max(1u, m_params.drawCount);
451 const uint32_t largeDrawCount = vectorSize + 1u; // The indirect buffer needs to have some padding at the end. See below.
452 const uint32_t evenBlockSize = extent.height / vectorSize;
453 uint32_t remainingRows = extent.height;
454
455 blockSizes.reserve(vectorSize);
456 for (uint32_t i = 0; i < vectorSize - 1u; ++i)
457 {
458 const auto blockSize = static_cast<uint32_t>(rnd.getInt(1, evenBlockSize));
459 remainingRows -= blockSize;
460 blockSizes.push_back(blockSize);
461 }
462 blockSizes.push_back(remainingRows);
463
464 const auto blockSizesBufferSize = static_cast<VkDeviceSize>(de::dataSize(blockSizes));
465 BufferWithMemoryPtr blockSizesBuffer = makeStridedBuffer(vkd, device, alloc, blockSizes, 0u, 0u, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 0u);
466
467 // Descriptor set layout, pool and set.
468 DescriptorSetLayoutBuilder layoutBuilder;
469 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_MESH_BIT_EXT);
470 const auto setLayout = layoutBuilder.build(vkd, device);
471
472 DescriptorPoolBuilder poolBuilder;
473 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
474 const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
475
476 const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
477
478 // Update descriptor set.
479 {
480 DescriptorSetUpdateBuilder updateBuilder;
481
482 const auto location = DescriptorSetUpdateBuilder::Location::binding(0u);
483 const auto descriptorBufferInfo = makeDescriptorBufferInfo(blockSizesBuffer->get(), 0ull, blockSizesBufferSize);
484
485 updateBuilder.writeSingle(descriptorSet.get(), location, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorBufferInfo);
486 updateBuilder.update(vkd, device);
487 }
488
489 // Pipeline layout.
490 PushConstantData pcData;
491 const auto pcRanges = pcData.getRanges(m_params.useTask);
492 const auto pipelineLayout = makePipelineLayout(vkd, device, 1u, &setLayout.get(), static_cast<uint32_t>(pcRanges.size()), de::dataOrNull(pcRanges));
493
494 // Push constants: choose used dimension coordinate pseudorandomly.
495 const auto dimCoord = rnd.getUint32() % 3u;
496
497 pcData.width = extent.width;
498 pcData.height = extent.height;
499 pcData.dimMesh = dimCoord;
500 pcData.one = 1u;
501 pcData.dimTask = dimCoord;
502
503 // Render pass and framebuffer.
504 const auto renderPass = makeRenderPass(vkd, device, format);
505 const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), extent.width, extent.height);
506
507 // Pipeline.
508 Move<VkShaderModule> taskModule;
509 Move<VkShaderModule> meshModule;
510 Move<VkShaderModule> fragModule;
511
512 const auto& binaries = m_context.getBinaryCollection();
513 if (m_params.useTask)
514 taskModule = createShaderModule(vkd, device, binaries.get("task"));
515 meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
516 fragModule = createShaderModule(vkd, device, binaries.get("frag"));
517
518 const std::vector<VkViewport> viewports (1u, makeViewport(extent));
519 const std::vector<VkRect2D> scissors (1u, makeRect2D(extent));
520
521 const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
522 taskModule.get(), meshModule.get(), fragModule.get(),
523 renderPass.get(), viewports, scissors);
524
525 // Command pool and buffer.
526 const auto subpassContents = (m_params.useSecondaryCmdBuffer ? VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS : VK_SUBPASS_CONTENTS_INLINE);
527 const auto cmdPool = makeCommandPool(vkd, device, queueIndex);
528 const auto primaryCmdBuffer = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
529 const auto primary = primaryCmdBuffer.get();
530 const auto secondaryCmdBuffer = (m_params.useSecondaryCmdBuffer ? allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_SECONDARY) : Move<VkCommandBuffer>());
531 const auto secondary = secondaryCmdBuffer.get();
532 const auto rpCmdBuffer = (m_params.useSecondaryCmdBuffer ? secondary : primary); // Holding the contents of the render pass commands.
533
534 // Indirect and count buffers if needed.
535 BufferWithMemoryPtr indirectBuffer;
536 BufferWithMemoryPtr countBuffer;
537
538 if (m_params.drawType != DrawType::DRAW)
539 {
540 // Indirect draws.
541 DE_ASSERT(static_cast<bool>(m_params.indirectArgs));
542 const auto& indirectArgs = m_params.indirectArgs.get();
543
544 // Check stride and offset validity.
545 DE_ASSERT(indirectArgs.offset % 4u == 0u);
546 DE_ASSERT(indirectArgs.stride % 4u == 0u && (indirectArgs.stride == 0u || indirectArgs.stride >= static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandEXT))));
547
548 // Prepare struct vector, which will be converted to a buffer with the proper stride and offset later.
549 std::vector<VkDrawMeshTasksIndirectCommandEXT> commands;
550 commands.reserve(blockSizes.size());
551
552 std::transform(begin(blockSizes), end(blockSizes), std::back_inserter(commands),
553 [dimCoord](uint32_t blockSize) { return getIndirectCommand(blockSize, dimCoord); });
554
555 const auto padding = static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandEXT));
556 indirectBuffer = makeStridedBuffer(vkd, device, alloc, commands, indirectArgs.offset, indirectArgs.stride, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, padding);
557
558 // Prepare count buffer if needed.
559 if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT)
560 {
561 DE_ASSERT(static_cast<bool>(m_params.indirectCountLimit));
562 DE_ASSERT(static_cast<bool>(m_params.indirectCountOffset));
563
564 const auto countBufferValue = ((m_params.indirectCountLimit.get() == IndirectCountLimitType::BUFFER_VALUE)
565 ? m_params.drawCount
566 : largeDrawCount);
567
568 const std::vector<uint32_t> singleCount (1u, countBufferValue);
569 countBuffer = makeStridedBuffer(vkd, device, alloc, singleCount, m_params.indirectCountOffset.get(), static_cast<uint32_t>(sizeof(uint32_t)), VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, 0u);
570 }
571 }
572
573 // Submit commands.
574 beginCommandBuffer(vkd, primary);
575 beginRenderPass(vkd, primary, renderPass.get(), framebuffer.get(), scissors.at(0), clearColor, subpassContents);
576
577 if (m_params.useSecondaryCmdBuffer)
578 {
579 const VkCommandBufferInheritanceInfo inheritanceInfo =
580 {
581 VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, // VkStructureType sType;
582 nullptr, // const void* pNext;
583 renderPass.get(), // VkRenderPass renderPass;
584 0u, // deUint32 subpass;
585 framebuffer.get(), // VkFramebuffer framebuffer;
586 VK_FALSE, // VkBool32 occlusionQueryEnable;
587 0u, // VkQueryControlFlags queryFlags;
588 0u, // VkQueryPipelineStatisticFlags pipelineStatistics;
589 };
590
591 const VkCommandBufferUsageFlags cmdBufferFlags = (VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT | VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT);
592 const VkCommandBufferBeginInfo beginInfo =
593 {
594 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // VkStructureType sType;
595 nullptr, // const void* pNext;
596 cmdBufferFlags, // VkCommandBufferUsageFlags flags;
597 &inheritanceInfo, // const VkCommandBufferInheritanceInfo* pInheritanceInfo;
598 };
599
600 vkd.beginCommandBuffer(secondary, &beginInfo);
601 }
602
603 vkd.cmdBindDescriptorSets(rpCmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
604 {
605 const char* pcDataPtr = reinterpret_cast<const char*>(&pcData);
606 for (const auto& range : pcRanges)
607 vkd.cmdPushConstants(rpCmdBuffer, pipelineLayout.get(), range.stageFlags, range.offset, range.size, pcDataPtr + range.offset);
608 }
609 vkd.cmdBindPipeline(rpCmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
610
611 if (m_params.drawType == DrawType::DRAW)
612 {
613 const auto drawArgs = getIndirectCommand(m_params.drawCount, dimCoord);
614 vkd.cmdDrawMeshTasksEXT(rpCmdBuffer, drawArgs.groupCountX, drawArgs.groupCountY, drawArgs.groupCountZ);
615 }
616 else if (m_params.drawType == DrawType::DRAW_INDIRECT)
617 {
618 const auto& indirectArgs = m_params.indirectArgs.get();
619 vkd.cmdDrawMeshTasksIndirectEXT(rpCmdBuffer, indirectBuffer->get(), indirectArgs.offset, m_params.drawCount, indirectArgs.stride);
620 }
621 else if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT)
622 {
623 const auto& indirectArgs = m_params.indirectArgs.get();
624 const auto& indirectCountOffset = m_params.indirectCountOffset.get();
625 const auto& indirectCountLimit = m_params.indirectCountLimit.get();
626
627 const auto maxCount = ((indirectCountLimit == IndirectCountLimitType::MAX_COUNT)
628 ? m_params.drawCount
629 : largeDrawCount);
630 vkd.cmdDrawMeshTasksIndirectCountEXT(rpCmdBuffer, indirectBuffer->get(), indirectArgs.offset, countBuffer->get(), indirectCountOffset, maxCount, indirectArgs.stride);
631 }
632 else
633 DE_ASSERT(false);
634
635 if (m_params.useSecondaryCmdBuffer)
636 {
637 endCommandBuffer(vkd, secondary);
638 vkd.cmdExecuteCommands(primary, 1u, &secondary);
639 }
640
641 endRenderPass(vkd, primary);
642
643 // Output buffer to extract the color buffer.
644 BufferWithMemoryPtr outBuffer;
645 void* outBufferData = nullptr;
646 {
647 const auto outBufferSize = static_cast<VkDeviceSize>(static_cast<uint32_t>(tcu::getPixelSize(tcuFormat)) * extent.width * extent.height);
648 const auto outBufferUsage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
649 const auto outBufferInfo = makeBufferCreateInfo(outBufferSize, outBufferUsage);
650
651 outBuffer = BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, outBufferInfo, MemoryRequirement::HostVisible));
652 outBufferData = outBuffer->getAllocation().getHostPtr();
653 }
654
655 copyImageToBuffer(vkd, primary, colorBuffer->get(), outBuffer->get(), iExtent2D);
656 endCommandBuffer(vkd, primary);
657
658 submitCommandsAndWait(vkd, device, queue, primary);
659
660 // Generate reference image and compare.
661 {
662 auto& log = m_context.getTestContext().getLog();
663 auto& outBufferAlloc = outBuffer->getAllocation();
664 tcu::ConstPixelBufferAccess result (tcuFormat, iExtent3D, outBufferData);
665 tcu::TextureLevel referenceLevel (tcuFormat, iExtent3D.x(), iExtent3D.y());
666 const auto reference = referenceLevel.getAccess();
667 const auto setName = de::toString(m_params.drawType) + "_draw_count_" + de::toString(m_params.drawCount) + (m_params.useTask ? "_with_task" : "_no_task");
668 const auto fHeight = static_cast<float>(extent.height);
669 const auto fWidth = static_cast<float>(extent.width);
670
671 invalidateAlloc(vkd, device, outBufferAlloc);
672
673 for (int y = 0; y < iExtent3D.y(); ++y)
674 for (int x = 0; x < iExtent3D.x(); ++x)
675 {
676 const tcu::Vec4 refColor = ((m_params.drawCount == 0u || (m_params.drawType == DrawType::DRAW && y >= static_cast<int>(m_params.drawCount)))
677 ? clearColor
678 : tcu::Vec4(
679 // These match the per-primitive color set by the mesh shader.
680 (static_cast<float>(y) + 0.5f) / fHeight,
681 (static_cast<float>(x) + 0.5f) / fWidth,
682 0.0f,
683 1.0f));
684 reference.setPixel(refColor, x, y);
685 }
686
687 if (!tcu::floatThresholdCompare(log, setName.c_str(), "", reference, result, threshold, tcu::COMPARE_LOG_ON_ERROR))
688 return tcu::TestStatus::fail("Image comparison failed; check log for details");
689 }
690
691 return tcu::TestStatus::pass("Pass");
692 }
693
694 } // anonymous
695
createMeshShaderApiTestsEXT(tcu::TestContext & testCtx)696 tcu::TestCaseGroup* createMeshShaderApiTestsEXT (tcu::TestContext& testCtx)
697 {
698 GroupPtr mainGroup (new tcu::TestCaseGroup(testCtx, "api", "Mesh Shader API tests"));
699
700 const DrawType drawCases[] =
701 {
702 DrawType::DRAW,
703 DrawType::DRAW_INDIRECT,
704 DrawType::DRAW_INDIRECT_COUNT,
705 };
706
707 const auto extent = getExtent();
708 const uint32_t drawCountCases[] = { 0u, 1u, 2u, extent.height / 2u, extent.height };
709
710 const uint32_t normalStride = static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandEXT));
711 const uint32_t largeStride = 2u * normalStride + 4u;
712 const uint32_t altOffset = 20u;
713
714 const struct
715 {
716 tcu::Maybe<IndirectArgs> indirectArgs;
717 const char* name;
718 } indirectArgsCases[] =
719 {
720 { tcu::nothing<IndirectArgs>(), "no_indirect_args" },
721
722 // Offset 0, varying strides.
723 { tcu::just(IndirectArgs{ 0u, 0u }), "offset_0_stride_0" },
724 { tcu::just(IndirectArgs{ 0u, normalStride }), "offset_0_stride_normal" },
725 { tcu::just(IndirectArgs{ 0u, largeStride }), "offset_0_stride_large" },
726
727 // Nonzero offset, varying strides.
728 { tcu::just(IndirectArgs{ altOffset, 0u }), "offset_alt_stride_0" },
729 { tcu::just(IndirectArgs{ altOffset, normalStride }), "offset_alt_stride_normal" },
730 { tcu::just(IndirectArgs{ altOffset, largeStride }), "offset_alt_stride_large" },
731 };
732
733 const struct
734 {
735 tcu::Maybe<IndirectCountLimitType> limitType;
736 const char* name;
737 } countLimitCases[] =
738 {
739 { tcu::nothing<IndirectCountLimitType>(), "no_count_limit" },
740 { tcu::just(IndirectCountLimitType::BUFFER_VALUE), "count_limit_buffer" },
741 { tcu::just(IndirectCountLimitType::MAX_COUNT), "count_limit_max_count" },
742 };
743
744 const struct
745 {
746 tcu::Maybe<uint32_t> countOffset;
747 const char* name;
748 } countOffsetCases[] =
749 {
750 { tcu::nothing<uint32_t>(), "no_count_offset" },
751 { tcu::just(uint32_t{0u}), "count_offset_0" },
752 { tcu::just(altOffset), "count_offset_alt" },
753 };
754
755 const struct
756 {
757 bool useTask;
758 const char* name;
759 } taskCases[] =
760 {
761 { false, "no_task_shader" },
762 { true, "with_task_shader" },
763 };
764
765 const struct
766 {
767 bool secondaryCmd;
768 const char* suffix;
769 } cmdBufferCases[] =
770 {
771 { false, "" },
772 { true, "_secondary_cmd" },
773 };
774
775 uint32_t seed = 1628678795u;
776
777 for (const auto& drawCase : drawCases)
778 {
779 const auto drawCaseName = de::toString(drawCase);
780 const bool isIndirect = (drawCase != DrawType::DRAW);
781 const bool isIndirectNoCount = (drawCase == DrawType::DRAW_INDIRECT);
782 const bool isIndirectCount = (drawCase == DrawType::DRAW_INDIRECT_COUNT);
783
784 GroupPtr drawGroup(new tcu::TestCaseGroup(testCtx, drawCaseName.c_str(), ""));
785
786 for (const auto& drawCountCase : drawCountCases)
787 {
788 const auto drawCountName = "draw_count_" + de::toString(drawCountCase);
789 GroupPtr drawCountGroup(new tcu::TestCaseGroup(testCtx, drawCountName.c_str(), ""));
790
791 for (const auto& indirectArgsCase : indirectArgsCases)
792 {
793 const bool hasIndirectArgs = static_cast<bool>(indirectArgsCase.indirectArgs);
794 const bool strideZero = (hasIndirectArgs && indirectArgsCase.indirectArgs.get().stride == 0u);
795
796 if (isIndirect != hasIndirectArgs)
797 continue;
798
799 if (((isIndirectNoCount && drawCountCase > 1u) || isIndirectCount) && strideZero)
800 continue;
801
802 GroupPtr indirectArgsGroup(new tcu::TestCaseGroup(testCtx, indirectArgsCase.name, ""));
803
804 for (const auto& countLimitCase : countLimitCases)
805 {
806 const bool hasCountLimit = static_cast<bool>(countLimitCase.limitType);
807
808 if (isIndirectCount != hasCountLimit)
809 continue;
810
811 GroupPtr countLimitGroup(new tcu::TestCaseGroup(testCtx, countLimitCase.name, ""));
812
813 for (const auto& countOffsetCase : countOffsetCases)
814 {
815 const bool hasCountOffsetType = static_cast<bool>(countOffsetCase.countOffset);
816
817 if (isIndirectCount != hasCountOffsetType)
818 continue;
819
820 GroupPtr countOffsetGroup(new tcu::TestCaseGroup(testCtx, countOffsetCase.name, ""));
821
822 for (const auto& taskCase : taskCases)
823 {
824 for (const auto& cmdBufferCase : cmdBufferCases)
825 {
826 const auto testName = std::string(taskCase.name) + cmdBufferCase.suffix;
827 const TestParams params =
828 {
829 drawCase, // DrawType drawType;
830 seed++, // uint32_t seed;
831 drawCountCase, // uint32_t drawCount;
832 indirectArgsCase.indirectArgs, // tcu::Maybe<IndirectArgs> indirectArgs;
833 countLimitCase.limitType, // tcu::Maybe<IndirectCountLimitType> indirectCountLimit;
834 countOffsetCase.countOffset, // tcu::Maybe<uint32_t> indirectCountOffset;
835 taskCase.useTask, // bool useTask;
836 cmdBufferCase.secondaryCmd, // bool useSecondaryCmdBuffer;
837 };
838
839 countOffsetGroup->addChild(new MeshApiCase(testCtx, testName, "", params));
840 }
841 }
842
843 countLimitGroup->addChild(countOffsetGroup.release());
844 }
845
846 indirectArgsGroup->addChild(countLimitGroup.release());
847 }
848
849 drawCountGroup->addChild(indirectArgsGroup.release());
850 }
851
852 drawGroup->addChild(drawCountGroup.release());
853 }
854
855 mainGroup->addChild(drawGroup.release());
856 }
857
858 return mainGroup.release();
859 }
860
861 } // MeshShader
862 } // vkt
863