1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2021 The Khronos Group Inc.
6 * Copyright (c) 2021 Valve Corporation.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Mesh Shader API Tests
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktMeshShaderApiTests.hpp"
26 #include "vktTestCase.hpp"
27
28 #include "vkTypeUtil.hpp"
29 #include "vkImageWithMemory.hpp"
30 #include "vkBufferWithMemory.hpp"
31 #include "vkObjUtil.hpp"
32 #include "vkBuilderUtil.hpp"
33 #include "vkCmdUtil.hpp"
34 #include "vkImageUtil.hpp"
35
36 #include "tcuMaybe.hpp"
37 #include "tcuTestLog.hpp"
38 #include "tcuImageCompare.hpp"
39
40 #include "deRandom.hpp"
41
42 #include <iostream>
43 #include <sstream>
44 #include <vector>
45 #include <algorithm>
46 #include <iterator>
47 #include <limits>
48
49 namespace vkt
50 {
51 namespace MeshShader
52 {
53
54 namespace
55 {
56
57 using namespace vk;
58
59 using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
60 using ImageWithMemoryPtr = de::MovePtr<ImageWithMemory>;
61 using BufferWithMemoryPtr = de::MovePtr<BufferWithMemory>;
62
// Which draw command the test records.
enum class DrawType
{
	DRAW = 0,				// vkCmdDrawMeshTasksNV.
	DRAW_INDIRECT,			// vkCmdDrawMeshTasksIndirectNV.
	DRAW_INDIRECT_COUNT,	// vkCmdDrawMeshTasksIndirectCountNV.
};
69
operator <<(std::ostream & stream,DrawType drawType)70 std::ostream& operator<< (std::ostream& stream, DrawType drawType)
71 {
72 switch (drawType)
73 {
74 case DrawType::DRAW: stream << "draw"; break;
75 case DrawType::DRAW_INDIRECT: stream << "draw_indirect"; break;
76 case DrawType::DRAW_INDIRECT_COUNT: stream << "draw_indirect_count"; break;
77 default: DE_ASSERT(false); break;
78 }
79 return stream;
80 }
81
82
// This helps test the maxDrawCount rule for the DRAW_INDIRECT_COUNT case: the effective number of draws is limited both by
// the value stored in the count buffer and by the maxDrawCount argument, and each variant makes one of them the limiting one.
enum class IndirectCountLimitType
{
	BUFFER_VALUE = 0,	// The actual count will be given by the count buffer.
	MAX_COUNT,			// The actual count will be given by the maxDrawCount argument passed to the draw command.
};
89
// Placement of the draw command structures inside the indirect buffer.
struct IndirectArgs
{
	uint32_t offset;	// Byte offset of the first command; passed as the offset argument of the indirect draw call.
	uint32_t stride;	// Byte distance between commands; passed as the stride argument (0 means "no explicit stride").
};
95
96 struct TestParams
97 {
98 DrawType drawType;
99 uint32_t seed;
100 uint32_t drawCount; // Equivalent to taskCount or drawCount.
101 uint32_t firstTask; // Equivalent to firstTask in every call.
102 tcu::Maybe<IndirectArgs> indirectArgs; // Only used for DRAW_INDIRECT*.
103 tcu::Maybe<IndirectCountLimitType> indirectCountLimit; // Only used for DRAW_INDIRECT_COUNT.
104 tcu::Maybe<uint32_t> indirectCountOffset; // Only used for DRAW_INDIRECT_COUNT.
105 bool useTask;
106 };
107
// The framebuffer will have a number of rows and 32 columns. Each mesh shader workgroup will generate geometry to fill a single
// framebuffer row, using a triangle list with 32 triangles of different colors, each covering a framebuffer pixel.
//
// Note: the total number of framebuffer rows is called "full" below (e.g. 64). When using a task shader to generate work, each
// task workgroup will generate a single mesh workgroup using a push constant instead of a compile-time constant.
//
// When using DRAW, the task count will tell us how many rows of pixels will be filled in the framebuffer.
//
// When using indirect draws, the full framebuffer will always be drawn into by using multiple draw command structures, except in
// the case of drawCount==0. Together, the draws will spawn the number of tasks needed to fill the whole framebuffer. In addition,
// in order to make all argument structures different, the number of tasks in each draw command will be slightly different and
// assigned pseudorandomly.
//
// DRAW: taskCount=0, taskCount=1, taskCount=2, taskCount=half, taskCount=full
//
// DRAW_INDIRECT: drawCount=0, drawCount=1, drawCount=2, drawCount=half, drawCount=full.
// * With offset 0 and with a nonzero offset (a multiple of 4).
// * With stride 0 (only where the API allows it), with the tightly packed stride and with a larger stride that adds padding
//   between commands (a multiple of 4).
//
// DRAW_INDIRECT_COUNT: same as indirect in two variants:
// 1. Passing the count in a buffer with a large maximum.
// 2. Passing a large value in the buffer and limiting it with the maximum.
130
131 class MeshApiCase : public vkt::TestCase
132 {
133 public:
MeshApiCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const TestParams & params)134 MeshApiCase (tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParams& params)
135 : vkt::TestCase (testCtx, name, description)
136 , m_params (params)
137 {}
~MeshApiCase(void)138 virtual ~MeshApiCase (void) {}
139
140 void initPrograms (vk::SourceCollections& programCollection) const override;
141 void checkSupport (Context& context) const override;
142 TestInstance* createInstance (Context& context) const override;
143
144 protected:
145 TestParams m_params;
146 };
147
148 class MeshApiInstance : public vkt::TestInstance
149 {
150 public:
MeshApiInstance(Context & context,const TestParams & params)151 MeshApiInstance (Context& context, const TestParams& params)
152 : vkt::TestInstance (context)
153 , m_params (params)
154 {}
~MeshApiInstance(void)155 virtual ~MeshApiInstance (void) {}
156
157 tcu::TestStatus iterate (void) override;
158
159 protected:
160 TestParams m_params;
161 };
162
createInstance(Context & context) const163 TestInstance* MeshApiCase::createInstance (Context& context) const
164 {
165 return new MeshApiInstance(context, m_params);
166 }
167
168 struct PushConstantData
169 {
170 uint32_t width;
171 uint32_t height;
172 uint32_t firstTaskMesh;
173 uint32_t one;
174 uint32_t firstTaskTask;
175
getRangesvkt::MeshShader::__anon382b7d580111::PushConstantData176 std::vector<VkPushConstantRange> getRanges (bool includeTask) const
177 {
178 constexpr uint32_t offsetMesh = 0u;
179 constexpr uint32_t offsetTask = static_cast<uint32_t>(offsetof(PushConstantData, one));
180 constexpr uint32_t sizeMesh = offsetTask;
181 constexpr uint32_t sizeTask = static_cast<uint32_t>(sizeof(PushConstantData)) - offsetTask;
182
183 const VkPushConstantRange meshRange =
184 {
185 VK_SHADER_STAGE_MESH_BIT_NV, // VkShaderStageFlags stageFlags;
186 offsetMesh, // uint32_t offset;
187 sizeMesh, // uint32_t size;
188 };
189 const VkPushConstantRange taskRange =
190 {
191 VK_SHADER_STAGE_TASK_BIT_NV, // VkShaderStageFlags stageFlags;
192 offsetTask, // uint32_t offset;
193 sizeTask, // uint32_t size;
194 };
195
196 std::vector<VkPushConstantRange> ranges (1u, meshRange);
197 if (includeTask)
198 ranges.push_back(taskRange);
199 return ranges;
200 }
201 };
202
initPrograms(vk::SourceCollections & programCollection) const203 void MeshApiCase::initPrograms (vk::SourceCollections& programCollection) const
204 {
205 const std::string taskDataDecl =
206 "taskNV TaskData {\n"
207 " uint blockNumber;\n"
208 " uint blockRow;\n"
209 "} td;\n"
210 ;
211
212 // Task shader if needed.
213 if (m_params.useTask)
214 {
215 std::ostringstream task;
216 task
217 << "#version 460\n"
218 << "#extension GL_NV_mesh_shader : enable\n"
219 << "\n"
220 << "layout (local_size_x=1) in;\n"
221 << "\n"
222 << "layout (push_constant, std430) uniform TaskPushConstantBlock {\n"
223 << " layout (offset=12) uint one;\n"
224 << " layout (offset=16) uint firstTask;\n"
225 << "} pc;\n"
226 << "\n"
227 << "out " << taskDataDecl
228 << "\n"
229 << "void main ()\n"
230 << "{\n"
231 << " gl_TaskCountNV = pc.one;\n"
232 << " td.blockNumber = uint(gl_DrawID);\n"
233 << " td.blockRow = gl_WorkGroupID.x - pc.firstTask;\n"
234 << "}\n"
235 ;
236 programCollection.glslSources.add("task") << glu::TaskSource(task.str());
237 }
238
239 // Mesh shader.
240 {
241 std::ostringstream mesh;
242 mesh
243 << "#version 460\n"
244 << "#extension GL_NV_mesh_shader : enable\n"
245 << "\n"
246 << "layout (local_size_x=32) in;\n"
247 << "layout (triangles) out;\n"
248 << "layout (max_vertices=96, max_primitives=32) out;\n"
249 << "\n"
250 << "layout (push_constant, std430) uniform MeshPushConstantBlock {\n"
251 << " uint width;\n"
252 << " uint height;\n"
253 << " uint firstTask;\n"
254 << "} pc;\n"
255 << "\n"
256 << "layout (location=0) perprimitiveNV out vec4 primitiveColor[];\n"
257 << "\n"
258 << (m_params.useTask ? ("in " + taskDataDecl): "")
259 << "\n"
260 << "layout (set=0, binding=0, std430) readonly buffer BlockSizes {\n"
261 << " uint blockSize[];\n"
262 << "} bsz;\n"
263 << "\n"
264 << "uint startOfBlock (uint blockNumber)\n"
265 << "{\n"
266 << " uint start = 0;\n"
267 << " for (uint i = 0; i < blockNumber; i++)\n"
268 << " start += bsz.blockSize[i];\n"
269 << " return start;\n"
270 << "}\n"
271 << "\n"
272 << "void main ()\n"
273 << "{\n"
274 << " const uint blockNumber = " << (m_params.useTask ? "td.blockNumber" : "uint(gl_DrawID)") << ";\n"
275 << " const uint blockRow = " << (m_params.useTask ? "td.blockRow" : "(gl_WorkGroupID.x - pc.firstTask)") << ";\n"
276 << "\n"
277 << " // Each workgroup will fill one row, and each invocation will generate a\n"
278 << " // triangle around the pixel center in each column.\n"
279 << " const uint row = startOfBlock(blockNumber) + blockRow;\n"
280 << " const uint col = gl_LocalInvocationID.x;\n"
281 << "\n"
282 << " const float fHeight = float(pc.height);\n"
283 << " const float fWidth = float(pc.width);\n"
284 << "\n"
285 << " // Pixel coordinates, normalized.\n"
286 << " const float rowNorm = (float(row) + 0.5) / fHeight;\n"
287 << " const float colNorm = (float(col) + 0.5) / fWidth;\n"
288 << "\n"
289 << " // Framebuffer coordinates.\n"
290 << " const float coordX = (colNorm * 2.0) - 1.0;\n"
291 << " const float coordY = (rowNorm * 2.0) - 1.0;\n"
292 << "\n"
293 << " const float pixelWidth = 2.0 / fWidth;\n"
294 << " const float pixelHeight = 2.0 / fHeight;\n"
295 << "\n"
296 << " const float offsetX = pixelWidth / 2.0;\n"
297 << " const float offsetY = pixelHeight / 2.0;\n"
298 << "\n"
299 << " const uint baseIndex = col*3;\n"
300 << " const uvec3 indices = uvec3(baseIndex, baseIndex + 1, baseIndex + 2);\n"
301 << "\n"
302 << " gl_PrimitiveCountNV = 32u;\n"
303 << " primitiveColor[col] = vec4(rowNorm, colNorm, 0.0, 1.0);\n"
304 << "\n"
305 << " gl_PrimitiveIndicesNV[indices.x] = indices.x;\n"
306 << " gl_PrimitiveIndicesNV[indices.y] = indices.y;\n"
307 << " gl_PrimitiveIndicesNV[indices.z] = indices.z;\n"
308 << "\n"
309 << " gl_MeshVerticesNV[indices.x].gl_Position = vec4(coordX - offsetX, coordY + offsetY, 0.0, 1.0);\n"
310 << " gl_MeshVerticesNV[indices.y].gl_Position = vec4(coordX + offsetX, coordY + offsetY, 0.0, 1.0);\n"
311 << " gl_MeshVerticesNV[indices.z].gl_Position = vec4(coordX, coordY - offsetY, 0.0, 1.0);\n"
312 << "}\n"
313 ;
314 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
315 }
316
317 // Frag shader.
318 {
319 std::ostringstream frag;
320 frag
321 << "#version 460\n"
322 << "#extension GL_NV_mesh_shader : enable\n"
323 << "\n"
324 << "layout (location=0) perprimitiveNV in vec4 primitiveColor;\n"
325 << "layout (location=0) out vec4 outColor;\n"
326 << "\n"
327 << "void main ()\n"
328 << "{\n"
329 << " outColor = primitiveColor;\n"
330 << "}\n"
331 ;
332 programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
333 }
334 }
335
checkSupport(Context & context) const336 void MeshApiCase::checkSupport (Context& context) const
337 {
338 context.requireDeviceFunctionality("VK_NV_mesh_shader");
339
340 const auto& meshFeatures = context.getMeshShaderFeatures();
341
342 if (!meshFeatures.meshShader)
343 TCU_THROW(NotSupportedError, "Mesh shaders not supported");
344
345 if (m_params.useTask && !meshFeatures.taskShader)
346 TCU_THROW(NotSupportedError, "Task shaders not supported");
347
348 // VUID-vkCmdDrawMeshTasksIndirectNV-drawCount-02718
349 if (m_params.drawType == DrawType::DRAW_INDIRECT && m_params.drawCount > 1u)
350 {
351 const auto& features = context.getDeviceFeatures();
352 if (!features.multiDrawIndirect)
353 TCU_THROW(NotSupportedError, "Indirect multi-draws not supported");
354 }
355
356 // VUID-vkCmdDrawMeshTasksIndirectCountNV-None-04445
357 if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT)
358 context.requireDeviceFunctionality("VK_KHR_draw_indirect_count");
359 }
360
361 template <typename T>
makeStridedBuffer(const DeviceInterface & vkd,VkDevice device,Allocator & alloc,const std::vector<T> & elements,uint32_t offset,uint32_t stride,VkBufferUsageFlags usage,uint32_t endPadding)362 BufferWithMemoryPtr makeStridedBuffer(const DeviceInterface& vkd, VkDevice device, Allocator& alloc, const std::vector<T>& elements, uint32_t offset, uint32_t stride, VkBufferUsageFlags usage, uint32_t endPadding)
363 {
364 const auto elementSize = static_cast<uint32_t>(sizeof(T));
365 const auto actualStride = std::max(elementSize, stride);
366 const auto bufferSize = static_cast<size_t>(offset) + static_cast<size_t>(actualStride) * elements.size() + static_cast<size_t>(endPadding);
367 const auto bufferInfo = makeBufferCreateInfo(static_cast<VkDeviceSize>(bufferSize), usage);
368
369 BufferWithMemoryPtr buffer(new BufferWithMemory(vkd, device, alloc, bufferInfo, MemoryRequirement::HostVisible));
370 auto& bufferAlloc = buffer->getAllocation();
371 char* bufferDataPtr = reinterpret_cast<char*>(bufferAlloc.getHostPtr());
372
373 char* itr = bufferDataPtr + offset;
374 for (const auto& elem : elements)
375 {
376 deMemcpy(itr, &elem, sizeof(elem));
377 itr += actualStride;
378 }
379 if (endPadding > 0u)
380 deMemset(itr, 0xFF, endPadding);
381
382 flushAlloc(vkd, device, bufferAlloc);
383
384 return buffer;
385 }
386
getExtent()387 VkExtent3D getExtent ()
388 {
389 return makeExtent3D(32u, 64u, 1u);
390 }
391
iterate(void)392 tcu::TestStatus MeshApiInstance::iterate (void)
393 {
394 const auto& vkd = m_context.getDeviceInterface();
395 const auto device = m_context.getDevice();
396 auto& alloc = m_context.getDefaultAllocator();
397 const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
398 const auto queue = m_context.getUniversalQueue();
399
400 const auto extent = getExtent();
401 const auto iExtent3D = tcu::IVec3(static_cast<int>(extent.width), static_cast<int>(extent.height), static_cast<int>(extent.depth));
402 const auto iExtent2D = tcu::IVec2(iExtent3D.x(), iExtent3D.y());
403 const auto format = VK_FORMAT_R8G8B8A8_UNORM;
404 const auto tcuFormat = mapVkFormat(format);
405 const auto colorUsage = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
406 const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
407 const tcu::Vec4 clearColor (0.0f, 0.0f, 0.0f, 1.0f);
408 const float colorThres = 0.005f; // 1/255 < 0.005 < 2/255
409 const tcu::Vec4 threshold (colorThres, colorThres, 0.0f, 0.0f);
410
411 ImageWithMemoryPtr colorBuffer;
412 Move<VkImageView> colorBufferView;
413 {
414 const VkImageCreateInfo colorBufferInfo =
415 {
416 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
417 nullptr, // const void* pNext;
418 0u, // VkImageCreateFlags flags;
419 VK_IMAGE_TYPE_2D, // VkImageType imageType;
420 format, // VkFormat format;
421 extent, // VkExtent3D extent;
422 1u, // uint32_t mipLevels;
423 1u, // uint32_t arrayLayers;
424 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
425 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
426 colorUsage, // VkImageUsageFlags usage;
427 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
428 0u, // uint32_t queueFamilyIndexCount;
429 nullptr, // const uint32_t* pQueueFamilyIndices;
430 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
431 };
432 colorBuffer = ImageWithMemoryPtr(new ImageWithMemory(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any));
433 colorBufferView = makeImageView(vkd, device, colorBuffer->get(), VK_IMAGE_VIEW_TYPE_2D, format, colorSRR);
434 }
435
436 // Prepare buffer containing the array of block sizes.
437 de::Random rnd (m_params.seed);
438 std::vector<uint32_t> blockSizes;
439
440 const uint32_t vectorSize = std::max(1u, m_params.drawCount);
441 const uint32_t largeDrawCount = vectorSize + 1u; // The indirect buffer needs to have some padding at the end. See below.
442 const uint32_t evenBlockSize = extent.height / vectorSize;
443 uint32_t remainingRows = extent.height;
444
445 blockSizes.reserve(vectorSize);
446 for (uint32_t i = 0; i < vectorSize - 1u; ++i)
447 {
448 const auto blockSize = static_cast<uint32_t>(rnd.getInt(1, evenBlockSize));
449 remainingRows -= blockSize;
450 blockSizes.push_back(blockSize);
451 }
452 blockSizes.push_back(remainingRows);
453
454 const auto blockSizesBufferSize = static_cast<VkDeviceSize>(de::dataSize(blockSizes));
455 BufferWithMemoryPtr blockSizesBuffer = makeStridedBuffer(vkd, device, alloc, blockSizes, 0u, 0u, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 0u);
456
457 // Descriptor set layout, pool and set.
458 DescriptorSetLayoutBuilder layoutBuilder;
459 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_MESH_BIT_NV);
460 const auto setLayout = layoutBuilder.build(vkd, device);
461
462 DescriptorPoolBuilder poolBuilder;
463 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
464 const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
465
466 const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
467
468 // Update descriptor set.
469 {
470 DescriptorSetUpdateBuilder updateBuilder;
471
472 const auto location = DescriptorSetUpdateBuilder::Location::binding(0u);
473 const auto descriptorBufferInfo = makeDescriptorBufferInfo(blockSizesBuffer->get(), 0ull, blockSizesBufferSize);
474
475 updateBuilder.writeSingle(descriptorSet.get(), location, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorBufferInfo);
476 updateBuilder.update(vkd, device);
477 }
478
479 // Pipeline layout.
480 PushConstantData pcData;
481 const auto pcRanges = pcData.getRanges(m_params.useTask);
482 const auto pipelineLayout = makePipelineLayout(vkd, device, 1u, &setLayout.get(), static_cast<uint32_t>(pcRanges.size()), de::dataOrNull(pcRanges));
483
484 // Push constants.
485 pcData.width = extent.width;
486 pcData.height = extent.height;
487 pcData.firstTaskMesh = m_params.firstTask;
488 pcData.one = 1u;
489 pcData.firstTaskTask = m_params.firstTask;
490
491 // Render pass and framebuffer.
492 const auto renderPass = makeRenderPass(vkd, device, format);
493 const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), extent.width, extent.height);
494
495 // Pipeline.
496 Move<VkShaderModule> taskModule;
497 Move<VkShaderModule> meshModule;
498 Move<VkShaderModule> fragModule;
499
500 const auto& binaries = m_context.getBinaryCollection();
501 if (m_params.useTask)
502 taskModule = createShaderModule(vkd, device, binaries.get("task"));
503 meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
504 fragModule = createShaderModule(vkd, device, binaries.get("frag"));
505
506 const std::vector<VkViewport> viewports (1u, makeViewport(extent));
507 const std::vector<VkRect2D> scissors (1u, makeRect2D(extent));
508
509 const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
510 taskModule.get(), meshModule.get(), fragModule.get(),
511 renderPass.get(), viewports, scissors);
512
513 // Command pool and buffer.
514 const auto cmdPool = makeCommandPool(vkd, device, queueIndex);
515 const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
516 const auto cmdBuffer = cmdBufferPtr.get();
517
518 // Indirect and count buffers if needed.
519 BufferWithMemoryPtr indirectBuffer;
520 BufferWithMemoryPtr countBuffer;
521
522 if (m_params.drawType != DrawType::DRAW)
523 {
524 // Indirect draws.
525 DE_ASSERT(static_cast<bool>(m_params.indirectArgs));
526 const auto& indirectArgs = m_params.indirectArgs.get();
527
528 // Check stride and offset validity.
529 DE_ASSERT(indirectArgs.offset % 4u == 0u);
530 DE_ASSERT(indirectArgs.stride % 4u == 0u && (indirectArgs.stride == 0u || indirectArgs.stride >= static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandNV))));
531
532 // Prepare struct vector, which will be converted to a buffer with the proper stride and offset later.
533 std::vector<VkDrawMeshTasksIndirectCommandNV> commands;
534 commands.reserve(blockSizes.size());
535
536 std::transform(begin(blockSizes), end(blockSizes), std::back_inserter(commands),
537 [this](uint32_t blockSize) { return VkDrawMeshTasksIndirectCommandNV{blockSize, this->m_params.firstTask}; });
538
539 const auto padding = static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandNV));
540 indirectBuffer = makeStridedBuffer(vkd, device, alloc, commands, indirectArgs.offset, indirectArgs.stride, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, padding);
541
542 // Prepare count buffer if needed.
543 if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT)
544 {
545 DE_ASSERT(static_cast<bool>(m_params.indirectCountLimit));
546 DE_ASSERT(static_cast<bool>(m_params.indirectCountOffset));
547
548 const auto countBufferValue = ((m_params.indirectCountLimit.get() == IndirectCountLimitType::BUFFER_VALUE)
549 ? m_params.drawCount
550 : largeDrawCount);
551
552 const std::vector<uint32_t> singleCount (1u, countBufferValue);
553 countBuffer = makeStridedBuffer(vkd, device, alloc, singleCount, m_params.indirectCountOffset.get(), static_cast<uint32_t>(sizeof(uint32_t)), VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, 0u);
554 }
555 }
556
557 // Submit commands.
558 beginCommandBuffer(vkd, cmdBuffer);
559 beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), clearColor);
560
561 vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
562 {
563 const char* pcDataPtr = reinterpret_cast<const char*>(&pcData);
564 for (const auto& range : pcRanges)
565 vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), range.stageFlags, range.offset, range.size, pcDataPtr + range.offset);
566 }
567 vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
568
569 if (m_params.drawType == DrawType::DRAW)
570 {
571 vkd.cmdDrawMeshTasksNV(cmdBuffer, m_params.drawCount, m_params.firstTask);
572 }
573 else if (m_params.drawType == DrawType::DRAW_INDIRECT)
574 {
575 const auto& indirectArgs = m_params.indirectArgs.get();
576 vkd.cmdDrawMeshTasksIndirectNV(cmdBuffer, indirectBuffer->get(), indirectArgs.offset, m_params.drawCount, indirectArgs.stride);
577 }
578 else if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT)
579 {
580 const auto& indirectArgs = m_params.indirectArgs.get();
581 const auto& indirectCountOffset = m_params.indirectCountOffset.get();
582 const auto& indirectCountLimit = m_params.indirectCountLimit.get();
583
584 const auto maxCount = ((indirectCountLimit == IndirectCountLimitType::MAX_COUNT)
585 ? m_params.drawCount
586 : largeDrawCount);
587 vkd.cmdDrawMeshTasksIndirectCountNV(cmdBuffer, indirectBuffer->get(), indirectArgs.offset, countBuffer->get(), indirectCountOffset, maxCount, indirectArgs.stride);
588 }
589 else
590 DE_ASSERT(false);
591
592 endRenderPass(vkd, cmdBuffer);
593
594 // Output buffer to extract the color buffer.
595 BufferWithMemoryPtr outBuffer;
596 void* outBufferData = nullptr;
597 {
598 const auto outBufferSize = static_cast<VkDeviceSize>(static_cast<uint32_t>(tcu::getPixelSize(tcuFormat)) * extent.width * extent.height);
599 const auto outBufferUsage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
600 const auto outBufferInfo = makeBufferCreateInfo(outBufferSize, outBufferUsage);
601
602 outBuffer = BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, outBufferInfo, MemoryRequirement::HostVisible));
603 outBufferData = outBuffer->getAllocation().getHostPtr();
604 }
605
606 copyImageToBuffer(vkd, cmdBuffer, colorBuffer->get(), outBuffer->get(), iExtent2D);
607 endCommandBuffer(vkd, cmdBuffer);
608 submitCommandsAndWait(vkd, device, queue, cmdBuffer);
609
610 // Generate reference image and compare.
611 {
612 auto& log = m_context.getTestContext().getLog();
613 auto& outBufferAlloc = outBuffer->getAllocation();
614 tcu::ConstPixelBufferAccess result (tcuFormat, iExtent3D, outBufferData);
615 tcu::TextureLevel referenceLevel (tcuFormat, iExtent3D.x(), iExtent3D.y());
616 const auto reference = referenceLevel.getAccess();
617 const auto setName = de::toString(m_params.drawType) + "_draw_count_" + de::toString(m_params.drawCount) + (m_params.useTask ? "_with_task" : "_no_task");
618 const auto fHeight = static_cast<float>(extent.height);
619 const auto fWidth = static_cast<float>(extent.width);
620
621 invalidateAlloc(vkd, device, outBufferAlloc);
622
623 for (int y = 0; y < iExtent3D.y(); ++y)
624 for (int x = 0; x < iExtent3D.x(); ++x)
625 {
626 const tcu::Vec4 refColor = ((m_params.drawCount == 0u || (m_params.drawType == DrawType::DRAW && y >= static_cast<int>(m_params.drawCount)))
627 ? clearColor
628 : tcu::Vec4(
629 // These match the per-primitive color set by the mesh shader.
630 (static_cast<float>(y) + 0.5f) / fHeight,
631 (static_cast<float>(x) + 0.5f) / fWidth,
632 0.0f,
633 1.0f));
634 reference.setPixel(refColor, x, y);
635 }
636
637 if (!tcu::floatThresholdCompare(log, setName.c_str(), "", reference, result, threshold, tcu::COMPARE_LOG_ON_ERROR))
638 return tcu::TestStatus::fail("Image comparison failed; check log for details");
639 }
640
641 return tcu::TestStatus::pass("Pass");
642 }
643
644 } // anonymous
645
createMeshShaderApiTests(tcu::TestContext & testCtx)646 tcu::TestCaseGroup* createMeshShaderApiTests (tcu::TestContext& testCtx)
647 {
648 GroupPtr mainGroup (new tcu::TestCaseGroup(testCtx, "api", "Mesh Shader API tests"));
649
650 const DrawType drawCases[] =
651 {
652 DrawType::DRAW,
653 DrawType::DRAW_INDIRECT,
654 DrawType::DRAW_INDIRECT_COUNT,
655 };
656
657 const auto extent = getExtent();
658 const uint32_t drawCountCases[] = { 0u, 1u, 2u, extent.height / 2u, extent.height };
659
660 const uint32_t normalStride = static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandNV));
661 const uint32_t largeStride = 2u * normalStride + 4u;
662 const uint32_t altOffset = 20u;
663
664 const struct
665 {
666 tcu::Maybe<IndirectArgs> indirectArgs;
667 const char* name;
668 } indirectArgsCases[] =
669 {
670 { tcu::nothing<IndirectArgs>(), "no_indirect_args" },
671
672 // Offset 0, varying strides.
673 { tcu::just(IndirectArgs{ 0u, 0u }), "offset_0_stride_0" },
674 { tcu::just(IndirectArgs{ 0u, normalStride }), "offset_0_stride_normal" },
675 { tcu::just(IndirectArgs{ 0u, largeStride }), "offset_0_stride_large" },
676
677 // Nonzero offset, varying strides.
678 { tcu::just(IndirectArgs{ altOffset, 0u }), "offset_alt_stride_0" },
679 { tcu::just(IndirectArgs{ altOffset, normalStride }), "offset_alt_stride_normal" },
680 { tcu::just(IndirectArgs{ altOffset, largeStride }), "offset_alt_stride_large" },
681 };
682
683 const struct
684 {
685 tcu::Maybe<IndirectCountLimitType> limitType;
686 const char* name;
687 } countLimitCases[] =
688 {
689 { tcu::nothing<IndirectCountLimitType>(), "no_count_limit" },
690 { tcu::just(IndirectCountLimitType::BUFFER_VALUE), "count_limit_buffer" },
691 { tcu::just(IndirectCountLimitType::MAX_COUNT), "count_limit_max_count" },
692 };
693
694 const struct
695 {
696 tcu::Maybe<uint32_t> countOffset;
697 const char* name;
698 } countOffsetCases[] =
699 {
700 { tcu::nothing<uint32_t>(), "no_count_offset" },
701 { tcu::just(uint32_t{0u}), "count_offset_0" },
702 { tcu::just(altOffset), "count_offset_alt" },
703 };
704
705 const struct
706 {
707 bool useTask;
708 const char* name;
709 } taskCases[] =
710 {
711 { false, "no_task_shader" },
712 { true, "with_task_shader" },
713 };
714
715 const struct
716 {
717 uint32_t firstTask;
718 const char* name;
719 } firstTaskCases[] =
720 {
721 { 0u, "first_task_zero" },
722 { 1001u, "first_task_nonzero" },
723 };
724
725 uint32_t seed = 1628678795u;
726
727 for (const auto& drawCase : drawCases)
728 {
729 const auto drawCaseName = de::toString(drawCase);
730 const bool isIndirect = (drawCase != DrawType::DRAW);
731 const bool isIndirectNoCount = (drawCase == DrawType::DRAW_INDIRECT);
732 const bool isIndirectCount = (drawCase == DrawType::DRAW_INDIRECT_COUNT);
733
734 GroupPtr drawGroup(new tcu::TestCaseGroup(testCtx, drawCaseName.c_str(), ""));
735
736 for (const auto& drawCountCase : drawCountCases)
737 {
738 const auto drawCountName = "draw_count_" + de::toString(drawCountCase);
739 GroupPtr drawCountGroup(new tcu::TestCaseGroup(testCtx, drawCountName.c_str(), ""));
740
741 for (const auto& indirectArgsCase : indirectArgsCases)
742 {
743 const bool hasIndirectArgs = static_cast<bool>(indirectArgsCase.indirectArgs);
744 const bool strideZero = (hasIndirectArgs && indirectArgsCase.indirectArgs.get().stride == 0u);
745
746 if (isIndirect != hasIndirectArgs)
747 continue;
748
749 // VUID-vkCmdDrawMeshTasksIndirectNV-drawCount-02146 and VUID-vkCmdDrawMeshTasksIndirectCountNV-stride-02182.
750 if (((isIndirectNoCount && drawCountCase > 1u) || isIndirectCount) && strideZero)
751 continue;
752
753 GroupPtr indirectArgsGroup(new tcu::TestCaseGroup(testCtx, indirectArgsCase.name, ""));
754
755 for (const auto& countLimitCase : countLimitCases)
756 {
757 const bool hasCountLimit = static_cast<bool>(countLimitCase.limitType);
758
759 if (isIndirectCount != hasCountLimit)
760 continue;
761
762 GroupPtr countLimitGroup(new tcu::TestCaseGroup(testCtx, countLimitCase.name, ""));
763
764 for (const auto& countOffsetCase : countOffsetCases)
765 {
766 const bool hasCountOffsetType = static_cast<bool>(countOffsetCase.countOffset);
767
768 if (isIndirectCount != hasCountOffsetType)
769 continue;
770
771 GroupPtr countOffsetGroup(new tcu::TestCaseGroup(testCtx, countOffsetCase.name, ""));
772
773 for (const auto& taskCase : taskCases)
774 {
775 GroupPtr taskCaseGrp(new tcu::TestCaseGroup(testCtx, taskCase.name, ""));
776
777 for (const auto& firstTaskCase : firstTaskCases)
778 {
779 const TestParams params =
780 {
781 drawCase, // DrawType drawType;
782 seed++, // uint32_t seed;
783 drawCountCase, // uint32_t drawCount;
784 firstTaskCase.firstTask, // uint32_t firstTask;
785 indirectArgsCase.indirectArgs, // tcu::Maybe<IndirectArgs> indirectArgs;
786 countLimitCase.limitType, // tcu::Maybe<IndirectCountLimitType> indirectCountLimit;
787 countOffsetCase.countOffset, // tcu::Maybe<uint32_t> indirectCountOffset;
788 taskCase.useTask, // bool useTask;
789 };
790
791 taskCaseGrp->addChild(new MeshApiCase(testCtx, firstTaskCase.name, "", params));
792 }
793
794 countOffsetGroup->addChild(taskCaseGrp.release());
795 }
796
797 countLimitGroup->addChild(countOffsetGroup.release());
798 }
799
800 indirectArgsGroup->addChild(countLimitGroup.release());
801 }
802
803 drawCountGroup->addChild(indirectArgsGroup.release());
804 }
805
806 drawGroup->addChild(drawCountGroup.release());
807 }
808
809 mainGroup->addChild(drawGroup.release());
810 }
811
812 return mainGroup.release();
813 }
814
815 } // MeshShader
816 } // vkt
817