/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2021 The Khronos Group Inc.
 * Copyright (c) 2021 Valve Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Mesh Shader API Tests
 *//*--------------------------------------------------------------------*/
24 
25 #include "vktMeshShaderApiTests.hpp"
26 #include "vktTestCase.hpp"
27 
28 #include "vkTypeUtil.hpp"
29 #include "vkImageWithMemory.hpp"
30 #include "vkBufferWithMemory.hpp"
31 #include "vkObjUtil.hpp"
32 #include "vkBuilderUtil.hpp"
33 #include "vkCmdUtil.hpp"
34 #include "vkImageUtil.hpp"
35 
36 #include "tcuMaybe.hpp"
37 #include "tcuTestLog.hpp"
38 #include "tcuImageCompare.hpp"
39 
40 #include "deRandom.hpp"
41 
42 #include <iostream>
43 #include <sstream>
44 #include <vector>
45 #include <algorithm>
46 #include <iterator>
47 #include <limits>
48 
49 namespace vkt
50 {
51 namespace MeshShader
52 {
53 
54 namespace
55 {
56 
57 using namespace vk;
58 
59 using GroupPtr				= de::MovePtr<tcu::TestCaseGroup>;
60 using ImageWithMemoryPtr	= de::MovePtr<ImageWithMemory>;
61 using BufferWithMemoryPtr	= de::MovePtr<BufferWithMemory>;
62 
// Draw command used by a test case to launch the mesh shading work.
enum class DrawType
{
	DRAW = 0,				// Recorded with cmdDrawMeshTasksNV.
	DRAW_INDIRECT,			// Recorded with cmdDrawMeshTasksIndirectNV.
	DRAW_INDIRECT_COUNT,	// Recorded with cmdDrawMeshTasksIndirectCountNV.
};
69 
operator <<(std::ostream & stream,DrawType drawType)70 std::ostream& operator<< (std::ostream& stream, DrawType drawType)
71 {
72 	switch (drawType)
73 	{
74 	case DrawType::DRAW:				stream << "draw";					break;
75 	case DrawType::DRAW_INDIRECT:		stream << "draw_indirect";			break;
76 	case DrawType::DRAW_INDIRECT_COUNT:	stream << "draw_indirect_count";	break;
77 	default: DE_ASSERT(false); break;
78 	}
79 	return stream;
80 }
81 
82 
// This helps test the maxDrawCount rule for the DRAW_INDIRECT_COUNT case: the count buffer value and the maxDrawCount
// argument are both passed to the draw command and this selects which of the two holds the intended draw count, while
// the other one is set to a larger value.
enum class IndirectCountLimitType
{
	BUFFER_VALUE = 0,		// The actual count will be given by the count buffer.
	MAX_COUNT,				// The actual count will be given by the maxDrawCount argument passed to the draw command.
};
89 
// Layout arguments for the indirect buffer, as passed to the indirect draw commands.
struct IndirectArgs
{
	uint32_t offset;	// Byte offset of the first command structure in the indirect buffer.
	uint32_t stride;	// Byte stride between command structures; a value of 0 is also used by some cases.
};
95 
96 struct TestParams
97 {
98 	DrawType							drawType;
99 	uint32_t							seed;
100 	uint32_t							drawCount;				// Equivalent to taskCount or drawCount.
101 	uint32_t							firstTask;				// Equivalent to firstTask in every call.
102 	tcu::Maybe<IndirectArgs>			indirectArgs;			// Only used for DRAW_INDIRECT*.
103 	tcu::Maybe<IndirectCountLimitType>	indirectCountLimit;		// Only used for DRAW_INDIRECT_COUNT.
104 	tcu::Maybe<uint32_t>				indirectCountOffset;	// Only used for DRAW_INDIRECT_COUNT.
105 	bool								useTask;
106 };
107 
// The framebuffer will have a number of rows and 32 columns. Each mesh shader workgroup will generate geometry to fill a single
// framebuffer row, using a triangle list with 32 triangles of different colors, each covering a framebuffer pixel.
//
// Note: the total number of framebuffer rows is called "full" below (e.g. 64). When using a task shader to generate work, each
// task workgroup will generate a single mesh workgroup using a push constant instead of a compile-time constant.
//
// When using DRAW, the task count will tell us how many rows of pixels will be filled in the framebuffer.
//
// When using indirect draws, the full framebuffer will always be drawn into by using multiple draw command structures, except in
// the case of drawCount==0. Together, the draws will spawn the needed number of tasks to fill the whole framebuffer. In addition,
// in order to make all argument structures different, the number of tasks in each draw command will be slightly different and
// assigned pseudorandomly.
//
// DRAW: taskCount=0, taskCount=1, taskCount=2, taskCount=half, taskCount=full
//
// DRAW_INDIRECT: drawCount=0, drawCount=1, drawCount=2, drawCount=half, drawCount=full.
//  * With offset 0 and pseudorandom (multiples of 4).
//  * With stride adding a padding of 0 and pseudorandom (multiples of 4).
//
// DRAW_INDIRECT_COUNT: same as indirect in two variants:
//  1. Passing the count in a buffer with a large maximum.
//  2. Passing a large value in the buffer and limiting it with the maximum.
130 
131 class MeshApiCase : public vkt::TestCase
132 {
133 public:
MeshApiCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const TestParams & params)134 					MeshApiCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParams& params)
135 						: vkt::TestCase	(testCtx, name, description)
136 						, m_params		(params)
137 						{}
~MeshApiCase(void)138 	virtual			~MeshApiCase	(void) {}
139 
140 	void			initPrograms	(vk::SourceCollections& programCollection) const override;
141 	void			checkSupport	(Context& context) const override;
142 	TestInstance*	createInstance	(Context& context) const override;
143 
144 protected:
145 	TestParams		m_params;
146 };
147 
148 class MeshApiInstance : public vkt::TestInstance
149 {
150 public:
MeshApiInstance(Context & context,const TestParams & params)151 						MeshApiInstance		(Context& context, const TestParams& params)
152 							: vkt::TestInstance	(context)
153 							, m_params			(params)
154 							{}
~MeshApiInstance(void)155 	virtual				~MeshApiInstance	(void) {}
156 
157 	tcu::TestStatus		iterate				(void) override;
158 
159 protected:
160 	TestParams			m_params;
161 };
162 
createInstance(Context & context) const163 TestInstance* MeshApiCase::createInstance (Context& context) const
164 {
165 	return new MeshApiInstance(context, m_params);
166 }
167 
168 struct PushConstantData
169 {
170 	uint32_t width;
171 	uint32_t height;
172 	uint32_t firstTaskMesh;
173 	uint32_t one;
174 	uint32_t firstTaskTask;
175 
getRangesvkt::MeshShader::__anon382b7d580111::PushConstantData176 	std::vector<VkPushConstantRange> getRanges (bool includeTask) const
177 	{
178 		constexpr uint32_t offsetMesh = 0u;
179 		constexpr uint32_t offsetTask = static_cast<uint32_t>(offsetof(PushConstantData, one));
180 		constexpr uint32_t sizeMesh = offsetTask;
181 		constexpr uint32_t sizeTask = static_cast<uint32_t>(sizeof(PushConstantData)) - offsetTask;
182 
183 		const VkPushConstantRange meshRange =
184 		{
185 			VK_SHADER_STAGE_MESH_BIT_NV,	//	VkShaderStageFlags	stageFlags;
186 			offsetMesh,						//	uint32_t			offset;
187 			sizeMesh,						//	uint32_t			size;
188 		};
189 		const VkPushConstantRange taskRange =
190 		{
191 			VK_SHADER_STAGE_TASK_BIT_NV,	//	VkShaderStageFlags	stageFlags;
192 			offsetTask,						//	uint32_t			offset;
193 			sizeTask,						//	uint32_t			size;
194 		};
195 
196 		std::vector<VkPushConstantRange> ranges (1u, meshRange);
197 		if (includeTask)
198 			ranges.push_back(taskRange);
199 		return ranges;
200 	}
201 };
202 
initPrograms(vk::SourceCollections & programCollection) const203 void MeshApiCase::initPrograms (vk::SourceCollections& programCollection) const
204 {
205 	const std::string taskDataDecl =
206 		"taskNV TaskData {\n"
207 		"    uint blockNumber;\n"
208 		"    uint blockRow;\n"
209 		"} td;\n"
210 		;
211 
212 	// Task shader if needed.
213 	if (m_params.useTask)
214 	{
215 		std::ostringstream task;
216 		task
217 			<< "#version 460\n"
218 			<< "#extension GL_NV_mesh_shader : enable\n"
219 			<< "\n"
220 			<< "layout (local_size_x=1) in;\n"
221 			<< "\n"
222 			<< "layout (push_constant, std430) uniform TaskPushConstantBlock {\n"
223 			<< "    layout (offset=12) uint one;\n"
224 			<< "    layout (offset=16) uint firstTask;\n"
225 			<< "} pc;\n"
226 			<< "\n"
227 			<< "out " << taskDataDecl
228 			<< "\n"
229 			<< "void main ()\n"
230 			<< "{\n"
231 			<< "    gl_TaskCountNV  = pc.one;\n"
232 			<< "    td.blockNumber  = uint(gl_DrawID);\n"
233 			<< "    td.blockRow     = gl_WorkGroupID.x - pc.firstTask;\n"
234 			<< "}\n"
235 			;
236 		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
237 	}
238 
239 	// Mesh shader.
240 	{
241 		std::ostringstream mesh;
242 		mesh
243 			<< "#version 460\n"
244 			<< "#extension GL_NV_mesh_shader : enable\n"
245 			<< "\n"
246 			<< "layout (local_size_x=32) in;\n"
247 			<< "layout (triangles) out;\n"
248 			<< "layout (max_vertices=96, max_primitives=32) out;\n"
249 			<< "\n"
250 			<< "layout (push_constant, std430) uniform MeshPushConstantBlock {\n"
251 			<< "    uint width;\n"
252 			<< "    uint height;\n"
253 			<< "    uint firstTask;\n"
254 			<< "} pc;\n"
255 			<< "\n"
256 			<< "layout (location=0) perprimitiveNV out vec4 primitiveColor[];\n"
257 			<< "\n"
258 			<< (m_params.useTask ? ("in " + taskDataDecl): "")
259 			<< "\n"
260 			<< "layout (set=0, binding=0, std430) readonly buffer BlockSizes {\n"
261 			<< "    uint blockSize[];\n"
262 			<< "} bsz;\n"
263 			<< "\n"
264 			<< "uint startOfBlock (uint blockNumber)\n"
265 			<< "{\n"
266 			<< "    uint start = 0;\n"
267 			<< "    for (uint i = 0; i < blockNumber; i++)\n"
268 			<< "        start += bsz.blockSize[i];\n"
269 			<< "    return start;\n"
270 			<< "}\n"
271 			<< "\n"
272 			<< "void main ()\n"
273 			<< "{\n"
274 			<< "    const uint blockNumber = " << (m_params.useTask ? "td.blockNumber" : "uint(gl_DrawID)") << ";\n"
275 			<< "    const uint blockRow = " << (m_params.useTask ? "td.blockRow" : "(gl_WorkGroupID.x - pc.firstTask)") << ";\n"
276 			<< "\n"
277 			<< "    // Each workgroup will fill one row, and each invocation will generate a\n"
278 			<< "    // triangle around the pixel center in each column.\n"
279 			<< "    const uint row = startOfBlock(blockNumber) + blockRow;\n"
280 			<< "    const uint col = gl_LocalInvocationID.x;\n"
281 			<< "\n"
282 			<< "    const float fHeight = float(pc.height);\n"
283 			<< "    const float fWidth = float(pc.width);\n"
284 			<< "\n"
285 			<< "    // Pixel coordinates, normalized.\n"
286 			<< "    const float rowNorm = (float(row) + 0.5) / fHeight;\n"
287 			<< "    const float colNorm = (float(col) + 0.5) / fWidth;\n"
288 			<< "\n"
289 			<< "    // Framebuffer coordinates.\n"
290 			<< "    const float coordX = (colNorm * 2.0) - 1.0;\n"
291 			<< "    const float coordY = (rowNorm * 2.0) - 1.0;\n"
292 			<< "\n"
293 			<< "    const float pixelWidth = 2.0 / fWidth;\n"
294 			<< "    const float pixelHeight = 2.0 / fHeight;\n"
295 			<< "\n"
296 			<< "    const float offsetX = pixelWidth / 2.0;\n"
297 			<< "    const float offsetY = pixelHeight / 2.0;\n"
298 			<< "\n"
299 			<< "    const uint baseIndex = col*3;\n"
300 			<< "    const uvec3 indices = uvec3(baseIndex, baseIndex + 1, baseIndex + 2);\n"
301 			<< "\n"
302 			<< "    gl_PrimitiveCountNV = 32u;\n"
303 			<< "    primitiveColor[col] = vec4(rowNorm, colNorm, 0.0, 1.0);\n"
304 			<< "\n"
305 			<< "    gl_PrimitiveIndicesNV[indices.x] = indices.x;\n"
306 			<< "    gl_PrimitiveIndicesNV[indices.y] = indices.y;\n"
307 			<< "    gl_PrimitiveIndicesNV[indices.z] = indices.z;\n"
308 			<< "\n"
309 			<< "    gl_MeshVerticesNV[indices.x].gl_Position = vec4(coordX - offsetX, coordY + offsetY, 0.0, 1.0);\n"
310 			<< "    gl_MeshVerticesNV[indices.y].gl_Position = vec4(coordX + offsetX, coordY + offsetY, 0.0, 1.0);\n"
311 			<< "    gl_MeshVerticesNV[indices.z].gl_Position = vec4(coordX, coordY - offsetY, 0.0, 1.0);\n"
312 			<< "}\n"
313 			;
314 		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
315 	}
316 
317 	// Frag shader.
318 	{
319 		std::ostringstream frag;
320 		frag
321 			<< "#version 460\n"
322 			<< "#extension GL_NV_mesh_shader : enable\n"
323 			<< "\n"
324 			<< "layout (location=0) perprimitiveNV in vec4 primitiveColor;\n"
325 			<< "layout (location=0) out vec4 outColor;\n"
326 			<< "\n"
327 			<< "void main ()\n"
328 			<< "{\n"
329 			<< "    outColor = primitiveColor;\n"
330 			<< "}\n"
331 			;
332 		programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
333 	}
334 }
335 
checkSupport(Context & context) const336 void MeshApiCase::checkSupport (Context& context) const
337 {
338 	context.requireDeviceFunctionality("VK_NV_mesh_shader");
339 
340 	const auto& meshFeatures = context.getMeshShaderFeatures();
341 
342 	if (!meshFeatures.meshShader)
343 		TCU_THROW(NotSupportedError, "Mesh shaders not supported");
344 
345 	if (m_params.useTask && !meshFeatures.taskShader)
346 		TCU_THROW(NotSupportedError, "Task shaders not supported");
347 
348 	// VUID-vkCmdDrawMeshTasksIndirectNV-drawCount-02718
349 	if (m_params.drawType == DrawType::DRAW_INDIRECT && m_params.drawCount > 1u)
350 	{
351 		const auto& features = context.getDeviceFeatures();
352 		if (!features.multiDrawIndirect)
353 			TCU_THROW(NotSupportedError, "Indirect multi-draws not supported");
354 	}
355 
356 	// VUID-vkCmdDrawMeshTasksIndirectCountNV-None-04445
357 	if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT)
358 		context.requireDeviceFunctionality("VK_KHR_draw_indirect_count");
359 }
360 
361 template <typename T>
makeStridedBuffer(const DeviceInterface & vkd,VkDevice device,Allocator & alloc,const std::vector<T> & elements,uint32_t offset,uint32_t stride,VkBufferUsageFlags usage,uint32_t endPadding)362 BufferWithMemoryPtr makeStridedBuffer(const DeviceInterface& vkd, VkDevice device, Allocator& alloc, const std::vector<T>& elements, uint32_t offset, uint32_t stride, VkBufferUsageFlags usage, uint32_t endPadding)
363 {
364 	const auto elementSize	= static_cast<uint32_t>(sizeof(T));
365 	const auto actualStride	= std::max(elementSize, stride);
366 	const auto bufferSize	= static_cast<size_t>(offset) + static_cast<size_t>(actualStride) * elements.size() + static_cast<size_t>(endPadding);
367 	const auto bufferInfo	= makeBufferCreateInfo(static_cast<VkDeviceSize>(bufferSize), usage);
368 
369 	BufferWithMemoryPtr buffer(new BufferWithMemory(vkd, device, alloc, bufferInfo, MemoryRequirement::HostVisible));
370 	auto& bufferAlloc	= buffer->getAllocation();
371 	char* bufferDataPtr	= reinterpret_cast<char*>(bufferAlloc.getHostPtr());
372 
373 	char* itr = bufferDataPtr + offset;
374 	for (const auto& elem : elements)
375 	{
376 		deMemcpy(itr, &elem, sizeof(elem));
377 		itr += actualStride;
378 	}
379 	if (endPadding > 0u)
380 		deMemset(itr, 0xFF, endPadding);
381 
382 	flushAlloc(vkd, device, bufferAlloc);
383 
384 	return buffer;
385 }
386 
getExtent()387 VkExtent3D getExtent ()
388 {
389 	return makeExtent3D(32u, 64u, 1u);
390 }
391 
iterate(void)392 tcu::TestStatus MeshApiInstance::iterate (void)
393 {
394 	const auto&		vkd			= m_context.getDeviceInterface();
395 	const auto		device		= m_context.getDevice();
396 	auto&			alloc		= m_context.getDefaultAllocator();
397 	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
398 	const auto		queue		= m_context.getUniversalQueue();
399 
400 	const auto		extent		= getExtent();
401 	const auto		iExtent3D	= tcu::IVec3(static_cast<int>(extent.width), static_cast<int>(extent.height), static_cast<int>(extent.depth));
402 	const auto		iExtent2D	= tcu::IVec2(iExtent3D.x(), iExtent3D.y());
403 	const auto		format		= VK_FORMAT_R8G8B8A8_UNORM;
404 	const auto		tcuFormat	= mapVkFormat(format);
405 	const auto		colorUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
406 	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
407 	const tcu::Vec4	clearColor	(0.0f, 0.0f, 0.0f, 1.0f);
408 	const float		colorThres	= 0.005f; // 1/255 < 0.005 < 2/255
409 	const tcu::Vec4	threshold	(colorThres, colorThres, 0.0f, 0.0f);
410 
411 	ImageWithMemoryPtr	colorBuffer;
412 	Move<VkImageView>	colorBufferView;
413 	{
414 		const VkImageCreateInfo colorBufferInfo =
415 		{
416 			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
417 			nullptr,								//	const void*				pNext;
418 			0u,										//	VkImageCreateFlags		flags;
419 			VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
420 			format,									//	VkFormat				format;
421 			extent,									//	VkExtent3D				extent;
422 			1u,										//	uint32_t				mipLevels;
423 			1u,										//	uint32_t				arrayLayers;
424 			VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
425 			VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
426 			colorUsage,								//	VkImageUsageFlags		usage;
427 			VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
428 			0u,										//	uint32_t				queueFamilyIndexCount;
429 			nullptr,								//	const uint32_t*			pQueueFamilyIndices;
430 			VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
431 		};
432 		colorBuffer = ImageWithMemoryPtr(new ImageWithMemory(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any));
433 		colorBufferView = makeImageView(vkd, device, colorBuffer->get(), VK_IMAGE_VIEW_TYPE_2D, format, colorSRR);
434 	}
435 
436 	// Prepare buffer containing the array of block sizes.
437 	de::Random				rnd				(m_params.seed);
438 	std::vector<uint32_t>	blockSizes;
439 
440 	const uint32_t			vectorSize		= std::max(1u, m_params.drawCount);
441 	const uint32_t			largeDrawCount	= vectorSize + 1u; // The indirect buffer needs to have some padding at the end. See below.
442 	const uint32_t			evenBlockSize	= extent.height / vectorSize;
443 	uint32_t				remainingRows	= extent.height;
444 
445 	blockSizes.reserve(vectorSize);
446 	for (uint32_t i = 0; i < vectorSize - 1u; ++i)
447 	{
448 		const auto blockSize = static_cast<uint32_t>(rnd.getInt(1, evenBlockSize));
449 		remainingRows -= blockSize;
450 		blockSizes.push_back(blockSize);
451 	}
452 	blockSizes.push_back(remainingRows);
453 
454 	const auto			blockSizesBufferSize	= static_cast<VkDeviceSize>(de::dataSize(blockSizes));
455 	BufferWithMemoryPtr	blockSizesBuffer		= makeStridedBuffer(vkd, device, alloc, blockSizes, 0u, 0u, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 0u);
456 
457 	// Descriptor set layout, pool and set.
458 	DescriptorSetLayoutBuilder layoutBuilder;
459 	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_MESH_BIT_NV);
460 	const auto setLayout = layoutBuilder.build(vkd, device);
461 
462 	DescriptorPoolBuilder poolBuilder;
463 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
464 	const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
465 
466 	const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
467 
468 	// Update descriptor set.
469 	{
470 		DescriptorSetUpdateBuilder updateBuilder;
471 
472 		const auto location				= DescriptorSetUpdateBuilder::Location::binding(0u);
473 		const auto descriptorBufferInfo	= makeDescriptorBufferInfo(blockSizesBuffer->get(), 0ull, blockSizesBufferSize);
474 
475 		updateBuilder.writeSingle(descriptorSet.get(), location, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorBufferInfo);
476 		updateBuilder.update(vkd, device);
477 	}
478 
479 	// Pipeline layout.
480 	PushConstantData	pcData;
481 	const auto			pcRanges		= pcData.getRanges(m_params.useTask);
482 	const auto			pipelineLayout	= makePipelineLayout(vkd, device, 1u, &setLayout.get(), static_cast<uint32_t>(pcRanges.size()), de::dataOrNull(pcRanges));
483 
484 	// Push constants.
485 	pcData.width			= extent.width;
486 	pcData.height			= extent.height;
487 	pcData.firstTaskMesh	= m_params.firstTask;
488 	pcData.one				= 1u;
489 	pcData.firstTaskTask	= m_params.firstTask;
490 
491 	// Render pass and framebuffer.
492 	const auto renderPass	= makeRenderPass(vkd, device, format);
493 	const auto framebuffer	= makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), extent.width, extent.height);
494 
495 	// Pipeline.
496 	Move<VkShaderModule> taskModule;
497 	Move<VkShaderModule> meshModule;
498 	Move<VkShaderModule> fragModule;
499 
500 	const auto& binaries = m_context.getBinaryCollection();
501 	if (m_params.useTask)
502 		taskModule = createShaderModule(vkd, device, binaries.get("task"));
503 	meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
504 	fragModule = createShaderModule(vkd, device, binaries.get("frag"));
505 
506 	const std::vector<VkViewport>	viewports	(1u, makeViewport(extent));
507 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(extent));
508 
509 	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
510 		taskModule.get(), meshModule.get(), fragModule.get(),
511 		renderPass.get(), viewports, scissors);
512 
513 	// Command pool and buffer.
514 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
515 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
516 	const auto cmdBuffer	= cmdBufferPtr.get();
517 
518 	// Indirect and count buffers if needed.
519 	BufferWithMemoryPtr indirectBuffer;
520 	BufferWithMemoryPtr countBuffer;
521 
522 	if (m_params.drawType != DrawType::DRAW)
523 	{
524 		// Indirect draws.
525 		DE_ASSERT(static_cast<bool>(m_params.indirectArgs));
526 		const auto& indirectArgs = m_params.indirectArgs.get();
527 
528 		// Check stride and offset validity.
529 		DE_ASSERT(indirectArgs.offset % 4u == 0u);
530 		DE_ASSERT(indirectArgs.stride % 4u == 0u && (indirectArgs.stride == 0u || indirectArgs.stride >= static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandNV))));
531 
532 		// Prepare struct vector, which will be converted to a buffer with the proper stride and offset later.
533 		std::vector<VkDrawMeshTasksIndirectCommandNV> commands;
534 		commands.reserve(blockSizes.size());
535 
536 		std::transform(begin(blockSizes), end(blockSizes), std::back_inserter(commands),
537 			[this](uint32_t blockSize) { return VkDrawMeshTasksIndirectCommandNV{blockSize, this->m_params.firstTask}; });
538 
539 		const auto padding	= static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandNV));
540 		indirectBuffer		= makeStridedBuffer(vkd, device, alloc, commands, indirectArgs.offset, indirectArgs.stride, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, padding);
541 
542 		// Prepare count buffer if needed.
543 		if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT)
544 		{
545 			DE_ASSERT(static_cast<bool>(m_params.indirectCountLimit));
546 			DE_ASSERT(static_cast<bool>(m_params.indirectCountOffset));
547 
548 			const auto countBufferValue	= ((m_params.indirectCountLimit.get() == IndirectCountLimitType::BUFFER_VALUE)
549 										? m_params.drawCount
550 										: largeDrawCount);
551 
552 			const std::vector<uint32_t> singleCount (1u, countBufferValue);
553 			countBuffer = makeStridedBuffer(vkd, device, alloc, singleCount, m_params.indirectCountOffset.get(), static_cast<uint32_t>(sizeof(uint32_t)), VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, 0u);
554 		}
555 	}
556 
557 	// Submit commands.
558 	beginCommandBuffer(vkd, cmdBuffer);
559 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), clearColor);
560 
561 	vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
562 	{
563 		const char* pcDataPtr = reinterpret_cast<const char*>(&pcData);
564 		for (const auto& range : pcRanges)
565 			vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), range.stageFlags, range.offset, range.size, pcDataPtr + range.offset);
566 	}
567 	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
568 
569 	if (m_params.drawType == DrawType::DRAW)
570 	{
571 		vkd.cmdDrawMeshTasksNV(cmdBuffer, m_params.drawCount, m_params.firstTask);
572 	}
573 	else if (m_params.drawType == DrawType::DRAW_INDIRECT)
574 	{
575 		const auto& indirectArgs = m_params.indirectArgs.get();
576 		vkd.cmdDrawMeshTasksIndirectNV(cmdBuffer, indirectBuffer->get(), indirectArgs.offset, m_params.drawCount, indirectArgs.stride);
577 	}
578 	else if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT)
579 	{
580 		const auto& indirectArgs		= m_params.indirectArgs.get();
581 		const auto& indirectCountOffset	= m_params.indirectCountOffset.get();
582 		const auto& indirectCountLimit	= m_params.indirectCountLimit.get();
583 
584 		const auto maxCount	= ((indirectCountLimit == IndirectCountLimitType::MAX_COUNT)
585 							? m_params.drawCount
586 							: largeDrawCount);
587 		vkd.cmdDrawMeshTasksIndirectCountNV(cmdBuffer, indirectBuffer->get(), indirectArgs.offset, countBuffer->get(), indirectCountOffset, maxCount, indirectArgs.stride);
588 	}
589 	else
590 		DE_ASSERT(false);
591 
592 	endRenderPass(vkd, cmdBuffer);
593 
594 	// Output buffer to extract the color buffer.
595 	BufferWithMemoryPtr	outBuffer;
596 	void*				outBufferData = nullptr;
597 	{
598 		const auto	outBufferSize	= static_cast<VkDeviceSize>(static_cast<uint32_t>(tcu::getPixelSize(tcuFormat)) * extent.width * extent.height);
599 		const auto	outBufferUsage	= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
600 		const auto	outBufferInfo	= makeBufferCreateInfo(outBufferSize, outBufferUsage);
601 
602 		outBuffer					= BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, outBufferInfo, MemoryRequirement::HostVisible));
603 		outBufferData				= outBuffer->getAllocation().getHostPtr();
604 	}
605 
606 	copyImageToBuffer(vkd, cmdBuffer, colorBuffer->get(), outBuffer->get(), iExtent2D);
607 	endCommandBuffer(vkd, cmdBuffer);
608 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
609 
610 	// Generate reference image and compare.
611 	{
612 		auto&						log				= m_context.getTestContext().getLog();
613 		auto&						outBufferAlloc	= outBuffer->getAllocation();
614 		tcu::ConstPixelBufferAccess	result			(tcuFormat, iExtent3D, outBufferData);
615 		tcu::TextureLevel			referenceLevel	(tcuFormat, iExtent3D.x(), iExtent3D.y());
616 		const auto					reference		= referenceLevel.getAccess();
617 		const auto					setName			= de::toString(m_params.drawType) + "_draw_count_" + de::toString(m_params.drawCount) + (m_params.useTask ? "_with_task" : "_no_task");
618 		const auto					fHeight			= static_cast<float>(extent.height);
619 		const auto					fWidth			= static_cast<float>(extent.width);
620 
621 		invalidateAlloc(vkd, device, outBufferAlloc);
622 
623 		for (int y = 0; y < iExtent3D.y(); ++y)
624 		for (int x = 0; x < iExtent3D.x(); ++x)
625 		{
626 			const tcu::Vec4 refColor	= ((m_params.drawCount == 0u || (m_params.drawType == DrawType::DRAW && y >= static_cast<int>(m_params.drawCount)))
627 										? clearColor
628 										: tcu::Vec4(
629 											// These match the per-primitive color set by the mesh shader.
630 											(static_cast<float>(y) + 0.5f) / fHeight,
631 											(static_cast<float>(x) + 0.5f) / fWidth,
632 											0.0f,
633 											1.0f));
634 			reference.setPixel(refColor, x, y);
635 		}
636 
637 		if (!tcu::floatThresholdCompare(log, setName.c_str(), "", reference, result, threshold, tcu::COMPARE_LOG_ON_ERROR))
638 			return tcu::TestStatus::fail("Image comparison failed; check log for details");
639 	}
640 
641 	return tcu::TestStatus::pass("Pass");
642 }
643 
644 } // anonymous
645 
createMeshShaderApiTests(tcu::TestContext & testCtx)646 tcu::TestCaseGroup* createMeshShaderApiTests (tcu::TestContext& testCtx)
647 {
648 	GroupPtr mainGroup (new tcu::TestCaseGroup(testCtx, "api", "Mesh Shader API tests"));
649 
650 	const DrawType drawCases[] =
651 	{
652 		DrawType::DRAW,
653 		DrawType::DRAW_INDIRECT,
654 		DrawType::DRAW_INDIRECT_COUNT,
655 	};
656 
657 	const auto		extent				= getExtent();
658 	const uint32_t	drawCountCases[]	= { 0u, 1u, 2u, extent.height / 2u, extent.height };
659 
660 	const uint32_t normalStride	= static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandNV));
661 	const uint32_t largeStride	= 2u * normalStride + 4u;
662 	const uint32_t altOffset	= 20u;
663 
664 	const struct
665 	{
666 		tcu::Maybe<IndirectArgs>	indirectArgs;
667 		const char*					name;
668 	} indirectArgsCases[] =
669 	{
670 		{ tcu::nothing<IndirectArgs>(),							"no_indirect_args"			},
671 
672 		// Offset 0, varying strides.
673 		{ tcu::just(IndirectArgs{ 0u, 0u }),					"offset_0_stride_0"			},
674 		{ tcu::just(IndirectArgs{ 0u, normalStride }),			"offset_0_stride_normal"	},
675 		{ tcu::just(IndirectArgs{ 0u, largeStride }),			"offset_0_stride_large"		},
676 
677 		// Nonzero offset, varying strides.
678 		{ tcu::just(IndirectArgs{ altOffset, 0u }),				"offset_alt_stride_0"		},
679 		{ tcu::just(IndirectArgs{ altOffset, normalStride }),	"offset_alt_stride_normal"	},
680 		{ tcu::just(IndirectArgs{ altOffset, largeStride }),	"offset_alt_stride_large"	},
681 	};
682 
683 	const struct
684 	{
685 		tcu::Maybe<IndirectCountLimitType>	limitType;
686 		const char*							name;
687 	} countLimitCases[] =
688 	{
689 		{ tcu::nothing<IndirectCountLimitType>(),			"no_count_limit"		},
690 		{ tcu::just(IndirectCountLimitType::BUFFER_VALUE),	"count_limit_buffer"	},
691 		{ tcu::just(IndirectCountLimitType::MAX_COUNT),		"count_limit_max_count"	},
692 	};
693 
694 	const struct
695 	{
696 		tcu::Maybe<uint32_t>	countOffset;
697 		const char*				name;
698 	} countOffsetCases[] =
699 	{
700 		{ tcu::nothing<uint32_t>(),	"no_count_offset"	},
701 		{ tcu::just(uint32_t{0u}),	"count_offset_0"	},
702 		{ tcu::just(altOffset),		"count_offset_alt"	},
703 	};
704 
705 	const struct
706 	{
707 		bool		useTask;
708 		const char*	name;
709 	} taskCases[] =
710 	{
711 		{ false,	"no_task_shader"	},
712 		{ true,		"with_task_shader"	},
713 	};
714 
715 	const struct
716 	{
717 		uint32_t	firstTask;
718 		const char*	name;
719 	} firstTaskCases[] =
720 	{
721 		{ 0u,		"first_task_zero"		},
722 		{ 1001u,	"first_task_nonzero"	},
723 	};
724 
725 	uint32_t seed = 1628678795u;
726 
727 	for (const auto& drawCase : drawCases)
728 	{
729 		const auto drawCaseName			= de::toString(drawCase);
730 		const bool isIndirect			= (drawCase != DrawType::DRAW);
731 		const bool isIndirectNoCount	= (drawCase == DrawType::DRAW_INDIRECT);
732 		const bool isIndirectCount		= (drawCase == DrawType::DRAW_INDIRECT_COUNT);
733 
734 		GroupPtr drawGroup(new tcu::TestCaseGroup(testCtx, drawCaseName.c_str(), ""));
735 
736 		for (const auto& drawCountCase : drawCountCases)
737 		{
738 			const auto drawCountName = "draw_count_" + de::toString(drawCountCase);
739 			GroupPtr drawCountGroup(new tcu::TestCaseGroup(testCtx, drawCountName.c_str(), ""));
740 
741 			for (const auto& indirectArgsCase : indirectArgsCases)
742 			{
743 				const bool hasIndirectArgs	= static_cast<bool>(indirectArgsCase.indirectArgs);
744 				const bool strideZero		= (hasIndirectArgs && indirectArgsCase.indirectArgs.get().stride == 0u);
745 
746 				if (isIndirect != hasIndirectArgs)
747 					continue;
748 
749 				// VUID-vkCmdDrawMeshTasksIndirectNV-drawCount-02146 and VUID-vkCmdDrawMeshTasksIndirectCountNV-stride-02182.
750 				if (((isIndirectNoCount && drawCountCase > 1u) || isIndirectCount) && strideZero)
751 					continue;
752 
753 				GroupPtr indirectArgsGroup(new tcu::TestCaseGroup(testCtx, indirectArgsCase.name, ""));
754 
755 				for (const auto& countLimitCase : countLimitCases)
756 				{
757 					const bool hasCountLimit = static_cast<bool>(countLimitCase.limitType);
758 
759 					if (isIndirectCount != hasCountLimit)
760 						continue;
761 
762 					GroupPtr countLimitGroup(new tcu::TestCaseGroup(testCtx, countLimitCase.name, ""));
763 
764 					for (const auto& countOffsetCase : countOffsetCases)
765 					{
766 						const bool hasCountOffsetType = static_cast<bool>(countOffsetCase.countOffset);
767 
768 						if (isIndirectCount != hasCountOffsetType)
769 							continue;
770 
771 						GroupPtr countOffsetGroup(new tcu::TestCaseGroup(testCtx, countOffsetCase.name, ""));
772 
773 						for (const auto& taskCase : taskCases)
774 						{
775 							GroupPtr taskCaseGrp(new tcu::TestCaseGroup(testCtx, taskCase.name, ""));
776 
777 							for (const auto& firstTaskCase : firstTaskCases)
778 							{
779 								const TestParams params =
780 								{
781 									drawCase,						//	DrawType							drawType;
782 									seed++,							//	uint32_t							seed;
783 									drawCountCase,					//	uint32_t							drawCount;
784 									firstTaskCase.firstTask,		//	uint32_t							firstTask;
785 									indirectArgsCase.indirectArgs,	//	tcu::Maybe<IndirectArgs>			indirectArgs;
786 									countLimitCase.limitType,		//	tcu::Maybe<IndirectCountLimitType>	indirectCountLimit;
787 									countOffsetCase.countOffset,	//	tcu::Maybe<uint32_t>				indirectCountOffset;
788 									taskCase.useTask,				//	bool								useTask;
789 								};
790 
791 								taskCaseGrp->addChild(new MeshApiCase(testCtx, firstTaskCase.name, "", params));
792 							}
793 
794 							countOffsetGroup->addChild(taskCaseGrp.release());
795 						}
796 
797 						countLimitGroup->addChild(countOffsetGroup.release());
798 					}
799 
800 					indirectArgsGroup->addChild(countLimitGroup.release());
801 				}
802 
803 				drawCountGroup->addChild(indirectArgsGroup.release());
804 			}
805 
806 			drawGroup->addChild(drawCountGroup.release());
807 		}
808 
809 		mainGroup->addChild(drawGroup.release());
810 	}
811 
812 	return mainGroup.release();
813 }
814 
815 } // MeshShader
816 } // vkt
817