• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2021 The Khronos Group Inc.
6  * Copyright (c) 2021 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Mesh Shader API Tests for VK_EXT_mesh_shader
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktMeshShaderApiTestsEXT.hpp"
26 #include "vktMeshShaderUtil.hpp"
27 #include "vktTestCase.hpp"
28 
29 #include "vkTypeUtil.hpp"
30 #include "vkImageWithMemory.hpp"
31 #include "vkBufferWithMemory.hpp"
32 #include "vkObjUtil.hpp"
33 #include "vkBuilderUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkImageUtil.hpp"
36 
37 #include "tcuMaybe.hpp"
38 #include "tcuTestLog.hpp"
39 #include "tcuImageCompare.hpp"
40 
41 #include "deRandom.hpp"
42 
43 #include <iostream>
44 #include <sstream>
45 #include <vector>
46 #include <algorithm>
47 #include <iterator>
48 #include <limits>
49 
50 namespace vkt
51 {
52 namespace MeshShader
53 {
54 
55 namespace
56 {
57 
58 using namespace vk;
59 
60 using GroupPtr				= de::MovePtr<tcu::TestCaseGroup>;
61 using ImageWithMemoryPtr	= de::MovePtr<ImageWithMemory>;
62 using BufferWithMemoryPtr	= de::MovePtr<BufferWithMemory>;
63 
// Selects which mesh-tasks draw command the test instance records.
enum class DrawType
{
	DRAW				= 0,	// vkCmdDrawMeshTasksEXT.
	DRAW_INDIRECT		= 1,	// vkCmdDrawMeshTasksIndirectEXT.
	DRAW_INDIRECT_COUNT	= 2,	// vkCmdDrawMeshTasksIndirectCountEXT.
};
70 
operator <<(std::ostream & stream,DrawType drawType)71 std::ostream& operator<< (std::ostream& stream, DrawType drawType)
72 {
73 	switch (drawType)
74 	{
75 	case DrawType::DRAW:				stream << "draw";					break;
76 	case DrawType::DRAW_INDIRECT:		stream << "draw_indirect";			break;
77 	case DrawType::DRAW_INDIRECT_COUNT:	stream << "draw_indirect_count";	break;
78 	default: DE_ASSERT(false); break;
79 	}
80 	return stream;
81 }
82 
83 
// For DRAW_INDIRECT_COUNT, selects which of the two limits (count buffer value or maxDrawCount argument) is the
// one that actually determines the number of draws. The other one is set to a larger value.
enum class IndirectCountLimitType
{
	BUFFER_VALUE	= 0,	// The count buffer holds the real draw count; maxDrawCount is larger.
	MAX_COUNT		= 1,	// maxDrawCount holds the real draw count; the count buffer value is larger.
};
90 
// Placement of the indirect command structures inside the indirect buffer. Both values are passed verbatim to
// the indirect draw command.
struct IndirectArgs
{
	uint32_t	offset;	// Byte offset of the first command structure.
	uint32_t	stride;	// Byte distance between consecutive command structures, as passed to the draw call.
};
96 
97 struct TestParams
98 {
99 	DrawType							drawType;
100 	uint32_t							seed;
101 	uint32_t							drawCount;				// Equivalent to taskCount or drawCount.
102 	tcu::Maybe<IndirectArgs>			indirectArgs;			// Only used for DRAW_INDIRECT*.
103 	tcu::Maybe<IndirectCountLimitType>	indirectCountLimit;		// Only used for DRAW_INDIRECT_COUNT.
104 	tcu::Maybe<uint32_t>				indirectCountOffset;	// Only used for DRAW_INDIRECT_COUNT.
105 	bool								useTask;
106 	bool								useSecondaryCmdBuffer;
107 };
108 
109 // The framebuffer will have a number of rows and 32 columns. Each mesh shader workgroup will generate geometry to fill a single
110 // framebuffer row, using a triangle list with 32 triangles of different colors, each covering a framebuffer pixel.
111 //
112 // Note: the total framebuffer rows is called "full" below (e.g. 64). When using a task shader to generate work, each workgroup will
113 // generate a single mesh workgroup using a push constant instead of a compile-time constant.
114 //
115 // When using DRAW, the task count will tell us how many rows of pixels will be filled in the framebuffer.
116 //
117 // When using indirect draws, the full framebuffer will always be drawn into by using multiple draw command structures, except in
118 // the case of drawCount==0. Each draw will spawn the needed number of tasks to fill the whole framebuffer. In addition, in order to
119 // make all argument structures different, the number of tasks in each draw count will be slightly different and assigned
120 // pseudorandomly.
121 //
122 // DRAW: taskCount=0, taskCount=1, taskCount=2, taskCount=half, taskCount=full
123 //
124 // DRAW_INDIRECT: drawCount=0, drawCount=1, drawCount=2, drawCount=half, drawCount=full.
//  * With offset 0 and with a fixed nonzero offset (a multiple of 4).
//  * With stride 0, with the exact size of the command structure, and with a larger stride that adds padding (a multiple of 4).
127 //
128 // DRAW_INDIRECT_COUNT: same as indirect in two variants:
129 //  1. Passing the count in a buffer with a large maximum.
130 //  2. Passing a large value in the buffer and limiting it with the maximum.
131 
132 class MeshApiCase : public vkt::TestCase
133 {
134 public:
MeshApiCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const TestParams & params)135 					MeshApiCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParams& params)
136 						: vkt::TestCase	(testCtx, name, description)
137 						, m_params		(params)
138 						{}
~MeshApiCase(void)139 	virtual			~MeshApiCase	(void) {}
140 
141 	void			initPrograms	(vk::SourceCollections& programCollection) const override;
142 	void			checkSupport	(Context& context) const override;
143 	TestInstance*	createInstance	(Context& context) const override;
144 
145 protected:
146 	TestParams		m_params;
147 };
148 
149 class MeshApiInstance : public vkt::TestInstance
150 {
151 public:
MeshApiInstance(Context & context,const TestParams & params)152 						MeshApiInstance		(Context& context, const TestParams& params)
153 							: vkt::TestInstance	(context)
154 							, m_params			(params)
155 							{}
~MeshApiInstance(void)156 	virtual				~MeshApiInstance	(void) {}
157 
158 	tcu::TestStatus		iterate				(void) override;
159 
160 protected:
161 	TestParams			m_params;
162 };
163 
createInstance(Context & context) const164 TestInstance* MeshApiCase::createInstance (Context& context) const
165 {
166 	return new MeshApiInstance(context, m_params);
167 }
168 
169 struct PushConstantData
170 {
171 	uint32_t width;
172 	uint32_t height;
173 	uint32_t dimMesh;	// Set work group size in the X, Y or Z dimension depending on value (0, 1, 2).
174 	uint32_t one;
175 	uint32_t dimTask;	// Same as dimMesh.
176 
getRangesvkt::MeshShader::__anon4d5a22460111::PushConstantData177 	std::vector<VkPushConstantRange> getRanges (bool includeTask) const
178 	{
179 		constexpr uint32_t offsetMesh = 0u;
180 		constexpr uint32_t offsetTask = static_cast<uint32_t>(offsetof(PushConstantData, one));
181 		constexpr uint32_t sizeMesh = offsetTask;
182 		constexpr uint32_t sizeTask = static_cast<uint32_t>(sizeof(PushConstantData)) - offsetTask;
183 
184 		const VkPushConstantRange meshRange =
185 		{
186 			VK_SHADER_STAGE_MESH_BIT_EXT,	//	VkShaderStageFlags	stageFlags;
187 			offsetMesh,						//	uint32_t			offset;
188 			sizeMesh,						//	uint32_t			size;
189 		};
190 		const VkPushConstantRange taskRange =
191 		{
192 			VK_SHADER_STAGE_TASK_BIT_EXT,	//	VkShaderStageFlags	stageFlags;
193 			offsetTask,						//	uint32_t			offset;
194 			sizeTask,						//	uint32_t			size;
195 		};
196 
197 		std::vector<VkPushConstantRange> ranges (1u, meshRange);
198 		if (includeTask)
199 			ranges.push_back(taskRange);
200 		return ranges;
201 	}
202 };
203 
initPrograms(vk::SourceCollections & programCollection) const204 void MeshApiCase::initPrograms (vk::SourceCollections& programCollection) const
205 {
206 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
207 
208 	const std::string taskDataDecl =
209 		"struct TaskData {\n"
210 		"    uint blockNumber;\n"
211 		"    uint blockRow;\n"
212 		"};\n"
213 		"taskPayloadSharedEXT TaskData td;\n"
214 		;
215 
216 	// Task shader if needed.
217 	if (m_params.useTask)
218 	{
219 		std::ostringstream task;
220 		task
221 			<< "#version 460\n"
222 			<< "#extension GL_EXT_mesh_shader : enable\n"
223 			<< "\n"
224 			<< "layout (local_size_x=1) in;\n"
225 			<< "\n"
226 			<< "layout (push_constant, std430) uniform TaskPushConstantBlock {\n"
227 			<< "    layout (offset=12) uint one;\n"
228 			<< "    layout (offset=16) uint dimCoord;\n"
229 			<< "} pc;\n"
230 			<< "\n"
231 			<< taskDataDecl
232 			<< "\n"
233 			<< "void main ()\n"
234 			<< "{\n"
235 			<< "    const uint workGroupID = ((pc.dimCoord == 2) ? gl_WorkGroupID.z : ((pc.dimCoord == 1) ? gl_WorkGroupID.y : gl_WorkGroupID.x));\n"
236 			<< "    td.blockNumber         = uint(gl_DrawID);\n"
237 			<< "    td.blockRow            = workGroupID;\n"
238 			<< "    EmitMeshTasksEXT(pc.one, pc.one, pc.one);"
239 			<< "}\n"
240 			;
241 		programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
242 	}
243 
244 	// Mesh shader.
245 	{
246 		std::ostringstream mesh;
247 		mesh
248 			<< "#version 460\n"
249 			<< "#extension GL_EXT_mesh_shader : enable\n"
250 			<< "\n"
251 			<< "// 32 local invocations in total.\n"
252 			<< "layout (local_size_x=4, local_size_y=2, local_size_z=4) in;\n"
253 			<< "layout (triangles) out;\n"
254 			<< "layout (max_vertices=96, max_primitives=32) out;\n"
255 			<< "\n"
256 			<< "layout (push_constant, std430) uniform MeshPushConstantBlock {\n"
257 			<< "    uint width;\n"
258 			<< "    uint height;\n"
259 			<< "    uint dimCoord;\n"
260 			<< "} pc;\n"
261 			<< "\n"
262 			<< "layout (location=0) perprimitiveEXT out vec4 primitiveColor[];\n"
263 			<< "\n"
264 			<< (m_params.useTask ? taskDataDecl : "")
265 			<< "\n"
266 			<< "layout (set=0, binding=0, std430) readonly buffer BlockSizes {\n"
267 			<< "    uint blockSize[];\n"
268 			<< "} bsz;\n"
269 			<< "\n"
270 			<< "uint startOfBlock (uint blockNumber)\n"
271 			<< "{\n"
272 			<< "    uint start = 0;\n"
273 			<< "    for (uint i = 0; i < blockNumber; i++)\n"
274 			<< "        start += bsz.blockSize[i];\n"
275 			<< "    return start;\n"
276 			<< "}\n"
277 			<< "\n"
278 			<< "void main ()\n"
279 			<< "{\n"
280 			<< "    const uint workGroupID = ((pc.dimCoord == 2) ? gl_WorkGroupID.z : ((pc.dimCoord == 1) ? gl_WorkGroupID.y : gl_WorkGroupID.x));\n"
281 			<< "    const uint blockNumber = " << (m_params.useTask ? "td.blockNumber" : "uint(gl_DrawID)") << ";\n"
282 			<< "    const uint blockRow = " << (m_params.useTask ? "td.blockRow" : "workGroupID") << ";\n"
283 			<< "\n"
284 			<< "    // Each workgroup will fill one row, and each invocation will generate a\n"
285 			<< "    // triangle around the pixel center in each column.\n"
286 			<< "    const uint row = startOfBlock(blockNumber) + blockRow;\n"
287 			<< "    const uint col = gl_LocalInvocationIndex;\n"
288 			<< "\n"
289 			<< "    const float fHeight = float(pc.height);\n"
290 			<< "    const float fWidth = float(pc.width);\n"
291 			<< "\n"
292 			<< "    // Pixel coordinates, normalized.\n"
293 			<< "    const float rowNorm = (float(row) + 0.5) / fHeight;\n"
294 			<< "    const float colNorm = (float(col) + 0.5) / fWidth;\n"
295 			<< "\n"
296 			<< "    // Framebuffer coordinates.\n"
297 			<< "    const float coordX = (colNorm * 2.0) - 1.0;\n"
298 			<< "    const float coordY = (rowNorm * 2.0) - 1.0;\n"
299 			<< "\n"
300 			<< "    const float pixelWidth = 2.0 / fWidth;\n"
301 			<< "    const float pixelHeight = 2.0 / fHeight;\n"
302 			<< "\n"
303 			<< "    const float offsetX = pixelWidth / 2.0;\n"
304 			<< "    const float offsetY = pixelHeight / 2.0;\n"
305 			<< "\n"
306 			<< "    const uint baseIndex = col*3;\n"
307 			<< "    const uvec3 indices = uvec3(baseIndex, baseIndex + 1, baseIndex + 2);\n"
308 			<< "\n"
309 			<< "    SetMeshOutputsEXT(96u, 32u);\n"
310 			<< "    primitiveColor[col] = vec4(rowNorm, colNorm, 0.0, 1.0);\n"
311 			<< "    gl_PrimitiveTriangleIndicesEXT[col] = uvec3(indices.x, indices.y, indices.z);\n"
312 			<< "\n"
313 			<< "    gl_MeshVerticesEXT[indices.x].gl_Position = vec4(coordX - offsetX, coordY + offsetY, 0.0, 1.0);\n"
314 			<< "    gl_MeshVerticesEXT[indices.y].gl_Position = vec4(coordX + offsetX, coordY + offsetY, 0.0, 1.0);\n"
315 			<< "    gl_MeshVerticesEXT[indices.z].gl_Position = vec4(coordX, coordY - offsetY, 0.0, 1.0);\n"
316 			<< "}\n"
317 			;
318 		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
319 	}
320 
321 	// Frag shader.
322 	{
323 		std::ostringstream frag;
324 		frag
325 			<< "#version 460\n"
326 			<< "#extension GL_EXT_mesh_shader : enable\n"
327 			<< "\n"
328 			<< "layout (location=0) perprimitiveEXT in vec4 primitiveColor;\n"
329 			<< "layout (location=0) out vec4 outColor;\n"
330 			<< "\n"
331 			<< "void main ()\n"
332 			<< "{\n"
333 			<< "    outColor = primitiveColor;\n"
334 			<< "}\n"
335 			;
336 		programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
337 	}
338 }
339 
checkSupport(Context & context) const340 void MeshApiCase::checkSupport (Context& context) const
341 {
342 	checkTaskMeshShaderSupportEXT(context, m_params.useTask, true);
343 
344 	// VUID-vkCmdDrawMeshTasksIndirectEXT-drawCount-02718
345 	if (m_params.drawType == DrawType::DRAW_INDIRECT && m_params.drawCount > 1u)
346 	{
347 		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_MULTI_DRAW_INDIRECT);
348 	}
349 
350 	// VUID-vkCmdDrawMeshTasksIndirectCountEXT-None-04445
351 	if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT)
352 		context.requireDeviceFunctionality("VK_KHR_draw_indirect_count");
353 }
354 
355 template <typename T>
makeStridedBuffer(const DeviceInterface & vkd,VkDevice device,Allocator & alloc,const std::vector<T> & elements,uint32_t offset,uint32_t stride,VkBufferUsageFlags usage,uint32_t endPadding)356 BufferWithMemoryPtr makeStridedBuffer(const DeviceInterface& vkd, VkDevice device, Allocator& alloc, const std::vector<T>& elements, uint32_t offset, uint32_t stride, VkBufferUsageFlags usage, uint32_t endPadding)
357 {
358 	const auto elementSize	= static_cast<uint32_t>(sizeof(T));
359 	const auto actualStride	= std::max(elementSize, stride);
360 	const auto bufferSize	= static_cast<size_t>(offset) + static_cast<size_t>(actualStride) * elements.size() + static_cast<size_t>(endPadding);
361 	const auto bufferInfo	= makeBufferCreateInfo(static_cast<VkDeviceSize>(bufferSize), usage);
362 
363 	BufferWithMemoryPtr buffer(new BufferWithMemory(vkd, device, alloc, bufferInfo, MemoryRequirement::HostVisible));
364 	auto& bufferAlloc	= buffer->getAllocation();
365 	char* bufferDataPtr	= reinterpret_cast<char*>(bufferAlloc.getHostPtr());
366 
367 	char* itr = bufferDataPtr + offset;
368 	for (const auto& elem : elements)
369 	{
370 		deMemcpy(itr, &elem, sizeof(elem));
371 		itr += actualStride;
372 	}
373 	if (endPadding > 0u)
374 		deMemset(itr, 0xFF, endPadding);
375 
376 	flushAlloc(vkd, device, bufferAlloc);
377 
378 	return buffer;
379 }
380 
getExtent()381 VkExtent3D getExtent ()
382 {
383 	return makeExtent3D(32u, 64u, 1u);
384 }
385 
getIndirectCommand(uint32_t blockSize,uint32_t dimCoord)386 VkDrawMeshTasksIndirectCommandEXT getIndirectCommand (uint32_t blockSize, uint32_t dimCoord)
387 {
388 	VkDrawMeshTasksIndirectCommandEXT indirectCmd{1u, 1u, 1u};
389 
390 	switch (dimCoord)
391 	{
392 		case 0u:		indirectCmd.groupCountX = blockSize; break;
393 		case 1u:		indirectCmd.groupCountY = blockSize; break;
394 		case 2u:		indirectCmd.groupCountZ = blockSize; break;
395 		default:
396 			DE_ASSERT(false); break;
397 	}
398 
399 	return indirectCmd;
400 }
401 
iterate(void)402 tcu::TestStatus MeshApiInstance::iterate (void)
403 {
404 	const auto&		vkd			= m_context.getDeviceInterface();
405 	const auto		device		= m_context.getDevice();
406 	auto&			alloc		= m_context.getDefaultAllocator();
407 	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
408 	const auto		queue		= m_context.getUniversalQueue();
409 
410 	const auto		extent		= getExtent();
411 	const auto		iExtent3D	= tcu::IVec3(static_cast<int>(extent.width), static_cast<int>(extent.height), static_cast<int>(extent.depth));
412 	const auto		iExtent2D	= tcu::IVec2(iExtent3D.x(), iExtent3D.y());
413 	const auto		format		= VK_FORMAT_R8G8B8A8_UNORM;
414 	const auto		tcuFormat	= mapVkFormat(format);
415 	const auto		colorUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
416 	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
417 	const tcu::Vec4	clearColor	(0.0f, 0.0f, 0.0f, 1.0f);
418 	const float		colorThres	= 0.005f; // 1/255 < 0.005 < 2/255
419 	const tcu::Vec4	threshold	(colorThres, colorThres, 0.0f, 0.0f);
420 
421 	ImageWithMemoryPtr	colorBuffer;
422 	Move<VkImageView>	colorBufferView;
423 	{
424 		const VkImageCreateInfo colorBufferInfo =
425 		{
426 			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
427 			nullptr,								//	const void*				pNext;
428 			0u,										//	VkImageCreateFlags		flags;
429 			VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
430 			format,									//	VkFormat				format;
431 			extent,									//	VkExtent3D				extent;
432 			1u,										//	uint32_t				mipLevels;
433 			1u,										//	uint32_t				arrayLayers;
434 			VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
435 			VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
436 			colorUsage,								//	VkImageUsageFlags		usage;
437 			VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
438 			0u,										//	uint32_t				queueFamilyIndexCount;
439 			nullptr,								//	const uint32_t*			pQueueFamilyIndices;
440 			VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
441 		};
442 		colorBuffer = ImageWithMemoryPtr(new ImageWithMemory(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any));
443 		colorBufferView = makeImageView(vkd, device, colorBuffer->get(), VK_IMAGE_VIEW_TYPE_2D, format, colorSRR);
444 	}
445 
446 	// Prepare buffer containing the array of block sizes.
447 	de::Random				rnd				(m_params.seed);
448 	std::vector<uint32_t>	blockSizes;
449 
450 	const uint32_t			vectorSize		= std::max(1u, m_params.drawCount);
451 	const uint32_t			largeDrawCount	= vectorSize + 1u; // The indirect buffer needs to have some padding at the end. See below.
452 	const uint32_t			evenBlockSize	= extent.height / vectorSize;
453 	uint32_t				remainingRows	= extent.height;
454 
455 	blockSizes.reserve(vectorSize);
456 	for (uint32_t i = 0; i < vectorSize - 1u; ++i)
457 	{
458 		const auto blockSize = static_cast<uint32_t>(rnd.getInt(1, evenBlockSize));
459 		remainingRows -= blockSize;
460 		blockSizes.push_back(blockSize);
461 	}
462 	blockSizes.push_back(remainingRows);
463 
464 	const auto			blockSizesBufferSize	= static_cast<VkDeviceSize>(de::dataSize(blockSizes));
465 	BufferWithMemoryPtr	blockSizesBuffer		= makeStridedBuffer(vkd, device, alloc, blockSizes, 0u, 0u, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 0u);
466 
467 	// Descriptor set layout, pool and set.
468 	DescriptorSetLayoutBuilder layoutBuilder;
469 	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_MESH_BIT_EXT);
470 	const auto setLayout = layoutBuilder.build(vkd, device);
471 
472 	DescriptorPoolBuilder poolBuilder;
473 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
474 	const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
475 
476 	const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
477 
478 	// Update descriptor set.
479 	{
480 		DescriptorSetUpdateBuilder updateBuilder;
481 
482 		const auto location				= DescriptorSetUpdateBuilder::Location::binding(0u);
483 		const auto descriptorBufferInfo	= makeDescriptorBufferInfo(blockSizesBuffer->get(), 0ull, blockSizesBufferSize);
484 
485 		updateBuilder.writeSingle(descriptorSet.get(), location, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorBufferInfo);
486 		updateBuilder.update(vkd, device);
487 	}
488 
489 	// Pipeline layout.
490 	PushConstantData	pcData;
491 	const auto			pcRanges		= pcData.getRanges(m_params.useTask);
492 	const auto			pipelineLayout	= makePipelineLayout(vkd, device, 1u, &setLayout.get(), static_cast<uint32_t>(pcRanges.size()), de::dataOrNull(pcRanges));
493 
494 	// Push constants: choose used dimension coordinate pseudorandomly.
495 	const auto dimCoord = rnd.getUint32() % 3u;
496 
497 	pcData.width	= extent.width;
498 	pcData.height	= extent.height;
499 	pcData.dimMesh	= dimCoord;
500 	pcData.one		= 1u;
501 	pcData.dimTask	= dimCoord;
502 
503 	// Render pass and framebuffer.
504 	const auto renderPass	= makeRenderPass(vkd, device, format);
505 	const auto framebuffer	= makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), extent.width, extent.height);
506 
507 	// Pipeline.
508 	Move<VkShaderModule> taskModule;
509 	Move<VkShaderModule> meshModule;
510 	Move<VkShaderModule> fragModule;
511 
512 	const auto& binaries = m_context.getBinaryCollection();
513 	if (m_params.useTask)
514 		taskModule = createShaderModule(vkd, device, binaries.get("task"));
515 	meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
516 	fragModule = createShaderModule(vkd, device, binaries.get("frag"));
517 
518 	const std::vector<VkViewport>	viewports	(1u, makeViewport(extent));
519 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(extent));
520 
521 	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
522 		taskModule.get(), meshModule.get(), fragModule.get(),
523 		renderPass.get(), viewports, scissors);
524 
525 	// Command pool and buffer.
526 	const auto subpassContents		= (m_params.useSecondaryCmdBuffer ? VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS : VK_SUBPASS_CONTENTS_INLINE);
527 	const auto cmdPool				= makeCommandPool(vkd, device, queueIndex);
528 	const auto primaryCmdBuffer		= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
529 	const auto primary				= primaryCmdBuffer.get();
530 	const auto secondaryCmdBuffer	= (m_params.useSecondaryCmdBuffer ? allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_SECONDARY) : Move<VkCommandBuffer>());
531 	const auto secondary			= secondaryCmdBuffer.get();
532 	const auto rpCmdBuffer			= (m_params.useSecondaryCmdBuffer ? secondary : primary); // Holding the contents of the render pass commands.
533 
534 	// Indirect and count buffers if needed.
535 	BufferWithMemoryPtr indirectBuffer;
536 	BufferWithMemoryPtr countBuffer;
537 
538 	if (m_params.drawType != DrawType::DRAW)
539 	{
540 		// Indirect draws.
541 		DE_ASSERT(static_cast<bool>(m_params.indirectArgs));
542 		const auto& indirectArgs = m_params.indirectArgs.get();
543 
544 		// Check stride and offset validity.
545 		DE_ASSERT(indirectArgs.offset % 4u == 0u);
546 		DE_ASSERT(indirectArgs.stride % 4u == 0u && (indirectArgs.stride == 0u || indirectArgs.stride >= static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandEXT))));
547 
548 		// Prepare struct vector, which will be converted to a buffer with the proper stride and offset later.
549 		std::vector<VkDrawMeshTasksIndirectCommandEXT> commands;
550 		commands.reserve(blockSizes.size());
551 
552 		std::transform(begin(blockSizes), end(blockSizes), std::back_inserter(commands),
553 			[dimCoord](uint32_t blockSize) { return getIndirectCommand(blockSize, dimCoord); });
554 
555 		const auto padding	= static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandEXT));
556 		indirectBuffer		= makeStridedBuffer(vkd, device, alloc, commands, indirectArgs.offset, indirectArgs.stride, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, padding);
557 
558 		// Prepare count buffer if needed.
559 		if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT)
560 		{
561 			DE_ASSERT(static_cast<bool>(m_params.indirectCountLimit));
562 			DE_ASSERT(static_cast<bool>(m_params.indirectCountOffset));
563 
564 			const auto countBufferValue	= ((m_params.indirectCountLimit.get() == IndirectCountLimitType::BUFFER_VALUE)
565 										? m_params.drawCount
566 										: largeDrawCount);
567 
568 			const std::vector<uint32_t> singleCount (1u, countBufferValue);
569 			countBuffer = makeStridedBuffer(vkd, device, alloc, singleCount, m_params.indirectCountOffset.get(), static_cast<uint32_t>(sizeof(uint32_t)), VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, 0u);
570 		}
571 	}
572 
573 	// Submit commands.
574 	beginCommandBuffer(vkd, primary);
575 	beginRenderPass(vkd, primary, renderPass.get(), framebuffer.get(), scissors.at(0), clearColor, subpassContents);
576 
577 	if (m_params.useSecondaryCmdBuffer)
578 	{
579 		const VkCommandBufferInheritanceInfo inheritanceInfo =
580 		{
581 			VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO,	// VkStructureType                  sType;
582 			nullptr,											// const void*                      pNext;
583 			renderPass.get(),									// VkRenderPass                     renderPass;
584 			0u,													// deUint32                         subpass;
585 			framebuffer.get(),									// VkFramebuffer                    framebuffer;
586 			VK_FALSE,											// VkBool32                         occlusionQueryEnable;
587 			0u,													// VkQueryControlFlags              queryFlags;
588 			0u,													// VkQueryPipelineStatisticFlags    pipelineStatistics;
589 		};
590 
591 		const VkCommandBufferUsageFlags	cmdBufferFlags	= (VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT | VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT);
592 		const VkCommandBufferBeginInfo	beginInfo		=
593 		{
594 			VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,	//	VkStructureType							sType;
595 			nullptr,										//	const void*								pNext;
596 			cmdBufferFlags,									//	VkCommandBufferUsageFlags				flags;
597 			&inheritanceInfo,								//	const VkCommandBufferInheritanceInfo*	pInheritanceInfo;
598 		};
599 
600 		vkd.beginCommandBuffer(secondary, &beginInfo);
601 	}
602 
603 	vkd.cmdBindDescriptorSets(rpCmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
604 	{
605 		const char* pcDataPtr = reinterpret_cast<const char*>(&pcData);
606 		for (const auto& range : pcRanges)
607 			vkd.cmdPushConstants(rpCmdBuffer, pipelineLayout.get(), range.stageFlags, range.offset, range.size, pcDataPtr + range.offset);
608 	}
609 	vkd.cmdBindPipeline(rpCmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
610 
611 	if (m_params.drawType == DrawType::DRAW)
612 	{
613 		const auto drawArgs = getIndirectCommand(m_params.drawCount, dimCoord);
614 		vkd.cmdDrawMeshTasksEXT(rpCmdBuffer, drawArgs.groupCountX, drawArgs.groupCountY, drawArgs.groupCountZ);
615 	}
616 	else if (m_params.drawType == DrawType::DRAW_INDIRECT)
617 	{
618 		const auto& indirectArgs = m_params.indirectArgs.get();
619 		vkd.cmdDrawMeshTasksIndirectEXT(rpCmdBuffer, indirectBuffer->get(), indirectArgs.offset, m_params.drawCount, indirectArgs.stride);
620 	}
621 	else if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT)
622 	{
623 		const auto& indirectArgs		= m_params.indirectArgs.get();
624 		const auto& indirectCountOffset	= m_params.indirectCountOffset.get();
625 		const auto& indirectCountLimit	= m_params.indirectCountLimit.get();
626 
627 		const auto maxCount	= ((indirectCountLimit == IndirectCountLimitType::MAX_COUNT)
628 							? m_params.drawCount
629 							: largeDrawCount);
630 		vkd.cmdDrawMeshTasksIndirectCountEXT(rpCmdBuffer, indirectBuffer->get(), indirectArgs.offset, countBuffer->get(), indirectCountOffset, maxCount, indirectArgs.stride);
631 	}
632 	else
633 		DE_ASSERT(false);
634 
635 	if (m_params.useSecondaryCmdBuffer)
636 	{
637 		endCommandBuffer(vkd, secondary);
638 		vkd.cmdExecuteCommands(primary, 1u, &secondary);
639 	}
640 
641 	endRenderPass(vkd, primary);
642 
643 	// Output buffer to extract the color buffer.
644 	BufferWithMemoryPtr	outBuffer;
645 	void*				outBufferData = nullptr;
646 	{
647 		const auto	outBufferSize	= static_cast<VkDeviceSize>(static_cast<uint32_t>(tcu::getPixelSize(tcuFormat)) * extent.width * extent.height);
648 		const auto	outBufferUsage	= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
649 		const auto	outBufferInfo	= makeBufferCreateInfo(outBufferSize, outBufferUsage);
650 
651 		outBuffer					= BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, outBufferInfo, MemoryRequirement::HostVisible));
652 		outBufferData				= outBuffer->getAllocation().getHostPtr();
653 	}
654 
655 	copyImageToBuffer(vkd, primary, colorBuffer->get(), outBuffer->get(), iExtent2D);
656 	endCommandBuffer(vkd, primary);
657 
658 	submitCommandsAndWait(vkd, device, queue, primary);
659 
660 	// Generate reference image and compare.
661 	{
662 		auto&						log				= m_context.getTestContext().getLog();
663 		auto&						outBufferAlloc	= outBuffer->getAllocation();
664 		tcu::ConstPixelBufferAccess	result			(tcuFormat, iExtent3D, outBufferData);
665 		tcu::TextureLevel			referenceLevel	(tcuFormat, iExtent3D.x(), iExtent3D.y());
666 		const auto					reference		= referenceLevel.getAccess();
667 		const auto					setName			= de::toString(m_params.drawType) + "_draw_count_" + de::toString(m_params.drawCount) + (m_params.useTask ? "_with_task" : "_no_task");
668 		const auto					fHeight			= static_cast<float>(extent.height);
669 		const auto					fWidth			= static_cast<float>(extent.width);
670 
671 		invalidateAlloc(vkd, device, outBufferAlloc);
672 
673 		for (int y = 0; y < iExtent3D.y(); ++y)
674 			for (int x = 0; x < iExtent3D.x(); ++x)
675 			{
676 				const tcu::Vec4 refColor	= ((m_params.drawCount == 0u || (m_params.drawType == DrawType::DRAW && y >= static_cast<int>(m_params.drawCount)))
677 											? clearColor
678 											: tcu::Vec4(
679 												// These match the per-primitive color set by the mesh shader.
680 												(static_cast<float>(y) + 0.5f) / fHeight,
681 												(static_cast<float>(x) + 0.5f) / fWidth,
682 												0.0f,
683 												1.0f));
684 				reference.setPixel(refColor, x, y);
685 			}
686 
687 		if (!tcu::floatThresholdCompare(log, setName.c_str(), "", reference, result, threshold, tcu::COMPARE_LOG_ON_ERROR))
688 			return tcu::TestStatus::fail("Image comparison failed; check log for details");
689 	}
690 
691 	return tcu::TestStatus::pass("Pass");
692 }
693 
694 } // anonymous
695 
createMeshShaderApiTestsEXT(tcu::TestContext & testCtx)696 tcu::TestCaseGroup* createMeshShaderApiTestsEXT (tcu::TestContext& testCtx)
697 {
698 	GroupPtr mainGroup (new tcu::TestCaseGroup(testCtx, "api", "Mesh Shader API tests"));
699 
700 	const DrawType drawCases[] =
701 	{
702 		DrawType::DRAW,
703 		DrawType::DRAW_INDIRECT,
704 		DrawType::DRAW_INDIRECT_COUNT,
705 	};
706 
707 	const auto		extent				= getExtent();
708 	const uint32_t	drawCountCases[]	= { 0u, 1u, 2u, extent.height / 2u, extent.height };
709 
710 	const uint32_t normalStride	= static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandEXT));
711 	const uint32_t largeStride	= 2u * normalStride + 4u;
712 	const uint32_t altOffset	= 20u;
713 
714 	const struct
715 	{
716 		tcu::Maybe<IndirectArgs>	indirectArgs;
717 		const char*					name;
718 	} indirectArgsCases[] =
719 	{
720 		{ tcu::nothing<IndirectArgs>(),							"no_indirect_args"			},
721 
722 		// Offset 0, varying strides.
723 		{ tcu::just(IndirectArgs{ 0u, 0u }),					"offset_0_stride_0"			},
724 		{ tcu::just(IndirectArgs{ 0u, normalStride }),			"offset_0_stride_normal"	},
725 		{ tcu::just(IndirectArgs{ 0u, largeStride }),			"offset_0_stride_large"		},
726 
727 		// Nonzero offset, varying strides.
728 		{ tcu::just(IndirectArgs{ altOffset, 0u }),				"offset_alt_stride_0"		},
729 		{ tcu::just(IndirectArgs{ altOffset, normalStride }),	"offset_alt_stride_normal"	},
730 		{ tcu::just(IndirectArgs{ altOffset, largeStride }),	"offset_alt_stride_large"	},
731 	};
732 
733 	const struct
734 	{
735 		tcu::Maybe<IndirectCountLimitType>	limitType;
736 		const char*							name;
737 	} countLimitCases[] =
738 	{
739 		{ tcu::nothing<IndirectCountLimitType>(),			"no_count_limit"		},
740 		{ tcu::just(IndirectCountLimitType::BUFFER_VALUE),	"count_limit_buffer"	},
741 		{ tcu::just(IndirectCountLimitType::MAX_COUNT),		"count_limit_max_count"	},
742 	};
743 
744 	const struct
745 	{
746 		tcu::Maybe<uint32_t>	countOffset;
747 		const char*				name;
748 	} countOffsetCases[] =
749 	{
750 		{ tcu::nothing<uint32_t>(),	"no_count_offset"	},
751 		{ tcu::just(uint32_t{0u}),	"count_offset_0"	},
752 		{ tcu::just(altOffset),		"count_offset_alt"	},
753 	};
754 
755 	const struct
756 	{
757 		bool		useTask;
758 		const char*	name;
759 	} taskCases[] =
760 	{
761 		{ false,	"no_task_shader"	},
762 		{ true,		"with_task_shader"	},
763 	};
764 
765 	const struct
766 	{
767 		bool		secondaryCmd;
768 		const char*	suffix;
769 	} cmdBufferCases[] =
770 	{
771 		{ false,	""					},
772 		{ true,		"_secondary_cmd"	},
773 	};
774 
775 	uint32_t seed = 1628678795u;
776 
777 	for (const auto& drawCase : drawCases)
778 	{
779 		const auto drawCaseName			= de::toString(drawCase);
780 		const bool isIndirect			= (drawCase != DrawType::DRAW);
781 		const bool isIndirectNoCount	= (drawCase == DrawType::DRAW_INDIRECT);
782 		const bool isIndirectCount		= (drawCase == DrawType::DRAW_INDIRECT_COUNT);
783 
784 		GroupPtr drawGroup(new tcu::TestCaseGroup(testCtx, drawCaseName.c_str(), ""));
785 
786 		for (const auto& drawCountCase : drawCountCases)
787 		{
788 			const auto drawCountName = "draw_count_" + de::toString(drawCountCase);
789 			GroupPtr drawCountGroup(new tcu::TestCaseGroup(testCtx, drawCountName.c_str(), ""));
790 
791 			for (const auto& indirectArgsCase : indirectArgsCases)
792 			{
793 				const bool hasIndirectArgs	= static_cast<bool>(indirectArgsCase.indirectArgs);
794 				const bool strideZero		= (hasIndirectArgs && indirectArgsCase.indirectArgs.get().stride == 0u);
795 
796 				if (isIndirect != hasIndirectArgs)
797 					continue;
798 
799 				if (((isIndirectNoCount && drawCountCase > 1u) || isIndirectCount) && strideZero)
800 					continue;
801 
802 				GroupPtr indirectArgsGroup(new tcu::TestCaseGroup(testCtx, indirectArgsCase.name, ""));
803 
804 				for (const auto& countLimitCase : countLimitCases)
805 				{
806 					const bool hasCountLimit = static_cast<bool>(countLimitCase.limitType);
807 
808 					if (isIndirectCount != hasCountLimit)
809 						continue;
810 
811 					GroupPtr countLimitGroup(new tcu::TestCaseGroup(testCtx, countLimitCase.name, ""));
812 
813 					for (const auto& countOffsetCase : countOffsetCases)
814 					{
815 						const bool hasCountOffsetType = static_cast<bool>(countOffsetCase.countOffset);
816 
817 						if (isIndirectCount != hasCountOffsetType)
818 							continue;
819 
820 						GroupPtr countOffsetGroup(new tcu::TestCaseGroup(testCtx, countOffsetCase.name, ""));
821 
822 						for (const auto& taskCase : taskCases)
823 						{
824 							for (const auto& cmdBufferCase : cmdBufferCases)
825 							{
826 								const auto			testName	= std::string(taskCase.name) + cmdBufferCase.suffix;
827 								const TestParams	params		=
828 								{
829 									drawCase,						//	DrawType							drawType;
830 									seed++,							//	uint32_t							seed;
831 									drawCountCase,					//	uint32_t							drawCount;
832 									indirectArgsCase.indirectArgs,	//	tcu::Maybe<IndirectArgs>			indirectArgs;
833 									countLimitCase.limitType,		//	tcu::Maybe<IndirectCountLimitType>	indirectCountLimit;
834 									countOffsetCase.countOffset,	//	tcu::Maybe<uint32_t>				indirectCountOffset;
835 									taskCase.useTask,				//	bool								useTask;
836 									cmdBufferCase.secondaryCmd,		//	bool								useSecondaryCmdBuffer;
837 								};
838 
839 								countOffsetGroup->addChild(new MeshApiCase(testCtx, testName, "", params));
840 							}
841 						}
842 
843 						countLimitGroup->addChild(countOffsetGroup.release());
844 					}
845 
846 					indirectArgsGroup->addChild(countLimitGroup.release());
847 				}
848 
849 				drawCountGroup->addChild(indirectArgsGroup.release());
850 			}
851 
852 			drawGroup->addChild(drawCountGroup.release());
853 		}
854 
855 		mainGroup->addChild(drawGroup.release());
856 	}
857 
858 	return mainGroup.release();
859 }
860 
861 } // MeshShader
862 } // vkt
863