• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2022 The Khronos Group Inc.
6  * Copyright (c) 2022 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Mesh Shader Property Tests for VK_EXT_mesh_shader
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktMeshShaderPropertyTestsEXT.hpp"
26 #include "vktTestCase.hpp"
27 #include "vktMeshShaderUtil.hpp"
28 
29 #include "vkBufferWithMemory.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkObjUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkBarrierUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkImageWithMemory.hpp"
36 #include "vkImageUtil.hpp"
37 
38 #include "tcuTestLog.hpp"
39 #include "tcuImageCompare.hpp"
40 #include "tcuTextureUtil.hpp"
41 
42 #include "deUniquePtr.hpp"
43 
44 #include <algorithm>
45 #include <sstream>
46 #include <limits>
47 
48 namespace vkt
49 {
50 namespace MeshShader
51 {
52 
53 using namespace vk;
54 
55 namespace
56 {
57 
// Selects which kind of task shader storage a size test exercises.
enum class PayLoadShMemSizeType
{
	PAYLOAD			= 0,	// Only the task payload is declared and checked.
	SHARED_MEMORY,			// Only task shared memory is declared and checked.
	BOTH,					// Payload and shared memory are declared and checked together.
};
64 
65 struct PayloadShMemSizeParams
66 {
67 	PayLoadShMemSizeType testType;
68 
hasPayloadvkt::MeshShader::__anon7cffbaf10111::PayloadShMemSizeParams69 	bool hasPayload			(void) const { return testType != PayLoadShMemSizeType::SHARED_MEMORY;	}
hasSharedMemoryvkt::MeshShader::__anon7cffbaf10111::PayloadShMemSizeParams70 	bool hasSharedMemory	(void) const { return testType != PayLoadShMemSizeType::PAYLOAD;		}
71 };
72 
73 using TaskPayloadShMemSizeParams	= PayloadShMemSizeParams;
74 using MeshPayloadShMemSizeParams	= PayloadShMemSizeParams;
75 using SpecConstVector				= std::vector<uint32_t>;
76 
77 class TaskPayloadShMemSizeCase : public vkt::TestCase
78 {
79 public:
TaskPayloadShMemSizeCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const TaskPayloadShMemSizeParams & params)80 					TaskPayloadShMemSizeCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TaskPayloadShMemSizeParams& params)
81 						: vkt::TestCase				(testCtx, name, description)
82 						, m_params					(params)
83 						{}
~TaskPayloadShMemSizeCase(void)84 	virtual			~TaskPayloadShMemSizeCase	(void) {}
85 
86 	void			checkSupport				(Context& context) const override;
87 	void			initPrograms				(vk::SourceCollections& programCollection) const override;
88 	TestInstance*	createInstance				(Context& context) const override;
89 
90 protected:
91 	// These depend on the context because we need the mesh shading properties to calculate them.
92 	struct ParamsFromContext
93 	{
94 		uint32_t payloadElements;
95 		uint32_t sharedMemoryElements;
96 	};
97 
98 	ParamsFromContext getParamsFromContext (Context& context) const;
99 
100 	const TaskPayloadShMemSizeParams m_params;
101 
102 	static constexpr uint32_t kElementSize		= static_cast<uint32_t>(sizeof(uint32_t));
103 	static constexpr uint32_t kLocalInvocations	= 128u;
104 };
105 
106 class SpecConstantInstance : public vkt::TestInstance
107 {
108 public:
SpecConstantInstance(Context & context,SpecConstVector && vec)109 											SpecConstantInstance	(Context& context, SpecConstVector&& vec)
110 												: vkt::TestInstance	(context)
111 												, m_specConstants	(std::move(vec))
112 												{}
~SpecConstantInstance(void)113 	virtual									~SpecConstantInstance	(void) {}
114 
115 protected:
116 	std::vector<VkSpecializationMapEntry>	makeSpecializationMap	(void) const;
117 	const SpecConstVector					m_specConstants;
118 };
119 
makeSpecializationMap(void) const120 std::vector<VkSpecializationMapEntry> SpecConstantInstance::makeSpecializationMap (void) const
121 {
122 	std::vector<VkSpecializationMapEntry> entryMap;
123 	entryMap.reserve(m_specConstants.size());
124 
125 	const auto constantSize	= sizeof(uint32_t);
126 	const auto csU32		= static_cast<uint32_t>(constantSize);
127 
128 	for (size_t i = 0u; i < m_specConstants.size(); ++i)
129 	{
130 		const auto id = static_cast<uint32_t>(i);
131 
132 		const VkSpecializationMapEntry entry =
133 		{
134 			id,				//	uint32_t	constantID;
135 			(csU32 * id),	//	uint32_t	offset;
136 			constantSize,	//	size_t		size;
137 		};
138 		entryMap.push_back(entry);
139 	}
140 
141 	return entryMap;
142 }
143 
144 class PayloadShMemSizeInstance : public SpecConstantInstance
145 {
146 public:
PayloadShMemSizeInstance(Context & context,const TaskPayloadShMemSizeParams & params,SpecConstVector && vec)147 						PayloadShMemSizeInstance	(Context& context, const TaskPayloadShMemSizeParams& params, SpecConstVector&& vec)
148 							: SpecConstantInstance	(context, std::move(vec))
149 							, m_params				(params)
150 							{}
~PayloadShMemSizeInstance(void)151 	virtual				~PayloadShMemSizeInstance	(void) {}
152 
153 	tcu::TestStatus		iterate						(void) override;
154 
155 protected:
156 	Move<VkRenderPass>						makeCustomRenderPass	(const DeviceInterface& vkd, VkDevice device);
157 	const TaskPayloadShMemSizeParams		m_params;
158 };
159 
checkSupport(Context & context) const160 void TaskPayloadShMemSizeCase::checkSupport (Context& context) const
161 {
162 	checkTaskMeshShaderSupportEXT(context, true/*requireTask*/, true/*requireMesh*/);
163 	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
164 
165 	const auto&	meshProperties	= context.getMeshShaderPropertiesEXT();
166 	const auto	minSize			= kLocalInvocations * kElementSize;
167 
168 	// Note: the min required values for these properties in the spec would pass these checks.
169 
170 	if (meshProperties.maxTaskPayloadSize < minSize)
171 		TCU_FAIL("Invalid maxTaskPayloadSize");
172 
173 	if (meshProperties.maxTaskSharedMemorySize < minSize)
174 		TCU_FAIL("Invalid maxTaskSharedMemorySize");
175 
176 	if (meshProperties.maxTaskPayloadAndSharedMemorySize < minSize)
177 		TCU_FAIL("Invalid maxTaskPayloadAndSharedMemorySize");
178 
179 	if (meshProperties.maxMeshPayloadAndSharedMemorySize < minSize)
180 		TCU_FAIL("Invalid maxMeshPayloadAndSharedMemorySize");
181 }
182 
getParamsFromContext(Context & context) const183 TaskPayloadShMemSizeCase::ParamsFromContext TaskPayloadShMemSizeCase::getParamsFromContext (Context& context) const
184 {
185 	ParamsFromContext params;
186 
187 	const auto&	meshProperties		= context.getMeshShaderPropertiesEXT();
188 	const auto	maxMeshPayloadSize	= std::min(meshProperties.maxMeshPayloadAndOutputMemorySize, meshProperties.maxMeshPayloadAndSharedMemorySize);
189 	const auto	maxPayloadElements	= std::min(meshProperties.maxTaskPayloadSize / kElementSize, maxMeshPayloadSize / kElementSize);
190 	const auto	maxShMemElements	= meshProperties.maxTaskSharedMemorySize / kElementSize;
191 	const auto	maxTotalElements	= meshProperties.maxTaskPayloadAndSharedMemorySize / kElementSize;
192 
193 	if (m_params.testType == PayLoadShMemSizeType::PAYLOAD)
194 	{
195 		params.sharedMemoryElements	= 0u;
196 		params.payloadElements		= std::min(maxTotalElements, maxPayloadElements);
197 	}
198 	else if (m_params.testType == PayLoadShMemSizeType::SHARED_MEMORY)
199 	{
200 		params.payloadElements		= 0u;
201 		params.sharedMemoryElements	= std::min(maxTotalElements, maxShMemElements);
202 	}
203 	else
204 	{
205 		uint32_t*	minPtr;
206 		uint32_t	minVal;
207 		uint32_t*	maxPtr;
208 		uint32_t	maxVal;
209 
210 		// Divide them as evenly as possible getting them as closest as possible to maxTotalElements.
211 		if (maxPayloadElements < maxShMemElements)
212 		{
213 			minPtr = &params.payloadElements;
214 			minVal = maxPayloadElements;
215 
216 			maxPtr = &params.sharedMemoryElements;
217 			maxVal = maxShMemElements;
218 		}
219 		else
220 		{
221 			minPtr = &params.sharedMemoryElements;
222 			minVal = maxShMemElements;
223 
224 			maxPtr = &params.payloadElements;
225 			maxVal = maxPayloadElements;
226 		}
227 
228 		*minPtr = std::min(minVal, maxTotalElements / 2u);
229 		*maxPtr = std::min(maxTotalElements - (*minPtr), maxVal);
230 	}
231 
232 	return params;
233 }
234 
createInstance(Context & context) const235 TestInstance* TaskPayloadShMemSizeCase::createInstance (Context &context) const
236 {
237 	const auto		ctxParams		= getParamsFromContext(context);
238 	SpecConstVector	specConstVec	{ ctxParams.payloadElements, ctxParams.sharedMemoryElements };
239 
240 	return new PayloadShMemSizeInstance(context, m_params, std::move(specConstVec));
241 }
242 
initPrograms(vk::SourceCollections & programCollection) const243 void TaskPayloadShMemSizeCase::initPrograms (vk::SourceCollections& programCollection) const
244 {
245 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
246 
247 	const std::string scDecl =
248 		"layout (constant_id=0) const uint payloadElements = 1u;\n"
249 		"layout (constant_id=1) const uint sharedMemoryElements = 1u;\n"
250 		;
251 
252 	const std::string dsDecl =
253 		"layout (set=0, binding=0, std430) buffer ResultBlock {\n"
254 		"    uint sharedOK;\n"
255 		"    uint payloadOK;\n"
256 		"} result;\n"
257 		;
258 
259 	std::string taskData;
260 	std::string taskPayloadBody;
261 	std::string meshPayloadBody;
262 
263 	if (m_params.hasPayload())
264 	{
265 		std::ostringstream taskDataStream;
266 		taskDataStream
267 			<< "struct TaskData {\n"
268 			<< "    uint elements[payloadElements];\n"
269 			<< "};\n"
270 			<< "taskPayloadSharedEXT TaskData td;\n"
271 			;
272 		taskData = taskDataStream.str();
273 
274 		std::ostringstream taskBodyStream;
275 		taskBodyStream
276 			<< "    const uint payloadElementsPerInvocation = uint(ceil(float(payloadElements) / float(" << kLocalInvocations << ")));\n"
277 			<< "    for (uint i = 0u; i < payloadElementsPerInvocation; ++i) {\n"
278 			<< "        const uint elemIdx = payloadElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
279 			<< "        if (elemIdx < payloadElements) {\n"
280 			<< "            td.elements[elemIdx] = elemIdx + 2000u;\n"
281 			<< "        }\n"
282 			<< "    }\n"
283 			<< "\n"
284 			;
285 		taskPayloadBody = taskBodyStream.str();
286 
287 		std::ostringstream meshBodyStream;
288 		meshBodyStream
289 			<< "    bool allOK = true;\n"
290 			<< "    for (uint i = 0u; i < payloadElements; ++i) {\n"
291 			<< "        if (td.elements[i] != i + 2000u) {\n"
292 			<< "            allOK = false;\n"
293 			<< "            break;\n"
294 			<< "        }\n"
295 			<< "    }\n"
296 			<< "    result.payloadOK = (allOK ? 1u : 0u);\n"
297 			<< "\n"
298 			;
299 		meshPayloadBody = meshBodyStream.str();
300 	}
301 	else
302 	{
303 		meshPayloadBody = "    result.payloadOK = 1u;\n";
304 	}
305 
306 	std::string sharedData;
307 	std::string taskSharedDataBody;
308 
309 	if (m_params.hasSharedMemory())
310 	{
311 		sharedData = "shared uint sharedElements[sharedMemoryElements];\n";
312 
313 		std::ostringstream bodyStream;
314 		bodyStream
315 			<< "    const uint shMemElementsPerInvocation = uint(ceil(float(sharedMemoryElements) / float(" << kLocalInvocations << ")));\n"
316 			<< "    for (uint i = 0u; i < shMemElementsPerInvocation; ++i) {\n"
317 			<< "        const uint elemIdx = shMemElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
318 			<< "        if (elemIdx < sharedMemoryElements) {\n"
319 			<< "            sharedElements[elemIdx] = elemIdx * 2u + 1000u;\n" // Write
320 			<< "        }\n"
321 			<< "    }\n"
322 			<< "    memoryBarrierShared();\n"
323 			<< "    barrier();\n"
324 			<< "    for (uint i = 0u; i < shMemElementsPerInvocation; ++i) {\n"
325 			<< "        const uint elemIdx = shMemElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
326 			<< "        if (elemIdx < sharedMemoryElements) {\n"
327 			<< "            const uint accessIdx = sharedMemoryElements - 1u - elemIdx;\n"
328 			<< "            sharedElements[accessIdx] += accessIdx;\n" // Read+Write a different element.
329 			<< "        }\n"
330 			<< "    }\n"
331 			<< "    memoryBarrierShared();\n"
332 			<< "    barrier();\n"
333 			<< "    if (gl_LocalInvocationIndex == 0u) {\n"
334 			<< "        bool allOK = true;\n"
335 			<< "        for (uint i = 0u; i < sharedMemoryElements; ++i) {\n"
336 			<< "            if (sharedElements[i] != i*3u + 1000u) {\n"
337 			<< "                allOK = false;\n"
338 			<< "                break;\n"
339 			<< "            }\n"
340 			<< "        }\n"
341 			<< "        result.sharedOK = (allOK ? 1u : 0u);\n"
342 			<< "    }\n"
343 			<< "\n"
344 			;
345 		taskSharedDataBody = bodyStream.str();
346 	}
347 	else
348 	{
349 		taskSharedDataBody =
350 			"    if (gl_LocalInvocationIndex == 0u) {\n"
351 			"        result.sharedOK = 1u;\n"
352 			"    }\n"
353 			;
354 	}
355 
356 	std::ostringstream task;
357 	task
358 		<< "#version 450\n"
359 		<< "#extension GL_EXT_mesh_shader : enable\n"
360 		<< "\n"
361 		<< "layout (local_size_x=" << kLocalInvocations << ", local_size_y=1, local_size_z=1) in;\n"
362 		<< scDecl
363 		<< dsDecl
364 		<< taskData
365 		<< sharedData
366 		<< "\n"
367 		<< "void main () {\n"
368 		<< taskSharedDataBody
369 		<< taskPayloadBody
370 		<< "    EmitMeshTasksEXT(1u, 1u, 1u);\n"
371 		<< "}\n"
372 		;
373 	programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
374 
375 	std::ostringstream mesh;
376 	mesh
377 		<< "#version 450\n"
378 		<< "#extension GL_EXT_mesh_shader : enable\n"
379 		<< "\n"
380 		<< "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
381 		<< "layout (triangles) out;\n"
382 		<< "layout (max_vertices=3, max_primitives=1) out;\n"
383 		<< scDecl
384 		<< dsDecl
385 		<< taskData
386 		<< "\n"
387 		<< "void main () {\n"
388 		<< meshPayloadBody
389 		<< "    SetMeshOutputsEXT(0u, 0u);\n"
390 		<< "}\n"
391 		;
392 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
393 }
394 
makeCustomRenderPass(const DeviceInterface & vkd,VkDevice device)395 Move<VkRenderPass> PayloadShMemSizeInstance::makeCustomRenderPass (const DeviceInterface& vkd, VkDevice device)
396 {
397 	const auto subpassDesc	= makeSubpassDescription(0u, VK_PIPELINE_BIND_POINT_GRAPHICS, 0u, nullptr, 0u, nullptr, 0u, nullptr, 0u, nullptr);
398 	const auto dependency	= makeSubpassDependency(0u, 0u, VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT, VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT, 0u);
399 
400 	const VkRenderPassCreateInfo renderPassCreateInfo =
401 	{
402 		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,	//	VkStructureType					sType;
403 		nullptr,									//	const void*						pNext;
404 		0u,											//	VkRenderPassCreateFlags			flags;
405 		0u,											//	uint32_t						attachmentCount;
406 		nullptr,									//	const VkAttachmentDescription*	pAttachments;
407 		1u,											//	uint32_t						subpassCount;
408 		&subpassDesc,								//	const VkSubpassDescription*		pSubpasses;
409 		1u,											//	uint32_t						dependencyCount;
410 		&dependency,								//	const VkSubpassDependency*		pDependencies;
411 	};
412 
413 	return createRenderPass(vkd, device, &renderPassCreateInfo);
414 }
415 
iterate(void)416 tcu::TestStatus PayloadShMemSizeInstance::iterate (void)
417 {
418 	const auto&		vkd						= m_context.getDeviceInterface();
419 	const auto		device					= m_context.getDevice();
420 	auto&			alloc					= m_context.getDefaultAllocator();
421 	const auto		queueIndex				= m_context.getUniversalQueueFamilyIndex();
422 	const auto		queue					= m_context.getUniversalQueue();
423 	const auto		framebufferExtent		= makeExtent2D(1u, 1u);
424 	const auto		pipelineBindPoint		= VK_PIPELINE_BIND_POINT_GRAPHICS;
425 
426 	const auto			resultsBufferSize		= static_cast<VkDeviceSize>(sizeof(uint32_t) * 2u);
427 	const auto			resultsBufferDescType	= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
428 	const auto			resultsBufferUsage		= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
429 	const auto			resultsBufferStages		= (VK_SHADER_STAGE_TASK_BIT_EXT | VK_SHADER_STAGE_MESH_BIT_EXT);
430 	const auto			resultsBufferCreateInfo	= makeBufferCreateInfo(resultsBufferSize, resultsBufferUsage);
431 	BufferWithMemory	resultsBuffer			(vkd, device, alloc, resultsBufferCreateInfo, MemoryRequirement::HostVisible);
432 	auto&				resultsBufferAlloc		= resultsBuffer.getAllocation();
433 	void*				resultsBufferDataPtr	= resultsBufferAlloc.getHostPtr();
434 
435 	deMemset(resultsBufferDataPtr, 0, static_cast<size_t>(resultsBufferSize));
436 
437 	DescriptorSetLayoutBuilder layoutBuilder;
438 	layoutBuilder.addSingleBinding(resultsBufferDescType, resultsBufferStages);
439 	const auto setLayout		= layoutBuilder.build(vkd, device);
440 	const auto pipelineLayout	= makePipelineLayout(vkd, device, setLayout.get());
441 
442 	DescriptorPoolBuilder poolBuilder;
443 	poolBuilder.addType(resultsBufferDescType);
444 	const auto descriptorPool	= poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
445 	const auto descriptorSet	= makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
446 
447 	DescriptorSetUpdateBuilder updateBuilder;
448 	const auto resultsBufferDescInfo = makeDescriptorBufferInfo(resultsBuffer.get(), 0ull, resultsBufferSize);
449 	updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u), resultsBufferDescType, &resultsBufferDescInfo);
450 	updateBuilder.update(vkd, device);
451 
452 	const auto&	binaries	= m_context.getBinaryCollection();
453 	const auto	hasTask		= binaries.contains("task");
454 	const auto	taskShader	= (hasTask ? createShaderModule(vkd, device, binaries.get("task")) : Move<VkShaderModule>());
455 	const auto	meshShader	= createShaderModule(vkd, device, binaries.get("mesh"));
456 
457 	const auto renderPass	= makeCustomRenderPass(vkd, device);
458 	const auto framebuffer	= makeFramebuffer(vkd, device, renderPass.get(), 0u, nullptr, framebufferExtent.width, framebufferExtent.height);
459 
460 	const std::vector<VkViewport>	viewports	(1u, makeViewport(framebufferExtent));
461 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(framebufferExtent));
462 
463 	const auto					specMap		= makeSpecializationMap();
464 	const VkSpecializationInfo	specInfo	=
465 	{
466 		static_cast<uint32_t>(specMap.size()),	//	uint32_t						mapEntryCount;
467 		de::dataOrNull(specMap),				//	const VkSpecializationMapEntry*	pMapEntries;
468 		de::dataSize(m_specConstants),			//	size_t							dataSize;
469 		de::dataOrNull(m_specConstants),		//	const void*						pData;
470 	};
471 
472 	std::vector<VkPipelineShaderStageCreateInfo>	shaderStages;
473 	VkPipelineShaderStageCreateInfo					stageInfo		=
474 	{
475 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	//	VkStructureType						sType;
476 		nullptr,												//	const void*							pNext;
477 		0u,														//	VkPipelineShaderStageCreateFlags	flags;
478 		VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM,						//	VkShaderStageFlagBits				stage;
479 		DE_NULL,												//	VkShaderModule						module;
480 		"main",													//	const char*							pName;
481 		&specInfo,												//	const VkSpecializationInfo*			pSpecializationInfo;
482 	};
483 
484 	if (hasTask)
485 	{
486 		stageInfo.stage = VK_SHADER_STAGE_TASK_BIT_EXT;
487 		stageInfo.module = taskShader.get();
488 		shaderStages.push_back(stageInfo);
489 	}
490 
491 	{
492 		stageInfo.stage = VK_SHADER_STAGE_MESH_BIT_EXT;
493 		stageInfo.module = meshShader.get();
494 		shaderStages.push_back(stageInfo);
495 	}
496 
497 	const auto pipeline = makeGraphicsPipeline(vkd, device,
498 		DE_NULL, pipelineLayout.get(), 0u,
499 		shaderStages, renderPass.get(), viewports, scissors);
500 
501 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
502 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
503 	const auto cmdBuffer	= cmdBufferPtr.get();
504 
505 	beginCommandBuffer(vkd, cmdBuffer);
506 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u));
507 	vkd.cmdBindPipeline(cmdBuffer, pipelineBindPoint, pipeline.get());
508 	vkd.cmdBindDescriptorSets(cmdBuffer, pipelineBindPoint, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
509 	vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
510 	endRenderPass(vkd, cmdBuffer);
511 	{
512 		const auto writeToHost = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
513 		const auto writeStages = (VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT | VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT);
514 		cmdPipelineMemoryBarrier(vkd, cmdBuffer, writeStages, VK_PIPELINE_STAGE_HOST_BIT, &writeToHost);
515 	}
516 	endCommandBuffer(vkd, cmdBuffer);
517 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
518 
519 	invalidateAlloc(vkd, device, resultsBufferAlloc);
520 	struct
521 	{
522 		uint32_t sharedOK;
523 		uint32_t payloadOK;
524 	} resultData;
525 	deMemcpy(&resultData, resultsBufferDataPtr, sizeof(resultData));
526 
527 	if (resultData.sharedOK != 1u)
528 		TCU_FAIL("Unexpected shared memory result: " + std::to_string(resultData.sharedOK));
529 
530 	if (resultData.payloadOK != 1u)
531 		TCU_FAIL("Unexpected payload result: " + std::to_string(resultData.payloadOK));
532 
533 	return tcu::TestStatus::pass("Pass");
534 }
535 
536 class MaxViewIndexCase : public vkt::TestCase
537 {
538 public:
MaxViewIndexCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description)539 					MaxViewIndexCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description)
540 						: vkt::TestCase	(testCtx, name, description)
541 						{}
~MaxViewIndexCase(void)542 	virtual			~MaxViewIndexCase	(void) {}
543 
544 	void			checkSupport	(Context& context) const override;
545 	void			initPrograms	(vk::SourceCollections& programCollection) const override;
546 	TestInstance*	createInstance	(Context& context) const override;
547 };
548 
549 class MaxViewIndexInstance : public vkt::TestInstance
550 {
551 public:
MaxViewIndexInstance(Context & context)552 						MaxViewIndexInstance	(Context& context)
553 							: vkt::TestInstance (context)
554 							{}
~MaxViewIndexInstance(void)555 	virtual				~MaxViewIndexInstance	(void) {}
556 
557 	tcu::TestStatus		iterate					(void) override;
558 	Move<VkRenderPass>	makeCustomRenderPass	(const DeviceInterface& vkd, VkDevice device, uint32_t layerCount, VkFormat format);
559 
560 	static constexpr uint32_t kMaxViews = 32u;
561 };
562 
checkSupport(Context & context) const563 void MaxViewIndexCase::checkSupport (Context &context) const
564 {
565 	checkTaskMeshShaderSupportEXT(context, false/*requireTask*/, true/*requireMesh*/);
566 
567 	const auto& multiviewFeatures = context.getMultiviewFeatures();
568 	if (!multiviewFeatures.multiview)
569 		TCU_THROW(NotSupportedError, "Multiview not supported");
570 
571 	const auto& meshFeatures = context.getMeshShaderFeaturesEXT();
572 	if (!meshFeatures.multiviewMeshShader)
573 		TCU_THROW(NotSupportedError, "Multiview not supported for mesh shaders");
574 }
575 
initPrograms(vk::SourceCollections & programCollection) const576 void MaxViewIndexCase::initPrograms (vk::SourceCollections& programCollection) const
577 {
578 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
579 
580 	std::ostringstream mesh;
581 	mesh
582 		<< "#version 450\n"
583 		<< "#extension GL_EXT_mesh_shader : enable\n"
584 		<< "\n"
585 		<< "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
586 		<< "layout (triangles) out;\n"
587 		<< "layout (max_vertices=3, max_primitives=1) out;\n"
588 		<< "\n"
589 		<< "void main (void) {\n"
590 		<< "    SetMeshOutputsEXT(3u, 1u);\n"
591 		<< "\n"
592 		<< "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
593 		<< "    gl_MeshVerticesEXT[1].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
594 		<< "    gl_MeshVerticesEXT[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
595 		<< "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0u, 1u, 2u);\n"
596 		<< "}\n"
597 		;
598 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
599 
600 	std::ostringstream frag;
601 	frag
602 		<< "#version 450\n"
603 		<< "#extension GL_EXT_mesh_shader : enable\n"
604 		<< "#extension GL_EXT_multiview : enable\n"
605 		<< "\n"
606 		<< "layout (location=0) out uvec4 outColor;\n"
607 		<< "\n"
608 		<< "void main (void) {\n"
609 		<< "    outColor = uvec4(uint(gl_ViewIndex) + 1u, 0, 0, 0);\n"
610 		<< "}\n"
611 		;
612 	programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
613 }
614 
createInstance(Context & context) const615 TestInstance* MaxViewIndexCase::createInstance (Context& context) const
616 {
617 	return new MaxViewIndexInstance(context);
618 }
619 
makeCustomRenderPass(const DeviceInterface & vkd,VkDevice device,uint32_t layerCount,VkFormat format)620 Move<VkRenderPass> MaxViewIndexInstance::makeCustomRenderPass (const DeviceInterface& vkd, VkDevice device, uint32_t layerCount, VkFormat format)
621 {
622 	DE_ASSERT(layerCount > 0u);
623 
624 	const VkAttachmentDescription colorAttachmentDescription =
625 	{
626 		0u,											// VkAttachmentDescriptionFlags    flags
627 		format,										// VkFormat                        format
628 		VK_SAMPLE_COUNT_1_BIT,						// VkSampleCountFlagBits           samples
629 		VK_ATTACHMENT_LOAD_OP_CLEAR,				// VkAttachmentLoadOp              loadOp
630 		VK_ATTACHMENT_STORE_OP_STORE,				// VkAttachmentStoreOp             storeOp
631 		VK_ATTACHMENT_LOAD_OP_DONT_CARE,			// VkAttachmentLoadOp              stencilLoadOp
632 		VK_ATTACHMENT_STORE_OP_DONT_CARE,			// VkAttachmentStoreOp             stencilStoreOp
633 		VK_IMAGE_LAYOUT_UNDEFINED,					// VkImageLayout                   initialLayout
634 		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,	// VkImageLayout                   finalLayout
635 	};
636 
637 	const VkAttachmentReference colorAttachmentRef = makeAttachmentReference(0u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
638 
639 	const VkSubpassDescription subpassDescription =
640 	{
641 		0u,									// VkSubpassDescriptionFlags       flags
642 		VK_PIPELINE_BIND_POINT_GRAPHICS,	// VkPipelineBindPoint             pipelineBindPoint
643 		0u,									// deUint32                        inputAttachmentCount
644 		nullptr,							// const VkAttachmentReference*    pInputAttachments
645 		1u,									// deUint32                        colorAttachmentCount
646 		&colorAttachmentRef,				// const VkAttachmentReference*    pColorAttachments
647 		nullptr,							// const VkAttachmentReference*    pResolveAttachments
648 		nullptr,							// const VkAttachmentReference*    pDepthStencilAttachment
649 		0u,									// deUint32                        preserveAttachmentCount
650 		nullptr								// const deUint32*                 pPreserveAttachments
651 	};
652 
653 	const uint32_t viewMask = ((1u << layerCount) - 1u);
654 	const VkRenderPassMultiviewCreateInfo multiviewCreateInfo =
655 	{
656 		VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO,	//	VkStructureType	sType;
657 		nullptr,												//	const void*		pNext;
658 		1u,														//	uint32_t		subpassCount;
659 		&viewMask,												//	const uint32_t*	pViewMasks;
660 		0u,														//	uint32_t		dependencyCount;
661 		nullptr,												//	const int32_t*	pViewOffsets;
662 		1u,														//	uint32_t		correlationMaskCount;
663 		&viewMask,												//	const uint32_t*	pCorrelationMasks;
664 	};
665 
666 	const VkRenderPassCreateInfo renderPassInfo =
667 	{
668 		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,				// VkStructureType                   sType
669 		&multiviewCreateInfo,									// const void*                       pNext
670 		0u,														// VkRenderPassCreateFlags           flags
671 		1u,														// deUint32                          attachmentCount
672 		&colorAttachmentDescription,							// const VkAttachmentDescription*    pAttachments
673 		1u,														// deUint32                          subpassCount
674 		&subpassDescription,									// const VkSubpassDescription*       pSubpasses
675 		0u,														// deUint32                          dependencyCount
676 		nullptr,												// const VkSubpassDependency*        pDependencies
677 	};
678 
679 	return createRenderPass(vkd, device, &renderPassInfo);
680 }
681 
iterate(void)682 tcu::TestStatus MaxViewIndexInstance::iterate (void)
683 {
684 	const auto&			vkd				= m_context.getDeviceInterface();
685 	const auto			device			= m_context.getDevice();
686 	auto&				alloc			= m_context.getDefaultAllocator();
687 	const auto			queueIndex		= m_context.getUniversalQueueFamilyIndex();
688 	const auto			queue			= m_context.getUniversalQueue();
689 	const auto&			meshProperties	= m_context.getMeshShaderPropertiesEXT();
690 	const auto			maxViews		= kMaxViews;
691 	const auto			numViews		= std::min(meshProperties.maxMeshMultiviewViewCount, maxViews);
692 	const auto			viewType		= ((numViews > 1u) ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D);
693 	const auto			colorFormat		= VK_FORMAT_R32_UINT;
694 	const auto			tcuColorFormat	= mapVkFormat(colorFormat);
695 	const auto			pixelSize		= static_cast<uint32_t>(tcu::getPixelSize(tcuColorFormat));
696 	const auto			colorUsage		= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
697 	const auto			fbExtent		= makeExtent3D(8u, 8u, 1u);
698 	const tcu::IVec3	iExtent3D		(static_cast<int>(fbExtent.width), static_cast<int>(fbExtent.height), static_cast<int>(numViews));
699 	const tcu::UVec4	clearColor		(0u, 0u, 0u, 0u);
700 
701 	// Create color attachment.
702 	const VkImageCreateInfo colorAttachmentCreatInfo =
703 	{
704 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
705 		nullptr,								//	const void*				pNext;
706 		0u,										//	VkImageCreateFlags		flags;
707 		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
708 		colorFormat,							//	VkFormat				format;
709 		fbExtent,								//	VkExtent3D				extent;
710 		1u,										//	uint32_t				mipLevels;
711 		numViews,								//	uint32_t				arrayLayers;
712 		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
713 		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
714 		colorUsage,								//	VkImageUsageFlags		usage;
715 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
716 		0u,										//	uint32_t				queueFamilyIndexCount;
717 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
718 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
719 	};
720 	ImageWithMemory	colorAttachment		(vkd, device, alloc, colorAttachmentCreatInfo, MemoryRequirement::Any);
721 	const auto		colorSRR			= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numViews);
722 	const auto		colorSRL			= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, numViews);
723 	const auto		colorAttachmentView	= makeImageView(vkd, device, colorAttachment.get(), viewType, colorFormat, colorSRR);
724 
725 	// Verification buffer for the color attachment.
726 	DE_ASSERT(fbExtent.depth == 1u);
727 	const auto			verificationBufferUsage			= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
728 	const auto			verificationBufferSize			= static_cast<VkDeviceSize>(pixelSize * fbExtent.width * fbExtent.height * numViews);
729 	const auto			verificationBufferCreateInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
730 	BufferWithMemory	verificationBuffer				(vkd, device, alloc, verificationBufferCreateInfo, MemoryRequirement::HostVisible);
731 	auto&				verificationBufferAlloc			= verificationBuffer.getAllocation();
732 	void*				verificationBufferData			= verificationBufferAlloc.getHostPtr();
733 
734 	deMemset(verificationBufferData, 0, static_cast<size_t>(verificationBufferSize));
735 
736 	const auto	pipelineLayout	= makePipelineLayout(vkd, device);
737 	const auto	renderPass		= makeCustomRenderPass(vkd, device, numViews, colorFormat);
738 	const auto	framebuffer		= makeFramebuffer(vkd, device, renderPass.get(), colorAttachmentView.get(), fbExtent.width, fbExtent.height, 1u);
739 
740 	const auto&	binaries	= m_context.getBinaryCollection();
741 	const auto	meshModule	= createShaderModule(vkd, device, binaries.get("mesh"));
742 	const auto	fragModule	= createShaderModule(vkd, device, binaries.get("frag"));
743 
744 	const std::vector<VkViewport>	viewports	(1u, makeViewport(fbExtent));
745 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(fbExtent));
746 
747 	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
748 		DE_NULL, meshModule.get(), fragModule.get(),
749 		renderPass.get(), viewports, scissors);
750 
751 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
752 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
753 	const auto cmdBuffer	= cmdBufferPtr.get();
754 
755 	beginCommandBuffer(vkd, cmdBuffer);
756 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
757 	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
758 	vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
759 	endRenderPass(vkd, cmdBuffer);
760 
761 	const auto preTransferBarrier = makeImageMemoryBarrier(
762 		VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
763 		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
764 		colorAttachment.get(), colorSRR);
765 	cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preTransferBarrier);
766 
767 	const auto copyRegion = makeBufferImageCopy(fbExtent, colorSRL);
768 	vkd.cmdCopyImageToBuffer(cmdBuffer, colorAttachment.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
769 
770 	const auto postTransferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
771 	cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &postTransferBarrier);
772 
773 	endCommandBuffer(vkd, cmdBuffer);
774 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
775 
776 	invalidateAlloc(vkd, device, verificationBufferAlloc);
777 	tcu::ConstPixelBufferAccess resultAccess (tcuColorFormat, iExtent3D, verificationBufferData);
778 
779 	for (int z = 0; z < iExtent3D.z(); ++z)
780 	{
781 		const tcu::UVec4 expectedPixel (static_cast<uint32_t>(z) + 1u, 0u, 0u, 1u);
782 		for (int y = 0; y < iExtent3D.y(); ++y)
783 			for (int x = 0; x < iExtent3D.x(); ++x)
784 			{
785 				const auto resultPixel = resultAccess.getPixelUint(x, y, z);
786 				if (resultPixel != expectedPixel)
787 				{
788 					std::ostringstream msg;
789 					msg
790 						<< "Unexpected pixel value at layer " << z << ": (" << x << ", " << y << ") is "
791 						<< resultPixel << " while expecting " << expectedPixel
792 						;
793 					TCU_FAIL(msg.str());
794 				}
795 			}
796 	}
797 
798 	// QualityWarning if needed.
799 	if (meshProperties.maxMeshMultiviewViewCount > maxViews)
800 	{
801 		const auto maxViewsStr = std::to_string(maxViews);
802 		return tcu::TestStatus(QP_TEST_RESULT_QUALITY_WARNING, "Test passed but maxMeshMultiviewViewCount greater than " + maxViewsStr);
803 	}
804 
805 	return tcu::TestStatus::pass("Pass");
806 }
807 
808 class MaxOutputLayersCase : public vkt::TestCase
809 {
810 public:
MaxOutputLayersCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description)811 					MaxOutputLayersCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description)
812 						: vkt::TestCase (testCtx, name, description)
813 						{}
~MaxOutputLayersCase(void)814 	virtual			~MaxOutputLayersCase	(void) {}
815 
816 	TestInstance*	createInstance			(Context& context) const override;
817 	void			checkSupport			(Context& context) const override;
818 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
819 };
820 
821 class MaxOutputLayersInstance : public vkt::TestInstance
822 {
823 public:
MaxOutputLayersInstance(Context & context)824 						MaxOutputLayersInstance		(Context& context) : vkt::TestInstance(context) {}
~MaxOutputLayersInstance(void)825 	virtual				~MaxOutputLayersInstance	(void) {}
826 
827 	tcu::TestStatus		iterate						(void) override;
828 };
829 
createInstance(Context & context) const830 TestInstance* MaxOutputLayersCase::createInstance (Context& context) const
831 {
832 	return new MaxOutputLayersInstance(context);
833 }
834 
checkSupport(Context & context) const835 void MaxOutputLayersCase::checkSupport (Context &context) const
836 {
837 	checkTaskMeshShaderSupportEXT(context, false/*requireTask*/, true/*requireMesh*/);
838 }
839 
initPrograms(vk::SourceCollections & programCollection) const840 void MaxOutputLayersCase::initPrograms (vk::SourceCollections &programCollection) const
841 {
842 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
843 
844 	std::ostringstream mesh;
845 	mesh
846 		<< "#version 450\n"
847 		<< "#extension GL_EXT_mesh_shader : enable\n"
848 		<< "\n"
849 		<< "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
850 		<< "layout (triangles) out;\n"
851 		<< "layout (max_vertices=3, max_primitives=1) out;\n"
852 		<< "\n"
853 		<< "void main (void) {\n"
854 		<< "    SetMeshOutputsEXT(3u, 1u);\n"
855 		<< "\n"
856 		<< "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
857 		<< "    gl_MeshVerticesEXT[1].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
858 		<< "    gl_MeshVerticesEXT[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
859 		<< "\n"
860 		<< "    gl_MeshPrimitivesEXT[0].gl_Layer = int(gl_WorkGroupID.x);\n"
861 		<< "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0u, 1u, 2u);\n"
862 		<< "}\n"
863 		;
864 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
865 
866 	std::ostringstream frag;
867 	frag
868 		<< "#version 450\n"
869 		<< "\n"
870 		<< "layout (location=0) out uvec4 outColor;\n"
871 		<< "\n"
872 		<< "void main (void) {\n"
873 		<< "    outColor = uvec4(uint(gl_Layer) + 1u, 0, 0, 0);\n"
874 		<< "}\n"
875 		;
876 	programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
877 }
878 
iterate(void)879 tcu::TestStatus MaxOutputLayersInstance::iterate (void)
880 {
881 	const auto&			vki				= m_context.getInstanceInterface();
882 	const auto&			physicalDevice	= m_context.getPhysicalDevice();
883 	const auto&			vkd				= m_context.getDeviceInterface();
884 	const auto			device			= m_context.getDevice();
885 	auto&				alloc			= m_context.getDefaultAllocator();
886 	const auto			queueIndex		= m_context.getUniversalQueueFamilyIndex();
887 	const auto			queue			= m_context.getUniversalQueue();
888 	const auto			fbFormat		= VK_FORMAT_R32_UINT;
889 	const auto			imageType		= VK_IMAGE_TYPE_2D;
890 	const auto			tiling			= VK_IMAGE_TILING_OPTIMAL;
891 	const auto			usage			= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
892 	const auto			sampleCount		= VK_SAMPLE_COUNT_1_BIT;
893 	auto&				log				= m_context.getTestContext().getLog();
894 
895 	// Find out how many layers we can actually use.
896 	const auto&	properties			= m_context.getDeviceProperties();
897 	const auto&	meshProperties		= m_context.getMeshShaderPropertiesEXT();
898 	const auto	formatProperties	= getPhysicalDeviceImageFormatProperties(vki, physicalDevice, fbFormat, imageType, tiling, usage, 0u);
899 	const auto	layerCount			= std::min({
900 		properties.limits.maxFramebufferLayers,
901 		meshProperties.maxMeshOutputLayers,
902 		formatProperties.maxArrayLayers,
903 		meshProperties.maxMeshWorkGroupCount[0],
904 		});
905 
906 	// This is needed for iExtent3D below.
907 	DE_ASSERT(static_cast<uint64_t>(std::numeric_limits<int>::max()) >= static_cast<uint64_t>(layerCount));
908 	log << tcu::TestLog::Message << "Using " + std::to_string(layerCount) + " layers" << tcu::TestLog::EndMessage;
909 
910 	const auto			viewType		= ((layerCount > 1u) ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D);
911 	const auto			tcuColorFormat	= mapVkFormat(fbFormat);
912 	const auto			pixelSize		= static_cast<uint32_t>(tcu::getPixelSize(tcuColorFormat));
913 	const auto			fbExtent		= makeExtent3D(1u, 1u, 1u);
914 	const tcu::IVec3	iExtent3D		(static_cast<int>(fbExtent.width), static_cast<int>(fbExtent.height), static_cast<int>(layerCount));
915 	const tcu::UVec4	clearColor		(0u, 0u, 0u, 0u);
916 
917 	// Create color attachment.
918 	const VkImageCreateInfo colorAttachmentCreatInfo =
919 	{
920 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
921 		nullptr,								//	const void*				pNext;
922 		0u,										//	VkImageCreateFlags		flags;
923 		imageType,								//	VkImageType				imageType;
924 		fbFormat,								//	VkFormat				format;
925 		fbExtent,								//	VkExtent3D				extent;
926 		1u,										//	uint32_t				mipLevels;
927 		layerCount,								//	uint32_t				arrayLayers;
928 		sampleCount,							//	VkSampleCountFlagBits	samples;
929 		tiling,									//	VkImageTiling			tiling;
930 		usage,									//	VkImageUsageFlags		usage;
931 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
932 		0u,										//	uint32_t				queueFamilyIndexCount;
933 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
934 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
935 	};
936 	ImageWithMemory	colorAttachment		(vkd, device, alloc, colorAttachmentCreatInfo, MemoryRequirement::Any);
937 	const auto		colorSRR			= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, layerCount);
938 	const auto		colorSRL			= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, layerCount);
939 	const auto		colorAttachmentView	= makeImageView(vkd, device, colorAttachment.get(), viewType, fbFormat, colorSRR);
940 
941 	// Verification buffer for the color attachment.
942 	DE_ASSERT(fbExtent.depth == 1u);
943 	const auto			verificationBufferUsage			= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
944 	const auto			verificationBufferSize			= static_cast<VkDeviceSize>(pixelSize * fbExtent.width * fbExtent.height * layerCount);
945 	const auto			verificationBufferCreateInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
946 	BufferWithMemory	verificationBuffer				(vkd, device, alloc, verificationBufferCreateInfo, MemoryRequirement::HostVisible);
947 	auto&				verificationBufferAlloc			= verificationBuffer.getAllocation();
948 	void*				verificationBufferData			= verificationBufferAlloc.getHostPtr();
949 
950 	deMemset(verificationBufferData, 0, static_cast<size_t>(verificationBufferSize));
951 
952 	const auto	pipelineLayout	= makePipelineLayout(vkd, device);
953 	const auto	renderPass		= makeRenderPass(vkd, device, fbFormat);
954 	const auto	framebuffer		= makeFramebuffer(vkd, device, renderPass.get(), colorAttachmentView.get(), fbExtent.width, fbExtent.height, layerCount);
955 
956 	const auto&	binaries	= m_context.getBinaryCollection();
957 	const auto	meshModule	= createShaderModule(vkd, device, binaries.get("mesh"));
958 	const auto	fragModule	= createShaderModule(vkd, device, binaries.get("frag"));
959 
960 	const std::vector<VkViewport>	viewports	(1u, makeViewport(fbExtent));
961 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(fbExtent));
962 
963 	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
964 		DE_NULL, meshModule.get(), fragModule.get(),
965 		renderPass.get(), viewports, scissors);
966 
967 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
968 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
969 	const auto cmdBuffer	= cmdBufferPtr.get();
970 
971 	beginCommandBuffer(vkd, cmdBuffer);
972 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
973 	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
974 	vkd.cmdDrawMeshTasksEXT(cmdBuffer, layerCount, 1u, 1u);
975 	endRenderPass(vkd, cmdBuffer);
976 
977 	const auto preTransferBarrier = makeImageMemoryBarrier(
978 		VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
979 		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
980 		colorAttachment.get(), colorSRR);
981 	cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preTransferBarrier);
982 
983 	const auto copyRegion = makeBufferImageCopy(fbExtent, colorSRL);
984 	vkd.cmdCopyImageToBuffer(cmdBuffer, colorAttachment.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
985 
986 	const auto postTransferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
987 	cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &postTransferBarrier);
988 
989 	endCommandBuffer(vkd, cmdBuffer);
990 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
991 
992 	invalidateAlloc(vkd, device, verificationBufferAlloc);
993 	tcu::ConstPixelBufferAccess resultAccess (tcuColorFormat, iExtent3D, verificationBufferData);
994 
995 	for (int z = 0; z < iExtent3D.z(); ++z)
996 	{
997 		const tcu::UVec4 expectedPixel (static_cast<uint32_t>(z) + 1u, 0u, 0u, 1u);
998 		for (int y = 0; y < iExtent3D.y(); ++y)
999 			for (int x = 0; x < iExtent3D.x(); ++x)
1000 			{
1001 				const auto resultPixel = resultAccess.getPixelUint(x, y, z);
1002 				if (resultPixel != expectedPixel)
1003 				{
1004 					std::ostringstream msg;
1005 					msg
1006 						<< "Unexpected pixel value at layer " << z << ": (" << x << ", " << y << ") is "
1007 						<< resultPixel << " while expecting " << expectedPixel
1008 						;
1009 					TCU_FAIL(msg.str());
1010 				}
1011 			}
1012 	}
1013 
1014 	return tcu::TestStatus::pass("Pass");
1015 }
1016 
// Selects which mesh shader output limit a MaxMeshOutputPrimVert test exercises.
enum class MaxPrimVertType
{
	PRIMITIVES	= 0,	// Output the requested number of primitives.
	VERTICES,			// Output the requested number of vertices.
};
1022 
1023 struct MaxPrimVertParams
1024 {
1025 	MaxPrimVertType testType;
1026 	uint32_t		itemCount;
1027 };
1028 
1029 class MaxMeshOutputPrimVertCase : public vkt::TestCase
1030 {
1031 public:
MaxMeshOutputPrimVertCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const MaxPrimVertParams & params)1032 					MaxMeshOutputPrimVertCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, const MaxPrimVertParams& params)
1033 						: vkt::TestCase			(testCtx, name, description)
1034 						, m_params				(params)
1035 						{}
~MaxMeshOutputPrimVertCase(void)1036 	virtual			~MaxMeshOutputPrimVertCase	(void) {}
1037 
1038 	void			initPrograms				(vk::SourceCollections& programCollection) const override;
1039 	TestInstance*	createInstance				(Context& context) const override;
1040 	void			checkSupport				(Context& context) const override;
1041 
1042 protected:
1043 	static constexpr uint32_t kLocalInvocations = 128u;
1044 
1045 	const MaxPrimVertParams	m_params;
1046 };
1047 
1048 class MaxMeshOutputPrimVertInstance : public vkt::TestInstance
1049 {
1050 public:
MaxMeshOutputPrimVertInstance(Context & context,uint32_t shaderPrimitives,uint32_t fbWidth)1051 						MaxMeshOutputPrimVertInstance	(Context& context, uint32_t shaderPrimitives, uint32_t fbWidth)
1052 							: vkt::TestInstance			(context)
1053 							, m_shaderPrimitives		(shaderPrimitives)
1054 							, m_fbWidth					(fbWidth)
1055 							{
1056 								DE_ASSERT(m_shaderPrimitives > 0u);
1057 								DE_ASSERT(m_fbWidth > 0u);
1058 							}
~MaxMeshOutputPrimVertInstance(void)1059 	virtual				~MaxMeshOutputPrimVertInstance	(void) {}
1060 
1061 	tcu::TestStatus		iterate							(void) override;
1062 
1063 protected:
1064 	const uint32_t		m_shaderPrimitives;
1065 	const uint32_t		m_fbWidth;
1066 };
1067 
createInstance(Context & context) const1068 TestInstance* MaxMeshOutputPrimVertCase::createInstance (Context &context) const
1069 {
1070 	const auto fbWidth = ((m_params.testType == MaxPrimVertType::PRIMITIVES) ? 1u : m_params.itemCount);
1071 	return new MaxMeshOutputPrimVertInstance(context, m_params.itemCount, fbWidth);
1072 }
1073 
checkSupport(Context & context) const1074 void MaxMeshOutputPrimVertCase::checkSupport (Context &context) const
1075 {
1076 	checkTaskMeshShaderSupportEXT(context, false/*requireTask*/, true/*requireMesh*/);
1077 	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_FRAGMENT_STORES_AND_ATOMICS);
1078 
1079 	// Note when testing vertices, due to our usage of points as the primitive type, we are also limited by the number of primitives.
1080 
1081 	const auto	isVertices		= (m_params.testType == MaxPrimVertType::VERTICES);
1082 	const auto&	meshProperties	= context.getMeshShaderPropertiesEXT();
1083 	const auto&	itemLimit		= isVertices
1084 								? std::min(meshProperties.maxMeshOutputVertices, meshProperties.maxMeshOutputPrimitives)
1085 								: meshProperties.maxMeshOutputPrimitives;
1086 
1087 	if (m_params.itemCount > itemLimit)
1088 		TCU_THROW(NotSupportedError, "Implementation does not support the given amount of items");
1089 
1090 	// Check memory limits just in case.
1091 	uint32_t	totalBytes		= 0u;
1092 	const auto	perVertexBytes	= static_cast<uint32_t>(sizeof(tcu::Vec4) + sizeof(float)); // gl_Position and gl_PointSize
1093 
1094 	if (isVertices)
1095 	{
1096 		// No per-primitive data in this variant.
1097 		const auto actualVertices		= de::roundUp(m_params.itemCount, meshProperties.meshOutputPerVertexGranularity);
1098 
1099 		totalBytes = perVertexBytes * actualVertices;
1100 	}
1101 	else
1102 	{
1103 		// Single vertex, but using gl_PrimitiveID in each primitive.
1104 		const auto perPrimitiveBytes	= static_cast<uint32_t>(sizeof(uint32_t)); // gl_PrimitiveID
1105 		const auto actualVertices		= de::roundUp(1u, meshProperties.meshOutputPerVertexGranularity);
1106 		const auto actualPrimitives		= de::roundUp(m_params.itemCount, meshProperties.meshOutputPerPrimitiveGranularity);
1107 
1108 		totalBytes = perVertexBytes * actualVertices + perPrimitiveBytes * actualPrimitives;
1109 	}
1110 
1111 	if (totalBytes > meshProperties.maxMeshOutputMemorySize)
1112 		TCU_THROW(NotSupportedError, "Not enough output memory for this test");
1113 }
1114 
initPrograms(vk::SourceCollections & programCollection) const1115 void MaxMeshOutputPrimVertCase::initPrograms (vk::SourceCollections &programCollection) const
1116 {
1117 	const auto buildOptions		= getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1118 	const bool isPrimitives		= (m_params.testType == MaxPrimVertType::PRIMITIVES);
1119 	const auto associatedVertex	= (isPrimitives ? "0u" : "primitiveID");
1120 	const auto maxVertices		= (isPrimitives ? 1u : m_params.itemCount);
1121 	const auto ssboIndex		= (isPrimitives ? "gl_PrimitiveID" : "uint(gl_FragCoord.x)");
1122 	const auto xCoord			= (isPrimitives ? "0.0" : "(float(vertexID) + 0.5) / float(maxVertices) * 2.0 - 1.0");
1123 	const auto maxPrimitives	= m_params.itemCount;
1124 
1125 	// When testing vertices, we'll use a wide framebuffer, emit one vertex per pixel and use the fragment coords to index into the
1126 	// SSBO. When testing primitives, we'll use a 1x1 framebuffer, emit one single vertex in the center and use the primitive id to
1127 	// index into the SSBO.
1128 	std::ostringstream frag;
1129 	frag
1130 		<< "#version 450\n"
1131 		<< "\n"
1132 		<< "layout (set=0, binding=0, std430) buffer OutputBlock {\n"
1133 		<< "    uint flags[];\n"
1134 		<< "} ssbo;\n"
1135 		<< "\n"
1136 		<< "void main (void) {\n"
1137 		<< "    ssbo.flags[" << ssboIndex << "] = 1u;\n"
1138 		<< "}\n"
1139 		;
1140 	programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
1141 
1142 	std::ostringstream mesh;
1143 	mesh
1144 		<< "#version 450\n"
1145 		<< "#extension GL_EXT_mesh_shader : enable\n"
1146 		<< "\n"
1147 		<< "layout (local_size_x=" << kLocalInvocations << ", local_size_y=1, local_size_z=1) in;\n"
1148 		<< "layout (points) out;\n"
1149 		<< "layout (max_vertices=" << maxVertices << ", max_primitives=" << maxPrimitives << ") out;\n"
1150 		<< "\n"
1151 		<< "out gl_MeshPerVertexEXT {\n"
1152 		<< "    vec4  gl_Position;\n"
1153 		<< "    float gl_PointSize;\n"
1154 		<< "} gl_MeshVerticesEXT[];\n"
1155 		<< "\n"
1156 		;
1157 
1158 	if (isPrimitives)
1159 	{
1160 		mesh
1161 			<< "perprimitiveEXT out gl_MeshPerPrimitiveEXT {\n"
1162 			<< "    int gl_PrimitiveID;\n"
1163 			<< "} gl_MeshPrimitivesEXT[];\n"
1164 			<< "\n"
1165 			;
1166 	}
1167 
1168 	mesh
1169 		<< "void main (void) {\n"
1170 		<< "    const uint localInvs = " << kLocalInvocations << "u;\n"
1171 		<< "    const uint maxVertices = " << maxVertices << "u;\n"
1172 		<< "    const uint maxPoints = " << maxPrimitives << "u;\n"
1173 		<< "    const uint verticesPerInvocation = (maxVertices + localInvs - 1u) / localInvs;\n"
1174 		<< "    const uint primitivesPerInvocation = (maxPoints + localInvs - 1u) / localInvs;\n"
1175 		<< "\n"
1176 		<< "    SetMeshOutputsEXT(maxVertices, maxPoints);\n"
1177 		<< "\n"
1178 		<< "    for (uint i = 0u; i < verticesPerInvocation; ++i) {\n"
1179 		<< "        const uint vertexID = gl_LocalInvocationIndex * verticesPerInvocation + i;\n"
1180 		<< "        if (vertexID >= maxVertices) {\n"
1181 		<< "            break;\n"
1182 		<< "        }\n"
1183 		<< "        const float xCoord = " << xCoord << ";\n"
1184 		<< "        gl_MeshVerticesEXT[vertexID].gl_Position = vec4(xCoord, 0.0, 0.0, 1.0);\n"
1185 		<< "        gl_MeshVerticesEXT[vertexID].gl_PointSize = 1.0f;\n"
1186 		<< "    }\n"
1187 		<< "\n"
1188 		<< "    for (uint i = 0u; i < primitivesPerInvocation; ++i) {\n"
1189 		<< "        const uint primitiveID = gl_LocalInvocationIndex * primitivesPerInvocation + i;\n"
1190 		<< "        if (primitiveID >= maxPoints) {\n"
1191 		<< "            break;\n"
1192 		<< "        }\n"
1193 		<< (isPrimitives ? "        gl_MeshPrimitivesEXT[primitiveID].gl_PrimitiveID = int(primitiveID);\n" : "")
1194 		<< "        gl_PrimitivePointIndicesEXT[primitiveID] = " << associatedVertex << ";\n"
1195 		<< "    }\n"
1196 		<< "}\n"
1197 		;
1198 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1199 }
1200 
iterate(void)1201 tcu::TestStatus MaxMeshOutputPrimVertInstance::iterate (void)
1202 {
1203 	const auto&		vkd				= m_context.getDeviceInterface();
1204 	const auto		device			= m_context.getDevice();
1205 	auto&			alloc			= m_context.getDefaultAllocator();
1206 	const auto		queueIndex		= m_context.getUniversalQueueFamilyIndex();
1207 	const auto		queue			= m_context.getUniversalQueue();
1208 	const auto		fbExtent		= makeExtent2D(m_fbWidth, 1u);
1209 	const auto		bindPoint		= VK_PIPELINE_BIND_POINT_GRAPHICS;
1210 
1211 	const auto		ssboSize		= static_cast<VkDeviceSize>(sizeof(uint32_t) * m_shaderPrimitives);
1212 	const auto		ssboUsage		= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
1213 	const auto		ssboDescType	= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
1214 
1215 	const auto			ssboCreateInfo	= makeBufferCreateInfo(ssboSize, ssboUsage);
1216 	BufferWithMemory	ssbo			(vkd, device, alloc, ssboCreateInfo, MemoryRequirement::HostVisible);
1217 	auto&				ssboAlloc		= ssbo.getAllocation();
1218 	void*				ssboData		= ssboAlloc.getHostPtr();
1219 	const auto			ssboDescInfo	= makeDescriptorBufferInfo(ssbo.get(), 0ull, ssboSize);
1220 
1221 	// Zero-out SSBO.
1222 	deMemset(ssboData, 0, static_cast<size_t>(ssboSize));
1223 	flushAlloc(vkd, device, ssboAlloc);
1224 
1225 	// Descriptor set layout, pool, set and set update.
1226 	DescriptorSetLayoutBuilder setLayoutBuilder;
1227 	setLayoutBuilder.addSingleBinding(ssboDescType, VK_SHADER_STAGE_FRAGMENT_BIT);
1228 	const auto setLayout = setLayoutBuilder.build(vkd, device);
1229 
1230 	DescriptorPoolBuilder poolBuilder;
1231 	poolBuilder.addType(ssboDescType);
1232 	const auto descriptorPool	= poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1233 	const auto descriptorSet	= makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
1234 
1235 	DescriptorSetUpdateBuilder updateBuilder;
1236 	updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u), ssboDescType, &ssboDescInfo);
1237 	updateBuilder.update(vkd, device);
1238 
1239 	// Pipeline layout, render pass and pipeline.
1240 	const auto pipelineLayout	= makePipelineLayout(vkd, device, setLayout.get());
1241 	const auto renderPass		= makeRenderPass(vkd, device);
1242 	const auto framebuffer		= makeFramebuffer(vkd, device, renderPass.get(), 0u, nullptr, fbExtent.width, fbExtent.height);
1243 
1244 	const std::vector<VkViewport>	viewports	(1u, makeViewport(fbExtent));
1245 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(fbExtent));
1246 
1247 	const auto&	binaries	= m_context.getBinaryCollection();
1248 	const auto	meshShader	= createShaderModule(vkd, device, binaries.get("mesh"));
1249 	const auto	fragShader	= createShaderModule(vkd, device, binaries.get("frag"));
1250 	const auto	pipeline	= makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
1251 		DE_NULL, meshShader.get(), fragShader.get(),
1252 		renderPass.get(), viewports, scissors);
1253 
1254 	// Command pool and buffer.
1255 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
1256 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1257 	const auto cmdBuffer	= cmdBufferPtr.get();
1258 
1259 	beginCommandBuffer(vkd, cmdBuffer);
1260 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u));
1261 	vkd.cmdBindDescriptorSets(cmdBuffer, bindPoint, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
1262 	vkd.cmdBindPipeline(cmdBuffer, bindPoint, pipeline.get());
1263 	vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
1264 	endRenderPass(vkd, cmdBuffer);
1265 	endCommandBuffer(vkd, cmdBuffer);
1266 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1267 
1268 	invalidateAlloc(vkd, device, ssboAlloc);
1269 	std::vector<uint32_t> outputFlags(m_shaderPrimitives, 0u);
1270 	deMemcpy(outputFlags.data(), ssboData, de::dataSize(outputFlags));
1271 
1272 	// Verify output SSBO.
1273 	bool pass = true;
1274 	auto& log = m_context.getTestContext().getLog();
1275 
1276 	for (size_t i = 0u; i < outputFlags.size(); ++i)
1277 	{
1278 		if (outputFlags[i] != 1u)
1279 		{
1280 			std::ostringstream msg;
1281 			msg << "Primitive ID " << i << " flag != 1: " << outputFlags[i];
1282 			log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
1283 			pass = false;
1284 		}
1285 	}
1286 
1287 	if (!pass)
1288 		TCU_FAIL("Check log for details");
1289 
1290 	return tcu::TestStatus::pass("Pass");
1291 }
1292 
1293 class MaxMeshOutputComponentsCase : public vkt::TestCase
1294 {
1295 public:
MaxMeshOutputComponentsCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description)1296 					MaxMeshOutputComponentsCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description)
1297 						: vkt::TestCase	(testCtx, name, description)
1298 						{}
1299 
~MaxMeshOutputComponentsCase(void)1300 	virtual			~MaxMeshOutputComponentsCase	(void) {}
1301 
1302 	void			initPrograms					(vk::SourceCollections& programCollection) const override;
1303 	TestInstance*	createInstance					(Context& context) const override;
1304 	void			checkSupport					(Context& context) const override;
1305 
1306 protected:
1307 	struct ParamsFromContext
1308 	{
1309 		uint32_t maxLocations;
1310 	};
1311 	ParamsFromContext getParamsFromContext (Context& context) const;
1312 };
1313 
1314 class  MaxMeshOutputComponentsInstance : public SpecConstantInstance
1315 {
1316 public:
MaxMeshOutputComponentsInstance(Context & context,SpecConstVector && scVector)1317 						MaxMeshOutputComponentsInstance		(Context& context, SpecConstVector&& scVector)
1318 							: SpecConstantInstance(context, std::move(scVector))
1319 							{}
1320 
~MaxMeshOutputComponentsInstance(void)1321 	virtual				~MaxMeshOutputComponentsInstance	(void) {}
1322 
1323 	tcu::TestStatus		iterate								(void) override;
1324 };
1325 
getParamsFromContext(Context & context) const1326 MaxMeshOutputComponentsCase::ParamsFromContext MaxMeshOutputComponentsCase::getParamsFromContext (Context& context) const
1327 {
1328 	const uint32_t kLocationComponents	= 4u; // Each location can handle up to 4 32-bit components (and we'll be using uvec4).
1329 	const uint32_t kUsedLocations		= 1u; // For gl_Position.
1330 	const uint32_t maxLocations			= context.getMeshShaderPropertiesEXT().maxMeshOutputComponents / kLocationComponents - kUsedLocations;
1331 
1332 	ParamsFromContext params { maxLocations };
1333 	return params;
1334 }
1335 
checkSupport(Context & context) const1336 void MaxMeshOutputComponentsCase::checkSupport (Context &context) const
1337 {
1338 	checkTaskMeshShaderSupportEXT(context, false/*requireTask*/, true/*requireMesh*/);
1339 }
1340 
createInstance(Context & context) const1341 TestInstance* MaxMeshOutputComponentsCase::createInstance (Context &context) const
1342 {
1343 	const auto		ctxParams		= getParamsFromContext(context);
1344 	SpecConstVector	specConstVec	{ ctxParams.maxLocations };
1345 
1346 	return new MaxMeshOutputComponentsInstance(context, std::move(specConstVec));
1347 }
1348 
initPrograms(vk::SourceCollections & programCollection) const1349 void MaxMeshOutputComponentsCase::initPrograms (vk::SourceCollections &programCollection) const
1350 {
1351 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1352 
1353 	const std::string locationStructDecl =
1354 		"layout (constant_id=0) const uint maxLocations = 1u;\n"
1355 		"struct LocationStruct {\n"
1356 		"    uvec4 location_var[maxLocations];\n"
1357 		"};\n"
1358 		;
1359 
1360 	const std::string declOut =
1361 		locationStructDecl +
1362 		"layout (location=0) perprimitiveEXT flat out LocationStruct ls[];\n"
1363 		;
1364 
1365 	const std::string declIn =
1366 		locationStructDecl +
1367 		"layout (location=0) perprimitiveEXT flat in LocationStruct ls;\n"
1368 		;
1369 
1370 	std::ostringstream mesh;
1371 	mesh
1372 		<< "#version 450\n"
1373 		<< "#extension GL_EXT_mesh_shader : enable\n"
1374 		<< "\n"
1375 		<< "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
1376 		<< "layout (triangles) out;\n"
1377 		<< "layout (max_vertices=3, max_primitives=1) out;\n"
1378 		<< "\n"
1379 		<< "out gl_MeshPerVertexEXT {\n"
1380 		<< "    vec4  gl_Position;\n"
1381 		<< "} gl_MeshVerticesEXT[];\n"
1382 		<< "\n"
1383 		<< declOut
1384 		<< "\n"
1385 		<< "void main (void) {\n"
1386 		<< "    SetMeshOutputsEXT(3u, 1u);\n"
1387 		<< "    gl_MeshVerticesEXT[0].gl_Position = vec4( 0.0, -0.5, 0.0, 1.0);\n"
1388 		<< "    gl_MeshVerticesEXT[1].gl_Position = vec4(-0.5,  0.5, 0.0, 1.0);\n"
1389 		<< "    gl_MeshVerticesEXT[2].gl_Position = vec4( 0.5,  0.5, 0.0, 1.0);\n"
1390 		<< "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0u, 1u, 2u);\n"
1391 		<< "\n"
1392 		<< "    for (uint i = 0u; i < maxLocations; ++i) {\n"
1393 		<< "        const uint baseVal = 10000u * (i + 1u);\n"
1394 		<< "        const uvec4 expectedValue = uvec4(baseVal + 1u, baseVal + 2u, baseVal + 3u, baseVal + 4u);\n"
1395 		<< "        ls[0].location_var[i] = expectedValue;\n"
1396 		<< "    }\n"
1397 		<< "}\n"
1398 		;
1399 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1400 
1401 	std::ostringstream frag;
1402 	frag
1403 		<< "#version 450\n"
1404 		<< "#extension GL_EXT_mesh_shader : enable\n"
1405 		<< "\n"
1406 		<< "layout (location=0) out vec4 outColor;\n"
1407 		<< "\n"
1408 		<< declIn
1409 		<< "\n"
1410 		<< "void main (void) {\n"
1411 		<< "    bool success = true;\n"
1412 		<< "    for (uint i = 0u; i < maxLocations; ++i) {\n"
1413 		<< "        const uint baseVal = 10000u * (i + 1u);\n"
1414 		<< "        const uvec4 expectedValue = uvec4(baseVal + 1u, baseVal + 2u, baseVal + 3u, baseVal + 4u);\n"
1415 		<< "        success = success && (ls.location_var[i] == expectedValue);\n"
1416 		<< "    }\n"
1417 		<< "    outColor = (success ? vec4(0.0, 0.0, 1.0, 1.0) : vec4(0.0, 0.0, 0.0, 1.0));\n"
1418 		<< "}\n"
1419 		;
1420 	programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
1421 }
1422 
iterate(void)1423 tcu::TestStatus MaxMeshOutputComponentsInstance::iterate (void)
1424 {
1425 	const auto&			vkd				= m_context.getDeviceInterface();
1426 	const auto			device			= m_context.getDevice();
1427 	auto&				alloc			= m_context.getDefaultAllocator();
1428 	const auto			queueIndex		= m_context.getUniversalQueueFamilyIndex();
1429 	const auto			queue			= m_context.getUniversalQueue();
1430 
1431 	const auto			colorFormat		= VK_FORMAT_R8G8B8A8_UNORM;
1432 	const auto			tcuColorFormat	= mapVkFormat(colorFormat);
1433 	const auto			pixelSize		= static_cast<uint32_t>(tcu::getPixelSize(tcuColorFormat));
1434 	const auto			colorUsage		= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
1435 	const auto			fbExtent		= makeExtent3D(1u, 1u, 1u);
1436 	const tcu::IVec3	iExtent3D		(static_cast<int>(fbExtent.width), static_cast<int>(fbExtent.height), static_cast<int>(fbExtent.depth));
1437 	const tcu::Vec4		clearColor		(0.0f, 0.0f, 0.0f, 1.0f);
1438 	const tcu::Vec4		expectedColor	(0.0f, 0.0f, 1.0f, 1.0f);
1439 	const tcu::Vec4		colorThreshold	(0.0f, 0.0f, 0.0f, 0.0f);
1440 
1441 	// Create color attachment.
1442 	const VkImageCreateInfo colorAttachmentCreatInfo =
1443 	{
1444 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
1445 		nullptr,								//	const void*				pNext;
1446 		0u,										//	VkImageCreateFlags		flags;
1447 		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
1448 		colorFormat,							//	VkFormat				format;
1449 		fbExtent,								//	VkExtent3D				extent;
1450 		1u,										//	uint32_t				mipLevels;
1451 		1u,										//	uint32_t				arrayLayers;
1452 		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
1453 		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
1454 		colorUsage,								//	VkImageUsageFlags		usage;
1455 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
1456 		0u,										//	uint32_t				queueFamilyIndexCount;
1457 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
1458 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
1459 	};
1460 	ImageWithMemory	colorAttachment		(vkd, device, alloc, colorAttachmentCreatInfo, MemoryRequirement::Any);
1461 	const auto		colorSRR			= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
1462 	const auto		colorSRL			= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
1463 	const auto		colorAttachmentView	= makeImageView(vkd, device, colorAttachment.get(), VK_IMAGE_VIEW_TYPE_2D, colorFormat, colorSRR);
1464 
1465 	// Verification buffer for the color attachment.
1466 	DE_ASSERT(fbExtent.depth == 1u);
1467 	const auto			verificationBufferUsage			= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
1468 	const auto			verificationBufferSize			= static_cast<VkDeviceSize>(pixelSize * fbExtent.width * fbExtent.height * fbExtent.depth);
1469 	const auto			verificationBufferCreateInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
1470 	BufferWithMemory	verificationBuffer				(vkd, device, alloc, verificationBufferCreateInfo, MemoryRequirement::HostVisible);
1471 	auto&				verificationBufferAlloc			= verificationBuffer.getAllocation();
1472 	void*				verificationBufferData			= verificationBufferAlloc.getHostPtr();
1473 
1474 	deMemset(verificationBufferData, 0, static_cast<size_t>(verificationBufferSize));
1475 
1476 	const auto	pipelineLayout	= makePipelineLayout(vkd, device);
1477 	const auto	renderPass		= makeRenderPass(vkd, device, colorFormat);
1478 	const auto	framebuffer		= makeFramebuffer(vkd, device, renderPass.get(), colorAttachmentView.get(), fbExtent.width, fbExtent.height, 1u);
1479 
1480 	const auto&	binaries	= m_context.getBinaryCollection();
1481 	const auto	meshModule	= createShaderModule(vkd, device, binaries.get("mesh"));
1482 	const auto	fragModule	= createShaderModule(vkd, device, binaries.get("frag"));
1483 
1484 	const std::vector<VkViewport>	viewports	(1u, makeViewport(fbExtent));
1485 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(fbExtent));
1486 
1487 	const auto					specMap		= makeSpecializationMap();
1488 	const VkSpecializationInfo	specInfo	=
1489 	{
1490 		static_cast<uint32_t>(specMap.size()),	//	uint32_t						mapEntryCount;
1491 		de::dataOrNull(specMap),				//	const VkSpecializationMapEntry*	pMapEntries;
1492 		de::dataSize(m_specConstants),			//	size_t							dataSize;
1493 		de::dataOrNull(m_specConstants),		//	const void*						pData;
1494 	};
1495 
1496 	std::vector<VkPipelineShaderStageCreateInfo>	shaderStages;
1497 	VkPipelineShaderStageCreateInfo					stageInfo		=
1498 	{
1499 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	//	VkStructureType						sType;
1500 		nullptr,												//	const void*							pNext;
1501 		0u,														//	VkPipelineShaderStageCreateFlags	flags;
1502 		VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM,						//	VkShaderStageFlagBits				stage;
1503 		DE_NULL,												//	VkShaderModule						module;
1504 		"main",													//	const char*							pName;
1505 		&specInfo,												//	const VkSpecializationInfo*			pSpecializationInfo;
1506 	};
1507 
1508 	{
1509 		stageInfo.stage = VK_SHADER_STAGE_MESH_BIT_EXT;
1510 		stageInfo.module = meshModule.get();
1511 		shaderStages.push_back(stageInfo);
1512 	}
1513 
1514 	{
1515 		stageInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
1516 		stageInfo.module = fragModule.get();
1517 		shaderStages.push_back(stageInfo);
1518 	}
1519 
1520 	const auto pipeline = makeGraphicsPipeline(vkd, device,
1521 		DE_NULL, pipelineLayout.get(), 0u,
1522 		shaderStages, renderPass.get(), viewports, scissors);
1523 
1524 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
1525 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1526 	const auto cmdBuffer	= cmdBufferPtr.get();
1527 
1528 	beginCommandBuffer(vkd, cmdBuffer);
1529 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
1530 	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
1531 	vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
1532 	endRenderPass(vkd, cmdBuffer);
1533 
1534 	const auto preTransferBarrier = makeImageMemoryBarrier(
1535 		VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
1536 		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1537 		colorAttachment.get(), colorSRR);
1538 	cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preTransferBarrier);
1539 
1540 	const auto copyRegion = makeBufferImageCopy(fbExtent, colorSRL);
1541 	vkd.cmdCopyImageToBuffer(cmdBuffer, colorAttachment.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
1542 
1543 	const auto postTransferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
1544 	cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &postTransferBarrier);
1545 
1546 	endCommandBuffer(vkd, cmdBuffer);
1547 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1548 
1549 	invalidateAlloc(vkd, device, verificationBufferAlloc);
1550 	tcu::ConstPixelBufferAccess resultAccess (tcuColorFormat, iExtent3D, verificationBufferData);
1551 
1552 	auto& log = m_context.getTestContext().getLog();
1553 	log << tcu::TestLog::Message << "maxLocations value: " << m_specConstants.at(0u) << tcu::TestLog::EndMessage;
1554 	if (!tcu::floatThresholdCompare(log, "Result", "", expectedColor, resultAccess, colorThreshold, tcu::COMPARE_LOG_ON_ERROR))
1555 		TCU_FAIL("Check log for details");
1556 
1557 	return tcu::TestStatus::pass("Pass");
1558 }
1559 
1560 class MeshPayloadShMemSizeCase : public vkt::TestCase
1561 {
1562 public:
MeshPayloadShMemSizeCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const MeshPayloadShMemSizeParams & params)1563 					MeshPayloadShMemSizeCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, const MeshPayloadShMemSizeParams& params)
1564 						: vkt::TestCase				(testCtx, name, description)
1565 						, m_params					(params)
1566 						{}
~MeshPayloadShMemSizeCase(void)1567 	virtual			~MeshPayloadShMemSizeCase	(void) {}
1568 
1569 	void			checkSupport				(Context& context) const override;
1570 	void			initPrograms				(vk::SourceCollections& programCollection) const override;
1571 	TestInstance*	createInstance				(Context& context) const override;
1572 
1573 protected:
1574 	struct ParamsFromContext
1575 	{
1576 		uint32_t payloadElements;
1577 		uint32_t sharedMemoryElements;
1578 	};
1579 	ParamsFromContext getParamsFromContext		(Context& context) const;
1580 
1581 	const MeshPayloadShMemSizeParams			m_params;
1582 
1583 	static constexpr uint32_t kElementSize		= static_cast<uint32_t>(sizeof(uint32_t));
1584 	static constexpr uint32_t kLocalInvocations	= 128u;
1585 };
1586 
checkSupport(Context & context) const1587 void MeshPayloadShMemSizeCase::checkSupport (Context& context) const
1588 {
1589 	const bool requireTask = m_params.hasPayload();
1590 
1591 	checkTaskMeshShaderSupportEXT(context, requireTask, true/*requireMesh*/);
1592 	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
1593 
1594 	const auto&	meshProperties	= context.getMeshShaderPropertiesEXT();
1595 	const auto	minSize			= kLocalInvocations * kElementSize;
1596 
1597 	// Note: the min required values for these properties in the spec would pass these checks.
1598 
1599 	if (requireTask)
1600 	{
1601 		if (meshProperties.maxTaskPayloadSize < minSize)
1602 			TCU_FAIL("Invalid maxTaskPayloadSize");
1603 
1604 		if (meshProperties.maxTaskPayloadAndSharedMemorySize < minSize)
1605 			TCU_FAIL("Invalid maxTaskPayloadAndSharedMemorySize");
1606 	}
1607 
1608 	if (meshProperties.maxMeshSharedMemorySize < minSize)
1609 		TCU_FAIL("Invalid maxMeshSharedMemorySize");
1610 
1611 	if (meshProperties.maxMeshPayloadAndSharedMemorySize < minSize)
1612 		TCU_FAIL("Invalid maxMeshPayloadAndSharedMemorySize");
1613 
1614 	if (meshProperties.maxMeshPayloadAndOutputMemorySize < minSize)
1615 		TCU_FAIL("Invalid maxMeshPayloadAndOutputMemorySize");
1616 }
1617 
getParamsFromContext(Context & context) const1618 MeshPayloadShMemSizeCase::ParamsFromContext MeshPayloadShMemSizeCase::getParamsFromContext (Context& context) const
1619 {
1620 	ParamsFromContext params;
1621 
1622 	const auto&	meshProperties		= context.getMeshShaderPropertiesEXT();
1623 	const auto	maxTaskPayloadSize	= std::min(meshProperties.maxTaskPayloadAndSharedMemorySize, meshProperties.maxTaskPayloadSize);
1624 	const auto	maxMeshPayloadSize	= std::min(meshProperties.maxMeshPayloadAndOutputMemorySize, meshProperties.maxMeshPayloadAndSharedMemorySize);
1625 	const auto	maxPayloadElements	= std::min(maxTaskPayloadSize, maxMeshPayloadSize) / kElementSize;
1626 	const auto	maxShMemElements	= meshProperties.maxMeshSharedMemorySize / kElementSize;
1627 	const auto	maxTotalElements	= meshProperties.maxTaskPayloadAndSharedMemorySize / kElementSize;
1628 
1629 	if (m_params.testType == PayLoadShMemSizeType::PAYLOAD)
1630 	{
1631 		params.sharedMemoryElements	= 0u;
1632 		params.payloadElements		= std::min(maxTotalElements, maxPayloadElements);
1633 	}
1634 	else if (m_params.testType == PayLoadShMemSizeType::SHARED_MEMORY)
1635 	{
1636 		params.payloadElements		= 0u;
1637 		params.sharedMemoryElements	= std::min(maxTotalElements, maxShMemElements);
1638 	}
1639 	else
1640 	{
1641 		uint32_t*	minPtr;
1642 		uint32_t	minVal;
1643 		uint32_t*	maxPtr;
1644 		uint32_t	maxVal;
1645 
1646 		// Divide them as evenly as possible getting them as closest as possible to maxTotalElements.
1647 		if (maxPayloadElements < maxShMemElements)
1648 		{
1649 			minPtr = &params.payloadElements;
1650 			minVal = maxPayloadElements;
1651 
1652 			maxPtr = &params.sharedMemoryElements;
1653 			maxVal = maxShMemElements;
1654 		}
1655 		else
1656 		{
1657 			minPtr = &params.sharedMemoryElements;
1658 			minVal = maxShMemElements;
1659 
1660 			maxPtr = &params.payloadElements;
1661 			maxVal = maxPayloadElements;
1662 		}
1663 
1664 		*minPtr = std::min(minVal, maxTotalElements / 2u);
1665 		*maxPtr = std::min(maxTotalElements - (*minPtr), maxVal);
1666 	}
1667 
1668 	return params;
1669 }
1670 
createInstance(Context & context) const1671 TestInstance* MeshPayloadShMemSizeCase::createInstance (Context &context) const
1672 {
1673 	const auto		ctxParams	= getParamsFromContext(context);
1674 	SpecConstVector	vec			{ ctxParams.payloadElements, ctxParams.sharedMemoryElements };
1675 
1676 	return new PayloadShMemSizeInstance(context, m_params, std::move(vec));
1677 }
1678 
initPrograms(vk::SourceCollections & programCollection) const1679 void MeshPayloadShMemSizeCase::initPrograms (vk::SourceCollections& programCollection) const
1680 {
1681 	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1682 
1683 	const std::string scDecl =
1684 		"layout (constant_id=0) const uint payloadElements = 1u;\n"
1685 		"layout (constant_id=1) const uint sharedMemoryElements = 1u;\n"
1686 		;
1687 
1688 	const std::string dsDecl =
1689 		"layout (set=0, binding=0, std430) buffer ResultBlock {\n"
1690 		"    uint sharedOK;\n"
1691 		"    uint payloadOK;\n"
1692 		"} result;\n"
1693 		;
1694 
1695 	std::string taskData;
1696 	std::string taskPayloadBody;
1697 	std::string meshPayloadBody;
1698 
1699 	if (m_params.hasPayload())
1700 	{
1701 		std::ostringstream taskDataStream;
1702 		taskDataStream
1703 			<< "struct TaskData {\n"
1704 			<< "    uint elements[payloadElements];\n"
1705 			<< "};\n"
1706 			<< "taskPayloadSharedEXT TaskData td;\n"
1707 			;
1708 		taskData = taskDataStream.str();
1709 
1710 		std::ostringstream taskBodyStream;
1711 		taskBodyStream
1712 			<< "    const uint payloadElementsPerInvocation = uint(ceil(float(payloadElements) / float(" << kLocalInvocations << ")));\n"
1713 			<< "    for (uint i = 0u; i < payloadElementsPerInvocation; ++i) {\n"
1714 			<< "        const uint elemIdx = payloadElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
1715 			<< "        if (elemIdx < payloadElements) {\n"
1716 			<< "            td.elements[elemIdx] = elemIdx + 2000u;\n"
1717 			<< "        }\n"
1718 			<< "    }\n"
1719 			<< "\n"
1720 			;
1721 		taskPayloadBody = taskBodyStream.str();
1722 
1723 		std::ostringstream meshBodyStream;
1724 		meshBodyStream
1725 			<< "    if (gl_LocalInvocationIndex == 0u) {\n"
1726 			<< "        bool allOK = true;\n"
1727 			<< "        for (uint i = 0u; i < payloadElements; ++i) {\n"
1728 			<< "            if (td.elements[i] != i + 2000u) {\n"
1729 			<< "                allOK = false;\n"
1730 			<< "                break;\n"
1731 			<< "            }\n"
1732 			<< "        }\n"
1733 			<< "        result.payloadOK = (allOK ? 1u : 0u);\n"
1734 			<< "    }\n"
1735 			<< "\n"
1736 			;
1737 		meshPayloadBody = meshBodyStream.str();
1738 	}
1739 	else
1740 	{
1741 		meshPayloadBody = "    result.payloadOK = 1u;\n";
1742 	}
1743 
1744 	std::string sharedData;
1745 	std::string meshSharedDataBody;
1746 
1747 	if (m_params.hasSharedMemory())
1748 	{
1749 		sharedData = "shared uint sharedElements[sharedMemoryElements];\n";
1750 
1751 		std::ostringstream bodyStream;
1752 		bodyStream
1753 			<< "    const uint shMemElementsPerInvocation = uint(ceil(float(sharedMemoryElements) / float(" << kLocalInvocations << ")));\n"
1754 			<< "    for (uint i = 0u; i < shMemElementsPerInvocation; ++i) {\n"
1755 			<< "        const uint elemIdx = shMemElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
1756 			<< "        if (elemIdx < sharedMemoryElements) {\n"
1757 			<< "            sharedElements[elemIdx] = elemIdx * 2u + 1000u;\n" // Write
1758 			<< "        }\n"
1759 			<< "    }\n"
1760 			<< "    memoryBarrierShared();\n"
1761 			<< "    barrier();\n"
1762 			<< "    for (uint i = 0u; i < shMemElementsPerInvocation; ++i) {\n"
1763 			<< "        const uint elemIdx = shMemElementsPerInvocation * gl_LocalInvocationIndex + i;\n"
1764 			<< "        if (elemIdx < sharedMemoryElements) {\n"
1765 			<< "            const uint accessIdx = sharedMemoryElements - 1u - elemIdx;\n"
1766 			<< "            sharedElements[accessIdx] += accessIdx;\n" // Read+Write a different element.
1767 			<< "        }\n"
1768 			<< "    }\n"
1769 			<< "    memoryBarrierShared();\n"
1770 			<< "    barrier();\n"
1771 			<< "    if (gl_LocalInvocationIndex == 0u) {\n"
1772 			<< "        bool allOK = true;\n"
1773 			<< "        for (uint i = 0u; i < sharedMemoryElements; ++i) {\n"
1774 			<< "            if (sharedElements[i] != i*3u + 1000u) {\n"
1775 			<< "                allOK = false;\n"
1776 			<< "                break;\n"
1777 			<< "            }\n"
1778 			<< "        }\n"
1779 			<< "        result.sharedOK = (allOK ? 1u : 0u);\n"
1780 			<< "    }\n"
1781 			<< "\n"
1782 			;
1783 		meshSharedDataBody = bodyStream.str();
1784 	}
1785 	else
1786 	{
1787 		meshSharedDataBody =
1788 			"    if (gl_LocalInvocationIndex == 0u) {\n"
1789 			"        result.sharedOK = 1u;\n"
1790 			"    }\n"
1791 			;
1792 	}
1793 
1794 	if (m_params.hasPayload())
1795 	{
1796 		std::ostringstream task;
1797 		task
1798 			<< "#version 450\n"
1799 			<< "#extension GL_EXT_mesh_shader : enable\n"
1800 			<< "\n"
1801 			<< "layout (local_size_x=" << kLocalInvocations << ", local_size_y=1, local_size_z=1) in;\n"
1802 			<< scDecl
1803 			<< dsDecl
1804 			<< taskData
1805 			<< "\n"
1806 			<< "void main () {\n"
1807 			<< taskPayloadBody
1808 			<< "    EmitMeshTasksEXT(1u, 1u, 1u);\n"
1809 			<< "}\n"
1810 			;
1811 		programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1812 	}
1813 
1814 	std::ostringstream mesh;
1815 	mesh
1816 		<< "#version 450\n"
1817 		<< "#extension GL_EXT_mesh_shader : enable\n"
1818 		<< "\n"
1819 		<< "layout (local_size_x=" << kLocalInvocations << ", local_size_y=1, local_size_z=1) in;\n"
1820 		<< "layout (triangles) out;\n"
1821 		<< "layout (max_vertices=3, max_primitives=1) out;\n"
1822 		<< scDecl
1823 		<< dsDecl
1824 		<< taskData
1825 		<< sharedData
1826 		<< "\n"
1827 		<< "void main () {\n"
1828 		<< meshSharedDataBody
1829 		<< meshPayloadBody
1830 		<< "    SetMeshOutputsEXT(0u, 0u);\n"
1831 		<< "}\n"
1832 		;
1833 	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1834 }
1835 
// Selects whether the user-defined mesh output locations are declared per vertex or per primitive.
enum class LocationType
{
	PER_VERTEX = 0,
	PER_PRIMITIVE,
};
1841 
// Selects how the view index takes part in the test: not at all (no multiview), only in the
// fragment shader, or in both the mesh and fragment shaders.
enum class ViewIndexType
{
	NO_VIEW_INDEX = 0,
	VIEW_INDEX_FRAG,
	VIEW_INDEX_BOTH,
};
1848 
1849 struct MaxMeshOutputParams
1850 {
1851 	bool			usePayload;
1852 	LocationType	locationType;
1853 	ViewIndexType	viewIndexType;
1854 
isMultiViewvkt::MeshShader::__anon7cffbaf10111::MaxMeshOutputParams1855 	bool isMultiView (void) const
1856 	{
1857 		return (viewIndexType != ViewIndexType::NO_VIEW_INDEX);
1858 	}
1859 
viewIndexInMeshvkt::MeshShader::__anon7cffbaf10111::MaxMeshOutputParams1860 	bool viewIndexInMesh (void) const
1861 	{
1862 		return (viewIndexType == ViewIndexType::VIEW_INDEX_BOTH);
1863 	}
1864 };
1865 
1866 class MaxMeshOutputSizeCase : public vkt::TestCase
1867 {
1868 public:
MaxMeshOutputSizeCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const MaxMeshOutputParams & params)1869 					MaxMeshOutputSizeCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, const MaxMeshOutputParams& params)
1870 						: vkt::TestCase	(testCtx, name, description)
1871 						, m_params		(params)
1872 						{}
~MaxMeshOutputSizeCase(void)1873 	virtual			~MaxMeshOutputSizeCase	(void) {}
1874 
1875 	TestInstance*	createInstance			(Context& context) const override;
1876 	void			checkSupport			(Context& context) const override;
1877 	void			initPrograms			(vk::SourceCollections& programCollection) const override;
1878 
1879 	// Small-ish numbers allow for more fine-grained control in the amount of memory, but it can't be too small or we hit the locations limit.
1880 	static constexpr uint32_t				kMaxPoints			= 96u;
1881 	static constexpr uint32_t				kNumViews			= 2u;	// For the multiView case.
1882 
1883 protected:
1884 	static constexpr uint32_t				kUvec4Size			= 16u;	// We'll use 4 scalars at a time in the form of a uvec4.
1885 	static constexpr uint32_t				kUvec4Comp			= 4u;	// 4 components per uvec4.
1886 	static constexpr uint32_t				kPayloadElementSize	= 4u;	// Each payload element will be a uint.
1887 
1888 	struct ParamsFromContext
1889 	{
1890 		uint32_t payloadElements;
1891 		uint32_t locationCount;
1892 	};
1893 	ParamsFromContext getParamsFromContext	(Context& context) const;
1894 
1895 	const MaxMeshOutputParams				m_params;
1896 };
1897 
1898 class MaxMeshOutputSizeInstance : public SpecConstantInstance
1899 {
1900 public:
MaxMeshOutputSizeInstance(Context & context,SpecConstVector && vec,uint32_t numViews)1901 						MaxMeshOutputSizeInstance	(Context& context, SpecConstVector&& vec, uint32_t numViews)
1902 							: SpecConstantInstance	(context, std::move(vec))
1903 							, m_numViews			(numViews)
1904 							{}
~MaxMeshOutputSizeInstance(void)1905 	virtual				~MaxMeshOutputSizeInstance	(void) {}
1906 
1907 	tcu::TestStatus		iterate						(void) override;
1908 
1909 protected:
1910 	Move<VkRenderPass>	makeCustomRenderPass		(const DeviceInterface& vkd, VkDevice device, uint32_t layerCount, VkFormat format);
1911 
1912 	const uint32_t		m_numViews;
1913 };
1914 
checkSupport(Context & context) const1915 void MaxMeshOutputSizeCase::checkSupport (Context &context) const
1916 {
1917 	checkTaskMeshShaderSupportEXT(context, m_params.usePayload/*requireTask*/, true/*requireMesh*/);
1918 
1919 	if (m_params.isMultiView())
1920 	{
1921 		const auto& multiviewFeatures = context.getMultiviewFeatures();
1922 		if (!multiviewFeatures.multiview)
1923 			TCU_THROW(NotSupportedError, "Multiview not supported");
1924 
1925 		const auto& meshFeatures = context.getMeshShaderFeaturesEXT();
1926 		if (!meshFeatures.multiviewMeshShader)
1927 			TCU_THROW(NotSupportedError, "Multiview not supported for mesh shaders");
1928 
1929 		const auto& meshProperties = context.getMeshShaderPropertiesEXT();
1930 		if (meshProperties.maxMeshMultiviewViewCount < kNumViews)
1931 			TCU_THROW(NotSupportedError, "maxMeshMultiviewViewCount too low");
1932 	}
1933 }
1934 
getParamsFromContext(Context & context) const1935 MaxMeshOutputSizeCase::ParamsFromContext MaxMeshOutputSizeCase::getParamsFromContext (Context& context) const
1936 {
1937 	const auto&	meshProperties		= context.getMeshShaderPropertiesEXT();
1938 	const auto	maxOutSize			= std::min(meshProperties.maxMeshOutputMemorySize, meshProperties.maxMeshPayloadAndOutputMemorySize);
1939 	const auto	maxMeshPayloadSize	= std::min(meshProperties.maxMeshPayloadAndSharedMemorySize, meshProperties.maxMeshPayloadAndOutputMemorySize);
1940 	const auto	maxTaskPayloadSize	= std::min(meshProperties.maxTaskPayloadSize, meshProperties.maxTaskPayloadAndSharedMemorySize);
1941 	const auto	maxPayloadSize		= std::min(maxMeshPayloadSize, maxTaskPayloadSize);
1942 	const auto	numViewFactor		= (m_params.viewIndexInMesh() ? kNumViews : 1u);
1943 
1944 	uint32_t payloadSize;
1945 	uint32_t outSize;
1946 
1947 	if (m_params.usePayload)
1948 	{
1949 		const auto totalMax = maxOutSize + maxPayloadSize;
1950 
1951 		if (totalMax <= meshProperties.maxMeshPayloadAndOutputMemorySize)
1952 		{
1953 			payloadSize	= maxPayloadSize;
1954 			outSize		= maxOutSize;
1955 		}
1956 		else
1957 		{
1958 			payloadSize	= maxPayloadSize;
1959 			outSize		= meshProperties.maxMeshPayloadAndOutputMemorySize - payloadSize;
1960 		}
1961 	}
1962 	else
1963 	{
1964 		payloadSize	= 0u;
1965 		outSize		= maxOutSize;
1966 	}
1967 
1968 	// This uses the equation in "Mesh Shader Output" spec section. Note per-vertex data already has gl_Position and gl_PointSize.
1969 	// Also note gl_PointSize uses 1 effective location (4 scalar components) despite being a float.
1970 	const auto granularity			= ((m_params.locationType == LocationType::PER_PRIMITIVE)
1971 									? meshProperties.meshOutputPerPrimitiveGranularity
1972 									: meshProperties.meshOutputPerVertexGranularity);
1973 	const auto actualPoints			= de::roundUp(kMaxPoints, granularity);
1974 	const auto sizeMultiplier		= actualPoints * kUvec4Size;
1975 	const auto builtinDataSize		= (16u/*gl_Position*/ + 16u/*gl_PointSize*/) * actualPoints;
1976 	const auto locationsDataSize	= (outSize - builtinDataSize) / numViewFactor;
1977 	const auto maxTotalLocations	= meshProperties.maxMeshOutputComponents / kUvec4Comp - 2u; // gl_Position and gl_PointSize use 1 location each.
1978 	const auto locationCount		= std::min(locationsDataSize / sizeMultiplier, maxTotalLocations);
1979 
1980 	ParamsFromContext params;
1981 	params.payloadElements	= payloadSize / kPayloadElementSize;
1982 	params.locationCount	= locationCount;
1983 
1984 	auto& log = context.getTestContext().getLog();
1985 	{
1986 		const auto actualOuputSize = builtinDataSize + locationCount * sizeMultiplier * numViewFactor;
1987 
1988 		log << tcu::TestLog::Message << "Payload elements: " << params.payloadElements << tcu::TestLog::EndMessage;
1989 		log << tcu::TestLog::Message << "Location count: " << params.locationCount << tcu::TestLog::EndMessage;
1990 		log << tcu::TestLog::Message << "Max mesh payload and output size (bytes): " << meshProperties.maxMeshPayloadAndOutputMemorySize << tcu::TestLog::EndMessage;
1991 		log << tcu::TestLog::Message << "Max output size (bytes): " << maxOutSize << tcu::TestLog::EndMessage;
1992 		log << tcu::TestLog::Message << "Payload size (bytes): " << payloadSize << tcu::TestLog::EndMessage;
1993 		log << tcu::TestLog::Message << "Output data size (bytes): " << actualOuputSize << tcu::TestLog::EndMessage;
1994 		log << tcu::TestLog::Message << "Output + payload size (bytes): " << (payloadSize + actualOuputSize) << tcu::TestLog::EndMessage;
1995 	}
1996 
1997 	return params;
1998 }
1999 
createInstance(Context & context) const2000 TestInstance* MaxMeshOutputSizeCase::createInstance (Context &context) const
2001 {
2002 	const auto		ctxParams		= getParamsFromContext(context);
2003 	SpecConstVector	specConstVec	{ ctxParams.payloadElements, ctxParams.locationCount };
2004 	const auto		numViews		= (m_params.isMultiView() ? kNumViews : 1u);
2005 
2006 	return new MaxMeshOutputSizeInstance(context, std::move(specConstVec), numViews);
2007 }
2008 
initPrograms(vk::SourceCollections & programCollection) const2009 void MaxMeshOutputSizeCase::initPrograms (vk::SourceCollections& programCollection) const
2010 {
2011 	const auto			buildOptions		= getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
2012 	const std::string	locationQualifier	= ((m_params.locationType == LocationType::PER_PRIMITIVE) ? "perprimitiveEXT" : "");
2013 	const std::string	multiViewExtDecl	= "#extension GL_EXT_multiview : enable\n";
2014 
2015 	const std::string scDecl =
2016 		"layout (constant_id=0) const uint payloadElements = 1u;\n"
2017 		"layout (constant_id=1) const uint locationCount = 1u;\n"
2018 		;
2019 
2020 	std::string taskPayload;
2021 	std::string payloadVerification	= "    bool payloadOK = true;\n";
2022 	std::string locStruct			=
2023 		"struct LocationBlock {\n"
2024 		"    uvec4 elements[locationCount];\n"
2025 		"};\n"
2026 		;
2027 
2028 	if (m_params.usePayload)
2029 	{
2030 		taskPayload =
2031 			"struct TaskData {\n"
2032 			"    uint elements[payloadElements];\n"
2033 			"};\n"
2034 			"taskPayloadSharedEXT TaskData td;\n"
2035 			;
2036 
2037 		std::ostringstream task;
2038 		task
2039 			<< "#version 450\n"
2040 			<< "#extension GL_EXT_mesh_shader : enable\n"
2041 			<< "\n"
2042 			<< "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
2043 			<< scDecl
2044 			<< taskPayload
2045 			<< "\n"
2046 			<< "void main (void) {\n"
2047 			<< "    for (uint i = 0; i < payloadElements; ++i) {\n"
2048 			<< "        td.elements[i] = 1000000u + i;\n"
2049 			<< "    }\n"
2050 			<< "    EmitMeshTasksEXT(1u, 1u, 1u);\n"
2051 			<< "}\n"
2052 			;
2053 		programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
2054 
2055 		payloadVerification +=
2056 			"    for (uint i = 0; i < payloadElements; ++i) {\n"
2057 			"        if (td.elements[i] != 1000000u + i) {\n"
2058 			"            payloadOK = false;\n"
2059 			"            break;\n"
2060 			"        }\n"
2061 			"    }\n"
2062 			;
2063 	}
2064 
2065 	// Do values depend on view indices?
2066 	const bool			valFromViewIndex	= m_params.viewIndexInMesh();
2067 	const std::string	extraCompOffset		= (valFromViewIndex ? "(4u * uint(gl_ViewIndex))" : "0u");
2068 
2069 	{
2070 		const std::string multiViewExt = (valFromViewIndex ? multiViewExtDecl : "");
2071 
2072 		std::ostringstream mesh;
2073 		mesh
2074 			<< "#version 450\n"
2075 			<< "#extension GL_EXT_mesh_shader : enable\n"
2076 			<< multiViewExt
2077 			<< "\n"
2078 			<< "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
2079 			<< "layout (points) out;\n"
2080 			<< "layout (max_vertices=" << kMaxPoints << ", max_primitives=" << kMaxPoints << ") out;\n"
2081 			<< "\n"
2082 			<< "out gl_MeshPerVertexEXT {\n"
2083 			<< "    vec4  gl_Position;\n"
2084 			<< "    float gl_PointSize;\n"
2085 			<< "} gl_MeshVerticesEXT[];\n"
2086 			<< "\n"
2087 			<< scDecl
2088 			<< taskPayload
2089 			<< "\n"
2090 			<< locStruct
2091 			<< "layout (location=0) out " << locationQualifier << " LocationBlock loc[];\n"
2092 			<< "\n"
2093 			<< "void main (void) {\n"
2094 			<< payloadVerification
2095 			<< "\n"
2096 			<< "    SetMeshOutputsEXT(" << kMaxPoints << ", " << kMaxPoints << ");\n"
2097 			<< "    const uint payloadOffset = (payloadOK ? 10u : 0u);\n"
2098 			<< "    const uint compOffset = " << extraCompOffset << ";\n"
2099 			<< "    for (uint pointIdx = 0u; pointIdx < " << kMaxPoints << "; ++pointIdx) {\n"
2100 			<< "        const float xCoord = ((float(pointIdx) + 0.5) / float(" << kMaxPoints << ")) * 2.0 - 1.0;\n"
2101 			<< "        gl_MeshVerticesEXT[pointIdx].gl_Position = vec4(xCoord, 0.0, 0.0, 1.0);\n"
2102 			<< "        gl_MeshVerticesEXT[pointIdx].gl_PointSize = 1.0f;\n"
2103 			<< "        gl_PrimitivePointIndicesEXT[pointIdx] = pointIdx;\n"
2104 			<< "        for (uint elemIdx = 0; elemIdx < locationCount; ++elemIdx) {\n"
2105 			<< "            const uint baseVal = 200000000u + 100000u * pointIdx + 1000u * elemIdx + payloadOffset;\n"
2106 			<< "            loc[pointIdx].elements[elemIdx] = uvec4(baseVal + 1u + compOffset, baseVal + 2u + compOffset, baseVal + 3u + compOffset, baseVal + 4u + compOffset);\n"
2107 			<< "        }\n"
2108 			<< "    }\n"
2109 			<< "}\n"
2110 			;
2111 		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
2112 	}
2113 
2114 	{
2115 		const std::string multiViewExt	= (m_params.isMultiView() ? multiViewExtDecl							: "");
2116 		const std::string outColorMod	= (m_params.isMultiView() ? "    outColor.r += float(gl_ViewIndex);\n"	: "");
2117 
2118 		std::ostringstream frag;
2119 		frag
2120 			<< "#version 450\n"
2121 			<< "#extension GL_EXT_mesh_shader : enable\n"
2122 			<< multiViewExt
2123 			<< "\n"
2124 			<< "layout (location=0) out vec4 outColor;\n"
2125 			<< scDecl
2126 			<< locStruct
2127 			<< "layout (location=0) in flat " << locationQualifier << " LocationBlock loc;\n"
2128 			<< "\n"
2129 			<< "void main (void) {\n"
2130 			<< "    bool pointOK = true;\n"
2131 			<< "    const uint pointIdx = uint(gl_FragCoord.x);\n"
2132 			<< "    const uint expectedPayloadOffset = 10u;\n"
2133 			<< "    const uint compOffset = " << extraCompOffset << ";\n"
2134 			<< "    for (uint elemIdx = 0; elemIdx < locationCount; ++elemIdx) {\n"
2135 			<< "        const uint baseVal = 200000000u + 100000u * pointIdx + 1000u * elemIdx + expectedPayloadOffset;\n"
2136 			<< "        const uvec4 expectedVal = uvec4(baseVal + 1u + compOffset, baseVal + 2u + compOffset, baseVal + 3u + compOffset, baseVal + 4u + compOffset);\n"
2137 			<< "        if (loc.elements[elemIdx] != expectedVal) {\n"
2138 			<< "            pointOK = false;\n"
2139 			<< "            break;\n"
2140 			<< "        }\n"
2141 			<< "    }\n"
2142 			<< "    const vec4 okColor = vec4(0.0, 0.0, 1.0, 1.0);\n"
2143 			<< "    const vec4 failColor = vec4(0.0, 0.0, 0.0, 1.0);\n"
2144 			<< "    outColor = (pointOK ? okColor : failColor);\n"
2145 			<< outColorMod
2146 			<< "}\n"
2147 			;
2148 		programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
2149 	}
2150 }
2151 
makeCustomRenderPass(const DeviceInterface & vkd,VkDevice device,uint32_t layerCount,VkFormat format)2152 Move<VkRenderPass> MaxMeshOutputSizeInstance::makeCustomRenderPass (const DeviceInterface& vkd, VkDevice device, uint32_t layerCount, VkFormat format)
2153 {
2154 	DE_ASSERT(layerCount > 0u);
2155 
2156 	const VkAttachmentDescription colorAttachmentDescription =
2157 	{
2158 		0u,											// VkAttachmentDescriptionFlags    flags
2159 		format,										// VkFormat                        format
2160 		VK_SAMPLE_COUNT_1_BIT,						// VkSampleCountFlagBits           samples
2161 		VK_ATTACHMENT_LOAD_OP_CLEAR,				// VkAttachmentLoadOp              loadOp
2162 		VK_ATTACHMENT_STORE_OP_STORE,				// VkAttachmentStoreOp             storeOp
2163 		VK_ATTACHMENT_LOAD_OP_DONT_CARE,			// VkAttachmentLoadOp              stencilLoadOp
2164 		VK_ATTACHMENT_STORE_OP_DONT_CARE,			// VkAttachmentStoreOp             stencilStoreOp
2165 		VK_IMAGE_LAYOUT_UNDEFINED,					// VkImageLayout                   initialLayout
2166 		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,	// VkImageLayout                   finalLayout
2167 	};
2168 
2169 	const VkAttachmentReference colorAttachmentRef = makeAttachmentReference(0u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
2170 
2171 	const VkSubpassDescription subpassDescription =
2172 	{
2173 		0u,									// VkSubpassDescriptionFlags       flags
2174 		VK_PIPELINE_BIND_POINT_GRAPHICS,	// VkPipelineBindPoint             pipelineBindPoint
2175 		0u,									// deUint32                        inputAttachmentCount
2176 		nullptr,							// const VkAttachmentReference*    pInputAttachments
2177 		1u,									// deUint32                        colorAttachmentCount
2178 		&colorAttachmentRef,				// const VkAttachmentReference*    pColorAttachments
2179 		nullptr,							// const VkAttachmentReference*    pResolveAttachments
2180 		nullptr,							// const VkAttachmentReference*    pDepthStencilAttachment
2181 		0u,									// deUint32                        preserveAttachmentCount
2182 		nullptr								// const deUint32*                 pPreserveAttachments
2183 	};
2184 
2185 	const uint32_t viewMask = ((1u << layerCount) - 1u);
2186 	const VkRenderPassMultiviewCreateInfo multiviewCreateInfo =
2187 	{
2188 		VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO,	//	VkStructureType	sType;
2189 		nullptr,												//	const void*		pNext;
2190 		1u,														//	uint32_t		subpassCount;
2191 		&viewMask,												//	const uint32_t*	pViewMasks;
2192 		0u,														//	uint32_t		dependencyCount;
2193 		nullptr,												//	const int32_t*	pViewOffsets;
2194 		1u,														//	uint32_t		correlationMaskCount;
2195 		&viewMask,												//	const uint32_t*	pCorrelationMasks;
2196 	};
2197 
2198 	const void* pNext = ((layerCount > 1u) ? &multiviewCreateInfo : nullptr);
2199 
2200 	const VkRenderPassCreateInfo renderPassInfo =
2201 	{
2202 		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,				// VkStructureType                   sType
2203 		pNext,													// const void*                       pNext
2204 		0u,														// VkRenderPassCreateFlags           flags
2205 		1u,														// deUint32                          attachmentCount
2206 		&colorAttachmentDescription,							// const VkAttachmentDescription*    pAttachments
2207 		1u,														// deUint32                          subpassCount
2208 		&subpassDescription,									// const VkSubpassDescription*       pSubpasses
2209 		0u,														// deUint32                          dependencyCount
2210 		nullptr,												// const VkSubpassDependency*        pDependencies
2211 	};
2212 
2213 	return createRenderPass(vkd, device, &renderPassInfo);
2214 }
2215 
iterate(void)2216 tcu::TestStatus MaxMeshOutputSizeInstance::iterate (void)
2217 {
2218 	const auto&			vkd				= m_context.getDeviceInterface();
2219 	const auto			device			= m_context.getDevice();
2220 	auto&				alloc			= m_context.getDefaultAllocator();
2221 	const auto			queueIndex		= m_context.getUniversalQueueFamilyIndex();
2222 	const auto			queue			= m_context.getUniversalQueue();
2223 
2224 	const auto			colorFormat		= VK_FORMAT_R8G8B8A8_UNORM;
2225 	const auto			tcuColorFormat	= mapVkFormat(colorFormat);
2226 	const auto			pixelSize		= static_cast<uint32_t>(tcu::getPixelSize(tcuColorFormat));
2227 	const auto			colorUsage		= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2228 	const auto			imageViewType	= ((m_numViews > 1u) ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D);
2229 	const auto			fbExtent		= makeExtent3D(MaxMeshOutputSizeCase::kMaxPoints, 1u, 1u);
2230 	const tcu::IVec3	iExtent3D		(static_cast<int>(fbExtent.width), static_cast<int>(fbExtent.height), static_cast<int>(m_numViews));
2231 	const tcu::Vec4		clearColor		(0.0f, 0.0f, 0.0f, 1.0f);
2232 	const tcu::Vec4		expectedColor	(0.0f, 0.0f, 1.0f, 1.0f);
2233 	const tcu::Vec4		colorThreshold	(0.0f, 0.0f, 0.0f, 0.0f);
2234 
2235 	// Create color attachment.
2236 	const VkImageCreateInfo colorAttachmentCreatInfo =
2237 	{
2238 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
2239 		nullptr,								//	const void*				pNext;
2240 		0u,										//	VkImageCreateFlags		flags;
2241 		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
2242 		colorFormat,							//	VkFormat				format;
2243 		fbExtent,								//	VkExtent3D				extent;
2244 		1u,										//	uint32_t				mipLevels;
2245 		m_numViews,								//	uint32_t				arrayLayers;
2246 		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
2247 		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
2248 		colorUsage,								//	VkImageUsageFlags		usage;
2249 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
2250 		0u,										//	uint32_t				queueFamilyIndexCount;
2251 		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
2252 		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
2253 	};
2254 	ImageWithMemory	colorAttachment		(vkd, device, alloc, colorAttachmentCreatInfo, MemoryRequirement::Any);
2255 	const auto		colorSRR			= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, m_numViews);
2256 	const auto		colorSRL			= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, m_numViews);
2257 	const auto		colorAttachmentView	= makeImageView(vkd, device, colorAttachment.get(), imageViewType, colorFormat, colorSRR);
2258 
2259 	// Verification buffer for the color attachment.
2260 	DE_ASSERT(fbExtent.depth == 1u);
2261 	const auto			verificationBufferUsage			= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
2262 	const auto			verificationBufferSize			= static_cast<VkDeviceSize>(pixelSize * fbExtent.width * fbExtent.height * m_numViews);
2263 	const auto			verificationBufferCreateInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
2264 	BufferWithMemory	verificationBuffer				(vkd, device, alloc, verificationBufferCreateInfo, MemoryRequirement::HostVisible);
2265 	auto&				verificationBufferAlloc			= verificationBuffer.getAllocation();
2266 	void*				verificationBufferData			= verificationBufferAlloc.getHostPtr();
2267 
2268 	deMemset(verificationBufferData, 0, static_cast<size_t>(verificationBufferSize));
2269 
2270 	const auto	pipelineLayout	= makePipelineLayout(vkd, device);
2271 	const auto	renderPass		= makeCustomRenderPass(vkd, device, m_numViews, colorFormat);
2272 	const auto	framebuffer		= makeFramebuffer(vkd, device, renderPass.get(), colorAttachmentView.get(), fbExtent.width, fbExtent.height, 1u);
2273 
2274 	const auto&	binaries	= m_context.getBinaryCollection();
2275 	const bool	hasTask		= binaries.contains("task");
2276 	const auto	taskModule	= (hasTask ? createShaderModule(vkd, device, binaries.get("task")) : Move<VkShaderModule>());
2277 	const auto	meshModule	= createShaderModule(vkd, device, binaries.get("mesh"));
2278 	const auto	fragModule	= createShaderModule(vkd, device, binaries.get("frag"));
2279 
2280 	const std::vector<VkViewport>	viewports	(1u, makeViewport(fbExtent));
2281 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(fbExtent));
2282 
2283 	const auto					specMap		= makeSpecializationMap();
2284 	const VkSpecializationInfo	specInfo	=
2285 	{
2286 		static_cast<uint32_t>(specMap.size()),	//	uint32_t						mapEntryCount;
2287 		de::dataOrNull(specMap),				//	const VkSpecializationMapEntry*	pMapEntries;
2288 		de::dataSize(m_specConstants),			//	size_t							dataSize;
2289 		de::dataOrNull(m_specConstants),		//	const void*						pData;
2290 	};
2291 
2292 	std::vector<VkPipelineShaderStageCreateInfo>	shaderStages;
2293 	VkPipelineShaderStageCreateInfo					stageInfo		=
2294 	{
2295 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	//	VkStructureType						sType;
2296 		nullptr,												//	const void*							pNext;
2297 		0u,														//	VkPipelineShaderStageCreateFlags	flags;
2298 		VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM,						//	VkShaderStageFlagBits				stage;
2299 		DE_NULL,												//	VkShaderModule						module;
2300 		"main",													//	const char*							pName;
2301 		&specInfo,												//	const VkSpecializationInfo*			pSpecializationInfo;
2302 	};
2303 
2304 	if (hasTask)
2305 	{
2306 		stageInfo.stage = VK_SHADER_STAGE_TASK_BIT_EXT;
2307 		stageInfo.module = taskModule.get();
2308 		shaderStages.push_back(stageInfo);
2309 	}
2310 
2311 	{
2312 		stageInfo.stage = VK_SHADER_STAGE_MESH_BIT_EXT;
2313 		stageInfo.module = meshModule.get();
2314 		shaderStages.push_back(stageInfo);
2315 	}
2316 
2317 	{
2318 		stageInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
2319 		stageInfo.module = fragModule.get();
2320 		shaderStages.push_back(stageInfo);
2321 	}
2322 
2323 	const auto pipeline = makeGraphicsPipeline(vkd, device,
2324 		DE_NULL, pipelineLayout.get(), 0u,
2325 		shaderStages, renderPass.get(), viewports, scissors);
2326 
2327 	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
2328 	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2329 	const auto cmdBuffer	= cmdBufferPtr.get();
2330 
2331 	beginCommandBuffer(vkd, cmdBuffer);
2332 	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
2333 	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
2334 	vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
2335 	endRenderPass(vkd, cmdBuffer);
2336 
2337 	const auto preTransferBarrier = makeImageMemoryBarrier(
2338 		VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
2339 		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2340 		colorAttachment.get(), colorSRR);
2341 	cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preTransferBarrier);
2342 
2343 	const auto copyRegion = makeBufferImageCopy(fbExtent, colorSRL);
2344 	vkd.cmdCopyImageToBuffer(cmdBuffer, colorAttachment.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
2345 
2346 	const auto postTransferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
2347 	cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &postTransferBarrier);
2348 
2349 	endCommandBuffer(vkd, cmdBuffer);
2350 	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
2351 
2352 	invalidateAlloc(vkd, device, verificationBufferAlloc);
2353 	tcu::ConstPixelBufferAccess resultAccess	(tcuColorFormat, iExtent3D, verificationBufferData);
2354 	tcu::TextureLevel			referenceLevel	(tcuColorFormat, iExtent3D.x(), iExtent3D.y(), iExtent3D.z());
2355 	tcu::PixelBufferAccess		referenceAccess = referenceLevel.getAccess();
2356 
2357 	for (int z = 0; z < iExtent3D.z(); ++z)
2358 	{
2359 		const auto layer = tcu::getSubregion(referenceAccess, 0, 0, z, iExtent3D.x(), iExtent3D.y(), 1);
2360 		const tcu::Vec4 expectedLayerColor(static_cast<float>(z), expectedColor.y(), expectedColor.z(), expectedColor.w());
2361 		tcu::clear(layer, expectedLayerColor);
2362 	}
2363 
2364 	auto& log = m_context.getTestContext().getLog();
2365 	if (!tcu::floatThresholdCompare(log, "Result", "", referenceAccess, resultAccess, colorThreshold, tcu::COMPARE_LOG_ON_ERROR))
2366 		TCU_FAIL("Check log for details");
2367 
2368 	return tcu::TestStatus::pass("Pass");
2369 }
2370 
2371 } // anonymous
2372 
createMeshShaderPropertyTestsEXT(tcu::TestContext & testCtx)2373 tcu::TestCaseGroup* createMeshShaderPropertyTestsEXT (tcu::TestContext& testCtx)
2374 {
2375 	using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
2376 
2377 	GroupPtr mainGroup (new tcu::TestCaseGroup(testCtx, "properties", "Tests checking mesh shading properties"));
2378 
2379 	const struct
2380 	{
2381 		PayLoadShMemSizeType	testType;
2382 		const char*				name;
2383 	} taskPayloadShMemCases[] =
2384 	{
2385 		{ PayLoadShMemSizeType::PAYLOAD,		"task_payload_size"						},
2386 		{ PayLoadShMemSizeType::SHARED_MEMORY,	"task_shared_memory_size"				},
2387 		{ PayLoadShMemSizeType::BOTH,			"task_payload_and_shared_memory_size"	},
2388 	};
2389 
2390 	for (const auto& taskPayloadShMemCase : taskPayloadShMemCases)
2391 	{
2392 		const TaskPayloadShMemSizeParams params { taskPayloadShMemCase.testType };
2393 		mainGroup->addChild(new TaskPayloadShMemSizeCase(testCtx, taskPayloadShMemCase.name, "", params));
2394 	}
2395 
2396 	mainGroup->addChild(new MaxViewIndexCase(testCtx, "max_view_index", ""));
2397 	mainGroup->addChild(new MaxOutputLayersCase(testCtx, "max_output_layers", ""));
2398 
2399 	const struct
2400 	{
2401 		MaxPrimVertType		limitPrimVertType;
2402 		const char*			prefix;
2403 	} limitPrimVertCases[] =
2404 	{
2405 		{ MaxPrimVertType::PRIMITIVES,	"max_mesh_output_primitives_"	},
2406 		{ MaxPrimVertType::VERTICES,	"max_mesh_output_vertices_"		},
2407 	};
2408 
2409 	const uint32_t itemCounts[] = { 256u, 512u, 1024u, 2048u };
2410 
2411 	for (const auto& primVertCase : limitPrimVertCases)
2412 	{
2413 		for (const auto& count : itemCounts)
2414 		{
2415 			const MaxPrimVertParams params { primVertCase.limitPrimVertType, count };
2416 			mainGroup->addChild(new MaxMeshOutputPrimVertCase(testCtx, primVertCase.prefix + std::to_string(count), "", params));
2417 		}
2418 	}
2419 
2420 	mainGroup->addChild(new MaxMeshOutputComponentsCase(testCtx, "max_mesh_output_components", ""));
2421 
2422 	const struct
2423 	{
2424 		PayLoadShMemSizeType	testType;
2425 		const char*				name;
2426 	} meshPayloadShMemCases[] =
2427 	{
2428 		// No actual property for the first one, combines the two properties involving payload size.
2429 		{ PayLoadShMemSizeType::PAYLOAD,		"mesh_payload_size"						},
2430 		{ PayLoadShMemSizeType::SHARED_MEMORY,	"mesh_shared_memory_size"				},
2431 		{ PayLoadShMemSizeType::BOTH,			"mesh_payload_and_shared_memory_size"	},
2432 	};
2433 	for (const auto& meshPayloadShMemCase : meshPayloadShMemCases)
2434 	{
2435 		const MeshPayloadShMemSizeParams params { meshPayloadShMemCase.testType };
2436 		mainGroup->addChild(new MeshPayloadShMemSizeCase(testCtx, meshPayloadShMemCase.name, "", params));
2437 	}
2438 
2439 	const struct
2440 	{
2441 		bool			usePayload;
2442 		const char*		suffix;
2443 	} meshOutputPayloadCases[] =
2444 	{
2445 		{ false,	"_without_payload"	},
2446 		{ true,		"_with_payload"		},
2447 	};
2448 
2449 	const struct
2450 	{
2451 		LocationType	locationType;
2452 		const char*		suffix;
2453 	} locationTypeCases[] =
2454 	{
2455 		{ LocationType::PER_PRIMITIVE,	"_per_primitive"	},
2456 		{ LocationType::PER_VERTEX,		"_per_vertex"		},
2457 	};
2458 
2459 	const struct
2460 	{
2461 		ViewIndexType	viewIndexType;
2462 		const char*		suffix;
2463 	} multiviewCases[] =
2464 	{
2465 		{ ViewIndexType::NO_VIEW_INDEX,		"_no_view_index"				},
2466 		{ ViewIndexType::VIEW_INDEX_FRAG,	"_view_index_in_frag"			},
2467 		{ ViewIndexType::VIEW_INDEX_BOTH,	"_view_index_in_mesh_and_frag"	},
2468 	};
2469 
2470 	for (const auto& meshOutputPayloadCase : meshOutputPayloadCases)
2471 	{
2472 		for (const auto& locationTypeCase : locationTypeCases)
2473 		{
2474 			for (const auto& multiviewCase : multiviewCases)
2475 			{
2476 				const std::string			name	= std::string("max_mesh_output_size") + meshOutputPayloadCase.suffix + locationTypeCase.suffix + multiviewCase.suffix;
2477 				const MaxMeshOutputParams	params	=
2478 				{
2479 					meshOutputPayloadCase.usePayload,	//	bool			usePayload;
2480 					locationTypeCase.locationType,		//	LocationType	locationType;
2481 					multiviewCase.viewIndexType,		//	ViewIndexType	viewIndexType;
2482 				};
2483 
2484 				mainGroup->addChild(new MaxMeshOutputSizeCase(testCtx, name, "", params));
2485 			}
2486 		}
2487 	}
2488 
2489 	return mainGroup.release();
2490 }
2491 } // MeshShader
2492 } // vkt
2493